aboutsummaryrefslogtreecommitdiffstats
path: root/net/tipc
diff options
context:
space:
mode:
Diffstat (limited to 'net/tipc')
-rw-r--r--net/tipc/Kconfig5
-rw-r--r--net/tipc/addr.c8
-rw-r--r--net/tipc/addr.h45
-rw-r--r--net/tipc/bcast.c40
-rw-r--r--net/tipc/bcast.h13
-rw-r--r--net/tipc/bearer.c151
-rw-r--r--net/tipc/bearer.h18
-rw-r--r--net/tipc/core.c9
-rw-r--r--net/tipc/core.h19
-rw-r--r--net/tipc/crypto.c1122
-rw-r--r--net/tipc/crypto.h49
-rw-r--r--net/tipc/discover.c16
-rw-r--r--net/tipc/eth_media.c6
-rw-r--r--net/tipc/group.c19
-rw-r--r--net/tipc/group.h3
-rw-r--r--net/tipc/ib_media.c2
-rw-r--r--net/tipc/link.c615
-rw-r--r--net/tipc/link.h12
-rw-r--r--net/tipc/monitor.c81
-rw-r--r--net/tipc/msg.c123
-rw-r--r--net/tipc/msg.h128
-rw-r--r--net/tipc/name_distr.c267
-rw-r--r--net/tipc/name_distr.h11
-rw-r--r--net/tipc/name_table.c463
-rw-r--r--net/tipc/name_table.h77
-rw-r--r--net/tipc/net.c37
-rw-r--r--net/tipc/net.h1
-rw-r--r--net/tipc/netlink.c5
-rw-r--r--net/tipc/netlink_compat.c42
-rw-r--r--net/tipc/node.c358
-rw-r--r--net/tipc/node.h10
-rw-r--r--net/tipc/socket.c849
-rw-r--r--net/tipc/socket.h2
-rw-r--r--net/tipc/subscr.c85
-rw-r--r--net/tipc/subscr.h37
-rw-r--r--net/tipc/sysctl.c18
-rw-r--r--net/tipc/topsrv.c59
-rw-r--r--net/tipc/trace.c2
-rw-r--r--net/tipc/trace.h17
-rw-r--r--net/tipc/udp_media.c31
40 files changed, 3107 insertions, 1748 deletions
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 716b61a701a8..be1c4003d67d 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -6,7 +6,8 @@
menuconfig TIPC
tristate "The TIPC Protocol"
depends on INET
- ---help---
+ depends on IPV6 || IPV6=n
+ help
The Transparent Inter Process Communication (TIPC) protocol is
specially designed for intra cluster communication. This protocol
originates from Ericsson where it has been used in carrier grade
@@ -55,6 +56,6 @@ config TIPC_DIAG
tristate "TIPC: socket monitoring interface"
depends on TIPC
default y
- ---help---
+ help
Support for TIPC socket monitoring interface used by ss tool.
If unsure, say Y.
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 0f1eaed1bd1b..fd0796269eed 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -3,6 +3,7 @@
*
* Copyright (c) 2000-2006, 2018, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -55,12 +56,11 @@ bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr)
void tipc_set_node_id(struct net *net, u8 *id)
{
struct tipc_net *tn = tipc_net(net);
- u32 *tmp = (u32 *)id;
memcpy(tn->node_id, id, NODE_ID_LEN);
tipc_nodeid2string(tn->node_id_string, id);
- tn->trial_addr = tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3];
- pr_info("Own node identity %s, cluster identity %u\n",
+ tn->trial_addr = hash128to32(id);
+ pr_info("Node identity %s, cluster identity %u\n",
tipc_own_id_string(net), tn->net_id);
}
@@ -76,7 +76,7 @@ void tipc_set_node_addr(struct net *net, u32 addr)
}
tn->trial_addr = addr;
tn->addr_trial_end = jiffies;
- pr_info("32-bit node address hash set to %x\n", addr);
+ pr_info("Node number set to %u\n", addr);
}
char *tipc_nodeid2string(char *str, u8 *id)
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 31bee0ea7b3e..0772cfadaa0d 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -3,6 +3,7 @@
*
* Copyright (c) 2000-2006, 2018, Ericsson AB
* Copyright (c) 2004-2005, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -43,6 +44,50 @@
#include <net/netns/generic.h>
#include "core.h"
+/* Struct tipc_uaddr: internal version of struct sockaddr_tipc.
+ * Must be kept aligned both regarding field positions and size.
+ */
+struct tipc_uaddr {
+ unsigned short family;
+ unsigned char addrtype;
+ signed char scope;
+ union {
+ struct {
+ struct tipc_service_addr sa;
+ u32 lookup_node;
+ };
+ struct tipc_service_range sr;
+ struct tipc_socket_addr sk;
+ };
+};
+
+static inline void tipc_uaddr(struct tipc_uaddr *ua, u32 atype, u32 scope,
+ u32 type, u32 lower, u32 upper)
+{
+ ua->family = AF_TIPC;
+ ua->addrtype = atype;
+ ua->scope = scope;
+ ua->sr.type = type;
+ ua->sr.lower = lower;
+ ua->sr.upper = upper;
+}
+
+static inline bool tipc_uaddr_valid(struct tipc_uaddr *ua, int len)
+{
+ u32 atype;
+
+ if (len < sizeof(struct sockaddr_tipc))
+ return false;
+ atype = ua->addrtype;
+ if (ua->family != AF_TIPC)
+ return false;
+ if (atype == TIPC_SERVICE_ADDR || atype == TIPC_SOCKET_ADDR)
+ return true;
+ if (atype == TIPC_SERVICE_RANGE)
+ return ua->sr.upper >= ua->sr.lower;
+ return false;
+}
+
static inline u32 tipc_own_addr(struct net *net)
{
return tipc_net(net)->node_addr;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 4c20be08b9c4..593846d25214 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -46,6 +46,7 @@
#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */
const char tipc_bclink_name[] = "broadcast-link";
+unsigned long sysctl_tipc_bc_retruni __read_mostly;
/**
* struct tipc_bc_base - base structure for keeping broadcast send state
@@ -107,6 +108,8 @@ static void tipc_bcbase_select_primary(struct net *net)
{
struct tipc_bc_base *bb = tipc_bc_base(net);
int all_dests = tipc_link_bc_peers(bb->link);
+ int max_win = tipc_link_max_win(bb->link);
+ int min_win = tipc_link_min_win(bb->link);
int i, mtu, prim;
bb->primary_bearer = INVALID_BEARER_ID;
@@ -120,8 +123,12 @@ static void tipc_bcbase_select_primary(struct net *net)
continue;
mtu = tipc_bearer_mtu(net, i);
- if (mtu < tipc_link_mtu(bb->link))
+ if (mtu < tipc_link_mtu(bb->link)) {
tipc_link_set_mtu(bb->link, mtu);
+ tipc_link_set_queue_limits(bb->link,
+ min_win,
+ max_win);
+ }
bb->bcast_support &= tipc_bearer_bcast_support(net, i);
if (bb->dests[i] < all_dests)
continue;
@@ -249,8 +256,8 @@ static void tipc_bcast_select_xmit_method(struct net *net, int dests,
* Consumes the buffer chain.
* Returns 0 if success, otherwise errno: -EHOSTUNREACH,-EMSGSIZE
*/
-static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
- u16 *cong_link_cnt)
+int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
+ u16 *cong_link_cnt)
{
struct tipc_link *l = tipc_bc_sndlink(net);
struct sk_buff_head xmitq;
@@ -474,7 +481,7 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
__skb_queue_head_init(&xmitq);
tipc_bcast_lock(net);
- tipc_link_bc_ack_rcv(l, acked, &xmitq);
+ tipc_link_bc_ack_rcv(l, acked, 0, NULL, &xmitq, NULL);
tipc_bcast_unlock(net);
tipc_bcbase_xmit(net, &xmitq);
@@ -489,9 +496,11 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
* RCU is locked, no other locks set
*/
int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
- struct tipc_msg *hdr)
+ struct tipc_msg *hdr,
+ struct sk_buff_head *retrq)
{
struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
+ struct tipc_gap_ack_blks *ga;
struct sk_buff_head xmitq;
int rc = 0;
@@ -501,8 +510,13 @@ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
if (msg_type(hdr) != STATE_MSG) {
tipc_link_bc_init_rcv(l, hdr);
} else if (!msg_bc_ack_invalid(hdr)) {
- tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq);
- rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq);
+ tipc_get_gap_ack_blks(&ga, l, hdr, false);
+ if (!sysctl_tipc_bc_retruni)
+ retrq = &xmitq;
+ rc = tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr),
+ msg_bc_gap(hdr), ga, &xmitq,
+ retrq);
+ rc |= tipc_link_bc_sync_rcv(l, hdr, &xmitq);
}
tipc_bcast_unlock(net);
@@ -555,10 +569,8 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l)
tipc_sk_rcv(net, inputq);
}
-int tipc_bclink_reset_stats(struct net *net)
+int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l)
{
- struct tipc_link *l = tipc_bc_sndlink(net);
-
if (!l)
return -ENOPROTOOPT;
@@ -579,7 +591,7 @@ static int tipc_bc_link_set_queue_limits(struct net *net, u32 max_win)
if (max_win > TIPC_MAX_LINK_WIN)
return -EINVAL;
tipc_bcast_lock(net);
- tipc_link_set_queue_limits(l, BCLINK_WIN_MIN, max_win);
+ tipc_link_set_queue_limits(l, tipc_link_min_win(l), max_win);
tipc_bcast_unlock(net);
return 0;
}
@@ -686,8 +698,8 @@ int tipc_bcast_init(struct net *net)
tn->bcbase = bb;
spin_lock_init(&tipc_net(net)->bclock);
- if (!tipc_link_bc_create(net, 0, 0,
- FB_MTU,
+ if (!tipc_link_bc_create(net, 0, 0, NULL,
+ one_page_mtu,
BCLINK_WIN_DEFAULT,
BCLINK_WIN_DEFAULT,
0,
@@ -746,7 +758,7 @@ void tipc_nlist_purge(struct tipc_nlist *nl)
nl->local = false;
}
-u32 tipc_bcast_get_broadcast_mode(struct net *net)
+u32 tipc_bcast_get_mode(struct net *net)
{
struct tipc_bc_base *bb = tipc_bc_base(net);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 9e847d9617d3..2d9352dc7b0e 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -45,6 +45,7 @@ struct tipc_nl_msg;
struct tipc_nlist;
struct tipc_nitem;
extern const char tipc_bclink_name[];
+extern unsigned long sysctl_tipc_bc_retruni;
#define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000)
@@ -89,16 +90,20 @@ void tipc_bcast_toggle_rcast(struct net *net, bool supp);
int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
struct tipc_mc_method *method, struct tipc_nlist *dests,
u16 *cong_link_cnt);
+int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
+ u16 *cong_link_cnt);
int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
struct tipc_msg *hdr);
int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
- struct tipc_msg *hdr);
-int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
+ struct tipc_msg *hdr,
+ struct sk_buff_head *retrq);
+int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg,
+ struct tipc_link *bcl);
int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
-int tipc_bclink_reset_stats(struct net *net);
+int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l);
-u32 tipc_bcast_get_broadcast_mode(struct net *net);
+u32 tipc_bcast_get_mode(struct net *net);
u32 tipc_bcast_get_broadcast_ratio(struct net *net);
void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 34ca7b789eba..35cac7733fd3 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -72,6 +72,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
/**
* tipc_media_find - locates specified media object by name
+ * @name: name to locate
*/
struct tipc_media *tipc_media_find(const char *name)
{
@@ -86,6 +87,7 @@ struct tipc_media *tipc_media_find(const char *name)
/**
* media_find_id - locates specified media object by type identifier
+ * @type: type identifier to locate
*/
static struct tipc_media *media_find_id(u8 type)
{
@@ -100,6 +102,9 @@ static struct tipc_media *media_find_id(u8 type)
/**
* tipc_media_addr_printf - record media address in print buffer
+ * @buf: output buffer
+ * @len: output buffer size remaining
+ * @a: input media address
*/
int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a)
{
@@ -127,7 +132,7 @@ int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a)
* @name: ptr to bearer name string
* @name_parts: ptr to area for bearer name components (or NULL if not needed)
*
- * Returns 1 if bearer name is valid, otherwise 0.
+ * Return: 1 if bearer name is valid, otherwise 0.
*/
static int bearer_name_validate(const char *name,
struct tipc_bearer_names *name_parts)
@@ -139,10 +144,7 @@ static int bearer_name_validate(const char *name,
u32 if_len;
/* copy bearer name & ensure length is OK */
- name_copy[TIPC_MAX_BEARER_NAME - 1] = 0;
- /* need above in case non-Posix strncpy() doesn't pad with nulls */
- strncpy(name_copy, name, TIPC_MAX_BEARER_NAME);
- if (name_copy[TIPC_MAX_BEARER_NAME - 1] != 0)
+ if (strscpy(name_copy, name, TIPC_MAX_BEARER_NAME) < 0)
return 0;
/* ensure all component parts of bearer name are present */
@@ -169,6 +171,8 @@ static int bearer_name_validate(const char *name,
/**
* tipc_bearer_find - locates bearer object with matching bearer name
+ * @net: the applicable net namespace
+ * @name: bearer name to locate
*/
struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name)
{
@@ -231,10 +235,17 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
/**
* tipc_enable_bearer - enable bearer with the given name
+ * @net: the applicable net namespace
+ * @name: bearer name to enable
+ * @disc_domain: bearer domain
+ * @prio: bearer priority
+ * @attr: nlattr array
+ * @extack: netlink extended ack
*/
static int tipc_enable_bearer(struct net *net, const char *name,
u32 disc_domain, u32 prio,
- struct nlattr *attr[])
+ struct nlattr *attr[],
+ struct netlink_ext_ack *extack)
{
struct tipc_net *tn = tipc_net(net);
struct tipc_bearer_names b_names;
@@ -245,20 +256,23 @@ static int tipc_enable_bearer(struct net *net, const char *name,
int bearer_id = 0;
int res = -EINVAL;
char *errstr = "";
+ u32 i;
if (!bearer_name_validate(name, &b_names)) {
- errstr = "illegal name";
- goto rejected;
+ NL_SET_ERR_MSG(extack, "Illegal name");
+ return res;
}
if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
errstr = "illegal priority";
+ NL_SET_ERR_MSG(extack, "Illegal priority");
goto rejected;
}
m = tipc_media_find(b_names.media_name);
if (!m) {
errstr = "media not registered";
+ NL_SET_ERR_MSG(extack, "Media not registered");
goto rejected;
}
@@ -266,33 +280,43 @@ static int tipc_enable_bearer(struct net *net, const char *name,
prio = m->priority;
/* Check new bearer vs existing ones and find free bearer id if any */
- while (bearer_id < MAX_BEARERS) {
- b = rtnl_dereference(tn->bearer_list[bearer_id]);
- if (!b)
- break;
+ bearer_id = MAX_BEARERS;
+ i = MAX_BEARERS;
+ while (i-- != 0) {
+ b = rtnl_dereference(tn->bearer_list[i]);
+ if (!b) {
+ bearer_id = i;
+ continue;
+ }
if (!strcmp(name, b->name)) {
errstr = "already enabled";
+ NL_SET_ERR_MSG(extack, "Already enabled");
goto rejected;
}
- bearer_id++;
- if (b->priority != prio)
- continue;
- if (++with_this_prio <= 2)
- continue;
- pr_warn("Bearer <%s>: already 2 bearers with priority %u\n",
- name, prio);
- if (prio == TIPC_MIN_LINK_PRI) {
- errstr = "cannot adjust to lower";
- goto rejected;
+
+ if (b->priority == prio &&
+ (++with_this_prio > 2)) {
+ pr_warn("Bearer <%s>: already 2 bearers with priority %u\n",
+ name, prio);
+
+ if (prio == TIPC_MIN_LINK_PRI) {
+ errstr = "cannot adjust to lower";
+ NL_SET_ERR_MSG(extack, "Cannot adjust to lower");
+ goto rejected;
+ }
+
+ pr_warn("Bearer <%s>: trying with adjusted priority\n",
+ name);
+ prio--;
+ bearer_id = MAX_BEARERS;
+ i = MAX_BEARERS;
+ with_this_prio = 1;
}
- pr_warn("Bearer <%s>: trying with adjusted priority\n", name);
- prio--;
- bearer_id = 0;
- with_this_prio = 1;
}
if (bearer_id >= MAX_BEARERS) {
errstr = "max 3 bearers permitted";
+ NL_SET_ERR_MSG(extack, "Max 3 bearers permitted");
goto rejected;
}
@@ -306,6 +330,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
if (res) {
kfree(b);
errstr = "failed to enable media";
+ NL_SET_ERR_MSG(extack, "Failed to enable media");
goto rejected;
}
@@ -316,25 +341,28 @@ static int tipc_enable_bearer(struct net *net, const char *name,
b->domain = disc_domain;
b->net_plane = bearer_id + 'A';
b->priority = prio;
- test_and_set_bit_lock(0, &b->up);
refcount_set(&b->refcnt, 1);
res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
if (res) {
bearer_disable(net, b);
errstr = "failed to create discoverer";
+ NL_SET_ERR_MSG(extack, "Failed to create discoverer");
goto rejected;
}
- rcu_assign_pointer(tn->bearer_list[bearer_id], b);
- if (skb)
- tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
-
+ /* Create monitoring data before accepting activate messages */
if (tipc_mon_create(net, bearer_id)) {
bearer_disable(net, b);
+ kfree_skb(skb);
return -ENOMEM;
}
+ test_and_set_bit_lock(0, &b->up);
+ rcu_assign_pointer(tn->bearer_list[bearer_id], b);
+ if (skb)
+ tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
+
pr_info("Enabled bearer <%s>, priority %u\n", name, prio);
return res;
@@ -345,6 +373,8 @@ rejected:
/**
* tipc_reset_bearer - Reset all links established over this bearer
+ * @net: the applicable net namespace
+ * @b: the target bearer
*/
static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b)
{
@@ -366,7 +396,9 @@ void tipc_bearer_put(struct tipc_bearer *b)
}
/**
- * bearer_disable
+ * bearer_disable - disable this bearer
+ * @net: the applicable net namespace
+ * @b: the bearer to disable
*
* Note: This routine assumes caller holds RTNL lock.
*/
@@ -431,12 +463,13 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
b->bcast_addr.media_id = b->media->type_id;
b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT;
b->mtu = dev->mtu;
- b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr);
+ b->media->raw2addr(b, &b->addr, (const char *)dev->dev_addr);
rcu_assign_pointer(dev->tipc_ptr, b);
return 0;
}
/* tipc_disable_l2_media - detach TIPC bearer from an L2 interface
+ * @b: the target bearer
*
* Mark L2 bearer as inactive so that incoming buffers are thrown away
*/
@@ -453,6 +486,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b)
/**
* tipc_l2_send_msg - send a TIPC packet out over an L2 interface
+ * @net: the associated network namespace
* @skb: the packet to be sent
* @b: the bearer through which the packet is to be sent
* @dest: peer destination address
@@ -595,7 +629,7 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
/**
* tipc_l2_rcv_msg - handle incoming TIPC message from an interface
- * @buf: the received packet
+ * @skb: the received message
* @dev: the net device that the packet was received on
* @pt: the packet_type structure which was used to register this handler
* @orig_dev: the original receive net device in case the device is a bond
@@ -652,7 +686,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
test_and_set_bit_lock(0, &b->up);
break;
}
- /* fall through */
+ fallthrough;
case NETDEV_GOING_DOWN:
clear_bit_unlock(0, &b->up);
tipc_reset_bearer(net, b);
@@ -670,7 +704,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
break;
case NETDEV_CHANGEADDR:
b->media->raw2addr(b, &b->addr,
- (char *)dev->dev_addr);
+ (const char *)dev->dev_addr);
tipc_reset_bearer(net, b);
break;
case NETDEV_UNREGISTER:
@@ -735,7 +769,7 @@ void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts)
skb->pkt_type = PACKET_HOST;
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->protocol = eth_type_trans(skb, dev);
- netif_rx_ni(skb);
+ netif_rx(skb);
}
}
@@ -754,7 +788,7 @@ int tipc_attach_loopback(struct net *net)
if (!dev)
return -ENODEV;
- dev_hold(dev);
+ netdev_hold(dev, &tn->loopback_pt.dev_tracker, GFP_KERNEL);
tn->loopback_pt.dev = dev;
tn->loopback_pt.type = htons(ETH_P_TIPC);
tn->loopback_pt.func = tipc_loopback_rcv_pkt;
@@ -767,7 +801,7 @@ void tipc_detach_loopback(struct net *net)
struct tipc_net *tn = tipc_net(net);
dev_remove_pack(&tn->loopback_pt);
- dev_put(net->loopback_dev);
+ netdev_put(net->loopback_dev, &tn->loopback_pt.dev_tracker);
}
/* Caller should hold rtnl_lock to protect the bearer */
@@ -894,6 +928,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info)
bearer = tipc_bearer_find(net, name);
if (!bearer) {
err = -EINVAL;
+ NL_SET_ERR_MSG(info->extack, "Bearer not found");
goto err_out;
}
@@ -933,8 +968,10 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
bearer = tipc_bearer_find(net, name);
- if (!bearer)
+ if (!bearer) {
+ NL_SET_ERR_MSG(info->extack, "Bearer not found");
return -EINVAL;
+ }
bearer_disable(net, bearer);
@@ -992,7 +1029,8 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
}
- return tipc_enable_bearer(net, bearer, domain, prio, attrs);
+ return tipc_enable_bearer(net, bearer, domain, prio, attrs,
+ info->extack);
}
int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
@@ -1031,6 +1069,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
b = tipc_bearer_find(net, name);
if (!b) {
rtnl_unlock();
+ NL_SET_ERR_MSG(info->extack, "Bearer not found");
return -EINVAL;
}
@@ -1071,8 +1110,10 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
b = tipc_bearer_find(net, name);
- if (!b)
+ if (!b) {
+ NL_SET_ERR_MSG(info->extack, "Bearer not found");
return -EINVAL;
+ }
if (attrs[TIPC_NLA_BEARER_PROP]) {
struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
@@ -1091,12 +1132,18 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
if (props[TIPC_NLA_PROP_WIN])
b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
if (props[TIPC_NLA_PROP_MTU]) {
- if (b->media->type_id != TIPC_MEDIA_TYPE_UDP)
+ if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) {
+ NL_SET_ERR_MSG(info->extack,
+ "MTU property is unsupported");
return -EINVAL;
+ }
#ifdef CONFIG_TIPC_MEDIA_UDP
if (tipc_udp_mtu_bad(nla_get_u32
- (props[TIPC_NLA_PROP_MTU])))
+ (props[TIPC_NLA_PROP_MTU]))) {
+ NL_SET_ERR_MSG(info->extack,
+ "MTU value is out-of-range");
return -EINVAL;
+ }
b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]);
tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU);
#endif
@@ -1224,6 +1271,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
media = tipc_media_find(name);
if (!media) {
+ NL_SET_ERR_MSG(info->extack, "Media not found");
err = -EINVAL;
goto err_out;
}
@@ -1260,9 +1308,10 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]);
m = tipc_media_find(name);
- if (!m)
+ if (!m) {
+ NL_SET_ERR_MSG(info->extack, "Media not found");
return -EINVAL;
-
+ }
if (attrs[TIPC_NLA_MEDIA_PROP]) {
struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
@@ -1278,12 +1327,18 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
if (props[TIPC_NLA_PROP_WIN])
m->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
if (props[TIPC_NLA_PROP_MTU]) {
- if (m->type_id != TIPC_MEDIA_TYPE_UDP)
+ if (m->type_id != TIPC_MEDIA_TYPE_UDP) {
+ NL_SET_ERR_MSG(info->extack,
+ "MTU property is unsupported");
return -EINVAL;
+ }
#ifdef CONFIG_TIPC_MEDIA_UDP
if (tipc_udp_mtu_bad(nla_get_u32
- (props[TIPC_NLA_PROP_MTU])))
+ (props[TIPC_NLA_PROP_MTU]))) {
+ NL_SET_ERR_MSG(info->extack,
+ "MTU value is out-of-range");
return -EINVAL;
+ }
m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]);
#endif
}
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index bc0023119da2..490ad6e5f7a3 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -93,7 +93,8 @@ struct tipc_bearer;
* @raw2addr: convert from raw addr format to media addr format
* @priority: default link (and bearer) priority
* @tolerance: default time (in ms) before declaring link failure
- * @window: default window (in packets) before declaring link congestion
+ * @min_win: minimum window (in packets) before declaring link congestion
+ * @max_win: maximum window (in packets) before declaring link congestion
* @mtu: max packet size bearer can support for media type not dependent on
* underlying device MTU
* @type_id: TIPC media identifier
@@ -116,7 +117,7 @@ struct tipc_media {
char *msg);
int (*raw2addr)(struct tipc_bearer *b,
struct tipc_media_addr *addr,
- char *raw);
+ const char *raw);
u32 priority;
u32 tolerance;
u32 min_win;
@@ -138,21 +139,24 @@ struct tipc_media {
* @pt: packet type for bearer
* @rcu: rcu struct for tipc_bearer
* @priority: default link priority for bearer
- * @window: default window size for bearer
+ * @min_win: minimum window (in packets) before declaring link congestion
+ * @max_win: maximum window (in packets) before declaring link congestion
* @tolerance: default link tolerance for bearer
* @domain: network domain to which links can be established
* @identity: array index of this bearer within TIPC bearer array
- * @link_req: ptr to (optional) structure making periodic link setup requests
+ * @disc: ptr to link setup request
* @net_plane: network plane ('A' through 'H') currently associated with bearer
+ * @up: bearer up flag (bit 0)
+ * @refcnt: tipc_bearer reference counter
*
* Note: media-specific code is responsible for initialization of the fields
* indicated below when a bearer is enabled; TIPC's generic bearer code takes
* care of initializing all other fields.
*/
struct tipc_bearer {
- void __rcu *media_ptr; /* initalized by media */
- u32 mtu; /* initalized by media */
- struct tipc_media_addr addr; /* initalized by media */
+ void __rcu *media_ptr; /* initialized by media */
+ u32 mtu; /* initialized by media */
+ struct tipc_media_addr addr; /* initialized by media */
char name[TIPC_MAX_BEARER_NAME];
struct tipc_media *media;
struct tipc_media_addr bcast_addr;
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 4f6dc74adf45..434e70eabe08 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -60,6 +60,7 @@ static int __net_init tipc_init_net(struct net *net)
tn->trial_addr = 0;
tn->addr_trial_end = 0;
tn->capabilities = TIPC_NODE_CAPABILITIES;
+ INIT_WORK(&tn->work, tipc_net_finalize_work);
memset(tn->node_id, 0, sizeof(tn->node_id));
memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
@@ -80,8 +81,6 @@ static int __net_init tipc_init_net(struct net *net)
if (err)
goto out_nametbl;
- INIT_LIST_HEAD(&tn->dist_queue);
-
err = tipc_bcast_init(net);
if (err)
goto out_bclink;
@@ -107,14 +106,20 @@ out_crypto:
static void __net_exit tipc_exit_net(struct net *net)
{
+ struct tipc_net *tn = tipc_net(net);
+
tipc_detach_loopback(net);
tipc_net_stop(net);
+ /* Make sure the tipc_net_finalize_work() finished */
+ cancel_work_sync(&tn->work);
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
#ifdef CONFIG_TIPC_CRYPTO
tipc_crypto_stop(&tipc_net(net)->crypto_tx);
#endif
+ while (atomic_read(&tn->wq_count))
+ cond_resched();
}
static void __net_exit tipc_pernet_pre_exit(struct net *net)
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 631d83c9705f..0a3f7a70a50a 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -3,6 +3,7 @@
*
* Copyright (c) 2005-2006, 2013-2018 Ericsson AB
* Copyright (c) 2005-2007, 2010-2013, Wind River Systems
+ * Copyright (c) 2020, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -126,9 +127,6 @@ struct tipc_net {
spinlock_t nametbl_lock;
struct name_table *nametbl;
- /* Name dist queue */
- struct list_head dist_queue;
-
/* Topology subscription server */
struct tipc_topsrv *topsrv;
atomic_t subscription_count;
@@ -143,6 +141,10 @@ struct tipc_net {
/* TX crypto handler */
struct tipc_crypto *crypto_tx;
#endif
+ /* Work item for net finalize */
+ struct work_struct work;
+ /* The numbers of work queues in schedule */
+ atomic_t wq_count;
};
static inline struct tipc_net *tipc_net(struct net *net)
@@ -205,6 +207,17 @@ static inline u32 tipc_net_hash_mixes(struct net *net, int tn_rand)
return net_hash_mix(&init_net) ^ net_hash_mix(net) ^ tn_rand;
}
+static inline u32 hash128to32(char *bytes)
+{
+ __be32 *tmp = (__be32 *)bytes;
+ u32 res;
+
+ res = ntohl(tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3]);
+ if (likely(res))
+ return res;
+ return ntohl(tmp[0] | tmp[1] | tmp[2] | tmp[3]);
+}
+
#ifdef CONFIG_SYSCTL
int tipc_register_sysctl(void);
void tipc_unregister_sysctl(void);
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index c8c47fc72653..f09316a9035f 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/**
+/*
* net/tipc/crypto.c: TIPC crypto for key handling & packet en/decryption
*
* Copyright (c) 2019, Ericsson AB
@@ -36,28 +36,34 @@
#include <crypto/aead.h>
#include <crypto/aes.h>
+#include <crypto/rng.h>
#include "crypto.h"
+#include "msg.h"
+#include "bcast.h"
-#define TIPC_TX_PROBE_LIM msecs_to_jiffies(1000) /* > 1s */
-#define TIPC_TX_LASTING_LIM msecs_to_jiffies(120000) /* 2 mins */
+#define TIPC_TX_GRACE_PERIOD msecs_to_jiffies(5000) /* 5s */
+#define TIPC_TX_LASTING_TIME msecs_to_jiffies(10000) /* 10s */
#define TIPC_RX_ACTIVE_LIM msecs_to_jiffies(3000) /* 3s */
-#define TIPC_RX_PASSIVE_LIM msecs_to_jiffies(180000) /* 3 mins */
+#define TIPC_RX_PASSIVE_LIM msecs_to_jiffies(15000) /* 15s */
+
#define TIPC_MAX_TFMS_DEF 10
#define TIPC_MAX_TFMS_LIM 1000
-/**
+#define TIPC_REKEYING_INTV_DEF (60 * 24) /* default: 1 day */
+
+/*
* TIPC Key ids
*/
enum {
- KEY_UNUSED = 0,
- KEY_MIN,
- KEY_1 = KEY_MIN,
+ KEY_MASTER = 0,
+ KEY_MIN = KEY_MASTER,
+ KEY_1 = 1,
KEY_2,
KEY_3,
KEY_MAX = KEY_3,
};
-/**
+/*
* TIPC Crypto statistics
*/
enum {
@@ -81,8 +87,10 @@ static const char *hstats[MAX_STATS] = {"ok", "nok", "async", "async_ok",
/* Max TFMs number per key */
int sysctl_tipc_max_tfms __read_mostly = TIPC_MAX_TFMS_DEF;
+/* Key exchange switch, default: on */
+int sysctl_tipc_key_exchange_enabled __read_mostly = 1;
-/**
+/*
* struct tipc_key - TIPC keys' status indicator
*
* 7 6 5 4 3 2 1 0
@@ -115,6 +123,8 @@ struct tipc_key {
/**
* struct tipc_tfm - TIPC TFM structure to form a list of TFMs
+ * @tfm: cipher handle/key
+ * @list: linked list of TFMs
*/
struct tipc_tfm {
struct crypto_aead *tfm;
@@ -130,8 +140,10 @@ struct tipc_tfm {
* @salt: the key's SALT value
* @authsize: authentication tag size (max = 16)
* @mode: crypto mode is applied to the key
- * @hint[]: a hint for user key
+ * @hint: a hint for user key
* @rcu: struct rcu_head
+ * @key: the aead key
+ * @gen: the key's generation
* @seqno: the key seqno (cluster scope)
* @refcnt: the key reference counter
*/
@@ -144,8 +156,10 @@ struct tipc_aead {
u32 salt;
u8 authsize;
u8 mode;
- char hint[TIPC_AEAD_HINT_LEN + 1];
+ char hint[2 * TIPC_AEAD_HINT_LEN + 1];
struct rcu_head rcu;
+ struct tipc_aead_key *key;
+ u16 gen;
atomic64_t seqno ____cacheline_aligned;
refcount_t refcnt ____cacheline_aligned;
@@ -154,6 +168,7 @@ struct tipc_aead {
/**
* struct tipc_crypto_stats - TIPC Crypto statistics
+ * @stat: array of crypto statistics
*/
struct tipc_crypto_stats {
unsigned int stat[MAX_STATS];
@@ -165,26 +180,57 @@ struct tipc_crypto_stats {
* @node: TIPC node (RX)
* @aead: array of pointers to AEAD keys for encryption/decryption
* @peer_rx_active: replicated peer RX active key index
+ * @key_gen: TX/RX key generation
* @key: the key states
- * @working: the crypto is working or not
+ * @skey_mode: session key's mode
+ * @skey: received session key
+ * @wq: common workqueue on TX crypto
+ * @work: delayed work sched for TX/RX
+ * @key_distr: key distributing state
+ * @rekeying_intv: rekeying interval (in minutes)
* @stats: the crypto statistics
+ * @name: the crypto name
* @sndnxt: the per-peer sndnxt (TX)
* @timer1: general timer 1 (jiffies)
- * @timer2: general timer 1 (jiffies)
+ * @timer2: general timer 2 (jiffies)
+ * @working: the crypto is working or not
+ * @key_master: flag indicates if master key exists
+ * @legacy_user: flag indicates if a peer joins w/o master key (for bwd comp.)
+ * @nokey: no key indication
+ * @flags: combined flags field
* @lock: tipc_key lock
*/
struct tipc_crypto {
struct net *net;
struct tipc_node *node;
- struct tipc_aead __rcu *aead[KEY_MAX + 1]; /* key[0] is UNUSED */
+ struct tipc_aead __rcu *aead[KEY_MAX + 1];
atomic_t peer_rx_active;
+ u16 key_gen;
struct tipc_key key;
- u8 working:1;
+ u8 skey_mode;
+ struct tipc_aead_key *skey;
+ struct workqueue_struct *wq;
+ struct delayed_work work;
+#define KEY_DISTR_SCHED 1
+#define KEY_DISTR_COMPL 2
+ atomic_t key_distr;
+ u32 rekeying_intv;
+
struct tipc_crypto_stats __percpu *stats;
+ char name[48];
atomic64_t sndnxt ____cacheline_aligned;
unsigned long timer1;
unsigned long timer2;
+ union {
+ struct {
+ u8 working:1;
+ u8 key_master:1;
+ u8 legacy_user:1;
+ u8 nokey: 1;
+ };
+ u8 flags;
+ };
spinlock_t lock; /* crypto lock */
} ____cacheline_aligned;
@@ -234,23 +280,35 @@ static inline void tipc_crypto_key_set_state(struct tipc_crypto *c,
u8 new_active,
u8 new_pending);
static int tipc_crypto_key_attach(struct tipc_crypto *c,
- struct tipc_aead *aead, u8 pos);
+ struct tipc_aead *aead, u8 pos,
+ bool master_key);
static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending);
static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
struct tipc_crypto *rx,
- struct sk_buff *skb);
-static void tipc_crypto_key_synch(struct tipc_crypto *rx, u8 new_rx_active,
- struct tipc_msg *hdr);
+ struct sk_buff *skb,
+ u8 tx_key);
+static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb);
static int tipc_crypto_key_revoke(struct net *net, u8 tx_key);
+static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb,
+ struct tipc_bearer *b,
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode, u8 type);
static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
struct tipc_bearer *b,
struct sk_buff **skb, int err);
static void tipc_crypto_do_cmd(struct net *net, int cmd);
static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf);
-#ifdef TIPC_CRYPTO_DEBUG
static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new,
char *buf);
-#endif
+static int tipc_crypto_key_xmit(struct net *net, struct tipc_aead_key *skey,
+ u16 gen, u8 mode, u32 dnode);
+static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr);
+static void tipc_crypto_work_tx(struct work_struct *work);
+static void tipc_crypto_work_rx(struct work_struct *work);
+static int tipc_aead_key_generate(struct tipc_aead_key *skey);
+
+#define is_tx(crypto) (!(crypto)->node)
+#define is_rx(crypto) (!is_tx(crypto))
#define key_next(cur) ((cur) % KEY_MAX + 1)
@@ -259,7 +317,7 @@ static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new,
#define tipc_aead_rcu_replace(rcu_ptr, ptr, lock) \
do { \
- typeof(rcu_ptr) __tmp = rcu_dereference_protected((rcu_ptr), \
+ struct tipc_aead *__tmp = rcu_dereference_protected((rcu_ptr), \
lockdep_is_held(lock)); \
rcu_assign_pointer((rcu_ptr), (ptr)); \
tipc_aead_put(__tmp); \
@@ -270,31 +328,58 @@ do { \
/**
* tipc_aead_key_validate - Validate a AEAD user key
+ * @ukey: pointer to user key data
+ * @info: netlink info pointer
*/
-int tipc_aead_key_validate(struct tipc_aead_key *ukey)
+int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info)
{
int keylen;
/* Check if algorithm exists */
if (unlikely(!crypto_has_alg(ukey->alg_name, 0, 0))) {
- pr_info("Not found cipher: \"%s\"!\n", ukey->alg_name);
+ GENL_SET_ERR_MSG(info, "unable to load the algorithm (module existed?)");
return -ENODEV;
}
/* Currently, we only support the "gcm(aes)" cipher algorithm */
- if (strcmp(ukey->alg_name, "gcm(aes)"))
+ if (strcmp(ukey->alg_name, "gcm(aes)")) {
+ GENL_SET_ERR_MSG(info, "not supported yet the algorithm");
return -ENOTSUPP;
+ }
/* Check if key size is correct */
keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE;
if (unlikely(keylen != TIPC_AES_GCM_KEY_SIZE_128 &&
keylen != TIPC_AES_GCM_KEY_SIZE_192 &&
- keylen != TIPC_AES_GCM_KEY_SIZE_256))
- return -EINVAL;
+ keylen != TIPC_AES_GCM_KEY_SIZE_256)) {
+ GENL_SET_ERR_MSG(info, "incorrect key length (20, 28 or 36 octets?)");
+ return -EKEYREJECTED;
+ }
return 0;
}
+/**
+ * tipc_aead_key_generate - Generate new session key
+ * @skey: input/output key with new content
+ *
+ * Return: 0 in case of success, otherwise < 0
+ */
+static int tipc_aead_key_generate(struct tipc_aead_key *skey)
+{
+ int rc = 0;
+
+ /* Fill the key's content with a random value via RNG cipher */
+ rc = crypto_get_default_rng();
+ if (likely(!rc)) {
+ rc = crypto_rng_get_bytes(crypto_default_rng, skey->key,
+ skey->keylen);
+ crypto_put_default_rng();
+ }
+
+ return rc;
+}
+
static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead)
{
struct tipc_aead *tmp;
@@ -326,7 +411,8 @@ static void tipc_aead_free(struct rcu_head *rp)
if (aead->cloned) {
tipc_aead_put(aead->cloned);
} else {
- head = *this_cpu_ptr(aead->tfm_entry);
+ head = *get_cpu_ptr(aead->tfm_entry);
+ put_cpu_ptr(aead->tfm_entry);
list_for_each_entry_safe(tfm_entry, tmp, &head->list, list) {
crypto_free_aead(tfm_entry->tfm);
list_del(&tfm_entry->list);
@@ -338,6 +424,7 @@ static void tipc_aead_free(struct rcu_head *rp)
kfree(head);
}
free_percpu(aead->tfm_entry);
+ kfree_sensitive(aead->key);
kfree(aead);
}
@@ -396,13 +483,19 @@ static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val)
/**
* tipc_aead_tfm_next - Move TFM entry to the next one in list and return it
+ * @aead: the AEAD key pointer
*/
static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead)
{
- struct tipc_tfm **tfm_entry = this_cpu_ptr(aead->tfm_entry);
+ struct tipc_tfm **tfm_entry;
+ struct crypto_aead *tfm;
+ tfm_entry = get_cpu_ptr(aead->tfm_entry);
*tfm_entry = list_next_entry(*tfm_entry, list);
- return (*tfm_entry)->tfm;
+ tfm = (*tfm_entry)->tfm;
+ put_cpu_ptr(tfm_entry);
+
+ return tfm;
}
/**
@@ -441,7 +534,7 @@ static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey,
/* Allocate per-cpu TFM entry pointer */
tmp->tfm_entry = alloc_percpu(struct tipc_tfm *);
if (!tmp->tfm_entry) {
- kzfree(tmp);
+ kfree_sensitive(tmp);
return -ENOMEM;
}
@@ -491,18 +584,23 @@ static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey,
/* Not any TFM is allocated? */
if (!tfm_cnt) {
free_percpu(tmp->tfm_entry);
- kzfree(tmp);
+ kfree_sensitive(tmp);
return err;
}
- /* Copy some chars from the user key as a hint */
- memcpy(tmp->hint, ukey->key, TIPC_AEAD_HINT_LEN);
- tmp->hint[TIPC_AEAD_HINT_LEN] = '\0';
+ /* Form a hex string of some last bytes as the key's hint */
+ bin2hex(tmp->hint, ukey->key + keylen - TIPC_AEAD_HINT_LEN,
+ TIPC_AEAD_HINT_LEN);
/* Initialize the other data */
tmp->mode = mode;
tmp->cloned = NULL;
tmp->authsize = TIPC_AES_GCM_TAG_SIZE;
+ tmp->key = kmemdup(ukey, tipc_aead_key_size(ukey), GFP_KERNEL);
+ if (!tmp->key) {
+ tipc_aead_free(&tmp->rcu);
+ return -ENOMEM;
+ }
memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE);
atomic_set(&tmp->users, 0);
atomic64_set(&tmp->seqno, 0);
@@ -545,7 +643,7 @@ static int tipc_aead_clone(struct tipc_aead **dst, struct tipc_aead *src)
aead->tfm_entry = alloc_percpu_gfp(struct tipc_tfm *, GFP_ATOMIC);
if (unlikely(!aead->tfm_entry)) {
- kzfree(aead);
+ kfree_sensitive(aead);
return -ENOMEM;
}
@@ -627,9 +725,9 @@ static void *tipc_aead_mem_alloc(struct crypto_aead *tfm,
* @__dnode: TIPC dest node if "known"
*
* Return:
- * 0 : if the encryption has completed
- * -EINPROGRESS/-EBUSY : if a callback will be performed
- * < 0 : the encryption has failed
+ * * 0 : if the encryption has completed
+ * * -EINPROGRESS/-EBUSY : if a callback will be performed
+ * * < 0 : the encryption has failed
*/
static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
struct tipc_bearer *b,
@@ -657,29 +755,16 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
* but there is no frag_list, it should be still fine!
* Otherwise, we must cow it to be a writable buffer with the tailroom.
*/
-#ifdef TIPC_CRYPTO_DEBUG
SKB_LINEAR_ASSERT(skb);
if (tailen > skb_tailroom(skb)) {
- pr_warn("TX: skb tailroom is not enough: %d, requires: %d\n",
- skb_tailroom(skb), tailen);
+ pr_debug("TX(): skb tailroom is not enough: %d, requires: %d\n",
+ skb_tailroom(skb), tailen);
}
-#endif
- if (unlikely(!skb_cloned(skb) && tailen <= skb_tailroom(skb))) {
- nsg = 1;
- trailer = skb;
- } else {
- /* TODO: We could avoid skb_cow_data() if skb has no frag_list
- * e.g. by skb_fill_page_desc() to add another page to the skb
- * with the wanted tailen... However, page skbs look not often,
- * so take it easy now!
- * Cloned skbs e.g. from link_xmit() seems no choice though :(
- */
- nsg = skb_cow_data(skb, tailen, &trailer);
- if (unlikely(nsg < 0)) {
- pr_err("TX: skb_cow_data() returned %d\n", nsg);
- return nsg;
- }
+ nsg = skb_cow_data(skb, tailen, &trailer);
+ if (unlikely(nsg < 0)) {
+ pr_err("TX: skb_cow_data() returned %d\n", nsg);
+ return nsg;
}
pskb_put(skb, trailer, tailen);
@@ -706,7 +791,7 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
ehdr = (struct tipc_ehdr *)skb->data;
salt = aead->salt;
if (aead->mode == CLUSTER_KEY)
- salt ^= ehdr->addr; /* __be32 */
+ salt ^= __be32_to_cpu(ehdr->addr);
else if (__dnode)
salt ^= tipc_node_get_addr(__dnode);
memcpy(iv, &salt, 4);
@@ -757,10 +842,12 @@ static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err)
switch (err) {
case 0:
this_cpu_inc(tx->stats->stat[STAT_ASYNC_OK]);
+ rcu_read_lock();
if (likely(test_bit(0, &b->up)))
b->media->send_msg(net, skb, b, &tx_ctx->dst);
else
kfree_skb(skb);
+ rcu_read_unlock();
break;
case -EINPROGRESS:
return;
@@ -783,9 +870,9 @@ static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err)
* @b: TIPC bearer where the message has been received
*
* Return:
- * 0 : if the decryption has completed
- * -EINPROGRESS/-EBUSY : if a callback will be performed
- * < 0 : the decryption has failed
+ * * 0 : if the decryption has completed
+ * * -EINPROGRESS/-EBUSY : if a callback will be performed
+ * * < 0 : the decryption has failed
*/
static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
struct sk_buff *skb, struct tipc_bearer *b)
@@ -804,16 +891,10 @@ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
if (unlikely(!aead))
return -ENOKEY;
- /* Cow skb data if needed */
- if (likely(!skb_cloned(skb) &&
- (!skb_is_nonlinear(skb) || !skb_has_frag_list(skb)))) {
- nsg = 1 + skb_shinfo(skb)->nr_frags;
- } else {
- nsg = skb_cow_data(skb, 0, &unused);
- if (unlikely(nsg < 0)) {
- pr_err("RX: skb_cow_data() returned %d\n", nsg);
- return nsg;
- }
+ nsg = skb_cow_data(skb, 0, &unused);
+ if (unlikely(nsg < 0)) {
+ pr_err("RX: skb_cow_data() returned %d\n", nsg);
+ return nsg;
}
/* Allocate memory for the AEAD operation */
@@ -835,7 +916,7 @@ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
ehdr = (struct tipc_ehdr *)skb->data;
salt = aead->salt;
if (aead->mode == CLUSTER_KEY)
- salt ^= ehdr->addr; /* __be32 */
+ salt ^= __be32_to_cpu(ehdr->addr);
else if (ehdr->destined)
salt ^= tipc_own_addr(net);
memcpy(iv, &salt, 4);
@@ -914,7 +995,7 @@ static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr)
* tipc_ehdr_validate - Validate an encryption message
* @skb: the message buffer
*
- * Returns "true" if this is a valid encryption message, otherwise "false"
+ * Return: "true" if this is a valid encryption message, otherwise "false"
*/
bool tipc_ehdr_validate(struct sk_buff *skb)
{
@@ -932,8 +1013,6 @@ bool tipc_ehdr_validate(struct sk_buff *skb)
return false;
if (unlikely(skb->len <= ehsz + TIPC_AES_GCM_TAG_SIZE))
return false;
- if (unlikely(!ehdr->tx_key))
- return false;
return true;
}
@@ -986,6 +1065,8 @@ static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead,
ehdr->tx_key = tx_key;
ehdr->destined = (__rx) ? 1 : 0;
ehdr->rx_key_active = (__rx) ? __rx->key.active : 0;
+ ehdr->rx_nokey = (__rx) ? __rx->nokey : 0;
+ ehdr->master_key = aead->crypto->key_master;
ehdr->reserved_1 = 0;
ehdr->reserved_2 = 0;
@@ -1011,23 +1092,16 @@ static inline void tipc_crypto_key_set_state(struct tipc_crypto *c,
u8 new_active,
u8 new_pending)
{
-#ifdef TIPC_CRYPTO_DEBUG
struct tipc_key old = c->key;
char buf[32];
-#endif
c->key.keys = ((new_passive & KEY_MASK) << (KEY_BITS * 2)) |
((new_active & KEY_MASK) << (KEY_BITS)) |
((new_pending & KEY_MASK));
-#ifdef TIPC_CRYPTO_DEBUG
- pr_info("%s(%s): key changing %s ::%pS\n",
- (c->node) ? "RX" : "TX",
- (c->node) ? tipc_node_get_id_str(c->node) :
- tipc_own_id_string(c->net),
- tipc_key_change_dump(old, c->key, buf),
- __builtin_return_address(0));
-#endif
+ pr_debug("%s: key changing %s ::%pS\n", c->name,
+ tipc_key_change_dump(old, c->key, buf),
+ __builtin_return_address(0));
}
/**
@@ -1035,6 +1109,7 @@ static inline void tipc_crypto_key_set_state(struct tipc_crypto *c,
* @c: TIPC crypto to which new key is attached
* @ukey: the user key
* @mode: the key mode (CLUSTER_KEY or PER_NODE_KEY)
+ * @master_key: specify this is a cluster master key
*
* A new TIPC AEAD key will be allocated and initiated with the specified user
* key, then attached to the TIPC crypto.
@@ -1042,7 +1117,7 @@ static inline void tipc_crypto_key_set_state(struct tipc_crypto *c,
* Return: new key id in case of success, otherwise: < 0
*/
int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
- u8 mode)
+ u8 mode, bool master_key)
{
struct tipc_aead *aead = NULL;
int rc = 0;
@@ -1052,17 +1127,11 @@ int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
/* Attach it to the crypto */
if (likely(!rc)) {
- rc = tipc_crypto_key_attach(c, aead, 0);
+ rc = tipc_crypto_key_attach(c, aead, 0, master_key);
if (rc < 0)
tipc_aead_free(&aead->rcu);
}
- pr_info("%s(%s): key initiating, rc %d!\n",
- (c->node) ? "RX" : "TX",
- (c->node) ? tipc_node_get_id_str(c->node) :
- tipc_own_id_string(c->net),
- rc);
-
return rc;
}
@@ -1071,58 +1140,58 @@ int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
* @c: TIPC crypto to which the new AEAD key is attached
* @aead: the new AEAD key pointer
* @pos: desired slot in the crypto key array, = 0 if any!
+ * @master_key: specify this is a cluster master key
*
* Return: new key id in case of success, otherwise: -EBUSY
*/
static int tipc_crypto_key_attach(struct tipc_crypto *c,
- struct tipc_aead *aead, u8 pos)
+ struct tipc_aead *aead, u8 pos,
+ bool master_key)
{
- u8 new_pending, new_passive, new_key;
struct tipc_key key;
int rc = -EBUSY;
+ u8 new_key;
spin_lock_bh(&c->lock);
key = c->key;
+ if (master_key) {
+ new_key = KEY_MASTER;
+ goto attach;
+ }
if (key.active && key.passive)
goto exit;
- if (key.passive && !tipc_aead_users(c->aead[key.passive]))
- goto exit;
if (key.pending) {
- if (pos)
- goto exit;
if (tipc_aead_users(c->aead[key.pending]) > 0)
goto exit;
+ /* if (pos): ok with replacing, will be aligned when needed */
/* Replace it */
- new_pending = key.pending;
- new_passive = key.passive;
- new_key = new_pending;
+ new_key = key.pending;
} else {
if (pos) {
if (key.active && pos != key_next(key.active)) {
- new_pending = key.pending;
- new_passive = pos;
- new_key = new_passive;
+ key.passive = pos;
+ new_key = pos;
goto attach;
} else if (!key.active && !key.passive) {
- new_pending = pos;
- new_passive = key.passive;
- new_key = new_pending;
+ key.pending = pos;
+ new_key = pos;
goto attach;
}
}
- new_pending = key_next(key.active ?: key.passive);
- new_passive = key.passive;
- new_key = new_pending;
+ key.pending = key_next(key.active ?: key.passive);
+ new_key = key.pending;
}
attach:
aead->crypto = c;
- tipc_crypto_key_set_state(c, new_passive, key.active, new_pending);
+ aead->gen = (is_tx(c)) ? ++c->key_gen : c->key_gen;
tipc_aead_rcu_replace(c->aead[new_key], aead, &c->lock);
-
+ if (likely(c->key.keys != key.keys))
+ tipc_crypto_key_set_state(c, key.passive, key.active,
+ key.pending);
c->working = 1;
- c->timer1 = jiffies;
- c->timer2 = jiffies;
+ c->nokey = 0;
+ c->key_master |= master_key;
rc = new_key;
exit:
@@ -1132,14 +1201,33 @@ exit:
void tipc_crypto_key_flush(struct tipc_crypto *c)
{
+ struct tipc_crypto *tx, *rx;
int k;
spin_lock_bh(&c->lock);
- c->working = 0;
+ if (is_rx(c)) {
+ /* Try to cancel pending work */
+ rx = c;
+ tx = tipc_net(rx->net)->crypto_tx;
+ if (cancel_delayed_work(&rx->work)) {
+ kfree(rx->skey);
+ rx->skey = NULL;
+ atomic_xchg(&rx->key_distr, 0);
+ tipc_node_put(rx->node);
+ }
+ /* RX stopping => decrease TX key users if any */
+ k = atomic_xchg(&rx->peer_rx_active, 0);
+ if (k) {
+ tipc_aead_users_dec(tx->aead[k], 0);
+ /* Mark the point TX key users changed */
+ tx->timer1 = jiffies;
+ }
+ }
+
+ c->flags = 0;
tipc_crypto_key_set_state(c, 0, 0, 0);
for (k = KEY_MIN; k <= KEY_MAX; k++)
tipc_crypto_key_detach(c->aead[k], &c->lock);
- atomic_set(&c->peer_rx_active, 0);
atomic64_set(&c->sndnxt, 0);
spin_unlock_bh(&c->lock);
}
@@ -1198,7 +1286,8 @@ static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending)
rcu_assign_pointer(rx->aead[new_passive], tmp2);
refcount_set(&tmp1->refcnt, 1);
aligned = true;
- pr_info("RX(%s): key is aligned!\n", tipc_node_get_id_str(rx->node));
+ pr_info_ratelimited("%s: key[%d] -> key[%d]\n", rx->name, key.pending,
+ new_pending);
exit:
spin_unlock(&rx->lock);
@@ -1210,6 +1299,7 @@ exit:
* @tx: TX crypto handle
* @rx: RX crypto handle (can be NULL)
* @skb: the message skb which will be decrypted later
+ * @tx_key: peer TX key id
*
* This function looks up the existing TX keys and pick one which is suitable
* for the message decryption, that must be a cluster key and not used before
@@ -1219,7 +1309,8 @@ exit:
*/
static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
struct tipc_crypto *rx,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ u8 tx_key)
{
struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(skb);
struct tipc_aead *aead = NULL;
@@ -1238,6 +1329,10 @@ static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
/* Pick one TX key */
spin_lock(&tx->lock);
+ if (tx_key == KEY_MASTER) {
+ aead = tipc_aead_rcu_ptr(tx->aead[KEY_MASTER], &tx->lock);
+ goto done;
+ }
do {
k = (i == 0) ? key.pending :
((i == 1) ? key.active : key.passive);
@@ -1257,9 +1352,12 @@ static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
skb->next = skb_clone(skb, GFP_ATOMIC);
if (unlikely(!skb->next))
pr_warn("Failed to clone skb for next round if any\n");
- WARN_ON(!refcount_inc_not_zero(&aead->refcnt));
break;
} while (++i < 3);
+
+done:
+ if (likely(aead))
+ WARN_ON(!refcount_inc_not_zero(&aead->refcnt));
spin_unlock(&tx->lock);
return aead;
@@ -1268,53 +1366,73 @@ static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
/**
* tipc_crypto_key_synch: Synch own key data according to peer key status
* @rx: RX crypto handle
- * @new_rx_active: latest RX active key from peer
- * @hdr: TIPCv2 message
+ * @skb: TIPCv2 message buffer (incl. the ehdr from peer)
*
* This function updates the peer node related data as the peer RX active key
* has changed, so the number of TX keys' users on this node are increased and
* decreased correspondingly.
*
+ * It also considers if peer has no key, then we need to make own master key
+ * (if any) taking over i.e. starting grace period and also trigger key
+ * distributing process.
+ *
* The "per-peer" sndnxt is also reset when the peer key has switched.
*/
-static void tipc_crypto_key_synch(struct tipc_crypto *rx, u8 new_rx_active,
- struct tipc_msg *hdr)
+static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb)
{
- struct net *net = rx->net;
- struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
- u8 cur_rx_active;
+ struct tipc_ehdr *ehdr = (struct tipc_ehdr *)skb_network_header(skb);
+ struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
+ struct tipc_msg *hdr = buf_msg(skb);
+ u32 self = tipc_own_addr(rx->net);
+ u8 cur, new;
+ unsigned long delay;
- /* TX might be even not ready yet */
- if (unlikely(!tx->key.active && !tx->key.pending))
- return;
+ /* Update RX 'key_master' flag according to peer, also mark "legacy" if
+ * a peer has no master key.
+ */
+ rx->key_master = ehdr->master_key;
+ if (!rx->key_master)
+ tx->legacy_user = 1;
- cur_rx_active = atomic_read(&rx->peer_rx_active);
- if (likely(cur_rx_active == new_rx_active))
+ /* For later cases, apply only if message is destined to this node */
+ if (!ehdr->destined || msg_short(hdr) || msg_destnode(hdr) != self)
return;
- /* Make sure this message destined for this node */
- if (unlikely(msg_short(hdr) ||
- msg_destnode(hdr) != tipc_own_addr(net)))
- return;
+ /* Case 1: Peer has no keys, let's make master key take over */
+ if (ehdr->rx_nokey) {
+ /* Set or extend grace period */
+ tx->timer2 = jiffies;
+ /* Schedule key distributing for the peer if not yet */
+ if (tx->key.keys &&
+ !atomic_cmpxchg(&rx->key_distr, 0, KEY_DISTR_SCHED)) {
+ get_random_bytes(&delay, 2);
+ delay %= 5;
+ delay = msecs_to_jiffies(500 * ++delay);
+ if (queue_delayed_work(tx->wq, &rx->work, delay))
+ tipc_node_get(rx->node);
+ }
+ } else {
+ /* Cancel a pending key distributing if any */
+ atomic_xchg(&rx->key_distr, 0);
+ }
- /* Peer RX active key has changed, try to update owns' & TX users */
- if (atomic_cmpxchg(&rx->peer_rx_active,
- cur_rx_active,
- new_rx_active) == cur_rx_active) {
- if (new_rx_active)
- tipc_aead_users_inc(tx->aead[new_rx_active], INT_MAX);
- if (cur_rx_active)
- tipc_aead_users_dec(tx->aead[cur_rx_active], 0);
+ /* Case 2: Peer RX active key has changed, let's update own TX users */
+ cur = atomic_read(&rx->peer_rx_active);
+ new = ehdr->rx_key_active;
+ if (tx->key.keys &&
+ cur != new &&
+ atomic_cmpxchg(&rx->peer_rx_active, cur, new) == cur) {
+ if (new)
+ tipc_aead_users_inc(tx->aead[new], INT_MAX);
+ if (cur)
+ tipc_aead_users_dec(tx->aead[cur], 0);
atomic64_set(&rx->sndnxt, 0);
/* Mark the point TX key users changed */
tx->timer1 = jiffies;
-#ifdef TIPC_CRYPTO_DEBUG
- pr_info("TX(%s): key users changed %d-- %d++, peer RX(%s)\n",
- tipc_own_id_string(net), cur_rx_active,
- new_rx_active, tipc_node_get_id_str(rx->node));
-#endif
+ pr_debug("%s: key users changed %d-- %d++, peer %s\n",
+ tx->name, cur, new, rx->name);
}
}
@@ -1332,7 +1450,7 @@ static int tipc_crypto_key_revoke(struct net *net, u8 tx_key)
tipc_crypto_key_detach(tx->aead[key.active], &tx->lock);
spin_unlock(&tx->lock);
- pr_warn("TX(%s): key is revoked!\n", tipc_own_id_string(net));
+ pr_warn("%s: key is revoked\n", tx->name);
return -EKEYREVOKED;
}
@@ -1349,66 +1467,76 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net,
if (!c)
return -ENOMEM;
+ /* Allocate workqueue on TX */
+ if (!node) {
+ c->wq = alloc_ordered_workqueue("tipc_crypto", 0);
+ if (!c->wq) {
+ kfree(c);
+ return -ENOMEM;
+ }
+ }
+
/* Allocate statistic structure */
c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC);
if (!c->stats) {
- kzfree(c);
+ if (c->wq)
+ destroy_workqueue(c->wq);
+ kfree_sensitive(c);
return -ENOMEM;
}
- c->working = 0;
+ c->flags = 0;
c->net = net;
c->node = node;
+ get_random_bytes(&c->key_gen, 2);
tipc_crypto_key_set_state(c, 0, 0, 0);
+ atomic_set(&c->key_distr, 0);
atomic_set(&c->peer_rx_active, 0);
atomic64_set(&c->sndnxt, 0);
c->timer1 = jiffies;
c->timer2 = jiffies;
+ c->rekeying_intv = TIPC_REKEYING_INTV_DEF;
spin_lock_init(&c->lock);
- *crypto = c;
+ scnprintf(c->name, 48, "%s(%s)", (is_rx(c)) ? "RX" : "TX",
+ (is_rx(c)) ? tipc_node_get_id_str(c->node) :
+ tipc_own_id_string(c->net));
+ if (is_rx(c))
+ INIT_DELAYED_WORK(&c->work, tipc_crypto_work_rx);
+ else
+ INIT_DELAYED_WORK(&c->work, tipc_crypto_work_tx);
+
+ *crypto = c;
return 0;
}
void tipc_crypto_stop(struct tipc_crypto **crypto)
{
- struct tipc_crypto *c, *tx, *rx;
- bool is_rx;
+ struct tipc_crypto *c = *crypto;
u8 k;
- if (!*crypto)
+ if (!c)
return;
- rcu_read_lock();
- /* RX stopping? => decrease TX key users if any */
- is_rx = !!((*crypto)->node);
- if (is_rx) {
- rx = *crypto;
- tx = tipc_net(rx->net)->crypto_tx;
- k = atomic_read(&rx->peer_rx_active);
- if (k) {
- tipc_aead_users_dec(tx->aead[k], 0);
- /* Mark the point TX key users changed */
- tx->timer1 = jiffies;
- }
+ /* Flush any queued works & destroy wq */
+ if (is_tx(c)) {
+ c->rekeying_intv = 0;
+ cancel_delayed_work_sync(&c->work);
+ destroy_workqueue(c->wq);
}
/* Release AEAD keys */
- c = *crypto;
+ rcu_read_lock();
for (k = KEY_MIN; k <= KEY_MAX; k++)
tipc_aead_put(rcu_dereference(c->aead[k]));
rcu_read_unlock();
-
- pr_warn("%s(%s) has been purged, node left!\n",
- (is_rx) ? "RX" : "TX",
- (is_rx) ? tipc_node_get_id_str((*crypto)->node) :
- tipc_own_id_string((*crypto)->net));
+ pr_debug("%s: has been stopped\n", c->name);
/* Free this crypto statistics */
free_percpu(c->stats);
*crypto = NULL;
- kzfree(c);
+ kfree_sensitive(c);
}
void tipc_crypto_timeout(struct tipc_crypto *rx)
@@ -1416,106 +1544,91 @@ void tipc_crypto_timeout(struct tipc_crypto *rx)
struct tipc_net *tn = tipc_net(rx->net);
struct tipc_crypto *tx = tn->crypto_tx;
struct tipc_key key;
- u8 new_pending, new_passive;
int cmd;
- /* TX key activating:
- * The pending key (users > 0) -> active
- * The active key if any (users == 0) -> free
- */
+ /* TX pending: taking all users & stable -> active */
spin_lock(&tx->lock);
key = tx->key;
if (key.active && tipc_aead_users(tx->aead[key.active]) > 0)
goto s1;
if (!key.pending || tipc_aead_users(tx->aead[key.pending]) <= 0)
goto s1;
- if (time_before(jiffies, tx->timer1 + TIPC_TX_LASTING_LIM))
+ if (time_before(jiffies, tx->timer1 + TIPC_TX_LASTING_TIME))
goto s1;
tipc_crypto_key_set_state(tx, key.passive, key.pending, 0);
if (key.active)
tipc_crypto_key_detach(tx->aead[key.active], &tx->lock);
this_cpu_inc(tx->stats->stat[STAT_SWITCHES]);
- pr_info("TX(%s): key %d is activated!\n", tipc_own_id_string(tx->net),
- key.pending);
+ pr_info("%s: key[%d] is activated\n", tx->name, key.pending);
s1:
spin_unlock(&tx->lock);
- /* RX key activating:
- * The pending key (users > 0) -> active
- * The active key if any -> passive, freed later
- */
+ /* RX pending: having user -> active */
spin_lock(&rx->lock);
key = rx->key;
if (!key.pending || tipc_aead_users(rx->aead[key.pending]) <= 0)
goto s2;
- new_pending = (key.passive &&
- !tipc_aead_users(rx->aead[key.passive])) ?
- key.passive : 0;
- new_passive = (key.active) ?: ((new_pending) ? 0 : key.passive);
- tipc_crypto_key_set_state(rx, new_passive, key.pending, new_pending);
+ if (key.active)
+ key.passive = key.active;
+ key.active = key.pending;
+ rx->timer2 = jiffies;
+ tipc_crypto_key_set_state(rx, key.passive, key.active, 0);
this_cpu_inc(rx->stats->stat[STAT_SWITCHES]);
- pr_info("RX(%s): key %d is activated!\n",
- tipc_node_get_id_str(rx->node), key.pending);
+ pr_info("%s: key[%d] is activated\n", rx->name, key.pending);
goto s5;
s2:
- /* RX key "faulty" switching:
- * The faulty pending key (users < -30) -> passive
- * The passive key (users = 0) -> pending
- * Note: This only happens after RX deactivated - s3!
- */
- key = rx->key;
- if (!key.pending || tipc_aead_users(rx->aead[key.pending]) > -30)
- goto s3;
- if (!key.passive || tipc_aead_users(rx->aead[key.passive]) != 0)
+ /* RX pending: not working -> remove */
+ if (!key.pending || tipc_aead_users(rx->aead[key.pending]) > -10)
goto s3;
- new_pending = key.passive;
- new_passive = key.pending;
- tipc_crypto_key_set_state(rx, new_passive, key.active, new_pending);
+ tipc_crypto_key_set_state(rx, key.passive, key.active, 0);
+ tipc_crypto_key_detach(rx->aead[key.pending], &rx->lock);
+ pr_debug("%s: key[%d] is removed\n", rx->name, key.pending);
goto s5;
s3:
- /* RX key deactivating:
- * The passive key if any -> pending
- * The active key -> passive (users = 0) / pending
- * The pending key if any -> passive (users = 0)
- */
- key = rx->key;
+ /* RX active: timed out or no user -> pending */
if (!key.active)
goto s4;
- if (time_before(jiffies, rx->timer1 + TIPC_RX_ACTIVE_LIM))
+ if (time_before(jiffies, rx->timer1 + TIPC_RX_ACTIVE_LIM) &&
+ tipc_aead_users(rx->aead[key.active]) > 0)
goto s4;
- new_pending = (key.passive) ?: key.active;
- new_passive = (key.passive) ? key.active : key.pending;
- tipc_aead_users_set(rx->aead[new_pending], 0);
- if (new_passive)
- tipc_aead_users_set(rx->aead[new_passive], 0);
- tipc_crypto_key_set_state(rx, new_passive, 0, new_pending);
- pr_info("RX(%s): key %d is deactivated!\n",
- tipc_node_get_id_str(rx->node), key.active);
+ if (key.pending)
+ key.passive = key.active;
+ else
+ key.pending = key.active;
+ rx->timer2 = jiffies;
+ tipc_crypto_key_set_state(rx, key.passive, 0, key.pending);
+ tipc_aead_users_set(rx->aead[key.pending], 0);
+ pr_debug("%s: key[%d] is deactivated\n", rx->name, key.active);
goto s5;
s4:
- /* RX key passive -> freed: */
- key = rx->key;
- if (!key.passive || !tipc_aead_users(rx->aead[key.passive]))
+ /* RX passive: outdated or not working -> free */
+ if (!key.passive)
goto s5;
- if (time_before(jiffies, rx->timer2 + TIPC_RX_PASSIVE_LIM))
+ if (time_before(jiffies, rx->timer2 + TIPC_RX_PASSIVE_LIM) &&
+ tipc_aead_users(rx->aead[key.passive]) > -10)
goto s5;
tipc_crypto_key_set_state(rx, 0, key.active, key.pending);
tipc_crypto_key_detach(rx->aead[key.passive], &rx->lock);
- pr_info("RX(%s): key %d is freed!\n", tipc_node_get_id_str(rx->node),
- key.passive);
+ pr_debug("%s: key[%d] is freed\n", rx->name, key.passive);
s5:
spin_unlock(&rx->lock);
+ /* Relax it here, the flag will be set again if it really is, but only
+ * when we are not in grace period for safety!
+ */
+ if (time_after(jiffies, tx->timer2 + TIPC_TX_GRACE_PERIOD))
+ tx->legacy_user = 0;
+
/* Limit max_tfms & do debug commands if needed */
if (likely(sysctl_tipc_max_tfms <= TIPC_MAX_TFMS_LIM))
return;
@@ -1525,6 +1638,22 @@ s5:
tipc_crypto_do_cmd(rx->net, cmd);
}
+static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb,
+ struct tipc_bearer *b,
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode, u8 type)
+{
+ struct sk_buff *skb;
+
+ skb = skb_clone(_skb, GFP_ATOMIC);
+ if (skb) {
+ TIPC_SKB_CB(skb)->xmit_type = type;
+ tipc_crypto_xmit(net, &skb, b, dst, __dnode);
+ if (skb)
+ b->media->send_msg(net, skb, b, dst);
+ }
+}
+
/**
* tipc_crypto_xmit - Build & encrypt TIPC message for xmit
* @net: struct net
@@ -1534,18 +1663,19 @@ s5:
* @__dnode: destination node for reference if any
*
* First, build an encryption message header on the top of the message, then
- * encrypt the original TIPC message by using the active or pending TX key.
+ * encrypt the original TIPC message by using the pending, master or active
+ * key with this preference order.
* If the encryption is successful, the encrypted skb is returned directly or
* via the callback.
* Otherwise, the skb is freed!
*
* Return:
- * 0 : the encryption has succeeded (or no encryption)
- * -EINPROGRESS/-EBUSY : the encryption is ongoing, a callback will be made
- * -ENOKEK : the encryption has failed due to no key
- * -EKEYREVOKED : the encryption has failed due to key revoked
- * -ENOMEM : the encryption has failed due to no memory
- * < 0 : the encryption has failed due to other reasons
+ * * 0 : the encryption has succeeded (or no encryption)
+ * * -EINPROGRESS/-EBUSY : the encryption is ongoing, a callback will be made
+ * * -ENOKEK : the encryption has failed due to no key
+ * * -EKEYREVOKED : the encryption has failed due to key revoked
+ * * -ENOMEM : the encryption has failed due to no memory
+ * * < 0 : the encryption has failed due to other reasons
*/
int tipc_crypto_xmit(struct net *net, struct sk_buff **skb,
struct tipc_bearer *b, struct tipc_media_addr *dst,
@@ -1554,46 +1684,67 @@ int tipc_crypto_xmit(struct net *net, struct sk_buff **skb,
struct tipc_crypto *__rx = tipc_node_crypto_rx(__dnode);
struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
struct tipc_crypto_stats __percpu *stats = tx->stats;
+ struct tipc_msg *hdr = buf_msg(*skb);
struct tipc_key key = tx->key;
struct tipc_aead *aead = NULL;
- struct sk_buff *probe;
+ u32 user = msg_user(hdr);
+ u32 type = msg_type(hdr);
int rc = -ENOKEY;
- u8 tx_key;
+ u8 tx_key = 0;
/* No encryption? */
if (!tx->working)
return 0;
- /* Try with the pending key if available and:
- * 1) This is the only choice (i.e. no active key) or;
- * 2) Peer has switched to this key (unicast only) or;
- * 3) It is time to do a pending key probe;
- */
+ /* Pending key if peer has active on it or probing time */
if (unlikely(key.pending)) {
tx_key = key.pending;
- if (!key.active)
+ if (!tx->key_master && !key.active)
goto encrypt;
if (__rx && atomic_read(&__rx->peer_rx_active) == tx_key)
goto encrypt;
- if (TIPC_SKB_CB(*skb)->probe)
+ if (TIPC_SKB_CB(*skb)->xmit_type == SKB_PROBING) {
+ pr_debug("%s: probing for key[%d]\n", tx->name,
+ key.pending);
goto encrypt;
- if (!__rx &&
- time_after(jiffies, tx->timer2 + TIPC_TX_PROBE_LIM)) {
- tx->timer2 = jiffies;
- probe = skb_clone(*skb, GFP_ATOMIC);
- if (probe) {
- TIPC_SKB_CB(probe)->probe = 1;
- tipc_crypto_xmit(net, &probe, b, dst, __dnode);
- if (probe)
- b->media->send_msg(net, probe, b, dst);
+ }
+ if (user == LINK_CONFIG || user == LINK_PROTOCOL)
+ tipc_crypto_clone_msg(net, *skb, b, dst, __dnode,
+ SKB_PROBING);
+ }
+
+ /* Master key if this is a *vital* message or in grace period */
+ if (tx->key_master) {
+ tx_key = KEY_MASTER;
+ if (!key.active)
+ goto encrypt;
+ if (TIPC_SKB_CB(*skb)->xmit_type == SKB_GRACING) {
+ pr_debug("%s: gracing for msg (%d %d)\n", tx->name,
+ user, type);
+ goto encrypt;
+ }
+ if (user == LINK_CONFIG ||
+ (user == LINK_PROTOCOL && type == RESET_MSG) ||
+ (user == MSG_CRYPTO && type == KEY_DISTR_MSG) ||
+ time_before(jiffies, tx->timer2 + TIPC_TX_GRACE_PERIOD)) {
+ if (__rx && __rx->key_master &&
+ !atomic_read(&__rx->peer_rx_active))
+ goto encrypt;
+ if (!__rx) {
+ if (likely(!tx->legacy_user))
+ goto encrypt;
+ tipc_crypto_clone_msg(net, *skb, b, dst,
+ __dnode, SKB_GRACING);
}
}
}
+
/* Else, use the active key if any */
if (likely(key.active)) {
tx_key = key.active;
goto encrypt;
}
+
goto exit;
encrypt:
@@ -1644,12 +1795,12 @@ exit:
* cluster key(s) can be taken for decryption (- recursive).
*
* Return:
- * 0 : the decryption has successfully completed
- * -EINPROGRESS/-EBUSY : the decryption is ongoing, a callback will be made
- * -ENOKEY : the decryption has failed due to no key
- * -EBADMSG : the decryption has failed due to bad message
- * -ENOMEM : the decryption has failed due to no memory
- * < 0 : the decryption has failed due to other reasons
+ * * 0 : the decryption has successfully completed
+ * * -EINPROGRESS/-EBUSY : the decryption is ongoing, a callback will be made
+ * * -ENOKEY : the decryption has failed due to no key
+ * * -EBADMSG : the decryption has failed due to bad message
+ * * -ENOMEM : the decryption has failed due to no memory
+ * * < 0 : the decryption has failed due to other reasons
*/
int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx,
struct sk_buff **skb, struct tipc_bearer *b)
@@ -1659,30 +1810,21 @@ int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx,
struct tipc_aead *aead = NULL;
struct tipc_key key;
int rc = -ENOKEY;
- u8 tx_key = 0;
+ u8 tx_key, n;
+
+ tx_key = ((struct tipc_ehdr *)(*skb)->data)->tx_key;
/* New peer?
* Let's try with TX key (i.e. cluster mode) & verify the skb first!
*/
- if (unlikely(!rx))
+ if (unlikely(!rx || tx_key == KEY_MASTER))
goto pick_tx;
- /* Pick RX key according to TX key, three cases are possible:
- * 1) The current active key (likely) or;
- * 2) The pending (new or deactivated) key (if any) or;
- * 3) The passive or old active key (i.e. users > 0);
- */
- tx_key = ((struct tipc_ehdr *)(*skb)->data)->tx_key;
+ /* Pick RX key according to TX key if any */
key = rx->key;
- if (likely(tx_key == key.active))
+ if (tx_key == key.active || tx_key == key.pending ||
+ tx_key == key.passive)
goto decrypt;
- if (tx_key == key.pending)
- goto decrypt;
- if (tx_key == key.passive) {
- rx->timer2 = jiffies;
- if (tipc_aead_users(rx->aead[key.passive]) > 0)
- goto decrypt;
- }
/* Unknown key, let's try to align RX key(s) */
if (tipc_crypto_key_try_align(rx, tx_key))
@@ -1690,7 +1832,7 @@ int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx,
pick_tx:
/* No key suitable? Try to pick one from TX... */
- aead = tipc_crypto_key_pick_tx(tx, rx, *skb);
+ aead = tipc_crypto_key_pick_tx(tx, rx, *skb, tx_key);
if (aead)
goto decrypt;
goto exit;
@@ -1718,8 +1860,19 @@ exit:
if (rc == -ENOKEY) {
kfree_skb(*skb);
*skb = NULL;
- if (rx)
+ if (rx) {
+ /* Mark rx->nokey only if we dont have a
+ * pending received session key, nor a newer
+ * one i.e. in the next slot.
+ */
+ n = key_next(tx_key);
+ rx->nokey = !(rx->skey ||
+ rcu_access_pointer(rx->aead[n]));
+ pr_debug_ratelimited("%s: nokey %d, key %d/%x\n",
+ rx->name, rx->nokey,
+ tx_key, rx->key.keys);
tipc_node_put(rx->node);
+ }
this_cpu_inc(stats->stat[STAT_NOKEYS]);
return rc;
} else if (rc == -EBADMSG) {
@@ -1741,21 +1894,17 @@ static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
struct tipc_aead *tmp = NULL;
struct tipc_ehdr *ehdr;
struct tipc_node *n;
- u8 rx_key_active;
- bool destined;
/* Is this completed by TX? */
- if (unlikely(!rx->node)) {
+ if (unlikely(is_tx(aead->crypto))) {
rx = skb_cb->tx_clone_ctx.rx;
-#ifdef TIPC_CRYPTO_DEBUG
- pr_info("TX->RX(%s): err %d, aead %p, skb->next %p, flags %x\n",
- (rx) ? tipc_node_get_id_str(rx->node) : "-", err, aead,
- (*skb)->next, skb_cb->flags);
- pr_info("skb_cb [recurs %d, last %p], tx->aead [%p %p %p]\n",
- skb_cb->tx_clone_ctx.recurs, skb_cb->tx_clone_ctx.last,
- aead->crypto->aead[1], aead->crypto->aead[2],
- aead->crypto->aead[3]);
-#endif
+ pr_debug("TX->RX(%s): err %d, aead %p, skb->next %p, flags %x\n",
+ (rx) ? tipc_node_get_id_str(rx->node) : "-", err, aead,
+ (*skb)->next, skb_cb->flags);
+ pr_debug("skb_cb [recurs %d, last %p], tx->aead [%p %p %p]\n",
+ skb_cb->tx_clone_ctx.recurs, skb_cb->tx_clone_ctx.last,
+ aead->crypto->aead[1], aead->crypto->aead[2],
+ aead->crypto->aead[3]);
if (unlikely(err)) {
if (err == -EBADMSG && (*skb)->next)
tipc_rcv(net, (*skb)->next, b);
@@ -1776,35 +1925,40 @@ static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
goto free_skb;
}
- /* Skip cloning this time as we had a RX pending key */
- if (rx->key.pending)
+ /* Ignore cloning if it was TX master key */
+ if (ehdr->tx_key == KEY_MASTER)
goto rcv;
if (tipc_aead_clone(&tmp, aead) < 0)
goto rcv;
- if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key) < 0) {
+ WARN_ON(!refcount_inc_not_zero(&tmp->refcnt));
+ if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key, false) < 0) {
tipc_aead_free(&tmp->rcu);
goto rcv;
}
tipc_aead_put(aead);
- aead = tipc_aead_get(tmp);
+ aead = tmp;
}
if (unlikely(err)) {
- tipc_aead_users_dec(aead, INT_MIN);
+ tipc_aead_users_dec((struct tipc_aead __force __rcu *)aead, INT_MIN);
goto free_skb;
}
/* Set the RX key's user */
- tipc_aead_users_set(aead, 1);
+ tipc_aead_users_set((struct tipc_aead __force __rcu *)aead, 1);
-rcv:
/* Mark this point, RX works */
rx->timer1 = jiffies;
+rcv:
/* Remove ehdr & auth. tag prior to tipc_rcv() */
ehdr = (struct tipc_ehdr *)(*skb)->data;
- destined = ehdr->destined;
- rx_key_active = ehdr->rx_key_active;
+
+ /* Mark this point, RX passive still works */
+ if (rx->key.passive && ehdr->tx_key == rx->key.passive)
+ rx->timer2 = jiffies;
+
+ skb_reset_network_header(*skb);
skb_pull(*skb, tipc_ehdr_size(ehdr));
pskb_trim(*skb, (*skb)->len - aead->authsize);
@@ -1814,9 +1968,8 @@ rcv:
goto free_skb;
}
- /* Update peer RX active key & TX users */
- if (destined)
- tipc_crypto_key_synch(rx, rx_key_active, buf_msg(*skb));
+ /* Ok, everything's fine, try to synch own keys according to peers' */
+ tipc_crypto_key_synch(rx, *skb);
/* Mark skb decrypted */
skb_cb->decrypted = 1;
@@ -1875,7 +2028,7 @@ print_stats:
/* Print crypto statistics */
for (i = 0, j = 0; i < MAX_STATS; i++)
j += scnprintf(buf + j, 200 - j, "|%11s ", hstats[i]);
- pr_info("\nCounter %s", buf);
+ pr_info("Counter %s", buf);
memset(buf, '-', 115);
buf[115] = '\0';
@@ -1919,21 +2072,31 @@ static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf)
char *s;
for (k = KEY_MIN; k <= KEY_MAX; k++) {
- if (k == key.passive)
- s = "PAS";
- else if (k == key.active)
- s = "ACT";
- else if (k == key.pending)
- s = "PEN";
- else
- s = "-";
+ if (k == KEY_MASTER) {
+ if (is_rx(c))
+ continue;
+ if (time_before(jiffies,
+ c->timer2 + TIPC_TX_GRACE_PERIOD))
+ s = "ACT";
+ else
+ s = "PAS";
+ } else {
+ if (k == key.passive)
+ s = "PAS";
+ else if (k == key.active)
+ s = "ACT";
+ else if (k == key.pending)
+ s = "PEN";
+ else
+ s = "-";
+ }
i += scnprintf(buf + i, 200 - i, "\tKey%d: %s", k, s);
rcu_read_lock();
aead = rcu_dereference(c->aead[k]);
if (aead)
i += scnprintf(buf + i, 200 - i,
- "{\"%s...\", \"%s\"}/%d:%d",
+ "{\"0x...%s\", \"%s\"}/%d:%d",
aead->hint,
(aead->mode == CLUSTER_KEY) ? "c" : "p",
atomic_read(&aead->users),
@@ -1942,14 +2105,13 @@ static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf)
i += scnprintf(buf + i, 200 - i, "\n");
}
- if (c->node)
+ if (is_rx(c))
i += scnprintf(buf + i, 200 - i, "\tPeer RX active: %d\n",
atomic_read(&c->peer_rx_active));
return buf;
}
-#ifdef TIPC_CRYPTO_DEBUG
static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new,
char *buf)
{
@@ -1960,7 +2122,7 @@ static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new,
/* Output format: "[%s %s %s] -> [%s %s %s]", max len = 32 */
again:
i += scnprintf(buf + i, 32 - i, "[");
- for (k = KEY_MIN; k <= KEY_MAX; k++) {
+ for (k = KEY_1; k <= KEY_3; k++) {
if (k == key->passive)
s = "pas";
else if (k == key->active)
@@ -1970,7 +2132,7 @@ again:
else
s = "-";
i += scnprintf(buf + i, 32 - i,
- (k != KEY_MAX) ? "%s " : "%s", s);
+ (k != KEY_3) ? "%s " : "%s", s);
}
if (key != &new) {
i += scnprintf(buf + i, 32 - i, "] -> ");
@@ -1980,4 +2142,330 @@ again:
i += scnprintf(buf + i, 32 - i, "]");
return buf;
}
-#endif
+
+/**
+ * tipc_crypto_msg_rcv - Common 'MSG_CRYPTO' processing point
+ * @net: the struct net
+ * @skb: the receiving message buffer
+ */
+void tipc_crypto_msg_rcv(struct net *net, struct sk_buff *skb)
+{
+ struct tipc_crypto *rx;
+ struct tipc_msg *hdr;
+
+ if (unlikely(skb_linearize(skb)))
+ goto exit;
+
+ hdr = buf_msg(skb);
+ rx = tipc_node_crypto_rx_by_addr(net, msg_prevnode(hdr));
+ if (unlikely(!rx))
+ goto exit;
+
+ switch (msg_type(hdr)) {
+ case KEY_DISTR_MSG:
+ if (tipc_crypto_key_rcv(rx, hdr))
+ goto exit;
+ break;
+ default:
+ break;
+ }
+
+ tipc_node_put(rx->node);
+
+exit:
+ kfree_skb(skb);
+}
+
+/**
+ * tipc_crypto_key_distr - Distribute a TX key
+ * @tx: the TX crypto
+ * @key: the key's index
+ * @dest: the destination tipc node, = NULL if distributing to all nodes
+ *
+ * Return: 0 in case of success, otherwise < 0
+ */
+int tipc_crypto_key_distr(struct tipc_crypto *tx, u8 key,
+ struct tipc_node *dest)
+{
+ struct tipc_aead *aead;
+ u32 dnode = tipc_node_get_addr(dest);
+ int rc = -ENOKEY;
+
+ if (!sysctl_tipc_key_exchange_enabled)
+ return 0;
+
+ if (key) {
+ rcu_read_lock();
+ aead = tipc_aead_get(tx->aead[key]);
+ if (likely(aead)) {
+ rc = tipc_crypto_key_xmit(tx->net, aead->key,
+ aead->gen, aead->mode,
+ dnode);
+ tipc_aead_put(aead);
+ }
+ rcu_read_unlock();
+ }
+
+ return rc;
+}
+
+/**
+ * tipc_crypto_key_xmit - Send a session key
+ * @net: the struct net
+ * @skey: the session key to be sent
+ * @gen: the key's generation
+ * @mode: the key's mode
+ * @dnode: the destination node address, = 0 if broadcasting to all nodes
+ *
+ * The session key 'skey' is packed in a TIPC v2 'MSG_CRYPTO/KEY_DISTR_MSG'
+ * as its data section, then xmit-ed through the uc/bc link.
+ *
+ * Return: 0 in case of success, otherwise < 0
+ */
+static int tipc_crypto_key_xmit(struct net *net, struct tipc_aead_key *skey,
+ u16 gen, u8 mode, u32 dnode)
+{
+ struct sk_buff_head pkts;
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+ u16 size, cong_link_cnt;
+ u8 *data;
+ int rc;
+
+ size = tipc_aead_key_size(skey);
+ skb = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = buf_msg(skb);
+ tipc_msg_init(tipc_own_addr(net), hdr, MSG_CRYPTO, KEY_DISTR_MSG,
+ INT_H_SIZE, dnode);
+ msg_set_size(hdr, INT_H_SIZE + size);
+ msg_set_key_gen(hdr, gen);
+ msg_set_key_mode(hdr, mode);
+
+ data = msg_data(hdr);
+ *((__be32 *)(data + TIPC_AEAD_ALG_NAME)) = htonl(skey->keylen);
+ memcpy(data, skey->alg_name, TIPC_AEAD_ALG_NAME);
+ memcpy(data + TIPC_AEAD_ALG_NAME + sizeof(__be32), skey->key,
+ skey->keylen);
+
+ __skb_queue_head_init(&pkts);
+ __skb_queue_tail(&pkts, skb);
+ if (dnode)
+ rc = tipc_node_xmit(net, &pkts, dnode, 0);
+ else
+ rc = tipc_bcast_xmit(net, &pkts, &cong_link_cnt);
+
+ return rc;
+}
+
+/**
+ * tipc_crypto_key_rcv - Receive a session key
+ * @rx: the RX crypto
+ * @hdr: the TIPC v2 message incl. the receiving session key in its data
+ *
+ * This function retrieves the session key in the message from peer, then
+ * schedules a RX work to attach the key to the corresponding RX crypto.
+ *
+ * Return: "true" if the key has been scheduled for attaching, otherwise
+ * "false".
+ */
+static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr)
+{
+ struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
+ struct tipc_aead_key *skey = NULL;
+ u16 key_gen = msg_key_gen(hdr);
+ u32 size = msg_data_sz(hdr);
+ u8 *data = msg_data(hdr);
+ unsigned int keylen;
+
+ /* Verify whether the size can exist in the packet */
+ if (unlikely(size < sizeof(struct tipc_aead_key) + TIPC_AEAD_KEYLEN_MIN)) {
+ pr_debug("%s: message data size is too small\n", rx->name);
+ goto exit;
+ }
+
+ keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME)));
+
+ /* Verify the supplied size values */
+ if (unlikely(size != keylen + sizeof(struct tipc_aead_key) ||
+ keylen > TIPC_AEAD_KEY_SIZE_MAX)) {
+ pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name);
+ goto exit;
+ }
+
+ spin_lock(&rx->lock);
+ if (unlikely(rx->skey || (key_gen == rx->key_gen && rx->key.keys))) {
+ pr_err("%s: key existed <%p>, gen %d vs %d\n", rx->name,
+ rx->skey, key_gen, rx->key_gen);
+ goto exit_unlock;
+ }
+
+ /* Allocate memory for the key */
+ skey = kmalloc(size, GFP_ATOMIC);
+ if (unlikely(!skey)) {
+ pr_err("%s: unable to allocate memory for skey\n", rx->name);
+ goto exit_unlock;
+ }
+
+ /* Copy key from msg data */
+ skey->keylen = keylen;
+ memcpy(skey->alg_name, data, TIPC_AEAD_ALG_NAME);
+ memcpy(skey->key, data + TIPC_AEAD_ALG_NAME + sizeof(__be32),
+ skey->keylen);
+
+ rx->key_gen = key_gen;
+ rx->skey_mode = msg_key_mode(hdr);
+ rx->skey = skey;
+ rx->nokey = 0;
+ mb(); /* for nokey flag */
+
+exit_unlock:
+ spin_unlock(&rx->lock);
+
+exit:
+ /* Schedule the key attaching on this crypto */
+ if (likely(skey && queue_delayed_work(tx->wq, &rx->work, 0)))
+ return true;
+
+ return false;
+}
+
+/**
+ * tipc_crypto_work_rx - Scheduled RX works handler
+ * @work: the struct RX work
+ *
+ * The function processes the previous scheduled works i.e. distributing TX key
+ * or attaching a received session key on RX crypto.
+ */
+static void tipc_crypto_work_rx(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct tipc_crypto *rx = container_of(dwork, struct tipc_crypto, work);
+ struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx;
+ unsigned long delay = msecs_to_jiffies(5000);
+ bool resched = false;
+ u8 key;
+ int rc;
+
+ /* Case 1: Distribute TX key to peer if scheduled */
+ if (atomic_cmpxchg(&rx->key_distr,
+ KEY_DISTR_SCHED,
+ KEY_DISTR_COMPL) == KEY_DISTR_SCHED) {
+ /* Always pick the newest one for distributing */
+ key = tx->key.pending ?: tx->key.active;
+ rc = tipc_crypto_key_distr(tx, key, rx->node);
+ if (unlikely(rc))
+ pr_warn("%s: unable to distr key[%d] to %s, err %d\n",
+ tx->name, key, tipc_node_get_id_str(rx->node),
+ rc);
+
+ /* Sched for key_distr releasing */
+ resched = true;
+ } else {
+ atomic_cmpxchg(&rx->key_distr, KEY_DISTR_COMPL, 0);
+ }
+
+ /* Case 2: Attach a pending received session key from peer if any */
+ if (rx->skey) {
+ rc = tipc_crypto_key_init(rx, rx->skey, rx->skey_mode, false);
+ if (unlikely(rc < 0))
+ pr_warn("%s: unable to attach received skey, err %d\n",
+ rx->name, rc);
+ switch (rc) {
+ case -EBUSY:
+ case -ENOMEM:
+ /* Resched the key attaching */
+ resched = true;
+ break;
+ default:
+ synchronize_rcu();
+ kfree(rx->skey);
+ rx->skey = NULL;
+ break;
+ }
+ }
+
+ if (resched && queue_delayed_work(tx->wq, &rx->work, delay))
+ return;
+
+ tipc_node_put(rx->node);
+}
+
+/**
+ * tipc_crypto_rekeying_sched - (Re)schedule rekeying w/o new interval
+ * @tx: TX crypto
+ * @changed: if the rekeying needs to be rescheduled with new interval
+ * @new_intv: new rekeying interval (when "changed" = true)
+ */
+void tipc_crypto_rekeying_sched(struct tipc_crypto *tx, bool changed,
+ u32 new_intv)
+{
+ unsigned long delay;
+ bool now = false;
+
+ if (changed) {
+ if (new_intv == TIPC_REKEYING_NOW)
+ now = true;
+ else
+ tx->rekeying_intv = new_intv;
+ cancel_delayed_work_sync(&tx->work);
+ }
+
+ if (tx->rekeying_intv || now) {
+ delay = (now) ? 0 : tx->rekeying_intv * 60 * 1000;
+ queue_delayed_work(tx->wq, &tx->work, msecs_to_jiffies(delay));
+ }
+}
+
+/**
+ * tipc_crypto_work_tx - Scheduled TX works handler
+ * @work: the struct TX work
+ *
+ * The function processes the previous scheduled work, i.e. key rekeying, by
+ * generating a new session key based on current one, then attaching it to the
+ * TX crypto and finally distributing it to peers. It also re-schedules the
+ * rekeying if needed.
+ */
+static void tipc_crypto_work_tx(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct tipc_crypto *tx = container_of(dwork, struct tipc_crypto, work);
+ struct tipc_aead_key *skey = NULL;
+ struct tipc_key key = tx->key;
+ struct tipc_aead *aead;
+ int rc = -ENOMEM;
+
+ if (unlikely(key.pending))
+ goto resched;
+
+ /* Take current key as a template */
+ rcu_read_lock();
+ aead = rcu_dereference(tx->aead[key.active ?: KEY_MASTER]);
+ if (unlikely(!aead)) {
+ rcu_read_unlock();
+ /* At least one key should exist for securing */
+ return;
+ }
+
+ /* Lets duplicate it first */
+ skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_ATOMIC);
+ rcu_read_unlock();
+
+ /* Now, generate new key, initiate & distribute it */
+ if (likely(skey)) {
+ rc = tipc_aead_key_generate(skey) ?:
+ tipc_crypto_key_init(tx, skey, PER_NODE_KEY, false);
+ if (likely(rc > 0))
+ rc = tipc_crypto_key_distr(tx, rc, NULL);
+ kfree_sensitive(skey);
+ }
+
+ if (unlikely(rc))
+ pr_warn_ratelimited("%s: rekeying returns %d\n", tx->name, rc);
+
+resched:
+ /* Re-schedule rekeying if any */
+ tipc_crypto_rekeying_sched(tx, false, 0);
+}
diff --git a/net/tipc/crypto.h b/net/tipc/crypto.h
index c3de769f49e8..ce7d4cc8a9e0 100644
--- a/net/tipc/crypto.h
+++ b/net/tipc/crypto.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/**
+/*
* net/tipc/crypto.h: Include file for TIPC crypto
*
* Copyright (c) 2019, Ericsson AB
@@ -53,7 +53,7 @@
#define TIPC_AES_GCM_IV_SIZE 12
#define TIPC_AES_GCM_TAG_SIZE 16
-/**
+/*
* TIPC crypto modes:
* - CLUSTER_KEY:
* One single key is used for both TX & RX in all nodes in the cluster.
@@ -67,14 +67,15 @@ enum {
};
extern int sysctl_tipc_max_tfms __read_mostly;
+extern int sysctl_tipc_key_exchange_enabled __read_mostly;
-/**
+/*
* TIPC encryption message format:
*
* 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
* 1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * w0:|Ver=7| User |D|TX |RX |K| Rsvd |
+ * w0:|Ver=7| User |D|TX |RX |K|M|N| Rsvd |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* w1:| Seqno |
* w2:| (8 octets) |
@@ -101,6 +102,9 @@ extern int sysctl_tipc_max_tfms __read_mostly;
* RX : Currently RX active key corresponding to the destination
* node's TX key (when the "D" bit is set)
* K : Keep-alive bit (for RPS, LINK_PROTOCOL/STATE_MSG only)
+ * M : Bit indicates if sender has master key
+ * N : Bit indicates if sender has no RX keys corresponding to the
+ * receiver's TX (when the "D" bit is set)
* Rsvd : Reserved bit, field
* Word1-2:
* Seqno : The 64-bit sequence number of the encrypted message, also
@@ -117,7 +121,9 @@ struct tipc_ehdr {
__u8 destined:1,
user:4,
version:3;
- __u8 reserved_1:3,
+ __u8 reserved_1:1,
+ rx_nokey:1,
+ master_key:1,
keepalive:1,
rx_key_active:2,
tx_key:2;
@@ -128,7 +134,9 @@ struct tipc_ehdr {
__u8 tx_key:2,
rx_key_active:2,
keepalive:1,
- reserved_1:3;
+ master_key:1,
+ rx_nokey:1,
+ reserved_1:1;
#else
#error "Please fix <asm/byteorder.h>"
#endif
@@ -158,10 +166,35 @@ int tipc_crypto_xmit(struct net *net, struct sk_buff **skb,
int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx,
struct sk_buff **skb, struct tipc_bearer *b);
int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
- u8 mode);
+ u8 mode, bool master_key);
void tipc_crypto_key_flush(struct tipc_crypto *c);
-int tipc_aead_key_validate(struct tipc_aead_key *ukey);
+int tipc_crypto_key_distr(struct tipc_crypto *tx, u8 key,
+ struct tipc_node *dest);
+void tipc_crypto_msg_rcv(struct net *net, struct sk_buff *skb);
+void tipc_crypto_rekeying_sched(struct tipc_crypto *tx, bool changed,
+ u32 new_intv);
+int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info);
bool tipc_ehdr_validate(struct sk_buff *skb);
+static inline u32 msg_key_gen(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 16, 0xffff);
+}
+
+static inline void msg_set_key_gen(struct tipc_msg *m, u32 gen)
+{
+ msg_set_bits(m, 4, 16, 0xffff, gen);
+}
+
+static inline u32 msg_key_mode(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 0, 0xf);
+}
+
+static inline void msg_set_key_mode(struct tipc_msg *m, u32 mode)
+{
+ msg_set_bits(m, 4, 0, 0xf, mode);
+}
+
#endif /* _TIPC_CRYPTO_H */
#endif
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index bfe43da127c0..e8630707901e 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -74,7 +74,8 @@ struct tipc_discoverer {
/**
* tipc_disc_init_msg - initialize a link setup message
* @net: the applicable net namespace
- * @type: message type (request or response)
+ * @skb: buffer containing message
+ * @mtyp: message type (request or response)
* @b: ptr to bearer issuing message
*/
static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb,
@@ -147,8 +148,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
{
struct net *net = d->net;
struct tipc_net *tn = tipc_net(net);
- bool trial = time_before(jiffies, tn->addr_trial_end);
u32 self = tipc_own_addr(net);
+ bool trial = time_before(jiffies, tn->addr_trial_end) && !self;
if (mtyp == DSC_TRIAL_FAIL_MSG) {
if (!trial)
@@ -167,7 +168,7 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
/* Apply trial address if we just left trial period */
if (!trial && !self) {
- tipc_sched_net_finalize(net, tn->trial_addr);
+ schedule_work(&tn->work);
msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
}
@@ -307,7 +308,7 @@ static void tipc_disc_timeout(struct timer_list *t)
if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) {
mod_timer(&d->timer, jiffies + TIPC_DISC_INIT);
spin_unlock_bh(&d->lock);
- tipc_sched_net_finalize(net, tn->trial_addr);
+ schedule_work(&tn->work);
return;
}
@@ -339,9 +340,9 @@ exit:
* @net: the applicable net namespace
* @b: ptr to bearer issuing requests
* @dest: destination address for request messages
- * @dest_domain: network domain to which links can be established
+ * @skb: pointer to created frame
*
- * Returns 0 if successful, otherwise -errno.
+ * Return: 0 if successful, otherwise -errno.
*/
int tipc_disc_create(struct net *net, struct tipc_bearer *b,
struct tipc_media_addr *dest, struct sk_buff **skb)
@@ -380,7 +381,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b,
/**
* tipc_disc_delete - destroy object sending periodic link setup requests
- * @d: ptr to link duest structure
+ * @d: ptr to link dest structure
*/
void tipc_disc_delete(struct tipc_discoverer *d)
{
@@ -393,7 +394,6 @@ void tipc_disc_delete(struct tipc_discoverer *d)
* tipc_disc_reset - reset object to send periodic link setup requests
* @net: the applicable net namespace
* @b: ptr to bearer issuing requests
- * @dest_domain: network domain to which links can be established
*/
void tipc_disc_reset(struct net *net, struct tipc_bearer *b)
{
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 8b0bb600602d..cb0d185e06af 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -60,14 +60,12 @@ static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr)
/* Convert raw mac address format to media addr format */
static int tipc_eth_raw2addr(struct tipc_bearer *b,
struct tipc_media_addr *addr,
- char *msg)
+ const char *msg)
{
- char bcast_mac[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
-
memset(addr, 0, sizeof(*addr));
ether_addr_copy(addr->value, msg);
addr->media_id = TIPC_MEDIA_TYPE_ETH;
- addr->broadcast = !memcmp(addr->value, bcast_mac, ETH_ALEN);
+ addr->broadcast = is_broadcast_ether_addr(addr->value);
return 0;
}
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 89257e2a980d..3e137d8c9d2f 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -2,6 +2,7 @@
* net/tipc/group.c: TIPC group messaging code
*
* Copyright (c) 2017, Ericsson AB
+ * Copyright (c) 2020, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -273,8 +274,8 @@ static struct tipc_member *tipc_group_find_node(struct tipc_group *grp,
return NULL;
}
-static void tipc_group_add_to_tree(struct tipc_group *grp,
- struct tipc_member *m)
+static int tipc_group_add_to_tree(struct tipc_group *grp,
+ struct tipc_member *m)
{
u64 nkey, key = (u64)m->node << 32 | m->port;
struct rb_node **n, *parent = NULL;
@@ -291,10 +292,11 @@ static void tipc_group_add_to_tree(struct tipc_group *grp,
else if (key > nkey)
n = &(*n)->rb_right;
else
- return;
+ return -EEXIST;
}
rb_link_node(&m->tree_node, parent, n);
rb_insert_color(&m->tree_node, &grp->members);
+ return 0;
}
static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
@@ -302,6 +304,7 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
u32 instance, int state)
{
struct tipc_member *m;
+ int ret;
m = kzalloc(sizeof(*m), GFP_ATOMIC);
if (!m)
@@ -314,8 +317,12 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
m->port = port;
m->instance = instance;
m->bc_acked = grp->bc_snd_nxt - 1;
+ ret = tipc_group_add_to_tree(grp, m);
+ if (ret < 0) {
+ kfree(m);
+ return NULL;
+ }
grp->member_cnt++;
- tipc_group_add_to_tree(grp, m);
tipc_nlist_add(&grp->dests, m->node);
m->state = state;
return m;
@@ -353,7 +360,7 @@ struct tipc_nlist *tipc_group_dests(struct tipc_group *grp)
return &grp->dests;
}
-void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
+void tipc_group_self(struct tipc_group *grp, struct tipc_service_range *seq,
int *scope)
{
seq->type = grp->type;
@@ -536,7 +543,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
update = true;
deliver = false;
}
- /* Fall thru */
+ fallthrough;
case TIPC_GRP_BCAST_MSG:
m->bc_rcv_nxt++;
ack = msg_grp_bc_ack_req(hdr);
diff --git a/net/tipc/group.h b/net/tipc/group.h
index 76b4e5a7b39d..ea4c3be64c78 100644
--- a/net/tipc/group.h
+++ b/net/tipc/group.h
@@ -2,6 +2,7 @@
* net/tipc/group.h: Include file for TIPC group unicast/multicast functions
*
* Copyright (c) 2017, Ericsson AB
+ * Copyright (c) 2020, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -50,7 +51,7 @@ void tipc_group_delete(struct net *net, struct tipc_group *grp);
void tipc_group_add_member(struct tipc_group *grp, u32 node,
u32 port, u32 instance);
struct tipc_nlist *tipc_group_dests(struct tipc_group *grp);
-void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
+void tipc_group_self(struct tipc_group *grp, struct tipc_service_range *seq,
int *scope);
u32 tipc_group_exclude(struct tipc_group *grp);
void tipc_group_filter_msg(struct tipc_group *grp,
diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
index 7aa9ff88458d..b9ad0434c3cd 100644
--- a/net/tipc/ib_media.c
+++ b/net/tipc/ib_media.c
@@ -67,7 +67,7 @@ static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr)
/* Convert raw InfiniBand address format to media addr format */
static int tipc_ib_raw2addr(struct tipc_bearer *b,
struct tipc_media_addr *addr,
- char *msg)
+ const char *msg)
{
memset(addr, 0, sizeof(*addr));
memcpy(addr->value, msg, INFINIBAND_ALEN);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 467c53a1fb5c..e260c0d557f5 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -120,6 +120,34 @@ struct tipc_stats {
* @reasm_buf: head of partially reassembled inbound message fragments
* @bc_rcvr: marks that this is a broadcast receiver link
* @stats: collects statistics regarding link activity
+ * @session: session to be used by link
+ * @snd_nxt_state: next send seq number
+ * @rcv_nxt_state: next rcv seq number
+ * @in_session: have received ACTIVATE_MSG from peer
+ * @active: link is active
+ * @if_name: associated interface name
+ * @rst_cnt: link reset counter
+ * @drop_point: seq number for failover handling (FIXME)
+ * @failover_reasm_skb: saved failover msg ptr (FIXME)
+ * @failover_deferdq: deferred message queue for failover processing (FIXME)
+ * @transmq: the link's transmit queue
+ * @backlog: link's backlog by priority (importance)
+ * @snd_nxt: next sequence number to be used
+ * @rcv_unacked: # messages read by user, but not yet acked back to peer
+ * @deferdq: deferred receive queue
+ * @window: sliding window size for congestion handling
+ * @min_win: minimal send window to be used by link
+ * @ssthresh: slow start threshold for congestion handling
+ * @max_win: maximal send window to be used by link
+ * @cong_acks: congestion acks for congestion avoidance (FIXME)
+ * @checkpoint: seq number for congestion window size handling
+ * @reasm_tnlmsg: fragmentation/reassembly area for tunnel protocol message
+ * @last_gap: last gap ack blocks for bcast (FIXME)
+ * @last_ga: ptr to gap ack blocks
+ * @bc_rcvlink: the peer specific link used for broadcast reception
+ * @bc_sndlink: the namespace global link used for broadcast sending
+ * @nack_state: bcast nack state
+ * @bc_peer_is_up: peer has acked the bcast init msg
*/
struct tipc_link {
u32 addr;
@@ -188,6 +216,8 @@ struct tipc_link {
/* Broadcast */
u16 ackers;
u16 acked;
+ u16 last_gap;
+ struct tipc_gap_ack_blks *last_ga;
struct tipc_link *bc_rcvlink;
struct tipc_link *bc_sndlink;
u8 nack_state;
@@ -214,11 +244,6 @@ enum {
#define TIPC_BC_RETR_LIM (jiffies + msecs_to_jiffies(10))
#define TIPC_UC_RETR_TIME (jiffies + msecs_to_jiffies(1))
-/*
- * Interval between NACKs when packets arrive out of order
- */
-#define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2)
-
/* Link FSM states:
*/
enum {
@@ -249,11 +274,14 @@ static int tipc_link_build_nack_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
static void tipc_link_build_bc_init_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
-static int tipc_link_release_pkts(struct tipc_link *l, u16 to);
-static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap);
-static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
+static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga,
+ struct tipc_link *l, u8 start_index);
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr);
+static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r,
+ u16 acked, u16 gap,
struct tipc_gap_ack_blks *ga,
- struct sk_buff_head *xmitq);
+ struct sk_buff_head *xmitq,
+ bool *retransmitted, int *rc);
static void tipc_link_update_cwin(struct tipc_link *l, int released,
bool retransmitted);
/*
@@ -344,6 +372,11 @@ char tipc_link_plane(struct tipc_link *l)
return l->net_plane;
}
+struct net *tipc_link_net(struct tipc_link *l)
+{
+ return l->net;
+}
+
void tipc_link_update_caps(struct tipc_link *l, u16 capabilities)
{
l->peer_caps = capabilities;
@@ -370,7 +403,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
snd_l->ackers--;
rcv_l->bc_peer_is_up = true;
rcv_l->state = LINK_ESTABLISHED;
- tipc_link_bc_ack_rcv(rcv_l, ack, xmitq);
+ tipc_link_bc_ack_rcv(rcv_l, ack, 0, NULL, xmitq, NULL);
trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!");
tipc_link_reset(rcv_l);
rcv_l->state = LINK_RESET;
@@ -440,7 +473,7 @@ u32 tipc_link_state(struct tipc_link *l)
/**
* tipc_link_create - create a new link
- * @n: pointer to associated node
+ * @net: pointer to associated network namespace
* @if_name: associated interface name
* @bearer_id: id (index) of associated bearer
* @tolerance: link tolerance to be used by link
@@ -450,7 +483,6 @@ u32 tipc_link_state(struct tipc_link *l)
* @min_win: minimal send window to be used by link
* @max_win: maximal send window to be used by link
* @session: session to be used by link
- * @ownnode: identity of own node
* @peer: node id of peer node
* @peer_caps: bitmap describing peer node capabilities
* @bc_sndlink: the namespace global link used for broadcast sending
@@ -458,8 +490,10 @@ u32 tipc_link_state(struct tipc_link *l)
* @inputq: queue to put messages ready for delivery
* @namedq: queue to put binding table update messages ready for delivery
* @link: return value, pointer to put the created link
+ * @self: local unicast link id
+ * @peer_id: 128-bit ID of peer
*
- * Returns true if link was created, otherwise false
+ * Return: true if link was created, otherwise false
*/
bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
int tolerance, char net_plane, u32 mtu, int priority,
@@ -525,16 +559,22 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
/**
* tipc_link_bc_create - create new link to be used for broadcast
- * @n: pointer to associated node
+ * @net: pointer to associated network namespace
* @mtu: mtu to be used initially if no peers
- * @window: send window to be used
+ * @min_win: minimal send window to be used by link
+ * @max_win: maximal send window to be used by link
* @inputq: queue to put messages ready for delivery
* @namedq: queue to put binding table update messages ready for delivery
* @link: return value, pointer to put the created link
+ * @ownnode: identity of own node
+ * @peer: node id of peer node
+ * @peer_id: 128-bit ID of peer
+ * @peer_caps: bitmap describing peer node capabilities
+ * @bc_sndlink: the namespace global link used for broadcast sending
*
- * Returns true if link was created, otherwise false
+ * Return: true if link was created, otherwise false
*/
-bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
+bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id,
int mtu, u32 min_win, u32 max_win, u16 peer_caps,
struct sk_buff_head *inputq,
struct sk_buff_head *namedq,
@@ -549,7 +589,18 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
return false;
l = *link;
- strcpy(l->name, tipc_bclink_name);
+ if (peer_id) {
+ char peer_str[NODE_ID_STR_LEN] = {0,};
+
+ tipc_nodeid2string(peer_str, peer_id);
+ if (strlen(peer_str) > 16)
+ sprintf(peer_str, "%x", peer);
+ /* Broadcast receiver link name: "broadcast-link:<peer>" */
+ snprintf(l->name, sizeof(l->name), "%s:%s", tipc_bclink_name,
+ peer_str);
+ } else {
+ strcpy(l->name, tipc_bclink_name);
+ }
trace_tipc_link_reset(l, TIPC_DUMP_ALL, "bclink created!");
tipc_link_reset(l);
l->state = LINK_RESET;
@@ -603,6 +654,7 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt)
break;
case LINK_FAILOVER_BEGIN_EVT:
l->state = LINK_FAILINGOVER;
+ break;
case LINK_FAILURE_EVT:
case LINK_RESET_EVT:
case LINK_ESTABLISH_EVT:
@@ -776,7 +828,7 @@ static void link_profile_stats(struct tipc_link *l)
* tipc_link_too_silent - check if link is "too silent"
* @l: tipc link to be checked
*
- * Returns true if the link 'silent_intv_cnt' is about to reach the
+ * Return: true if the link 'silent_intv_cnt' is about to reach the
* 'abort_limit' value, otherwise false
*/
bool tipc_link_too_silent(struct tipc_link *l)
@@ -784,8 +836,6 @@ bool tipc_link_too_silent(struct tipc_link *l)
return (l->silent_intv_cnt + 2 > l->abort_limit);
}
-static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
- u16 from, u16 to, struct sk_buff_head *xmitq);
/* tipc_link_timeout - perform periodic task as instructed from node timeout
*/
int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
@@ -813,11 +863,11 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
state |= l->bc_rcvlink->rcv_unacked;
state |= l->rcv_unacked;
state |= !skb_queue_empty(&l->transmq);
- state |= !skb_queue_empty(&l->deferdq);
probe = mstate->probing;
probe |= l->silent_intv_cnt;
if (probe || mstate->monitoring)
l->silent_intv_cnt++;
+ probe |= !skb_queue_empty(&l->deferdq);
if (l->snd_nxt == l->checkpoint) {
tipc_link_update_cwin(l, 0, 0);
probe = true;
@@ -907,6 +957,21 @@ static void link_prepare_wakeup(struct tipc_link *l)
}
+/**
+ * tipc_link_set_skb_retransmit_time - set the time at which retransmission of
+ * the given skb should be next attempted
+ * @skb: skb to set a future retransmission time for
+ * @l: link the skb will be transmitted on
+ */
+static void tipc_link_set_skb_retransmit_time(struct sk_buff *skb,
+ struct tipc_link *l)
+{
+ if (link_is_bc_sndlink(l))
+ TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
+ else
+ TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
+}
+
void tipc_link_reset(struct tipc_link *l)
{
struct sk_buff_head list;
@@ -948,6 +1013,9 @@ void tipc_link_reset(struct tipc_link *l)
l->snd_nxt_state = 1;
l->rcv_nxt_state = 1;
l->acked = 0;
+ l->last_gap = 0;
+ kfree(l->last_ga);
+ l->last_ga = NULL;
l->silent_intv_cnt = 0;
l->rst_cnt = 0;
l->bc_peer_is_up = false;
@@ -957,18 +1025,17 @@ void tipc_link_reset(struct tipc_link *l)
/**
* tipc_link_xmit(): enqueue buffer list according to queue situation
- * @link: link to use
+ * @l: link to use
* @list: chain of buffers containing message
* @xmitq: returned list of packets to be sent by caller
*
* Consumes the buffer chain.
- * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
* Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
+ * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
*/
int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
struct sk_buff_head *xmitq)
{
- struct tipc_msg *hdr = buf_msg(skb_peek(list));
struct sk_buff_head *backlogq = &l->backlogq;
struct sk_buff_head *transmq = &l->transmq;
struct sk_buff *skb, *_skb;
@@ -976,13 +1043,18 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
u16 ack = l->rcv_nxt - 1;
u16 seqno = l->snd_nxt;
int pkt_cnt = skb_queue_len(list);
- int imp = msg_importance(hdr);
unsigned int mss = tipc_link_mss(l);
unsigned int cwin = l->window;
unsigned int mtu = l->mtu;
+ struct tipc_msg *hdr;
bool new_bundle;
int rc = 0;
+ int imp;
+ if (pkt_cnt <= 0)
+ return 0;
+
+ hdr = buf_msg(skb_peek(list));
if (unlikely(msg_size(hdr) > mtu)) {
pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n",
skb_queue_len(list), msg_user(hdr),
@@ -991,6 +1063,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
return -EMSGSIZE;
}
+ imp = msg_importance(hdr);
/* Allow oversubscription of one data msg per source at congestion */
if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) {
if (imp == TIPC_SYSTEM_IMPORTANCE) {
@@ -1019,9 +1092,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
return -ENOBUFS;
}
__skb_queue_tail(transmq, skb);
- /* next retransmit attempt */
- if (link_is_bc_sndlink(l))
- TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
+ tipc_link_set_skb_retransmit_time(skb, l);
__skb_queue_tail(xmitq, _skb);
TIPC_SKB_CB(skb)->ackers = l->ackers;
l->rcv_unacked = 0;
@@ -1065,7 +1136,7 @@ static void tipc_link_update_cwin(struct tipc_link *l, int released,
/* Enter fast recovery */
if (unlikely(retransmitted)) {
l->ssthresh = max_t(u16, l->window / 2, 300);
- l->window = l->ssthresh;
+ l->window = min_t(u16, l->ssthresh, l->window);
return;
}
/* Enter slow start */
@@ -1122,9 +1193,7 @@ static void tipc_link_advance_backlog(struct tipc_link *l,
if (unlikely(skb == l->backlog[imp].target_bskb))
l->backlog[imp].target_bskb = NULL;
__skb_queue_tail(&l->transmq, skb);
- /* next retransmit attempt */
- if (link_is_bc_sndlink(l))
- TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
+ tipc_link_set_skb_retransmit_time(skb, l);
__skb_queue_tail(xmitq, _skb);
TIPC_SKB_CB(skb)->ackers = l->ackers;
@@ -1183,68 +1252,14 @@ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r,
if (link_is_bc_sndlink(l)) {
r->state = LINK_RESET;
- *rc = TIPC_LINK_DOWN_EVT;
+ *rc |= TIPC_LINK_DOWN_EVT;
} else {
- *rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ *rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
}
return true;
}
-/* tipc_link_bc_retrans() - retransmit zero or more packets
- * @l: the link to transmit on
- * @r: the receiving link ordering the retransmit. Same as l if unicast
- * @from: retransmit from (inclusive) this sequence number
- * @to: retransmit to (inclusive) this sequence number
- * xmitq: queue for accumulating the retransmitted packets
- */
-static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
- u16 from, u16 to, struct sk_buff_head *xmitq)
-{
- struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
- u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
- u16 ack = l->rcv_nxt - 1;
- int retransmitted = 0;
- struct tipc_msg *hdr;
- int rc = 0;
-
- if (!skb)
- return 0;
- if (less(to, from))
- return 0;
-
- trace_tipc_link_retrans(r, from, to, &l->transmq);
-
- if (link_retransmit_failure(l, r, &rc))
- return rc;
-
- skb_queue_walk(&l->transmq, skb) {
- hdr = buf_msg(skb);
- if (less(msg_seqno(hdr), from))
- continue;
- if (more(msg_seqno(hdr), to))
- break;
- if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
- continue;
- TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
- _skb = pskb_copy(skb, GFP_ATOMIC);
- if (!_skb)
- return 0;
- hdr = buf_msg(_skb);
- msg_set_ack(hdr, ack);
- msg_set_bcast_ack(hdr, bc_ack);
- _skb->priority = TC_PRIO_CONTROL;
- __skb_queue_tail(xmitq, _skb);
- l->stats.retransmitted++;
- retransmitted++;
- /* Increase actual retrans counter & mark first time */
- if (!TIPC_SKB_CB(skb)->retr_cnt++)
- TIPC_SKB_CB(skb)->retr_stamp = jiffies;
- }
- tipc_link_update_cwin(l, 0, retransmitted);
- return 0;
-}
-
/* tipc_data_input - deliver data and name distr msgs to upper layer
*
* Consumes buffer if message is of right type
@@ -1265,7 +1280,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
skb_queue_tail(mc_inputq, skb);
return true;
}
- /* fall through */
+ fallthrough;
case CONN_MANAGER:
skb_queue_tail(inputq, skb);
return true;
@@ -1281,11 +1296,20 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
case MSG_FRAGMENTER:
case BCAST_PROTOCOL:
return false;
+#ifdef CONFIG_TIPC_CRYPTO
+ case MSG_CRYPTO:
+ if (sysctl_tipc_key_exchange_enabled &&
+ TIPC_SKB_CB(skb)->decrypted) {
+ tipc_crypto_msg_rcv(l->net, skb);
+ return true;
+ }
+ fallthrough;
+#endif
default:
pr_warn("Dropping received illegal msg type\n");
kfree_skb(skb);
return true;
- };
+ }
}
/* tipc_link_input - process packet that has passed link protocol check
@@ -1402,46 +1426,68 @@ static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb,
return rc;
}
-static int tipc_link_release_pkts(struct tipc_link *l, u16 acked)
-{
- int released = 0;
- struct sk_buff *skb, *tmp;
-
- skb_queue_walk_safe(&l->transmq, skb, tmp) {
- if (more(buf_seqno(skb), acked))
- break;
- __skb_unlink(skb, &l->transmq);
- kfree_skb(skb);
- released++;
+/**
+ * tipc_get_gap_ack_blks - get Gap ACK blocks from PROTOCOL/STATE_MSG
+ * @ga: returned pointer to the Gap ACK blocks if any
+ * @l: the tipc link
+ * @hdr: the PROTOCOL/STATE_MSG header
+ * @uc: desired Gap ACK blocks type, i.e. unicast (= 1) or broadcast (= 0)
+ *
+ * Return: the total Gap ACK blocks size
+ */
+u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
+ struct tipc_msg *hdr, bool uc)
+{
+ struct tipc_gap_ack_blks *p;
+ u16 sz = 0;
+
+ /* Does peer support the Gap ACK blocks feature? */
+ if (l->peer_caps & TIPC_GAP_ACK_BLOCK) {
+ p = (struct tipc_gap_ack_blks *)msg_data(hdr);
+ sz = ntohs(p->len);
+ /* Sanity check */
+ if (sz == struct_size(p, gacks, p->ugack_cnt + p->bgack_cnt)) {
+ /* Good, check if the desired type exists */
+ if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt))
+ goto ok;
+ /* Backward compatible: peer might not support bc, but uc? */
+ } else if (uc && sz == struct_size(p, gacks, p->ugack_cnt)) {
+ if (p->ugack_cnt) {
+ p->bgack_cnt = 0;
+ goto ok;
+ }
+ }
}
- return released;
+ /* Other cases: ignore! */
+ p = NULL;
+
+ok:
+ *ga = p;
+ return sz;
}
-/* tipc_build_gap_ack_blks - build Gap ACK blocks
- * @l: tipc link that data have come with gaps in sequence if any
- * @data: data buffer to store the Gap ACK blocks after built
- *
- * returns the actual allocated memory size
- */
-static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap)
+static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga,
+ struct tipc_link *l, u8 start_index)
{
+ struct tipc_gap_ack *gacks = &ga->gacks[start_index];
struct sk_buff *skb = skb_peek(&l->deferdq);
- struct tipc_gap_ack_blks *ga = data;
- u16 len, expect, seqno = 0;
+ u16 expect, seqno = 0;
u8 n = 0;
- if (!skb || !gap)
- goto exit;
+ if (!skb)
+ return 0;
expect = buf_seqno(skb);
skb_queue_walk(&l->deferdq, skb) {
seqno = buf_seqno(skb);
if (unlikely(more(seqno, expect))) {
- ga->gacks[n].ack = htons(expect - 1);
- ga->gacks[n].gap = htons(seqno - expect);
- if (++n >= MAX_GAP_ACK_BLKS) {
- pr_info_ratelimited("Too few Gap ACK blocks!\n");
- goto exit;
+ gacks[n].ack = htons(expect - 1);
+ gacks[n].gap = htons(seqno - expect);
+ if (++n >= MAX_GAP_ACK_BLKS / 2) {
+ pr_info_ratelimited("Gacks on %s: %d, ql: %d!\n",
+ l->name, n,
+ skb_queue_len(&l->deferdq));
+ return n;
}
} else if (unlikely(less(seqno, expect))) {
pr_warn("Unexpected skb in deferdq!\n");
@@ -1451,14 +1497,44 @@ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap)
}
/* last block */
- ga->gacks[n].ack = htons(seqno);
- ga->gacks[n].gap = 0;
+ gacks[n].ack = htons(seqno);
+ gacks[n].gap = 0;
n++;
+ return n;
+}
-exit:
- len = tipc_gap_ack_blks_sz(n);
+/* tipc_build_gap_ack_blks - build Gap ACK blocks
+ * @l: tipc unicast link
+ * @hdr: the tipc message buffer to store the Gap ACK blocks after built
+ *
+ * The function builds Gap ACK blocks for both the unicast & broadcast receiver
+ * links of a certain peer, the buffer after built has the network data format
+ * as found at the struct tipc_gap_ack_blks definition.
+ *
+ * returns the actual allocated memory size
+ */
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr)
+{
+ struct tipc_link *bcl = l->bc_rcvlink;
+ struct tipc_gap_ack_blks *ga;
+ u16 len;
+
+ ga = (struct tipc_gap_ack_blks *)msg_data(hdr);
+
+ /* Start with broadcast link first */
+ tipc_bcast_lock(bcl->net);
+ msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1);
+ msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
+ ga->bgack_cnt = __tipc_build_gap_ack_blks(ga, bcl, 0);
+ tipc_bcast_unlock(bcl->net);
+
+ /* Now for unicast link, but an explicit NACK only (???) */
+ ga->ugack_cnt = (msg_seq_gap(hdr)) ?
+ __tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0;
+
+ /* Total len */
+ len = struct_size(ga, gacks, ga->bgack_cnt + ga->ugack_cnt);
ga->len = htons(len);
- ga->gack_cnt = n;
return len;
}
@@ -1466,47 +1542,110 @@ exit:
* acked packets, also doing retransmissions if
* gaps found
* @l: tipc link with transmq queue to be advanced
+ * @r: tipc link "receiver" i.e. in case of broadcast (= "l" if unicast)
* @acked: seqno of last packet acked by peer without any gaps before
* @gap: # of gap packets
* @ga: buffer pointer to Gap ACK blocks from peer
* @xmitq: queue for accumulating the retransmitted packets if any
+ * @retransmitted: returned boolean value if a retransmission is really issued
+ * @rc: returned code e.g. TIPC_LINK_DOWN_EVT if a repeated retransmit failures
+ * happens (- unlikely case)
*
- * In case of a repeated retransmit failures, the call will return shortly
- * with a returned code (e.g. TIPC_LINK_DOWN_EVT)
+ * Return: the number of packets released from the link transmq
*/
-static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
+static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r,
+ u16 acked, u16 gap,
struct tipc_gap_ack_blks *ga,
- struct sk_buff_head *xmitq)
+ struct sk_buff_head *xmitq,
+ bool *retransmitted, int *rc)
{
+ struct tipc_gap_ack_blks *last_ga = r->last_ga, *this_ga = NULL;
+ struct tipc_gap_ack *gacks = NULL;
struct sk_buff *skb, *_skb, *tmp;
struct tipc_msg *hdr;
+ u32 qlen = skb_queue_len(&l->transmq);
+ u16 nacked = acked, ngap = gap, gack_cnt = 0;
u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
- bool retransmitted = false;
u16 ack = l->rcv_nxt - 1;
- bool passed = false;
- u16 released = 0;
u16 seqno, n = 0;
- int rc = 0;
+ u16 end = r->acked, start = end, offset = r->last_gap;
+ u16 si = (last_ga) ? last_ga->start_index : 0;
+ bool is_uc = !link_is_bc_sndlink(l);
+ bool bc_has_acked = false;
+
+ trace_tipc_link_retrans(r, acked + 1, acked + gap, &l->transmq);
+
+ /* Determine Gap ACK blocks if any for the particular link */
+ if (ga && is_uc) {
+ /* Get the Gap ACKs, uc part */
+ gack_cnt = ga->ugack_cnt;
+ gacks = &ga->gacks[ga->bgack_cnt];
+ } else if (ga) {
+ /* Copy the Gap ACKs, bc part, for later renewal if needed */
+ this_ga = kmemdup(ga, struct_size(ga, gacks, ga->bgack_cnt),
+ GFP_ATOMIC);
+ if (likely(this_ga)) {
+ this_ga->start_index = 0;
+ /* Start with the bc Gap ACKs */
+ gack_cnt = this_ga->bgack_cnt;
+ gacks = &this_ga->gacks[0];
+ } else {
+ /* Hmm, we can get in trouble..., simply ignore it */
+ pr_warn_ratelimited("Ignoring bc Gap ACKs, no memory\n");
+ }
+ }
+ /* Advance the link transmq */
skb_queue_walk_safe(&l->transmq, skb, tmp) {
seqno = buf_seqno(skb);
next_gap_ack:
- if (less_eq(seqno, acked)) {
+ if (less_eq(seqno, nacked)) {
+ if (is_uc)
+ goto release;
+ /* Skip packets peer has already acked */
+ if (!more(seqno, r->acked))
+ continue;
+ /* Get the next of last Gap ACK blocks */
+ while (more(seqno, end)) {
+ if (!last_ga || si >= last_ga->bgack_cnt)
+ break;
+ start = end + offset + 1;
+ end = ntohs(last_ga->gacks[si].ack);
+ offset = ntohs(last_ga->gacks[si].gap);
+ si++;
+ WARN_ONCE(more(start, end) ||
+ (!offset &&
+ si < last_ga->bgack_cnt) ||
+ si > MAX_GAP_ACK_BLKS,
+ "Corrupted Gap ACK: %d %d %d %d %d\n",
+ start, end, offset, si,
+ last_ga->bgack_cnt);
+ }
+ /* Check against the last Gap ACK block */
+ if (in_range(seqno, start, end))
+ continue;
+ /* Update/release the packet peer is acking */
+ bc_has_acked = true;
+ if (--TIPC_SKB_CB(skb)->ackers)
+ continue;
+release:
/* release skb */
__skb_unlink(skb, &l->transmq);
kfree_skb(skb);
- released++;
- } else if (less_eq(seqno, acked + gap)) {
- /* First, check if repeated retrans failures occurs? */
- if (!passed && link_retransmit_failure(l, l, &rc))
- return rc;
- passed = true;
-
+ } else if (less_eq(seqno, nacked + ngap)) {
+ /* First gap: check if repeated retrans failures? */
+ if (unlikely(seqno == acked + 1 &&
+ link_retransmit_failure(l, r, rc))) {
+ /* Ignore this bc Gap ACKs if any */
+ kfree(this_ga);
+ this_ga = NULL;
+ break;
+ }
/* retransmit skb if unrestricted*/
if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
continue;
- TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
+ tipc_link_set_skb_retransmit_time(skb, l);
_skb = pskb_copy(skb, GFP_ATOMIC);
if (!_skb)
continue;
@@ -1516,25 +1655,53 @@ next_gap_ack:
_skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, _skb);
l->stats.retransmitted++;
- retransmitted = true;
+ if (!is_uc)
+ r->stats.retransmitted++;
+ *retransmitted = true;
/* Increase actual retrans counter & mark first time */
if (!TIPC_SKB_CB(skb)->retr_cnt++)
TIPC_SKB_CB(skb)->retr_stamp = jiffies;
} else {
/* retry with Gap ACK blocks if any */
- if (!ga || n >= ga->gack_cnt)
+ if (n >= gack_cnt)
break;
- acked = ntohs(ga->gacks[n].ack);
- gap = ntohs(ga->gacks[n].gap);
+ nacked = ntohs(gacks[n].ack);
+ ngap = ntohs(gacks[n].gap);
n++;
goto next_gap_ack;
}
}
- if (released || retransmitted)
- tipc_link_update_cwin(l, released, retransmitted);
- if (released)
- tipc_link_advance_backlog(l, xmitq);
- return 0;
+
+ /* Renew last Gap ACK blocks for bc if needed */
+ if (bc_has_acked) {
+ if (this_ga) {
+ kfree(last_ga);
+ r->last_ga = this_ga;
+ r->last_gap = gap;
+ } else if (last_ga) {
+ if (less(acked, start)) {
+ si--;
+ offset = start - acked - 1;
+ } else if (less(acked, end)) {
+ acked = end;
+ }
+ if (si < last_ga->bgack_cnt) {
+ last_ga->start_index = si;
+ r->last_gap = offset;
+ } else {
+ kfree(last_ga);
+ r->last_ga = NULL;
+ r->last_gap = 0;
+ }
+ } else {
+ r->last_gap = 0;
+ }
+ r->acked = acked;
+ } else {
+ kfree(this_ga);
+ }
+
+ return qlen - skb_queue_len(&l->transmq);
}
/* tipc_link_build_state_msg: prepare link state message for transmission
@@ -1651,11 +1818,13 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
kfree_skb(skb);
break;
}
- released += tipc_link_release_pkts(l, msg_ack(hdr));
+ released += tipc_link_advance_transmq(l, l, msg_ack(hdr), 0,
+ NULL, NULL, NULL, NULL);
/* Defer delivery if sequence gap */
if (unlikely(seqno != rcv_nxt)) {
- __tipc_skb_queue_sorted(defq, seqno, skb);
+ if (!__tipc_skb_queue_sorted(defq, seqno, skb))
+ l->stats.duplicates++;
rc |= tipc_link_build_nack_msg(l, xmitq);
break;
}
@@ -1689,15 +1858,15 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
int tolerance, int priority,
struct sk_buff_head *xmitq)
{
+ struct tipc_mon_state *mstate = &l->mon_state;
+ struct sk_buff_head *dfq = &l->deferdq;
struct tipc_link *bcl = l->bc_rcvlink;
- struct sk_buff *skb;
struct tipc_msg *hdr;
- struct sk_buff_head *dfq = &l->deferdq;
+ struct sk_buff *skb;
bool node_up = link_is_up(bcl);
- struct tipc_mon_state *mstate = &l->mon_state;
+ u16 glen = 0, bc_rcvgap = 0;
int dlen = 0;
void *data;
- u16 glen = 0;
/* Don't send protocol message during reset or link failover */
if (tipc_link_is_blocked(l))
@@ -1735,11 +1904,12 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
if (l->peer_caps & TIPC_LINK_PROTO_SEQNO)
msg_set_seqno(hdr, l->snd_nxt_state++);
msg_set_seq_gap(hdr, rcvgap);
- msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
+ bc_rcvgap = link_bc_rcv_gap(bcl);
+ msg_set_bc_gap(hdr, bc_rcvgap);
msg_set_probe(hdr, probe);
msg_set_is_keepalive(hdr, probe || probe_reply);
if (l->peer_caps & TIPC_GAP_ACK_BLOCK)
- glen = tipc_build_gap_ack_blks(l, data, rcvgap);
+ glen = tipc_build_gap_ack_blks(l, hdr);
tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id);
msg_set_size(hdr, INT_H_SIZE + glen + dlen);
skb_trim(skb, INT_H_SIZE + glen + dlen);
@@ -1760,6 +1930,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
l->stats.sent_probes++;
if (rcvgap)
l->stats.sent_nacks++;
+ if (bc_rcvgap)
+ bcl->stats.sent_nacks++;
skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, skb);
trace_tipc_proto_build(skb, false, l->name);
@@ -2027,22 +2199,25 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
{
struct tipc_msg *hdr = buf_msg(skb);
struct tipc_gap_ack_blks *ga = NULL;
- u16 rcvgap = 0;
- u16 ack = msg_ack(hdr);
- u16 gap = msg_seq_gap(hdr);
+ bool reply = msg_probe(hdr), retransmitted = false;
+ u32 dlen = msg_data_sz(hdr), glen = 0;
u16 peers_snd_nxt = msg_next_sent(hdr);
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
+ u16 gap = msg_seq_gap(hdr);
+ u16 ack = msg_ack(hdr);
u16 rcv_nxt = l->rcv_nxt;
- u16 dlen = msg_data_sz(hdr);
+ u16 rcvgap = 0;
int mtyp = msg_type(hdr);
- bool reply = msg_probe(hdr);
- u16 glen = 0;
- void *data;
+ int rc = 0, released;
char *if_name;
- int rc = 0;
+ void *data;
trace_tipc_proto_rcv(skb, false, l->name);
+
+ if (dlen > U16_MAX)
+ goto exit;
+
if (tipc_link_is_blocked(l) || !xmitq)
goto exit;
@@ -2111,6 +2286,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
break;
case STATE_MSG:
+ /* Validate Gap ACK blocks, drop if invalid */
+ glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
+ if (glen > dlen)
+ break;
+
l->rcv_nxt_state = msg_seqno(hdr) + 1;
/* Update own tolerance if peer indicates a non-zero value */
@@ -2136,15 +2316,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
break;
}
- /* Receive Gap ACK blocks from peer if any */
- if (l->peer_caps & TIPC_GAP_ACK_BLOCK) {
- ga = (struct tipc_gap_ack_blks *)data;
- glen = ntohs(ga->len);
- /* sanity check: if failed, ignore Gap ACK blocks */
- if (glen != tipc_gap_ack_blks_sz(ga->gack_cnt))
- ga = NULL;
- }
-
tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr,
&l->mon_state, l->bearer_id);
@@ -2158,9 +2329,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
rcvgap, 0, 0, xmitq);
- rc |= tipc_link_advance_transmq(l, ack, gap, ga, xmitq);
+ released = tipc_link_advance_transmq(l, l, ack, gap, ga, xmitq,
+ &retransmitted, &rc);
if (gap)
l->stats.recv_nacks++;
+ if (released || retransmitted)
+ tipc_link_update_cwin(l, released, retransmitted);
+ if (released)
+ tipc_link_advance_backlog(l, xmitq);
if (unlikely(!skb_queue_empty(&l->wakeupq)))
link_prepare_wakeup(l);
}
@@ -2246,10 +2422,7 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr)
int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
struct sk_buff_head *xmitq)
{
- struct tipc_link *snd_l = l->bc_sndlink;
u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
- u16 from = msg_bcast_ack(hdr) + 1;
- u16 to = from + msg_bc_gap(hdr) - 1;
int rc = 0;
if (!link_is_up(l))
@@ -2258,21 +2431,17 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
if (!msg_peer_node_is_up(hdr))
return rc;
- /* Open when peer ackowledges our bcast init msg (pkt #1) */
+ /* Open when peer acknowledges our bcast init msg (pkt #1) */
if (msg_ack(hdr))
l->bc_peer_is_up = true;
if (!l->bc_peer_is_up)
return rc;
- l->stats.recv_nacks++;
-
/* Ignore if peers_snd_nxt goes beyond receive window */
if (more(peers_snd_nxt, l->rcv_nxt + l->window))
return rc;
- rc = tipc_link_bc_retrans(snd_l, l, from, to, xmitq);
-
l->snd_nxt = peers_snd_nxt;
if (link_bc_rcv_gap(l))
rc |= TIPC_LINK_SND_STATE;
@@ -2307,38 +2476,34 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
return 0;
}
-void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
- struct sk_buff_head *xmitq)
+int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap,
+ struct tipc_gap_ack_blks *ga,
+ struct sk_buff_head *xmitq,
+ struct sk_buff_head *retrq)
{
- struct sk_buff *skb, *tmp;
- struct tipc_link *snd_l = l->bc_sndlink;
-
- if (!link_is_up(l) || !l->bc_peer_is_up)
- return;
+ struct tipc_link *l = r->bc_sndlink;
+ bool unused = false;
+ int rc = 0;
- if (!more(acked, l->acked))
- return;
+ if (!link_is_up(r) || !r->bc_peer_is_up)
+ return 0;
- trace_tipc_link_bc_ack(l, l->acked, acked, &snd_l->transmq);
- /* Skip over packets peer has already acked */
- skb_queue_walk(&snd_l->transmq, skb) {
- if (more(buf_seqno(skb), l->acked))
- break;
+ if (gap) {
+ l->stats.recv_nacks++;
+ r->stats.recv_nacks++;
}
- /* Update/release the packets peer is acking now */
- skb_queue_walk_from_safe(&snd_l->transmq, skb, tmp) {
- if (more(buf_seqno(skb), acked))
- break;
- if (!--TIPC_SKB_CB(skb)->ackers) {
- __skb_unlink(skb, &snd_l->transmq);
- kfree_skb(skb);
- }
- }
- l->acked = acked;
- tipc_link_advance_backlog(snd_l, xmitq);
- if (unlikely(!skb_queue_empty(&snd_l->wakeupq)))
- link_prepare_wakeup(snd_l);
+ if (less(acked, r->acked) || (acked == r->acked && !gap && !ga))
+ return 0;
+
+ trace_tipc_link_bc_ack(r, acked, gap, &l->transmq);
+ tipc_link_advance_transmq(l, r, acked, gap, ga, retrq, &unused, &rc);
+
+ tipc_link_advance_backlog(l, xmitq);
+ if (unlikely(!skb_queue_empty(&l->wakeupq)))
+ link_prepare_wakeup(l);
+
+ return rc;
}
/* tipc_link_bc_nack_rcv(): receive broadcast nack message
@@ -2366,8 +2531,8 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
return 0;
if (dnode == tipc_own_addr(l->net)) {
- tipc_link_bc_ack_rcv(l, acked, xmitq);
- rc = tipc_link_bc_retrans(l->bc_sndlink, l, from, to, xmitq);
+ rc = tipc_link_bc_ack_rcv(l, acked, to - acked, NULL, xmitq,
+ xmitq);
l->stats.recv_nacks++;
return rc;
}
@@ -2395,7 +2560,7 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win)
}
/**
- * link_reset_stats - reset link statistics
+ * tipc_link_reset_stats - reset link statistics
* @l: pointer to link
*/
void tipc_link_reset_stats(struct tipc_link *l)
@@ -2639,16 +2804,15 @@ msg_full:
return -EMSGSIZE;
}
-int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
+int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg,
+ struct tipc_link *bcl)
{
int err;
void *hdr;
struct nlattr *attrs;
struct nlattr *prop;
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- u32 bc_mode = tipc_bcast_get_broadcast_mode(net);
+ u32 bc_mode = tipc_bcast_get_mode(net);
u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net);
- struct tipc_link *bcl = tn->bcl;
if (!bcl)
return 0;
@@ -2735,21 +2899,6 @@ void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
l->abort_limit = limit;
}
-char *tipc_link_name_ext(struct tipc_link *l, char *buf)
-{
- if (!l)
- scnprintf(buf, TIPC_MAX_LINK_NAME, "null");
- else if (link_is_bc_sndlink(l))
- scnprintf(buf, TIPC_MAX_LINK_NAME, "broadcast-sender");
- else if (link_is_bc_rcvlink(l))
- scnprintf(buf, TIPC_MAX_LINK_NAME,
- "broadcast-receiver, peer %x", l->addr);
- else
- memcpy(buf, l->name, TIPC_MAX_LINK_NAME);
-
- return buf;
-}
-
/**
* tipc_link_dump - dump TIPC link data
* @l: tipc link to be dumped
diff --git a/net/tipc/link.h b/net/tipc/link.h
index d3c1c3fc1659..a16f401fdabd 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -80,7 +80,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
struct sk_buff_head *inputq,
struct sk_buff_head *namedq,
struct tipc_link **link);
-bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
+bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id,
int mtu, u32 min_win, u32 max_win, u16 peer_caps,
struct sk_buff_head *inputq,
struct sk_buff_head *namedq,
@@ -111,7 +111,6 @@ u16 tipc_link_rcv_nxt(struct tipc_link *l);
u16 tipc_link_acked(struct tipc_link *l);
u32 tipc_link_id(struct tipc_link *l);
char *tipc_link_name(struct tipc_link *l);
-char *tipc_link_name_ext(struct tipc_link *l, char *buf);
u32 tipc_link_state(struct tipc_link *l);
char tipc_link_plane(struct tipc_link *l);
int tipc_link_prio(struct tipc_link *l);
@@ -143,8 +142,12 @@ int tipc_link_bc_peers(struct tipc_link *l);
void tipc_link_set_mtu(struct tipc_link *l, int mtu);
int tipc_link_mtu(struct tipc_link *l);
int tipc_link_mss(struct tipc_link *l);
-void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
- struct sk_buff_head *xmitq);
+u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
+ struct tipc_msg *hdr, bool uc);
+int tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, u16 gap,
+ struct tipc_gap_ack_blks *ga,
+ struct sk_buff_head *xmitq,
+ struct sk_buff_head *retrq);
void tipc_link_build_bc_sync_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
@@ -153,4 +156,5 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *xmitq);
bool tipc_link_too_silent(struct tipc_link *l);
+struct net *tipc_link_net(struct tipc_link *l);
#endif
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 58708b4c7719..9618e4429f0f 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -104,11 +104,41 @@ static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id)
const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);
+static inline u16 mon_cpu_to_le16(u16 val)
+{
+ return (__force __u16)htons(val);
+}
+
+static inline u32 mon_cpu_to_le32(u32 val)
+{
+ return (__force __u32)htonl(val);
+}
+
+static inline u64 mon_cpu_to_le64(u64 val)
+{
+ return (__force __u64)cpu_to_be64(val);
+}
+
+static inline u16 mon_le16_to_cpu(u16 val)
+{
+ return ntohs((__force __be16)val);
+}
+
+static inline u32 mon_le32_to_cpu(u32 val)
+{
+ return ntohl((__force __be32)val);
+}
+
+static inline u64 mon_le64_to_cpu(u64 val)
+{
+ return be64_to_cpu((__force __be64)val);
+}
+
/* dom_rec_len(): actual length of domain record for transport
*/
static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
{
- return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32));
+ return (offsetof(struct tipc_mon_domain, members)) + (mcnt * sizeof(u32));
}
/* dom_size() : calculate size of own domain based on number of peers
@@ -130,7 +160,7 @@ static void map_set(u64 *up_map, int i, unsigned int v)
static int map_get(u64 up_map, int i)
{
- return (up_map & (1 << i)) >> i;
+ return (up_map & (1ULL << i)) >> i;
}
static struct tipc_peer *peer_prev(struct tipc_peer *peer)
@@ -260,16 +290,16 @@ static void mon_update_local_domain(struct tipc_monitor *mon)
diff |= dom->members[i] != peer->addr;
dom->members[i] = peer->addr;
map_set(&dom->up_map, i, peer->is_up);
- cache->members[i] = htonl(peer->addr);
+ cache->members[i] = mon_cpu_to_le32(peer->addr);
}
diff |= dom->up_map != prev_up_map;
if (!diff)
return;
dom->gen = ++mon->dom_gen;
- cache->len = htons(dom->len);
- cache->gen = htons(dom->gen);
- cache->member_cnt = htons(member_cnt);
- cache->up_map = cpu_to_be64(dom->up_map);
+ cache->len = mon_cpu_to_le16(dom->len);
+ cache->gen = mon_cpu_to_le16(dom->gen);
+ cache->member_cnt = mon_cpu_to_le16(member_cnt);
+ cache->up_map = mon_cpu_to_le64(dom->up_map);
mon_apply_domain(mon, self);
}
@@ -322,9 +352,13 @@ static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head)
void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id)
{
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
- struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *self;
struct tipc_peer *peer, *prev, *head;
+ if (!mon)
+ return;
+
+ self = get_self(net, bearer_id);
write_lock_bh(&mon->lock);
peer = get_peer(mon, addr);
if (!peer)
@@ -407,11 +441,15 @@ exit:
void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id)
{
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
- struct tipc_peer *self = get_self(net, bearer_id);
+ struct tipc_peer *self;
struct tipc_peer *peer, *head;
struct tipc_mon_domain *dom;
int applied;
+ if (!mon)
+ return;
+
+ self = get_self(net, bearer_id);
write_lock_bh(&mon->lock);
peer = get_peer(mon, addr);
if (!peer) {
@@ -447,21 +485,24 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
struct tipc_mon_domain dom_bef;
struct tipc_mon_domain *dom;
struct tipc_peer *peer;
- u16 new_member_cnt = ntohs(arrv_dom->member_cnt);
+ u16 new_member_cnt = mon_le16_to_cpu(arrv_dom->member_cnt);
int new_dlen = dom_rec_len(arrv_dom, new_member_cnt);
- u16 new_gen = ntohs(arrv_dom->gen);
- u16 acked_gen = ntohs(arrv_dom->ack_gen);
+ u16 new_gen = mon_le16_to_cpu(arrv_dom->gen);
+ u16 acked_gen = mon_le16_to_cpu(arrv_dom->ack_gen);
+ u16 arrv_dlen = mon_le16_to_cpu(arrv_dom->len);
bool probing = state->probing;
int i, applied_bef;
state->probing = false;
/* Sanity check received domain record */
+ if (new_member_cnt > MAX_MON_DOMAIN)
+ return;
if (dlen < dom_rec_len(arrv_dom, 0))
return;
if (dlen != dom_rec_len(arrv_dom, new_member_cnt))
return;
- if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen)
+ if (dlen < new_dlen || arrv_dlen != new_dlen)
return;
/* Synch generation numbers with peer if link just came up */
@@ -509,9 +550,9 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
dom->len = new_dlen;
dom->gen = new_gen;
dom->member_cnt = new_member_cnt;
- dom->up_map = be64_to_cpu(arrv_dom->up_map);
+ dom->up_map = mon_le64_to_cpu(arrv_dom->up_map);
for (i = 0; i < new_member_cnt; i++)
- dom->members[i] = ntohl(arrv_dom->members[i]);
+ dom->members[i] = mon_le32_to_cpu(arrv_dom->members[i]);
/* Update peers affected by this domain record */
applied_bef = peer->applied;
@@ -540,19 +581,19 @@ void tipc_mon_prep(struct net *net, void *data, int *dlen,
if (likely(state->acked_gen == gen)) {
len = dom_rec_len(dom, 0);
*dlen = len;
- dom->len = htons(len);
- dom->gen = htons(gen);
- dom->ack_gen = htons(state->peer_gen);
+ dom->len = mon_cpu_to_le16(len);
+ dom->gen = mon_cpu_to_le16(gen);
+ dom->ack_gen = mon_cpu_to_le16(state->peer_gen);
dom->member_cnt = 0;
return;
}
/* Send the full record */
read_lock_bh(&mon->lock);
- len = ntohs(mon->cache.len);
+ len = mon_le16_to_cpu(mon->cache.len);
*dlen = len;
memcpy(data, &mon->cache, len);
read_unlock_bh(&mon->lock);
- dom->ack_gen = htons(state->peer_gen);
+ dom->ack_gen = mon_cpu_to_le16(state->peer_gen);
}
void tipc_mon_get_state(struct net *net, u32 addr,
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 0d515d20b056..5c9fd4791c4b 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -41,39 +41,35 @@
#include "name_table.h"
#include "crypto.h"
+#define BUF_ALIGN(x) ALIGN(x, 4)
#define MAX_FORWARD_SIZE 1024
#ifdef CONFIG_TIPC_CRYPTO
#define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16)
-#define BUF_TAILROOM (TIPC_AES_GCM_TAG_SIZE)
+#define BUF_OVERHEAD (BUF_HEADROOM + TIPC_AES_GCM_TAG_SIZE)
#else
#define BUF_HEADROOM (LL_MAX_HEADER + 48)
-#define BUF_TAILROOM 16
+#define BUF_OVERHEAD BUF_HEADROOM
#endif
-static unsigned int align(unsigned int i)
-{
- return (i + 3) & ~3u;
-}
+const int one_page_mtu = PAGE_SIZE - SKB_DATA_ALIGN(BUF_OVERHEAD) -
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
/**
* tipc_buf_acquire - creates a TIPC message buffer
* @size: message size (including TIPC header)
+ * @gfp: memory allocation flags
*
- * Returns a new buffer with data pointers set to the specified size.
+ * Return: a new buffer with data pointers set to the specified size.
*
- * NOTE: Headroom is reserved to allow prepending of a data link header.
- * There may also be unrequested tailroom present at the buffer's end.
+ * NOTE:
+ * Headroom is reserved to allow prepending of a data link header.
+ * There may also be unrequested tailroom present at the buffer's end.
*/
struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp)
{
struct sk_buff *skb;
-#ifdef CONFIG_TIPC_CRYPTO
- unsigned int buf_size = (BUF_HEADROOM + size + BUF_TAILROOM + 3) & ~3u;
-#else
- unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
-#endif
- skb = alloc_skb_fclone(buf_size, gfp);
+ skb = alloc_skb_fclone(BUF_OVERHEAD + size, gfp);
if (skb) {
skb_reserve(skb, BUF_HEADROOM);
skb_put(skb, size);
@@ -115,10 +111,6 @@ struct sk_buff *tipc_msg_create(uint user, uint type,
msg_set_origport(msg, oport);
msg_set_destport(msg, dport);
msg_set_errcode(msg, errcode);
- if (hdr_sz > SHORT_H_SIZE) {
- msg_set_orignode(msg, onode);
- msg_set_destnode(msg, dnode);
- }
return buf;
}
@@ -150,18 +142,14 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (fragid == FIRST_FRAGMENT) {
if (unlikely(head))
goto err;
- if (unlikely(skb_unclone(frag, GFP_ATOMIC)))
+ *buf = NULL;
+ if (skb_has_frag_list(frag) && __skb_linearize(frag))
+ goto err;
+ frag = skb_unshare(frag, GFP_ATOMIC);
+ if (unlikely(!frag))
goto err;
head = *headbuf = frag;
- *buf = NULL;
TIPC_SKB_CB(head)->tail = NULL;
- if (skb_is_nonlinear(head)) {
- skb_walk_frags(head, tail) {
- TIPC_SKB_CB(head)->tail = tail;
- }
- } else {
- skb_frag_list_init(head);
- }
return 0;
}
@@ -202,17 +190,18 @@ err:
/**
* tipc_msg_append(): Append data to tail of an existing buffer queue
- * @hdr: header to be used
+ * @_hdr: header to be used
* @m: the data to be appended
* @mss: max allowable size of buffer
* @dlen: size of data to be appended
- * @txq: queue to appand to
- * Returns the number og 1k blocks appended or errno value
+ * @txq: queue to append to
+ *
+ * Return: the number of 1k blocks appended or errno value
*/
int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
int mss, struct sk_buff_head *txq)
{
- struct sk_buff *skb, *prev;
+ struct sk_buff *skb;
int accounted, total, curr;
int mlen, cpy, rem = dlen;
struct tipc_msg *hdr;
@@ -221,9 +210,8 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
accounted = skb ? msg_blocks(buf_msg(skb)) : 0;
total = accounted;
- while (rem) {
+ do {
if (!skb || skb->len >= mss) {
- prev = skb;
skb = tipc_buf_acquire(mss, GFP_KERNEL);
if (unlikely(!skb))
return -ENOMEM;
@@ -235,21 +223,18 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
msg_set_size(hdr, MIN_H_SIZE);
__skb_queue_tail(txq, skb);
total += 1;
- if (prev)
- msg_set_ack_required(buf_msg(prev), 0);
- msg_set_ack_required(hdr, 1);
}
hdr = buf_msg(skb);
curr = msg_blocks(hdr);
mlen = msg_size(hdr);
- cpy = min_t(int, rem, mss - mlen);
+ cpy = min_t(size_t, rem, mss - mlen);
if (cpy != copy_from_iter(skb->data + mlen, cpy, &m->msg_iter))
return -EFAULT;
msg_set_size(hdr, mlen + cpy);
skb_put(skb, cpy);
rem -= cpy;
total += msg_blocks(hdr) - curr;
- }
+ } while (rem > 0);
return total - accounted;
}
@@ -315,7 +300,7 @@ bool tipc_msg_validate(struct sk_buff **_skb)
* @pktmax: max size of a fragment incl. the header
* @frags: returned fragment skb list
*
- * Returns 0 if the fragmentation is successful, otherwise: -EINVAL
+ * Return: 0 if the fragmentation is successful, otherwise: -EINVAL
* or -ENOMEM
*/
int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
@@ -370,6 +355,7 @@ error:
* tipc_msg_build - create buffer chain containing specified header and data
* @mhdr: Message header, to be prepended to data
* @m: User message
+ * @offset: buffer offset for fragmented messages (FIXME)
* @dsz: Total length of user data
* @pktmax: Max packet size that can be used
* @list: Buffer or chain of buffers to be returned to caller
@@ -377,7 +363,7 @@ error:
* Note that the recursive call we are making here is safe, since it can
* logically go only one further level down.
*
- * Returns message data size or errno: -ENOMEM, -EFAULT
+ * Return: message data size or errno: -ENOMEM, -EFAULT
*/
int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
int dsz, int pktmax, struct sk_buff_head *list)
@@ -403,7 +389,8 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
if (unlikely(!skb)) {
if (pktmax != MAX_MSG_SIZE)
return -ENOMEM;
- rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list);
+ rc = tipc_msg_build(mhdr, m, offset, dsz,
+ one_page_mtu, list);
if (rc != dsz)
return rc;
if (tipc_msg_assemble(list))
@@ -488,7 +475,7 @@ error:
* @msg: message to be appended
* @max: max allowable size for the bundle buffer
*
- * Returns "true" if bundling has been performed, otherwise "false"
+ * Return: "true" if bundling has been performed, otherwise "false"
*/
static bool tipc_msg_bundle(struct sk_buff *bskb, struct tipc_msg *msg,
u32 max)
@@ -498,7 +485,7 @@ static bool tipc_msg_bundle(struct sk_buff *bskb, struct tipc_msg *msg,
msz = msg_size(msg);
bsz = msg_size(bmsg);
- offset = align(bsz);
+ offset = BUF_ALIGN(bsz);
pad = offset - bsz;
if (unlikely(skb_tailroom(bskb) < (pad + msz)))
@@ -555,7 +542,7 @@ bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss,
/* Make a new bundle of the two messages if possible */
tsz = msg_size(buf_msg(tskb));
- if (unlikely(mss < align(INT_H_SIZE + tsz) + msg_size(msg)))
+ if (unlikely(mss < BUF_ALIGN(INT_H_SIZE + tsz) + msg_size(msg)))
return true;
if (unlikely(pskb_expand_head(tskb, INT_H_SIZE, mss - tsz - INT_H_SIZE,
GFP_ATOMIC)))
@@ -583,9 +570,9 @@ bundle:
* @skb: buffer to be extracted from.
* @iskb: extracted inner buffer, to be returned
* @pos: position in outer message of msg to be extracted.
- * Returns position of next msg
+ * Returns position of next msg.
* Consumes outer buffer when last packet extracted
- * Returns true when when there is an extracted buffer, otherwise false
+ * Return: true when there is an extracted buffer, otherwise false
*/
bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
{
@@ -614,7 +601,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
if (unlikely(!tipc_msg_validate(iskb)))
goto none;
- *pos += align(imsz);
+ *pos += BUF_ALIGN(imsz);
return true;
none:
kfree_skb(skb);
@@ -629,7 +616,7 @@ none:
* @skb: buffer containing message to be reversed; will be consumed
* @err: error code to be set in message, if any
* Replaces consumed buffer with new one when successful
- * Returns true if success, otherwise false
+ * Return: true if success, otherwise false
*/
bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
{
@@ -701,16 +688,20 @@ bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy)
/**
* tipc_msg_lookup_dest(): try to find new destination for named message
+ * @net: pointer to associated network namespace
* @skb: the buffer containing the message.
* @err: error code to be used by caller if lookup fails
* Does not consume buffer
- * Returns true if a destination is found, false otherwise
+ * Return: true if a destination is found, false otherwise
*/
bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
{
struct tipc_msg *msg = buf_msg(skb);
- u32 dport, dnode;
- u32 onode = tipc_own_addr(net);
+ u32 scope = msg_lookup_scope(msg);
+ u32 self = tipc_own_addr(net);
+ u32 inst = msg_nameinst(msg);
+ struct tipc_socket_addr sk;
+ struct tipc_uaddr ua;
if (!msg_isdata(msg))
return false;
@@ -724,21 +715,18 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
msg = buf_msg(skb);
if (msg_reroute_cnt(msg))
return false;
- dnode = tipc_scope2node(net, msg_lookup_scope(msg));
- dport = tipc_nametbl_translate(net, msg_nametype(msg),
- msg_nameinst(msg), &dnode);
- if (!dport)
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, scope,
+ msg_nametype(msg), inst, inst);
+ sk.node = tipc_scope2node(net, scope);
+ if (!tipc_nametbl_lookup_anycast(net, &ua, &sk))
return false;
msg_incr_reroute_cnt(msg);
- if (dnode != onode)
- msg_set_prevnode(msg, onode);
- msg_set_destnode(msg, dnode);
- msg_set_destport(msg, dport);
+ if (sk.node != self)
+ msg_set_prevnode(msg, self);
+ msg_set_destnode(msg, sk.node);
+ msg_set_destport(msg, sk.ref);
*err = TIPC_OK;
- if (!skb_cloned(skb))
- return true;
-
return true;
}
@@ -828,19 +816,19 @@ bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
* @seqno: sequence number of buffer to add
* @skb: buffer to add
*/
-void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
+bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
struct sk_buff *skb)
{
struct sk_buff *_skb, *tmp;
if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) {
__skb_queue_head(list, skb);
- return;
+ return true;
}
if (more(seqno, buf_seqno(skb_peek_tail(list)))) {
__skb_queue_tail(list, skb);
- return;
+ return true;
}
skb_queue_walk_safe(list, _skb, tmp) {
@@ -849,9 +837,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
if (seqno == buf_seqno(_skb))
break;
__skb_queue_before(list, _skb, skb);
- return;
+ return true;
}
kfree_skb(skb);
+ return false;
}
void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 6d466ebdb64f..c5eec16213d7 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -82,6 +82,7 @@ struct plist;
#define NAME_DISTRIBUTOR 11
#define MSG_FRAGMENTER 12
#define LINK_CONFIG 13
+#define MSG_CRYPTO 14
#define SOCK_WAKEUP 14 /* pseudo user */
#define TOP_SRV 15 /* pseudo user */
@@ -98,9 +99,10 @@ struct plist;
#define MAX_H_SIZE 60 /* Largest possible TIPC header size */
#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
-#define FB_MTU 3744
#define TIPC_MEDIA_INFO_OFFSET 5
+extern const int one_page_mtu;
+
struct tipc_skb_cb {
union {
struct {
@@ -127,7 +129,9 @@ struct tipc_skb_cb {
#ifdef CONFIG_TIPC_CRYPTO
u8 encrypted:1;
u8 decrypted:1;
- u8 probe:1;
+#define SKB_PROBING 1
+#define SKB_GRACING 2
+ u8 xmit_type:2;
u8 tx_clone_deferred:1;
#endif
};
@@ -160,21 +164,38 @@ struct tipc_gap_ack {
/* struct tipc_gap_ack_blks
* @len: actual length of the record
- * @gack_cnt: number of Gap ACK blocks in the record
+ * @ugack_cnt: number of Gap ACK blocks for unicast (following the broadcast
+ * ones)
+ * @start_index: starting index for "valid" broadcast Gap ACK blocks
+ * @bgack_cnt: number of Gap ACK blocks for broadcast in the record
* @gacks: array of Gap ACK blocks
+ *
+ * 31 16 15 0
+ * +-------------+-------------+-------------+-------------+
+ * | bgack_cnt | ugack_cnt | len |
+ * +-------------+-------------+-------------+-------------+ -
+ * | gap | ack | |
+ * +-------------+-------------+-------------+-------------+ > bc gacks
+ * : : : |
+ * +-------------+-------------+-------------+-------------+ -
+ * | gap | ack | |
+ * +-------------+-------------+-------------+-------------+ > uc gacks
+ * : : : |
+ * +-------------+-------------+-------------+-------------+ -
*/
struct tipc_gap_ack_blks {
__be16 len;
- u8 gack_cnt;
- u8 reserved;
+ union {
+ u8 ugack_cnt;
+ u8 start_index;
+ };
+ u8 bgack_cnt;
struct tipc_gap_ack gacks[];
};
-#define tipc_gap_ack_blks_sz(n) (sizeof(struct tipc_gap_ack_blks) + \
- sizeof(struct tipc_gap_ack) * (n))
-
-#define MAX_GAP_ACK_BLKS 32
-#define MAX_GAP_ACK_BLKS_SZ tipc_gap_ack_blks_sz(MAX_GAP_ACK_BLKS)
+#define MAX_GAP_ACK_BLKS 128
+#define MAX_GAP_ACK_BLKS_SZ (sizeof(struct tipc_gap_ack_blks) + \
+ sizeof(struct tipc_gap_ack) * MAX_GAP_ACK_BLKS)
static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
{
@@ -205,14 +226,6 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w,
m->hdr[w] |= htonl(val);
}
-static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b)
-{
- u32 temp = msg->hdr[a];
-
- msg->hdr[a] = msg->hdr[b];
- msg->hdr[b] = temp;
-}
-
/*
* Word 0
*/
@@ -321,9 +334,19 @@ static inline int msg_ack_required(struct tipc_msg *m)
return msg_bits(m, 0, 18, 1);
}
-static inline void msg_set_ack_required(struct tipc_msg *m, u32 d)
+static inline void msg_set_ack_required(struct tipc_msg *m)
{
- msg_set_bits(m, 0, 18, 1, d);
+ msg_set_bits(m, 0, 18, 1, 1);
+}
+
+static inline int msg_nagle_ack(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 18, 1);
+}
+
+static inline void msg_set_nagle_ack(struct tipc_msg *m)
+{
+ msg_set_bits(m, 0, 18, 1, 1);
}
static inline bool msg_is_rcast(struct tipc_msg *m)
@@ -394,6 +417,11 @@ static inline u32 msg_connected(struct tipc_msg *m)
return msg_type(m) == TIPC_CONN_MSG;
}
+static inline u32 msg_direct(struct tipc_msg *m)
+{
+ return msg_type(m) == TIPC_DIRECT_MSG;
+}
+
static inline u32 msg_errcode(struct tipc_msg *m)
{
return msg_bits(m, 1, 25, 0xf);
@@ -404,6 +432,36 @@ static inline void msg_set_errcode(struct tipc_msg *m, u32 err)
msg_set_bits(m, 1, 25, 0xf, err);
}
+static inline void msg_set_bulk(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 28, 0x1, 1);
+}
+
+static inline u32 msg_is_bulk(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 28, 0x1);
+}
+
+static inline void msg_set_last_bulk(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 27, 0x1, 1);
+}
+
+static inline u32 msg_is_last_bulk(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 27, 0x1);
+}
+
+static inline void msg_set_non_legacy(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 26, 0x1, 1);
+}
+
+static inline u32 msg_is_legacy(struct tipc_msg *m)
+{
+ return !msg_bits(m, 1, 26, 0x1);
+}
+
static inline u32 msg_reroute_cnt(struct tipc_msg *m)
{
return msg_bits(m, 1, 21, 0xf);
@@ -414,11 +472,6 @@ static inline void msg_incr_reroute_cnt(struct tipc_msg *m)
msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1);
}
-static inline void msg_reset_reroute_cnt(struct tipc_msg *m)
-{
- msg_set_bits(m, 1, 21, 0xf, 0);
-}
-
static inline u32 msg_lookup_scope(struct tipc_msg *m)
{
return msg_bits(m, 1, 19, 0x3);
@@ -533,6 +586,16 @@ static inline void msg_set_origport(struct tipc_msg *m, u32 p)
msg_set_word(m, 4, p);
}
+static inline u16 msg_named_seqno(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 0, 0xffff);
+}
+
+static inline void msg_set_named_seqno(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 4, 0, 0xffff, n);
+}
+
static inline u32 msg_destport(struct tipc_msg *m)
{
return msg_word(m, 5);
@@ -675,6 +738,9 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
#define GRP_RECLAIM_MSG 4
#define GRP_REMIT_MSG 5
+/* Crypto message types */
+#define KEY_DISTR_MSG 0
+
/*
* Word 1
*/
@@ -721,11 +787,6 @@ static inline void msg_set_dest_domain(struct tipc_msg *m, u32 n)
msg_set_word(m, 2, n);
}
-static inline u32 msg_bcgap_after(struct tipc_msg *m)
-{
- return msg_bits(m, 2, 16, 0xffff);
-}
-
static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n)
{
msg_set_bits(m, 2, 16, 0xffff, n);
@@ -789,11 +850,6 @@ static inline void msg_set_next_sent(struct tipc_msg *m, u16 n)
msg_set_bits(m, 4, 0, 0xffff, n);
}
-static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n)
-{
- msg_set_bits(m, 4, 0, 0xffff, n);
-}
-
static inline u32 msg_bc_netid(struct tipc_msg *m)
{
return msg_word(m, 4);
@@ -1121,7 +1177,7 @@ bool tipc_msg_assemble(struct sk_buff_head *list);
bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq);
bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
struct sk_buff_head *cpy);
-void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
+bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
struct sk_buff *skb);
bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy);
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 5feaf3b67380..190b49c5cbc3 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -1,8 +1,9 @@
/*
* net/tipc/name_distr.c: TIPC name distribution code
*
- * Copyright (c) 2000-2006, 2014, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2019, Ericsson AB
* Copyright (c) 2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -40,28 +41,26 @@
int sysctl_tipc_named_timeout __read_mostly = 2000;
-struct distr_queue_item {
- struct distr_item i;
- u32 dtype;
- u32 node;
- unsigned long expires;
- struct list_head next;
-};
-
/**
* publ_to_item - add publication info to a publication message
+ * @p: publication info
+ * @i: location of item in the message
*/
static void publ_to_item(struct distr_item *i, struct publication *p)
{
- i->type = htonl(p->type);
- i->lower = htonl(p->lower);
- i->upper = htonl(p->upper);
- i->port = htonl(p->port);
+ i->type = htonl(p->sr.type);
+ i->lower = htonl(p->sr.lower);
+ i->upper = htonl(p->sr.upper);
+ i->port = htonl(p->sk.ref);
i->key = htonl(p->key);
}
/**
* named_prepare_buf - allocate & initialize a publication message
+ * @net: the associated network namespace
+ * @type: message type
+ * @size: payload size
+ * @dest: destination node
*
* The buffer returned is of size INT_H_SIZE + payload size
*/
@@ -83,65 +82,73 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
/**
* tipc_named_publish - tell other nodes about a new publication by this node
+ * @net: the associated network namespace
+ * @p: the new publication
*/
-struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
+struct sk_buff *tipc_named_publish(struct net *net, struct publication *p)
{
struct name_table *nt = tipc_name_table(net);
struct distr_item *item;
struct sk_buff *skb;
- if (publ->scope == TIPC_NODE_SCOPE) {
- list_add_tail_rcu(&publ->binding_node, &nt->node_scope);
+ if (p->scope == TIPC_NODE_SCOPE) {
+ list_add_tail_rcu(&p->binding_node, &nt->node_scope);
return NULL;
}
write_lock_bh(&nt->cluster_scope_lock);
- list_add_tail(&publ->binding_node, &nt->cluster_scope);
+ list_add_tail(&p->binding_node, &nt->cluster_scope);
write_unlock_bh(&nt->cluster_scope_lock);
skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
if (!skb) {
pr_warn("Publication distribution failure\n");
return NULL;
}
-
+ msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++);
+ msg_set_non_legacy(buf_msg(skb));
item = (struct distr_item *)msg_data(buf_msg(skb));
- publ_to_item(item, publ);
+ publ_to_item(item, p);
return skb;
}
/**
* tipc_named_withdraw - tell other nodes about a withdrawn publication by this node
+ * @net: the associated network namespace
+ * @p: the withdrawn publication
*/
-struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
+struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *p)
{
struct name_table *nt = tipc_name_table(net);
- struct sk_buff *buf;
struct distr_item *item;
+ struct sk_buff *skb;
write_lock_bh(&nt->cluster_scope_lock);
- list_del(&publ->binding_node);
+ list_del(&p->binding_node);
write_unlock_bh(&nt->cluster_scope_lock);
- if (publ->scope == TIPC_NODE_SCOPE)
+ if (p->scope == TIPC_NODE_SCOPE)
return NULL;
- buf = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0);
- if (!buf) {
+ skb = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0);
+ if (!skb) {
pr_warn("Withdrawal distribution failure\n");
return NULL;
}
-
- item = (struct distr_item *)msg_data(buf_msg(buf));
- publ_to_item(item, publ);
- return buf;
+ msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++);
+ msg_set_non_legacy(buf_msg(skb));
+ item = (struct distr_item *)msg_data(buf_msg(skb));
+ publ_to_item(item, p);
+ return skb;
}
/**
* named_distribute - prepare name info for bulk distribution to another node
+ * @net: the associated network namespace
* @list: list of messages (buffers) to be returned from this function
* @dnode: node to be updated
* @pls: linked list of publication items to be packed into buffer chain
+ * @seqno: sequence number for this message
*/
static void named_distribute(struct net *net, struct sk_buff_head *list,
- u32 dnode, struct list_head *pls)
+ u32 dnode, struct list_head *pls, u16 seqno)
{
struct publication *publ;
struct sk_buff *skb = NULL;
@@ -149,6 +156,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0, false) - INT_H_SIZE) /
ITEM_SIZE) * ITEM_SIZE;
u32 msg_rem = msg_dsz;
+ struct tipc_msg *hdr;
list_for_each_entry(publ, pls, binding_node) {
/* Prepare next buffer: */
@@ -159,8 +167,11 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
pr_warn("Bulk publication failure\n");
return;
}
- msg_set_bc_ack_invalid(buf_msg(skb), true);
- item = (struct distr_item *)msg_data(buf_msg(skb));
+ hdr = buf_msg(skb);
+ msg_set_bc_ack_invalid(hdr, true);
+ msg_set_bulk(hdr);
+ msg_set_non_legacy(hdr);
+ item = (struct distr_item *)msg_data(hdr);
}
/* Pack publication into message: */
@@ -176,148 +187,197 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
}
}
if (skb) {
- msg_set_size(buf_msg(skb), INT_H_SIZE + (msg_dsz - msg_rem));
+ hdr = buf_msg(skb);
+ msg_set_size(hdr, INT_H_SIZE + (msg_dsz - msg_rem));
skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem));
__skb_queue_tail(list, skb);
}
+ hdr = buf_msg(skb_peek_tail(list));
+ msg_set_last_bulk(hdr);
+ msg_set_named_seqno(hdr, seqno);
}
/**
* tipc_named_node_up - tell specified node about all publications by this node
+ * @net: the associated network namespace
+ * @dnode: destination node
+ * @capabilities: peer node's capabilities
*/
-void tipc_named_node_up(struct net *net, u32 dnode)
+void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities)
{
struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
struct sk_buff_head head;
+ u16 seqno;
__skb_queue_head_init(&head);
+ spin_lock_bh(&tn->nametbl_lock);
+ if (!(capabilities & TIPC_NAMED_BCAST))
+ nt->rc_dests++;
+ seqno = nt->snd_nxt;
+ spin_unlock_bh(&tn->nametbl_lock);
read_lock_bh(&nt->cluster_scope_lock);
- named_distribute(net, &head, dnode, &nt->cluster_scope);
+ named_distribute(net, &head, dnode, &nt->cluster_scope, seqno);
tipc_node_xmit(net, &head, dnode, 0);
read_unlock_bh(&nt->cluster_scope_lock);
}
/**
* tipc_publ_purge - remove publication associated with a failed node
+ * @net: the associated network namespace
+ * @p: the publication to remove
+ * @addr: failed node's address
*
* Invoked for each publication issued by a newly failed node.
* Removes publication structure from name table & deletes it.
*/
-static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
+static void tipc_publ_purge(struct net *net, struct publication *p, u32 addr)
{
struct tipc_net *tn = tipc_net(net);
- struct publication *p;
+ struct publication *_p;
+ struct tipc_uaddr ua;
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, p->scope, p->sr.type,
+ p->sr.lower, p->sr.upper);
spin_lock_bh(&tn->nametbl_lock);
- p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->upper,
- publ->node, publ->key);
- if (p)
- tipc_node_unsubscribe(net, &p->binding_node, addr);
+ _p = tipc_nametbl_remove_publ(net, &ua, &p->sk, p->key);
+ if (_p)
+ tipc_node_unsubscribe(net, &_p->binding_node, addr);
spin_unlock_bh(&tn->nametbl_lock);
-
- if (p != publ) {
- pr_err("Unable to remove publication from failed node\n"
- " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n",
- publ->type, publ->lower, publ->node, publ->port,
- publ->key);
- }
-
- if (p)
- kfree_rcu(p, rcu);
+ if (_p)
+ kfree_rcu(_p, rcu);
}
-/**
- * tipc_dist_queue_purge - remove deferred updates from a node that went down
- */
-static void tipc_dist_queue_purge(struct net *net, u32 addr)
+void tipc_publ_notify(struct net *net, struct list_head *nsub_list,
+ u32 addr, u16 capabilities)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct distr_queue_item *e, *tmp;
-
- spin_lock_bh(&tn->nametbl_lock);
- list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) {
- if (e->node != addr)
- continue;
- list_del(&e->next);
- kfree(e);
- }
- spin_unlock_bh(&tn->nametbl_lock);
-}
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
-void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr)
-{
struct publication *publ, *tmp;
list_for_each_entry_safe(publ, tmp, nsub_list, binding_node)
tipc_publ_purge(net, publ, addr);
- tipc_dist_queue_purge(net, addr);
+ spin_lock_bh(&tn->nametbl_lock);
+ if (!(capabilities & TIPC_NAMED_BCAST))
+ nt->rc_dests--;
+ spin_unlock_bh(&tn->nametbl_lock);
}
/**
* tipc_update_nametbl - try to process a nametable update and notify
* subscribers
+ * @net: the associated network namespace
+ * @i: location of item in the message
+ * @node: node address
+ * @dtype: name distributor message type
*
* tipc_nametbl_lock must be held.
- * Returns the publication item if successful, otherwise NULL.
+ * Return: the publication item if successful, otherwise NULL.
*/
static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
u32 node, u32 dtype)
{
struct publication *p = NULL;
- u32 lower = ntohl(i->lower);
- u32 upper = ntohl(i->upper);
- u32 type = ntohl(i->type);
- u32 port = ntohl(i->port);
+ struct tipc_socket_addr sk;
+ struct tipc_uaddr ua;
u32 key = ntohl(i->key);
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE,
+ ntohl(i->type), ntohl(i->lower), ntohl(i->upper));
+ sk.ref = ntohl(i->port);
+ sk.node = node;
+
if (dtype == PUBLICATION) {
- p = tipc_nametbl_insert_publ(net, type, lower, upper,
- TIPC_CLUSTER_SCOPE, node,
- port, key);
+ p = tipc_nametbl_insert_publ(net, &ua, &sk, key);
if (p) {
tipc_node_subscribe(net, &p->binding_node, node);
return true;
}
} else if (dtype == WITHDRAWAL) {
- p = tipc_nametbl_remove_publ(net, type, lower,
- upper, node, key);
+ p = tipc_nametbl_remove_publ(net, &ua, &sk, key);
if (p) {
tipc_node_unsubscribe(net, &p->binding_node, node);
kfree_rcu(p, rcu);
return true;
}
- pr_warn_ratelimited("Failed to remove binding %u,%u from %x\n",
- type, lower, node);
+ pr_warn_ratelimited("Failed to remove binding %u,%u from %u\n",
+ ua.sr.type, ua.sr.lower, node);
} else {
- pr_warn("Unrecognized name table message received\n");
+ pr_warn_ratelimited("Unknown name table message received\n");
}
return false;
}
+static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open)
+{
+ struct sk_buff *skb, *tmp;
+ struct tipc_msg *hdr;
+ u16 seqno;
+
+ spin_lock_bh(&namedq->lock);
+ skb_queue_walk_safe(namedq, skb, tmp) {
+ if (unlikely(skb_linearize(skb))) {
+ __skb_unlink(skb, namedq);
+ kfree_skb(skb);
+ continue;
+ }
+ hdr = buf_msg(skb);
+ seqno = msg_named_seqno(hdr);
+ if (msg_is_last_bulk(hdr)) {
+ *rcv_nxt = seqno;
+ *open = true;
+ }
+
+ if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) {
+ __skb_unlink(skb, namedq);
+ spin_unlock_bh(&namedq->lock);
+ return skb;
+ }
+
+ if (*open && (*rcv_nxt == seqno)) {
+ (*rcv_nxt)++;
+ __skb_unlink(skb, namedq);
+ spin_unlock_bh(&namedq->lock);
+ return skb;
+ }
+
+ if (less(seqno, *rcv_nxt)) {
+ __skb_unlink(skb, namedq);
+ kfree_skb(skb);
+ continue;
+ }
+ }
+ spin_unlock_bh(&namedq->lock);
+ return NULL;
+}
+
/**
* tipc_named_rcv - process name table update messages sent by another node
+ * @net: the associated network namespace
+ * @namedq: queue to receive from
+ * @rcv_nxt: store last received seqno here
+ * @open: last bulk msg was received (FIXME)
*/
-void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
+void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_msg *msg;
+ struct tipc_net *tn = tipc_net(net);
struct distr_item *item;
- uint count;
- u32 node;
+ struct tipc_msg *hdr;
struct sk_buff *skb;
- int mtype;
+ u32 count, node;
spin_lock_bh(&tn->nametbl_lock);
- for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) {
- skb_linearize(skb);
- msg = buf_msg(skb);
- mtype = msg_type(msg);
- item = (struct distr_item *)msg_data(msg);
- count = msg_data_sz(msg) / ITEM_SIZE;
- node = msg_orignode(msg);
+ while ((skb = tipc_named_dequeue(namedq, rcv_nxt, open))) {
+ hdr = buf_msg(skb);
+ node = msg_orignode(hdr);
+ item = (struct distr_item *)msg_data(hdr);
+ count = msg_data_sz(hdr) / ITEM_SIZE;
while (count--) {
- tipc_update_nametbl(net, item, node, mtype);
+ tipc_update_nametbl(net, item, node, msg_type(hdr));
item++;
}
kfree_skb(skb);
@@ -327,6 +387,7 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
/**
* tipc_named_reinit - re-initialize local publications
+ * @net: the associated network namespace
*
* This routine is called whenever TIPC networking is enabled.
* All name table entries published by this node are updated to reflect
@@ -336,15 +397,15 @@ void tipc_named_reinit(struct net *net)
{
struct name_table *nt = tipc_name_table(net);
struct tipc_net *tn = tipc_net(net);
- struct publication *publ;
+ struct publication *p;
u32 self = tipc_own_addr(net);
spin_lock_bh(&tn->nametbl_lock);
- list_for_each_entry_rcu(publ, &nt->node_scope, binding_node)
- publ->node = self;
- list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node)
- publ->node = self;
-
+ list_for_each_entry_rcu(p, &nt->node_scope, binding_node)
+ p->sk.node = self;
+ list_for_each_entry_rcu(p, &nt->cluster_scope, binding_node)
+ p->sk.node = self;
+ nt->rc_dests = 0;
spin_unlock_bh(&tn->nametbl_lock);
}
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 63fc73e0fa6c..e231e6964d61 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -46,7 +46,7 @@
* @type: name sequence type
* @lower: name sequence lower bound
* @upper: name sequence upper bound
- * @ref: publishing port reference
+ * @port: publishing port reference
* @key: publication key
*
* ===> All fields are stored in network byte order. <===
@@ -67,11 +67,14 @@ struct distr_item {
__be32 key;
};
+void tipc_named_bcast(struct net *net, struct sk_buff *skb);
struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ);
struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ);
-void tipc_named_node_up(struct net *net, u32 dnode);
-void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue);
+void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities);
+void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open);
void tipc_named_reinit(struct net *net);
-void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr);
+void tipc_publ_notify(struct net *net, struct list_head *nsub_list,
+ u32 addr, u16 capabilities);
#endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 359b2bc888cf..d1180370fdf4 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -3,6 +3,7 @@
*
* Copyright (c) 2000-2006, 2014-2018, Ericsson AB
* Copyright (c) 2004-2008, 2010-2014, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -103,7 +104,8 @@ RB_DECLARE_CALLBACKS_MAX(static, sr_callbacks,
* range match
* @sr: the service range pointer as a loop cursor
* @sc: the pointer to tipc service which holds the service range rbtree
- * @start, end: the range (end >= start) for matching
+ * @start: beginning of the search range (end >= start) for matching
+ * @end: end of the search range (end >= start) for matching
*/
#define service_range_foreach_match(sr, sc, start, end) \
for (sr = service_range_match_first((sc)->ranges.rb_node, \
@@ -117,7 +119,8 @@ RB_DECLARE_CALLBACKS_MAX(static, sr_callbacks,
/**
* service_range_match_first - find first service range matching a range
* @n: the root node of service range rbtree for searching
- * @start, end: the range (end >= start) for matching
+ * @start: beginning of the search range (end >= start) for matching
+ * @end: end of the search range (end >= start) for matching
*
* Return: the leftmost service range node in the rbtree that overlaps the
* specific range if any. Otherwise, returns NULL.
@@ -166,7 +169,8 @@ static struct service_range *service_range_match_first(struct rb_node *n,
/**
* service_range_match_next - find next service range matching a range
* @n: a node in service range rbtree from which the searching starts
- * @start, end: the range (end >= start) for matching
+ * @start: beginning of the search range (end >= start) for matching
+ * @end: end of the search range (end >= start) for matching
*
* Return: the next service range node to the given node in the rbtree that
* overlaps the specific range if any. Otherwise, returns NULL.
@@ -218,50 +222,57 @@ static int hash(int x)
/**
* tipc_publ_create - create a publication structure
+ * @ua: the service range the user is binding to
+ * @sk: the address of the socket that is bound
+ * @key: publication key
*/
-static struct publication *tipc_publ_create(u32 type, u32 lower, u32 upper,
- u32 scope, u32 node, u32 port,
+static struct publication *tipc_publ_create(struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk,
u32 key)
{
- struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
+ struct publication *p = kzalloc(sizeof(*p), GFP_ATOMIC);
- if (!publ)
+ if (!p)
return NULL;
- publ->type = type;
- publ->lower = lower;
- publ->upper = upper;
- publ->scope = scope;
- publ->node = node;
- publ->port = port;
- publ->key = key;
- INIT_LIST_HEAD(&publ->binding_sock);
- INIT_LIST_HEAD(&publ->binding_node);
- INIT_LIST_HEAD(&publ->local_publ);
- INIT_LIST_HEAD(&publ->all_publ);
- INIT_LIST_HEAD(&publ->list);
- return publ;
+ p->sr = ua->sr;
+ p->sk = *sk;
+ p->scope = ua->scope;
+ p->key = key;
+ INIT_LIST_HEAD(&p->binding_sock);
+ INIT_LIST_HEAD(&p->binding_node);
+ INIT_LIST_HEAD(&p->local_publ);
+ INIT_LIST_HEAD(&p->all_publ);
+ INIT_LIST_HEAD(&p->list);
+ return p;
}
/**
* tipc_service_create - create a service structure for the specified 'type'
+ * @net: network namespace
+ * @ua: address representing the service to be bound
*
* Allocates a single range structure and sets it to all 0's.
*/
-static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd)
+static struct tipc_service *tipc_service_create(struct net *net,
+ struct tipc_uaddr *ua)
{
- struct tipc_service *service = kzalloc(sizeof(*service), GFP_ATOMIC);
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_service *service;
+ struct hlist_head *hd;
+ service = kzalloc(sizeof(*service), GFP_ATOMIC);
if (!service) {
pr_warn("Service creation failed, no memory\n");
return NULL;
}
spin_lock_init(&service->lock);
- service->type = type;
+ service->type = ua->sr.type;
service->ranges = RB_ROOT;
INIT_HLIST_NODE(&service->service_list);
INIT_LIST_HEAD(&service->subscriptions);
+ hd = &nt->services[hash(ua->sr.type)];
hlist_add_head_rcu(&service->service_list, hd);
return service;
}
@@ -269,13 +280,13 @@ static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd)
/* tipc_service_find_range - find service range matching publication parameters
*/
static struct service_range *tipc_service_find_range(struct tipc_service *sc,
- u32 lower, u32 upper)
+ struct tipc_uaddr *ua)
{
struct service_range *sr;
- service_range_foreach_match(sr, sc, lower, upper) {
+ service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) {
/* Look for exact match */
- if (sr->lower == lower && sr->upper == upper)
+ if (sr->lower == ua->sr.lower && sr->upper == ua->sr.upper)
return sr;
}
@@ -283,10 +294,12 @@ static struct service_range *tipc_service_find_range(struct tipc_service *sc,
}
static struct service_range *tipc_service_create_range(struct tipc_service *sc,
- u32 lower, u32 upper)
+ struct publication *p)
{
struct rb_node **n, *parent = NULL;
struct service_range *sr;
+ u32 lower = p->sr.lower;
+ u32 upper = p->sr.upper;
n = &sc->ranges.rb_node;
while (*n) {
@@ -314,61 +327,68 @@ static struct service_range *tipc_service_create_range(struct tipc_service *sc,
return sr;
}
-static struct publication *tipc_service_insert_publ(struct net *net,
- struct tipc_service *sc,
- u32 type, u32 lower,
- u32 upper, u32 scope,
- u32 node, u32 port,
- u32 key)
+static bool tipc_service_insert_publ(struct net *net,
+ struct tipc_service *sc,
+ struct publication *p)
{
struct tipc_subscription *sub, *tmp;
struct service_range *sr;
- struct publication *p;
+ struct publication *_p;
+ u32 node = p->sk.node;
bool first = false;
+ bool res = false;
+ u32 key = p->key;
- sr = tipc_service_create_range(sc, lower, upper);
+ spin_lock_bh(&sc->lock);
+ sr = tipc_service_create_range(sc, p);
if (!sr)
- goto err;
+ goto exit;
first = list_empty(&sr->all_publ);
/* Return if the publication already exists */
- list_for_each_entry(p, &sr->all_publ, all_publ) {
- if (p->key == key && (!p->node || p->node == node))
- return NULL;
+ list_for_each_entry(_p, &sr->all_publ, all_publ) {
+ if (_p->key == key && (!_p->sk.node || _p->sk.node == node)) {
+ pr_debug("Failed to bind duplicate %u,%u,%u/%u:%u/%u\n",
+ p->sr.type, p->sr.lower, p->sr.upper,
+ node, p->sk.ref, key);
+ goto exit;
+ }
}
- /* Create and insert publication */
- p = tipc_publ_create(type, lower, upper, scope, node, port, key);
- if (!p)
- goto err;
- /* Suppose there shouldn't be a huge gap btw publs i.e. >INT_MAX */
- p->id = sc->publ_cnt++;
- if (in_own_node(net, node))
+ if (in_own_node(net, p->sk.node))
list_add(&p->local_publ, &sr->local_publ);
list_add(&p->all_publ, &sr->all_publ);
+ p->id = sc->publ_cnt++;
/* Any subscriptions waiting for notification? */
list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
- tipc_sub_report_overlap(sub, p->lower, p->upper, TIPC_PUBLISHED,
- p->port, p->node, p->scope, first);
+ tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, first);
}
- return p;
-err:
- pr_warn("Failed to bind to %u,%u,%u, no memory\n", type, lower, upper);
- return NULL;
+ res = true;
+exit:
+ if (!res)
+ pr_warn("Failed to bind to %u,%u,%u\n",
+ p->sr.type, p->sr.lower, p->sr.upper);
+ spin_unlock_bh(&sc->lock);
+ return res;
}
/**
* tipc_service_remove_publ - remove a publication from a service
+ * @r: service_range to remove publication from
+ * @sk: address publishing socket
+ * @key: target publication key
*/
-static struct publication *tipc_service_remove_publ(struct service_range *sr,
- u32 node, u32 key)
+static struct publication *tipc_service_remove_publ(struct service_range *r,
+ struct tipc_socket_addr *sk,
+ u32 key)
{
struct publication *p;
+ u32 node = sk->node;
- list_for_each_entry(p, &sr->all_publ, all_publ) {
- if (p->key != key || (node && node != p->node))
+ list_for_each_entry(p, &r->all_publ, all_publ) {
+ if (p->key != key || (node && node != p->sk.node))
continue;
list_del(&p->all_publ);
list_del(&p->local_publ);
@@ -377,12 +397,12 @@ static struct publication *tipc_service_remove_publ(struct service_range *sr,
return NULL;
}
-/**
+/*
* Code reused: time_after32() for the same purpose
*/
#define publication_after(pa, pb) time_after32((pa)->id, (pb)->id)
-static int tipc_publ_sort(void *priv, struct list_head *a,
- struct list_head *b)
+static int tipc_publ_sort(void *priv, const struct list_head *a,
+ const struct list_head *b)
{
struct publication *pa, *pb;
@@ -395,21 +415,20 @@ static int tipc_publ_sort(void *priv, struct list_head *a,
* tipc_service_subscribe - attach a subscription, and optionally
* issue the prescribed number of events if there is any service
* range overlapping with the requested range
+ * @service: the tipc_service to attach the @sub to
+ * @sub: the subscription to attach
*/
static void tipc_service_subscribe(struct tipc_service *service,
struct tipc_subscription *sub)
{
- struct tipc_subscr *sb = &sub->evt.s;
struct publication *p, *first, *tmp;
struct list_head publ_list;
struct service_range *sr;
- struct tipc_name_seq ns;
- u32 filter;
+ u32 filter, lower, upper;
- ns.type = tipc_sub_read(sb, seq.type);
- ns.lower = tipc_sub_read(sb, seq.lower);
- ns.upper = tipc_sub_read(sb, seq.upper);
- filter = tipc_sub_read(sb, filter);
+ filter = sub->s.filter;
+ lower = sub->s.seq.lower;
+ upper = sub->s.seq.upper;
tipc_sub_get(sub);
list_add(&sub->service_list, &service->subscriptions);
@@ -418,7 +437,7 @@ static void tipc_service_subscribe(struct tipc_service *service,
return;
INIT_LIST_HEAD(&publ_list);
- service_range_foreach_match(sr, service, ns.lower, ns.upper) {
+ service_range_foreach_match(sr, service, lower, upper) {
first = NULL;
list_for_each_entry(p, &sr->all_publ, all_publ) {
if (filter & TIPC_SUB_PORTS)
@@ -434,80 +453,74 @@ static void tipc_service_subscribe(struct tipc_service *service,
/* Sort the publications before reporting */
list_sort(NULL, &publ_list, tipc_publ_sort);
list_for_each_entry_safe(p, tmp, &publ_list, list) {
- tipc_sub_report_overlap(sub, p->lower, p->upper,
- TIPC_PUBLISHED, p->port, p->node,
- p->scope, true);
+ tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, true);
list_del_init(&p->list);
}
}
-static struct tipc_service *tipc_service_find(struct net *net, u32 type)
+static struct tipc_service *tipc_service_find(struct net *net,
+ struct tipc_uaddr *ua)
{
struct name_table *nt = tipc_name_table(net);
struct hlist_head *service_head;
struct tipc_service *service;
- service_head = &nt->services[hash(type)];
+ service_head = &nt->services[hash(ua->sr.type)];
hlist_for_each_entry_rcu(service, service_head, service_list) {
- if (service->type == type)
+ if (service->type == ua->sr.type)
return service;
}
return NULL;
};
-struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
- u32 lower, u32 upper,
- u32 scope, u32 node,
- u32 port, u32 key)
+struct publication *tipc_nametbl_insert_publ(struct net *net,
+ struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk,
+ u32 key)
{
- struct name_table *nt = tipc_name_table(net);
struct tipc_service *sc;
struct publication *p;
- if (scope > TIPC_NODE_SCOPE || lower > upper) {
- pr_debug("Failed to bind illegal {%u,%u,%u} with scope %u\n",
- type, lower, upper, scope);
- return NULL;
- }
- sc = tipc_service_find(net, type);
- if (!sc)
- sc = tipc_service_create(type, &nt->services[hash(type)]);
- if (!sc)
+ p = tipc_publ_create(ua, sk, key);
+ if (!p)
return NULL;
- spin_lock_bh(&sc->lock);
- p = tipc_service_insert_publ(net, sc, type, lower, upper,
- scope, node, port, key);
- spin_unlock_bh(&sc->lock);
- return p;
+ sc = tipc_service_find(net, ua);
+ if (!sc)
+ sc = tipc_service_create(net, ua);
+ if (sc && tipc_service_insert_publ(net, sc, p))
+ return p;
+ kfree(p);
+ return NULL;
}
-struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
- u32 lower, u32 upper,
- u32 node, u32 key)
+struct publication *tipc_nametbl_remove_publ(struct net *net,
+ struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk,
+ u32 key)
{
- struct tipc_service *sc = tipc_service_find(net, type);
struct tipc_subscription *sub, *tmp;
- struct service_range *sr = NULL;
struct publication *p = NULL;
+ struct service_range *sr;
+ struct tipc_service *sc;
bool last;
+ sc = tipc_service_find(net, ua);
if (!sc)
- return NULL;
+ goto exit;
spin_lock_bh(&sc->lock);
- sr = tipc_service_find_range(sc, lower, upper);
+ sr = tipc_service_find_range(sc, ua);
if (!sr)
- goto exit;
- p = tipc_service_remove_publ(sr, node, key);
+ goto unlock;
+ p = tipc_service_remove_publ(sr, sk, key);
if (!p)
- goto exit;
+ goto unlock;
/* Notify any waiting subscriptions */
last = list_empty(&sr->all_publ);
list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
- tipc_sub_report_overlap(sub, lower, upper, TIPC_WITHDRAWN,
- p->port, node, p->scope, last);
+ tipc_sub_report_overlap(sub, p, TIPC_WITHDRAWN, last);
}
/* Remove service range item if this was its last publication */
@@ -516,75 +529,85 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
kfree(sr);
}
- /* Delete service item if this no more publications and subscriptions */
+ /* Delete service item if no more publications and subscriptions */
if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) {
hlist_del_init_rcu(&sc->service_list);
kfree_rcu(sc, rcu);
}
-exit:
+unlock:
spin_unlock_bh(&sc->lock);
+exit:
+ if (!p) {
+ pr_err("Failed to remove unknown binding: %u,%u,%u/%u:%u/%u\n",
+ ua->sr.type, ua->sr.lower, ua->sr.upper,
+ sk->node, sk->ref, key);
+ }
return p;
}
/**
- * tipc_nametbl_translate - perform service instance to socket translation
+ * tipc_nametbl_lookup_anycast - perform service instance to socket translation
+ * @net: network namespace
+ * @ua: service address to look up
+ * @sk: address to socket we want to find
*
- * On entry, 'dnode' is the search domain used during translation.
+ * On entry, a non-zero 'sk->node' indicates the node where we want lookup to be
+ * performed, which may not be this one.
*
* On exit:
- * - if translation is deferred to another node, leave 'dnode' unchanged and
- * return 0
- * - if translation is attempted and succeeds, set 'dnode' to the publishing
- * node and return the published (non-zero) port number
- * - if translation is attempted and fails, set 'dnode' to 0 and return 0
+ *
+ * - If lookup is deferred to another node, leave 'sk->node' unchanged and
+ * return 'true'.
+ * - If lookup is successful, set the 'sk->node' and 'sk->ref' (== portid) which
+ * represent the bound socket and return 'true'.
+ * - If lookup fails, return 'false'
*
* Note that for legacy users (node configured with Z.C.N address format) the
- * 'closest-first' lookup algorithm must be maintained, i.e., if dnode is 0
+ * 'closest-first' lookup algorithm must be maintained, i.e., if sk.node is 0
* we must look in the local binding list first
*/
-u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode)
+bool tipc_nametbl_lookup_anycast(struct net *net,
+ struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk)
{
struct tipc_net *tn = tipc_net(net);
bool legacy = tn->legacy_addr_format;
u32 self = tipc_own_addr(net);
- struct service_range *sr;
+ u32 inst = ua->sa.instance;
+ struct service_range *r;
struct tipc_service *sc;
- struct list_head *list;
struct publication *p;
- u32 port = 0;
- u32 node = 0;
+ struct list_head *l;
+ bool res = false;
- if (!tipc_in_scope(legacy, *dnode, self))
- return 0;
+ if (!tipc_in_scope(legacy, sk->node, self))
+ return true;
rcu_read_lock();
- sc = tipc_service_find(net, type);
+ sc = tipc_service_find(net, ua);
if (unlikely(!sc))
goto exit;
spin_lock_bh(&sc->lock);
- service_range_foreach_match(sr, sc, instance, instance) {
+ service_range_foreach_match(r, sc, inst, inst) {
/* Select lookup algo: local, closest-first or round-robin */
- if (*dnode == self) {
- list = &sr->local_publ;
- if (list_empty(list))
+ if (sk->node == self) {
+ l = &r->local_publ;
+ if (list_empty(l))
continue;
- p = list_first_entry(list, struct publication,
- local_publ);
- list_move_tail(&p->local_publ, &sr->local_publ);
- } else if (legacy && !*dnode && !list_empty(&sr->local_publ)) {
- list = &sr->local_publ;
- p = list_first_entry(list, struct publication,
- local_publ);
- list_move_tail(&p->local_publ, &sr->local_publ);
+ p = list_first_entry(l, struct publication, local_publ);
+ list_move_tail(&p->local_publ, &r->local_publ);
+ } else if (legacy && !sk->node && !list_empty(&r->local_publ)) {
+ l = &r->local_publ;
+ p = list_first_entry(l, struct publication, local_publ);
+ list_move_tail(&p->local_publ, &r->local_publ);
} else {
- list = &sr->all_publ;
- p = list_first_entry(list, struct publication,
- all_publ);
- list_move_tail(&p->all_publ, &sr->all_publ);
+ l = &r->all_publ;
+ p = list_first_entry(l, struct publication, all_publ);
+ list_move_tail(&p->all_publ, &r->all_publ);
}
- port = p->port;
- node = p->node;
+ *sk = p->sk;
+ res = true;
/* Todo: as for legacy, pick the first matching range only, a
* "true" round-robin will be performed as needed.
*/
@@ -594,40 +617,45 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode)
exit:
rcu_read_unlock();
- *dnode = node;
- return port;
+ return res;
}
-bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
- struct list_head *dsts, int *dstcnt, u32 exclude,
- bool all)
+/* tipc_nametbl_lookup_group(): lookup destinaton(s) in a communication group
+ * Returns a list of one (== group anycast) or more (== group multicast)
+ * destination socket/node pairs matching the given address.
+ * The requester may or may not want to exclude himself from the list.
+ */
+bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua,
+ struct list_head *dsts, int *dstcnt,
+ u32 exclude, bool mcast)
{
u32 self = tipc_own_addr(net);
+ u32 inst = ua->sa.instance;
struct service_range *sr;
struct tipc_service *sc;
struct publication *p;
*dstcnt = 0;
rcu_read_lock();
- sc = tipc_service_find(net, type);
+ sc = tipc_service_find(net, ua);
if (unlikely(!sc))
goto exit;
spin_lock_bh(&sc->lock);
/* Todo: a full search i.e. service_range_foreach_match() instead? */
- sr = service_range_match_first(sc->ranges.rb_node, instance, instance);
+ sr = service_range_match_first(sc->ranges.rb_node, inst, inst);
if (!sr)
goto no_match;
list_for_each_entry(p, &sr->all_publ, all_publ) {
- if (p->scope != scope)
+ if (p->scope != ua->scope)
continue;
- if (p->port == exclude && p->node == self)
+ if (p->sk.ref == exclude && p->sk.node == self)
continue;
- tipc_dest_push(dsts, p->node, p->port);
+ tipc_dest_push(dsts, p->sk.node, p->sk.ref);
(*dstcnt)++;
- if (all)
+ if (mcast)
continue;
list_move_tail(&p->all_publ, &sr->all_publ);
break;
@@ -639,23 +667,29 @@ exit:
return !list_empty(dsts);
}
-void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
- u32 scope, bool exact, struct list_head *dports)
+/* tipc_nametbl_lookup_mcast_sockets(): look up node local destinaton sockets
+ * matching the given address
+ * Used on nodes which have received a multicast/broadcast message
+ * Returns a list of local sockets
+ */
+void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
+ struct list_head *dports)
{
struct service_range *sr;
struct tipc_service *sc;
struct publication *p;
+ u8 scope = ua->scope;
rcu_read_lock();
- sc = tipc_service_find(net, type);
+ sc = tipc_service_find(net, ua);
if (!sc)
goto exit;
spin_lock_bh(&sc->lock);
- service_range_foreach_match(sr, sc, lower, upper) {
+ service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) {
list_for_each_entry(p, &sr->local_publ, local_publ) {
- if (p->scope == scope || (!exact && p->scope < scope))
- tipc_dest_push(dports, 0, p->port);
+ if (scope == p->scope || scope == TIPC_ANY_SCOPE)
+ tipc_dest_push(dports, 0, p->sk.ref);
}
}
spin_unlock_bh(&sc->lock);
@@ -663,26 +697,27 @@ exit:
rcu_read_unlock();
}
-/* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes
- * - Creates list of nodes that overlap the given multicast address
- * - Determines if any node local destinations overlap
+/* tipc_nametbl_lookup_mcast_nodes(): look up all destination nodes matching
+ * the given address. Used in sending node.
+ * Used on nodes which are sending out a multicast/broadcast message
+ * Returns a list of nodes, including own node if applicable
*/
-void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
- u32 upper, struct tipc_nlist *nodes)
+void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua,
+ struct tipc_nlist *nodes)
{
struct service_range *sr;
struct tipc_service *sc;
struct publication *p;
rcu_read_lock();
- sc = tipc_service_find(net, type);
+ sc = tipc_service_find(net, ua);
if (!sc)
goto exit;
spin_lock_bh(&sc->lock);
- service_range_foreach_match(sr, sc, lower, upper) {
+ service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) {
list_for_each_entry(p, &sr->all_publ, all_publ) {
- tipc_nlist_add(nodes, p->node);
+ tipc_nlist_add(nodes, p->sk.node);
}
}
spin_unlock_bh(&sc->lock);
@@ -693,7 +728,7 @@ exit:
/* tipc_nametbl_build_group - build list of communication group members
*/
void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
- u32 type, u32 scope)
+ struct tipc_uaddr *ua)
{
struct service_range *sr;
struct tipc_service *sc;
@@ -701,7 +736,7 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
struct rb_node *n;
rcu_read_lock();
- sc = tipc_service_find(net, type);
+ sc = tipc_service_find(net, ua);
if (!sc)
goto exit;
@@ -709,9 +744,10 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
sr = container_of(n, struct service_range, tree_node);
list_for_each_entry(p, &sr->all_publ, all_publ) {
- if (p->scope != scope)
+ if (p->scope != ua->scope)
continue;
- tipc_group_add_member(grp, p->node, p->port, p->lower);
+ tipc_group_add_member(grp, p->sk.node, p->sk.ref,
+ p->sr.lower);
}
}
spin_unlock_bh(&sc->lock);
@@ -721,14 +757,14 @@ exit:
/* tipc_nametbl_publish - add service binding to name table
*/
-struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
- u32 upper, u32 scope, u32 port,
- u32 key)
+struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk, u32 key)
{
struct name_table *nt = tipc_name_table(net);
struct tipc_net *tn = tipc_net(net);
struct publication *p = NULL;
struct sk_buff *skb = NULL;
+ u32 rc_dests;
spin_lock_bh(&tn->nametbl_lock);
@@ -737,77 +773,78 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
goto exit;
}
- p = tipc_nametbl_insert_publ(net, type, lower, upper, scope,
- tipc_own_addr(net), port, key);
+ p = tipc_nametbl_insert_publ(net, ua, sk, key);
if (p) {
nt->local_publ_count++;
skb = tipc_named_publish(net, p);
}
+ rc_dests = nt->rc_dests;
exit:
spin_unlock_bh(&tn->nametbl_lock);
if (skb)
- tipc_node_broadcast(net, skb);
+ tipc_node_broadcast(net, skb, rc_dests);
return p;
+
}
/**
* tipc_nametbl_withdraw - withdraw a service binding
+ * @net: network namespace
+ * @ua: service address/range being unbound
+ * @sk: address of the socket being unbound from
+ * @key: target publication key
*/
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
- u32 upper, u32 key)
+void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk, u32 key)
{
struct name_table *nt = tipc_name_table(net);
struct tipc_net *tn = tipc_net(net);
- u32 self = tipc_own_addr(net);
struct sk_buff *skb = NULL;
struct publication *p;
+ u32 rc_dests;
spin_lock_bh(&tn->nametbl_lock);
- p = tipc_nametbl_remove_publ(net, type, lower, upper, self, key);
+ p = tipc_nametbl_remove_publ(net, ua, sk, key);
if (p) {
nt->local_publ_count--;
skb = tipc_named_withdraw(net, p);
list_del_init(&p->binding_sock);
kfree_rcu(p, rcu);
- } else {
- pr_err("Failed to remove local publication {%u,%u,%u}/%u\n",
- type, lower, upper, key);
}
+ rc_dests = nt->rc_dests;
spin_unlock_bh(&tn->nametbl_lock);
- if (skb) {
- tipc_node_broadcast(net, skb);
- return 1;
- }
- return 0;
+ if (skb)
+ tipc_node_broadcast(net, skb, rc_dests);
}
/**
* tipc_nametbl_subscribe - add a subscription object to the name table
+ * @sub: subscription to add
*/
bool tipc_nametbl_subscribe(struct tipc_subscription *sub)
{
- struct name_table *nt = tipc_name_table(sub->net);
struct tipc_net *tn = tipc_net(sub->net);
- struct tipc_subscr *s = &sub->evt.s;
- u32 type = tipc_sub_read(s, seq.type);
+ u32 type = sub->s.seq.type;
struct tipc_service *sc;
+ struct tipc_uaddr ua;
bool res = true;
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type,
+ sub->s.seq.lower, sub->s.seq.upper);
spin_lock_bh(&tn->nametbl_lock);
- sc = tipc_service_find(sub->net, type);
+ sc = tipc_service_find(sub->net, &ua);
if (!sc)
- sc = tipc_service_create(type, &nt->services[hash(type)]);
+ sc = tipc_service_create(sub->net, &ua);
if (sc) {
spin_lock_bh(&sc->lock);
tipc_service_subscribe(sc, sub);
spin_unlock_bh(&sc->lock);
} else {
- pr_warn("Failed to subscribe for {%u,%u,%u}\n", type,
- tipc_sub_read(s, seq.lower),
- tipc_sub_read(s, seq.upper));
+ pr_warn("Failed to subscribe for {%u,%u,%u}\n",
+ type, sub->s.seq.lower, sub->s.seq.upper);
res = false;
}
spin_unlock_bh(&tn->nametbl_lock);
@@ -816,16 +853,18 @@ bool tipc_nametbl_subscribe(struct tipc_subscription *sub)
/**
* tipc_nametbl_unsubscribe - remove a subscription object from name table
+ * @sub: subscription to remove
*/
void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
{
struct tipc_net *tn = tipc_net(sub->net);
- struct tipc_subscr *s = &sub->evt.s;
- u32 type = tipc_sub_read(s, seq.type);
struct tipc_service *sc;
+ struct tipc_uaddr ua;
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
+ sub->s.seq.type, sub->s.seq.lower, sub->s.seq.upper);
spin_lock_bh(&tn->nametbl_lock);
- sc = tipc_service_find(sub->net, type);
+ sc = tipc_service_find(sub->net, &ua);
if (!sc)
goto exit;
@@ -865,7 +904,9 @@ int tipc_nametbl_init(struct net *net)
}
/**
- * tipc_service_delete - purge all publications for a service and delete it
+ * tipc_service_delete - purge all publications for a service and delete it
+ * @net: the associated network namespace
+ * @sc: tipc_service to delete
*/
static void tipc_service_delete(struct net *net, struct tipc_service *sc)
{
@@ -875,7 +916,7 @@ static void tipc_service_delete(struct net *net, struct tipc_service *sc)
spin_lock_bh(&sc->lock);
rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) {
list_for_each_entry_safe(p, tmp, &sr->all_publ, all_publ) {
- tipc_service_remove_publ(sr, p->node, p->key);
+ tipc_service_remove_publ(sr, &p->sk, p->key);
kfree_rcu(p, rcu);
}
rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
@@ -926,7 +967,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
list_for_each_entry(p, &sr->all_publ, all_publ)
if (p->key == *last_key)
break;
- if (p->key != *last_key)
+ if (list_entry_is_head(p, &sr->all_publ, all_publ))
return -EPIPE;
} else {
p = list_first_entry(&sr->all_publ,
@@ -959,9 +1000,9 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
goto publ_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_SCOPE, p->scope))
goto publ_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->sk.node))
goto publ_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->sk.ref))
goto publ_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key))
goto publ_msg_full;
@@ -1012,6 +1053,7 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg,
struct tipc_net *tn = tipc_net(net);
struct tipc_service *service = NULL;
struct hlist_head *head;
+ struct tipc_uaddr ua;
int err;
int i;
@@ -1025,7 +1067,9 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg,
if (*last_type ||
(!i && *last_key && (*last_lower == *last_key))) {
- service = tipc_service_find(net, *last_type);
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
+ *last_type, *last_lower, *last_lower);
+ service = tipc_service_find(net, &ua);
if (!service)
return -EPIPE;
} else {
@@ -1158,14 +1202,3 @@ void tipc_dest_list_purge(struct list_head *l)
kfree(dst);
}
}
-
-int tipc_dest_list_len(struct list_head *l)
-{
- struct tipc_dest *dst;
- int i = 0;
-
- list_for_each_entry(dst, l, list) {
- i++;
- }
- return i;
-}
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 728bc7016c38..3bcd9ef8cee3 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -3,6 +3,7 @@
*
* Copyright (c) 2000-2006, 2014-2018, Ericsson AB
* Copyright (c) 2004-2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -41,6 +42,7 @@ struct tipc_subscription;
struct tipc_plist;
struct tipc_nlist;
struct tipc_group;
+struct tipc_uaddr;
/*
* TIPC name types reserved for internal TIPC use (both current and planned)
@@ -49,19 +51,18 @@ struct tipc_group;
#define TIPC_PUBL_SCOPE_NUM (TIPC_NODE_SCOPE + 1)
#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
+#define TIPC_ANY_SCOPE 10 /* Both node and cluster scope will match */
+
/**
- * struct publication - info about a published (name or) name sequence
- * @type: name sequence type
- * @lower: name sequence lower bound
- * @upper: name sequence upper bound
+ * struct publication - info about a published service address or range
+ * @sr: service range represented by this publication
+ * @sk: address of socket bound to this publication
* @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE
- * @node: network address of publishing socket's node
- * @port: publishing port
* @key: publication key, unique across the cluster
* @id: publication id
* @binding_node: all publications from the same node which bound this one
- * - Remote publications: in node->publ_list
- * Used by node/name distr to withdraw publications when node is lost
+ * - Remote publications: in node->publ_list;
+ * Used by node/name distr to withdraw publications when node is lost
* - Local/node scope publications: in name_table->node_scope list
* - Local/cluster scope publications: in name_table->cluster_scope list
* @binding_sock: all publications from the same socket which bound this one
@@ -74,12 +75,9 @@ struct tipc_group;
* @rcu: RCU callback head used for deferred freeing
*/
struct publication {
- u32 type;
- u32 lower;
- u32 upper;
- u32 scope;
- u32 node;
- u32 port;
+ struct tipc_service_range sr;
+ struct tipc_socket_addr sk;
+ u16 scope;
u32 key;
u32 id;
struct list_head binding_node;
@@ -92,13 +90,16 @@ struct publication {
/**
* struct name_table - table containing all existing port name publications
- * @seq_hlist: name sequence hash lists
+ * @services: name sequence hash lists
* @node_scope: all local publications with node scope
* - used by name_distr during re-init of name table
* @cluster_scope: all local publications with cluster scope
* - used by name_distr to send bulk updates to new nodes
* - used by name_distr during re-init of name table
+ * @cluster_scope_lock: lock for accessing @cluster_scope
* @local_publ_count: number of publications issued by this node
+ * @rc_dests: destination node counter
+ * @snd_nxt: next sequence number to be used
*/
struct name_table {
struct hlist_head services[TIPC_NAMETBL_SIZE];
@@ -106,31 +107,34 @@ struct name_table {
struct list_head cluster_scope;
rwlock_t cluster_scope_lock;
u32 local_publ_count;
+ u32 rc_dests;
+ u32 snd_nxt;
};
int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
-u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
-void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
- u32 scope, bool exact, struct list_head *dports);
+bool tipc_nametbl_lookup_anycast(struct net *net, struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk);
+void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
+ struct list_head *dports);
+void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua,
+ struct tipc_nlist *nodes);
+bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua,
+ struct list_head *dsts, int *dstcnt,
+ u32 exclude, bool mcast);
void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
- u32 type, u32 domain);
-void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
- u32 upper, struct tipc_nlist *nodes);
-bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
- struct list_head *dsts, int *dstcnt, u32 exclude,
- bool all);
-struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
- u32 upper, u32 scope, u32 port,
- u32 key);
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 upper,
- u32 key);
-struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
- u32 lower, u32 upper, u32 scope,
- u32 node, u32 ref, u32 key);
-struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
- u32 lower, u32 upper,
- u32 node, u32 key);
+ struct tipc_uaddr *ua);
+struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk, u32 key);
+void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk, u32 key);
+struct publication *tipc_nametbl_insert_publ(struct net *net,
+ struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk,
+ u32 key);
+struct publication *tipc_nametbl_remove_publ(struct net *net,
+ struct tipc_uaddr *ua,
+ struct tipc_socket_addr *sk,
+ u32 key);
bool tipc_nametbl_subscribe(struct tipc_subscription *s);
void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
int tipc_nametbl_init(struct net *net);
@@ -147,6 +151,5 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 port);
bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port);
bool tipc_dest_del(struct list_head *l, u32 node, u32 port);
void tipc_dest_list_purge(struct list_head *l);
-int tipc_dest_list_len(struct list_head *l);
#endif
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 85400e4242de..0e95572e56b4 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -41,6 +41,7 @@
#include "socket.h"
#include "node.h"
#include "bcast.h"
+#include "link.h"
#include "netlink.h"
#include "monitor.h"
@@ -89,7 +90,7 @@
* - A spin lock to protect the registry of kernel/driver users (reg.c)
* - A global spin_lock (tipc_port_lock), which only task is to ensure
* consistency where more than one port is involved in an operation,
- * i.e., whe a port is part of a linked list of ports.
+ * i.e., when a port is part of a linked list of ports.
* There are two such lists; 'port_list', which is used for management,
* and 'wait_list', which is used to queue ports during congestion.
*
@@ -105,12 +106,6 @@
* - A local spin_lock protecting the queue of subscriber events.
*/
-struct tipc_net_work {
- struct work_struct work;
- struct net *net;
- u32 addr;
-};
-
static void tipc_net_finalize(struct net *net, u32 addr);
int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
@@ -131,6 +126,11 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
static void tipc_net_finalize(struct net *net, u32 addr)
{
struct tipc_net *tn = tipc_net(net);
+ struct tipc_socket_addr sk = {0, addr};
+ struct tipc_uaddr ua;
+
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE,
+ TIPC_NODE_STATE, addr, addr);
if (cmpxchg(&tn->node_addr, 0, addr))
return;
@@ -138,29 +138,14 @@ static void tipc_net_finalize(struct net *net, u32 addr)
tipc_named_reinit(net);
tipc_sk_reinit(net);
tipc_mon_reinit_self(net);
- tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
- TIPC_CLUSTER_SCOPE, 0, addr);
+ tipc_nametbl_publish(net, &ua, &sk, addr);
}
-static void tipc_net_finalize_work(struct work_struct *work)
+void tipc_net_finalize_work(struct work_struct *work)
{
- struct tipc_net_work *fwork;
-
- fwork = container_of(work, struct tipc_net_work, work);
- tipc_net_finalize(fwork->net, fwork->addr);
- kfree(fwork);
-}
+ struct tipc_net *tn = container_of(work, struct tipc_net, work);
-void tipc_sched_net_finalize(struct net *net, u32 addr)
-{
- struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC);
-
- if (!fwork)
- return;
- INIT_WORK(&fwork->work, tipc_net_finalize_work);
- fwork->net = net;
- fwork->addr = addr;
- schedule_work(&fwork->work);
+ tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr);
}
void tipc_net_stop(struct net *net)
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 6740d97c706e..d0c91d2df20a 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -42,6 +42,7 @@
extern const struct nla_policy tipc_nl_net_policy[];
int tipc_net_init(struct net *net, u8 *node_id, u32 addr);
+void tipc_net_finalize_work(struct work_struct *work);
void tipc_sched_net_finalize(struct net *net, u32 addr);
void tipc_net_stop(struct net *net);
int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index bb9862410e68..e8fd257c0e68 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -108,6 +108,8 @@ const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
.len = TIPC_NODEID_LEN},
[TIPC_NLA_NODE_KEY] = { .type = NLA_BINARY,
.len = TIPC_AEAD_KEY_SIZE_MAX},
+ [TIPC_NLA_NODE_KEY_MASTER] = { .type = NLA_FLAG },
+ [TIPC_NLA_NODE_REKEYING] = { .type = NLA_U32 },
};
/* Properties valid for media, bearer and link */
@@ -188,7 +190,7 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
},
{
.cmd = TIPC_NL_LINK_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT,
.doit = tipc_nl_node_get_link,
.dumpit = tipc_nl_node_dump_link,
},
@@ -292,6 +294,7 @@ struct genl_family tipc_genl_family __ro_after_init = {
.module = THIS_MODULE,
.ops = tipc_genl_v2_ops,
.n_ops = ARRAY_SIZE(tipc_genl_v2_ops),
+ .resv_start_op = TIPC_NL_ADDR_LEGACY_GET + 1,
};
int __init tipc_netlink_start(void)
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 217516357ef2..dfea27a906f2 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -118,7 +118,8 @@ static void tipc_tlv_init(struct sk_buff *skb, u16 type)
skb_put(skb, sizeof(struct tlv_desc));
}
-static int tipc_tlv_sprintf(struct sk_buff *skb, const char *fmt, ...)
+static __printf(2, 3) int tipc_tlv_sprintf(struct sk_buff *skb,
+ const char *fmt, ...)
{
int n;
u16 len;
@@ -212,12 +213,14 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
}
info.attrs = attrbuf;
- err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf,
- tipc_genl_family.maxattr,
- tipc_genl_family.policy, NULL);
- if (err)
- goto err_out;
+ if (nlmsg_len(cb.nlh) > 0) {
+ err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf,
+ tipc_genl_family.maxattr,
+ tipc_genl_family.policy, NULL);
+ if (err)
+ goto err_out;
+ }
do {
int rem;
@@ -275,8 +278,9 @@ err_out:
static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
struct tipc_nl_compat_msg *msg)
{
- int err;
+ struct nlmsghdr *nlh;
struct sk_buff *arg;
+ int err;
if (msg->req_type && (!msg->req_size ||
!TLV_CHECK_TYPE(msg->req, msg->req_type)))
@@ -305,6 +309,15 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
return -ENOMEM;
}
+ nlh = nlmsg_put(arg, 0, 0, tipc_genl_family.id, 0, NLM_F_MULTI);
+ if (!nlh) {
+ kfree_skb(arg);
+ kfree_skb(msg->rep);
+ msg->rep = NULL;
+ return -EMSGSIZE;
+ }
+ nlmsg_end(arg, nlh);
+
err = __tipc_nl_compat_dumpit(cmd, msg, arg);
if (err) {
kfree_skb(msg->rep);
@@ -578,7 +591,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
return 0;
tipc_tlv_sprintf(msg->rep, "\nLink <%s>\n",
- nla_data(link[TIPC_NLA_LINK_NAME]));
+ (char *)nla_data(link[TIPC_NLA_LINK_NAME]));
if (link[TIPC_NLA_LINK_BROADCAST]) {
__fill_bc_link_stat(msg, prop, stats);
@@ -683,9 +696,9 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
if (err)
return err;
- link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]);
+ link_info.dest = htonl(nla_get_flag(link[TIPC_NLA_LINK_DEST]));
link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP]));
- nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME],
+ nla_strscpy(link_info.str, link[TIPC_NLA_LINK_NAME],
TIPC_MAX_LINK_NAME);
return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO,
@@ -867,7 +880,7 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg)
};
ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req);
- if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query))
+ if (TLV_GET_DATA_LEN(msg->req) < (int)sizeof(struct tipc_name_table_query))
return -EINVAL;
depth = ntohl(ntq->depth);
@@ -1327,7 +1340,7 @@ send:
return err;
}
-static const struct genl_ops tipc_genl_compat_ops[] = {
+static const struct genl_small_ops tipc_genl_compat_ops[] = {
{
.cmd = TIPC_GENL_CMD,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
@@ -1342,8 +1355,9 @@ static struct genl_family tipc_genl_compat_family __ro_after_init = {
.maxattr = 0,
.netnsok = true,
.module = THIS_MODULE,
- .ops = tipc_genl_compat_ops,
- .n_ops = ARRAY_SIZE(tipc_genl_compat_ops),
+ .small_ops = tipc_genl_compat_ops,
+ .n_small_ops = ARRAY_SIZE(tipc_genl_compat_ops),
+ .resv_start_op = TIPC_GENL_CMD + 1,
};
int __init tipc_netlink_compat_start(void)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 0c88778c88b5..b48d97cbbe29 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -75,12 +75,14 @@ struct tipc_bclink_entry {
struct sk_buff_head arrvq;
struct sk_buff_head inputq2;
struct sk_buff_head namedq;
+ u16 named_rcv_nxt;
+ bool named_open;
};
/**
* struct tipc_node - TIPC node structure
* @addr: network address of node
- * @ref: reference counter to node object
+ * @kref: reference counter to node object
* @lock: rwlock governing access to structure
* @net: the applicable net namespace
* @hash: links to adjacent nodes in unsorted hash chain
@@ -88,9 +90,11 @@ struct tipc_bclink_entry {
* @namedq: pointer to name table input queue with name table messages
* @active_links: bearer ids of active links, used as index into links[] array
* @links: array containing references to all links to node
+ * @bc_entry: broadcast link entry
* @action_flags: bit mask of different types of node actions
* @state: connectivity state vs peer node
* @preliminary: a preliminary node or not
+ * @failover_sent: failover sent or not
* @sync_point: sequence number where synch/failover is finished
* @list: links to adjacent nodes in sorted list of cluster's nodes
* @working_links: number of working links to node (both active and standby)
@@ -98,9 +102,16 @@ struct tipc_bclink_entry {
* @capabilities: bitmap, indicating peer node's functional capabilities
* @signature: node instance identifier
* @link_id: local and remote bearer ids of changing link, if any
+ * @peer_id: 128-bit ID of peer
+ * @peer_id_string: ID string of peer
* @publ_list: list of publications
+ * @conn_sks: list of connections (FIXME)
+ * @timer: node's keepalive timer
+ * @keepalive_intv: keepalive interval in milliseconds
* @rcu: rcu struct for tipc_node
* @delete_at: indicates the time for deleting a down node
+ * @peer_net: peer's net namespace
+ * @peer_hash_mix: hash for this peer (FIXME)
* @crypto_rx: RX crypto handler
*/
struct tipc_node {
@@ -265,6 +276,7 @@ char *tipc_node_get_id_str(struct tipc_node *node)
#ifdef CONFIG_TIPC_CRYPTO
/**
* tipc_node_crypto_rx - Retrieve crypto RX handle from node
+ * @__n: target tipc_node
* Note: node ref counter must be held first!
*/
struct tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n)
@@ -276,6 +288,14 @@ struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos)
{
return container_of(pos, struct tipc_node, list)->crypto_rx;
}
+
+struct tipc_crypto *tipc_node_crypto_rx_by_addr(struct net *net, u32 addr)
+{
+ struct tipc_node *n;
+
+ n = tipc_node_find(net, addr);
+ return (n) ? n->crypto_rx : NULL;
+}
#endif
static void tipc_node_free(struct rcu_head *rp)
@@ -301,7 +321,7 @@ void tipc_node_put(struct tipc_node *node)
kref_put(&node->kref, tipc_node_kref_release);
}
-static void tipc_node_get(struct tipc_node *node)
+void tipc_node_get(struct tipc_node *node)
{
kref_get(&node->kref);
}
@@ -352,42 +372,50 @@ static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id)
}
static void tipc_node_read_lock(struct tipc_node *n)
+ __acquires(n->lock)
{
read_lock_bh(&n->lock);
}
static void tipc_node_read_unlock(struct tipc_node *n)
+ __releases(n->lock)
{
read_unlock_bh(&n->lock);
}
static void tipc_node_write_lock(struct tipc_node *n)
+ __acquires(n->lock)
{
write_lock_bh(&n->lock);
}
static void tipc_node_write_unlock_fast(struct tipc_node *n)
+ __releases(n->lock)
{
write_unlock_bh(&n->lock);
}
static void tipc_node_write_unlock(struct tipc_node *n)
+ __releases(n->lock)
{
+ struct tipc_socket_addr sk;
struct net *net = n->net;
- u32 addr = 0;
u32 flags = n->action_flags;
- u32 link_id = 0;
- u32 bearer_id;
struct list_head *publ_list;
+ struct tipc_uaddr ua;
+ u32 bearer_id, node;
if (likely(!flags)) {
write_unlock_bh(&n->lock);
return;
}
- addr = n->addr;
- link_id = n->link_id;
- bearer_id = link_id & 0xffff;
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
+ TIPC_LINK_STATE, n->addr, n->addr);
+ sk.ref = n->link_id;
+ sk.node = tipc_own_addr(net);
+ node = n->addr;
+ bearer_id = n->link_id & 0xffff;
publ_list = &n->publ_list;
n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
@@ -396,20 +424,18 @@ static void tipc_node_write_unlock(struct tipc_node *n)
write_unlock_bh(&n->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN)
- tipc_publ_notify(net, publ_list, addr);
+ tipc_publ_notify(net, publ_list, node, n->capabilities);
if (flags & TIPC_NOTIFY_NODE_UP)
- tipc_named_node_up(net, addr);
+ tipc_named_node_up(net, node, n->capabilities);
if (flags & TIPC_NOTIFY_LINK_UP) {
- tipc_mon_peer_up(net, addr, bearer_id);
- tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
- TIPC_NODE_SCOPE, link_id, link_id);
+ tipc_mon_peer_up(net, node, bearer_id);
+ tipc_nametbl_publish(net, &ua, &sk, sk.ref);
}
if (flags & TIPC_NOTIFY_LINK_DOWN) {
- tipc_mon_peer_down(net, addr, bearer_id);
- tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
- addr, link_id);
+ tipc_mon_peer_down(net, node, bearer_id);
+ tipc_nametbl_withdraw(net, &ua, &sk, sk.ref);
}
}
@@ -446,8 +472,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
bool preliminary)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_link *l, *snd_l = tipc_bc_sndlink(net);
struct tipc_node *n, *temp_node;
- struct tipc_link *l;
unsigned long intv;
int bearer_id;
int i;
@@ -462,6 +488,16 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
goto exit;
/* A preliminary node becomes "real" now, refresh its data */
tipc_node_write_lock(n);
+ if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX,
+ tipc_link_min_win(snd_l), tipc_link_max_win(snd_l),
+ n->capabilities, &n->bc_entry.inputq1,
+ &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
+ pr_warn("Broadcast rcv link refresh failed, no memory\n");
+ tipc_node_write_unlock_fast(n);
+ tipc_node_put(n);
+ n = NULL;
+ goto exit;
+ }
n->preliminary = false;
n->addr = addr;
hlist_del_rcu(&n->hash);
@@ -541,7 +577,16 @@ update:
n->signature = INVALID_NODE_SIG;
n->active_links[0] = INVALID_BEARER_ID;
n->active_links[1] = INVALID_BEARER_ID;
- n->bc_entry.link = NULL;
+ if (!preliminary &&
+ !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX,
+ tipc_link_min_win(snd_l), tipc_link_max_win(snd_l),
+ n->capabilities, &n->bc_entry.inputq1,
+ &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
+ pr_warn("Broadcast rcv link creation failed, no memory\n");
+ kfree(n);
+ n = NULL;
+ goto exit;
+ }
tipc_node_get(n);
timer_setup(&n->timer, tipc_node_timeout, 0);
/* Start a slow timer anyway, crypto needs it */
@@ -582,6 +627,9 @@ static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l)
static void tipc_node_delete_from_list(struct tipc_node *node)
{
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_key_flush(node->crypto_rx);
+#endif
list_del_rcu(&node->list);
hlist_del_rcu(&node->hash);
tipc_node_put(node);
@@ -801,6 +849,9 @@ static void tipc_node_timeout(struct timer_list *t)
/**
* __tipc_node_link_up - handle addition of link
+ * @n: target tipc_node
+ * @bearer_id: id of the bearer
+ * @xmitq: queue for messages to be xmited on
* Node lock must be held by caller
* Link becomes active (alone or shared) or standby, depending on its priority.
*/
@@ -867,6 +918,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
/**
* tipc_node_link_up - handle addition of link
+ * @n: target tipc_node
+ * @bearer_id: id of the bearer
+ * @xmitq: queue for messages to be xmited on
*
* Link becomes active (alone or shared) or standby, depending on its priority.
*/
@@ -887,10 +941,11 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id,
*
* This function is only called in a very special situation where link
* failover can be already started on peer node but not on this node.
- * This can happen when e.g.
+ * This can happen when e.g.::
+ *
* 1. Both links <1A-2A>, <1B-2B> down
* 2. Link endpoint 2A up, but 1A still down (e.g. due to network
- * disturbance, wrong session, etc.)
+ * disturbance, wrong session, etc.)
* 3. Link <1B-2B> up
* 4. Link endpoint 2A down (e.g. due to link tolerance timeout)
* 5. Node 2 starts failover onto link <1B-2B>
@@ -927,6 +982,10 @@ static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l,
/**
* __tipc_node_link_down - handle loss of link
+ * @n: target tipc_node
+ * @bearer_id: id of the bearer
+ * @xmitq: queue for messages to be xmited on
+ * @maddr: output media address of the bearer
*/
static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
struct sk_buff_head *xmitq,
@@ -1115,7 +1174,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
bool *respond, bool *dupl_addr)
{
struct tipc_node *n;
- struct tipc_link *l, *snd_l;
+ struct tipc_link *l;
struct tipc_link_entry *le;
bool addr_match = false;
bool sign_match = false;
@@ -1135,22 +1194,6 @@ void tipc_node_check_dest(struct net *net, u32 addr,
return;
tipc_node_write_lock(n);
- if (unlikely(!n->bc_entry.link)) {
- snd_l = tipc_bc_sndlink(net);
- if (!tipc_link_bc_create(net, tipc_own_addr(net),
- addr, U16_MAX,
- tipc_link_min_win(snd_l),
- tipc_link_max_win(snd_l),
- n->capabilities,
- &n->bc_entry.inputq1,
- &n->bc_entry.namedq, snd_l,
- &n->bc_entry.link)) {
- pr_warn("Broadcast rcv link creation failed, no mem\n");
- tipc_node_write_unlock_fast(n);
- tipc_node_put(n);
- return;
- }
- }
le = &n->links[b->identity];
@@ -1175,7 +1218,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
/* Peer has changed i/f address without rebooting.
* If so, the link will reset soon, and the next
* discovery will be accepted. So we can ignore it.
- * It may also be an cloned or malicious peer having
+ * It may also be a cloned or malicious peer having
* chosen the same node address and signature as an
* existing one.
* Ignore requests until the link goes down, if ever.
@@ -1483,6 +1526,7 @@ static void node_lost_contact(struct tipc_node *n,
/* Clean up broadcast state */
tipc_bcast_remove_peer(n->net, n->bc_entry.link);
+ skb_queue_purge(&n->bc_entry.namedq);
/* Abort any ongoing link failover */
for (i = 0; i < MAX_BEARERS; i++) {
@@ -1511,11 +1555,13 @@ static void node_lost_contact(struct tipc_node *n,
/**
* tipc_node_get_linkname - get the name of a link
*
+ * @net: the applicable net namespace
* @bearer_id: id of the bearer
- * @node: peer node address
+ * @addr: peer node address
* @linkname: link name output buffer
+ * @len: size of @linkname output buffer
*
- * Returns 0 on success
+ * Return: 0 on success
*/
int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr,
char *linkname, size_t len)
@@ -1586,7 +1632,8 @@ static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list)
case TIPC_MEDIUM_IMPORTANCE:
case TIPC_HIGH_IMPORTANCE:
case TIPC_CRITICAL_IMPORTANCE:
- if (msg_connected(hdr) || msg_named(hdr)) {
+ if (msg_connected(hdr) || msg_named(hdr) ||
+ msg_direct(hdr)) {
tipc_loopback_trace(peer_net, list);
spin_lock_init(&list->lock);
tipc_sk_rcv(peer_net, list);
@@ -1623,17 +1670,17 @@ static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list)
return;
default:
return;
- };
+ }
}
/**
- * tipc_node_xmit() is the general link level function for message sending
+ * tipc_node_xmit() - general link level function for message sending
* @net: the applicable net namespace
* @list: chain of buffers containing message
* @dnode: address of destination node
* @selector: a number used for deterministic link selection
* Consumes the buffer chain.
- * Returns 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF
+ * Return: 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF
*/
int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
u32 dnode, int selector)
@@ -1696,7 +1743,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
}
/* tipc_node_xmit_skb(): send single buffer to destination
- * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE
+ * Buffers sent via this function are generally TIPC_SYSTEM_IMPORTANCE
* messages, which will not be rejected
* The only exception is datagram messages rerouted after secondary
* lookup, which are rare and safe to dispose of anyway.
@@ -1728,12 +1775,23 @@ int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq)
return 0;
}
-void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
+void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests)
{
+ struct sk_buff_head xmitq;
struct sk_buff *txskb;
struct tipc_node *n;
+ u16 dummy;
u32 dst;
+ /* Use broadcast if all nodes support it */
+ if (!rc_dests && tipc_bcast_get_mode(net) != BCLINK_MODE_RCAST) {
+ __skb_queue_head_init(&xmitq);
+ __skb_queue_tail(&xmitq, skb);
+ tipc_bcast_xmit(net, &xmitq, &dummy);
+ return;
+ }
+
+ /* Otherwise use legacy replicast method */
rcu_read_lock();
list_for_each_entry_rcu(n, tipc_nodes(net), list) {
dst = n->addr;
@@ -1748,7 +1806,6 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
tipc_node_xmit_skb(net, txskb, dst, 0);
}
rcu_read_unlock();
-
kfree_skb(skb);
}
@@ -1771,7 +1828,7 @@ static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
struct tipc_link *ucl;
int rc;
- rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
+ rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr, xmitq);
if (rc & TIPC_LINK_DOWN_EVT) {
tipc_node_reset_links(n);
@@ -1843,7 +1900,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
/* Handle NAME_DISTRIBUTOR messages sent from 1.7 nodes */
if (!skb_queue_empty(&n->bc_entry.namedq))
- tipc_named_rcv(net, &n->bc_entry.namedq);
+ tipc_named_rcv(net, &n->bc_entry.namedq,
+ &n->bc_entry.named_rcv_nxt,
+ &n->bc_entry.named_open);
/* If reassembly or retransmission failure => reset all links to peer */
if (rc & TIPC_LINK_DOWN_EVT)
@@ -1854,9 +1913,11 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
/**
* tipc_node_check_state - check and if necessary update node state
+ * @n: target tipc_node
* @skb: TIPC packet
* @bearer_id: identity of bearer delivering the packet
- * Returns true if state and msg are ok, otherwise false
+ * @xmitq: queue for messages to be xmited on
+ * Return: true if state and msg are ok, otherwise false
*/
static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
int bearer_id, struct sk_buff_head *xmitq)
@@ -1957,7 +2018,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
return true;
}
- /* No synching needed if only one link */
+ /* No syncing needed if only one link */
if (!pl || !tipc_link_is_up(pl))
return true;
@@ -2006,7 +2067,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
* tipc_rcv - process TIPC packets/messages arriving from off-node
* @net: the applicable net namespace
* @skb: TIPC packet
- * @bearer: pointer to bearer message arrived on
+ * @b: pointer to bearer message arrived on
*
* Invoked with no locks held. Bearer pointer must point to a valid bearer
* structure (i.e. cannot be NULL), but bearer can be inactive.
@@ -2069,10 +2130,16 @@ rcv:
le = &n->links[bearer_id];
/* Ensure broadcast reception is in synch with peer's send state */
- if (unlikely(usr == LINK_PROTOCOL))
+ if (unlikely(usr == LINK_PROTOCOL)) {
+ if (unlikely(skb_linearize(skb))) {
+ tipc_node_put(n);
+ goto discard;
+ }
+ hdr = buf_msg(skb);
tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq);
- else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack))
+ } else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) {
tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr);
+ }
/* Receive packet directly if conditions permit */
tipc_node_read_lock(n);
@@ -2089,7 +2156,7 @@ rcv:
/* Check/update node state before receiving */
if (unlikely(skb)) {
if (unlikely(skb_linearize(skb)))
- goto discard;
+ goto out_node_put;
tipc_node_write_lock(n);
if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) {
if (le->link) {
@@ -2107,7 +2174,9 @@ rcv:
tipc_node_link_down(n, bearer_id, false);
if (unlikely(!skb_queue_empty(&n->bc_entry.namedq)))
- tipc_named_rcv(net, &n->bc_entry.namedq);
+ tipc_named_rcv(net, &n->bc_entry.namedq,
+ &n->bc_entry.named_rcv_nxt,
+ &n->bc_entry.named_open);
if (unlikely(!skb_queue_empty(&n->bc_entry.inputq1)))
tipc_node_mcast_rcv(n);
@@ -2118,6 +2187,7 @@ rcv:
if (!skb_queue_empty(&xmitq))
tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n);
+out_node_put:
tipc_node_put(n);
discard:
kfree_skb(skb);
@@ -2145,7 +2215,11 @@ void tipc_node_apply_property(struct net *net, struct tipc_bearer *b,
&xmitq);
else if (prop == TIPC_NLA_PROP_MTU)
tipc_link_set_mtu(e->link, b->mtu);
+
+ /* Update MTU for node link entry */
+ e->mtu = tipc_link_mss(e->link);
}
+
tipc_node_write_unlock(n);
tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr, NULL);
}
@@ -2159,6 +2233,9 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
struct tipc_node *peer, *temp_node;
+ u8 node_id[NODE_ID_LEN];
+ u64 *w0 = (u64 *)&node_id[0];
+ u64 *w1 = (u64 *)&node_id[8];
u32 addr;
int err;
@@ -2172,10 +2249,22 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- if (!attrs[TIPC_NLA_NET_ADDR])
- return -EINVAL;
+ /* attrs[TIPC_NLA_NET_NODEID] and attrs[TIPC_NLA_NET_ADDR] are
+ * mutually exclusive cases
+ */
+ if (attrs[TIPC_NLA_NET_ADDR]) {
+ addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
+ if (!addr)
+ return -EINVAL;
+ }
- addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
+ if (attrs[TIPC_NLA_NET_NODEID]) {
+ if (!attrs[TIPC_NLA_NET_NODEID_W1])
+ return -EINVAL;
+ *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]);
+ *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]);
+ addr = hash128to32(node_id);
+ }
if (in_own_node(net, addr))
return -ENOTSUPP;
@@ -2426,7 +2515,7 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
if (strcmp(name, tipc_bclink_name) == 0) {
- err = tipc_nl_add_bc_link(net, &msg);
+ err = tipc_nl_add_bc_link(net, &msg, tipc_net(net)->bcl);
if (err)
goto err_free;
} else {
@@ -2470,6 +2559,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
struct tipc_node *node;
struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = tipc_net(net);
struct tipc_link_entry *le;
if (!info->attrs[TIPC_NLA_LINK])
@@ -2486,11 +2576,26 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
- if (strcmp(link_name, tipc_bclink_name) == 0) {
- err = tipc_bclink_reset_stats(net);
+ err = -EINVAL;
+ if (!strcmp(link_name, tipc_bclink_name)) {
+ err = tipc_bclink_reset_stats(net, tipc_bc_sndlink(net));
if (err)
return err;
return 0;
+ } else if (strstr(link_name, tipc_bclink_name)) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(node, &tn->node_list, list) {
+ tipc_node_read_lock(node);
+ link = node->bc_entry.link;
+ if (link && !strcmp(link_name, tipc_link_name(link))) {
+ err = tipc_bclink_reset_stats(net, link);
+ tipc_node_read_unlock(node);
+ break;
+ }
+ tipc_node_read_unlock(node);
+ }
+ rcu_read_unlock();
+ return err;
}
node = tipc_node_find_by_name(net, link_name, &bearer_id);
@@ -2514,7 +2619,8 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
/* Caller should hold node lock */
static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
- struct tipc_node *node, u32 *prev_link)
+ struct tipc_node *node, u32 *prev_link,
+ bool bc_link)
{
u32 i;
int err;
@@ -2530,6 +2636,14 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
if (err)
return err;
}
+
+ if (bc_link) {
+ *prev_link = i;
+ err = tipc_nl_add_bc_link(net, msg, node->bc_entry.link);
+ if (err)
+ return err;
+ }
+
*prev_link = 0;
return 0;
@@ -2538,17 +2652,36 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *node;
struct tipc_nl_msg msg;
u32 prev_node = cb->args[0];
u32 prev_link = cb->args[1];
int done = cb->args[2];
+ bool bc_link = cb->args[3];
int err;
if (done)
return 0;
+ if (!prev_node) {
+ /* Check if broadcast-receiver links dumping is needed */
+ if (attrs && attrs[TIPC_NLA_LINK]) {
+ err = nla_parse_nested_deprecated(link,
+ TIPC_NLA_LINK_MAX,
+ attrs[TIPC_NLA_LINK],
+ tipc_nl_link_policy,
+ NULL);
+ if (unlikely(err))
+ return err;
+ if (unlikely(!link[TIPC_NLA_LINK_BROADCAST]))
+ return -EINVAL;
+ bc_link = true;
+ }
+ }
+
msg.skb = skb;
msg.portid = NETLINK_CB(cb->skb).portid;
msg.seq = cb->nlh->nlmsg_seq;
@@ -2572,7 +2705,7 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
list) {
tipc_node_read_lock(node);
err = __tipc_nl_add_node_links(net, &msg, node,
- &prev_link);
+ &prev_link, bc_link);
tipc_node_read_unlock(node);
if (err)
goto out;
@@ -2580,14 +2713,14 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
prev_node = node->addr;
}
} else {
- err = tipc_nl_add_bc_link(net, &msg);
+ err = tipc_nl_add_bc_link(net, &msg, tn->bcl);
if (err)
goto out;
list_for_each_entry_rcu(node, &tn->node_list, list) {
tipc_node_read_lock(node);
err = __tipc_nl_add_node_links(net, &msg, node,
- &prev_link);
+ &prev_link, bc_link);
tipc_node_read_unlock(node);
if (err)
goto out;
@@ -2602,6 +2735,7 @@ out:
cb->args[0] = prev_node;
cb->args[1] = prev_link;
cb->args[2] = done;
+ cb->args[3] = bc_link;
return skb->len;
}
@@ -2770,17 +2904,22 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
#ifdef CONFIG_TIPC_CRYPTO
static int tipc_nl_retrieve_key(struct nlattr **attrs,
- struct tipc_aead_key **key)
+ struct tipc_aead_key **pkey)
{
struct nlattr *attr = attrs[TIPC_NLA_NODE_KEY];
+ struct tipc_aead_key *key;
if (!attr)
return -ENODATA;
- *key = (struct tipc_aead_key *)nla_data(attr);
- if (nla_len(attr) < tipc_aead_key_size(*key))
+ if (nla_len(attr) < sizeof(*key))
+ return -EINVAL;
+ key = (struct tipc_aead_key *)nla_data(attr);
+ if (key->keylen > TIPC_AEAD_KEYLEN_MAX ||
+ nla_len(attr) < tipc_aead_key_size(key))
return -EINVAL;
+ *pkey = key;
return 0;
}
@@ -2798,15 +2937,27 @@ static int tipc_nl_retrieve_nodeid(struct nlattr **attrs, u8 **node_id)
return 0;
}
+static int tipc_nl_retrieve_rekeying(struct nlattr **attrs, u32 *intv)
+{
+ struct nlattr *attr = attrs[TIPC_NLA_NODE_REKEYING];
+
+ if (!attr)
+ return -ENODATA;
+
+ *intv = nla_get_u32(attr);
+ return 0;
+}
+
static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attrs[TIPC_NLA_NODE_MAX + 1];
struct net *net = sock_net(skb->sk);
- struct tipc_net *tn = tipc_net(net);
+ struct tipc_crypto *tx = tipc_net(net)->crypto_tx, *c = tx;
struct tipc_node *n = NULL;
struct tipc_aead_key *ukey;
- struct tipc_crypto *c;
- u8 *id, *own_id;
+ bool rekeying = true, master_key = false;
+ u8 *id, *own_id, mode;
+ u32 intv = 0;
int rc = 0;
if (!info->attrs[TIPC_NLA_NODE])
@@ -2816,52 +2967,66 @@ static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
info->attrs[TIPC_NLA_NODE],
tipc_nl_node_policy, info->extack);
if (rc)
- goto exit;
+ return rc;
own_id = tipc_own_id(net);
if (!own_id) {
- rc = -EPERM;
- goto exit;
+ GENL_SET_ERR_MSG(info, "not found own node identity (set id?)");
+ return -EPERM;
}
+ rc = tipc_nl_retrieve_rekeying(attrs, &intv);
+ if (rc == -ENODATA)
+ rekeying = false;
+
rc = tipc_nl_retrieve_key(attrs, &ukey);
- if (rc)
- goto exit;
+ if (rc == -ENODATA && rekeying)
+ goto rekeying;
+ else if (rc)
+ return rc;
- rc = tipc_aead_key_validate(ukey);
+ rc = tipc_aead_key_validate(ukey, info);
if (rc)
- goto exit;
+ return rc;
rc = tipc_nl_retrieve_nodeid(attrs, &id);
switch (rc) {
case -ENODATA:
- /* Cluster key mode */
- rc = tipc_crypto_key_init(tn->crypto_tx, ukey, CLUSTER_KEY);
+ mode = CLUSTER_KEY;
+ master_key = !!(attrs[TIPC_NLA_NODE_KEY_MASTER]);
break;
case 0:
- /* Per-node key mode */
- if (!memcmp(id, own_id, NODE_ID_LEN)) {
- c = tn->crypto_tx;
- } else {
+ mode = PER_NODE_KEY;
+ if (memcmp(id, own_id, NODE_ID_LEN)) {
n = tipc_node_find_by_id(net, id) ?:
tipc_node_create(net, 0, id, 0xffffu, 0, true);
- if (unlikely(!n)) {
- rc = -ENOMEM;
- break;
- }
+ if (unlikely(!n))
+ return -ENOMEM;
c = n->crypto_rx;
}
-
- rc = tipc_crypto_key_init(c, ukey, PER_NODE_KEY);
- if (n)
- tipc_node_put(n);
break;
default:
- break;
+ return rc;
}
-exit:
- return (rc < 0) ? rc : 0;
+ /* Initiate the TX/RX key */
+ rc = tipc_crypto_key_init(c, ukey, mode, master_key);
+ if (n)
+ tipc_node_put(n);
+
+ if (unlikely(rc < 0)) {
+ GENL_SET_ERR_MSG(info, "unable to initiate or attach new key");
+ return rc;
+ } else if (c == tx) {
+ /* Distribute TX key but not master one */
+ if (!master_key && tipc_crypto_key_distr(tx, rc, NULL))
+ GENL_SET_ERR_MSG(info, "failed to replicate new key");
+rekeying:
+ /* Schedule TX rekeying if needed */
+ tipc_crypto_rekeying_sched(tx, rekeying, intv);
+ }
+
+ return 0;
}
int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
@@ -2888,7 +3053,6 @@ static int __tipc_nl_node_flush_key(struct sk_buff *skb,
tipc_crypto_key_flush(n->crypto_rx);
rcu_read_unlock();
- pr_info("All keys are flushed!\n");
return 0;
}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index a6803b449a2c..154a5bbb0d29 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -55,7 +55,8 @@ enum {
TIPC_MCAST_RBCTL = (1 << 7),
TIPC_GAP_ACK_BLOCK = (1 << 8),
TIPC_TUNNEL_ENHANCED = (1 << 9),
- TIPC_NAGLE = (1 << 10)
+ TIPC_NAGLE = (1 << 10),
+ TIPC_NAMED_BCAST = (1 << 11)
};
#define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \
@@ -68,7 +69,8 @@ enum {
TIPC_MCAST_RBCTL | \
TIPC_GAP_ACK_BLOCK | \
TIPC_TUNNEL_ENHANCED | \
- TIPC_NAGLE)
+ TIPC_NAGLE | \
+ TIPC_NAMED_BCAST)
#define INVALID_BEARER_ID -1
@@ -77,12 +79,14 @@ bool tipc_node_get_id(struct net *net, u32 addr, u8 *id);
u32 tipc_node_get_addr(struct tipc_node *node);
char *tipc_node_get_id_str(struct tipc_node *node);
void tipc_node_put(struct tipc_node *node);
+void tipc_node_get(struct tipc_node *node);
struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
u16 capabilities, u32 hash_mixes,
bool preliminary);
#ifdef CONFIG_TIPC_CRYPTO
struct tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n);
struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos);
+struct tipc_crypto *tipc_node_crypto_rx_by_addr(struct net *net, u32 addr);
#endif
u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
@@ -101,7 +105,7 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
u32 selector);
void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr);
void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr);
-void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
+void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 693e8902161e..e902b01ea3cb 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1,8 +1,9 @@
/*
* net/tipc/socket.c: TIPC socket API
*
- * Copyright (c) 2001-2007, 2012-2017, Ericsson AB
+ * Copyright (c) 2001-2007, 2012-2019, Ericsson AB
* Copyright (c) 2004-2008, 2010-2013, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -48,12 +49,13 @@
#include "group.h"
#include "trace.h"
+#define NAGLE_START_INIT 4
+#define NAGLE_START_MAX 1024
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */
-#define TIPC_FWD_MSG 1
#define TIPC_MAX_PORT 0xffffffff
#define TIPC_MIN_PORT 1
-#define TIPC_ACK_RATE 4 /* ACK at 1/4 of of rcv window size */
+#define TIPC_ACK_RATE 4 /* ACK at 1/4 of rcv window size */
enum {
TIPC_LISTEN = TCP_LISTEN,
@@ -71,32 +73,41 @@ struct sockaddr_pair {
/**
* struct tipc_sock - TIPC socket structure
* @sk: socket - interacts with 'port' and with user via the socket API
- * @conn_type: TIPC type used when connection was established
- * @conn_instance: TIPC instance used when connection was established
- * @published: non-zero if port has one or more associated names
* @max_pkt: maximum packet size "hint" used when building messages sent by port
* @maxnagle: maximum size of msg which can be subject to nagle
* @portid: unique port identity in TIPC socket hash table
* @phdr: preformatted message header used when sending messages
- * #cong_links: list of congested links
+ * @cong_links: list of congested links
* @publications: list of publications for port
* @blocking_link: address of the congested link we are currently sleeping on
* @pub_count: total # of publications port has made during its lifetime
* @conn_timeout: the time we can wait for an unresponded setup request
+ * @probe_unacked: probe has not received ack yet
* @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
* @cong_link_cnt: number of congested links
* @snt_unacked: # messages sent by socket, and not yet acked by peer
+ * @snd_win: send window size
+ * @peer_caps: peer capabilities mask
* @rcv_unacked: # messages read by user, but not yet acked back to peer
+ * @rcv_win: receive window size
* @peer: 'connected' peer for dgram/rdm
* @node: hash table node
* @mc_method: cookie for use between socket and broadcast layer
* @rcu: rcu struct for tipc_sock
+ * @group: TIPC communications group
+ * @oneway: message count in one direction (FIXME)
+ * @nagle_start: current nagle value
+ * @snd_backlog: send backlog count
+ * @msg_acc: messages accepted; used in managing backlog and nagle
+ * @pkt_cnt: TIPC socket packet count
+ * @expect_ack: whether this TIPC socket is expecting an ack
+ * @nodelay: setsockopt() TIPC_NODELAY setting
+ * @group_is_open: TIPC socket group is fully open (FIXME)
+ * @published: true if port has one or more associated names
+ * @conn_addrtype: address type used when establishing connection
*/
struct tipc_sock {
struct sock sk;
- u32 conn_type;
- u32 conn_instance;
- int published;
u32 max_pkt;
u32 maxnagle;
u32 portid;
@@ -119,10 +130,15 @@ struct tipc_sock {
struct rcu_head rcu;
struct tipc_group *group;
u32 oneway;
+ u32 nagle_start;
u16 snd_backlog;
+ u16 msg_acc;
+ u16 pkt_cnt;
bool expect_ack;
bool nodelay;
bool group_is_open;
+ bool published;
+ u8 conn_addrtype;
};
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
@@ -133,17 +149,16 @@ static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
bool kern);
static void tipc_sk_timeout(struct timer_list *t);
-static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
- struct tipc_name_seq const *seq);
-static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
- struct tipc_name_seq const *seq);
+static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua);
+static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua);
static int tipc_sk_leave(struct tipc_sock *tsk);
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
-static void tipc_sk_push_backlog(struct tipc_sock *tsk);
+static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack);
+static int tipc_wait_for_connect(struct socket *sock, long *timeo_p);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
@@ -191,17 +206,17 @@ static int tsk_importance(struct tipc_sock *tsk)
return msg_importance(&tsk->phdr);
}
-static int tsk_set_importance(struct tipc_sock *tsk, int imp)
+static struct tipc_sock *tipc_sk(const struct sock *sk)
{
- if (imp > TIPC_CRITICAL_IMPORTANCE)
- return -EINVAL;
- msg_set_importance(&tsk->phdr, (u32)imp);
- return 0;
+ return container_of(sk, struct tipc_sock, sk);
}
-static struct tipc_sock *tipc_sk(const struct sock *sk)
+int tsk_set_importance(struct sock *sk, int imp)
{
- return container_of(sk, struct tipc_sock, sk);
+ if (imp > TIPC_CRITICAL_IMPORTANCE)
+ return -EINVAL;
+ msg_set_importance(&tipc_sk(sk)->phdr, (u32)imp);
+ return 0;
}
static bool tsk_conn_cong(struct tipc_sock *tsk)
@@ -256,6 +271,7 @@ static void tsk_set_nagle(struct tipc_sock *tsk)
/**
* tsk_advance_rx_queue - discard first buffer in socket receive queue
+ * @sk: network socket
*
* Caller must hold socket lock
*/
@@ -284,6 +300,8 @@ static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
/**
* tsk_rej_rx_queue - reject all buffers in socket receive queue
+ * @sk: network socket
+ * @error: response error code
*
* Caller must hold socket lock
*/
@@ -437,7 +455,7 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout)
* This routine creates additional data structures used by the TIPC socket,
* initializes them, and links them together.
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
static int tipc_sk_create(struct net *net, struct socket *sock,
int protocol, int kern)
@@ -474,6 +492,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
tsk = tipc_sk(sk);
tsk->max_pkt = MAX_PKT_DEFAULT;
tsk->maxnagle = 0;
+ tsk->nagle_start = NAGLE_START_INIT;
INIT_LIST_HEAD(&tsk->publications);
INIT_LIST_HEAD(&tsk->cong_links);
msg = &tsk->phdr;
@@ -483,6 +502,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
sock_init_data(sock, sk);
tipc_set_sk_state(sk, TIPC_OPEN);
if (tipc_sk_insert(tsk)) {
+ sk_free(sk);
pr_warn("Socket create failed; port number exhausted\n");
return -EINVAL;
}
@@ -497,7 +517,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
sk->sk_shutdown = 0;
sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
- sk->sk_rcvbuf = sysctl_tipc_rmem[1];
+ sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]);
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
sk->sk_destruct = tipc_sock_destruct;
@@ -541,7 +561,7 @@ static void __tipc_shutdown(struct socket *sock, int error)
!tsk_conn_cong(tsk)));
/* Push out delayed messages if in Nagle mode */
- tipc_sk_push_backlog(tsk);
+ tipc_sk_push_backlog(tsk, false);
/* Remove pending SYN */
__skb_queue_purge(&sk->sk_write_queue);
@@ -601,7 +621,7 @@ static void __tipc_shutdown(struct socket *sock, int error)
* are returned or discarded according to the "destination droppable" setting
* specified for the message by the sender.
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
static int tipc_release(struct socket *sock)
{
@@ -622,7 +642,7 @@ static int tipc_release(struct socket *sock)
__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
sk->sk_shutdown = SHUTDOWN_MASK;
tipc_sk_leave(tsk);
- tipc_sk_withdraw(tsk, 0, NULL);
+ tipc_sk_withdraw(tsk, NULL);
__skb_queue_purge(&tsk->mc_method.deferredq);
sk_stop_timer(sk, &sk->sk_timer);
tipc_sk_remove(tsk);
@@ -639,76 +659,85 @@ static int tipc_release(struct socket *sock)
}
/**
- * tipc_bind - associate or disassocate TIPC name(s) with a socket
+ * __tipc_bind - associate or disassocate TIPC name(s) with a socket
* @sock: socket structure
- * @uaddr: socket address describing name(s) and desired operation
- * @uaddr_len: size of socket address data structure
+ * @skaddr: socket address describing name(s) and desired operation
+ * @alen: size of socket address data structure
*
- * Name and name sequence binding is indicated using a positive scope value;
+ * Name and name sequence binding are indicated using a positive scope value;
* a negative scope value unbinds the specified name. Specifying no name
* (i.e. a socket address length of 0) unbinds all names from the socket.
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*
* NOTE: This routine doesn't need to take the socket lock since it doesn't
* access any non-constant socket information.
*/
-static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
- int uaddr_len)
+static int __tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
{
- struct sock *sk = sock->sk;
- struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
- struct tipc_sock *tsk = tipc_sk(sk);
- int res = -EINVAL;
+ struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr;
+ struct tipc_sock *tsk = tipc_sk(sock->sk);
+ bool unbind = false;
- lock_sock(sk);
- if (unlikely(!uaddr_len)) {
- res = tipc_sk_withdraw(tsk, 0, NULL);
- goto exit;
- }
- if (tsk->group) {
- res = -EACCES;
- goto exit;
- }
- if (uaddr_len < sizeof(struct sockaddr_tipc)) {
- res = -EINVAL;
- goto exit;
+ if (unlikely(!alen))
+ return tipc_sk_withdraw(tsk, NULL);
+
+ if (ua->addrtype == TIPC_SERVICE_ADDR) {
+ ua->addrtype = TIPC_SERVICE_RANGE;
+ ua->sr.upper = ua->sr.lower;
}
- if (addr->family != AF_TIPC) {
- res = -EAFNOSUPPORT;
- goto exit;
+ if (ua->scope < 0) {
+ unbind = true;
+ ua->scope = -ua->scope;
}
+ /* Users may still use deprecated TIPC_ZONE_SCOPE */
+ if (ua->scope != TIPC_NODE_SCOPE)
+ ua->scope = TIPC_CLUSTER_SCOPE;
- if (addr->addrtype == TIPC_ADDR_NAME)
- addr->addr.nameseq.upper = addr->addr.nameseq.lower;
- else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
- res = -EAFNOSUPPORT;
- goto exit;
- }
+ if (tsk->group)
+ return -EACCES;
- if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
- (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
- (addr->addr.nameseq.type != TIPC_CFG_SRV)) {
- res = -EACCES;
- goto exit;
- }
+ if (unbind)
+ return tipc_sk_withdraw(tsk, ua);
+ return tipc_sk_publish(tsk, ua);
+}
- res = (addr->scope >= 0) ?
- tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
- tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
-exit:
- release_sock(sk);
+int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
+{
+ int res;
+
+ lock_sock(sock->sk);
+ res = __tipc_bind(sock, skaddr, alen);
+ release_sock(sock->sk);
return res;
}
+static int tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
+{
+ struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr;
+ u32 atype = ua->addrtype;
+
+ if (alen) {
+ if (!tipc_uaddr_valid(ua, alen))
+ return -EINVAL;
+ if (atype == TIPC_SOCKET_ADDR)
+ return -EAFNOSUPPORT;
+ if (ua->sr.type < TIPC_RESERVED_TYPES) {
+ pr_warn_once("Can't bind to reserved service type %u\n",
+ ua->sr.type);
+ return -EACCES;
+ }
+ }
+ return tipc_sk_bind(sock, skaddr, alen);
+}
+
/**
* tipc_getname - get port ID of socket or peer socket
* @sock: socket structure
* @uaddr: area for returned socket address
- * @uaddr_len: area for returned length of socket address
* @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*
* NOTE: This routine doesn't need to take the socket lock since it only
* accesses socket information that is unchanging (or which changes in
@@ -733,7 +762,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
addr->addr.id.node = tipc_own_addr(sock_net(sk));
}
- addr->addrtype = TIPC_ADDR_ID;
+ addr->addrtype = TIPC_SOCKET_ADDR;
addr->family = AF_TIPC;
addr->scope = 0;
addr->addr.name.domain = 0;
@@ -747,7 +776,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
* @sock: socket for which to calculate the poll bits
* @wait: ???
*
- * Returns pollmask value
+ * Return: pollmask value
*
* COMMENTARY:
* It appears that the usual socket locking mechanisms are not useful here
@@ -778,7 +807,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
case TIPC_ESTABLISHED:
if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
revents |= EPOLLOUT;
- /* fall through */
+ fallthrough;
case TIPC_LISTEN:
case TIPC_CONNECTING:
if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
@@ -803,15 +832,15 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
/**
* tipc_sendmcast - send multicast message
* @sock: socket structure
- * @seq: destination address
+ * @ua: destination address struct
* @msg: message to send
* @dlen: length of data to send
* @timeout: timeout to wait for wakeup
*
* Called from function tipc_sendmsg(), which has done all sanity checks
- * Returns the number of bytes sent on success, or errno
+ * Return: the number of bytes sent on success, or errno
*/
-static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
+static int tipc_sendmcast(struct socket *sock, struct tipc_uaddr *ua,
struct msghdr *msg, size_t dlen, long timeout)
{
struct sock *sk = sock->sk;
@@ -819,7 +848,6 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
struct tipc_msg *hdr = &tsk->phdr;
struct net *net = sock_net(sk);
int mtu = tipc_bcast_get_mtu(net);
- struct tipc_mc_method *method = &tsk->mc_method;
struct sk_buff_head pkts;
struct tipc_nlist dsts;
int rc;
@@ -834,8 +862,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
/* Lookup destination nodes */
tipc_nlist_init(&dsts, tipc_own_addr(net));
- tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower,
- seq->upper, &dsts);
+ tipc_nametbl_lookup_mcast_nodes(net, ua, &dsts);
if (!dsts.local && !dsts.remote)
return -EHOSTUNREACH;
@@ -845,9 +872,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
msg_set_destport(hdr, 0);
msg_set_destnode(hdr, 0);
- msg_set_nametype(hdr, seq->type);
- msg_set_namelower(hdr, seq->lower);
- msg_set_nameupper(hdr, seq->upper);
+ msg_set_nametype(hdr, ua->sr.type);
+ msg_set_namelower(hdr, ua->sr.lower);
+ msg_set_nameupper(hdr, ua->sr.upper);
/* Build message as chain of buffers */
__skb_queue_head_init(&pkts);
@@ -857,7 +884,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
if (unlikely(rc == dlen)) {
trace_tipc_sk_sendmcast(sk, skb_peek(&pkts),
TIPC_DUMP_SK_SNDQ, " ");
- rc = tipc_mcast_xmit(net, &pkts, method, &dsts,
+ rc = tipc_mcast_xmit(net, &pkts, &tsk->mc_method, &dsts,
&tsk->cong_link_cnt);
}
@@ -869,6 +896,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
/**
* tipc_send_group_msg - send a message to a member in the group
* @net: network namespace
+ * @tsk: tipc socket
* @m: message to send
* @mb: group member
* @dnode: destination node
@@ -924,13 +952,13 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
* @timeout: timeout to wait for wakeup
*
* Called from function tipc_sendmsg(), which has done all sanity checks
- * Returns the number of bytes sent on success, or errno
+ * Return: the number of bytes sent on success, or errno
*/
static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
int dlen, long timeout)
{
struct sock *sk = sock->sk;
- DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
int blks = tsk_blocks(GROUP_H_SIZE + dlen);
struct tipc_sock *tsk = tipc_sk(sk);
struct net *net = sock_net(sk);
@@ -938,8 +966,8 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
u32 node, port;
int rc;
- node = dest->addr.id.node;
- port = dest->addr.id.ref;
+ node = ua->sk.node;
+ port = ua->sk.ref;
if (!port && !node)
return -EHOSTUNREACH;
@@ -968,12 +996,12 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
* @timeout: timeout to wait for wakeup
*
* Called from function tipc_sendmsg(), which has done all sanity checks
- * Returns the number of bytes sent on success, or errno
+ * Return: the number of bytes sent on success, or errno
*/
static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
int dlen, long timeout)
{
- DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
struct list_head *cong_links = &tsk->cong_links;
@@ -984,16 +1012,13 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
struct net *net = sock_net(sk);
u32 node, port, exclude;
struct list_head dsts;
- u32 type, inst, scope;
int lookups = 0;
int dstcnt, rc;
bool cong;
INIT_LIST_HEAD(&dsts);
-
- type = msg_nametype(hdr);
- inst = dest->addr.name.name.instance;
- scope = msg_lookup_scope(hdr);
+ ua->sa.type = msg_nametype(hdr);
+ ua->scope = msg_lookup_scope(hdr);
while (++lookups < 4) {
exclude = tipc_group_exclude(tsk->group);
@@ -1002,8 +1027,8 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
/* Look for a non-congested destination member, if any */
while (1) {
- if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
- &dstcnt, exclude, false))
+ if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt,
+ exclude, false))
return -EHOSTUNREACH;
tipc_dest_pop(&dsts, &node, &port);
cong = tipc_group_cong(tsk->group, node, port, blks,
@@ -1047,18 +1072,18 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
/**
* tipc_send_group_bcast - send message to all members in communication group
- * @sk: socket structure
+ * @sock: socket structure
* @m: message to send
* @dlen: total length of message data
* @timeout: timeout to wait for wakeup
*
* Called from function tipc_sendmsg(), which has done all sanity checks
- * Returns the number of bytes sent on success, or errno
+ * Return: the number of bytes sent on success, or errno
*/
static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
int dlen, long timeout)
{
- DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
struct tipc_sock *tsk = tipc_sk(sk);
@@ -1083,9 +1108,9 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
return -EHOSTUNREACH;
/* Complete message header */
- if (dest) {
+ if (ua) {
msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
- msg_set_nameinst(hdr, dest->addr.name.name.instance);
+ msg_set_nameinst(hdr, ua->sa.instance);
} else {
msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
msg_set_nameinst(hdr, 0);
@@ -1127,34 +1152,30 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
* @timeout: timeout to wait for wakeup
*
* Called from function tipc_sendmsg(), which has done all sanity checks
- * Returns the number of bytes sent on success, or errno
+ * Return: the number of bytes sent on success, or errno
*/
static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
int dlen, long timeout)
{
+ struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
struct sock *sk = sock->sk;
- DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = &tsk->phdr;
struct net *net = sock_net(sk);
- u32 type, inst, scope, exclude;
struct list_head dsts;
- u32 dstcnt;
+ u32 dstcnt, exclude;
INIT_LIST_HEAD(&dsts);
-
- type = msg_nametype(hdr);
- inst = dest->addr.name.name.instance;
- scope = msg_lookup_scope(hdr);
+ ua->sa.type = msg_nametype(hdr);
+ ua->scope = msg_lookup_scope(hdr);
exclude = tipc_group_exclude(grp);
- if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
- &dstcnt, exclude, true))
+ if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, exclude, true))
return -EHOSTUNREACH;
if (dstcnt == 1) {
- tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
+ tipc_dest_pop(&dsts, &ua->sk.node, &ua->sk.ref);
return tipc_send_group_unicast(sock, m, dlen, timeout);
}
@@ -1164,6 +1185,7 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
/**
* tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
+ * @net: the associated network namespace
* @arrvq: queue with arriving messages, to be cloned after destination lookup
* @inputq: queue with cloned messages, delivered to socket after dest lookup
*
@@ -1173,18 +1195,19 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
struct sk_buff_head *inputq)
{
u32 self = tipc_own_addr(net);
- u32 type, lower, upper, scope;
struct sk_buff *skb, *_skb;
u32 portid, onode;
struct sk_buff_head tmpq;
struct list_head dports;
struct tipc_msg *hdr;
+ struct tipc_uaddr ua;
int user, mtyp, hlen;
- bool exact;
__skb_queue_head_init(&tmpq);
INIT_LIST_HEAD(&dports);
+ ua.addrtype = TIPC_SERVICE_RANGE;
+ /* tipc_skb_peek() increments the head skb's reference counter */
skb = tipc_skb_peek(arrvq, &inputq->lock);
for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
hdr = buf_msg(skb);
@@ -1192,7 +1215,13 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
mtyp = msg_type(hdr);
hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
onode = msg_orignode(hdr);
- type = msg_nametype(hdr);
+ ua.sr.type = msg_nametype(hdr);
+ ua.sr.lower = msg_namelower(hdr);
+ ua.sr.upper = msg_nameupper(hdr);
+ if (onode == self)
+ ua.scope = TIPC_ANY_SCOPE;
+ else
+ ua.scope = TIPC_CLUSTER_SCOPE;
if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
spin_lock_bh(&inputq->lock);
@@ -1207,24 +1236,13 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
/* Group messages require exact scope match */
if (msg_in_group(hdr)) {
- lower = 0;
- upper = ~0;
- scope = msg_lookup_scope(hdr);
- exact = true;
- } else {
- /* TIPC_NODE_SCOPE means "any scope" in this context */
- if (onode == self)
- scope = TIPC_NODE_SCOPE;
- else
- scope = TIPC_CLUSTER_SCOPE;
- exact = false;
- lower = msg_namelower(hdr);
- upper = msg_nameupper(hdr);
+ ua.sr.lower = 0;
+ ua.sr.upper = ~0;
+ ua.scope = msg_lookup_scope(hdr);
}
/* Create destination port list: */
- tipc_nametbl_mc_lookup(net, type, lower, upper,
- scope, exact, &dports);
+ tipc_nametbl_lookup_mcast_sockets(net, &ua, &dports);
/* Clone message per destination */
while (tipc_dest_pop(&dports, NULL, &portid)) {
@@ -1236,10 +1254,11 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
}
pr_warn("Failed to clone mcast rcv buffer\n");
}
- /* Append to inputq if not already done by other thread */
+ /* Append clones to inputq only if skb is still head of arrvq */
spin_lock_bh(&inputq->lock);
if (skb_peek(arrvq) == skb) {
skb_queue_splice_tail_init(&tmpq, inputq);
+ /* Decrement the skb's refcnt */
kfree_skb(__skb_dequeue(arrvq));
}
spin_unlock_bh(&inputq->lock);
@@ -1252,14 +1271,37 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
/* tipc_sk_push_backlog(): send accumulated buffers in socket write queue
* when socket is in Nagle mode
*/
-static void tipc_sk_push_backlog(struct tipc_sock *tsk)
+static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack)
{
struct sk_buff_head *txq = &tsk->sk.sk_write_queue;
+ struct sk_buff *skb = skb_peek_tail(txq);
struct net *net = sock_net(&tsk->sk);
u32 dnode = tsk_peer_node(tsk);
- struct sk_buff *skb = skb_peek(txq);
int rc;
+ if (nagle_ack) {
+ tsk->pkt_cnt += skb_queue_len(txq);
+ if (!tsk->pkt_cnt || tsk->msg_acc / tsk->pkt_cnt < 2) {
+ tsk->oneway = 0;
+ if (tsk->nagle_start < NAGLE_START_MAX)
+ tsk->nagle_start *= 2;
+ tsk->expect_ack = false;
+ pr_debug("tsk %10u: bad nagle %u -> %u, next start %u!\n",
+ tsk->portid, tsk->msg_acc, tsk->pkt_cnt,
+ tsk->nagle_start);
+ } else {
+ tsk->nagle_start = NAGLE_START_INIT;
+ if (skb) {
+ msg_set_ack_required(buf_msg(skb));
+ tsk->expect_ack = true;
+ } else {
+ tsk->expect_ack = false;
+ }
+ }
+ tsk->msg_acc = 0;
+ tsk->pkt_cnt = 0;
+ }
+
if (!skb || tsk->cong_link_cnt)
return;
@@ -1267,9 +1309,10 @@ static void tipc_sk_push_backlog(struct tipc_sock *tsk)
if (msg_is_syn(buf_msg(skb)))
return;
+ if (tsk->msg_acc)
+ tsk->pkt_cnt += skb_queue_len(txq);
tsk->snt_unacked += tsk->snd_backlog;
tsk->snd_backlog = 0;
- tsk->expect_ack = true;
rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
if (rc == -ELINKCONG)
tsk->cong_link_cnt = 1;
@@ -1279,6 +1322,8 @@ static void tipc_sk_push_backlog(struct tipc_sock *tsk)
* tipc_sk_conn_proto_rcv - receive a connection mng protocol message
* @tsk: receiving socket
* @skb: pointer to message buffer.
+ * @inputq: buffer list containing the buffers
+ * @xmitq: output message area
*/
static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
struct sk_buff_head *inputq,
@@ -1322,8 +1367,7 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
return;
} else if (mtyp == CONN_ACK) {
was_cong = tsk_conn_cong(tsk);
- tsk->expect_ack = false;
- tipc_sk_push_backlog(tsk);
+ tipc_sk_push_backlog(tsk, msg_nagle_ack(hdr));
tsk->snt_unacked -= msg_conn_ack(hdr);
if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
tsk->snd_win = msg_adv_win(hdr);
@@ -1347,7 +1391,7 @@ exit:
* and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
* (Note: 'SYN+' is prohibited on SOCK_STREAM.)
*
- * Returns the number of bytes sent on success, or errno otherwise
+ * Return: the number of bytes sent on success, or errno otherwise
*/
static int tipc_sendmsg(struct socket *sock,
struct msghdr *m, size_t dsz)
@@ -1367,44 +1411,43 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
struct tipc_sock *tsk = tipc_sk(sk);
- DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
struct list_head *clinks = &tsk->cong_links;
bool syn = !tipc_sk_type_connectionless(sk);
struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = &tsk->phdr;
- struct tipc_name_seq *seq;
+ struct tipc_socket_addr skaddr;
struct sk_buff_head pkts;
- u32 dport = 0, dnode = 0;
- u32 type = 0, inst = 0;
- int mtu, rc;
+ int atype, mtu, rc;
if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
return -EMSGSIZE;
- if (likely(dest)) {
- if (unlikely(m->msg_namelen < sizeof(*dest)))
- return -EINVAL;
- if (unlikely(dest->family != AF_TIPC))
+ if (ua) {
+ if (!tipc_uaddr_valid(ua, m->msg_namelen))
return -EINVAL;
+ atype = ua->addrtype;
}
+ /* If socket belongs to a communication group follow other paths */
if (grp) {
- if (!dest)
+ if (!ua)
return tipc_send_group_bcast(sock, m, dlen, timeout);
- if (dest->addrtype == TIPC_ADDR_NAME)
+ if (atype == TIPC_SERVICE_ADDR)
return tipc_send_group_anycast(sock, m, dlen, timeout);
- if (dest->addrtype == TIPC_ADDR_ID)
+ if (atype == TIPC_SOCKET_ADDR)
return tipc_send_group_unicast(sock, m, dlen, timeout);
- if (dest->addrtype == TIPC_ADDR_MCAST)
+ if (atype == TIPC_SERVICE_RANGE)
return tipc_send_group_mcast(sock, m, dlen, timeout);
return -EINVAL;
}
- if (unlikely(!dest)) {
- dest = &tsk->peer;
- if (!syn && dest->family != AF_TIPC)
+ if (!ua) {
+ ua = (struct tipc_uaddr *)&tsk->peer;
+ if (!syn && ua->family != AF_TIPC)
return -EDESTADDRREQ;
+ atype = ua->addrtype;
}
if (unlikely(syn)) {
@@ -1414,54 +1457,51 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
return -EISCONN;
if (tsk->published)
return -EOPNOTSUPP;
- if (dest->addrtype == TIPC_ADDR_NAME) {
- tsk->conn_type = dest->addr.name.name.type;
- tsk->conn_instance = dest->addr.name.name.instance;
- }
+ if (atype == TIPC_SERVICE_ADDR)
+ tsk->conn_addrtype = atype;
msg_set_syn(hdr, 1);
}
- seq = &dest->addr.nameseq;
- if (dest->addrtype == TIPC_ADDR_MCAST)
- return tipc_sendmcast(sock, seq, m, dlen, timeout);
+ memset(&skaddr, 0, sizeof(skaddr));
- if (dest->addrtype == TIPC_ADDR_NAME) {
- type = dest->addr.name.name.type;
- inst = dest->addr.name.name.instance;
- dnode = dest->addr.name.domain;
- dport = tipc_nametbl_translate(net, type, inst, &dnode);
- if (unlikely(!dport && !dnode))
+ /* Determine destination */
+ if (atype == TIPC_SERVICE_RANGE) {
+ return tipc_sendmcast(sock, ua, m, dlen, timeout);
+ } else if (atype == TIPC_SERVICE_ADDR) {
+ skaddr.node = ua->lookup_node;
+ ua->scope = tipc_node2scope(skaddr.node);
+ if (!tipc_nametbl_lookup_anycast(net, ua, &skaddr))
return -EHOSTUNREACH;
- } else if (dest->addrtype == TIPC_ADDR_ID) {
- dnode = dest->addr.id.node;
+ } else if (atype == TIPC_SOCKET_ADDR) {
+ skaddr = ua->sk;
} else {
return -EINVAL;
}
/* Block or return if destination link is congested */
rc = tipc_wait_for_cond(sock, &timeout,
- !tipc_dest_find(clinks, dnode, 0));
+ !tipc_dest_find(clinks, skaddr.node, 0));
if (unlikely(rc))
return rc;
- if (dest->addrtype == TIPC_ADDR_NAME) {
+ /* Finally build message header */
+ msg_set_destnode(hdr, skaddr.node);
+ msg_set_destport(hdr, skaddr.ref);
+ if (atype == TIPC_SERVICE_ADDR) {
msg_set_type(hdr, TIPC_NAMED_MSG);
msg_set_hdr_sz(hdr, NAMED_H_SIZE);
- msg_set_nametype(hdr, type);
- msg_set_nameinst(hdr, inst);
- msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
- msg_set_destnode(hdr, dnode);
- msg_set_destport(hdr, dport);
- } else { /* TIPC_ADDR_ID */
+ msg_set_nametype(hdr, ua->sa.type);
+ msg_set_nameinst(hdr, ua->sa.instance);
+ msg_set_lookup_scope(hdr, ua->scope);
+ } else { /* TIPC_SOCKET_ADDR */
msg_set_type(hdr, TIPC_DIRECT_MSG);
msg_set_lookup_scope(hdr, 0);
- msg_set_destnode(hdr, dnode);
- msg_set_destport(hdr, dest->addr.id.ref);
msg_set_hdr_sz(hdr, BASIC_H_SIZE);
}
+ /* Add message body */
__skb_queue_head_init(&pkts);
- mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
+ mtu = tipc_node_get_mtu(net, skaddr.node, tsk->portid, true);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
@@ -1470,16 +1510,22 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
return -ENOMEM;
}
+ /* Send message */
trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " ");
- rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
+ rc = tipc_node_xmit(net, &pkts, skaddr.node, tsk->portid);
if (unlikely(rc == -ELINKCONG)) {
- tipc_dest_push(clinks, dnode, 0);
+ tipc_dest_push(clinks, skaddr.node, 0);
tsk->cong_link_cnt++;
rc = 0;
}
- if (unlikely(syn && !rc))
+ if (unlikely(syn && !rc)) {
tipc_set_sk_state(sk, TIPC_CONNECTING);
+ if (dlen && timeout) {
+ timeout = msecs_to_jiffies(timeout);
+ tipc_wait_for_connect(sock, &timeout);
+ }
+ }
return rc ? rc : dlen;
}
@@ -1492,7 +1538,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
*
* Used for SOCK_STREAM data.
*
- * Returns the number of bytes sent on success (or partial success),
+ * Return: the number of bytes sent on success (or partial success),
* or errno if no data sent
*/
static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz)
@@ -1516,6 +1562,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *hdr = &tsk->phdr;
struct net *net = sock_net(sk);
+ struct sk_buff *skb;
u32 dnode = tsk_peer_node(tsk);
int maxnagle = tsk->maxnagle;
int maxpkt = tsk->max_pkt;
@@ -1526,7 +1573,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
return -EMSGSIZE;
/* Handle implicit connection setup */
- if (unlikely(dest)) {
+ if (unlikely(dest && sk->sk_state == TIPC_OPEN)) {
rc = __tipc_sendmsg(sock, m, dlen);
if (dlen && dlen == rc) {
tsk->peer_caps = tipc_node_get_capabilities(net, dnode);
@@ -1544,17 +1591,30 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
break;
send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
blocks = tsk->snd_backlog;
- if (tsk->oneway++ >= 4 && send <= maxnagle) {
+ if (tsk->oneway++ >= tsk->nagle_start && maxnagle &&
+ send <= maxnagle) {
rc = tipc_msg_append(hdr, m, send, maxnagle, txq);
if (unlikely(rc < 0))
break;
blocks += rc;
+ tsk->msg_acc++;
if (blocks <= 64 && tsk->expect_ack) {
tsk->snd_backlog = blocks;
sent += send;
break;
+ } else if (blocks > 64) {
+ tsk->pkt_cnt += skb_queue_len(txq);
+ } else {
+ skb = skb_peek_tail(txq);
+ if (skb) {
+ msg_set_ack_required(buf_msg(skb));
+ tsk->expect_ack = true;
+ } else {
+ tsk->expect_ack = false;
+ }
+ tsk->msg_acc = 0;
+ tsk->pkt_cnt = 0;
}
- tsk->expect_ack = true;
} else {
rc = tipc_msg_build(hdr, m, sent, send, maxpkt, txq);
if (unlikely(rc != send))
@@ -1586,7 +1646,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
*
* Used for SOCK_SEQPACKET messages.
*
- * Returns the number of bytes sent on success, or errno otherwise
+ * Return: the number of bytes sent on success, or errno otherwise
*/
static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
{
@@ -1630,7 +1690,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
/**
* tipc_sk_set_orig_addr - capture sender's address for received message
* @m: descriptor for message info
- * @hdr: received message header
+ * @skb: received message
*
* Note: Address is not captured if not requested by receiver.
*/
@@ -1643,7 +1703,7 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
return;
srcaddr->sock.family = AF_TIPC;
- srcaddr->sock.addrtype = TIPC_ADDR_ID;
+ srcaddr->sock.addrtype = TIPC_SOCKET_ADDR;
srcaddr->sock.scope = 0;
srcaddr->sock.addr.id.ref = msg_origport(hdr);
srcaddr->sock.addr.id.node = msg_orignode(hdr);
@@ -1655,7 +1715,7 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
/* Group message users may also want to know sending member's id */
srcaddr->member.family = AF_TIPC;
- srcaddr->member.addrtype = TIPC_ADDR_NAME;
+ srcaddr->member.addrtype = TIPC_SERVICE_ADDR;
srcaddr->member.scope = 0;
srcaddr->member.addr.name.name.type = msg_nametype(hdr);
srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
@@ -1671,90 +1731,80 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
*
* Note: Ancillary data is not captured if not requested by receiver.
*
- * Returns 0 if successful, otherwise errno
+ * Return: 0 if successful, otherwise errno
*/
static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb,
struct tipc_sock *tsk)
{
- struct tipc_msg *msg;
- u32 anc_data[3];
- u32 err;
- u32 dest_type;
- int has_name;
- int res;
+ struct tipc_msg *hdr;
+ u32 data[3] = {0,};
+ bool has_addr;
+ int dlen, rc;
if (likely(m->msg_controllen == 0))
return 0;
- msg = buf_msg(skb);
- /* Optionally capture errored message object(s) */
- err = msg ? msg_errcode(msg) : 0;
- if (unlikely(err)) {
- anc_data[0] = err;
- anc_data[1] = msg_data_sz(msg);
- res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
- if (res)
- return res;
- if (anc_data[1]) {
- if (skb_linearize(skb))
- return -ENOMEM;
- msg = buf_msg(skb);
- res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
- msg_data(msg));
- if (res)
- return res;
- }
+ hdr = buf_msg(skb);
+ dlen = msg_data_sz(hdr);
+
+ /* Capture errored message object, if any */
+ if (msg_errcode(hdr)) {
+ if (skb_linearize(skb))
+ return -ENOMEM;
+ hdr = buf_msg(skb);
+ data[0] = msg_errcode(hdr);
+ data[1] = dlen;
+ rc = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, data);
+ if (rc || !dlen)
+ return rc;
+ rc = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, dlen, msg_data(hdr));
+ if (rc)
+ return rc;
}
- /* Optionally capture message destination object */
- dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
- switch (dest_type) {
+ /* Capture TIPC_SERVICE_ADDR/RANGE destination address, if any */
+ switch (msg_type(hdr)) {
case TIPC_NAMED_MSG:
- has_name = 1;
- anc_data[0] = msg_nametype(msg);
- anc_data[1] = msg_namelower(msg);
- anc_data[2] = msg_namelower(msg);
+ has_addr = true;
+ data[0] = msg_nametype(hdr);
+ data[1] = msg_namelower(hdr);
+ data[2] = data[1];
break;
case TIPC_MCAST_MSG:
- has_name = 1;
- anc_data[0] = msg_nametype(msg);
- anc_data[1] = msg_namelower(msg);
- anc_data[2] = msg_nameupper(msg);
+ has_addr = true;
+ data[0] = msg_nametype(hdr);
+ data[1] = msg_namelower(hdr);
+ data[2] = msg_nameupper(hdr);
break;
case TIPC_CONN_MSG:
- has_name = (tsk->conn_type != 0);
- anc_data[0] = tsk->conn_type;
- anc_data[1] = tsk->conn_instance;
- anc_data[2] = tsk->conn_instance;
+ has_addr = !!tsk->conn_addrtype;
+ data[0] = msg_nametype(&tsk->phdr);
+ data[1] = msg_nameinst(&tsk->phdr);
+ data[2] = data[1];
break;
default:
- has_name = 0;
+ has_addr = false;
}
- if (has_name) {
- res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
- if (res)
- return res;
- }
-
- return 0;
+ if (!has_addr)
+ return 0;
+ return put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, data);
}
-static void tipc_sk_send_ack(struct tipc_sock *tsk)
+static struct sk_buff *tipc_sk_build_ack(struct tipc_sock *tsk)
{
struct sock *sk = &tsk->sk;
- struct net *net = sock_net(sk);
struct sk_buff *skb = NULL;
struct tipc_msg *msg;
u32 peer_port = tsk_peer_port(tsk);
u32 dnode = tsk_peer_node(tsk);
if (!tipc_sk_connected(sk))
- return;
+ return NULL;
skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
dnode, tsk_own_node(tsk), peer_port,
tsk->portid, TIPC_OK);
if (!skb)
- return;
+ return NULL;
msg = buf_msg(skb);
msg_set_conn_ack(msg, tsk->rcv_unacked);
tsk->rcv_unacked = 0;
@@ -1764,7 +1814,19 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk)
tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
msg_set_adv_win(msg, tsk->rcv_win);
}
- tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
+ return skb;
+}
+
+static void tipc_sk_send_ack(struct tipc_sock *tsk)
+{
+ struct sk_buff *skb;
+
+ skb = tipc_sk_build_ack(tsk);
+ if (!skb)
+ return;
+
+ tipc_node_xmit_skb(sock_net(&tsk->sk), skb, tsk_peer_node(tsk),
+ msg_link_selector(buf_msg(skb)));
}
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
@@ -1810,6 +1872,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
/**
* tipc_recvmsg - receive packet-oriented message
+ * @sock: network socket
* @m: descriptor for message info
* @buflen: length of user buffer area
* @flags: receive flags
@@ -1817,7 +1880,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
* Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
* If the complete message doesn't fit in user area, truncate it.
*
- * Returns size of returned message data, errno otherwise
+ * Return: size of returned message data, errno otherwise
*/
static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
size_t buflen, int flags)
@@ -1826,6 +1889,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
bool connected = !tipc_sk_type_connectionless(sk);
struct tipc_sock *tsk = tipc_sk(sk);
int rc, err, hlen, dlen, copy;
+ struct tipc_skb_cb *skb_cb;
struct sk_buff_head xmitq;
struct tipc_msg *hdr;
struct sk_buff *skb;
@@ -1849,6 +1913,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
if (unlikely(rc))
goto exit;
skb = skb_peek(&sk->sk_receive_queue);
+ skb_cb = TIPC_SKB_CB(skb);
hdr = buf_msg(skb);
dlen = msg_data_sz(hdr);
hlen = msg_hdr_sz(hdr);
@@ -1868,18 +1933,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
/* Capture data if non-error msg, otherwise just set return value */
if (likely(!err)) {
- copy = min_t(int, dlen, buflen);
- if (unlikely(copy != dlen))
- m->msg_flags |= MSG_TRUNC;
- rc = skb_copy_datagram_msg(skb, hlen, m, copy);
+ int offset = skb_cb->bytes_read;
+
+ copy = min_t(int, dlen - offset, buflen);
+ rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
+ if (unlikely(rc))
+ goto exit;
+ if (unlikely(offset + copy < dlen)) {
+ if (flags & MSG_EOR) {
+ if (!(flags & MSG_PEEK))
+ skb_cb->bytes_read = offset + copy;
+ } else {
+ m->msg_flags |= MSG_TRUNC;
+ skb_cb->bytes_read = 0;
+ }
+ } else {
+ if (flags & MSG_EOR)
+ m->msg_flags |= MSG_EOR;
+ skb_cb->bytes_read = 0;
+ }
} else {
copy = 0;
rc = 0;
- if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
+ if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) {
rc = -ECONNRESET;
+ goto exit;
+ }
}
- if (unlikely(rc))
- goto exit;
/* Mark message as group event if applicable */
if (unlikely(grp_evt)) {
@@ -1902,6 +1982,9 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
tipc_node_distr_xmit(sock_net(sk), &xmitq);
}
+ if (skb_cb->bytes_read)
+ goto exit;
+
tsk_advance_rx_queue(sk);
if (likely(!connected))
@@ -1918,6 +2001,7 @@ exit:
/**
* tipc_recvstream - receive stream-oriented data
+ * @sock: network socket
* @m: descriptor for message info
* @buflen: total size of user buffer area
* @flags: receive flags
@@ -1925,7 +2009,7 @@ exit:
* Used for SOCK_STREAM messages only. If not enough data is available
* will optionally wait for more; never truncates data.
*
- * Returns size of returned message data, errno otherwise
+ * Return: size of returned message data, errno otherwise
*/
static int tipc_recvstream(struct socket *sock, struct msghdr *m,
size_t buflen, int flags)
@@ -1938,7 +2022,6 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
bool peek = flags & MSG_PEEK;
int offset, required, copy, copied = 0;
int hlen, dlen, err, rc;
- bool ack = false;
long timeout;
/* Catch invalid receive attempts */
@@ -1983,7 +2066,6 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Copy data if msg ok, otherwise return error/partial data */
if (likely(!err)) {
- ack = msg_ack_required(hdr);
offset = skb_cb->bytes_read;
copy = min_t(int, dlen - offset, buflen - copied);
rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
@@ -2011,7 +2093,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Send connection flow control advertisement when applicable */
tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
- if (ack || tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
+ if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
tipc_sk_send_ack(tsk);
/* Exit if all requested data or FIN/error received */
@@ -2043,7 +2125,6 @@ static void tipc_write_space(struct sock *sk)
/**
* tipc_data_ready - wake up threads to indicate messages have been received
* @sk: socket
- * @len: the length of messages
*/
static void tipc_data_ready(struct sock *sk)
{
@@ -2082,7 +2163,7 @@ static void tipc_sk_proto_rcv(struct sock *sk,
smp_wmb();
tsk->cong_link_cnt--;
wakeup = true;
- tipc_sk_push_backlog(tsk);
+ tipc_sk_push_backlog(tsk, false);
break;
case GROUP_PROTOCOL:
tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
@@ -2105,9 +2186,11 @@ static void tipc_sk_proto_rcv(struct sock *sk,
* tipc_sk_filter_connect - check incoming message for a connection-based socket
* @tsk: TIPC socket
* @skb: pointer to message buffer.
- * Returns true if message should be added to receive queue, false otherwise
+ * @xmitq: for Nagle ACK if any
+ * Return: true if message should be added to receive queue, false otherwise
*/
-static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
+static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
{
struct sock *sk = &tsk->sk;
struct net *net = sock_net(sk);
@@ -2169,10 +2252,21 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
return false;
case TIPC_ESTABLISHED:
if (!skb_queue_empty(&sk->sk_write_queue))
- tipc_sk_push_backlog(tsk);
+ tipc_sk_push_backlog(tsk, false);
/* Accept only connection-based messages sent by peer */
- if (likely(con_msg && !err && pport == oport && pnode == onode))
+ if (likely(con_msg && !err && pport == oport &&
+ pnode == onode)) {
+ if (msg_ack_required(hdr)) {
+ struct sk_buff *skb;
+
+ skb = tipc_sk_build_ack(tsk);
+ if (skb) {
+ msg_set_nagle_ack(buf_msg(skb));
+ __skb_queue_tail(xmitq, skb);
+ }
+ }
return true;
+ }
if (!tsk_peer_msg(tsk, hdr))
return false;
if (!err)
@@ -2207,7 +2301,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
* TIPC_HIGH_IMPORTANCE (8 MB)
* TIPC_CRITICAL_IMPORTANCE (16 MB)
*
- * Returns overload limit according to corresponding message importance
+ * Return: overload limit according to corresponding message importance
*/
static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
{
@@ -2230,12 +2324,12 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
* tipc_sk_filter_rcv - validate incoming message
* @sk: socket
* @skb: pointer to message.
+ * @xmitq: output message area (FIXME)
*
* Enqueues message on receive queue if acceptable; optionally handles
* disconnect indication for a connected socket.
*
* Called with socket lock already taken
- *
*/
static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
struct sk_buff_head *xmitq)
@@ -2267,7 +2361,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
while ((skb = __skb_dequeue(&inputq))) {
hdr = buf_msg(skb);
limit = rcvbuf_limit(sk, skb);
- if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
+ if ((sk_conn && !tipc_sk_filter_connect(tsk, skb, xmitq)) ||
(!sk_conn && msg_connected(hdr)) ||
(!grp && msg_in_group(hdr)))
err = TIPC_ERR_NO_PORT;
@@ -2325,13 +2419,14 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
* @inputq: list of incoming buffers with potentially different destinations
* @sk: socket where the buffers should be enqueued
* @dport: port number for the socket
+ * @xmitq: output queue
*
* Caller must hold socket lock
*/
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
u32 dport, struct sk_buff_head *xmitq)
{
- unsigned long time_limit = jiffies + 2;
+ unsigned long time_limit = jiffies + usecs_to_jiffies(20000);
struct sk_buff *skb;
unsigned int lim;
atomic_t *dcnt;
@@ -2377,6 +2472,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
/**
* tipc_sk_rcv - handle a chain of incoming buffers
+ * @net: the associated network namespace
* @inputq: buffer list containing the buffers
* Consumes all buffers in list until inputq is empty
* Note: may be called in multiple threads referring to the same queue
@@ -2469,7 +2565,7 @@ static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr)
* @destlen: size of socket address data structure
* @flags: file-related flags associated with socket
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
static int tipc_connect(struct socket *sock, struct sockaddr *dest,
int destlen, int flags)
@@ -2534,7 +2630,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
* case is EINPROGRESS, rather than EALREADY.
*/
res = -EINPROGRESS;
- /* fall through */
+ fallthrough;
case TIPC_CONNECTING:
if (!timeout) {
if (previous == TIPC_CONNECTING)
@@ -2562,7 +2658,7 @@ exit:
* @sock: socket structure
* @len: (unused)
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
static int tipc_listen(struct socket *sock, int len)
{
@@ -2579,7 +2675,7 @@ static int tipc_listen(struct socket *sock, int len)
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
struct sock *sk = sock->sk;
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int err;
/* True wake-one mechanism for incoming connections: only
@@ -2588,12 +2684,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
* anymore, the common case will execute the loop only once.
*/
for (;;) {
- prepare_to_wait_exclusive(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
+ add_wait_queue(sk_sleep(sk), &wait);
release_sock(sk);
- timeo = schedule_timeout(timeo);
+ timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
lock_sock(sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
}
err = 0;
if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -2605,25 +2701,26 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
if (signal_pending(current))
break;
}
- finish_wait(sk_sleep(sk), &wait);
return err;
}
/**
* tipc_accept - wait for connection request
* @sock: listening socket
- * @newsock: new socket that is to be connected
+ * @new_sock: new socket that is to be connected
* @flags: file-related flags associated with socket
+ * @kern: caused by kernel or by userspace?
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
bool kern)
{
struct sock *new_sk, *sk = sock->sk;
- struct sk_buff *buf;
struct tipc_sock *new_tsock;
+ struct msghdr m = {NULL,};
struct tipc_msg *msg;
+ struct sk_buff *buf;
long timeo;
int res;
@@ -2661,26 +2758,25 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
/* Connect new socket to it's peer */
tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
- tsk_set_importance(new_tsock, msg_importance(msg));
+ tsk_set_importance(new_sk, msg_importance(msg));
if (msg_named(msg)) {
- new_tsock->conn_type = msg_nametype(msg);
- new_tsock->conn_instance = msg_nameinst(msg);
+ new_tsock->conn_addrtype = TIPC_SERVICE_ADDR;
+ msg_set_nametype(&new_tsock->phdr, msg_nametype(msg));
+ msg_set_nameinst(&new_tsock->phdr, msg_nameinst(msg));
}
/*
- * Respond to 'SYN-' by discarding it & returning 'ACK'-.
- * Respond to 'SYN+' by queuing it on new socket.
+ * Respond to 'SYN-' by discarding it & returning 'ACK'.
+ * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'.
*/
if (!msg_data_sz(msg)) {
- struct msghdr m = {NULL,};
-
tsk_advance_rx_queue(sk);
- __tipc_sendstream(new_sock, &m, 0);
} else {
__skb_dequeue(&sk->sk_receive_queue);
__skb_queue_head(&new_sk->sk_receive_queue, buf);
skb_set_owner_r(buf, new_sk);
}
+ __tipc_sendstream(new_sock, &m, 0);
release_sock(new_sk);
exit:
release_sock(sk);
@@ -2694,7 +2790,7 @@ exit:
*
* Terminates connection (if necessary), then purges socket's receive queue.
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
static int tipc_shutdown(struct socket *sock, int how)
{
@@ -2708,18 +2804,18 @@ static int tipc_shutdown(struct socket *sock, int how)
trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " ");
__tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
- sk->sk_shutdown = SEND_SHUTDOWN;
+ sk->sk_shutdown = SHUTDOWN_MASK;
if (sk->sk_state == TIPC_DISCONNECTING) {
/* Discard any unreceived messages */
__skb_queue_purge(&sk->sk_receive_queue);
- /* Wake up anyone sleeping in poll */
- sk->sk_state_change(sk);
res = 0;
} else {
res = -ENOTCONN;
}
+ /* Wake up anyone sleeping in poll. */
+ sk->sk_state_change(sk);
release_sock(sk);
return res;
@@ -2757,7 +2853,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list)
/* Try again later if dest link is congested */
if (tsk->cong_link_cnt) {
- sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));
+ sk_reset_timer(sk, &sk->sk_timer,
+ jiffies + msecs_to_jiffies(100));
return;
}
/* Prepare SYN for retransmit */
@@ -2801,66 +2898,62 @@ static void tipc_sk_timeout(struct timer_list *t)
sock_put(sk);
}
-static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
- struct tipc_name_seq const *seq)
+static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua)
{
struct sock *sk = &tsk->sk;
struct net *net = sock_net(sk);
- struct publication *publ;
+ struct tipc_socket_addr skaddr;
+ struct publication *p;
u32 key;
- if (scope != TIPC_NODE_SCOPE)
- scope = TIPC_CLUSTER_SCOPE;
-
if (tipc_sk_connected(sk))
return -EINVAL;
key = tsk->portid + tsk->pub_count + 1;
if (key == tsk->portid)
return -EADDRINUSE;
-
- publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper,
- scope, tsk->portid, key);
- if (unlikely(!publ))
+ skaddr.ref = tsk->portid;
+ skaddr.node = tipc_own_addr(net);
+ p = tipc_nametbl_publish(net, ua, &skaddr, key);
+ if (unlikely(!p))
return -EINVAL;
- list_add(&publ->binding_sock, &tsk->publications);
+ list_add(&p->binding_sock, &tsk->publications);
tsk->pub_count++;
- tsk->published = 1;
+ tsk->published = true;
return 0;
}
-static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
- struct tipc_name_seq const *seq)
+static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua)
{
struct net *net = sock_net(&tsk->sk);
- struct publication *publ;
- struct publication *safe;
+ struct publication *safe, *p;
+ struct tipc_uaddr _ua;
int rc = -EINVAL;
- if (scope != TIPC_NODE_SCOPE)
- scope = TIPC_CLUSTER_SCOPE;
-
- list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) {
- if (seq) {
- if (publ->scope != scope)
- continue;
- if (publ->type != seq->type)
- continue;
- if (publ->lower != seq->lower)
- continue;
- if (publ->upper != seq->upper)
- break;
- tipc_nametbl_withdraw(net, publ->type, publ->lower,
- publ->upper, publ->key);
- rc = 0;
- break;
+ list_for_each_entry_safe(p, safe, &tsk->publications, binding_sock) {
+ if (!ua) {
+ tipc_uaddr(&_ua, TIPC_SERVICE_RANGE, p->scope,
+ p->sr.type, p->sr.lower, p->sr.upper);
+ tipc_nametbl_withdraw(net, &_ua, &p->sk, p->key);
+ continue;
}
- tipc_nametbl_withdraw(net, publ->type, publ->lower,
- publ->upper, publ->key);
+ /* Unbind specific publication */
+ if (p->scope != ua->scope)
+ continue;
+ if (p->sr.type != ua->sr.type)
+ continue;
+ if (p->sr.lower != ua->sr.lower)
+ continue;
+ if (p->sr.upper != ua->sr.upper)
+ break;
+ tipc_nametbl_withdraw(net, ua, &p->sk, p->key);
rc = 0;
+ break;
}
- if (list_empty(&tsk->publications))
+ if (list_empty(&tsk->publications)) {
tsk->published = 0;
+ rc = 0;
+ }
return rc;
}
@@ -2917,7 +3010,7 @@ static int tipc_sk_insert(struct tipc_sock *tsk)
struct net *net = sock_net(sk);
struct tipc_net *tn = net_generic(net, tipc_net_id);
u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1;
- u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT;
+ u32 portid = prandom_u32_max(remaining) + TIPC_MIN_PORT;
while (remaining--) {
portid++;
@@ -2977,13 +3070,15 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
struct net *net = sock_net(&tsk->sk);
struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = &tsk->phdr;
- struct tipc_name_seq seq;
+ struct tipc_uaddr ua;
int rc;
if (mreq->type < TIPC_RESERVED_TYPES)
return -EACCES;
if (mreq->scope > TIPC_NODE_SCOPE)
return -EINVAL;
+ if (mreq->scope != TIPC_NODE_SCOPE)
+ mreq->scope = TIPC_CLUSTER_SCOPE;
if (grp)
return -EACCES;
grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open);
@@ -2993,11 +3088,10 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
msg_set_lookup_scope(hdr, mreq->scope);
msg_set_nametype(hdr, mreq->type);
msg_set_dest_droppable(hdr, true);
- seq.type = mreq->type;
- seq.lower = mreq->instance;
- seq.upper = seq.lower;
- tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope);
- rc = tipc_sk_publish(tsk, mreq->scope, &seq);
+ tipc_uaddr(&ua, TIPC_SERVICE_RANGE, mreq->scope,
+ mreq->type, mreq->instance, mreq->instance);
+ tipc_nametbl_build_group(net, grp, &ua);
+ rc = tipc_sk_publish(tsk, &ua);
if (rc) {
tipc_group_delete(net, grp);
tsk->group = NULL;
@@ -3014,15 +3108,17 @@ static int tipc_sk_leave(struct tipc_sock *tsk)
{
struct net *net = sock_net(&tsk->sk);
struct tipc_group *grp = tsk->group;
- struct tipc_name_seq seq;
+ struct tipc_uaddr ua;
int scope;
if (!grp)
return -EINVAL;
- tipc_group_self(grp, &seq, &scope);
+ ua.addrtype = TIPC_SERVICE_RANGE;
+ tipc_group_self(grp, &ua.sr, &scope);
+ ua.scope = scope;
tipc_group_delete(net, grp);
tsk->group = NULL;
- tipc_sk_withdraw(tsk, scope, &seq);
+ tipc_sk_withdraw(tsk, &ua);
return 0;
}
@@ -3037,10 +3133,10 @@ static int tipc_sk_leave(struct tipc_sock *tsk)
* For stream sockets only, accepts and ignores all IPPROTO_TCP options
* (to ease compatibility).
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
- char __user *ov, unsigned int ol)
+ sockptr_t ov, unsigned int ol)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
@@ -3061,17 +3157,17 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
case TIPC_NODELAY:
if (ol < sizeof(value))
return -EINVAL;
- if (get_user(value, (u32 __user *)ov))
+ if (copy_from_sockptr(&value, ov, sizeof(u32)))
return -EFAULT;
break;
case TIPC_GROUP_JOIN:
if (ol < sizeof(mreq))
return -EINVAL;
- if (copy_from_user(&mreq, ov, sizeof(mreq)))
+ if (copy_from_sockptr(&mreq, ov, sizeof(mreq)))
return -EFAULT;
break;
default:
- if (ov || ol)
+ if (!sockptr_is_null(ov) || ol)
return -EINVAL;
}
@@ -3079,7 +3175,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
switch (opt) {
case TIPC_IMPORTANCE:
- res = tsk_set_importance(tsk, value);
+ res = tsk_set_importance(sk, value);
break;
case TIPC_SRC_DROPPABLE:
if (sock->type != SOCK_STREAM)
@@ -3131,14 +3227,14 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
* For stream sockets only, returns 0 length result for all IPPROTO_TCP options
* (to ease compatibility).
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
char __user *ov, int __user *ol)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_name_seq seq;
+ struct tipc_service_range seq;
int len, scope;
u32 value;
int res;
@@ -3239,12 +3335,12 @@ static int tipc_socketpair(struct socket *sock1, struct socket *sock2)
u32 onode = tipc_own_addr(sock_net(sock1->sk));
tsk1->peer.family = AF_TIPC;
- tsk1->peer.addrtype = TIPC_ADDR_ID;
+ tsk1->peer.addrtype = TIPC_SOCKET_ADDR;
tsk1->peer.scope = TIPC_NODE_SCOPE;
tsk1->peer.addr.id.ref = tsk2->portid;
tsk1->peer.addr.id.node = onode;
tsk2->peer.family = AF_TIPC;
- tsk2->peer.addrtype = TIPC_ADDR_ID;
+ tsk2->peer.addrtype = TIPC_SOCKET_ADDR;
tsk2->peer.scope = TIPC_NODE_SCOPE;
tsk2->peer.addr.id.ref = tsk1->portid;
tsk2->peer.addr.id.node = onode;
@@ -3335,7 +3431,7 @@ static struct proto tipc_proto = {
/**
* tipc_socket_init - initialize TIPC socket interface
*
- * Returns 0 on success, errno otherwise
+ * Return: 0 on success, errno otherwise
*/
int tipc_socket_init(void)
{
@@ -3369,13 +3465,14 @@ void tipc_socket_stop(void)
/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
{
- u32 peer_node;
- u32 peer_port;
+ u32 peer_node, peer_port;
+ u32 conn_type, conn_instance;
struct nlattr *nest;
peer_node = tsk_peer_node(tsk);
peer_port = tsk_peer_port(tsk);
-
+ conn_type = msg_nametype(&tsk->phdr);
+ conn_instance = msg_nameinst(&tsk->phdr);
nest = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_CON);
if (!nest)
return -EMSGSIZE;
@@ -3385,12 +3482,12 @@ static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port))
goto msg_full;
- if (tsk->conn_type != 0) {
+ if (tsk->conn_addrtype != 0) {
if (nla_put_flag(skb, TIPC_NLA_CON_FLAG))
goto msg_full;
- if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type))
+ if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, conn_type))
goto msg_full;
- if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance))
+ if (nla_put_u32(skb, TIPC_NLA_CON_INST, conn_instance))
goto msg_full;
}
nla_nest_end(skb, nest);
@@ -3621,11 +3718,11 @@ static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
goto attr_msg_cancel;
- if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type))
+ if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->sr.type))
goto attr_msg_cancel;
- if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower))
+ if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->sr.lower))
goto attr_msg_cancel;
- if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper))
+ if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->sr.upper))
goto attr_msg_cancel;
nla_nest_end(skb, attrs);
@@ -3654,7 +3751,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
if (p->key == *last_publ)
break;
}
- if (p->key != *last_publ) {
+ if (list_entry_is_head(p, &tsk->publications, binding_sock)) {
/* We never set seq or call nl_dump_check_consistent()
* this means that setting prev_seq here will cause the
* consistence check to fail in the netlink callback
@@ -3734,10 +3831,11 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
/**
* tipc_sk_filtering - check if a socket should be traced
* @sk: the socket to be examined
- * @sysctl_tipc_sk_filter[]: the socket tuple for filtering,
- * (portid, sock type, name type, name lower, name upper)
*
- * Returns true if the socket meets the socket tuple data
+ * @sysctl_tipc_sk_filter is used as the socket tuple for filtering:
+ * (portid, sock type, name type, name lower, name upper)
+ *
+ * Return: true if the socket meets the socket tuple data
* (value 0 = 'any') or when there is no tuple set (all = 0),
* otherwise false
*/
@@ -3772,16 +3870,16 @@ bool tipc_sk_filtering(struct sock *sk)
p = list_first_entry_or_null(&tsk->publications,
struct publication, binding_sock);
if (p) {
- type = p->type;
- lower = p->lower;
- upper = p->upper;
+ type = p->sr.type;
+ lower = p->sr.lower;
+ upper = p->sr.upper;
}
}
if (!tipc_sk_type_connectionless(sk)) {
- type = tsk->conn_type;
- lower = tsk->conn_instance;
- upper = tsk->conn_instance;
+ type = msg_nametype(&tsk->phdr);
+ lower = msg_nameinst(&tsk->phdr);
+ upper = lower;
}
if ((_type && _type != type) || (_lower && _lower != lower) ||
@@ -3802,7 +3900,7 @@ u32 tipc_sock_get_portid(struct sock *sk)
* @sk: tipc sk to be checked
* @skb: tipc msg to be checked
*
- * Returns true if the socket rx queue allocation is > 90%, otherwise false
+ * Return: true if the socket rx queue allocation is > 90%, otherwise false
*/
bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb)
@@ -3820,7 +3918,7 @@ bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb)
* @sk: tipc sk to be checked
* @skb: tipc msg to be checked
*
- * Returns true if the socket rx queue allocation is > 90%, otherwise false
+ * Return: true if the socket rx queue allocation is > 90%, otherwise false
*/
bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb)
@@ -3846,6 +3944,7 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf)
{
int i = 0;
size_t sz = (dqueues) ? SK_LMAX : SK_LMIN;
+ u32 conn_type, conn_instance;
struct tipc_sock *tsk;
struct publication *p;
bool tsk_connected;
@@ -3866,16 +3965,18 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf)
if (tsk_connected) {
i += scnprintf(buf + i, sz - i, " %x", tsk_peer_node(tsk));
i += scnprintf(buf + i, sz - i, " %u", tsk_peer_port(tsk));
- i += scnprintf(buf + i, sz - i, " %u", tsk->conn_type);
- i += scnprintf(buf + i, sz - i, " %u", tsk->conn_instance);
+ conn_type = msg_nametype(&tsk->phdr);
+ conn_instance = msg_nameinst(&tsk->phdr);
+ i += scnprintf(buf + i, sz - i, " %u", conn_type);
+ i += scnprintf(buf + i, sz - i, " %u", conn_instance);
}
i += scnprintf(buf + i, sz - i, " | %u", tsk->published);
if (tsk->published) {
p = list_first_entry_or_null(&tsk->publications,
struct publication, binding_sock);
- i += scnprintf(buf + i, sz - i, " %u", (p) ? p->type : 0);
- i += scnprintf(buf + i, sz - i, " %u", (p) ? p->lower : 0);
- i += scnprintf(buf + i, sz - i, " %u", (p) ? p->upper : 0);
+ i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.type : 0);
+ i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.lower : 0);
+ i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.upper : 0);
}
i += scnprintf(buf + i, sz - i, " | %u", tsk->snd_win);
i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_win);
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 235b9679acee..02cdf166807d 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -74,5 +74,7 @@ int tipc_dump_done(struct netlink_callback *cb);
u32 tipc_sock_get_portid(struct sock *sk);
bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb);
bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb);
+int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen);
+int tsk_set_importance(struct sock *sk, int imp);
#endif
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index f340e53da625..05d49ad81290 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -3,6 +3,7 @@
*
* Copyright (c) 2000-2017, Ericsson AB
* Copyright (c) 2005-2007, 2010-2013, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -39,75 +40,75 @@
#include "subscr.h"
static void tipc_sub_send_event(struct tipc_subscription *sub,
- u32 found_lower, u32 found_upper,
- u32 event, u32 port, u32 node)
+ struct publication *p,
+ u32 event)
{
+ struct tipc_subscr *s = &sub->evt.s;
struct tipc_event *evt = &sub->evt;
if (sub->inactive)
return;
tipc_evt_write(evt, event, event);
- tipc_evt_write(evt, found_lower, found_lower);
- tipc_evt_write(evt, found_upper, found_upper);
- tipc_evt_write(evt, port.ref, port);
- tipc_evt_write(evt, port.node, node);
+ if (p) {
+ tipc_evt_write(evt, found_lower, p->sr.lower);
+ tipc_evt_write(evt, found_upper, p->sr.upper);
+ tipc_evt_write(evt, port.ref, p->sk.ref);
+ tipc_evt_write(evt, port.node, p->sk.node);
+ } else {
+ tipc_evt_write(evt, found_lower, s->seq.lower);
+ tipc_evt_write(evt, found_upper, s->seq.upper);
+ tipc_evt_write(evt, port.ref, 0);
+ tipc_evt_write(evt, port.node, 0);
+ }
tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt);
}
/**
- * tipc_sub_check_overlap - test for subscription overlap with the
- * given values
+ * tipc_sub_check_overlap - test for subscription overlap with the given values
+ * @subscribed: the service range subscribed for
+ * @found: the service range we are checking for match
*
- * Returns 1 if there is overlap, otherwise 0.
+ * Returns true if there is overlap, otherwise false.
*/
-int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
- u32 found_upper)
+static bool tipc_sub_check_overlap(struct tipc_service_range *subscribed,
+ struct tipc_service_range *found)
{
- if (found_lower < seq->lower)
- found_lower = seq->lower;
- if (found_upper > seq->upper)
- found_upper = seq->upper;
- if (found_lower > found_upper)
- return 0;
- return 1;
+ u32 found_lower = found->lower;
+ u32 found_upper = found->upper;
+
+ if (found_lower < subscribed->lower)
+ found_lower = subscribed->lower;
+ if (found_upper > subscribed->upper)
+ found_upper = subscribed->upper;
+ return found_lower <= found_upper;
}
void tipc_sub_report_overlap(struct tipc_subscription *sub,
- u32 found_lower, u32 found_upper,
- u32 event, u32 port, u32 node,
- u32 scope, int must)
+ struct publication *p,
+ u32 event, bool must)
{
- struct tipc_subscr *s = &sub->evt.s;
- u32 filter = tipc_sub_read(s, filter);
- struct tipc_name_seq seq;
+ struct tipc_service_range *sr = &sub->s.seq;
+ u32 filter = sub->s.filter;
- seq.type = tipc_sub_read(s, seq.type);
- seq.lower = tipc_sub_read(s, seq.lower);
- seq.upper = tipc_sub_read(s, seq.upper);
-
- if (!tipc_sub_check_overlap(&seq, found_lower, found_upper))
+ if (!tipc_sub_check_overlap(sr, &p->sr))
return;
-
if (!must && !(filter & TIPC_SUB_PORTS))
return;
- if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE)
+ if (filter & TIPC_SUB_CLUSTER_SCOPE && p->scope == TIPC_NODE_SCOPE)
return;
- if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE)
+ if (filter & TIPC_SUB_NODE_SCOPE && p->scope != TIPC_NODE_SCOPE)
return;
spin_lock(&sub->lock);
- tipc_sub_send_event(sub, found_lower, found_upper,
- event, port, node);
+ tipc_sub_send_event(sub, p, event);
spin_unlock(&sub->lock);
}
static void tipc_sub_timeout(struct timer_list *t)
{
struct tipc_subscription *sub = from_timer(sub, t, timer);
- struct tipc_subscr *s = &sub->evt.s;
spin_lock(&sub->lock);
- tipc_sub_send_event(sub, s->seq.lower, s->seq.upper,
- TIPC_SUBSCR_TIMEOUT, 0, 0);
+ tipc_sub_send_event(sub, NULL, TIPC_SUBSCR_TIMEOUT);
sub->inactive = true;
spin_unlock(&sub->lock);
}
@@ -131,12 +132,14 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net,
struct tipc_subscr *s,
int conid)
{
+ u32 lower = tipc_sub_read(s, seq.lower);
+ u32 upper = tipc_sub_read(s, seq.upper);
u32 filter = tipc_sub_read(s, filter);
struct tipc_subscription *sub;
u32 timeout;
if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) ||
- (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) {
+ lower > upper) {
pr_warn("Subscription rejected, illegal request\n");
return NULL;
}
@@ -151,6 +154,12 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net,
sub->conid = conid;
sub->inactive = false;
memcpy(&sub->evt.s, s, sizeof(*s));
+ sub->s.seq.type = tipc_sub_read(s, seq.type);
+ sub->s.seq.lower = lower;
+ sub->s.seq.upper = upper;
+ sub->s.filter = filter;
+ sub->s.timeout = tipc_sub_read(s, timeout);
+ memcpy(sub->s.usr_handle, s->usr_handle, 8);
spin_lock_init(&sub->lock);
kref_init(&sub->kref);
if (!tipc_nametbl_subscribe(sub)) {
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index aa015c233898..60b877531b66 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -3,6 +3,7 @@
*
* Copyright (c) 2003-2017, Ericsson AB
* Copyright (c) 2005-2007, 2012-2013, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -42,41 +43,43 @@
#define TIPC_MAX_SUBSCR 65535
#define TIPC_MAX_PUBL 65535
+struct publication;
struct tipc_subscription;
struct tipc_conn;
/**
* struct tipc_subscription - TIPC network topology subscription object
- * @subscriber: pointer to its subscriber
- * @seq: name sequence associated with subscription
+ * @s: host-endian copy of the user subscription
+ * @evt: template for events generated by subscription
+ * @kref: reference count for this subscription
+ * @net: network namespace associated with subscription
* @timer: timer governing subscription duration (optional)
- * @nameseq_list: adjacent subscriptions in name sequence's subscription list
+ * @service_list: adjacent subscriptions in name sequence's subscription list
* @sub_list: adjacent subscriptions in subscriber's subscription list
- * @evt: template for events generated by subscription
+ * @conid: connection identifier of topology server
+ * @inactive: true if this subscription is inactive
+ * @lock: serialize up/down and timer events
*/
struct tipc_subscription {
+ struct tipc_subscr s;
+ struct tipc_event evt;
struct kref kref;
struct net *net;
struct timer_list timer;
struct list_head service_list;
struct list_head sub_list;
- struct tipc_event evt;
int conid;
bool inactive;
- spinlock_t lock; /* serialize up/down and timer events */
+ spinlock_t lock;
};
struct tipc_subscription *tipc_sub_subscribe(struct net *net,
struct tipc_subscr *s,
int conid);
void tipc_sub_unsubscribe(struct tipc_subscription *sub);
-
-int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
- u32 found_upper);
void tipc_sub_report_overlap(struct tipc_subscription *sub,
- u32 found_lower, u32 found_upper,
- u32 event, u32 port, u32 node,
- u32 scope, int must);
+ struct publication *p,
+ u32 event, bool must);
int __net_init tipc_topsrv_init_net(struct net *net);
void __net_exit tipc_topsrv_exit_net(struct net *net);
@@ -96,6 +99,16 @@ void tipc_sub_get(struct tipc_subscription *subscription);
(swap_ ? swab32(val__) : val__); \
})
+/* tipc_sub_write - write val_ to field_ of struct sub_ in user endian format
+ */
+#define tipc_sub_write(sub_, field_, val_) \
+ ({ \
+ struct tipc_subscr *sub__ = sub_; \
+ u32 val__ = val_; \
+ int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \
+ (sub__)->field_ = swap_ ? swab32(val__) : val__; \
+ })
+
/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format
*/
#define tipc_evt_write(evt_, field_, val_) \
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index 58ab3d6dcdce..9fb65c988f7f 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -36,7 +36,7 @@
#include "core.h"
#include "trace.h"
#include "crypto.h"
-
+#include "bcast.h"
#include <linux/sysctl.h>
static struct ctl_table_header *tipc_ctl_hdr;
@@ -74,7 +74,23 @@ static struct ctl_table tipc_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ONE,
},
+ {
+ .procname = "key_exchange_enabled",
+ .data = &sysctl_tipc_key_exchange_enabled,
+ .maxlen = sizeof(sysctl_tipc_key_exchange_enabled),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
#endif
+ {
+ .procname = "bc_retruni",
+ .data = &sysctl_tipc_bc_retruni,
+ .maxlen = sizeof(sysctl_tipc_bc_retruni),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
{}
};
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 3a12fc18239b..d92ec92f0b71 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -48,7 +48,6 @@
#define MAX_SEND_MSG_COUNT 25
#define MAX_RECV_MSG_COUNT 25
#define CF_CONNECTED 1
-#define CF_SERVER 2
#define TIPC_SERVER_NAME_LEN 32
@@ -237,8 +236,8 @@ static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) {
tipc_sub_unsubscribe(sub);
atomic_dec(&tn->subscription_count);
- } else if (s) {
- break;
+ if (s)
+ break;
}
}
spin_unlock_bh(&con->sub_lock);
@@ -362,9 +361,10 @@ static int tipc_conn_rcv_sub(struct tipc_topsrv *srv,
{
struct tipc_net *tn = tipc_net(srv->net);
struct tipc_subscription *sub;
+ u32 s_filter = tipc_sub_read(s, filter);
- if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
- s->filter &= __constant_ntohl(~TIPC_SUB_CANCEL);
+ if (s_filter & TIPC_SUB_CANCEL) {
+ tipc_sub_write(s, filter, s_filter & ~TIPC_SUB_CANCEL);
tipc_conn_delete_sub(con, s);
return 0;
}
@@ -400,12 +400,15 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
return -EWOULDBLOCK;
if (ret == sizeof(s)) {
read_lock_bh(&sk->sk_callback_lock);
- ret = tipc_conn_rcv_sub(srv, con, &s);
+ /* RACE: the connection can be closed in the meantime */
+ if (likely(connected(con)))
+ ret = tipc_conn_rcv_sub(srv, con, &s);
read_unlock_bh(&sk->sk_callback_lock);
+ if (!ret)
+ return 0;
}
- if (ret < 0)
- tipc_conn_close(con);
+ tipc_conn_close(con);
return ret;
}
@@ -447,12 +450,19 @@ static void tipc_conn_data_ready(struct sock *sk)
static void tipc_topsrv_accept(struct work_struct *work)
{
struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork);
- struct socket *lsock = srv->listener;
- struct socket *newsock;
+ struct socket *newsock, *lsock;
struct tipc_conn *con;
struct sock *newsk;
int ret;
+ spin_lock_bh(&srv->idr_lock);
+ if (!srv->listener) {
+ spin_unlock_bh(&srv->idr_lock);
+ return;
+ }
+ lsock = srv->listener;
+ spin_unlock_bh(&srv->idr_lock);
+
while (1) {
ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
if (ret < 0)
@@ -486,14 +496,13 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk)
read_lock_bh(&sk->sk_callback_lock);
srv = sk->sk_user_data;
- if (srv->listener)
+ if (srv)
queue_work(srv->rcv_wq, &srv->awork);
read_unlock_bh(&sk->sk_callback_lock);
}
static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
{
- int imp = TIPC_CRITICAL_IMPORTANCE;
struct socket *lsock = NULL;
struct sockaddr_tipc saddr;
struct sock *sk;
@@ -510,19 +519,20 @@ static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
sk->sk_user_data = srv;
write_unlock_bh(&sk->sk_callback_lock);
- rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
- (char *)&imp, sizeof(imp));
+ lock_sock(sk);
+ rc = tsk_set_importance(sk, TIPC_CRITICAL_IMPORTANCE);
+ release_sock(sk);
if (rc < 0)
goto err;
saddr.family = AF_TIPC;
- saddr.addrtype = TIPC_ADDR_NAMESEQ;
- saddr.addr.nameseq.type = TIPC_TOP_SRV;
+ saddr.addrtype = TIPC_SERVICE_RANGE;
+ saddr.addr.nameseq.type = TIPC_TOP_SRV;
saddr.addr.nameseq.lower = TIPC_TOP_SRV;
saddr.addr.nameseq.upper = TIPC_TOP_SRV;
saddr.scope = TIPC_NODE_SCOPE;
- rc = kernel_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr));
+ rc = tipc_sk_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr));
if (rc < 0)
goto err;
rc = kernel_listen(lsock, 0);
@@ -565,7 +575,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
sub.seq.upper = upper;
sub.timeout = TIPC_WAIT_FOREVER;
sub.filter = filter;
- *(u32 *)&sub.usr_handle = port;
+ *(u64 *)&sub.usr_handle = (u64)port;
con = tipc_conn_alloc(tipc_topsrv(net));
if (IS_ERR(con))
@@ -661,12 +671,18 @@ static int tipc_topsrv_start(struct net *net)
ret = tipc_topsrv_work_start(srv);
if (ret < 0)
- return ret;
+ goto err_start;
ret = tipc_topsrv_create_listener(srv);
if (ret < 0)
- tipc_topsrv_work_stop(srv);
+ goto err_create;
+ return 0;
+
+err_create:
+ tipc_topsrv_work_stop(srv);
+err_start:
+ kfree(srv);
return ret;
}
@@ -690,8 +706,9 @@ static void tipc_topsrv_stop(struct net *net)
__module_get(lsock->sk->sk_prot_creator->owner);
srv->listener = NULL;
spin_unlock_bh(&srv->idr_lock);
- sock_release(lsock);
+
tipc_topsrv_work_stop(srv);
+ sock_release(lsock);
idr_destroy(&srv->conn_idr);
kfree(srv);
}
diff --git a/net/tipc/trace.c b/net/tipc/trace.c
index 265f6a26aa3d..7d2931521e0e 100644
--- a/net/tipc/trace.c
+++ b/net/tipc/trace.c
@@ -36,7 +36,7 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
-/**
+/*
* socket tuples for filtering in socket traces:
* (portid, sock type, name type, name lower, name upper)
*/
diff --git a/net/tipc/trace.h b/net/tipc/trace.h
index 4d8e00483afc..04af83f0500c 100644
--- a/net/tipc/trace.h
+++ b/net/tipc/trace.h
@@ -255,7 +255,7 @@ DECLARE_EVENT_CLASS(tipc_link_class,
TP_fast_assign(
__assign_str(header, header);
- tipc_link_name_ext(l, __entry->name);
+ memcpy(__entry->name, tipc_link_name(l), TIPC_MAX_LINK_NAME);
tipc_link_dump(l, dqueues, __get_str(buf));
),
@@ -295,12 +295,14 @@ DECLARE_EVENT_CLASS(tipc_link_transmq_class,
),
TP_fast_assign(
- tipc_link_name_ext(r, __entry->name);
+ memcpy(__entry->name, tipc_link_name(r), TIPC_MAX_LINK_NAME);
__entry->from = f;
__entry->to = t;
__entry->len = skb_queue_len(tq);
- __entry->fseqno = msg_seqno(buf_msg(skb_peek(tq)));
- __entry->lseqno = msg_seqno(buf_msg(skb_peek_tail(tq)));
+ __entry->fseqno = __entry->len ?
+ msg_seqno(buf_msg(skb_peek(tq))) : 0;
+ __entry->lseqno = __entry->len ?
+ msg_seqno(buf_msg(skb_peek_tail(tq))) : 0;
),
TP_printk("<%s> retrans req: [%u-%u] transmq: %u [%u-%u]\n",
@@ -308,15 +310,16 @@ DECLARE_EVENT_CLASS(tipc_link_transmq_class,
__entry->len, __entry->fseqno, __entry->lseqno)
);
-DEFINE_EVENT(tipc_link_transmq_class, tipc_link_retrans,
+DEFINE_EVENT_CONDITION(tipc_link_transmq_class, tipc_link_retrans,
TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
- TP_ARGS(r, f, t, tq)
+ TP_ARGS(r, f, t, tq),
+ TP_CONDITION(less_eq(f, t))
);
DEFINE_EVENT_PRINT(tipc_link_transmq_class, tipc_link_bc_ack,
TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
TP_ARGS(r, f, t, tq),
- TP_printk("<%s> acked: [%u-%u] transmq: %u [%u-%u]\n",
+ TP_printk("<%s> acked: %u gap: %u transmq: %u [%u-%u]\n",
__entry->name, __entry->from, __entry->to,
__entry->len, __entry->fseqno, __entry->lseqno)
);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index d6620ad53546..c2bb818704c8 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -52,6 +52,7 @@
#include "bearer.h"
#include "netlink.h"
#include "msg.h"
+#include "udp_media.h"
/* IANA assigned UDP port */
#define UDP_PORT_DEFAULT 6118
@@ -63,6 +64,11 @@
*
* This is the bearer level originating address used in neighbor discovery
* messages, and all fields should be in network byte order
+ *
+ * @proto: Ethernet protocol in use
+ * @port: port being used
+ * @ipv4: IPv4 address of neighbor
+ * @ipv6: IPv6 address of neighbor
*/
struct udp_media_addr {
__be16 proto;
@@ -87,6 +93,7 @@ struct udp_replicast {
* @ubsock: bearer associated socket
* @ifindex: local address scope
* @work: used to schedule deferred work on a bearer
+ * @rcast: associated udp_replicast container
*/
struct udp_bearer {
struct tipc_bearer __rcu *bearer;
@@ -161,9 +168,11 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
struct udp_bearer *ub, struct udp_media_addr *src,
struct udp_media_addr *dst, struct dst_cache *cache)
{
- struct dst_entry *ndst = dst_cache_get(cache);
+ struct dst_entry *ndst;
int ttl, err = 0;
+ local_bh_disable();
+ ndst = dst_cache_get(cache);
if (dst->proto == htons(ETH_P_IP)) {
struct rtable *rt = (struct rtable *)ndst;
@@ -210,9 +219,11 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
src->port, dst->port, false);
#endif
}
+ local_bh_enable();
return err;
tx_error:
+ local_bh_enable();
kfree_skb(skb);
return err;
}
@@ -403,8 +414,10 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote)
err = ip_mc_join_group(sk, &mreqn);
#if IS_ENABLED(CONFIG_IPV6)
} else {
+ lock_sock(sk);
err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex,
&remote->ipv6);
+ release_sock(sk);
#endif
}
return err;
@@ -561,7 +574,7 @@ msg_full:
/**
* tipc_parse_udp_addr - build udp media address from netlink data
- * @nlattr: netlink attribute containing sockaddr storage aligned address
+ * @nla: netlink attribute containing sockaddr storage aligned address
* @addr: tipc media address to fill with address, port and protocol type
* @scope_id: IPv6 scope id pointer, not NULL indicates it's required
*/
@@ -656,6 +669,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
struct udp_tunnel_sock_cfg tuncfg = {NULL};
struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
u8 node_id[NODE_ID_LEN] = {0,};
+ struct net_device *dev;
int rmcast = 0;
ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
@@ -710,8 +724,6 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
rcu_assign_pointer(ub->bearer, b);
tipc_udp_media_addr_set(&b->addr, &local);
if (local.proto == htons(ETH_P_IP)) {
- struct net_device *dev;
-
dev = __ip_dev_find(net, local.ipv4.s_addr, false);
if (!dev) {
err = -ENODEV;
@@ -734,6 +746,12 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
b->mtu = b->media->mtu;
#if IS_ENABLED(CONFIG_IPV6)
} else if (local.proto == htons(ETH_P_IPV6)) {
+ dev = ub->ifindex ? __dev_get_by_index(net, ub->ifindex) : NULL;
+ dev = ipv6_dev_find(net, &local.ipv6, dev);
+ if (!dev) {
+ err = -ENODEV;
+ goto err;
+ }
udp_conf.family = AF_INET6;
udp_conf.use_udp6_tx_checksums = true;
udp_conf.use_udp6_rx_checksums = true;
@@ -741,6 +759,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
udp_conf.local_ip6 = in6addr_any;
else
udp_conf.local_ip6 = local.ipv6;
+ ub->ifindex = dev->ifindex;
b->mtu = 1280;
#endif
} else {
@@ -761,7 +780,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
if (err)
goto free;
- /**
+ /*
* The bcast media address port is used for all peers and the ip
* is used if it's a multicast address.
*/
@@ -795,6 +814,7 @@ static void cleanup_bearer(struct work_struct *work)
kfree_rcu(rcast, rcu);
}
+ atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
dst_cache_destroy(&ub->rcast.dst_cache);
udp_tunnel_sock_release(ub->ubsock);
synchronize_net();
@@ -815,6 +835,7 @@ static void tipc_udp_disable(struct tipc_bearer *b)
RCU_INIT_POINTER(ub->bearer, NULL);
/* sock_release need to be done outside of rtnl lock */
+ atomic_inc(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
INIT_WORK(&ub->work, cleanup_bearer);
schedule_work(&ub->work);
}