From 9945e8043ef9273cfb633d930e2a5a9116009b09 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 15 Oct 2015 14:52:40 -0400 Subject: tipc: limit usage of temporary skb list during packet reception During packet reception, the function tipc_link_rcv() adds its accepted packets to a temporary buffer queue, before finally splicing this queue into the lock protected input queue that will be delivered up to the socket layer. The purpose is to reduce potential contention on the input queue lock. However, since the vast majority of packets arrive in sequence, they will anyway be added one by one to the input queue, and the use of the temporary queue becomes a sub-optimization. The only case where this queue makes sense is when unpacking buffers from a bundle packet; here we want to avoid dozens of small buffers to be added individually to the lock-protected input queue in a tight loop. In this commit, we remove the general usage of the temporary queue, and keep it only for the packet unbundling case. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 75db07c78a69..11f74294e085 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -953,7 +953,7 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb, case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: case CONN_MANAGER: - __skb_queue_tail(inputq, skb); + skb_queue_tail(inputq, skb); return true; case NAME_DISTRIBUTOR: node->bclink.recv_permitted = true; @@ -982,6 +982,7 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, struct tipc_msg *hdr = buf_msg(skb); struct sk_buff **reasm_skb = &l->reasm_buf; struct sk_buff *iskb; + struct sk_buff_head tmpq; int usr = msg_user(hdr); int rc = 0; int pos = 0; @@ -1006,10 +1007,12 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, } if (usr == MSG_BUNDLER) { + skb_queue_head_init(&tmpq); l->stats.recv_bundles++; l->stats.recv_bundled += msg_msgcnt(hdr); while (tipc_msg_extract(skb, &iskb, &pos)) - tipc_data_input(l, iskb, inputq); + tipc_data_input(l, iskb, &tmpq); + tipc_skb_queue_splice_tail(&tmpq, inputq); return 0; } else if (usr == MSG_FRAGMENTER) { l->stats.recv_fragments++; @@ -1053,13 +1056,10 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct sk_buff_head *arrvq = &l->deferdq; - struct sk_buff_head tmpq; struct tipc_msg *hdr; u16 seqno, rcv_nxt; int rc = 0; - __skb_queue_head_init(&tmpq); - if (unlikely(!__tipc_skb_queue_sorted(arrvq, skb))) { if (!(skb_queue_len(arrvq) % TIPC_NACK_INTV)) tipc_link_build_proto_msg(l, STATE_MSG, 0, @@ -1114,8 +1114,8 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, /* Packet can be delivered */ l->rcv_nxt++; l->stats.recv_info++; - if (unlikely(!tipc_data_input(l, skb, &tmpq))) - rc = tipc_link_input(l, skb, &tmpq); + if (unlikely(!tipc_data_input(l, skb, l->inputq))) + rc = tipc_link_input(l, skb, l->inputq); /* Ack at regular intervals */ if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) { @@ -1126,7 +1126,6 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, } } exit: - tipc_skb_queue_splice_tail(&tmpq, l->inputq); return rc; } -- cgit v1.2.3-59-g8ed1b From f9aa358a8109f9f33e96c3a7efb9a07631670294 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 15 Oct 2015 14:52:41 -0400 Subject: tipc: simplify tipc_link_rcv() reception loop Currently, all packets received in tipc_link_rcv() are unconditionally added to the packet deferred queue, whereafter that queue is walked and all its buffers evaluated for delivery. This is both non-optimal and and makes the queue sorting function unnecessary complex. This commit changes the loop so that an arrived packet is evaluated first, and added to the deferred queue only when a sequence number gap is discovered. A non-empty deferred queue is walked until it is empty or until its head's sequence number doesn't fit. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 84 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 11f74294e085..8e23ab523530 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1047,44 +1047,55 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) return released; } +/* tipc_link_build_ack_msg: prepare link acknowledge message for transmission + */ +void tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) +{ + l->rcv_unacked = 0; + l->stats.sent_acks++; + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); +} + +/* tipc_link_build_nack_msg: prepare link nack message for transmission + */ +static void tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + u32 def_cnt = ++l->stats.deferred_recv; + + if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); +} + /* tipc_link_rcv - process TIPC packets/messages arriving from off-node - * @link: the link that should handle the message + * @l: the link that should handle the message * @skb: TIPC packet * @xmitq: queue to place packets to be sent after this call */ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { - struct sk_buff_head *arrvq = &l->deferdq; + struct sk_buff_head *defq = &l->deferdq; struct tipc_msg *hdr; u16 seqno, rcv_nxt; int rc = 0; - if (unlikely(!__tipc_skb_queue_sorted(arrvq, skb))) { - if (!(skb_queue_len(arrvq) % TIPC_NACK_INTV)) - tipc_link_build_proto_msg(l, STATE_MSG, 0, - 0, 0, 0, xmitq); - return rc; - } - - while ((skb = skb_peek(arrvq))) { + do { hdr = buf_msg(skb); + seqno = msg_seqno(hdr); + rcv_nxt = l->rcv_nxt; /* Verify and update link state */ - if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) { - __skb_dequeue(arrvq); - rc = tipc_link_proto_rcv(l, skb, xmitq); - continue; - } + if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) + return tipc_link_proto_rcv(l, skb, xmitq); if (unlikely(!link_is_up(l))) { rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); - if (!link_is_up(l)) { - kfree_skb(__skb_dequeue(arrvq)); - goto exit; - } + if (!link_is_up(l)) + goto drop; } + /* Don't send probe at next timeout expiration */ l->silent_intv_cnt = 0; /* Forward queues and wake up waiting users */ @@ -1095,37 +1106,36 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, } /* Defer reception if there is a gap in the sequence */ - seqno = msg_seqno(hdr); - rcv_nxt = l->rcv_nxt; if (unlikely(less(rcv_nxt, seqno))) { - l->stats.deferred_recv++; - goto exit; + __tipc_skb_queue_sorted(defq, skb); + tipc_link_build_nack_msg(l, xmitq); + break; } - __skb_dequeue(arrvq); - /* Drop if packet already received */ if (unlikely(more(rcv_nxt, seqno))) { l->stats.duplicates++; - kfree_skb(skb); - goto exit; + goto drop; } /* Packet can be delivered */ l->rcv_nxt++; l->stats.recv_info++; - if (unlikely(!tipc_data_input(l, skb, l->inputq))) + + if (!tipc_data_input(l, skb, l->inputq)) rc = tipc_link_input(l, skb, l->inputq); + if (rc) + break; /* Ack at regular intervals */ - if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) { - l->rcv_unacked = 0; - l->stats.sent_acks++; - tipc_link_build_proto_msg(l, STATE_MSG, - 0, 0, 0, 0, xmitq); - } - } -exit: + if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) + tipc_link_build_ack_msg(l, xmitq); + + } while ((skb = __skb_dequeue(defq))); + + return rc; +drop: + kfree_skb(skb); return rc; } @@ -1249,7 +1259,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, } /* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets - * with contents of the link's tranmsit and backlog queues. + * with contents of the link's transmit and backlog queues. */ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq) -- cgit v1.2.3-59-g8ed1b From 81204c492b05274ade680c54787cd8ba234dcfd7 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 15 Oct 2015 14:52:42 -0400 Subject: tipc: improve sequence number checking The sequence number of an incoming packet is currently only checked for less than, equality to, or bigger than the next expected number, meaning that the receive window in practice becomes one half sequence number cycle, or U16_MAX/2. This does not make sense, and may not even be safe if there are extreme delays in the network. Any packet sent by the peer during the ongoing cycle must belong inside his current send window, or should otherwise be dropped if possible. Since a link endpoint cannot know its peer's current send window, it has to base this sanity check on a worst-case assumption, i.e., that the peer is using a maximum sized window of 8191 packets. Using this assumption, we now add a check that the sequence number is not bigger than next_expected + TIPC_MAX_LINK_WIN. We also re-order the checks done, so that the receive window test is performed before the gap test. This way, we are guaranteed that no packet with illegal sequence numbers are ever added to the deferred queue. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 26 ++++++++++++-------------- net/tipc/link.h | 2 +- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 8e23ab523530..2b549f653d80 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1077,13 +1077,14 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, { struct sk_buff_head *defq = &l->deferdq; struct tipc_msg *hdr; - u16 seqno, rcv_nxt; + u16 seqno, rcv_nxt, win_lim; int rc = 0; do { hdr = buf_msg(skb); seqno = msg_seqno(hdr); rcv_nxt = l->rcv_nxt; + win_lim = rcv_nxt + TIPC_MAX_LINK_WIN; /* Verify and update link state */ if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) @@ -1098,6 +1099,12 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, /* Don't send probe at next timeout expiration */ l->silent_intv_cnt = 0; + /* Drop if outside receive window */ + if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) { + l->stats.duplicates++; + goto drop; + } + /* Forward queues and wake up waiting users */ if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) { tipc_link_advance_backlog(l, xmitq); @@ -1105,29 +1112,20 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, link_prepare_wakeup(l); } - /* Defer reception if there is a gap in the sequence */ - if (unlikely(less(rcv_nxt, seqno))) { + /* Defer delivery if sequence gap */ + if (unlikely(seqno != rcv_nxt)) { __tipc_skb_queue_sorted(defq, skb); tipc_link_build_nack_msg(l, xmitq); break; } - /* Drop if packet already received */ - if (unlikely(more(rcv_nxt, seqno))) { - l->stats.duplicates++; - goto drop; - } - - /* Packet can be delivered */ + /* Deliver packet */ l->rcv_nxt++; l->stats.recv_info++; - if (!tipc_data_input(l, skb, l->inputq)) rc = tipc_link_input(l, skb, l->inputq); - if (rc) + if (unlikely(rc)) break; - - /* Ack at regular intervals */ if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) tipc_link_build_ack_msg(l, xmitq); diff --git a/net/tipc/link.h b/net/tipc/link.h index 39ff8b6919a4..7a1ad4294b7a 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -185,7 +185,7 @@ struct tipc_link { } backlog[5]; u16 snd_nxt; u16 last_retransm; - u32 window; + u16 window; u32 stale_count; /* Reception */ -- cgit v1.2.3-59-g8ed1b From 8306f99a517b91ebf8fa94d017c2c84ca62e107c Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 15 Oct 2015 14:52:43 -0400 Subject: tipc: disallow packet duplicates in link deferred queue After the previous commits, we are guaranteed that no packets of type LINK_PROTOCOL or with illegal sequence numbers will be attempted added to the link deferred queue. This makes it possible to make some simplifications to the sorting algorithm in the function tipc_skb_queue_sorted(). We also alter the function so that it will drop packets if one with the same seqeunce number is already present in the queue. This is necessary because we have identified weird packet sequences, involving duplicate packets, where a legitimate in-sequence packet may advance to the head of the queue without being detected and de-queued. Finally, we make this function outline, since it will now be called only in exceptional cases. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 2 +- net/tipc/msg.c | 31 +++++++++++++++++++++++++++++++ net/tipc/msg.h | 34 ++-------------------------------- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 2b549f653d80..e7c608631276 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1114,7 +1114,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, /* Defer delivery if sequence gap */ if (unlikely(seqno != rcv_nxt)) { - __tipc_skb_queue_sorted(defq, skb); + __tipc_skb_queue_sorted(defq, seqno, skb); tipc_link_build_nack_msg(l, xmitq); break; } diff --git a/net/tipc/msg.c b/net/tipc/msg.c index c5ac436235e0..454f5ec275c8 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -590,3 +590,34 @@ error: kfree_skb(head); return NULL; } + +/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number + * @list: list to be appended to + * @seqno: sequence number of buffer to add + * @skb: buffer to add + */ +void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, + struct sk_buff *skb) +{ + struct sk_buff *_skb, *tmp; + + if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) { + __skb_queue_head(list, skb); + return; + } + + if (more(seqno, buf_seqno(skb_peek_tail(list)))) { + __skb_queue_tail(list, skb); + return; + } + + skb_queue_walk_safe(list, _skb, tmp) { + if (more(seqno, buf_seqno(_skb))) + continue; + if (seqno == buf_seqno(_skb)) + break; + __skb_queue_before(list, _skb, skb); + return; + } + kfree_skb(skb); +} diff --git a/net/tipc/msg.h b/net/tipc/msg.h index a82c5848d4bc..c784ba05f2aa 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -790,6 +790,8 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); +void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, + struct sk_buff *skb); static inline u16 buf_seqno(struct sk_buff *skb) { @@ -862,38 +864,6 @@ static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list, return skb; } -/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number - * @list: list to be appended to - * @skb: buffer to add - * Returns true if queue should treated further, otherwise false - */ -static inline bool __tipc_skb_queue_sorted(struct sk_buff_head *list, - struct sk_buff *skb) -{ - struct sk_buff *_skb, *tmp; - struct tipc_msg *hdr = buf_msg(skb); - u16 seqno = msg_seqno(hdr); - - if (skb_queue_empty(list) || (msg_user(hdr) == LINK_PROTOCOL)) { - __skb_queue_head(list, skb); - return true; - } - if (likely(less(seqno, buf_seqno(skb_peek(list))))) { - __skb_queue_head(list, skb); - return true; - } - if (!more(seqno, buf_seqno(skb_peek_tail(list)))) { - skb_queue_walk_safe(list, _skb, tmp) { - if (likely(less(seqno, buf_seqno(_skb)))) { - __skb_queue_before(list, _skb, skb); - return true; - } - } - } - __skb_queue_tail(list, skb); - return false; -} - /* tipc_skb_queue_splice_tail - append an skb list to lock protected list * @list: the new list to append. Not lock protected * @head: target list. Lock protected. -- cgit v1.2.3-59-g8ed1b From 73f646cec35477b5099d7e952297cb9e1855be45 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 15 Oct 2015 14:52:44 -0400 Subject: tipc: delay ESTABLISH state event when link is established Link establishing, just like link teardown, is a non-atomic action, in the sense that discovering that conditions are right to establish a link, and the actual adding of the link to one of the node's send slots is done in two different lock contexts. The link FSM is designed to help bridging the gap between the two contexts in a safe manner. We have now discovered a weakness in the implementaton of this FSM. Because we directly let the link go from state LINK_ESTABLISHING to state LINK_ESTABLISHED already in the first lock context, we are unable to distinguish between a fully established link, i.e., a link that has been added to its slot, and a link that has not yet reached the second lock context. It may hence happen that a manual intervention, e.g., when disabling an interface, causes the function tipc_node_link_down() to try removing the link from the node slots, decrementing its active link counter etc, although the link was never added there in the first place. We solve this by delaying the actual state change until we reach the second lock context, inside the function tipc_node_link_up(). This makes it possible for potentail callers of __tipc_node_link_down() to know if they should proceed or not, and the problem is solved. Unforunately, the situation described above also has a second problem. Since there by necessity is a tipc_node_link_up() call pending once the node lock has been released, we must defuse that call by setting the link back from LINK_ESTABLISHING to LINK_RESET state. This forces us to make a slight modification to the link FSM, which will now look as follows. +------------------------------------+ |RESET_EVT | | | | +--------------+ | +-----------------| SYNCHING |-----------------+ | |FAILURE_EVT +--------------+ PEER_RESET_EVT| | | A | | | | | | | | | | | | | | |SYNCH_ |SYNCH_ | | | |BEGIN_EVT |END_EVT | | | | | | | V | V V | +-------------+ +--------------+ +------------+ | | RESETTING |<---------| ESTABLISHED |--------->| PEER_RESET | | +-------------+ FAILURE_ +--------------+ PEER_ +------------+ | | EVT | A RESET_EVT | | | | | | | | +----------------+ | | | RESET_EVT| |RESET_EVT | | | | | | | | | | |ESTABLISH_EVT | | | | +-------------+ | | | | | | RESET_EVT | | | | | | | | | | | V V V | | | | +-------------+ +--------------+ RESET_EVT| +--->| RESET |--------->| ESTABLISHING |<----------------+ +-------------+ PEER_ +--------------+ | A RESET_EVT | | | | | | | |FAILOVER_ |FAILOVER_ |FAILOVER_ |BEGIN_EVT |END_EVT |BEGIN_EVT | | | V | | +-------------+ | | FAILINGOVER |<----------------+ +-------------+ Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 40 ++++++++++++++++++++++++++-------------- net/tipc/link.h | 1 + net/tipc/node.c | 31 ++++++++++++++++++++++--------- 3 files changed, 49 insertions(+), 23 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index e7c608631276..8c794c1dd531 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -125,6 +125,11 @@ bool tipc_link_is_reset(struct tipc_link *l) return l->state & (LINK_RESET | LINK_FAILINGOVER | LINK_ESTABLISHING); } +bool tipc_link_is_establishing(struct tipc_link *l) +{ + return l->state == LINK_ESTABLISHING; +} + bool tipc_link_is_synching(struct tipc_link *l) { return l->state == LINK_SYNCHING; @@ -321,14 +326,15 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt) switch (evt) { case LINK_ESTABLISH_EVT: l->state = LINK_ESTABLISHED; - rc |= TIPC_LINK_UP_EVT; break; case LINK_FAILOVER_BEGIN_EVT: l->state = LINK_FAILINGOVER; break; - case LINK_PEER_RESET_EVT: case LINK_RESET_EVT: + l->state = LINK_RESET; + break; case LINK_FAILURE_EVT: + case LINK_PEER_RESET_EVT: case LINK_SYNCH_BEGIN_EVT: case LINK_FAILOVER_END_EVT: break; @@ -1091,9 +1097,9 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, return tipc_link_proto_rcv(l, skb, xmitq); if (unlikely(!link_is_up(l))) { - rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); - if (!link_is_up(l)) - goto drop; + if (l->state == LINK_ESTABLISHING) + rc = TIPC_LINK_UP_EVT; + goto drop; } /* Don't send probe at next timeout expiration */ @@ -1338,6 +1344,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); u16 rcv_nxt = l->rcv_nxt; + int mtyp = msg_type(hdr); char *if_name; int rc = 0; @@ -1347,7 +1354,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (link_own_addr(l) > msg_prevnode(hdr)) l->net_plane = msg_net_plane(hdr); - switch (msg_type(hdr)) { + switch (mtyp) { case RESET_MSG: /* Ignore duplicate RESET with old session number */ @@ -1374,12 +1381,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) l->priority = peers_prio; - if (msg_type(hdr) == RESET_MSG) { - rc |= tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); - } else if (!link_is_up(l)) { - tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); - rc |= tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); - } + /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ + if ((mtyp == RESET_MSG) || !link_is_up(l)) + rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); + + /* ACTIVATE_MSG takes up link if it was already locally reset */ + if ((mtyp == ACTIVATE_MSG) && (l->state == LINK_ESTABLISHING)) + rc = TIPC_LINK_UP_EVT; + l->peer_session = msg_session(hdr); l->peer_bearer_id = msg_bearer_id(hdr); if (l->mtu > msg_max_pkt(hdr)) @@ -1396,9 +1405,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, l->stats.recv_states++; if (msg_probe(hdr)) l->stats.recv_probes++; - rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); - if (!link_is_up(l)) + + if (!link_is_up(l)) { + if (l->state == LINK_ESTABLISHING) + rc = TIPC_LINK_UP_EVT; break; + } /* Send NACK if peer has sent pkts we haven't received yet */ if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) diff --git a/net/tipc/link.h b/net/tipc/link.h index 7a1ad4294b7a..d42dfc0e7bf5 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -217,6 +217,7 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt); void tipc_link_reset_fragments(struct tipc_link *l_ptr); bool tipc_link_is_up(struct tipc_link *l); bool tipc_link_is_reset(struct tipc_link *l); +bool tipc_link_is_establishing(struct tipc_link *l); bool tipc_link_is_synching(struct tipc_link *l); bool tipc_link_is_failingover(struct tipc_link *l); bool tipc_link_is_blocked(struct tipc_link *l); diff --git a/net/tipc/node.c b/net/tipc/node.c index 703875fd6cde..656b5791f1a5 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -317,7 +317,11 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, struct tipc_link *ol = node_active_link(n, 0); struct tipc_link *nl = n->links[bearer_id].link; - if (!nl || !tipc_link_is_up(nl)) + if (!nl) + return; + + tipc_link_fsm_evt(nl, LINK_ESTABLISH_EVT); + if (!tipc_link_is_up(nl)) return; n->working_links++; @@ -437,17 +441,26 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) { struct tipc_link_entry *le = &n->links[bearer_id]; + struct tipc_link *l = le->link; struct tipc_media_addr *maddr; struct sk_buff_head xmitq; + if (!l) + return; + __skb_queue_head_init(&xmitq); tipc_node_lock(n); - __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); - if (delete && le->link) { - kfree(le->link); - le->link = NULL; - n->link_cnt--; + if (!tipc_link_is_establishing(l)) { + __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); + if (delete) { + kfree(l); + le->link = NULL; + n->link_cnt--; + } + } else { + /* Defuse pending tipc_node_link_up() */ + tipc_link_fsm_evt(l, LINK_RESET_EVT); } tipc_node_unlock(n); @@ -579,7 +592,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: tipc_node_unlock(n); - if (reset) + if (reset && !tipc_link_is_reset(l)) tipc_node_link_down(n, b->identity, false); tipc_node_put(n); } @@ -686,10 +699,10 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt) break; case SELF_ESTABL_CONTACT_EVT: case PEER_LOST_CONTACT_EVT: - break; case NODE_SYNCH_END_EVT: - case NODE_SYNCH_BEGIN_EVT: case NODE_FAILOVER_BEGIN_EVT: + break; + case NODE_SYNCH_BEGIN_EVT: case NODE_FAILOVER_END_EVT: default: goto illegal_evt; -- cgit v1.2.3-59-g8ed1b From 282b3a056225b35024246f63feb91d769d714dad Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 15 Oct 2015 14:52:45 -0400 Subject: tipc: send out RESET immediately when link goes down When a link is taken down because of a node local event, such as disabling of a bearer or an interface, we currently leave it to the peer node to discover the broken communication. The default time for such failure discovery is 1.5-2 seconds. If we instead allow the terminating link endpoint to send out a RESET message at the moment it is reset, we can achieve the impression that both endpoints are going down instantly. Since this is a very common scenario, we find it worthwhile to make this small modification. Apart from letting the link produce the said message, we also have to ensure that the interface is able to transmit it before TIPC is detached. We do this by performing the disabling of a bearer in three steps: 1) Disable reception of TIPC packets from the interface in question. 2) Take down the links, while allowing them so send out a RESET message. 3) Disable transmission of TIPC packets on the interface. Apart from this, we now have to react on the NETDEV_GOING_DOWN event, instead of as currently the NEDEV_DOWN event, to ensure that such transmission is possible during the teardown phase. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bearer.c | 8 +++----- net/tipc/link.c | 12 ++++++++++++ net/tipc/link.h | 1 + net/tipc/node.c | 5 +++-- net/tipc/udp_media.c | 1 - 5 files changed, 19 insertions(+), 8 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index ce9f7bfc0b92..82b278668ab7 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -362,6 +362,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr) b_ptr->media->disable_media(b_ptr); tipc_node_delete_links(net, b_ptr->identity); + RCU_INIT_POINTER(b_ptr->media_ptr, NULL); if (b_ptr->link_req) tipc_disc_delete(b_ptr->link_req); @@ -399,16 +400,13 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, /* tipc_disable_l2_media - detach TIPC bearer from an L2 interface * - * Mark L2 bearer as inactive so that incoming buffers are thrown away, - * then get worker thread to complete bearer cleanup. (Can't do cleanup - * here because cleanup code needs to sleep and caller holds spinlocks.) + * Mark L2 bearer as inactive so that incoming buffers are thrown away */ void tipc_disable_l2_media(struct tipc_bearer *b) { struct net_device *dev; dev = (struct net_device *)rtnl_dereference(b->media_ptr); - RCU_INIT_POINTER(b->media_ptr, NULL); RCU_INIT_POINTER(dev->tipc_ptr, NULL); synchronize_net(); dev_put(dev); @@ -554,7 +552,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, case NETDEV_CHANGE: if (netif_carrier_ok(dev)) break; - case NETDEV_DOWN: + case NETDEV_GOING_DOWN: case NETDEV_CHANGEMTU: tipc_reset_bearer(net, b_ptr); break; diff --git a/net/tipc/link.c b/net/tipc/link.c index 8c794c1dd531..737b5980020d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1062,6 +1062,18 @@ void tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); } +/* tipc_link_build_reset_msg: prepare link RESET or ACTIVATE message + */ +void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) +{ + int mtyp = RESET_MSG; + + if (l->state == LINK_ESTABLISHING) + mtyp = ACTIVATE_MSG; + + tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq); +} + /* tipc_link_build_nack_msg: prepare link nack message for transmission */ static void tipc_link_build_nack_msg(struct tipc_link *l, diff --git a/net/tipc/link.h b/net/tipc/link.h index d42dfc0e7bf5..5872f090c8d8 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -213,6 +213,7 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); void tipc_link_build_bcast_sync_msg(struct tipc_link *l, struct sk_buff_head *xmitq); +void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); void tipc_link_reset_fragments(struct tipc_link *l_ptr); bool tipc_link_is_up(struct tipc_link *l); diff --git a/net/tipc/node.c b/net/tipc/node.c index 656b5791f1a5..fba6e1af28e8 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -41,7 +41,7 @@ #include "socket.h" #include "bcast.h" #include "discover.h" - +#define pr_debug printk /* Node FSM states and events: */ enum { @@ -421,6 +421,8 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, if (!tipc_node_is_up(n)) { tipc_link_reset(l); + tipc_link_build_reset_msg(l, xmitq); + *maddr = &n->links[*bearer_id].maddr; node_lost_contact(n, &le->inputq); return; } @@ -463,7 +465,6 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_link_fsm_evt(l, LINK_RESET_EVT); } tipc_node_unlock(n); - tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); tipc_sk_rcv(n->net, &le->inputq); } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index c170d3138953..9bc0b1e515fa 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -425,7 +425,6 @@ static void tipc_udp_disable(struct tipc_bearer *b) } if (ub->ubsock) sock_set_flag(ub->ubsock->sk, SOCK_DEAD); - RCU_INIT_POINTER(b->media_ptr, NULL); RCU_INIT_POINTER(ub->bearer, NULL); /* sock_release need to be done outside of rtnl lock */ -- cgit v1.2.3-59-g8ed1b From c819930090fe3f74c822be765c185b3431360193 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 15 Oct 2015 14:52:46 -0400 Subject: tipc: update node FSM when peer RESET message is received The change made in the previous commit revealed a small flaw in the way the node FSM is updated. When the function tipc_node_link_down() is called for the last link to a node, we should check whether this was caused by a local reset or by a received RESET message from the peer. In the latter case, we can directly issue a PEER_LOST_CONTACT_EVT to the node FSM, so that it is ready to re-establish contact. If this is not done, the peer node will sometimes have to go through a second establish cycle before the link becomes stable. We fix this in this commit by conditionally issuing the mentioned event in the function tipc_node_link_down(). We also move LINK_RESET FSM even away from the link_reset() function and into the caller function, partially because it is easier to follow the code when state changes are gathered at a limited number of locations, partially because there will be cases in future commits where we don't want the link to go RESET mode when link_reset() is called. Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 7 +++++-- net/tipc/link.h | 1 + net/tipc/node.c | 11 +++++++---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 737b5980020d..ff9b0b92e62e 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -120,6 +120,11 @@ bool tipc_link_is_up(struct tipc_link *l) return link_is_up(l); } +bool tipc_link_peer_is_down(struct tipc_link *l) +{ + return l->state == LINK_PEER_RESET; +} + bool tipc_link_is_reset(struct tipc_link *l) { return l->state & (LINK_RESET | LINK_FAILINGOVER | LINK_ESTABLISHING); @@ -584,8 +589,6 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr) void tipc_link_reset(struct tipc_link *l) { - tipc_link_fsm_evt(l, LINK_RESET_EVT); - /* Link is down, accept any session */ l->peer_session = WILDCARD_SESSION; diff --git a/net/tipc/link.h b/net/tipc/link.h index 5872f090c8d8..0201212cb49a 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -217,6 +217,7 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); void tipc_link_reset_fragments(struct tipc_link *l_ptr); bool tipc_link_is_up(struct tipc_link *l); +bool tipc_link_peer_is_down(struct tipc_link *l); bool tipc_link_is_reset(struct tipc_link *l); bool tipc_link_is_establishing(struct tipc_link *l); bool tipc_link_is_synching(struct tipc_link *l); diff --git a/net/tipc/node.c b/net/tipc/node.c index fba6e1af28e8..d1f340116c84 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -41,7 +41,7 @@ #include "socket.h" #include "bcast.h" #include "discover.h" -#define pr_debug printk + /* Node FSM states and events: */ enum { @@ -420,6 +420,10 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, } if (!tipc_node_is_up(n)) { + if (tipc_link_peer_is_down(l)) + tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); + tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT); + tipc_link_fsm_evt(l, LINK_RESET_EVT); tipc_link_reset(l); tipc_link_build_reset_msg(l, xmitq); *maddr = &n->links[*bearer_id].maddr; @@ -434,6 +438,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1); tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq); tipc_link_reset(l); + tipc_link_fsm_evt(l, LINK_RESET_EVT); tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); *maddr = &n->links[tnl->bearer_id].maddr; @@ -581,6 +586,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, goto exit; } tipc_link_reset(l); + tipc_link_fsm_evt(l, LINK_RESET_EVT); if (n->state == NODE_FAILINGOVER) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); le->link = l; @@ -863,9 +869,6 @@ static void node_lost_contact(struct tipc_node *n_ptr, tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT); } - /* Prevent re-contact with node until cleanup is done */ - tipc_node_fsm_evt(n_ptr, SELF_LOST_CONTACT_EVT); - /* Notify publications from this node */ n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN; -- cgit v1.2.3-59-g8ed1b