From a8ae890b9c5f0d812337d97f2552c0d772957dc2 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Dec 2017 13:38:44 +0100 Subject: smc: make smc_close_active_abort() static smc_close_active_abort() is used in smc_close.c only. Make it static. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_close.c | 2 +- net/smc/smc_close.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 48615d2ac4aa..e194c6cc308a 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -113,7 +113,7 @@ static int smc_close_abort(struct smc_connection *conn) /* terminate smc socket abnormally - active abort * RDMA communication no longer possible */ -void smc_close_active_abort(struct smc_sock *smc) +static void smc_close_active_abort(struct smc_sock *smc) { struct smc_cdc_conn_state_flags *txflags = &smc->conn.local_tx_ctrl.conn_state_flags; diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index ed82506b1b0a..8c498885d758 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -20,7 +20,6 @@ #define SMC_CLOSE_SOCK_PUT_DELAY HZ void smc_close_wake_tx_prepared(struct smc_sock *smc); -void smc_close_active_abort(struct smc_sock *smc); int smc_close_active(struct smc_sock *smc); void smc_close_sock_put_work(struct work_struct *work); int smc_close_shutdown_write(struct smc_sock *smc); -- cgit v1.2.3-59-g8ed1b From 0c9f1515aa80f12734123e5fcc50ffe525e1d533 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Dec 2017 13:38:45 +0100 Subject: smc: improve smc_clc_send_decline() error handling Let smc_clc_send_decline() return with an error, if the amount sent is smaller than the length of an smc decline message. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 9 +++------ net/smc/smc_clc.c | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'net/smc') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 6451c5013e06..d3ae0d5b1677 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -520,7 +520,7 @@ decline_rdma: smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { rc = smc_clc_send_decline(smc, reason_code); - if (rc < sizeof(struct smc_clc_msg_decline)) + if (rc < 0) goto out_err; } goto out_connected; @@ -879,11 +879,9 @@ static void smc_listen_work(struct work_struct *work) } /* QP confirmation over RoCE fabric */ reason_code = smc_serv_conf_first_link(new_smc); - if (reason_code < 0) { + if (reason_code < 0) /* peer is not aware of a problem */ - rc = reason_code; goto out_err_unlock; - } if (reason_code > 0) goto decline_rdma_unlock; } @@ -916,8 +914,7 @@ decline_rdma: smc_conn_free(&new_smc->conn); new_smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { - rc = smc_clc_send_decline(new_smc, reason_code); - if (rc < sizeof(struct smc_clc_msg_decline)) + if (smc_clc_send_decline(new_smc, reason_code) < 0) goto out_err; } goto out_connected; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 1800e16b2a02..f5e17d29112b 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -133,7 +133,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) smc->sk.sk_err = EPROTO; if (len < 0) smc->sk.sk_err = -len; - return len; + return sock_error(&smc->sk); } /* send CLC PROPOSAL message across internal TCP socket */ -- cgit v1.2.3-59-g8ed1b From 4bd3e7fbfadcd284b1582a284d076afebbe3479d Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Dec 2017 13:38:46 +0100 Subject: smc: no update for unused sk_write_pending The smc code never checks the sk_write_pending sock field. Thus there is no need to update it. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_tx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index c48dc2d5fd3a..77555c6ed199 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -104,14 +104,12 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags) if (atomic_read(&conn->sndbuf_space)) break; /* at least 1 byte of free space available */ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - sk->sk_write_pending++; sk_wait_event(sk, &timeo, sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN) || smc_cdc_rxed_any_close_or_senddone(conn) || atomic_read(&conn->sndbuf_space), &wait); - sk->sk_write_pending--; } remove_wait_queue(sk_sleep(sk), &wait); return rc; -- cgit v1.2.3-59-g8ed1b From 71c125c3f23d714c1d0725ca11c9f27416f697c8 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Dec 2017 13:38:47 +0100 Subject: smc: cleanup close checking during data receival When waiting for data to be received it must be checked if the peer signals shutdown. The SMC code uses two different checks for this purpose, even though just one check is sufficient. This patch removes the superfluous test for SOCK_DONE. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_rx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index cbf58637ee14..9dc392ca06bf 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -65,7 +65,6 @@ static int smc_rx_wait_data(struct smc_sock *smc, long *timeo) rc = sk_wait_event(sk, timeo, sk->sk_err || sk->sk_shutdown & RCV_SHUTDOWN || - sock_flag(sk, SOCK_DONE) || atomic_read(&conn->bytes_to_rcv) || smc_cdc_rxed_any_close_or_senddone(conn), &wait); @@ -116,7 +115,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len, if (read_done) { if (sk->sk_err || sk->sk_state == SMC_CLOSED || - (sk->sk_shutdown & RCV_SHUTDOWN) || + sk->sk_shutdown & RCV_SHUTDOWN || !timeo || signal_pending(current) || smc_cdc_rxed_any_close_or_senddone(conn) || @@ -124,8 +123,6 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len, peer_conn_abort) break; } else { - if (sock_flag(sk, SOCK_DONE)) - break; if (sk->sk_err) { read_done = sock_error(sk); break; -- cgit v1.2.3-59-g8ed1b From 6b5771aa3c351b118b1eee7bc98e1483eb0c8ca8 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Dec 2017 13:38:48 +0100 Subject: smc: no consumer update in tasklet context The SMC protocol requires to send a separate consumer cursor update, if it cannot be piggybacked to updates of the producer cursor. When receiving a blocked signal from the sender, this update is sent already in tasklet context. In addition consumer cursor updates are sent after data receival. Sending of cursor updates is controlled by sequence numbers. Assuming receiving stray messages the receiver drops updates with older sequence numbers than an already received cursor update with a higher sequence number. Sending consumer cursor updates in tasklet context may result in wrong order sends and its corresponding drops at the receiver. Since it is sufficient to send consumer cursor updates once the data is received, this patch gets rid of the consumer cursor update in tasklet context to guarantee in-sequence arrival of cursor updates. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 12 +++--------- net/smc/smc_tx.c | 9 ++------- 2 files changed, 5 insertions(+), 16 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 87f7bede6eab..d4155ff6acde 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -213,6 +213,9 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, /* guarantee 0 <= bytes_to_rcv <= rmbe_size */ smp_mb__after_atomic(); smc->sk.sk_data_ready(&smc->sk); + } else if ((conn->local_rx_ctrl.prod_flags.write_blocked) || + (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req)) { + smc->sk.sk_data_ready(&smc->sk); } if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) { @@ -234,15 +237,6 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, /* trigger socket release if connection closed */ smc_close_wake_tx_prepared(smc); } - - /* socket connected but not accepted */ - if (!smc->sk.sk_socket) - return; - - /* data available */ - if ((conn->local_rx_ctrl.prod_flags.write_blocked) || - (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req)) - smc_tx_consumer_update(conn); } /* called under tasklet context */ diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 77555c6ed199..2e50fddf8ce9 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -448,9 +448,7 @@ static void smc_tx_work(struct work_struct *work) void smc_tx_consumer_update(struct smc_connection *conn) { union smc_host_cursor cfed, cons; - struct smc_cdc_tx_pend *pend; - struct smc_wr_buf *wr_buf; - int to_confirm, rc; + int to_confirm; smc_curs_write(&cons, smc_curs_read(&conn->local_tx_ctrl.cons, conn), @@ -464,10 +462,7 @@ void smc_tx_consumer_update(struct smc_connection *conn) ((to_confirm > conn->rmbe_update_limit) && ((to_confirm > (conn->rmbe_size / 2)) || conn->local_rx_ctrl.prod_flags.write_blocked))) { - rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); - if (!rc) - rc = smc_cdc_msg_send(conn, wr_buf, pend); - if (rc < 0) { + if (smc_cdc_get_slot_and_msg_send(conn) < 0) { schedule_delayed_work(&conn->tx_work, SMC_TX_WORK_DELAY); return; -- cgit v1.2.3-59-g8ed1b From e7b7a64a8493d47433fd003efbe6543e3f676294 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Dec 2017 13:38:49 +0100 Subject: smc: support variable CLC proposal messages According to RFC7609 [1] the CLC proposal message contains an area of unknown length for future growth. Additionally it may contain up to 8 IPv6 prefixes. The current version of the SMC-code does not understand CLC proposal messages using these variable length fields and, thus, is incompatible with SMC implementations in other operating systems. This patch makes sure, SMC understands incoming CLC proposals * with arbitrary length values for future growth * with up to 8 IPv6 prefixes [1] SMC-R Informational RFC: http://www.rfc-editor.org/info/rfc7609 Signed-off-by: Ursula Braun Reviewed-by: Hans Wippel Signed-off-by: David S. Miller --- net/smc/af_smc.c | 15 ++++++---- net/smc/smc_clc.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++--------- net/smc/smc_clc.h | 34 +++++++++++++++++++---- 3 files changed, 107 insertions(+), 24 deletions(-) (limited to 'net/smc') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d3ae0d5b1677..daf8075f5a4c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -751,14 +751,16 @@ static void smc_listen_work(struct work_struct *work) { struct smc_sock *new_smc = container_of(work, struct smc_sock, smc_listen_work); + struct smc_clc_msg_proposal_prefix *pclc_prfx; struct socket *newclcsock = new_smc->clcsock; struct smc_sock *lsmc = new_smc->listen_smc; struct smc_clc_msg_accept_confirm cclc; int local_contact = SMC_REUSE_CONTACT; struct sock *newsmcsk = &new_smc->sk; - struct smc_clc_msg_proposal pclc; + struct smc_clc_msg_proposal *pclc; struct smc_ib_device *smcibdev; struct sockaddr_in peeraddr; + u8 buf[SMC_CLC_MAX_LEN]; struct smc_link *link; int reason_code = 0; int rc = 0, len; @@ -775,7 +777,7 @@ static void smc_listen_work(struct work_struct *work) /* do inband token exchange - *wait for and receive SMC Proposal CLC message */ - reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc), + reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf), SMC_CLC_PROPOSAL); if (reason_code < 0) goto out_err; @@ -804,8 +806,11 @@ static void smc_listen_work(struct work_struct *work) reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } - if ((pclc.outgoing_subnet != subnet) || - (pclc.prefix_len != prefix_len)) { + + pclc = (struct smc_clc_msg_proposal *)&buf; + pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (pclc_prfx->outgoing_subnet != subnet || + pclc_prfx->prefix_len != prefix_len) { reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } @@ -816,7 +821,7 @@ static void smc_listen_work(struct work_struct *work) /* allocate connection / link group */ mutex_lock(&smc_create_lgr_pending); local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr, - smcibdev, ibport, &pclc.lcl, 0); + smcibdev, ibport, &pclc->lcl, 0); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index f5e17d29112b..abf7ceb6690b 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -22,6 +22,54 @@ #include "smc_clc.h" #include "smc_ib.h" +/* check if received message has a correct header length and contains valid + * heading and trailing eyecatchers + */ +static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) +{ + struct smc_clc_msg_proposal_prefix *pclc_prfx; + struct smc_clc_msg_accept_confirm *clc; + struct smc_clc_msg_proposal *pclc; + struct smc_clc_msg_decline *dclc; + struct smc_clc_msg_trail *trl; + + if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + return false; + switch (clcm->type) { + case SMC_CLC_PROPOSAL: + pclc = (struct smc_clc_msg_proposal *)clcm; + pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (ntohs(pclc->hdr.length) != + sizeof(*pclc) + ntohs(pclc->iparea_offset) + + sizeof(*pclc_prfx) + + pclc_prfx->ipv6_prefixes_cnt * + sizeof(struct smc_clc_ipv6_prefix) + + sizeof(*trl)) + return false; + trl = (struct smc_clc_msg_trail *) + ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl)); + break; + case SMC_CLC_ACCEPT: + case SMC_CLC_CONFIRM: + clc = (struct smc_clc_msg_accept_confirm *)clcm; + if (ntohs(clc->hdr.length) != sizeof(*clc)) + return false; + trl = &clc->trl; + break; + case SMC_CLC_DECLINE: + dclc = (struct smc_clc_msg_decline *)clcm; + if (ntohs(dclc->hdr.length) != sizeof(*dclc)) + return false; + trl = &dclc->trl; + break; + default: + return false; + } + if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + return false; + return true; +} + /* Wait for data on the tcp-socket, analyze received data * Returns: * 0 if success and it was not a decline that we received. @@ -72,9 +120,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, } datlen = ntohs(clcm->length); if ((len < sizeof(struct smc_clc_msg_hdr)) || - (datlen < sizeof(struct smc_clc_msg_decline)) || - (datlen > sizeof(struct smc_clc_msg_accept_confirm)) || - memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) || + (datlen > buflen) || ((clcm->type != SMC_CLC_DECLINE) && (clcm->type != expected_type))) { smc->sk.sk_err = EPROTO; @@ -89,7 +135,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, krflags = MSG_WAITALL; smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags); - if (len < datlen) { + if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) { smc->sk.sk_err = EPROTO; reason_code = -EPROTO; goto out; @@ -141,33 +187,43 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport) { + struct smc_clc_msg_proposal_prefix pclc_prfx; struct smc_clc_msg_proposal pclc; + struct smc_clc_msg_trail trl; int reason_code = 0; + struct kvec vec[3]; struct msghdr msg; - struct kvec vec; - int len, rc; + int len, plen, rc; /* send SMC Proposal CLC message */ + plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl); memset(&pclc, 0, sizeof(pclc)); memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); pclc.hdr.type = SMC_CLC_PROPOSAL; - pclc.hdr.length = htons(sizeof(pclc)); + pclc.hdr.length = htons(plen); pclc.hdr.version = SMC_CLC_V1; /* SMC version */ memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); + pclc.iparea_offset = htons(0); + memset(&pclc_prfx, 0, sizeof(pclc_prfx)); /* determine subnet and mask from internal TCP socket */ - rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet, - &pclc.prefix_len); + rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, + &pclc_prfx.prefix_len); if (rc) return SMC_CLC_DECL_CNFERR; /* configuration error */ - memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + pclc_prfx.ipv6_prefixes_cnt = 0; + memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memset(&msg, 0, sizeof(msg)); - vec.iov_base = &pclc; - vec.iov_len = sizeof(pclc); + vec[0].iov_base = &pclc; + vec[0].iov_len = sizeof(pclc); + vec[1].iov_base = &pclc_prfx; + vec[1].iov_len = sizeof(pclc_prfx); + vec[2].iov_base = &trl; + vec[2].iov_len = sizeof(trl); /* due to the few bytes needed for clc-handshake this cannot block */ - len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc)); + len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen); if (len < sizeof(pclc)) { if (len >= 0) { reason_code = -ENETUNREACH; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 12a9af1539a2..c145a0f36a68 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -44,7 +44,7 @@ struct smc_clc_msg_hdr { /* header1 of clc messages */ #if defined(__BIG_ENDIAN_BITFIELD) u8 version : 4, flag : 1, - rsvd : 3; + rsvd : 3; #elif defined(__LITTLE_ENDIAN_BITFIELD) u8 rsvd : 3, flag : 1, @@ -62,17 +62,31 @@ struct smc_clc_msg_local { /* header2 of clc messages */ u8 mac[6]; /* mac of ib_device port */ }; -struct smc_clc_msg_proposal { /* clc proposal message */ - struct smc_clc_msg_hdr hdr; - struct smc_clc_msg_local lcl; - __be16 iparea_offset; /* offset to IP address information area */ +struct smc_clc_ipv6_prefix { + u8 prefix[4]; + u8 prefix_len; +} __packed; + +struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ __be32 outgoing_subnet; /* subnet mask */ u8 prefix_len; /* number of significant bits in mask */ u8 reserved[2]; u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */ - struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ } __aligned(4); +struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ + struct smc_clc_msg_hdr hdr; + struct smc_clc_msg_local lcl; + __be16 iparea_offset; /* offset to IP address information area */ +} __aligned(4); + +#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28 +#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix)) +#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \ + SMC_CLC_PROPOSAL_MAX_OFFSET + \ + SMC_CLC_PROPOSAL_MAX_PREFIX + \ + sizeof(struct smc_clc_msg_trail)) + struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; struct smc_clc_msg_local lcl; @@ -102,6 +116,14 @@ struct smc_clc_msg_decline { /* clc decline message */ struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ } __aligned(4); +/* determine start of the prefix area within the proposal message */ +static inline struct smc_clc_msg_proposal_prefix * +smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) +{ + return (struct smc_clc_msg_proposal_prefix *) + ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); +} + struct smc_sock; struct smc_ib_device; -- cgit v1.2.3-59-g8ed1b From 3163c5071f25e36567608cde0df21b198ee5fbeb Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 24 Jan 2018 10:28:12 +0100 Subject: net/smc: use local struct sock variables consistently Cleanup to consistently exploit the local struct sock definitions. No functional change. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 33 +++++++++++++++++---------------- net/smc/smc_close.c | 38 ++++++++++++++++++++------------------ 2 files changed, 37 insertions(+), 34 deletions(-) (limited to 'net/smc') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index daf8075f5a4c..eccccf743b9f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -581,39 +581,39 @@ out_err: static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) { - struct sock *sk = &lsmc->sk; - struct socket *new_clcsock; + struct socket *new_clcsock = NULL; + struct sock *lsk = &lsmc->sk; struct sock *new_sk; int rc; - release_sock(&lsmc->sk); - new_sk = smc_sock_alloc(sock_net(sk), NULL); + release_sock(lsk); + new_sk = smc_sock_alloc(sock_net(lsk), NULL); if (!new_sk) { rc = -ENOMEM; - lsmc->sk.sk_err = ENOMEM; + lsk->sk_err = ENOMEM; *new_smc = NULL; - lock_sock(&lsmc->sk); + lock_sock(lsk); goto out; } *new_smc = smc_sk(new_sk); rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); - lock_sock(&lsmc->sk); + lock_sock(lsk); if (rc < 0) { - lsmc->sk.sk_err = -rc; + lsk->sk_err = -rc; new_sk->sk_state = SMC_CLOSED; sock_set_flag(new_sk, SOCK_DEAD); - sk->sk_prot->unhash(new_sk); + new_sk->sk_prot->unhash(new_sk); sock_put(new_sk); *new_smc = NULL; goto out; } - if (lsmc->sk.sk_state == SMC_CLOSED) { + if (lsk->sk_state == SMC_CLOSED) { if (new_clcsock) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; sock_set_flag(new_sk, SOCK_DEAD); - sk->sk_prot->unhash(new_sk); + new_sk->sk_prot->unhash(new_sk); sock_put(new_sk); *new_smc = NULL; goto out; @@ -936,11 +936,12 @@ static void smc_tcp_listen_work(struct work_struct *work) { struct smc_sock *lsmc = container_of(work, struct smc_sock, tcp_listen_work); + struct sock *lsk = &lsmc->sk; struct smc_sock *new_smc; int rc = 0; - lock_sock(&lsmc->sk); - while (lsmc->sk.sk_state == SMC_LISTEN) { + lock_sock(lsk); + while (lsk->sk_state == SMC_LISTEN) { rc = smc_clcsock_accept(lsmc, &new_smc); if (rc) goto out; @@ -949,15 +950,15 @@ static void smc_tcp_listen_work(struct work_struct *work) new_smc->listen_smc = lsmc; new_smc->use_fallback = false; /* assume rdma capability first*/ - sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */ + sock_hold(lsk); /* sock_put in smc_listen_work */ INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); smc_copy_sock_settings_to_smc(new_smc); schedule_work(&new_smc->smc_listen_work); } out: - release_sock(&lsmc->sk); - lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */ + release_sock(lsk); + lsk->sk_data_ready(lsk); /* no more listening, wake accept */ } static int smc_listen(struct socket *sock, int backlog) diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index e194c6cc308a..11793912f001 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -115,36 +115,38 @@ static int smc_close_abort(struct smc_connection *conn) */ static void smc_close_active_abort(struct smc_sock *smc) { + struct sock *sk = &smc->sk; + struct smc_cdc_conn_state_flags *txflags = &smc->conn.local_tx_ctrl.conn_state_flags; - smc->sk.sk_err = ECONNABORTED; + sk->sk_err = ECONNABORTED; if (smc->clcsock && smc->clcsock->sk) { smc->clcsock->sk->sk_err = ECONNABORTED; smc->clcsock->sk->sk_state_change(smc->clcsock->sk); } - switch (smc->sk.sk_state) { + switch (sk->sk_state) { case SMC_INIT: case SMC_ACTIVE: - smc->sk.sk_state = SMC_PEERABORTWAIT; + sk->sk_state = SMC_PEERABORTWAIT; break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: txflags->peer_conn_abort = 1; sock_release(smc->clcsock); if (!smc_cdc_rxed_any_close(&smc->conn)) - smc->sk.sk_state = SMC_PEERABORTWAIT; + sk->sk_state = SMC_PEERABORTWAIT; else - smc->sk.sk_state = SMC_CLOSED; + sk->sk_state = SMC_CLOSED; break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: if (!txflags->peer_conn_closed) { - smc->sk.sk_state = SMC_PEERABORTWAIT; + sk->sk_state = SMC_PEERABORTWAIT; txflags->peer_conn_abort = 1; sock_release(smc->clcsock); } else { - smc->sk.sk_state = SMC_CLOSED; + sk->sk_state = SMC_CLOSED; } break; case SMC_PROCESSABORT: @@ -153,7 +155,7 @@ static void smc_close_active_abort(struct smc_sock *smc) txflags->peer_conn_abort = 1; sock_release(smc->clcsock); } - smc->sk.sk_state = SMC_CLOSED; + sk->sk_state = SMC_CLOSED; break; case SMC_PEERFINCLOSEWAIT: case SMC_PEERABORTWAIT: @@ -161,8 +163,8 @@ static void smc_close_active_abort(struct smc_sock *smc) break; } - sock_set_flag(&smc->sk, SOCK_DEAD); - smc->sk.sk_state_change(&smc->sk); + sock_set_flag(sk, SOCK_DEAD); + sk->sk_state_change(sk); } static inline bool smc_close_sent_any_close(struct smc_connection *conn) @@ -278,7 +280,7 @@ again: } if (old_state != sk->sk_state) - sk->sk_state_change(&smc->sk); + sk->sk_state_change(sk); return rc; } @@ -331,7 +333,7 @@ static void smc_close_passive_work(struct work_struct *work) struct sock *sk = &smc->sk; int old_state; - lock_sock(&smc->sk); + lock_sock(sk); old_state = sk->sk_state; if (!conn->alert_token_local) { @@ -340,7 +342,7 @@ static void smc_close_passive_work(struct work_struct *work) goto wakeup; } - rxflags = &smc->conn.local_rx_ctrl.conn_state_flags; + rxflags = &conn->local_rx_ctrl.conn_state_flags; if (rxflags->peer_conn_abort) { smc_close_passive_abort_received(smc); goto wakeup; @@ -348,7 +350,7 @@ static void smc_close_passive_work(struct work_struct *work) switch (sk->sk_state) { case SMC_INIT: - if (atomic_read(&smc->conn.bytes_to_rcv) || + if (atomic_read(&conn->bytes_to_rcv) || (rxflags->peer_done_writing && !smc_cdc_rxed_any_close(conn))) sk->sk_state = SMC_APPCLOSEWAIT1; @@ -365,7 +367,7 @@ static void smc_close_passive_work(struct work_struct *work) /* to check for closing */ case SMC_PEERCLOSEWAIT2: case SMC_PEERFINCLOSEWAIT: - if (!smc_cdc_rxed_any_close(&smc->conn)) + if (!smc_cdc_rxed_any_close(conn)) break; if (sock_flag(sk, SOCK_DEAD) && smc_close_sent_any_close(conn)) { @@ -394,12 +396,12 @@ wakeup: sk->sk_state_change(sk); if ((sk->sk_state == SMC_CLOSED) && (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { - smc_conn_free(&smc->conn); + smc_conn_free(conn); schedule_delayed_work(&smc->sock_put_work, SMC_CLOSE_SOCK_PUT_DELAY); } } - release_sock(&smc->sk); + release_sock(sk); } void smc_close_sock_put_work(struct work_struct *work) @@ -462,7 +464,7 @@ again: } if (old_state != sk->sk_state) - sk->sk_state_change(&smc->sk); + sk->sk_state_change(sk); return rc; } -- cgit v1.2.3-59-g8ed1b From 35a6b17847175c4a6bdb0a16c1692627ca56b7f1 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 24 Jan 2018 10:28:13 +0100 Subject: net/smc: simplify function smc_clcsock_accept() Cleanup to avoid duplicate code in smc_clcsock_accept(). No functional change. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'net/smc') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index eccccf743b9f..05cbcd3a6f60 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -599,16 +599,9 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); lock_sock(lsk); - if (rc < 0) { + if (rc < 0) lsk->sk_err = -rc; - new_sk->sk_state = SMC_CLOSED; - sock_set_flag(new_sk, SOCK_DEAD); - new_sk->sk_prot->unhash(new_sk); - sock_put(new_sk); - *new_smc = NULL; - goto out; - } - if (lsk->sk_state == SMC_CLOSED) { + if (rc < 0 || lsk->sk_state == SMC_CLOSED) { if (new_clcsock) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; -- cgit v1.2.3-59-g8ed1b From 8429c1343519d7452b8ccd9af9716147512831c9 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 24 Jan 2018 10:28:14 +0100 Subject: net/smc: get rid of tx_pend waits in socket closing There is no need to wait for confirmation of pending tx requests for a closing connection, since pending tx slots are dismissed when finishing a connection. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 11 ----------- net/smc/smc_cdc.h | 1 - net/smc/smc_close.c | 25 ------------------------- net/smc/smc_wr.c | 18 ------------------ net/smc/smc_wr.h | 2 -- 5 files changed, 57 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index d4155ff6acde..51805334e001 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -57,9 +57,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, cdcpend->conn); } smc_tx_sndbuf_nonfull(smc); - if (smc->sk.sk_state != SMC_ACTIVE) - /* wake up smc_close_wait_tx_pends() */ - smc->sk.sk_state_change(&smc->sk); bh_unlock_sock(&smc->sk); } @@ -155,14 +152,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn) (unsigned long)conn); } -bool smc_cdc_tx_has_pending(struct smc_connection *conn) -{ - struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; - - return smc_wr_tx_has_pending(link, SMC_CDC_MSG_TYPE, - smc_cdc_tx_filter, (unsigned long)conn); -} - /********************************* receive ***********************************/ static inline bool smc_cdc_before(u16 seq1, u16 seq2) diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 149ceda1b088..ab240b37ad11 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -214,7 +214,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); -bool smc_cdc_tx_has_pending(struct smc_connection *conn); int smc_cdc_init(void) __init; #endif /* SMC_CDC_H */ diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 11793912f001..bc539ccb8fa0 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -19,8 +19,6 @@ #include "smc_cdc.h" #include "smc_close.h" -#define SMC_CLOSE_WAIT_TX_PENDS_TIME (5 * HZ) - static void smc_close_cleanup_listen(struct sock *parent) { struct sock *sk; @@ -30,26 +28,6 @@ static void smc_close_cleanup_listen(struct sock *parent) smc_close_non_accepted(sk); } -static void smc_close_wait_tx_pends(struct smc_sock *smc) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - struct sock *sk = &smc->sk; - signed long timeout; - - timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME; - add_wait_queue(sk_sleep(sk), &wait); - while (!signal_pending(current) && timeout) { - int rc; - - rc = sk_wait_event(sk, &timeout, - !smc_cdc_tx_has_pending(&smc->conn), - &wait); - if (rc) - break; - } - remove_wait_queue(sk_sleep(sk), &wait); -} - /* wait for sndbuf data being transmitted */ static void smc_close_stream_wait(struct smc_sock *smc, long timeout) { @@ -230,7 +208,6 @@ again: rc = smc_close_final(conn); } sk->sk_state = SMC_CLOSED; - smc_close_wait_tx_pends(smc); break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: @@ -251,7 +228,6 @@ again: else /* peer has just issued a shutdown write */ sk->sk_state = SMC_PEERFINCLOSEWAIT; - smc_close_wait_tx_pends(smc); break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: @@ -271,7 +247,6 @@ again: lock_sock(sk); smc_close_abort(conn); sk->sk_state = SMC_CLOSED; - smc_close_wait_tx_pends(smc); break; case SMC_PEERABORTWAIT: case SMC_CLOSED: diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index de4537f66832..a4477b4a9c18 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -319,24 +319,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type, } } -bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type, - smc_wr_tx_filter filter, unsigned long data) -{ - struct smc_wr_tx_pend_priv *tx_pend; - struct smc_wr_rx_hdr *wr_rx; - int i; - - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i]; - if (wr_rx->type != wr_rx_hdr_type) - continue; - tx_pend = &link->wr_tx_pends[i].priv; - if (filter(tx_pend, data)) - return true; - } - return false; -} - /****************************** receive queue ********************************/ int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler) diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 2acf12b06063..ef0c3494c9cb 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -93,8 +93,6 @@ int smc_wr_tx_put_slot(struct smc_link *link, int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); -bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type, - smc_wr_tx_filter filter, unsigned long data); void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, -- cgit v1.2.3-59-g8ed1b From 86e780d3a312faad967d2cfd5281f6bae81c0e55 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 24 Jan 2018 10:28:15 +0100 Subject: net/smc: make wait for work request uninterruptible Work requests are needed for every ib_post_send(), among them the ib_post_send() to signal closing. If an smc socket program is cancelled, the smc connections should be cleaned up, and require sending of closing signals to the peer. This may fail, if a wait for a free work request is needed, but is cancelled immediately due to the cancel interrupt. To guarantee notification of the peer, the wait for a work request is changed to uninterruptible. And the area to receive work request completion info with ib_poll_cq() is cleared first. And _tx_ variable names are used in the _tx_routines for the demultiplexing common type in the header. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_wr.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index a4477b4a9c18..5ed94109d1d6 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -122,6 +122,7 @@ static void smc_wr_tx_tasklet_fn(unsigned long data) again: polled++; do { + memset(&wc, 0, sizeof(wc)); rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc); if (polled == 1) { ib_req_notify_cq(dev->roce_cq_send, @@ -185,7 +186,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, if (rc) return rc; } else { - rc = wait_event_interruptible_timeout( + rc = wait_event_timeout( link->wr_tx_wait, (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); @@ -198,8 +199,6 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, smc_lgr_terminate(lgr); return -EPIPE; } - if (rc == -ERESTARTSYS) - return -EINTR; if (idx == link->wr_tx_cnt) return -EPIPE; } @@ -300,18 +299,18 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) return rc; } -void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type, +void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, unsigned long data) { struct smc_wr_tx_pend_priv *tx_pend; - struct smc_wr_rx_hdr *wr_rx; + struct smc_wr_rx_hdr *wr_tx; int i; for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i]; - if (wr_rx->type != wr_rx_hdr_type) + wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i]; + if (wr_tx->type != wr_tx_hdr_type) continue; tx_pend = &link->wr_tx_pends[i].priv; if (filter(tx_pend, data)) -- cgit v1.2.3-59-g8ed1b From bbb96bf2366e502fd16a8082f723c570e50269e8 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 24 Jan 2018 10:28:16 +0100 Subject: net/smc: improve state change handling after close wait When a socket is closed or shutdown, smc waits for data being transmitted in certain states. If the state changes during this wait, the close switch depending on state should be reentered. In addition, state change is avoided if sending of close or shutdown fails. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_close.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index bc539ccb8fa0..1468a2a3cdf4 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -165,9 +165,9 @@ int smc_close_active(struct smc_sock *smc) 0 : sock_flag(sk, SOCK_LINGER) ? sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; -again: old_state = sk->sk_state; - switch (old_state) { +again: + switch (sk->sk_state) { case SMC_INIT: sk->sk_state = SMC_CLOSED; if (smc->smc_listen_work.func) @@ -194,6 +194,8 @@ again: if (sk->sk_state == SMC_ACTIVE) { /* send close request */ rc = smc_close_final(conn); + if (rc) + break; sk->sk_state = SMC_PEERCLOSEWAIT1; } else { /* peer event has changed the state */ @@ -206,6 +208,8 @@ again: !smc_close_sent_any_close(conn)) { /* just shutdown wr done, send close request */ rc = smc_close_final(conn); + if (rc) + break; } sk->sk_state = SMC_CLOSED; break; @@ -216,12 +220,13 @@ again: release_sock(sk); cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); - if (sk->sk_err != ECONNABORTED) { - /* confirm close from peer */ - rc = smc_close_final(conn); - if (rc) - break; - } + if (sk->sk_state != SMC_APPCLOSEWAIT1 && + sk->sk_state != SMC_APPCLOSEWAIT2) + goto again; + /* confirm close from peer */ + rc = smc_close_final(conn); + if (rc) + break; if (smc_cdc_rxed_any_close(conn)) /* peer has closed the socket already */ sk->sk_state = SMC_CLOSED; @@ -235,6 +240,8 @@ again: !smc_close_sent_any_close(conn)) { /* just shutdown wr done, send close request */ rc = smc_close_final(conn); + if (rc) + break; } /* peer sending PeerConnectionClosed will cause transition */ break; @@ -401,20 +408,21 @@ int smc_close_shutdown_write(struct smc_sock *smc) 0 : sock_flag(sk, SOCK_LINGER) ? sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; -again: old_state = sk->sk_state; - switch (old_state) { +again: + switch (sk->sk_state) { case SMC_ACTIVE: smc_close_stream_wait(smc, timeout); release_sock(sk); cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); + if (sk->sk_state != SMC_ACTIVE) + goto again; /* send close wr request */ rc = smc_close_wr(conn); - if (sk->sk_state == SMC_ACTIVE) - sk->sk_state = SMC_PEERCLOSEWAIT1; - else - goto again; + if (rc) + break; + sk->sk_state = SMC_PEERCLOSEWAIT1; break; case SMC_APPCLOSEWAIT1: /* passive close */ @@ -423,8 +431,12 @@ again: release_sock(sk); cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); + if (sk->sk_state != SMC_APPCLOSEWAIT1) + goto again; /* confirm close from peer */ rc = smc_close_wr(conn); + if (rc) + break; sk->sk_state = SMC_APPCLOSEWAIT2; break; case SMC_APPCLOSEWAIT2: -- cgit v1.2.3-59-g8ed1b From aa377e682d0703452642d4059a94ab7fdaa90b89 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 24 Jan 2018 10:28:17 +0100 Subject: net/smc: continue waiting if peer signals write_shutdown If the peer sends a shutdown WRITE, this should not affect sending in general, and waiting for send buffer space in particular. Stop waiting of the local socket for send buffer space only, if peer signals closing, but not if peer signals just shutdown WRITE. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_tx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 2e50fddf8ce9..fea6482233a6 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -86,7 +86,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags) rc = -EPIPE; break; } - if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) { + if (smc_cdc_rxed_any_close(conn)) { rc = -ECONNRESET; break; } @@ -107,7 +107,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags) sk_wait_event(sk, &timeo, sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN) || - smc_cdc_rxed_any_close_or_senddone(conn) || + smc_cdc_rxed_any_close(conn) || atomic_read(&conn->sndbuf_space), &wait); } -- cgit v1.2.3-59-g8ed1b From 611b63a12732635ba7bdc8578b42a1cc40f56a13 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 25 Jan 2018 11:15:31 +0100 Subject: net/smc: cancel tx worker in case of socket aborts If an SMC socket is aborted, the tx worker should be cancelled. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_close.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 1468a2a3cdf4..6de909612bd0 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -107,6 +107,9 @@ static void smc_close_active_abort(struct smc_sock *smc) case SMC_INIT: case SMC_ACTIVE: sk->sk_state = SMC_PEERABORTWAIT; + release_sock(sk); + cancel_delayed_work_sync(&smc->conn.tx_work); + lock_sock(sk); break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: @@ -116,6 +119,9 @@ static void smc_close_active_abort(struct smc_sock *smc) sk->sk_state = SMC_PEERABORTWAIT; else sk->sk_state = SMC_CLOSED; + release_sock(sk); + cancel_delayed_work_sync(&smc->conn.tx_work); + lock_sock(sk); break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: @@ -249,9 +255,6 @@ again: /* peer sending PeerConnectionClosed will cause transition */ break; case SMC_PROCESSABORT: - release_sock(sk); - cancel_delayed_work_sync(&conn->tx_work); - lock_sock(sk); smc_close_abort(conn); sk->sk_state = SMC_CLOSED; break; @@ -327,6 +330,9 @@ static void smc_close_passive_work(struct work_struct *work) rxflags = &conn->local_rx_ctrl.conn_state_flags; if (rxflags->peer_conn_abort) { smc_close_passive_abort_received(smc); + release_sock(&smc->sk); + cancel_delayed_work_sync(&conn->tx_work); + lock_sock(&smc->sk); goto wakeup; } -- cgit v1.2.3-59-g8ed1b From 5ac92a00aa743e76e86503d58f092830486af39b Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 25 Jan 2018 11:15:32 +0100 Subject: net/smc: handle state SMC_PEERFINCLOSEWAIT correctly A state transition from closing state SMC_PEERFINCLOSEWAIT to closing state SMC_APPFINCLOSEWAIT is not allowed. Once a closing indication from the peer has been received, the socket reaches state SMC_CLOSED. And receiving a peer_conn_abort just changes the state of the socket into one of the states SMC_PROCESSABORT or SMC_CLOSED; sending a peer_conn_abort occurs in smc_close_active() for state SMC_PROCESSABORT only. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_close.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 6de909612bd0..3dc109f5db56 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -280,7 +280,6 @@ static void smc_close_passive_abort_received(struct smc_sock *smc) case SMC_APPFINCLOSEWAIT: case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: - smc_close_abort(&smc->conn); sk->sk_state = SMC_PROCESSABORT; break; case SMC_PEERCLOSEWAIT1: @@ -288,7 +287,6 @@ static void smc_close_passive_abort_received(struct smc_sock *smc) if (txflags->peer_done_writing && !smc_close_sent_any_close(&smc->conn)) { /* just shutdown, but not yet closed locally */ - smc_close_abort(&smc->conn); sk->sk_state = SMC_PROCESSABORT; } else { sk->sk_state = SMC_CLOSED; @@ -354,7 +352,6 @@ static void smc_close_passive_work(struct work_struct *work) /* fall through */ /* to check for closing */ case SMC_PEERCLOSEWAIT2: - case SMC_PEERFINCLOSEWAIT: if (!smc_cdc_rxed_any_close(conn)) break; if (sock_flag(sk, SOCK_DEAD) && @@ -366,6 +363,10 @@ static void smc_close_passive_work(struct work_struct *work) sk->sk_state = SMC_APPFINCLOSEWAIT; } break; + case SMC_PEERFINCLOSEWAIT: + if (smc_cdc_rxed_any_close(conn)) + sk->sk_state = SMC_CLOSED; + break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: case SMC_APPFINCLOSEWAIT: -- cgit v1.2.3-59-g8ed1b From b4772b3a87b772401e2af1c894fef323fb5c6e7c Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 25 Jan 2018 11:15:33 +0100 Subject: net/smc: terminate link group for ib_post_send problems If ib_post_send() fails, terminate all connections of this link group. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_tx.c | 4 +++- net/smc/smc_wr.c | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index fea6482233a6..71b7d9f079f0 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -248,8 +248,10 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, peer_rmbe_offset; rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr); - if (rc) + if (rc) { conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; + smc_lgr_terminate(lgr); + } return rc; } diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 5ed94109d1d6..621c65850a18 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -248,8 +248,14 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) pend = container_of(priv, struct smc_wr_tx_pend, priv); rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], &failed_wr); - if (rc) + if (rc) { + struct smc_link_group *lgr = + container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + smc_wr_tx_put_slot(link, priv); + smc_lgr_terminate(lgr); + } return rc; } -- cgit v1.2.3-59-g8ed1b From 610db66f377cf99aa07a2b1990727238f2e2d6d3 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 25 Jan 2018 11:15:34 +0100 Subject: net/smc: do not reuse a linkgroup with setup problems Once a linkgroup is created successfully, it stays alive for a certain time to service more connections potentially created. If one of the initialization steps for a new linkgroup fails, the linkgroup should not be reused by other connections following. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 17 +++++++++++++++++ net/smc/smc_core.c | 3 +++ 2 files changed, 20 insertions(+) (limited to 'net/smc') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 05cbcd3a6f60..cf0e11978b66 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -377,6 +377,15 @@ static void smc_link_save_peer_info(struct smc_link *link, link->peer_mtu = clc->qp_mtu; } +static void smc_lgr_forget(struct smc_link_group *lgr) +{ + spin_lock_bh(&smc_lgr_list.lock); + /* do not use this link group for new connections */ + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); + spin_unlock_bh(&smc_lgr_list.lock); +} + /* setup for RDMA connection of client */ static int smc_connect_rdma(struct smc_sock *smc) { @@ -513,6 +522,8 @@ out_connected: return rc ? rc : local_contact; decline_rdma_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); smc_conn_free(&smc->conn); decline_rdma: @@ -526,6 +537,8 @@ decline_rdma: goto out_connected; out_err_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); smc_conn_free(&smc->conn); out_err: @@ -906,6 +919,8 @@ enqueue: return; decline_rdma_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); decline_rdma: /* RDMA setup failed, switch back to TCP */ @@ -918,6 +933,8 @@ decline_rdma: goto out_connected; out_err_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); out_err: newsmcsk->sk_state = SMC_CLOSED; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 94f21116dac5..7406cbb41856 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -128,6 +128,8 @@ static void smc_lgr_free_work(struct work_struct *work) bool conns; spin_lock_bh(&smc_lgr_list.lock); + if (list_empty(&lgr->list)) + goto free; read_lock_bh(&lgr->conns_lock); conns = RB_EMPTY_ROOT(&lgr->conns_all); read_unlock_bh(&lgr->conns_lock); @@ -136,6 +138,7 @@ static void smc_lgr_free_work(struct work_struct *work) return; } list_del_init(&lgr->list); /* remove from smc_lgr_list */ +free: spin_unlock_bh(&smc_lgr_list.lock); smc_lgr_free(lgr); } -- cgit v1.2.3-59-g8ed1b From 732720fafdb04ad8daec36ba7f09b44641ebc13f Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 25 Jan 2018 11:15:35 +0100 Subject: net/smc: wake up wr_reg_wait when terminating a link group If a new connection with a new rmb is added to a link group, its memory region is registered. If a link group is terminated, a pending registration requires a wake up. And consolidate setting of tx_flag peer_conn_abort in smc_lgr_terminate(). Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_close.c | 14 ++++++-------- net/smc/smc_core.c | 8 +++++--- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 3dc109f5db56..babe05d385e7 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -89,7 +89,7 @@ static int smc_close_abort(struct smc_connection *conn) } /* terminate smc socket abnormally - active abort - * RDMA communication no longer possible + * link group is terminated, i.e. RDMA communication no longer possible */ static void smc_close_active_abort(struct smc_sock *smc) { @@ -113,7 +113,6 @@ static void smc_close_active_abort(struct smc_sock *smc) break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: - txflags->peer_conn_abort = 1; sock_release(smc->clcsock); if (!smc_cdc_rxed_any_close(&smc->conn)) sk->sk_state = SMC_PEERABORTWAIT; @@ -127,7 +126,6 @@ static void smc_close_active_abort(struct smc_sock *smc) case SMC_PEERCLOSEWAIT2: if (!txflags->peer_conn_closed) { sk->sk_state = SMC_PEERABORTWAIT; - txflags->peer_conn_abort = 1; sock_release(smc->clcsock); } else { sk->sk_state = SMC_CLOSED; @@ -135,10 +133,8 @@ static void smc_close_active_abort(struct smc_sock *smc) break; case SMC_PROCESSABORT: case SMC_APPFINCLOSEWAIT: - if (!txflags->peer_conn_closed) { - txflags->peer_conn_abort = 1; + if (!txflags->peer_conn_closed) sock_release(smc->clcsock); - } sk->sk_state = SMC_CLOSED; break; case SMC_PEERFINCLOSEWAIT: @@ -303,8 +299,9 @@ static void smc_close_passive_abort_received(struct smc_sock *smc) } } -/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort, - * or peer_done_writing. +/* Either some kind of closing has been received: peer_conn_closed, + * peer_conn_abort, or peer_done_writing + * or the link group of the connection terminates abnormally. */ static void smc_close_passive_work(struct work_struct *work) { @@ -327,6 +324,7 @@ static void smc_close_passive_work(struct work_struct *work) rxflags = &conn->local_rx_ctrl.conn_state_flags; if (rxflags->peer_conn_abort) { + /* peer has not received all data */ smc_close_passive_abort_received(smc); release_sock(&smc->sk); cancel_delayed_work_sync(&conn->tx_work); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 7406cbb41856..ed5b46d1fe41 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -234,9 +234,7 @@ static void smc_buf_unuse(struct smc_connection *conn) /* remove a finished connection from its link group */ void smc_conn_free(struct smc_connection *conn) { - struct smc_link_group *lgr = conn->lgr; - - if (!lgr) + if (!conn->lgr) return; smc_cdc_tx_dismiss_slots(conn); smc_lgr_unregister_conn(conn); @@ -331,12 +329,16 @@ void smc_lgr_terminate(struct smc_link_group *lgr) conn = rb_entry(node, struct smc_connection, alert_node); smc = container_of(conn, struct smc_sock, conn); sock_hold(&smc->sk); + conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; __smc_lgr_unregister_conn(conn); + write_unlock_bh(&lgr->conns_lock); schedule_work(&conn->close_work); + write_lock_bh(&lgr->conns_lock); sock_put(&smc->sk); node = rb_first(&lgr->conns_all); } write_unlock_bh(&lgr->conns_lock); + wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); } /* Determine vlan of internal TCP socket. -- cgit v1.2.3-59-g8ed1b From 1a0a04c7a82c4c4667ab5a9660dc37f6d365d9d3 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 25 Jan 2018 11:15:36 +0100 Subject: net/smc: check for healthy link group resp. connections If a problem for at least one connection of a link group is detected, the whole link group and all its connections are terminated. This patch adds a check for healthy link group when trying to reserve a work request, and checks for healthy connections before starting a tx worker. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 9 +++++++-- net/smc/smc_diag.c | 6 ++++-- net/smc/smc_tx.c | 15 ++++++++++++--- net/smc/smc_wr.c | 11 ++++++----- 4 files changed, 29 insertions(+), 12 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 51805334e001..6e8f5fbe0f09 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -65,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_cdc_tx_pend **pend) { struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + int rc; - return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, - (struct smc_wr_tx_pend_priv **)pend); + rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, + (struct smc_wr_tx_pend_priv **)pend); + if (!conn->alert_token_local) + /* abnormal termination */ + rc = -EPIPE; + return rc; } static inline void smc_cdc_add_pending_send(struct smc_connection *conn, diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index d2d01cf70224..427b91c1c964 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) goto errout; - if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) { + if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && + smc->conn.alert_token_local) { struct smc_connection *conn = &smc->conn; struct smc_diag_conninfo cinfo = { .token = conn->alert_token_local, @@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, goto errout; } - if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) { + if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr && + !list_empty(&smc->conn.lgr->list)) { struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport, diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 71b7d9f079f0..838bce20c361 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -408,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) goto out_unlock; } rc = 0; - schedule_delayed_work(&conn->tx_work, - SMC_TX_WORK_DELAY); + if (conn->alert_token_local) /* connection healthy */ + schedule_delayed_work(&conn->tx_work, + SMC_TX_WORK_DELAY); } goto out_unlock; } @@ -440,10 +441,17 @@ static void smc_tx_work(struct work_struct *work) int rc; lock_sock(&smc->sk); + if (smc->sk.sk_err || + !conn->alert_token_local || + conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) + goto out; + rc = smc_tx_sndbuf_nonempty(conn); if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked && !atomic_read(&conn->bytes_to_rcv)) conn->local_rx_ctrl.prod_flags.write_blocked = 0; + +out: release_sock(&smc->sk); } @@ -464,7 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn) ((to_confirm > conn->rmbe_update_limit) && ((to_confirm > (conn->rmbe_size / 2)) || conn->local_rx_ctrl.prod_flags.write_blocked))) { - if (smc_cdc_get_slot_and_msg_send(conn) < 0) { + if ((smc_cdc_get_slot_and_msg_send(conn) < 0) && + conn->alert_token_local) { /* connection healthy */ schedule_delayed_work(&conn->tx_work, SMC_TX_WORK_DELAY); return; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 621c65850a18..1b8af23e6e2b 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -174,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, struct smc_wr_tx_pend_priv **wr_pend_priv) { struct smc_wr_tx_pend *wr_pend; + u32 idx = link->wr_tx_cnt; struct ib_send_wr *wr_ib; u64 wr_id; - u32 idx; int rc; *wr_buf = NULL; @@ -186,16 +186,17 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, if (rc) return rc; } else { + struct smc_link_group *lgr; + + lgr = container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); rc = wait_event_timeout( link->wr_tx_wait, + list_empty(&lgr->list) || /* lgr terminated */ (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); if (!rc) { /* timeout - terminate connections */ - struct smc_link_group *lgr; - - lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); smc_lgr_terminate(lgr); return -EPIPE; } -- cgit v1.2.3-59-g8ed1b From da05bf2981f1035fc37d4253ccf0692faf19b8c0 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 26 Jan 2018 09:28:46 +0100 Subject: net/smc: handle device, port, and QP error events RoCE device changes cause an IB event, processed in the global event handler for the ROCE device. Problems for a certain Queue Pair cause a QP event, processed in the QP event handler for this QP. Among those events are port errors and other fatal device errors. All link groups using such a port or device must be terminated in those cases. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_ib.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 90f1a7f9085c..2a8957bd6d38 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -141,6 +141,17 @@ out: return rc; } +static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) +{ + struct smc_link_group *lgr, *l; + + list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { + if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && + lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) + smc_lgr_terminate(lgr); + } +} + /* process context wrapper for might_sleep smc_ib_remember_port_attr */ static void smc_ib_port_event_work(struct work_struct *work) { @@ -151,6 +162,8 @@ static void smc_ib_port_event_work(struct work_struct *work) for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) { smc_ib_remember_port_attr(smcibdev, port_idx + 1); clear_bit(port_idx, &smcibdev->port_event_mask); + if (!smc_ib_port_active(smcibdev, port_idx + 1)) + smc_ib_port_terminate(smcibdev, port_idx + 1); } } @@ -165,15 +178,7 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, switch (ibevent->event) { case IB_EVENT_PORT_ERR: - port_idx = ibevent->element.port_num - 1; - set_bit(port_idx, &smcibdev->port_event_mask); - schedule_work(&smcibdev->port_event_work); - /* fall through */ case IB_EVENT_DEVICE_FATAL: - /* tbd in follow-on patch: - * abnormal close of corresponding connections - */ - break; case IB_EVENT_PORT_ACTIVE: port_idx = ibevent->element.port_num - 1; set_bit(port_idx, &smcibdev->port_event_mask); @@ -186,7 +191,8 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, void smc_ib_dealloc_protection_domain(struct smc_link *lnk) { - ib_dealloc_pd(lnk->roce_pd); + if (lnk->roce_pd) + ib_dealloc_pd(lnk->roce_pd); lnk->roce_pd = NULL; } @@ -203,14 +209,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) { + struct smc_ib_device *smcibdev = + (struct smc_ib_device *)ibevent->device; + u8 port_idx; + switch (ibevent->event) { case IB_EVENT_DEVICE_FATAL: case IB_EVENT_GID_CHANGE: case IB_EVENT_PORT_ERR: case IB_EVENT_QP_ACCESS_ERR: - /* tbd in follow-on patch: - * abnormal close of corresponding connections - */ + port_idx = ibevent->element.port_num - 1; + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); break; default: break; @@ -219,7 +229,8 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) void smc_ib_destroy_queue_pair(struct smc_link *lnk) { - ib_destroy_qp(lnk->roce_qp); + if (lnk->roce_qp) + ib_destroy_qp(lnk->roce_qp); lnk->roce_qp = NULL; } @@ -462,6 +473,7 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) { if (!smcibdev->initialized) return; + smcibdev->initialized = 0; smc_wr_remove_dev(smcibdev); ib_unregister_event_handler(&smcibdev->event_handler); ib_destroy_cq(smcibdev->roce_cq_recv); -- cgit v1.2.3-59-g8ed1b From 8dce2786a2905e5b0ce49263910b4a47af75b752 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 26 Jan 2018 09:28:47 +0100 Subject: net/smc: smc_poll improvements Increase the socket refcount during poll wait. Take the socket lock before checking socket state. For a listening socket return a mask independent of state SMC_ACTIVE and cover errors or closed state as well. Get rid of the accept_q loop in smc_accept_poll(). Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 74 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 34 deletions(-) (limited to 'net/smc') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index cf0e11978b66..90c22a854f28 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1122,21 +1122,15 @@ out: static unsigned int smc_accept_poll(struct sock *parent) { - struct smc_sock *isk; - struct sock *sk; - - lock_sock(parent); - list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) { - sk = (struct sock *)isk; + struct smc_sock *isk = smc_sk(parent); + int mask = 0; - if (sk->sk_state == SMC_ACTIVE) { - release_sock(parent); - return POLLIN | POLLRDNORM; - } - } - release_sock(parent); + spin_lock(&isk->accept_q_lock); + if (!list_empty(&isk->accept_q)) + mask = POLLIN | POLLRDNORM; + spin_unlock(&isk->accept_q_lock); - return 0; + return mask; } static unsigned int smc_poll(struct file *file, struct socket *sock, @@ -1147,9 +1141,15 @@ static unsigned int smc_poll(struct file *file, struct socket *sock, struct smc_sock *smc; int rc; + if (!sk) + return POLLNVAL; + smc = smc_sk(sock->sk); + sock_hold(sk); + lock_sock(sk); if ((sk->sk_state == SMC_INIT) || smc->use_fallback) { /* delegate to CLC child sock */ + release_sock(sk); mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); /* if non-blocking connect finished ... */ lock_sock(sk); @@ -1161,37 +1161,43 @@ static unsigned int smc_poll(struct file *file, struct socket *sock, rc = smc_connect_rdma(smc); if (rc < 0) mask |= POLLERR; - else - /* success cases including fallback */ - mask |= POLLOUT | POLLWRNORM; + /* success cases including fallback */ + mask |= POLLOUT | POLLWRNORM; } } - release_sock(sk); } else { - sock_poll_wait(file, sk_sleep(sk), wait); - if (sk->sk_state == SMC_LISTEN) - /* woken up by sk_data_ready in smc_listen_work() */ - mask |= smc_accept_poll(sk); + if (sk->sk_state != SMC_CLOSED) { + release_sock(sk); + sock_poll_wait(file, sk_sleep(sk), wait); + lock_sock(sk); + } if (sk->sk_err) mask |= POLLERR; - if (atomic_read(&smc->conn.sndbuf_space) || - (sk->sk_shutdown & SEND_SHUTDOWN)) { - mask |= POLLOUT | POLLWRNORM; - } else { - sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - } - if (atomic_read(&smc->conn.bytes_to_rcv)) - mask |= POLLIN | POLLRDNORM; if ((sk->sk_shutdown == SHUTDOWN_MASK) || (sk->sk_state == SMC_CLOSED)) mask |= POLLHUP; - if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLIN | POLLRDNORM | POLLRDHUP; - if (sk->sk_state == SMC_APPCLOSEWAIT1) - mask |= POLLIN; + if (sk->sk_state == SMC_LISTEN) { + /* woken up by sk_data_ready in smc_listen_work() */ + mask = smc_accept_poll(sk); + } else { + if (atomic_read(&smc->conn.sndbuf_space) || + sk->sk_shutdown & SEND_SHUTDOWN) { + mask |= POLLOUT | POLLWRNORM; + } else { + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + } + if (atomic_read(&smc->conn.bytes_to_rcv)) + mask |= POLLIN | POLLRDNORM; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLIN | POLLRDNORM | POLLRDHUP; + if (sk->sk_state == SMC_APPCLOSEWAIT1) + mask |= POLLIN; + } } + release_sock(sk); + sock_put(sk); return mask; } -- cgit v1.2.3-59-g8ed1b From 51f1de79ad8ed3555fd01ae8fd432691d397684b Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 26 Jan 2018 09:28:48 +0100 Subject: net/smc: replace sock_put worker by socket refcounting Proper socket refcounting makes the sock_put worker obsolete. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 65 +++++++++++++++++++++++++++++------------------------ net/smc/smc.h | 1 - net/smc/smc_cdc.c | 20 +++++++++-------- net/smc/smc_close.c | 63 ++++++++++++++++++++++++++++++--------------------- net/smc/smc_close.h | 1 - net/smc/smc_core.c | 6 ++--- 6 files changed, 88 insertions(+), 68 deletions(-) (limited to 'net/smc') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 90c22a854f28..732a37ddbc21 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -115,7 +115,6 @@ static int smc_release(struct socket *sock) goto out; smc = smc_sk(sk); - sock_hold(sk); if (sk->sk_state == SMC_LISTEN) /* smc_close_non_accepted() is called and acquires * sock lock for child sockets again @@ -124,10 +123,7 @@ static int smc_release(struct socket *sock) else lock_sock(sk); - if (smc->use_fallback) { - sk->sk_state = SMC_CLOSED; - sk->sk_state_change(sk); - } else { + if (!smc->use_fallback) { rc = smc_close_active(smc); sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; @@ -136,20 +132,21 @@ static int smc_release(struct socket *sock) sock_release(smc->clcsock); smc->clcsock = NULL; } + if (smc->use_fallback) { + sock_put(sk); /* passive closing */ + sk->sk_state = SMC_CLOSED; + sk->sk_state_change(sk); + } /* detach socket */ sock_orphan(sk); sock->sk = NULL; - if (smc->use_fallback) { - schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN); - } else if (sk->sk_state == SMC_CLOSED) { + if (!smc->use_fallback && sk->sk_state == SMC_CLOSED) smc_conn_free(&smc->conn); - schedule_delayed_work(&smc->sock_put_work, - SMC_CLOSE_SOCK_PUT_DELAY); - } release_sock(sk); - sock_put(sk); + sk->sk_prot->unhash(sk); + sock_put(sk); /* final sock_put */ out: return rc; } @@ -181,7 +178,6 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock) INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); INIT_LIST_HEAD(&smc->accept_q); spin_lock_init(&smc->accept_q_lock); - INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work); sk->sk_prot->hash(sk); sk_refcnt_debug_inc(sk); @@ -399,6 +395,8 @@ static int smc_connect_rdma(struct smc_sock *smc) int rc = 0; u8 ibport; + sock_hold(&smc->sk); /* sock put in passive closing */ + if (!tcp_sk(smc->clcsock->sk)->syn_smc) { /* peer has not signalled SMC-capability */ smc->use_fallback = true; @@ -542,6 +540,8 @@ out_err_unlock: mutex_unlock(&smc_create_lgr_pending); smc_conn_free(&smc->conn); out_err: + if (smc->sk.sk_state == SMC_INIT) + sock_put(&smc->sk); /* passive closing */ return rc; } @@ -620,7 +620,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) new_sk->sk_state = SMC_CLOSED; sock_set_flag(new_sk, SOCK_DEAD); new_sk->sk_prot->unhash(new_sk); - sock_put(new_sk); + sock_put(new_sk); /* final */ *new_smc = NULL; goto out; } @@ -637,7 +637,7 @@ static void smc_accept_enqueue(struct sock *parent, struct sock *sk) { struct smc_sock *par = smc_sk(parent); - sock_hold(sk); + sock_hold(sk); /* sock_put in smc_accept_unlink () */ spin_lock(&par->accept_q_lock); list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q); spin_unlock(&par->accept_q_lock); @@ -653,7 +653,7 @@ static void smc_accept_unlink(struct sock *sk) list_del_init(&smc_sk(sk)->accept_q); spin_unlock(&par->accept_q_lock); sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk); - sock_put(sk); + sock_put(sk); /* sock_hold in smc_accept_enqueue */ } /* remove a sock from the accept queue to bind it to a new socket created @@ -671,7 +671,7 @@ struct sock *smc_accept_dequeue(struct sock *parent, smc_accept_unlink(new_sk); if (new_sk->sk_state == SMC_CLOSED) { new_sk->sk_prot->unhash(new_sk); - sock_put(new_sk); + sock_put(new_sk); /* final */ continue; } if (new_sock) @@ -686,14 +686,11 @@ void smc_close_non_accepted(struct sock *sk) { struct smc_sock *smc = smc_sk(sk); - sock_hold(sk); lock_sock(sk); if (!sk->sk_lingertime) /* wait for peer closing */ sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT; - if (smc->use_fallback) { - sk->sk_state = SMC_CLOSED; - } else { + if (!smc->use_fallback) { smc_close_active(smc); sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; @@ -706,14 +703,15 @@ void smc_close_non_accepted(struct sock *sk) sock_release(tcp); } if (smc->use_fallback) { - schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN); - } else if (sk->sk_state == SMC_CLOSED) { - smc_conn_free(&smc->conn); - schedule_delayed_work(&smc->sock_put_work, - SMC_CLOSE_SOCK_PUT_DELAY); + sock_put(sk); /* passive closing */ + sk->sk_state = SMC_CLOSED; + } else { + if (sk->sk_state == SMC_CLOSED) + smc_conn_free(&smc->conn); } release_sock(sk); - sock_put(sk); + sk->sk_prot->unhash(sk); + sock_put(sk); /* final sock_put */ } static int smc_serv_conf_first_link(struct smc_sock *smc) @@ -937,6 +935,8 @@ out_err_unlock: smc_lgr_forget(new_smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); out_err: + if (newsmcsk->sk_state == SMC_INIT) + sock_put(&new_smc->sk); /* passive closing */ newsmcsk->sk_state = SMC_CLOSED; smc_conn_free(&new_smc->conn); goto enqueue; /* queue new sock with sk_err set */ @@ -963,12 +963,15 @@ static void smc_tcp_listen_work(struct work_struct *work) sock_hold(lsk); /* sock_put in smc_listen_work */ INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); smc_copy_sock_settings_to_smc(new_smc); - schedule_work(&new_smc->smc_listen_work); + sock_hold(&new_smc->sk); /* sock_put in passive closing */ + if (!schedule_work(&new_smc->smc_listen_work)) + sock_put(&new_smc->sk); } out: release_sock(lsk); lsk->sk_data_ready(lsk); /* no more listening, wake accept */ + sock_put(&lsmc->sk); /* sock_hold in smc_listen */ } static int smc_listen(struct socket *sock, int backlog) @@ -1002,7 +1005,9 @@ static int smc_listen(struct socket *sock, int backlog) sk->sk_ack_backlog = 0; sk->sk_state = SMC_LISTEN; INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); - schedule_work(&smc->tcp_listen_work); + sock_hold(sk); /* sock_hold in tcp_listen_worker */ + if (!schedule_work(&smc->tcp_listen_work)) + sock_put(sk); out: release_sock(sk); @@ -1019,6 +1024,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock, int rc = 0; lsmc = smc_sk(sk); + sock_hold(sk); /* sock_put below */ lock_sock(sk); if (lsmc->sk.sk_state != SMC_LISTEN) { @@ -1053,6 +1059,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock, out: release_sock(sk); + sock_put(sk); /* sock_hold above */ return rc; } diff --git a/net/smc/smc.h b/net/smc/smc.h index 0bee9d16cf29..bfbe20234105 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -178,7 +178,6 @@ struct smc_sock { /* smc sock container */ struct work_struct smc_listen_work;/* prepare new accept socket */ struct list_head accept_q; /* sockets to be accepted */ spinlock_t accept_q_lock; /* protects accept_q */ - struct delayed_work sock_put_work; /* final socket freeing */ bool use_fallback; /* fallback to tcp */ u8 wait_close_tx_prepared : 1; /* shutdown wr or close diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 6e8f5fbe0f09..3cd086e5bd28 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -212,6 +212,14 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smc->sk.sk_data_ready(&smc->sk); } + /* piggy backed tx info */ + /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ + if (diff_cons && smc_tx_prepared_sends(conn)) { + smc_tx_sndbuf_nonempty(conn); + /* trigger socket release if connection closed */ + smc_close_wake_tx_prepared(smc); + } + if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) { smc->sk.sk_err = ECONNRESET; conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; @@ -221,15 +229,9 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, if (smc->clcsock && smc->clcsock->sk) smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; sock_set_flag(&smc->sk, SOCK_DONE); - schedule_work(&conn->close_work); - } - - /* piggy backed tx info */ - /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ - if (diff_cons && smc_tx_prepared_sends(conn)) { - smc_tx_sndbuf_nonempty(conn); - /* trigger socket release if connection closed */ - smc_close_wake_tx_prepared(smc); + sock_hold(&smc->sk); /* sock_put in close_work */ + if (!schedule_work(&conn->close_work)) + sock_put(&smc->sk); } } diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index babe05d385e7..4339852a8910 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -110,6 +110,7 @@ static void smc_close_active_abort(struct smc_sock *smc) release_sock(sk); cancel_delayed_work_sync(&smc->conn.tx_work); lock_sock(sk); + sock_put(sk); /* passive closing */ break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: @@ -125,11 +126,13 @@ static void smc_close_active_abort(struct smc_sock *smc) case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: if (!txflags->peer_conn_closed) { + /* just SHUTDOWN_SEND done */ sk->sk_state = SMC_PEERABORTWAIT; sock_release(smc->clcsock); } else { sk->sk_state = SMC_CLOSED; } + sock_put(sk); /* passive closing */ break; case SMC_PROCESSABORT: case SMC_APPFINCLOSEWAIT: @@ -138,6 +141,8 @@ static void smc_close_active_abort(struct smc_sock *smc) sk->sk_state = SMC_CLOSED; break; case SMC_PEERFINCLOSEWAIT: + sock_put(sk); /* passive closing */ + break; case SMC_PEERABORTWAIT: case SMC_CLOSED: break; @@ -229,12 +234,14 @@ again: rc = smc_close_final(conn); if (rc) break; - if (smc_cdc_rxed_any_close(conn)) + if (smc_cdc_rxed_any_close(conn)) { /* peer has closed the socket already */ sk->sk_state = SMC_CLOSED; - else + sock_put(sk); /* postponed passive closing */ + } else { /* peer has just issued a shutdown write */ sk->sk_state = SMC_PEERFINCLOSEWAIT; + } break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: @@ -272,27 +279,33 @@ static void smc_close_passive_abort_received(struct smc_sock *smc) struct sock *sk = &smc->sk; switch (sk->sk_state) { + case SMC_INIT: case SMC_ACTIVE: - case SMC_APPFINCLOSEWAIT: case SMC_APPCLOSEWAIT1: - case SMC_APPCLOSEWAIT2: + sk->sk_state = SMC_PROCESSABORT; + sock_put(sk); /* passive closing */ + break; + case SMC_APPFINCLOSEWAIT: sk->sk_state = SMC_PROCESSABORT; break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: if (txflags->peer_done_writing && - !smc_close_sent_any_close(&smc->conn)) { + !smc_close_sent_any_close(&smc->conn)) /* just shutdown, but not yet closed locally */ sk->sk_state = SMC_PROCESSABORT; - } else { + else sk->sk_state = SMC_CLOSED; - } + sock_put(sk); /* passive closing */ break; + case SMC_APPCLOSEWAIT2: case SMC_PEERFINCLOSEWAIT: + sk->sk_state = SMC_CLOSED; + sock_put(sk); /* passive closing */ + break; case SMC_PEERABORTWAIT: sk->sk_state = SMC_CLOSED; break; - case SMC_INIT: case SMC_PROCESSABORT: /* nothing to do, add tracing in future patch */ break; @@ -336,13 +349,18 @@ static void smc_close_passive_work(struct work_struct *work) case SMC_INIT: if (atomic_read(&conn->bytes_to_rcv) || (rxflags->peer_done_writing && - !smc_cdc_rxed_any_close(conn))) + !smc_cdc_rxed_any_close(conn))) { sk->sk_state = SMC_APPCLOSEWAIT1; - else + } else { sk->sk_state = SMC_CLOSED; + sock_put(sk); /* passive closing */ + } break; case SMC_ACTIVE: sk->sk_state = SMC_APPCLOSEWAIT1; + /* postpone sock_put() for passive closing to cover + * received SEND_SHUTDOWN as well + */ break; case SMC_PEERCLOSEWAIT1: if (rxflags->peer_done_writing) @@ -360,13 +378,20 @@ static void smc_close_passive_work(struct work_struct *work) /* just shutdown, but not yet closed locally */ sk->sk_state = SMC_APPFINCLOSEWAIT; } + sock_put(sk); /* passive closing */ break; case SMC_PEERFINCLOSEWAIT: - if (smc_cdc_rxed_any_close(conn)) + if (smc_cdc_rxed_any_close(conn)) { sk->sk_state = SMC_CLOSED; + sock_put(sk); /* passive closing */ + } break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: + /* postpone sock_put() for passive closing to cover + * received SEND_SHUTDOWN as well + */ + break; case SMC_APPFINCLOSEWAIT: case SMC_PEERABORTWAIT: case SMC_PROCESSABORT: @@ -382,23 +407,11 @@ wakeup: if (old_state != sk->sk_state) { sk->sk_state_change(sk); if ((sk->sk_state == SMC_CLOSED) && - (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { + (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) smc_conn_free(conn); - schedule_delayed_work(&smc->sock_put_work, - SMC_CLOSE_SOCK_PUT_DELAY); - } } release_sock(sk); -} - -void smc_close_sock_put_work(struct work_struct *work) -{ - struct smc_sock *smc = container_of(to_delayed_work(work), - struct smc_sock, - sock_put_work); - - smc->sk.sk_prot->unhash(&smc->sk); - sock_put(&smc->sk); + sock_put(sk); /* sock_hold done by schedulers of close_work */ } int smc_close_shutdown_write(struct smc_sock *smc) diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index 8c498885d758..19eb6a211c23 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -21,7 +21,6 @@ void smc_close_wake_tx_prepared(struct smc_sock *smc); int smc_close_active(struct smc_sock *smc); -void smc_close_sock_put_work(struct work_struct *work); int smc_close_shutdown_write(struct smc_sock *smc); void smc_close_init(struct smc_sock *smc); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index ed5b46d1fe41..2424c7100aaf 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -328,13 +328,13 @@ void smc_lgr_terminate(struct smc_link_group *lgr) while (node) { conn = rb_entry(node, struct smc_connection, alert_node); smc = container_of(conn, struct smc_sock, conn); - sock_hold(&smc->sk); + sock_hold(&smc->sk); /* sock_put in close work */ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; __smc_lgr_unregister_conn(conn); write_unlock_bh(&lgr->conns_lock); - schedule_work(&conn->close_work); + if (!schedule_work(&conn->close_work)) + sock_put(&smc->sk); write_lock_bh(&lgr->conns_lock); - sock_put(&smc->sk); node = rb_first(&lgr->conns_all); } write_unlock_bh(&lgr->conns_lock); -- cgit v1.2.3-59-g8ed1b From 127f497058236e5f07672e11382232f80cb7e8c4 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 26 Jan 2018 09:28:49 +0100 Subject: net/smc: release clcsock from tcp_listen_worker Closing a listen socket may hit the warning WARN_ON(sock_owned_by_user(sk)) of tcp_close(), if the wake up of the smc_tcp_listen_worker has not yet finished. This patch introduces smc_close_wait_listen_clcsock() making sure the listening internal clcsock has been closed in smc_tcp_listen_work(), before the listening external SMC socket finishes closing. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 13 ++++++++++++- net/smc/smc_close.c | 33 ++++++++++++++++++++++++--------- 2 files changed, 36 insertions(+), 10 deletions(-) (limited to 'net/smc') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 732a37ddbc21..267e68379110 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -670,6 +670,10 @@ struct sock *smc_accept_dequeue(struct sock *parent, smc_accept_unlink(new_sk); if (new_sk->sk_state == SMC_CLOSED) { + if (isk->clcsock) { + sock_release(isk->clcsock); + isk->clcsock = NULL; + } new_sk->sk_prot->unhash(new_sk); sock_put(new_sk); /* final */ continue; @@ -969,8 +973,15 @@ static void smc_tcp_listen_work(struct work_struct *work) } out: + if (lsmc->clcsock) { + sock_release(lsmc->clcsock); + lsmc->clcsock = NULL; + } release_sock(lsk); - lsk->sk_data_ready(lsk); /* no more listening, wake accept */ + /* no more listening, wake up smc_close_wait_listen_clcsock and + * accept + */ + lsk->sk_state_change(lsk); sock_put(&lsmc->sk); /* sock_hold in smc_listen */ } diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 4339852a8910..e339c0186dcf 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -19,6 +19,8 @@ #include "smc_cdc.h" #include "smc_close.h" +#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ) + static void smc_close_cleanup_listen(struct sock *parent) { struct sock *sk; @@ -28,6 +30,27 @@ static void smc_close_cleanup_listen(struct sock *parent) smc_close_non_accepted(sk); } +static void smc_close_wait_listen_clcsock(struct smc_sock *smc) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct sock *sk = &smc->sk; + signed long timeout; + + timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME; + add_wait_queue(sk_sleep(sk), &wait); + do { + release_sock(sk); + if (smc->clcsock) + timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE, + timeout); + sched_annotate_sleep(); + lock_sock(sk); + if (!smc->clcsock) + break; + } while (timeout); + remove_wait_queue(sk_sleep(sk), &wait); +} + /* wait for sndbuf data being transmitted */ static void smc_close_stream_wait(struct smc_sock *smc, long timeout) { @@ -114,7 +137,6 @@ static void smc_close_active_abort(struct smc_sock *smc) break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: - sock_release(smc->clcsock); if (!smc_cdc_rxed_any_close(&smc->conn)) sk->sk_state = SMC_PEERABORTWAIT; else @@ -128,7 +150,6 @@ static void smc_close_active_abort(struct smc_sock *smc) if (!txflags->peer_conn_closed) { /* just SHUTDOWN_SEND done */ sk->sk_state = SMC_PEERABORTWAIT; - sock_release(smc->clcsock); } else { sk->sk_state = SMC_CLOSED; } @@ -136,8 +157,6 @@ static void smc_close_active_abort(struct smc_sock *smc) break; case SMC_PROCESSABORT: case SMC_APPFINCLOSEWAIT: - if (!txflags->peer_conn_closed) - sock_release(smc->clcsock); sk->sk_state = SMC_CLOSED; break; case SMC_PEERFINCLOSEWAIT: @@ -177,8 +196,6 @@ again: switch (sk->sk_state) { case SMC_INIT: sk->sk_state = SMC_CLOSED; - if (smc->smc_listen_work.func) - cancel_work_sync(&smc->smc_listen_work); break; case SMC_LISTEN: sk->sk_state = SMC_CLOSED; @@ -187,11 +204,9 @@ again: rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); /* wake up kernel_accept of smc_tcp_listen_worker */ smc->clcsock->sk->sk_data_ready(smc->clcsock->sk); + smc_close_wait_listen_clcsock(smc); } - release_sock(sk); smc_close_cleanup_listen(sk); - cancel_work_sync(&smc->smc_listen_work); - lock_sock(sk); break; case SMC_ACTIVE: smc_close_stream_wait(smc, timeout); -- cgit v1.2.3-59-g8ed1b From a8fbf8e7ecda38d70a7983246fbadf04063f3843 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 26 Jan 2018 09:28:50 +0100 Subject: net/smc: return booleans instead of integers Return statements in functions returning bool should use true/false instead of 1/0. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc.h b/net/smc/smc.h index bfbe20234105..9518986c97b1 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -252,12 +252,12 @@ static inline int smc_uncompress_bufsize(u8 compressed) static inline bool using_ipsec(struct smc_sock *smc) { return (smc->clcsock->sk->sk_policy[0] || - smc->clcsock->sk->sk_policy[1]) ? 1 : 0; + smc->clcsock->sk->sk_policy[1]) ? true : false; } #else static inline bool using_ipsec(struct smc_sock *smc) { - return 0; + return false; } #endif -- cgit v1.2.3-59-g8ed1b