diff options
Diffstat (limited to 'net/smc')
-rw-r--r-- | net/smc/af_smc.c | 190 | ||||
-rw-r--r-- | net/smc/smc.h | 6 | ||||
-rw-r--r-- | net/smc/smc_cdc.c | 55 | ||||
-rw-r--r-- | net/smc/smc_cdc.h | 2 | ||||
-rw-r--r-- | net/smc/smc_clc.c | 3 | ||||
-rw-r--r-- | net/smc/smc_close.c | 14 | ||||
-rw-r--r-- | net/smc/smc_core.c | 228 | ||||
-rw-r--r-- | net/smc/smc_core.h | 39 | ||||
-rw-r--r-- | net/smc/smc_diag.c | 22 | ||||
-rw-r--r-- | net/smc/smc_ib.c | 6 | ||||
-rw-r--r-- | net/smc/smc_ib.h | 8 | ||||
-rw-r--r-- | net/smc/smc_ism.c | 1 | ||||
-rw-r--r-- | net/smc/smc_llc.c | 21 | ||||
-rw-r--r-- | net/smc/smc_pnet.c | 31 | ||||
-rw-r--r-- | net/smc/smc_tracepoint.h | 23 | ||||
-rw-r--r-- | net/smc/smc_wr.c | 66 | ||||
-rw-r--r-- | net/smc/smc_wr.h | 9 |
17 files changed, 494 insertions, 230 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index b61c802e3bf3..d5ea62b82bb8 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -89,8 +89,8 @@ int smc_hash_sk(struct sock *sk) write_lock_bh(&h->lock); sk_add_node(sk, head); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock_bh(&h->lock); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); return 0; } @@ -194,7 +194,9 @@ static int smc_release(struct socket *sock) /* cleanup for a dangling non-blocking connect */ if (smc->connect_nonblock && sk->sk_state == SMC_INIT) tcp_abort(smc->clcsock->sk, ECONNABORTED); - flush_work(&smc->connect_work); + + if (cancel_work_sync(&smc->connect_work)) + sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */ if (sk->sk_state == SMC_LISTEN) /* smc_close_non_accepted() is called and acquires @@ -564,12 +566,17 @@ static void smc_stat_fallback(struct smc_sock *smc) mutex_unlock(&net->smc.mutex_fback_rsn); } -static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code) +static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) { wait_queue_head_t *smc_wait = sk_sleep(&smc->sk); - wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk); + wait_queue_head_t *clc_wait; unsigned long flags; + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } smc->use_fallback = true; smc->fallback_rsn = reason_code; smc_stat_fallback(smc); @@ -584,18 +591,30 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code) * smc socket->wq, which should be removed * to clcsocket->wq during the fallback. */ + clc_wait = sk_sleep(smc->clcsock->sk); spin_lock_irqsave(&smc_wait->lock, flags); - spin_lock(&clc_wait->lock); + spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING); list_splice_init(&smc_wait->head, &clc_wait->head); spin_unlock(&clc_wait->lock); spin_unlock_irqrestore(&smc_wait->lock, flags); } + mutex_unlock(&smc->clcsock_release_lock); + return 0; } /* fall back during connect */ static int smc_connect_fallback(struct smc_sock *smc, int reason_code) { - smc_switch_to_fallback(smc, reason_code); + struct net *net = sock_net(&smc->sk); + int rc = 0; + + rc = smc_switch_to_fallback(smc, reason_code); + if (rc) { /* fallback fails */ + this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt); + if (smc->sk.sk_state == SMC_INIT) + sock_put(&smc->sk); /* passive closing */ + return rc; + } smc_copy_sock_settings_to_clc(smc); smc->connect_nonblock = 0; if (smc->sk.sk_state == SMC_INIT) @@ -630,10 +649,16 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code, static void smc_conn_abort(struct smc_sock *smc, int local_first) { - if (local_first) - smc_lgr_cleanup_early(&smc->conn); - else - smc_conn_free(&smc->conn); + struct smc_connection *conn = &smc->conn; + struct smc_link_group *lgr = conn->lgr; + bool lgr_valid = false; + + if (smc_conn_lgr_valid(conn)) + lgr_valid = true; + + smc_conn_free(conn); + if (local_first && lgr_valid) + smc_lgr_cleanup_early(lgr); } /* check if there is a rdma device available for this connection. */ @@ -1510,11 +1535,12 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, { /* RDMA setup failed, switch back to TCP */ smc_conn_abort(new_smc, local_first); - if (reason_code < 0) { /* error, no fallback possible */ + if (reason_code < 0 || + smc_switch_to_fallback(new_smc, reason_code)) { + /* error, no fallback possible */ smc_listen_out_err(new_smc); return; } - smc_switch_to_fallback(new_smc, reason_code); if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { if (smc_clc_send_decline(new_smc, reason_code, version) < 0) { smc_listen_out_err(new_smc); @@ -1956,8 +1982,11 @@ static void smc_listen_work(struct work_struct *work) /* check if peer is smc capable */ if (!tcp_sk(newclcsock->sk)->syn_smc) { - smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC); - smc_listen_out_connected(new_smc); + rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC); + if (rc) + smc_listen_out_err(new_smc); + else + smc_listen_out_connected(new_smc); return; } @@ -2134,8 +2163,10 @@ static int smc_listen(struct socket *sock, int backlog) smc->clcsock->sk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); rc = kernel_listen(smc->clcsock, backlog); - if (rc) + if (rc) { + smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; goto out; + } sk->sk_max_ack_backlog = backlog; sk->sk_ack_backlog = 0; sk->sk_state = SMC_LISTEN; @@ -2244,7 +2275,9 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_FASTOPEN) { if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { - smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + if (rc) + goto out; } else { rc = -EINVAL; goto out; @@ -2368,8 +2401,10 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, static int smc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; + bool do_shutdown = true; struct smc_sock *smc; int rc = -EINVAL; + int old_state; int rc1 = 0; smc = smc_sk(sk); @@ -2396,7 +2431,11 @@ static int smc_shutdown(struct socket *sock, int how) } switch (how) { case SHUT_RDWR: /* shutdown in both directions */ + old_state = sk->sk_state; rc = smc_close_active(smc); + if (old_state == SMC_ACTIVE && + sk->sk_state == SMC_PEERCLOSEWAIT1) + do_shutdown = false; break; case SHUT_WR: rc = smc_close_shutdown_write(smc); @@ -2406,7 +2445,7 @@ static int smc_shutdown(struct socket *sock, int how) /* nothing more to do because peer is not involved */ break; } - if (smc->clcsock) + if (do_shutdown && smc->clcsock) rc1 = kernel_sock_shutdown(smc->clcsock, how); /* map sock_shutdown_cmd constants to sk_shutdown value range */ sk->sk_shutdown |= how + 1; @@ -2431,6 +2470,11 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, /* generic setsockopts reaching us here always apply to the * CLC socket */ + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } if (unlikely(!smc->clcsock->ops->setsockopt)) rc = -EOPNOTSUPP; else @@ -2440,6 +2484,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, sk->sk_err = smc->clcsock->sk->sk_err; sk_error_report(sk); } + mutex_unlock(&smc->clcsock_release_lock); if (optlen < sizeof(int)) return -EINVAL; @@ -2456,7 +2501,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, case TCP_FASTOPEN_NO_COOKIE: /* option not supported by SMC */ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { - smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); } else { rc = -EINVAL; } @@ -2499,13 +2544,23 @@ static int smc_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct smc_sock *smc; + int rc; smc = smc_sk(sock->sk); + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } /* socket options apply to the CLC socket */ - if (unlikely(!smc->clcsock->ops->getsockopt)) + if (unlikely(!smc->clcsock->ops->getsockopt)) { + mutex_unlock(&smc->clcsock_release_lock); return -EOPNOTSUPP; - return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, - optval, optlen); + } + rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, + optval, optlen); + mutex_unlock(&smc->clcsock_release_lock); + return rc; } static int smc_ioctl(struct socket *sock, unsigned int cmd, @@ -2690,8 +2745,8 @@ static const struct proto_ops smc_sock_ops = { .splice_read = smc_splice_read, }; -static int smc_create(struct net *net, struct socket *sock, int protocol, - int kern) +static int __smc_create(struct net *net, struct socket *sock, int protocol, + int kern, struct socket *clcsock) { int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET; struct smc_sock *smc; @@ -2716,12 +2771,19 @@ static int smc_create(struct net *net, struct socket *sock, int protocol, smc = smc_sk(sk); smc->use_fallback = false; /* assume rdma capability first */ smc->fallback_rsn = 0; - rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, - &smc->clcsock); - if (rc) { - sk_common_release(sk); - goto out; + + rc = 0; + if (!clcsock) { + rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, + &smc->clcsock); + if (rc) { + sk_common_release(sk); + goto out; + } + } else { + smc->clcsock = clcsock; } + smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE); smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE); @@ -2729,12 +2791,76 @@ out: return rc; } +static int smc_create(struct net *net, struct socket *sock, int protocol, + int kern) +{ + return __smc_create(net, sock, protocol, kern, NULL); +} + static const struct net_proto_family smc_sock_family_ops = { .family = PF_SMC, .owner = THIS_MODULE, .create = smc_create, }; +static int smc_ulp_init(struct sock *sk) +{ + struct socket *tcp = sk->sk_socket; + struct net *net = sock_net(sk); + struct socket *smcsock; + int protocol, ret; + + /* only TCP can be replaced */ + if (tcp->type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP || + (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) + return -ESOCKTNOSUPPORT; + /* don't handle wq now */ + if (tcp->state != SS_UNCONNECTED || !tcp->file || tcp->wq.fasync_list) + return -ENOTCONN; + + if (sk->sk_family == AF_INET) + protocol = SMCPROTO_SMC; + else + protocol = SMCPROTO_SMC6; + + smcsock = sock_alloc(); + if (!smcsock) + return -ENFILE; + + smcsock->type = SOCK_STREAM; + __module_get(THIS_MODULE); /* tried in __tcp_ulp_find_autoload */ + ret = __smc_create(net, smcsock, protocol, 1, tcp); + if (ret) { + sock_release(smcsock); /* module_put() which ops won't be NULL */ + return ret; + } + + /* replace tcp socket to smc */ + smcsock->file = tcp->file; + smcsock->file->private_data = smcsock; + smcsock->file->f_inode = SOCK_INODE(smcsock); /* replace inode when sock_close */ + smcsock->file->f_path.dentry->d_inode = SOCK_INODE(smcsock); /* dput() in __fput */ + tcp->file = NULL; + + return ret; +} + +static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk, + const gfp_t priority) +{ + struct inet_connection_sock *icsk = inet_csk(newsk); + + /* don't inherit ulp ops to child when listen */ + icsk->icsk_ulp_ops = NULL; +} + +static struct tcp_ulp_ops smc_ulp_ops __read_mostly = { + .name = "smc", + .owner = THIS_MODULE, + .init = smc_ulp_init, + .clone = smc_ulp_clone, +}; + unsigned int smc_net_id; static __net_init int smc_net_init(struct net *net) @@ -2845,6 +2971,12 @@ static int __init smc_init(void) goto out_sock; } + rc = tcp_register_ulp(&smc_ulp_ops); + if (rc) { + pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc); + goto out_sock; + } + static_branch_enable(&tcp_have_smc); return 0; @@ -2873,6 +3005,7 @@ out_pernet_subsys: static void __exit smc_exit(void) { static_branch_disable(&tcp_have_smc); + tcp_unregister_ulp(&smc_ulp_ops); sock_unregister(PF_SMC); smc_core_exit(); smc_ib_unregister_client(); @@ -2895,3 +3028,4 @@ MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>"); MODULE_DESCRIPTION("smc socket address family"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_SMC); +MODULE_ALIAS_TCP_ULP("smc"); diff --git a/net/smc/smc.h b/net/smc/smc.h index f4286ca1f228..3d0b8e300deb 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -180,6 +180,11 @@ struct smc_connection { u16 tx_cdc_seq; /* sequence # for CDC send */ u16 tx_cdc_seq_fin; /* sequence # - tx completed */ spinlock_t send_lock; /* protect wr_sends */ + atomic_t cdc_pend_tx_wr; /* number of pending tx CDC wqe + * - inc when post wqe, + * - dec on polled tx cqe + */ + wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ u32 tx_off; /* base offset in peer rmb */ @@ -216,6 +221,7 @@ struct smc_connection { */ u64 peer_token; /* SMC-D token of peer */ u8 killed : 1; /* abnormal termination */ + u8 freed : 1; /* normal termiation */ u8 out_of_sync : 1; /* out of sync with peer */ }; diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 99acd337ba90..9d5a97168969 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -31,10 +31,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, struct smc_sock *smc; int diff; - if (!conn) - /* already dismissed */ - return; - smc = container_of(conn, struct smc_sock, conn); bh_lock_sock(&smc->sk); if (!wc_status) { @@ -51,6 +47,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, conn); conn->tx_cdc_seq_fin = cdcpend->ctrl_seq; } + + if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) && + unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq))) + wake_up(&conn->cdc_pend_tx_wq); + WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0); + smc_tx_sndbuf_nonfull(smc); bh_unlock_sock(&smc->sk); } @@ -107,6 +109,10 @@ int smc_cdc_msg_send(struct smc_connection *conn, conn->tx_cdc_seq++; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed); + + atomic_inc(&conn->cdc_pend_tx_wr); + smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); if (!rc) { smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); @@ -114,6 +120,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, } else { conn->tx_cdc_seq--; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; + atomic_dec(&conn->cdc_pend_tx_wr); } return rc; @@ -136,7 +143,18 @@ int smcr_cdc_msg_send_validation(struct smc_connection *conn, peer->token = htonl(local->token); peer->prod_flags.failover_validation = 1; + /* We need to set pend->conn here to make sure smc_cdc_tx_handler() + * can handle properly + */ + smc_cdc_add_pending_send(conn, pend); + + atomic_inc(&conn->cdc_pend_tx_wr); + smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); + if (unlikely(rc)) + atomic_dec(&conn->cdc_pend_tx_wr); + return rc; } @@ -179,7 +197,8 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) { int rc; - if (!conn->lgr || (conn->lgr->is_smcd && conn->lgr->peer_shutdown)) + if (!smc_conn_lgr_valid(conn) || + (conn->lgr->is_smcd && conn->lgr->peer_shutdown)) return -EPIPE; if (conn->lgr->is_smcd) { @@ -193,31 +212,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) return rc; } -static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend, - unsigned long data) +void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn) { - struct smc_connection *conn = (struct smc_connection *)data; - struct smc_cdc_tx_pend *cdc_pend = - (struct smc_cdc_tx_pend *)tx_pend; - - return cdc_pend->conn == conn; -} - -static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend) -{ - struct smc_cdc_tx_pend *cdc_pend = - (struct smc_cdc_tx_pend *)tx_pend; - - cdc_pend->conn = NULL; -} - -void smc_cdc_tx_dismiss_slots(struct smc_connection *conn) -{ - struct smc_link *link = conn->lnk; - - smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE, - smc_cdc_tx_filter, smc_cdc_tx_dismisser, - (unsigned long)conn); + wait_event(conn->cdc_pend_tx_wq, !atomic_read(&conn->cdc_pend_tx_wr)); } /* Send a SMC-D CDC header. diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 0a0a89abd38b..696cc11f2303 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -291,7 +291,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend); -void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); +void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 8409ab71a5e4..ce27399b38b1 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -774,7 +774,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX; dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? SMC_FIRST_CONTACT_MASK : 0; - if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) && + if ((!smc_conn_lgr_valid(&smc->conn) || !smc->conn.lgr->is_smcd) && smc_ib_is_valid_local_systemid()) memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); @@ -1021,7 +1021,6 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, struct smc_link *link = conn->lnk; /* SMC-R specific settings */ - link = conn->lnk; memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); clc->hdr.typev1 = SMC_TYPE_R; diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 0f9ffba07d26..292e4d904ab6 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -195,6 +195,7 @@ int smc_close_active(struct smc_sock *smc) int old_state; long timeout; int rc = 0; + int rc1 = 0; timeout = current->flags & PF_EXITING ? 0 : sock_flag(sk, SOCK_LINGER) ? @@ -228,6 +229,15 @@ again: /* send close request */ rc = smc_close_final(conn); sk->sk_state = SMC_PEERCLOSEWAIT1; + + /* actively shutdown clcsock before peer close it, + * prevent peer from entering TIME_WAIT state. + */ + if (smc->clcsock && smc->clcsock->sk) { + rc1 = kernel_sock_shutdown(smc->clcsock, + SHUT_RDWR); + rc = rc ? rc : rc1; + } } else { /* peer event has changed the state */ goto again; @@ -354,9 +364,9 @@ static void smc_close_passive_work(struct work_struct *work) if (rxflags->peer_conn_abort) { /* peer has not received all data */ smc_close_passive_abort_received(smc); - release_sock(&smc->sk); + release_sock(sk); cancel_delayed_work_sync(&conn->tx_work); - lock_sock(&smc->sk); + lock_sock(sk); goto wakeup; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 25ebd30feecd..29525d03b253 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -171,8 +171,10 @@ static int smc_lgr_register_conn(struct smc_connection *conn, bool first) if (!conn->lgr->is_smcd) { rc = smcr_lgr_conn_assign_link(conn, first); - if (rc) + if (rc) { + conn->lgr = NULL; return rc; + } } /* find a new alert_token_local value not yet used by some connection * in this link group @@ -209,14 +211,13 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; - if (!lgr) + if (!smc_conn_lgr_valid(conn)) return; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { __smc_lgr_unregister_conn(conn); } write_unlock_bh(&lgr->conns_lock); - conn->lgr = NULL; } int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) @@ -348,6 +349,9 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr, goto errattr; if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id)) goto errattr; + if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE, + lgr->net->net_cookie, SMC_NLA_LGR_R_PAD)) + goto errattr; memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN); smc_target[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target)) @@ -622,21 +626,18 @@ int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -void smc_lgr_cleanup_early(struct smc_connection *conn) +void smc_lgr_cleanup_early(struct smc_link_group *lgr) { - struct smc_link_group *lgr = conn->lgr; - struct list_head *lgr_list; spinlock_t *lgr_lock; if (!lgr) return; - smc_conn_free(conn); - lgr_list = smc_lgr_list_head(lgr, &lgr_lock); + smc_lgr_list_head(lgr, &lgr_lock); spin_lock_bh(lgr_lock); /* do not use this link group for new connections */ - if (!list_empty(lgr_list)) - list_del_init(lgr_list); + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); spin_unlock_bh(lgr_lock); __smc_lgr_terminate(lgr, true); } @@ -648,7 +649,7 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *lnk = &lgr->lnk[i]; - if (smc_link_usable(lnk)) + if (smc_link_sendable(lnk)) lnk->state = SMC_LNK_INACTIVE; } wake_up_all(&lgr->llc_msg_waiter); @@ -747,9 +748,12 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, } get_device(&lnk->smcibdev->ibdev->dev); atomic_inc(&lnk->smcibdev->lnk_cnt); + refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */ + lnk->clearing = 0; lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu; lnk->link_id = smcr_next_link_id(lgr); lnk->lgr = lgr; + smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */ lnk->link_idx = link_idx; smc_ibdev_cnt_inc(lnk); smcr_copy_dev_info_to_link(lnk); @@ -804,6 +808,7 @@ out: lnk->state = SMC_LNK_UNUSED; if (!atomic_dec_return(&smcibdev->lnk_cnt)) wake_up(&smcibdev->lnks_deleted); + smc_lgr_put(lgr); /* lgr_hold above */ return rc; } @@ -842,6 +847,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->terminating = 0; lgr->freeing = 0; lgr->vlan_id = ini->vlan_id; + refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */ mutex_init(&lgr->sndbufs_lock); mutex_init(&lgr->rmbs_lock); rwlock_init(&lgr->conns_lock); @@ -898,6 +904,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) smc_wr_free_lgr_mem(lgr); goto free_wq; } + lgr->net = smc_ib_net(lnk->smcibdev); lgr_list = &smc_lgr_list.list; lgr_lock = &smc_lgr_list.lock; atomic_inc(&lgr_cnt); @@ -993,8 +1000,12 @@ void smc_switch_link_and_count(struct smc_connection *conn, struct smc_link *to_lnk) { atomic_dec(&conn->lnk->conn_cnt); + /* link_hold in smc_conn_create() */ + smcr_link_put(conn->lnk); conn->lnk = to_lnk; atomic_inc(&conn->lnk->conn_cnt); + /* link_put in smc_conn_free() */ + smcr_link_hold(conn->lnk); } struct smc_link *smc_switch_conns(struct smc_link_group *lgr, @@ -1102,18 +1113,24 @@ static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc, smc_buf_free(lgr, true, rmb_desc); } else { rmb_desc->used = 0; + memset(rmb_desc->cpu_addr, 0, rmb_desc->len); } } static void smc_buf_unuse(struct smc_connection *conn, struct smc_link_group *lgr) { - if (conn->sndbuf_desc) + if (conn->sndbuf_desc) { conn->sndbuf_desc->used = 0; - if (conn->rmb_desc && lgr->is_smcd) + memset(conn->sndbuf_desc->cpu_addr, 0, conn->sndbuf_desc->len); + } + if (conn->rmb_desc && lgr->is_smcd) { conn->rmb_desc->used = 0; - else if (conn->rmb_desc) + memset(conn->rmb_desc->cpu_addr, 0, conn->rmb_desc->len + + sizeof(struct smcd_cdc_msg)); + } else if (conn->rmb_desc) { smcr_buf_unuse(conn->rmb_desc, lgr); + } } /* remove a finished connection from its link group */ @@ -1121,14 +1138,25 @@ void smc_conn_free(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; - if (!lgr) + if (!lgr || conn->freed) + /* Connection has never been registered in a + * link group, or has already been freed. + */ return; + + conn->freed = 1; + if (!smc_conn_lgr_valid(conn)) + /* Connection has already unregistered from + * link group. + */ + goto lgr_put; + if (lgr->is_smcd) { if (!list_empty(&lgr->list)) smc_ism_unset_conn(conn); tasklet_kill(&conn->rx_tsklet); } else { - smc_cdc_tx_dismiss_slots(conn); + smc_cdc_wait_pend_tx_wr(conn); if (current_work() != &conn->abort_work) cancel_work_sync(&conn->abort_work); } @@ -1139,6 +1167,10 @@ void smc_conn_free(struct smc_connection *conn) if (!lgr->conns_num) smc_lgr_schedule_free_work(lgr); +lgr_put: + if (!lgr->is_smcd) + smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */ + smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */ } /* unregister a link from a buf_desc */ @@ -1194,21 +1226,11 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk) } } -/* must be called under lgr->llc_conf_mutex lock */ -void smcr_link_clear(struct smc_link *lnk, bool log) +static void __smcr_link_clear(struct smc_link *lnk) { + struct smc_link_group *lgr = lnk->lgr; struct smc_ib_device *smcibdev; - if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED) - return; - lnk->peer_qpn = 0; - smc_llc_link_clear(lnk, log); - smcr_buf_unmap_lgr(lnk); - smcr_rtoken_clear_link(lnk); - smc_ib_modify_qp_reset(lnk); - smc_wr_free_link(lnk); - smc_ib_destroy_queue_pair(lnk); - smc_ib_dealloc_protection_domain(lnk); smc_wr_free_link_mem(lnk); smc_ibdev_cnt_dec(lnk); put_device(&lnk->smcibdev->ibdev->dev); @@ -1217,6 +1239,36 @@ void smcr_link_clear(struct smc_link *lnk, bool log) lnk->state = SMC_LNK_UNUSED; if (!atomic_dec_return(&smcibdev->lnk_cnt)) wake_up(&smcibdev->lnks_deleted); + smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */ +} + +/* must be called under lgr->llc_conf_mutex lock */ +void smcr_link_clear(struct smc_link *lnk, bool log) +{ + if (!lnk->lgr || lnk->clearing || + lnk->state == SMC_LNK_UNUSED) + return; + lnk->clearing = 1; + lnk->peer_qpn = 0; + smc_llc_link_clear(lnk, log); + smcr_buf_unmap_lgr(lnk); + smcr_rtoken_clear_link(lnk); + smc_ib_modify_qp_error(lnk); + smc_wr_free_link(lnk); + smc_ib_destroy_queue_pair(lnk); + smc_ib_dealloc_protection_domain(lnk); + smcr_link_put(lnk); /* theoretically last link_put */ +} + +void smcr_link_hold(struct smc_link *lnk) +{ + refcount_inc(&lnk->refcnt); +} + +void smcr_link_put(struct smc_link *lnk) +{ + if (refcount_dec_and_test(&lnk->refcnt)) + __smcr_link_clear(lnk); } static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, @@ -1281,6 +1333,21 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr) __smc_lgr_free_bufs(lgr, true); } +/* won't be freed until no one accesses to lgr anymore */ +static void __smc_lgr_free(struct smc_link_group *lgr) +{ + smc_lgr_free_bufs(lgr); + if (lgr->is_smcd) { + if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) + wake_up(&lgr->smcd->lgrs_deleted); + } else { + smc_wr_free_lgr_mem(lgr); + if (!atomic_dec_return(&lgr_cnt)) + wake_up(&lgrs_deleted); + } + kfree(lgr); +} + /* remove a link group */ static void smc_lgr_free(struct smc_link_group *lgr) { @@ -1296,19 +1363,23 @@ static void smc_lgr_free(struct smc_link_group *lgr) smc_llc_lgr_clear(lgr); } - smc_lgr_free_bufs(lgr); destroy_workqueue(lgr->tx_wq); if (lgr->is_smcd) { smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); put_device(&lgr->smcd->dev); - if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) - wake_up(&lgr->smcd->lgrs_deleted); - } else { - smc_wr_free_lgr_mem(lgr); - if (!atomic_dec_return(&lgr_cnt)) - wake_up(&lgrs_deleted); } - kfree(lgr); + smc_lgr_put(lgr); /* theoretically last lgr_put */ +} + +void smc_lgr_hold(struct smc_link_group *lgr) +{ + refcount_inc(&lgr->refcnt); +} + +void smc_lgr_put(struct smc_link_group *lgr) +{ + if (refcount_dec_and_test(&lgr->refcnt)) + __smc_lgr_free(lgr); } static void smc_sk_wake_ups(struct smc_sock *smc) @@ -1337,7 +1408,7 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft) else tasklet_unlock_wait(&conn->rx_tsklet); } else { - smc_cdc_tx_dismiss_slots(conn); + smc_cdc_wait_pend_tx_wr(conn); } smc_lgr_unregister_conn(conn); smc_close_active_abort(smc); @@ -1528,9 +1599,9 @@ void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type) lgr_type = "ASYMMETRIC_LOCAL"; break; } - pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: " + pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu state changed: " "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id, - lgr_type, lgr->pnet_id); + lgr->net->net_cookie, lgr_type, lgr->pnet_id); } /* set new lgr type and tag a link as asymmetric */ @@ -1565,7 +1636,8 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, SMC_MAX_PNETID_LEN) || lgr->type == SMC_LGR_SYMMETRIC || - lgr->type == SMC_LGR_ASYMMETRIC_PEER) + lgr->type == SMC_LGR_ASYMMETRIC_PEER || + !rdma_dev_access_netns(smcibdev->ibdev, lgr->net)) continue; /* trigger local add link processing */ @@ -1587,7 +1659,6 @@ static void smcr_link_down(struct smc_link *lnk) if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list)) return; - smc_ib_modify_qp_reset(lnk); to_lnk = smc_switch_conns(lgr, lnk, true); if (!to_lnk) { /* no backup link available */ smcr_link_clear(lnk, true); @@ -1672,14 +1743,26 @@ static void smc_link_down_work(struct work_struct *work) mutex_unlock(&lgr->llc_conf_mutex); } -/* Determine vlan of internal TCP socket. - * @vlan_id: address to store the determined vlan id into - */ +static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev, + struct netdev_nested_priv *priv) +{ + unsigned short *vlan_id = (unsigned short *)priv->data; + + if (is_vlan_dev(lower_dev)) { + *vlan_id = vlan_dev_vlan_id(lower_dev); + return 1; + } + + return 0; +} + +/* Determine vlan of internal TCP socket. */ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini) { struct dst_entry *dst = sk_dst_get(clcsock->sk); + struct netdev_nested_priv priv; struct net_device *ndev; - int i, nest_lvl, rc = 0; + int rc = 0; ini->vlan_id = 0; if (!dst) { @@ -1697,20 +1780,9 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini) goto out_rel; } + priv.data = (void *)&ini->vlan_id; rtnl_lock(); - nest_lvl = ndev->lower_level; - for (i = 0; i < nest_lvl; i++) { - struct list_head *lower = &ndev->adj_list.lower; - - if (list_empty(lower)) - break; - lower = lower->next; - ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower); - if (is_vlan_dev(ndev)) { - ini->vlan_id = vlan_dev_vlan_id(ndev); - break; - } - } + netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv); rtnl_unlock(); out_rel: @@ -1723,8 +1795,10 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version, u8 peer_systemid[], u8 peer_gid[], u8 peer_mac_v1[], - enum smc_lgr_role role, u32 clcqpn) + enum smc_lgr_role role, u32 clcqpn, + struct net *net) { + struct smc_link *lnk; int i; if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) || @@ -1732,12 +1806,17 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version, return false; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_active(&lgr->lnk[i])) + lnk = &lgr->lnk[i]; + + if (!smc_link_active(lnk)) continue; - if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) && - !memcmp(lgr->lnk[i].peer_gid, peer_gid, SMC_GID_SIZE) && + /* use verbs API to check netns, instead of lgr->net */ + if (!rdma_dev_access_netns(lnk->smcibdev->ibdev, net)) + return false; + if ((lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn) && + !memcmp(lnk->peer_gid, peer_gid, SMC_GID_SIZE) && (smcr_version == SMC_V2 || - !memcmp(lgr->lnk[i].peer_mac, peer_mac_v1, ETH_ALEN))) + !memcmp(lnk->peer_mac, peer_mac_v1, ETH_ALEN))) return true; } return false; @@ -1753,6 +1832,7 @@ static bool smcd_lgr_match(struct smc_link_group *lgr, int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) { struct smc_connection *conn = &smc->conn; + struct net *net = sock_net(&smc->sk); struct list_head *lgr_list; struct smc_link_group *lgr; enum smc_lgr_role role; @@ -1779,7 +1859,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) smcr_lgr_match(lgr, ini->smcr_version, ini->peer_systemid, ini->peer_gid, ini->peer_mac, role, - ini->ib_clcqpn)) && + ini->ib_clcqpn, net)) && !lgr->sync_err && (ini->smcd_version == SMC_V2 || lgr->vlan_id == ini->vlan_id) && @@ -1818,12 +1898,19 @@ create: write_lock_bh(&lgr->conns_lock); rc = smc_lgr_register_conn(conn, true); write_unlock_bh(&lgr->conns_lock); - if (rc) + if (rc) { + smc_lgr_cleanup_early(lgr); goto out; + } } + smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */ + if (!conn->lgr->is_smcd) + smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */ + conn->freed = 0; conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; conn->urg_state = SMC_URG_READ; + init_waitqueue_head(&conn->cdc_pend_tx_wq); INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work); if (ini->is_smcd) { conn->rx_off = sizeof(struct smcd_cdc_msg); @@ -2148,7 +2235,6 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) if (buf_desc) { SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize); SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb); - memset(buf_desc->cpu_addr, 0, bufsize); break; /* found reusable slot */ } @@ -2205,14 +2291,16 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) { - if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) + if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd || + !smc_link_active(conn->lnk)) return; smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) { - if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) + if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd || + !smc_link_active(conn->lnk)) return; smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } @@ -2221,7 +2309,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) { int i; - if (!conn->lgr || conn->lgr->is_smcd) + if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (!smc_link_active(&conn->lgr->lnk[i])) @@ -2235,7 +2323,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn) { int i; - if (!conn->lgr || conn->lgr->is_smcd) + if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (!smc_link_active(&conn->lgr->lnk[i])) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 59cef3b830d8..4cb03e942364 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -137,6 +137,8 @@ struct smc_link { u8 peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */ u8 link_idx; /* index in lgr link array */ u8 link_is_asym; /* is link asymmetric? */ + u8 clearing : 1; /* link is being cleared */ + refcount_t refcnt; /* link reference count */ struct smc_link_group *lgr; /* parent link group */ struct work_struct link_down_wrk; /* wrk to bring link down */ char ibname[IB_DEVICE_NAME_MAX]; /* ib device name */ @@ -249,6 +251,7 @@ struct smc_link_group { u8 terminating : 1;/* lgr is terminating */ u8 freeing : 1; /* lgr is being freed */ + refcount_t refcnt; /* lgr reference count */ bool is_smcd; /* SMC-R or SMC-D */ u8 smc_version; u8 negotiated_eid[SMC_MAX_EID_LEN]; @@ -306,6 +309,8 @@ struct smc_link_group { u8 nexthop_mac[ETH_ALEN]; u8 uses_gateway; __be32 saddr; + /* net namespace */ + struct net *net; }; struct { /* SMC-D */ u64 peer_gid; @@ -407,7 +412,18 @@ static inline struct smc_connection *smc_lgr_find_conn( return res; } -/* returns true if the specified link is usable */ +static inline bool smc_conn_lgr_valid(struct smc_connection *conn) +{ + return conn->lgr && conn->alert_token_local; +} + +/* + * Returns true if the specified link is usable. + * + * usable means the link is ready to receive RDMA messages, map memory + * on the link, etc. This doesn't ensure we are able to send RDMA messages + * on this link, if sending RDMA messages is needed, use smc_link_sendable() + */ static inline bool smc_link_usable(struct smc_link *lnk) { if (lnk->state == SMC_LNK_UNUSED || lnk->state == SMC_LNK_INACTIVE) @@ -415,6 +431,21 @@ static inline bool smc_link_usable(struct smc_link *lnk) return true; } +/* + * Returns true if the specified link is ready to receive AND send RDMA + * messages. + * + * For the client side in first contact, the underlying QP may still in + * RESET or RTR when the link state is ACTIVATING, checks in smc_link_usable() + * is not strong enough. For those places that need to send any CDC or LLC + * messages, use smc_link_sendable(), otherwise, use smc_link_usable() instead + */ +static inline bool smc_link_sendable(struct smc_link *lnk) +{ + return smc_link_usable(lnk) && + lnk->qp_attr.cur_qp_state == IB_QPS_RTS; +} + static inline bool smc_link_active(struct smc_link *lnk) { return lnk->state == SMC_LNK_ACTIVE; @@ -462,8 +493,10 @@ static inline void smc_set_pci_values(struct pci_dev *pci_dev, struct smc_sock; struct smc_clc_msg_accept_confirm; -void smc_lgr_cleanup_early(struct smc_connection *conn); +void smc_lgr_cleanup_early(struct smc_link_group *lgr); void smc_lgr_terminate_sched(struct smc_link_group *lgr); +void smc_lgr_hold(struct smc_link_group *lgr); +void smc_lgr_put(struct smc_link_group *lgr); void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport); void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport); void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, @@ -495,6 +528,8 @@ void smc_core_exit(void); int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, u8 link_idx, struct smc_init_info *ini); void smcr_link_clear(struct smc_link *lnk, bool log); +void smcr_link_hold(struct smc_link *lnk); +void smcr_link_put(struct smc_link *lnk); void smc_switch_link_and_count(struct smc_connection *conn, struct smc_link *to_lnk); int smcr_buf_map_lgr(struct smc_link *lnk); diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index c952986a6aca..b8898c787d23 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -89,7 +89,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, r->diag_state = sk->sk_state; if (smc->use_fallback) r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP; - else if (smc->conn.lgr && smc->conn.lgr->is_smcd) + else if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd) r->diag_mode = SMC_DIAG_MODE_SMCD; else r->diag_mode = SMC_DIAG_MODE_SMCR; @@ -142,27 +142,29 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, goto errout; } - if (smc->conn.lgr && !smc->conn.lgr->is_smcd && + if (smc_conn_lgr_valid(&smc->conn) && !smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { + struct smc_link *link = smc->conn.lnk; + struct net *net = read_pnet(&link->smcibdev->ibdev->coredev.rdma_net); + struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, - .lnk[0].ibport = smc->conn.lnk->ibport, - .lnk[0].link_id = smc->conn.lnk->link_id, + .lnk[0].ibport = link->ibport, + .lnk[0].link_id = link->link_id, + .lnk[0].net_cookie = net->net_cookie, }; memcpy(linfo.lnk[0].ibname, smc->conn.lgr->lnk[0].smcibdev->ibdev->name, - sizeof(smc->conn.lnk->smcibdev->ibdev->name)); - smc_gid_be16_convert(linfo.lnk[0].gid, - smc->conn.lnk->gid); - smc_gid_be16_convert(linfo.lnk[0].peer_gid, - smc->conn.lnk->peer_gid); + sizeof(link->smcibdev->ibdev->name)); + smc_gid_be16_convert(linfo.lnk[0].gid, link->gid); + smc_gid_be16_convert(linfo.lnk[0].peer_gid, link->peer_gid); if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0) goto errout; } - if (smc->conn.lgr && smc->conn.lgr->is_smcd && + if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_connection *conn = &smc->conn; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index d93055ec17ae..a3e2d3b89568 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -12,6 +12,8 @@ * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> */ +#include <linux/etherdevice.h> +#include <linux/if_vlan.h> #include <linux/random.h> #include <linux/workqueue.h> #include <linux/scatterlist.h> @@ -109,12 +111,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk) IB_QP_MAX_QP_RD_ATOMIC); } -int smc_ib_modify_qp_reset(struct smc_link *lnk) +int smc_ib_modify_qp_error(struct smc_link *lnk) { struct ib_qp_attr qp_attr; memset(&qp_attr, 0, sizeof(qp_attr)); - qp_attr.qp_state = IB_QPS_RESET; + qp_attr.qp_state = IB_QPS_ERR; return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE); } diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 07585937370e..5d8b49c57f50 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -69,6 +69,13 @@ static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE]) return cpu_to_be32(INADDR_NONE); } +static inline struct net *smc_ib_net(struct smc_ib_device *smcibdev) +{ + if (smcibdev && smcibdev->ibdev) + return read_pnet(&smcibdev->ibdev->coredev.rdma_net); + return NULL; +} + struct smc_init_info_smcrv2; struct smc_buf_desc; struct smc_link; @@ -90,6 +97,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk); int smc_ib_ready_link(struct smc_link *lnk); int smc_ib_modify_qp_rts(struct smc_link *lnk); int smc_ib_modify_qp_reset(struct smc_link *lnk); +int smc_ib_modify_qp_error(struct smc_link *lnk); long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev); int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, struct smc_buf_desc *buf_slot, u8 link_idx); diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index fd28cc498b98..a2084ecdb97e 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -6,6 +6,7 @@ * Copyright IBM Corp. 2018 */ +#include <linux/if_vlan.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/slab.h> diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index b102680296b8..c4d057b2941d 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -242,9 +242,10 @@ static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type, } /* drop parallel or already-in-progress llc requests */ if (flow_type != msg_type) - pr_warn_once("smc: SMC-R lg %*phN dropped parallel " + pr_warn_once("smc: SMC-R lg %*phN net %llu dropped parallel " "LLC msg: msg %d flow %d role %d\n", SMC_LGR_ID_SIZE, &lgr->id, + lgr->net->net_cookie, qentry->msg.raw.hdr.common.type, flow_type, lgr->role); kfree(qentry); @@ -359,9 +360,10 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr, smc_llc_flow_qentry_clr(flow)); return NULL; } - pr_warn_once("smc: SMC-R lg %*phN dropped unexpected LLC msg: " + pr_warn_once("smc: SMC-R lg %*phN net %llu dropped unexpected LLC msg: " "msg %d exp %d flow %d role %d flags %x\n", - SMC_LGR_ID_SIZE, &lgr->id, rcv_msg, exp_msg, + SMC_LGR_ID_SIZE, &lgr->id, lgr->net->net_cookie, + rcv_msg, exp_msg, flow->type, lgr->role, flow->qentry->msg.raw.hdr.flags); smc_llc_flow_qentry_del(flow); @@ -1630,7 +1632,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) delllc.reason = htonl(rsn); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&lgr->lnk[i])) + if (!smc_link_sendable(&lgr->lnk[i])) continue; if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc)) break; @@ -1816,8 +1818,9 @@ finish: static void smc_llc_protocol_violation(struct smc_link_group *lgr, u8 type) { - pr_warn_ratelimited("smc: SMC-R lg %*phN LLC protocol violation: " - "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id, type); + pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu LLC protocol violation: " + "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id, + lgr->net->net_cookie, type); smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_PROT_VIOL); smc_lgr_terminate_sched(lgr); } @@ -2146,9 +2149,10 @@ int smc_llc_link_init(struct smc_link *link) void smc_llc_link_active(struct smc_link *link) { - pr_warn_ratelimited("smc: SMC-R lg %*phN link added: id %*phN, " + pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu link added: id %*phN, " "peerid %*phN, ibdev %s, ibport %d\n", SMC_LGR_ID_SIZE, &link->lgr->id, + link->lgr->net->net_cookie, SMC_LGR_ID_SIZE, &link->link_uid, SMC_LGR_ID_SIZE, &link->peer_link_uid, link->smcibdev->ibdev->name, link->ibport); @@ -2164,9 +2168,10 @@ void smc_llc_link_active(struct smc_link *link) void smc_llc_link_clear(struct smc_link *link, bool log) { if (log) - pr_warn_ratelimited("smc: SMC-R lg %*phN link removed: id %*phN" + pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu link removed: id %*phN" ", peerid %*phN, ibdev %s, ibport %d\n", SMC_LGR_ID_SIZE, &link->lgr->id, + link->lgr->net->net_cookie, SMC_LGR_ID_SIZE, &link->link_uid, SMC_LGR_ID_SIZE, &link->peer_link_uid, link->smcibdev->ibdev->name, link->ibport); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 67e9d9fde085..291f1484a1b7 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -64,6 +64,7 @@ struct smc_pnetentry { struct { char eth_name[IFNAMSIZ + 1]; struct net_device *ndev; + netdevice_tracker dev_tracker; }; struct { char ib_name[IB_DEVICE_NAME_MAX + 1]; @@ -119,7 +120,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) smc_pnet_match(pnetelem->pnet_name, pnet_name)) { list_del(&pnetelem->list); if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) { - dev_put(pnetelem->ndev); + dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker); pr_warn_ratelimited("smc: net device %s " "erased user defined " "pnetid %.16s\n", @@ -195,7 +196,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev) list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev && !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) { - dev_hold(ndev); + dev_hold_track(ndev, &pnetelem->dev_tracker, GFP_ATOMIC); pnetelem->ndev = ndev; rc = 0; pr_warn_ratelimited("smc: adding net device %s with " @@ -226,7 +227,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) write_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) { - dev_put(pnetelem->ndev); + dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker); pnetelem->ndev = NULL; rc = 0; pr_warn_ratelimited("smc: removing net device %s with " @@ -368,7 +369,8 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); strncpy(new_pe->eth_name, eth_name, IFNAMSIZ); new_pe->ndev = ndev; - + if (ndev) + netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL); rc = -EEXIST; new_netdev = true; write_lock(&pnettable->lock); @@ -976,14 +978,16 @@ static int smc_pnet_determine_gid(struct smc_ib_device *ibdev, int i, /* find a roce device for the given pnetid */ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, struct smc_init_info *ini, - struct smc_ib_device *known_dev) + struct smc_ib_device *known_dev, + struct net *net) { struct smc_ib_device *ibdev; int i; mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { - if (ibdev == known_dev) + if (ibdev == known_dev || + !rdma_dev_access_netns(ibdev->ibdev, net)) continue; for (i = 1; i <= SMC_MAX_PORTS; i++) { if (!rdma_is_port_valid(ibdev->ibdev, i)) @@ -1000,12 +1004,14 @@ out: mutex_unlock(&smc_ib_devices.mutex); } -/* find alternate roce device with same pnet_id and vlan_id */ +/* find alternate roce device with same pnet_id, vlan_id and net namespace */ void smc_pnet_find_alt_roce(struct smc_link_group *lgr, struct smc_init_info *ini, struct smc_ib_device *known_dev) { - _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev); + struct net *net = lgr->net; + + _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev, net); } /* if handshake network device belongs to a roce device, return its @@ -1014,6 +1020,7 @@ void smc_pnet_find_alt_roce(struct smc_link_group *lgr, static void smc_pnet_find_rdma_dev(struct net_device *netdev, struct smc_init_info *ini) { + struct net *net = dev_net(netdev); struct smc_ib_device *ibdev; mutex_lock(&smc_ib_devices.mutex); @@ -1021,6 +1028,10 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, struct net_device *ndev; int i; + /* check rdma net namespace */ + if (!rdma_dev_access_netns(ibdev->ibdev, net)) + continue; + for (i = 1; i <= SMC_MAX_PORTS; i++) { if (!rdma_is_port_valid(ibdev->ibdev, i)) continue; @@ -1051,15 +1062,17 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, struct smc_init_info *ini) { u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; + struct net *net; ndev = pnet_find_base_ndev(ndev); + net = dev_net(ndev); if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, ndev_pnetid) && smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { smc_pnet_find_rdma_dev(ndev, ini); return; /* pnetid could not be determined */ } - _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL); + _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL, net); } static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h index ec17f29646f5..9fc5e586d24a 100644 --- a/net/smc/smc_tracepoint.h +++ b/net/smc/smc_tracepoint.h @@ -22,6 +22,7 @@ TRACE_EVENT(smc_switch_to_fallback, TP_STRUCT__entry( __field(const void *, sk) __field(const void *, clcsk) + __field(u64, net_cookie) __field(int, fallback_rsn) ), @@ -31,11 +32,13 @@ TRACE_EVENT(smc_switch_to_fallback, __entry->sk = sk; __entry->clcsk = clcsk; + __entry->net_cookie = sock_net(sk)->net_cookie; __entry->fallback_rsn = fallback_rsn; ), - TP_printk("sk=%p clcsk=%p fallback_rsn=%d", - __entry->sk, __entry->clcsk, __entry->fallback_rsn) + TP_printk("sk=%p clcsk=%p net=%llu fallback_rsn=%d", + __entry->sk, __entry->clcsk, + __entry->net_cookie, __entry->fallback_rsn) ); DECLARE_EVENT_CLASS(smc_msg_event, @@ -46,19 +49,23 @@ DECLARE_EVENT_CLASS(smc_msg_event, TP_STRUCT__entry( __field(const void *, smc) + __field(u64, net_cookie) __field(size_t, len) __string(name, smc->conn.lnk->ibname) ), TP_fast_assign( + const struct sock *sk = &smc->sk; + __entry->smc = smc; + __entry->net_cookie = sock_net(sk)->net_cookie; __entry->len = len; __assign_str(name, smc->conn.lnk->ibname); ), - TP_printk("smc=%p len=%zu dev=%s", - __entry->smc, __entry->len, - __get_str(name)) + TP_printk("smc=%p net=%llu len=%zu dev=%s", + __entry->smc, __entry->net_cookie, + __entry->len, __get_str(name)) ); DEFINE_EVENT(smc_msg_event, smc_tx_sendmsg, @@ -84,6 +91,7 @@ TRACE_EVENT(smcr_link_down, TP_STRUCT__entry( __field(const void *, lnk) __field(const void *, lgr) + __field(u64, net_cookie) __field(int, state) __string(name, lnk->ibname) __field(void *, location) @@ -94,13 +102,14 @@ TRACE_EVENT(smcr_link_down, __entry->lnk = lnk; __entry->lgr = lgr; + __entry->net_cookie = lgr->net->net_cookie; __entry->state = lnk->state; __assign_str(name, lnk->ibname); __entry->location = location; ), - TP_printk("lnk=%p lgr=%p state=%d dev=%s location=%pS", - __entry->lnk, __entry->lgr, + TP_printk("lnk=%p lgr=%p net=%llu state=%d dev=%s location=%pS", + __entry->lnk, __entry->lgr, __entry->net_cookie, __entry->state, __get_str(name), __entry->location) ); diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 600ab5889227..24be1d03fef9 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -54,21 +54,13 @@ struct smc_wr_tx_pend { /* control data for a pending send request */ /* returns true if at least one tx work request is pending on the given link */ static inline bool smc_wr_is_tx_pend(struct smc_link *link) { - if (find_first_bit(link->wr_tx_mask, link->wr_tx_cnt) != - link->wr_tx_cnt) { - return true; - } - return false; + return !bitmap_empty(link->wr_tx_mask, link->wr_tx_cnt); } /* wait till all pending tx work requests on the given link are completed */ -int smc_wr_tx_wait_no_pending_sends(struct smc_link *link) +void smc_wr_tx_wait_no_pending_sends(struct smc_link *link) { - if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link), - SMC_WR_TX_WAIT_PENDING_TIME)) - return 0; - else /* timeout */ - return -EPIPE; + wait_event(link->wr_tx_wait, !smc_wr_is_tx_pend(link)); } static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id) @@ -87,7 +79,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) struct smc_wr_tx_pend pnd_snd; struct smc_link *link; u32 pnd_snd_idx; - int i; link = wc->qp->qp_context; @@ -128,14 +119,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) } if (wc->status) { - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - /* clear full struct smc_wr_tx_pend including .priv */ - memset(&link->wr_tx_pends[i], 0, - sizeof(link->wr_tx_pends[i])); - memset(&link->wr_tx_bufs[i], 0, - sizeof(link->wr_tx_bufs[i])); - clear_bit(i, link->wr_tx_mask); - } if (link->lgr->smc_version == SMC_V2) { memset(link->wr_tx_v2_pend, 0, sizeof(*link->wr_tx_v2_pend)); @@ -188,7 +171,7 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context) static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) { *idx = link->wr_tx_cnt; - if (!smc_link_usable(link)) + if (!smc_link_sendable(link)) return -ENOLINK; for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) { if (!test_and_set_bit(*idx, link->wr_tx_mask)) @@ -231,7 +214,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, } else { rc = wait_event_interruptible_timeout( link->wr_tx_wait, - !smc_link_usable(link) || + !smc_link_sendable(link) || lgr->terminating || (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); @@ -358,18 +341,20 @@ int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, unsigned long timeout) { struct smc_wr_tx_pend *pend; + u32 pnd_idx; int rc; pend = container_of(priv, struct smc_wr_tx_pend, priv); pend->compl_requested = 1; - init_completion(&link->wr_tx_compl[pend->idx]); + pnd_idx = pend->idx; + init_completion(&link->wr_tx_compl[pnd_idx]); rc = smc_wr_tx_send(link, priv); if (rc) return rc; /* wait for completion by smc_wr_tx_process_cqe() */ rc = wait_for_completion_interruptible_timeout( - &link->wr_tx_compl[pend->idx], timeout); + &link->wr_tx_compl[pnd_idx], timeout); if (rc <= 0) rc = -ENODATA; if (rc > 0) @@ -419,25 +404,6 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) return rc; } -void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type, - smc_wr_tx_filter filter, - smc_wr_tx_dismisser dismisser, - unsigned long data) -{ - struct smc_wr_tx_pend_priv *tx_pend; - struct smc_wr_rx_hdr *wr_tx; - int i; - - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i]; - if (wr_tx->type != wr_tx_hdr_type) - continue; - tx_pend = &link->wr_tx_pends[i].priv; - if (filter(tx_pend, data)) - dismisser(tx_pend); - } -} - /****************************** receive queue ********************************/ int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler) @@ -673,10 +639,7 @@ void smc_wr_free_link(struct smc_link *lnk) smc_wr_wakeup_reg_wait(lnk); smc_wr_wakeup_tx_wait(lnk); - if (smc_wr_tx_wait_no_pending_sends(lnk)) - memset(lnk->wr_tx_mask, 0, - BITS_TO_LONGS(SMC_WR_BUF_CNT) * - sizeof(*lnk->wr_tx_mask)); + smc_wr_tx_wait_no_pending_sends(lnk); wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt))); wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt))); @@ -729,7 +692,7 @@ void smc_wr_free_link_mem(struct smc_link *lnk) lnk->wr_tx_compl = NULL; kfree(lnk->wr_tx_pends); lnk->wr_tx_pends = NULL; - kfree(lnk->wr_tx_mask); + bitmap_free(lnk->wr_tx_mask); lnk->wr_tx_mask = NULL; kfree(lnk->wr_tx_sges); lnk->wr_tx_sges = NULL; @@ -805,9 +768,7 @@ int smc_wr_alloc_link_mem(struct smc_link *link) GFP_KERNEL); if (!link->wr_rx_sges) goto no_mem_wr_tx_sges; - link->wr_tx_mask = kcalloc(BITS_TO_LONGS(SMC_WR_BUF_CNT), - sizeof(*link->wr_tx_mask), - GFP_KERNEL); + link->wr_tx_mask = bitmap_zalloc(SMC_WR_BUF_CNT, GFP_KERNEL); if (!link->wr_tx_mask) goto no_mem_wr_rx_sges; link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT, @@ -920,8 +881,7 @@ int smc_wr_create_link(struct smc_link *lnk) goto dma_unmap; } smc_wr_init_sge(lnk); - memset(lnk->wr_tx_mask, 0, - BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); + bitmap_zero(lnk->wr_tx_mask, SMC_WR_BUF_CNT); init_waitqueue_head(&lnk->wr_tx_wait); atomic_set(&lnk->wr_tx_refcnt, 0); init_waitqueue_head(&lnk->wr_reg_wait); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index f353311e6f84..a54e90a1110f 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -22,7 +22,6 @@ #define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */ #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ) -#define SMC_WR_TX_WAIT_PENDING_TIME (5 * HZ) #define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */ @@ -62,7 +61,7 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val) static inline bool smc_wr_tx_link_hold(struct smc_link *link) { - if (!smc_link_usable(link)) + if (!smc_link_sendable(link)) return false; atomic_inc(&link->wr_tx_refcnt); return true; @@ -126,11 +125,7 @@ int smc_wr_tx_v2_send(struct smc_link *link, int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, unsigned long timeout); void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); -void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, - smc_wr_tx_filter filter, - smc_wr_tx_dismisser dismisser, - unsigned long data); -int smc_wr_tx_wait_no_pending_sends(struct smc_link *link); +void smc_wr_tx_wait_no_pending_sends(struct smc_link *link); int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); int smc_wr_rx_post_init(struct smc_link *link); |