diff options
Diffstat (limited to '')
| -rw-r--r-- | net/smc/Makefile | 2 | ||||
| -rw-r--r-- | net/smc/af_smc.c | 310 | ||||
| -rw-r--r-- | net/smc/smc.h | 9 | ||||
| -rw-r--r-- | net/smc/smc_cdc.c | 113 | ||||
| -rw-r--r-- | net/smc/smc_cdc.h | 86 | ||||
| -rw-r--r-- | net/smc/smc_clc.c | 197 | ||||
| -rw-r--r-- | net/smc/smc_clc.h | 99 | ||||
| -rw-r--r-- | net/smc/smc_core.c | 350 | ||||
| -rw-r--r-- | net/smc/smc_core.h | 85 | ||||
| -rw-r--r-- | net/smc/smc_diag.c | 33 | ||||
| -rw-r--r-- | net/smc/smc_ib.c | 173 | ||||
| -rw-r--r-- | net/smc/smc_ib.h | 7 | ||||
| -rw-r--r-- | net/smc/smc_ism.c | 348 | ||||
| -rw-r--r-- | net/smc/smc_ism.h | 48 | ||||
| -rw-r--r-- | net/smc/smc_llc.c | 80 | ||||
| -rw-r--r-- | net/smc/smc_llc.h | 7 | ||||
| -rw-r--r-- | net/smc/smc_pnet.c | 171 | ||||
| -rw-r--r-- | net/smc/smc_pnet.h | 19 | ||||
| -rw-r--r-- | net/smc/smc_rx.c | 21 | ||||
| -rw-r--r-- | net/smc/smc_tx.c | 242 | ||||
| -rw-r--r-- | net/smc/smc_tx.h | 6 | ||||
| -rw-r--r-- | net/smc/smc_wr.c | 41 | ||||
| -rw-r--r-- | net/smc/smc_wr.h | 3 | 
23 files changed, 1861 insertions, 589 deletions
diff --git a/net/smc/Makefile b/net/smc/Makefile index 188104654b54..4df96b4b8130 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -1,4 +1,4 @@  obj-$(CONFIG_SMC)	+= smc.o  obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o  smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o -smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o +smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index e7de5f282722..2d8a1e15e4f9 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -23,6 +23,7 @@  #include <linux/workqueue.h>  #include <linux/in.h>  #include <linux/sched/signal.h> +#include <linux/if_vlan.h>  #include <net/sock.h>  #include <net/tcp.h> @@ -35,6 +36,7 @@  #include "smc_cdc.h"  #include "smc_core.h"  #include "smc_ib.h" +#include "smc_ism.h"  #include "smc_pnet.h"  #include "smc_tx.h"  #include "smc_rx.h" @@ -342,20 +344,17 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)  	rc = smc_ib_modify_qp_rts(link);  	if (rc) -		return SMC_CLC_DECL_INTERR; +		return SMC_CLC_DECL_ERR_RDYLNK;  	smc_wr_remember_qp_attr(link);  	if (smc_reg_rmb(link, smc->conn.rmb_desc, false)) -		return SMC_CLC_DECL_INTERR; +		return SMC_CLC_DECL_ERR_REGRMB;  	/* send CONFIRM LINK response over RoCE fabric */ -	rc = smc_llc_send_confirm_link(link, -				       link->smcibdev->mac[link->ibport - 1], -				       &link->smcibdev->gid[link->ibport - 1], -				       SMC_LLC_RESP); +	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);  	if (rc < 0) -		return SMC_CLC_DECL_TCL; +		return SMC_CLC_DECL_TIMEOUT_CL;  	/* receive ADD LINK request from server over RoCE fabric */  	rest = wait_for_completion_interruptible_timeout(&link->llc_add, @@ -371,18 +370,17 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)  	/* send add link reject message, only one link supported for now */  	rc = smc_llc_send_add_link(link,  				   link->smcibdev->mac[link->ibport - 1], -				   &link->smcibdev->gid[link->ibport - 1], -				   SMC_LLC_RESP); +				   link->gid, SMC_LLC_RESP);  	if (rc < 0) -		return SMC_CLC_DECL_TCL; +		return SMC_CLC_DECL_TIMEOUT_AL;  	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);  	return 0;  } -static void smc_conn_save_peer_info(struct smc_sock *smc, -				    struct smc_clc_msg_accept_confirm *clc) +static void smcr_conn_save_peer_info(struct smc_sock *smc, +				     struct smc_clc_msg_accept_confirm *clc)  {  	int bufsize = smc_uncompress_bufsize(clc->rmbe_size); @@ -393,6 +391,28 @@ static void smc_conn_save_peer_info(struct smc_sock *smc,  	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);  } +static void smcd_conn_save_peer_info(struct smc_sock *smc, +				     struct smc_clc_msg_accept_confirm *clc) +{ +	int bufsize = smc_uncompress_bufsize(clc->dmbe_size); + +	smc->conn.peer_rmbe_idx = clc->dmbe_idx; +	smc->conn.peer_token = clc->token; +	/* msg header takes up space in the buffer */ +	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg); +	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); +	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx; +} + +static void smc_conn_save_peer_info(struct smc_sock *smc, +				    struct smc_clc_msg_accept_confirm *clc) +{ +	if (smc->conn.lgr->is_smcd) +		smcd_conn_save_peer_info(smc, clc); +	else +		smcr_conn_save_peer_info(smc, clc); +} +  static void smc_link_save_peer_info(struct smc_link *link,  				    struct smc_clc_msg_accept_confirm *clc)  { @@ -404,9 +424,10 @@ static void smc_link_save_peer_info(struct smc_link *link,  }  /* fall back during connect */ -static int smc_connect_fallback(struct smc_sock *smc) +static int smc_connect_fallback(struct smc_sock *smc, int reason_code)  {  	smc->use_fallback = true; +	smc->fallback_rsn = reason_code;  	smc_copy_sock_settings_to_clc(smc);  	if (smc->sk.sk_state == SMC_INIT)  		smc->sk.sk_state = SMC_ACTIVE; @@ -423,7 +444,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)  			sock_put(&smc->sk); /* passive closing */  		return reason_code;  	} -	if (reason_code != SMC_CLC_DECL_REPLY) { +	if (reason_code != SMC_CLC_DECL_PEERDECL) {  		rc = smc_clc_send_decline(smc, reason_code);  		if (rc < 0) {  			if (smc->sk.sk_state == SMC_INIT) @@ -431,7 +452,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)  			return rc;  		}  	} -	return smc_connect_fallback(smc); +	return smc_connect_fallback(smc, reason_code);  }  /* abort connecting */ @@ -448,7 +469,7 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code,  /* check if there is a rdma device available for this connection. */  /* called for connect and listen */  static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev, -			  u8 *ibport) +			  u8 *ibport, unsigned short vlan_id, u8 gid[])  {  	int reason_code = 0; @@ -456,22 +477,59 @@ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,  	 * within same PNETID that also contains the ethernet device  	 * used for the internal TCP socket  	 */ -	smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport); +	smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id, +				    gid);  	if (!(*ibdev))  		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */  	return reason_code;  } +/* check if there is an ISM device available for this connection. */ +/* called for connect and listen */ +static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev) +{ +	/* Find ISM device with same PNETID as connecting interface  */ +	smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev); +	if (!(*ismdev)) +		return SMC_CLC_DECL_CNFERR; /* configuration error */ +	return 0; +} + +/* Check for VLAN ID and register it on ISM device just for CLC handshake */ +static int smc_connect_ism_vlan_setup(struct smc_sock *smc, +				      struct smcd_dev *ismdev, +				      unsigned short vlan_id) +{ +	if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id)) +		return SMC_CLC_DECL_CNFERR; +	return 0; +} + +/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is + * used, the VLAN ID will be registered again during the connection setup. + */ +static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd, +					struct smcd_dev *ismdev, +					unsigned short vlan_id) +{ +	if (!is_smcd) +		return 0; +	if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id)) +		return SMC_CLC_DECL_CNFERR; +	return 0; +} +  /* CLC handshake during connect */ -static int smc_connect_clc(struct smc_sock *smc, +static int smc_connect_clc(struct smc_sock *smc, int smc_type,  			   struct smc_clc_msg_accept_confirm *aclc, -			   struct smc_ib_device *ibdev, u8 ibport) +			   struct smc_ib_device *ibdev, u8 ibport, +			   u8 gid[], struct smcd_dev *ismdev)  {  	int rc = 0;  	/* do inband token exchange */ -	rc = smc_clc_send_proposal(smc, ibdev, ibport); +	rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);  	if (rc)  		return rc;  	/* receive SMC Accept CLC message */ @@ -488,8 +546,8 @@ static int smc_connect_rdma(struct smc_sock *smc,  	int reason_code = 0;  	mutex_lock(&smc_create_lgr_pending); -	local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl, -					aclc->hdr.flag); +	local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev, +					ibport, &aclc->lcl, NULL, 0);  	if (local_contact < 0) {  		if (local_contact == -ENOMEM)  			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ @@ -504,14 +562,14 @@ static int smc_connect_rdma(struct smc_sock *smc,  	smc_conn_save_peer_info(smc, aclc);  	/* create send buffer and rmb */ -	if (smc_buf_create(smc)) +	if (smc_buf_create(smc, false))  		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);  	if (local_contact == SMC_FIRST_CONTACT)  		smc_link_save_peer_info(link, aclc);  	if (smc_rmb_rtoken_handling(&smc->conn, aclc)) -		return smc_connect_abort(smc, SMC_CLC_DECL_INTERR, +		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,  					 local_contact);  	smc_close_init(smc); @@ -519,12 +577,12 @@ static int smc_connect_rdma(struct smc_sock *smc,  	if (local_contact == SMC_FIRST_CONTACT) {  		if (smc_ib_ready_link(link)) -			return smc_connect_abort(smc, SMC_CLC_DECL_INTERR, +			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,  						 local_contact);  	} else {  		if (!smc->conn.rmb_desc->reused &&  		    smc_reg_rmb(link, smc->conn.rmb_desc, true)) -			return smc_connect_abort(smc, SMC_CLC_DECL_INTERR, +			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,  						 local_contact);  	}  	smc_rmb_sync_sg_for_device(&smc->conn); @@ -551,41 +609,113 @@ static int smc_connect_rdma(struct smc_sock *smc,  	return 0;  } +/* setup for ISM connection of client */ +static int smc_connect_ism(struct smc_sock *smc, +			   struct smc_clc_msg_accept_confirm *aclc, +			   struct smcd_dev *ismdev) +{ +	int local_contact = SMC_FIRST_CONTACT; +	int rc = 0; + +	mutex_lock(&smc_create_lgr_pending); +	local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, +					NULL, ismdev, aclc->gid); +	if (local_contact < 0) +		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0); + +	/* Create send and receive buffers */ +	if (smc_buf_create(smc, true)) +		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact); + +	smc_conn_save_peer_info(smc, aclc); +	smc_close_init(smc); +	smc_rx_init(smc); +	smc_tx_init(smc); + +	rc = smc_clc_send_confirm(smc); +	if (rc) +		return smc_connect_abort(smc, rc, local_contact); +	mutex_unlock(&smc_create_lgr_pending); + +	smc_copy_sock_settings_to_clc(smc); +	if (smc->sk.sk_state == SMC_INIT) +		smc->sk.sk_state = SMC_ACTIVE; + +	return 0; +} +  /* perform steps before actually connecting */  static int __smc_connect(struct smc_sock *smc)  { +	bool ism_supported = false, rdma_supported = false;  	struct smc_clc_msg_accept_confirm aclc;  	struct smc_ib_device *ibdev; +	struct smcd_dev *ismdev; +	u8 gid[SMC_GID_SIZE]; +	unsigned short vlan; +	int smc_type;  	int rc = 0;  	u8 ibport;  	sock_hold(&smc->sk); /* sock put in passive closing */  	if (smc->use_fallback) -		return smc_connect_fallback(smc); +		return smc_connect_fallback(smc, smc->fallback_rsn);  	/* if peer has not signalled SMC-capability, fall back */  	if (!tcp_sk(smc->clcsock->sk)->syn_smc) -		return smc_connect_fallback(smc); +		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);  	/* IPSec connections opt out of SMC-R optimizations */  	if (using_ipsec(smc))  		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC); -	/* check if a RDMA device is available; if not, fall back */ -	if (smc_check_rdma(smc, &ibdev, &ibport)) +	/* check for VLAN ID */ +	if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))  		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR); +	/* check if there is an ism device available */ +	if (!smc_check_ism(smc, &ismdev) && +	    !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) { +		/* ISM is supported for this connection */ +		ism_supported = true; +		smc_type = SMC_TYPE_D; +	} + +	/* check if there is a rdma device available */ +	if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) { +		/* RDMA is supported for this connection */ +		rdma_supported = true; +		if (ism_supported) +			smc_type = SMC_TYPE_B; /* both */ +		else +			smc_type = SMC_TYPE_R; /* only RDMA */ +	} + +	/* if neither ISM nor RDMA are supported, fallback */ +	if (!rdma_supported && !ism_supported) +		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV); +  	/* perform CLC handshake */ -	rc = smc_connect_clc(smc, &aclc, ibdev, ibport); -	if (rc) +	rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev); +	if (rc) { +		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);  		return smc_connect_decline_fallback(smc, rc); +	} -	/* connect using rdma */ -	rc = smc_connect_rdma(smc, &aclc, ibdev, ibport); -	if (rc) +	/* depending on previous steps, connect using rdma or ism */ +	if (rdma_supported && aclc.hdr.path == SMC_TYPE_R) +		rc = smc_connect_rdma(smc, &aclc, ibdev, ibport); +	else if (ism_supported && aclc.hdr.path == SMC_TYPE_D) +		rc = smc_connect_ism(smc, &aclc, ismdev); +	else +		rc = SMC_CLC_DECL_MODEUNSUPP; +	if (rc) { +		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);  		return smc_connect_decline_fallback(smc, rc); +	} +	smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);  	return 0;  } @@ -817,15 +947,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)  	link = &lgr->lnk[SMC_SINGLE_LINK];  	if (smc_reg_rmb(link, smc->conn.rmb_desc, false)) -		return SMC_CLC_DECL_INTERR; +		return SMC_CLC_DECL_ERR_REGRMB;  	/* send CONFIRM LINK request to client over the RoCE fabric */ -	rc = smc_llc_send_confirm_link(link, -				       link->smcibdev->mac[link->ibport - 1], -				       &link->smcibdev->gid[link->ibport - 1], -				       SMC_LLC_REQ); +	rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);  	if (rc < 0) -		return SMC_CLC_DECL_TCL; +		return SMC_CLC_DECL_TIMEOUT_CL;  	/* receive CONFIRM LINK response from client over the RoCE fabric */  	rest = wait_for_completion_interruptible_timeout( @@ -845,10 +972,9 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)  	/* send ADD LINK request to client over the RoCE fabric */  	rc = smc_llc_send_add_link(link,  				   link->smcibdev->mac[link->ibport - 1], -				   &link->smcibdev->gid[link->ibport - 1], -				   SMC_LLC_REQ); +				   link->gid, SMC_LLC_REQ);  	if (rc < 0) -		return SMC_CLC_DECL_TCL; +		return SMC_CLC_DECL_TIMEOUT_AL;  	/* receive ADD LINK response from client over the RoCE fabric */  	rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp, @@ -923,7 +1049,8 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,  	}  	smc_conn_free(&new_smc->conn);  	new_smc->use_fallback = true; -	if (reason_code && reason_code != SMC_CLC_DECL_REPLY) { +	new_smc->fallback_rsn = reason_code; +	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {  		if (smc_clc_send_decline(new_smc, reason_code) < 0) {  			smc_listen_out_err(new_smc);  			return; @@ -953,7 +1080,8 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,  				int *local_contact)  {  	/* allocate connection / link group */ -	*local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0); +	*local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, +					 &pclc->lcl, NULL, 0);  	if (*local_contact < 0) {  		if (*local_contact == -ENOMEM)  			return SMC_CLC_DECL_MEM;/* insufficient memory*/ @@ -961,12 +1089,50 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,  	}  	/* create send buffer and rmb */ -	if (smc_buf_create(new_smc)) +	if (smc_buf_create(new_smc, false))  		return SMC_CLC_DECL_MEM;  	return 0;  } +/* listen worker: initialize connection and buffers for SMC-D */ +static int smc_listen_ism_init(struct smc_sock *new_smc, +			       struct smc_clc_msg_proposal *pclc, +			       struct smcd_dev *ismdev, +			       int *local_contact) +{ +	struct smc_clc_msg_smcd *pclc_smcd; + +	pclc_smcd = smc_get_clc_msg_smcd(pclc); +	*local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL, +					 ismdev, pclc_smcd->gid); +	if (*local_contact < 0) { +		if (*local_contact == -ENOMEM) +			return SMC_CLC_DECL_MEM;/* insufficient memory*/ +		return SMC_CLC_DECL_INTERR; /* other error */ +	} + +	/* Check if peer can be reached via ISM device */ +	if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid, +			    new_smc->conn.lgr->vlan_id, +			    new_smc->conn.lgr->smcd)) { +		if (*local_contact == SMC_FIRST_CONTACT) +			smc_lgr_forget(new_smc->conn.lgr); +		smc_conn_free(&new_smc->conn); +		return SMC_CLC_DECL_CNFERR; +	} + +	/* Create send and receive buffers */ +	if (smc_buf_create(new_smc, true)) { +		if (*local_contact == SMC_FIRST_CONTACT) +			smc_lgr_forget(new_smc->conn.lgr); +		smc_conn_free(&new_smc->conn); +		return SMC_CLC_DECL_MEM; +	} + +	return 0; +} +  /* listen worker: register buffers */  static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)  { @@ -975,7 +1141,7 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)  	if (local_contact != SMC_FIRST_CONTACT) {  		if (!new_smc->conn.rmb_desc->reused) {  			if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true)) -				return SMC_CLC_DECL_INTERR; +				return SMC_CLC_DECL_ERR_REGRMB;  		}  	}  	smc_rmb_sync_sg_for_device(&new_smc->conn); @@ -995,13 +1161,13 @@ static void smc_listen_rdma_finish(struct smc_sock *new_smc,  		smc_link_save_peer_info(link, cclc);  	if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) { -		reason_code = SMC_CLC_DECL_INTERR; +		reason_code = SMC_CLC_DECL_ERR_RTOK;  		goto decline;  	}  	if (local_contact == SMC_FIRST_CONTACT) {  		if (smc_ib_ready_link(link)) { -			reason_code = SMC_CLC_DECL_INTERR; +			reason_code = SMC_CLC_DECL_ERR_RDYLNK;  			goto decline;  		}  		/* QP confirmation over RoCE fabric */ @@ -1025,8 +1191,11 @@ static void smc_listen_work(struct work_struct *work)  	struct smc_clc_msg_accept_confirm cclc;  	struct smc_clc_msg_proposal *pclc;  	struct smc_ib_device *ibdev; +	bool ism_supported = false; +	struct smcd_dev *ismdev;  	u8 buf[SMC_CLC_MAX_LEN];  	int local_contact = 0; +	unsigned short vlan;  	int reason_code = 0;  	int rc = 0;  	u8 ibport; @@ -1039,6 +1208,7 @@ static void smc_listen_work(struct work_struct *work)  	/* check if peer is smc capable */  	if (!tcp_sk(newclcsock->sk)->syn_smc) {  		new_smc->use_fallback = true; +		new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;  		smc_listen_out_connected(new_smc);  		return;  	} @@ -1065,15 +1235,26 @@ static void smc_listen_work(struct work_struct *work)  	smc_rx_init(new_smc);  	smc_tx_init(new_smc); +	/* check if ISM is available */ +	if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) && +	    !smc_check_ism(new_smc, &ismdev) && +	    !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) { +		ism_supported = true; +	} +  	/* check if RDMA is available */ -	if (smc_check_rdma(new_smc, &ibdev, &ibport) || -	    smc_listen_rdma_check(new_smc, pclc) || -	    smc_listen_rdma_init(new_smc, pclc, ibdev, ibport, -				 &local_contact) || -	    smc_listen_rdma_reg(new_smc, local_contact)) { +	if (!ism_supported && +	    ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) || +	     smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) || +	     smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) || +	     smc_listen_rdma_check(new_smc, pclc) || +	     smc_listen_rdma_init(new_smc, pclc, ibdev, ibport, +				  &local_contact) || +	     smc_listen_rdma_reg(new_smc, local_contact))) {  		/* SMC not supported, decline */  		mutex_unlock(&smc_create_lgr_pending); -		smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact); +		smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP, +				   local_contact);  		return;  	} @@ -1095,7 +1276,8 @@ static void smc_listen_work(struct work_struct *work)  	}  	/* finish worker */ -	smc_listen_rdma_finish(new_smc, &cclc, local_contact); +	if (!ism_supported) +		smc_listen_rdma_finish(new_smc, &cclc, local_contact);  	smc_conn_save_peer_info(new_smc, &cclc);  	mutex_unlock(&smc_create_lgr_pending);  	smc_listen_out_connected(new_smc); @@ -1119,6 +1301,7 @@ static void smc_tcp_listen_work(struct work_struct *work)  		new_smc->listen_smc = lsmc;  		new_smc->use_fallback = lsmc->use_fallback; +		new_smc->fallback_rsn = lsmc->fallback_rsn;  		sock_hold(lsk); /* sock_put in smc_listen_work */  		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);  		smc_copy_sock_settings_to_smc(new_smc); @@ -1275,6 +1458,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)  	if (msg->msg_flags & MSG_FASTOPEN) {  		if (sk->sk_state == SMC_INIT) {  			smc->use_fallback = true; +			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;  		} else {  			rc = -EINVAL;  			goto out; @@ -1353,7 +1537,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,  			mask |= EPOLLERR;  	} else {  		if (sk->sk_state != SMC_CLOSED) -			sock_poll_wait(file, sk_sleep(sk), wait); +			sock_poll_wait(file, wait);  		if (sk->sk_err)  			mask |= EPOLLERR;  		if ((sk->sk_shutdown == SHUTDOWN_MASK) || @@ -1471,6 +1655,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,  		/* option not supported by SMC */  		if (sk->sk_state == SMC_INIT) {  			smc->use_fallback = true; +			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;  		} else {  			if (!smc->use_fallback)  				rc = -EINVAL; @@ -1578,12 +1763,8 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,  		    smc->sk.sk_state == SMC_CLOSED) {  			answ = 0;  		} else { -			smc_curs_write(&cons, -			       smc_curs_read(&conn->local_tx_ctrl.cons, conn), -				       conn); -			smc_curs_write(&urg, -				       smc_curs_read(&conn->urg_curs, conn), -				       conn); +			smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); +			smc_curs_copy(&urg, &conn->urg_curs, conn);  			answ = smc_curs_diff(conn->rmb_desc->len,  					     &cons, &urg) == 1;  		} @@ -1716,6 +1897,7 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,  	/* create internal TCP socket for CLC handshake and fallback */  	smc = smc_sk(sk);  	smc->use_fallback = false; /* assume rdma capability first */ +	smc->fallback_rsn = 0;  	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,  			      &smc->clcsock);  	if (rc) { diff --git a/net/smc/smc.h b/net/smc/smc.h index d7ca26570482..08786ace6010 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -21,8 +21,6 @@  #define SMCPROTO_SMC		0	/* SMC protocol, IPv4 */  #define SMCPROTO_SMC6		1	/* SMC protocol, IPv6 */ -#define SMC_MAX_PORTS		2	/* Max # of ports */ -  extern struct proto smc_proto;  extern struct proto smc_proto6; @@ -185,6 +183,11 @@ struct smc_connection {  	spinlock_t		acurs_lock;	/* protect cursors */  #endif  	struct work_struct	close_work;	/* peer sent some closing */ +	struct tasklet_struct	rx_tsklet;	/* Receiver tasklet for SMC-D */ +	u8			rx_off;		/* receive offset: +						 * 0 for SMC-R, 32 for SMC-D +						 */ +	u64			peer_token;	/* SMC-D token of peer */  };  struct smc_connect_info { @@ -205,6 +208,8 @@ struct smc_sock {				/* smc sock container */  	struct list_head	accept_q;	/* sockets to be accepted */  	spinlock_t		accept_q_lock;	/* protects accept_q */  	bool			use_fallback;	/* fallback to tcp */ +	int			fallback_rsn;	/* reason for fallback */ +	u32			peer_diagnosis; /* decline reason from peer */  	int			sockopt_defer_accept;  						/* sockopt TCP_DEFER_ACCEPT  						 * value diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 9bde1e4ca288..ed5dcf03fe0b 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -34,14 +34,15 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,  			       enum ib_wc_status wc_status)  {  	struct smc_cdc_tx_pend *cdcpend = (struct smc_cdc_tx_pend *)pnd_snd; +	struct smc_connection *conn = cdcpend->conn;  	struct smc_sock *smc;  	int diff; -	if (!cdcpend->conn) +	if (!conn)  		/* already dismissed */  		return; -	smc = container_of(cdcpend->conn, struct smc_sock, conn); +	smc = container_of(conn, struct smc_sock, conn);  	bh_lock_sock(&smc->sk);  	if (!wc_status) {  		diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len, @@ -52,9 +53,7 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,  		atomic_add(diff, &cdcpend->conn->sndbuf_space);  		/* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */  		smp_mb__after_atomic(); -		smc_curs_write(&cdcpend->conn->tx_curs_fin, -			       smc_curs_read(&cdcpend->cursor, cdcpend->conn), -			       cdcpend->conn); +		smc_curs_copy(&conn->tx_curs_fin, &cdcpend->cursor, conn);  	}  	smc_tx_sndbuf_nonfull(smc);  	bh_unlock_sock(&smc->sk); @@ -110,14 +109,13 @@ int smc_cdc_msg_send(struct smc_connection *conn,  			    &conn->local_tx_ctrl, conn);  	rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);  	if (!rc) -		smc_curs_write(&conn->rx_curs_confirmed, -			       smc_curs_read(&conn->local_tx_ctrl.cons, conn), -			       conn); +		smc_curs_copy(&conn->rx_curs_confirmed, +			      &conn->local_tx_ctrl.cons, conn);  	return rc;  } -int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) +static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)  {  	struct smc_cdc_tx_pend *pend;  	struct smc_wr_buf *wr_buf; @@ -130,6 +128,21 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)  	return smc_cdc_msg_send(conn, wr_buf, pend);  } +int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) +{ +	int rc; + +	if (conn->lgr->is_smcd) { +		spin_lock_bh(&conn->send_lock); +		rc = smcd_cdc_msg_send(conn); +		spin_unlock_bh(&conn->send_lock); +	} else { +		rc = smcr_cdc_get_slot_and_msg_send(conn); +	} + +	return rc; +} +  static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend,  			      unsigned long data)  { @@ -157,6 +170,44 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)  				(unsigned long)conn);  } +/* Send a SMC-D CDC header. + * This increments the free space available in our send buffer. + * Also update the confirmed receive buffer with what was sent to the peer. + */ +int smcd_cdc_msg_send(struct smc_connection *conn) +{ +	struct smc_sock *smc = container_of(conn, struct smc_sock, conn); +	struct smcd_cdc_msg cdc; +	int rc, diff; + +	memset(&cdc, 0, sizeof(cdc)); +	cdc.common.type = SMC_CDC_MSG_TYPE; +	cdc.prod_wrap = conn->local_tx_ctrl.prod.wrap; +	cdc.prod_count = conn->local_tx_ctrl.prod.count; + +	cdc.cons_wrap = conn->local_tx_ctrl.cons.wrap; +	cdc.cons_count = conn->local_tx_ctrl.cons.count; +	cdc.prod_flags = conn->local_tx_ctrl.prod_flags; +	cdc.conn_state_flags = conn->local_tx_ctrl.conn_state_flags; +	rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1); +	if (rc) +		return rc; +	smc_curs_copy(&conn->rx_curs_confirmed, &conn->local_tx_ctrl.cons, +		      conn); +	/* Calculate transmitted data and increment free send buffer space */ +	diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin, +			     &conn->tx_curs_sent); +	/* increased by confirmed number of bytes */ +	smp_mb__before_atomic(); +	atomic_add(diff, &conn->sndbuf_space); +	/* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ +	smp_mb__after_atomic(); +	smc_curs_copy(&conn->tx_curs_fin, &conn->tx_curs_sent, conn); + +	smc_tx_sndbuf_nonfull(smc); +	return rc; +} +  /********************************* receive ***********************************/  static inline bool smc_cdc_before(u16 seq1, u16 seq2) @@ -171,14 +222,12 @@ static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc,  	char *base;  	/* new data included urgent business */ -	smc_curs_write(&conn->urg_curs, -		       smc_curs_read(&conn->local_rx_ctrl.prod, conn), -		       conn); +	smc_curs_copy(&conn->urg_curs, &conn->local_rx_ctrl.prod, conn);  	conn->urg_state = SMC_URG_VALID;  	if (!sock_flag(&smc->sk, SOCK_URGINLINE))  		/* we'll skip the urgent byte, so don't account for it */  		(*diff_prod)--; -	base = (char *)conn->rmb_desc->cpu_addr; +	base = (char *)conn->rmb_desc->cpu_addr + conn->rx_off;  	if (conn->urg_curs.count)  		conn->urg_rx_byte = *(base + conn->urg_curs.count - 1);  	else @@ -193,12 +242,8 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,  	struct smc_connection *conn = &smc->conn;  	int diff_cons, diff_prod; -	smc_curs_write(&prod_old, -		       smc_curs_read(&conn->local_rx_ctrl.prod, conn), -		       conn); -	smc_curs_write(&cons_old, -		       smc_curs_read(&conn->local_rx_ctrl.cons, conn), -		       conn); +	smc_curs_copy(&prod_old, &conn->local_rx_ctrl.prod, conn); +	smc_curs_copy(&cons_old, &conn->local_rx_ctrl.cons, conn);  	smc_cdc_msg_to_host(&conn->local_rx_ctrl, cdc, conn);  	diff_cons = smc_curs_diff(conn->peer_rmbe_size, &cons_old, @@ -277,6 +322,34 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)  	sock_put(&smc->sk); /* no free sk in softirq-context */  } +/* Schedule a tasklet for this connection. Triggered from the ISM device IRQ + * handler to indicate update in the DMBE. + * + * Context: + * - tasklet context + */ +static void smcd_cdc_rx_tsklet(unsigned long data) +{ +	struct smc_connection *conn = (struct smc_connection *)data; +	struct smcd_cdc_msg cdc; +	struct smc_sock *smc; + +	if (!conn) +		return; + +	memcpy(&cdc, conn->rmb_desc->cpu_addr, sizeof(cdc)); +	smc = container_of(conn, struct smc_sock, conn); +	smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc); +} + +/* Initialize receive tasklet. Called from ISM device IRQ handler to start + * receiver side. + */ +void smcd_cdc_rx_init(struct smc_connection *conn) +{ +	tasklet_init(&conn->rx_tsklet, smcd_cdc_rx_tsklet, (unsigned long)conn); +} +  /***************************** init, exit, misc ******************************/  static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf) @@ -293,7 +366,7 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)  		return; /* invalid message */  	/* lookup connection */ -	lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); +	lgr = smc_get_lgr(link);  	read_lock_bh(&lgr->conns_lock);  	conn = smc_lgr_find_conn(ntohl(cdc->token), lgr);  	read_unlock_bh(&lgr->conns_lock); diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index f60082fee5b8..934df4473a7c 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -50,6 +50,20 @@ struct smc_cdc_msg {  	u8				reserved[18];  } __packed;					/* format defined in RFC7609 */ +/* CDC message for SMC-D */ +struct smcd_cdc_msg { +	struct smc_wr_rx_hdr common;	/* Type = 0xFE */ +	u8 res1[7]; +	u16 prod_wrap; +	u32 prod_count; +	u8 res2[2]; +	u16 cons_wrap; +	u32 cons_count; +	struct smc_cdc_producer_flags	prod_flags; +	struct smc_cdc_conn_state_flags conn_state_flags; +	u8 res3[8]; +} __packed; +  static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)  {  	return conn->local_rx_ctrl.conn_state_flags.peer_conn_abort || @@ -90,47 +104,34 @@ static inline u64 smc_curs_read(union smc_host_cursor *curs,  #endif  } -static inline u64 smc_curs_read_net(union smc_cdc_cursor *curs, -				    struct smc_connection *conn) -{ -#ifndef KERNEL_HAS_ATOMIC64 -	unsigned long flags; -	u64 ret; - -	spin_lock_irqsave(&conn->acurs_lock, flags); -	ret = curs->acurs; -	spin_unlock_irqrestore(&conn->acurs_lock, flags); -	return ret; -#else -	return atomic64_read(&curs->acurs); -#endif -} - -static inline void smc_curs_write(union smc_host_cursor *curs, u64 val, -				  struct smc_connection *conn) +/* Copy cursor src into tgt */ +static inline void smc_curs_copy(union smc_host_cursor *tgt, +				 union smc_host_cursor *src, +				 struct smc_connection *conn)  {  #ifndef KERNEL_HAS_ATOMIC64  	unsigned long flags;  	spin_lock_irqsave(&conn->acurs_lock, flags); -	curs->acurs = val; +	tgt->acurs = src->acurs;  	spin_unlock_irqrestore(&conn->acurs_lock, flags);  #else -	atomic64_set(&curs->acurs, val); +	atomic64_set(&tgt->acurs, atomic64_read(&src->acurs));  #endif  } -static inline void smc_curs_write_net(union smc_cdc_cursor *curs, u64 val, -				      struct smc_connection *conn) +static inline void smc_curs_copy_net(union smc_cdc_cursor *tgt, +				     union smc_cdc_cursor *src, +				     struct smc_connection *conn)  {  #ifndef KERNEL_HAS_ATOMIC64  	unsigned long flags;  	spin_lock_irqsave(&conn->acurs_lock, flags); -	curs->acurs = val; +	tgt->acurs = src->acurs;  	spin_unlock_irqrestore(&conn->acurs_lock, flags);  #else -	atomic64_set(&curs->acurs, val); +	atomic64_set(&tgt->acurs, atomic64_read(&src->acurs));  #endif  } @@ -165,7 +166,7 @@ static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer,  {  	union smc_host_cursor temp; -	smc_curs_write(&temp, smc_curs_read(local, conn), conn); +	smc_curs_copy(&temp, local, conn);  	peer->count = htonl(temp.count);  	peer->wrap = htons(temp.wrap);  	/* peer->reserved = htons(0); must be ensured by caller */ @@ -192,8 +193,8 @@ static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local,  	union smc_host_cursor temp, old;  	union smc_cdc_cursor net; -	smc_curs_write(&old, smc_curs_read(local, conn), conn); -	smc_curs_write_net(&net, smc_curs_read_net(peer, conn), conn); +	smc_curs_copy(&old, local, conn); +	smc_curs_copy_net(&net, peer, conn);  	temp.count = ntohl(net.count);  	temp.wrap = ntohs(net.wrap);  	if ((old.wrap > temp.wrap) && temp.wrap) @@ -201,12 +202,12 @@ static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local,  	if ((old.wrap == temp.wrap) &&  	    (old.count > temp.count))  		return; -	smc_curs_write(local, smc_curs_read(&temp, conn), conn); +	smc_curs_copy(local, &temp, conn);  } -static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, -				       struct smc_cdc_msg *peer, -				       struct smc_connection *conn) +static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local, +					struct smc_cdc_msg *peer, +					struct smc_connection *conn)  {  	local->common.type = peer->common.type;  	local->len = peer->len; @@ -218,6 +219,27 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,  	local->conn_state_flags = peer->conn_state_flags;  } +static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local, +					struct smcd_cdc_msg *peer) +{ +	local->prod.wrap = peer->prod_wrap; +	local->prod.count = peer->prod_count; +	local->cons.wrap = peer->cons_wrap; +	local->cons.count = peer->cons_count; +	local->prod_flags = peer->prod_flags; +	local->conn_state_flags = peer->conn_state_flags; +} + +static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, +				       struct smc_cdc_msg *peer, +				       struct smc_connection *conn) +{ +	if (conn->lgr->is_smcd) +		smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer); +	else +		smcr_cdc_msg_to_host(local, peer, conn); +} +  struct smc_cdc_tx_pend;  int smc_cdc_get_free_slot(struct smc_connection *conn, @@ -227,6 +249,8 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);  int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,  		     struct smc_cdc_tx_pend *pend);  int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); +int smcd_cdc_msg_send(struct smc_connection *conn);  int smc_cdc_init(void) __init; +void smcd_cdc_rx_init(struct smc_connection *conn);  #endif /* SMC_CDC_H */ diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index ae5d168653ce..83aba9ade060 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -23,9 +23,15 @@  #include "smc_core.h"  #include "smc_clc.h"  #include "smc_ib.h" +#include "smc_ism.h" + +#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 +#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48  /* eye catcher "SMCR" EBCDIC for CLC messages */  static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; +/* eye catcher "SMCD" EBCDIC for CLC messages */ +static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'};  /* check if received message has a correct header length and contains valid   * heading and trailing eyecatchers @@ -38,10 +44,14 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)  	struct smc_clc_msg_decline *dclc;  	struct smc_clc_msg_trail *trl; -	if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) +	if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && +	    memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))  		return false;  	switch (clcm->type) {  	case SMC_CLC_PROPOSAL: +		if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D && +		    clcm->path != SMC_TYPE_B) +			return false;  		pclc = (struct smc_clc_msg_proposal *)clcm;  		pclc_prfx = smc_clc_proposal_get_prefix(pclc);  		if (ntohs(pclc->hdr.length) != @@ -56,10 +66,16 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)  		break;  	case SMC_CLC_ACCEPT:  	case SMC_CLC_CONFIRM: +		if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D) +			return false;  		clc = (struct smc_clc_msg_accept_confirm *)clcm; -		if (ntohs(clc->hdr.length) != sizeof(*clc)) +		if ((clcm->path == SMC_TYPE_R && +		     ntohs(clc->hdr.length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) || +		    (clcm->path == SMC_TYPE_D && +		     ntohs(clc->hdr.length) != SMCD_CLC_ACCEPT_CONFIRM_LEN))  			return false; -		trl = &clc->trl; +		trl = (struct smc_clc_msg_trail *) +			((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl));  		break;  	case SMC_CLC_DECLINE:  		dclc = (struct smc_clc_msg_decline *)clcm; @@ -70,7 +86,8 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)  	default:  		return false;  	} -	if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) +	if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && +	    memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))  		return false;  	return true;  } @@ -296,6 +313,9 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,  	datlen = ntohs(clcm->length);  	if ((len < sizeof(struct smc_clc_msg_hdr)) ||  	    (datlen > buflen) || +	    (clcm->version != SMC_CLC_V1) || +	    (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D && +	     clcm->path != SMC_TYPE_B) ||  	    ((clcm->type != SMC_CLC_DECLINE) &&  	     (clcm->type != expected_type))) {  		smc->sk.sk_err = EPROTO; @@ -314,7 +334,11 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,  		goto out;  	}  	if (clcm->type == SMC_CLC_DECLINE) { -		reason_code = SMC_CLC_DECL_REPLY; +		struct smc_clc_msg_decline *dclc; + +		dclc = (struct smc_clc_msg_decline *)clcm; +		reason_code = SMC_CLC_DECL_PEERDECL; +		smc->peer_diagnosis = ntohl(dclc->peer_diagnosis);  		if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {  			smc->conn.lgr->sync_err = 1;  			smc_lgr_terminate(smc->conn.lgr); @@ -357,17 +381,18 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)  }  /* send CLC PROPOSAL message across internal TCP socket */ -int smc_clc_send_proposal(struct smc_sock *smc, -			  struct smc_ib_device *smcibdev, -			  u8 ibport) +int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, +			  struct smc_ib_device *ibdev, u8 ibport, u8 gid[], +			  struct smcd_dev *ismdev)  {  	struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];  	struct smc_clc_msg_proposal_prefix pclc_prfx; +	struct smc_clc_msg_smcd pclc_smcd;  	struct smc_clc_msg_proposal pclc;  	struct smc_clc_msg_trail trl;  	int len, i, plen, rc;  	int reason_code = 0; -	struct kvec vec[4]; +	struct kvec vec[5];  	struct msghdr msg;  	/* retrieve ip prefixes for CLC proposal msg */ @@ -382,18 +407,34 @@ int smc_clc_send_proposal(struct smc_sock *smc,  	memset(&pclc, 0, sizeof(pclc));  	memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));  	pclc.hdr.type = SMC_CLC_PROPOSAL; -	pclc.hdr.length = htons(plen);  	pclc.hdr.version = SMC_CLC_V1;		/* SMC version */ -	memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); -	memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); -	memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); -	pclc.iparea_offset = htons(0); +	pclc.hdr.path = smc_type; +	if (smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B) { +		/* add SMC-R specifics */ +		memcpy(pclc.lcl.id_for_peer, local_systemid, +		       sizeof(local_systemid)); +		memcpy(&pclc.lcl.gid, gid, SMC_GID_SIZE); +		memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], ETH_ALEN); +		pclc.iparea_offset = htons(0); +	} +	if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) { +		/* add SMC-D specifics */ +		memset(&pclc_smcd, 0, sizeof(pclc_smcd)); +		plen += sizeof(pclc_smcd); +		pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET); +		pclc_smcd.gid = ismdev->local_gid; +	} +	pclc.hdr.length = htons(plen);  	memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));  	memset(&msg, 0, sizeof(msg));  	i = 0;  	vec[i].iov_base = &pclc;  	vec[i++].iov_len = sizeof(pclc); +	if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) { +		vec[i].iov_base = &pclc_smcd; +		vec[i++].iov_len = sizeof(pclc_smcd); +	}  	vec[i].iov_base = &pclc_prfx;  	vec[i++].iov_len = sizeof(pclc_prfx);  	if (pclc_prfx.ipv6_prefixes_cnt > 0) { @@ -429,35 +470,55 @@ int smc_clc_send_confirm(struct smc_sock *smc)  	struct kvec vec;  	int len; -	link = &conn->lgr->lnk[SMC_SINGLE_LINK];  	/* send SMC Confirm CLC msg */  	memset(&cclc, 0, sizeof(cclc)); -	memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));  	cclc.hdr.type = SMC_CLC_CONFIRM; -	cclc.hdr.length = htons(sizeof(cclc));  	cclc.hdr.version = SMC_CLC_V1;		/* SMC version */ -	memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); -	memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], -	       SMC_GID_SIZE); -	memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); -	hton24(cclc.qpn, link->roce_qp->qp_num); -	cclc.rmb_rkey = -		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); -	cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ -	cclc.rmbe_alert_token = htonl(conn->alert_token_local); -	cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); -	cclc.rmbe_size = conn->rmbe_size_short; -	cclc.rmb_dma_addr = cpu_to_be64( -		(u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); -	hton24(cclc.psn, link->psn_initial); - -	memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); +	if (smc->conn.lgr->is_smcd) { +		/* SMC-D specific settings */ +		memcpy(cclc.hdr.eyecatcher, SMCD_EYECATCHER, +		       sizeof(SMCD_EYECATCHER)); +		cclc.hdr.path = SMC_TYPE_D; +		cclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); +		cclc.gid = conn->lgr->smcd->local_gid; +		cclc.token = conn->rmb_desc->token; +		cclc.dmbe_size = conn->rmbe_size_short; +		cclc.dmbe_idx = 0; +		memcpy(&cclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); +		memcpy(cclc.smcd_trl.eyecatcher, SMCD_EYECATCHER, +		       sizeof(SMCD_EYECATCHER)); +	} else { +		/* SMC-R specific settings */ +		link = &conn->lgr->lnk[SMC_SINGLE_LINK]; +		memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, +		       sizeof(SMC_EYECATCHER)); +		cclc.hdr.path = SMC_TYPE_R; +		cclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); +		memcpy(cclc.lcl.id_for_peer, local_systemid, +		       sizeof(local_systemid)); +		memcpy(&cclc.lcl.gid, link->gid, SMC_GID_SIZE); +		memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], +		       ETH_ALEN); +		hton24(cclc.qpn, link->roce_qp->qp_num); +		cclc.rmb_rkey = +			htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); +		cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ +		cclc.rmbe_alert_token = htonl(conn->alert_token_local); +		cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); +		cclc.rmbe_size = conn->rmbe_size_short; +		cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address +				(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); +		hton24(cclc.psn, link->psn_initial); +		memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER, +		       sizeof(SMC_EYECATCHER)); +	}  	memset(&msg, 0, sizeof(msg));  	vec.iov_base = &cclc; -	vec.iov_len = sizeof(cclc); -	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc)); -	if (len < sizeof(cclc)) { +	vec.iov_len = ntohs(cclc.hdr.length); +	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, +			     ntohs(cclc.hdr.length)); +	if (len < ntohs(cclc.hdr.length)) {  		if (len >= 0) {  			reason_code = -ENETUNREACH;  			smc->sk.sk_err = -reason_code; @@ -480,35 +541,57 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)  	int rc = 0;  	int len; -	link = &conn->lgr->lnk[SMC_SINGLE_LINK];  	memset(&aclc, 0, sizeof(aclc)); -	memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));  	aclc.hdr.type = SMC_CLC_ACCEPT; -	aclc.hdr.length = htons(sizeof(aclc));  	aclc.hdr.version = SMC_CLC_V1;		/* SMC version */  	if (srv_first_contact)  		aclc.hdr.flag = 1; -	memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); -	memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], -	       SMC_GID_SIZE); -	memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); -	hton24(aclc.qpn, link->roce_qp->qp_num); -	aclc.rmb_rkey = -		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); -	aclc.rmbe_idx = 1;			/* as long as 1 RMB = 1 RMBE */ -	aclc.rmbe_alert_token = htonl(conn->alert_token_local); -	aclc.qp_mtu = link->path_mtu; -	aclc.rmbe_size = conn->rmbe_size_short, -	aclc.rmb_dma_addr = cpu_to_be64( -		(u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); -	hton24(aclc.psn, link->psn_initial); -	memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + +	if (new_smc->conn.lgr->is_smcd) { +		/* SMC-D specific settings */ +		aclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); +		memcpy(aclc.hdr.eyecatcher, SMCD_EYECATCHER, +		       sizeof(SMCD_EYECATCHER)); +		aclc.hdr.path = SMC_TYPE_D; +		aclc.gid = conn->lgr->smcd->local_gid; +		aclc.token = conn->rmb_desc->token; +		aclc.dmbe_size = conn->rmbe_size_short; +		aclc.dmbe_idx = 0; +		memcpy(&aclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); +		memcpy(aclc.smcd_trl.eyecatcher, SMCD_EYECATCHER, +		       sizeof(SMCD_EYECATCHER)); +	} else { +		/* SMC-R specific settings */ +		aclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); +		memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, +		       sizeof(SMC_EYECATCHER)); +		aclc.hdr.path = SMC_TYPE_R; +		link = &conn->lgr->lnk[SMC_SINGLE_LINK]; +		memcpy(aclc.lcl.id_for_peer, local_systemid, +		       sizeof(local_systemid)); +		memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE); +		memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], +		       ETH_ALEN); +		hton24(aclc.qpn, link->roce_qp->qp_num); +		aclc.rmb_rkey = +			htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); +		aclc.rmbe_idx = 1;		/* as long as 1 RMB = 1 RMBE */ +		aclc.rmbe_alert_token = htonl(conn->alert_token_local); +		aclc.qp_mtu = link->path_mtu; +		aclc.rmbe_size = conn->rmbe_size_short, +		aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address +				(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); +		hton24(aclc.psn, link->psn_initial); +		memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER, +		       sizeof(SMC_EYECATCHER)); +	}  	memset(&msg, 0, sizeof(msg));  	vec.iov_base = &aclc; -	vec.iov_len = sizeof(aclc); -	len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc)); -	if (len < sizeof(aclc)) { +	vec.iov_len = ntohs(aclc.hdr.length); +	len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, +			     ntohs(aclc.hdr.length)); +	if (len < ntohs(aclc.hdr.length)) {  		if (len >= 0)  			new_smc->sk.sk_err = EPROTO;  		else diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 41ff9ea96139..18da89b681c2 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -23,17 +23,26 @@  #define SMC_CLC_DECLINE		0x04  #define SMC_CLC_V1		0x1		/* SMC version                */ +#define SMC_TYPE_R		0		/* SMC-R only		      */ +#define SMC_TYPE_D		1		/* SMC-D only		      */ +#define SMC_TYPE_B		3		/* SMC-R and SMC-D	      */  #define CLC_WAIT_TIME		(6 * HZ)	/* max. wait time on clcsock  */  #define SMC_CLC_DECL_MEM	0x01010000  /* insufficient memory resources  */ -#define SMC_CLC_DECL_TIMEOUT	0x02000000  /* timeout                        */ +#define SMC_CLC_DECL_TIMEOUT_CL	0x02010000  /* timeout w4 QP confirm link     */ +#define SMC_CLC_DECL_TIMEOUT_AL	0x02020000  /* timeout w4 QP add link	      */  #define SMC_CLC_DECL_CNFERR	0x03000000  /* configuration error            */ -#define SMC_CLC_DECL_IPSEC	0x03030000  /* IPsec usage                    */ +#define SMC_CLC_DECL_PEERNOSMC	0x03010000  /* peer did not indicate SMC      */ +#define SMC_CLC_DECL_IPSEC	0x03020000  /* IPsec usage		      */ +#define SMC_CLC_DECL_NOSMCDEV	0x03030000  /* no SMC device found	      */ +#define SMC_CLC_DECL_MODEUNSUPP	0x03040000  /* smc modes do not match (R or D)*/ +#define SMC_CLC_DECL_RMBE_EC	0x03050000  /* peer has eyecatcher in RMBE    */ +#define SMC_CLC_DECL_OPTUNSUPP	0x03060000  /* fastopen sockopt not supported */  #define SMC_CLC_DECL_SYNCERR	0x04000000  /* synchronization error          */ -#define SMC_CLC_DECL_REPLY	0x06000000  /* reply to a received decline    */ +#define SMC_CLC_DECL_PEERDECL	0x05000000  /* peer declined during handshake */  #define SMC_CLC_DECL_INTERR	0x99990000  /* internal error                 */ -#define SMC_CLC_DECL_TCL	0x02040000  /* timeout w4 QP confirm          */ -#define SMC_CLC_DECL_SEND	0x07000000  /* sending problem                */ -#define SMC_CLC_DECL_RMBE_EC	0x08000000  /* peer has eyecatcher in RMBE    */ +#define SMC_CLC_DECL_ERR_RTOK	0x99990001  /*	 rtoken handling failed       */ +#define SMC_CLC_DECL_ERR_RDYLNK	0x99990002  /*	 ib ready link failed	      */ +#define SMC_CLC_DECL_ERR_REGRMB	0x99990003  /*	 reg rmb failed		      */  struct smc_clc_msg_hdr {	/* header1 of clc messages */  	u8 eyecatcher[4];	/* eye catcher */ @@ -42,9 +51,11 @@ struct smc_clc_msg_hdr {	/* header1 of clc messages */  #if defined(__BIG_ENDIAN_BITFIELD)  	u8 version : 4,  	   flag    : 1, -	   rsvd    : 3; +	   rsvd	   : 1, +	   path	   : 2;  #elif defined(__LITTLE_ENDIAN_BITFIELD) -	u8 rsvd    : 3, +	u8 path    : 2, +	   rsvd    : 1,  	   flag    : 1,  	   version : 4;  #endif @@ -77,6 +88,11 @@ struct smc_clc_msg_proposal_prefix {	/* prefix part of clc proposal message*/  	u8 ipv6_prefixes_cnt;	/* number of IPv6 prefixes in prefix array */  } __aligned(4); +struct smc_clc_msg_smcd {	/* SMC-D GID information */ +	u64 gid;		/* ISM GID of requestor */ +	u8 res[32]; +}; +  struct smc_clc_msg_proposal {	/* clc proposal message sent by Linux */  	struct smc_clc_msg_hdr hdr;  	struct smc_clc_msg_local lcl; @@ -94,23 +110,45 @@ struct smc_clc_msg_proposal {	/* clc proposal message sent by Linux */  struct smc_clc_msg_accept_confirm {	/* clc accept / confirm message */  	struct smc_clc_msg_hdr hdr; -	struct smc_clc_msg_local lcl; -	u8 qpn[3];		/* QP number */ -	__be32 rmb_rkey;	/* RMB rkey */ -	u8 rmbe_idx;		/* Index of RMBE in RMB */ -	__be32 rmbe_alert_token;/* unique connection id */ +	union { +		struct { /* SMC-R */ +			struct smc_clc_msg_local lcl; +			u8 qpn[3];		/* QP number */ +			__be32 rmb_rkey;	/* RMB rkey */ +			u8 rmbe_idx;		/* Index of RMBE in RMB */ +			__be32 rmbe_alert_token;/* unique connection id */  #if defined(__BIG_ENDIAN_BITFIELD) -	u8 rmbe_size : 4,	/* RMBE buf size (compressed notation) */ -	   qp_mtu   : 4;	/* QP mtu */ +			u8 rmbe_size : 4,	/* buf size (compressed) */ +			   qp_mtu   : 4;	/* QP mtu */  #elif defined(__LITTLE_ENDIAN_BITFIELD) -	u8 qp_mtu   : 4, -	   rmbe_size : 4; +			u8 qp_mtu   : 4, +			   rmbe_size : 4;  #endif -	u8 reserved; -	__be64 rmb_dma_addr;	/* RMB virtual address */ -	u8 reserved2; -	u8 psn[3];		/* initial packet sequence number */ -	struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ +			u8 reserved; +			__be64 rmb_dma_addr;	/* RMB virtual address */ +			u8 reserved2; +			u8 psn[3];		/* packet sequence number */ +			struct smc_clc_msg_trail smcr_trl; +						/* eye catcher "SMCR" EBCDIC */ +		} __packed; +		struct { /* SMC-D */ +			u64 gid;		/* Sender GID */ +			u64 token;		/* DMB token */ +			u8 dmbe_idx;		/* DMBE index */ +#if defined(__BIG_ENDIAN_BITFIELD) +			u8 dmbe_size : 4,	/* buf size (compressed) */ +			   reserved3 : 4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) +			u8 reserved3 : 4, +			   dmbe_size : 4; +#endif +			u16 reserved4; +			u32 linkid;		/* Link identifier */ +			u32 reserved5[3]; +			struct smc_clc_msg_trail smcd_trl; +						/* eye catcher "SMCD" EBCDIC */ +		} __packed; +	};  } __packed;			/* format defined in RFC7609 */  struct smc_clc_msg_decline {	/* clc decline message */ @@ -129,13 +167,26 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)  	       ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));  } +/* get SMC-D info from proposal message */ +static inline struct smc_clc_msg_smcd * +smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop) +{ +	if (ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd)) +		return NULL; + +	return (struct smc_clc_msg_smcd *)(prop + 1); +} + +struct smcd_dev; +  int smc_clc_prfx_match(struct socket *clcsock,  		       struct smc_clc_msg_proposal_prefix *prop);  int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,  		     u8 expected_type);  int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); -int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev, -			  u8 ibport); +int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, +			  struct smc_ib_device *smcibdev, u8 ibport, u8 gid[], +			  struct smcd_dev *ismdev);  int smc_clc_send_confirm(struct smc_sock *smc);  int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index add82b0266f3..e871368500e3 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -16,6 +16,7 @@  #include <net/tcp.h>  #include <net/sock.h>  #include <rdma/ib_verbs.h> +#include <rdma/ib_cache.h>  #include "smc.h"  #include "smc_clc.h" @@ -25,10 +26,12 @@  #include "smc_llc.h"  #include "smc_cdc.h"  #include "smc_close.h" +#include "smc_ism.h"  #define SMC_LGR_NUM_INCR		256  #define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)  #define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ) +#define SMC_LGR_FREE_DELAY_FAST		(8 * HZ)  static struct smc_lgr_list smc_lgr_list = {	/* established link groups */  	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), @@ -46,8 +49,13 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)  	 * otherwise there is a risk of out-of-sync link groups.  	 */  	mod_delayed_work(system_wq, &lgr->free_work, -			 lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : -						 SMC_LGR_FREE_DELAY_SERV); +			 (!lgr->is_smcd && lgr->role == SMC_CLNT) ? +			 SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV); +} + +void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr) +{ +	mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);  }  /* Register connection's alert token in our lookup structure. @@ -132,6 +140,20 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)  	smc_lgr_schedule_free_work(lgr);  } +/* Send delete link, either as client to request the initiation + * of the DELETE LINK sequence from server; or as server to + * initiate the delete processing. See smc_llc_rx_delete_link(). + */ +static int smc_link_send_delete(struct smc_link *lnk) +{ +	if (lnk->state == SMC_LNK_ACTIVE && +	    !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) { +		smc_llc_link_deleting(lnk); +		return 0; +	} +	return -ENOTCONN; +} +  static void smc_lgr_free_work(struct work_struct *work)  {  	struct smc_link_group *lgr = container_of(to_delayed_work(work), @@ -152,17 +174,30 @@ static void smc_lgr_free_work(struct work_struct *work)  	list_del_init(&lgr->list); /* remove from smc_lgr_list */  free:  	spin_unlock_bh(&smc_lgr_list.lock); + +	if (!lgr->is_smcd && !lgr->terminating)	{ +		/* try to send del link msg, on error free lgr immediately */ +		if (!smc_link_send_delete(&lgr->lnk[SMC_SINGLE_LINK])) { +			/* reschedule in case we never receive a response */ +			smc_lgr_schedule_free_work(lgr); +			return; +		} +	} +  	if (!delayed_work_pending(&lgr->free_work)) { -		if (lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE) -			smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); +		struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; + +		if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE) +			smc_llc_link_inactive(lnk);  		smc_lgr_free(lgr);  	}  }  /* create a new SMC link group */ -static int smc_lgr_create(struct smc_sock *smc, +static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,  			  struct smc_ib_device *smcibdev, u8 ibport, -			  char *peer_systemid, unsigned short vlan_id) +			  char *peer_systemid, unsigned short vlan_id, +			  struct smcd_dev *smcismdev, u64 peer_gid)  {  	struct smc_link_group *lgr;  	struct smc_link *lnk; @@ -170,17 +205,23 @@ static int smc_lgr_create(struct smc_sock *smc,  	int rc = 0;  	int i; +	if (is_smcd && vlan_id) { +		rc = smc_ism_get_vlan(smcismdev, vlan_id); +		if (rc) +			goto out; +	} +  	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);  	if (!lgr) {  		rc = -ENOMEM;  		goto out;  	} -	lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; +	lgr->is_smcd = is_smcd;  	lgr->sync_err = 0; -	memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);  	lgr->vlan_id = vlan_id;  	rwlock_init(&lgr->sndbufs_lock);  	rwlock_init(&lgr->rmbs_lock); +	rwlock_init(&lgr->conns_lock);  	for (i = 0; i < SMC_RMBE_SIZES; i++) {  		INIT_LIST_HEAD(&lgr->sndbufs[i]);  		INIT_LIST_HEAD(&lgr->rmbs[i]); @@ -189,36 +230,48 @@ static int smc_lgr_create(struct smc_sock *smc,  	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);  	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);  	lgr->conns_all = RB_ROOT; - -	lnk = &lgr->lnk[SMC_SINGLE_LINK]; -	/* initialize link */ -	lnk->state = SMC_LNK_ACTIVATING; -	lnk->link_id = SMC_SINGLE_LINK; -	lnk->smcibdev = smcibdev; -	lnk->ibport = ibport; -	lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; -	if (!smcibdev->initialized) -		smc_ib_setup_per_ibdev(smcibdev); -	get_random_bytes(rndvec, sizeof(rndvec)); -	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16); -	rc = smc_llc_link_init(lnk); -	if (rc) -		goto free_lgr; -	rc = smc_wr_alloc_link_mem(lnk); -	if (rc) -		goto clear_llc_lnk; -	rc = smc_ib_create_protection_domain(lnk); -	if (rc) -		goto free_link_mem; -	rc = smc_ib_create_queue_pair(lnk); -	if (rc) -		goto dealloc_pd; -	rc = smc_wr_create_link(lnk); -	if (rc) -		goto destroy_qp; - +	if (is_smcd) { +		/* SMC-D specific settings */ +		lgr->peer_gid = peer_gid; +		lgr->smcd = smcismdev; +	} else { +		/* SMC-R specific settings */ +		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; +		memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN); + +		lnk = &lgr->lnk[SMC_SINGLE_LINK]; +		/* initialize link */ +		lnk->state = SMC_LNK_ACTIVATING; +		lnk->link_id = SMC_SINGLE_LINK; +		lnk->smcibdev = smcibdev; +		lnk->ibport = ibport; +		lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; +		if (!smcibdev->initialized) +			smc_ib_setup_per_ibdev(smcibdev); +		get_random_bytes(rndvec, sizeof(rndvec)); +		lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + +			(rndvec[2] << 16); +		rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport, +					  vlan_id, lnk->gid, &lnk->sgid_index); +		if (rc) +			goto free_lgr; +		rc = smc_llc_link_init(lnk); +		if (rc) +			goto free_lgr; +		rc = smc_wr_alloc_link_mem(lnk); +		if (rc) +			goto clear_llc_lnk; +		rc = smc_ib_create_protection_domain(lnk); +		if (rc) +			goto free_link_mem; +		rc = smc_ib_create_queue_pair(lnk); +		if (rc) +			goto dealloc_pd; +		rc = smc_wr_create_link(lnk); +		if (rc) +			goto destroy_qp; +	}  	smc->conn.lgr = lgr; -	rwlock_init(&lgr->conns_lock);  	spin_lock_bh(&smc_lgr_list.lock);  	list_add(&lgr->list, &smc_lgr_list.list);  	spin_unlock_bh(&smc_lgr_list.lock); @@ -264,7 +317,12 @@ void smc_conn_free(struct smc_connection *conn)  {  	if (!conn->lgr)  		return; -	smc_cdc_tx_dismiss_slots(conn); +	if (conn->lgr->is_smcd) { +		smc_ism_unset_conn(conn); +		tasklet_kill(&conn->rx_tsklet); +	} else { +		smc_cdc_tx_dismiss_slots(conn); +	}  	smc_lgr_unregister_conn(conn);  	smc_buf_unuse(conn);  } @@ -280,8 +338,8 @@ static void smc_link_clear(struct smc_link *lnk)  	smc_wr_free_link_mem(lnk);  } -static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, -			 struct smc_buf_desc *buf_desc) +static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, +			  struct smc_buf_desc *buf_desc)  {  	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; @@ -301,6 +359,28 @@ static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,  	kfree(buf_desc);  } +static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb, +			  struct smc_buf_desc *buf_desc) +{ +	if (is_dmb) { +		/* restore original buf len */ +		buf_desc->len += sizeof(struct smcd_cdc_msg); +		smc_ism_unregister_dmb(lgr->smcd, buf_desc); +	} else { +		kfree(buf_desc->cpu_addr); +	} +	kfree(buf_desc); +} + +static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, +			 struct smc_buf_desc *buf_desc) +{ +	if (lgr->is_smcd) +		smcd_buf_free(lgr, is_rmb, buf_desc); +	else +		smcr_buf_free(lgr, is_rmb, buf_desc); +} +  static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)  {  	struct smc_buf_desc *buf_desc, *bf_desc; @@ -332,7 +412,10 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)  void smc_lgr_free(struct smc_link_group *lgr)  {  	smc_lgr_free_bufs(lgr); -	smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); +	if (lgr->is_smcd) +		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); +	else +		smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);  	kfree(lgr);  } @@ -357,7 +440,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)  	lgr->terminating = 1;  	if (!list_empty(&lgr->list)) /* forget lgr */  		list_del_init(&lgr->list); -	smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); +	if (!lgr->is_smcd) +		smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);  	write_lock_bh(&lgr->conns_lock);  	node = rb_first(&lgr->conns_all); @@ -374,7 +458,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)  		node = rb_first(&lgr->conns_all);  	}  	write_unlock_bh(&lgr->conns_lock); -	wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); +	if (!lgr->is_smcd) +		wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);  	smc_lgr_schedule_free_work(lgr);  } @@ -392,17 +477,44 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)  	spin_lock_bh(&smc_lgr_list.lock);  	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { -		if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && +		if (!lgr->is_smcd && +		    lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&  		    lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)  			__smc_lgr_terminate(lgr);  	}  	spin_unlock_bh(&smc_lgr_list.lock);  } +/* Called when SMC-D device is terminated or peer is lost */ +void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid) +{ +	struct smc_link_group *lgr, *l; +	LIST_HEAD(lgr_free_list); + +	/* run common cleanup function and build free list */ +	spin_lock_bh(&smc_lgr_list.lock); +	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { +		if (lgr->is_smcd && lgr->smcd == dev && +		    (!peer_gid || lgr->peer_gid == peer_gid) && +		    !list_empty(&lgr->list)) { +			__smc_lgr_terminate(lgr); +			list_move(&lgr->list, &lgr_free_list); +		} +	} +	spin_unlock_bh(&smc_lgr_list.lock); + +	/* cancel the regular free workers and actually free lgrs */ +	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { +		list_del_init(&lgr->list); +		cancel_delayed_work_sync(&lgr->free_work); +		smc_lgr_free(lgr); +	} +} +  /* Determine vlan of internal TCP socket.   * @vlan_id: address to store the determined vlan id into   */ -static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) +int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)  {  	struct dst_entry *dst = sk_dst_get(clcsock->sk);  	struct net_device *ndev; @@ -446,41 +558,30 @@ out:  	return rc;  } -/* determine the link gid matching the vlan id of the link group */ -static int smc_link_determine_gid(struct smc_link_group *lgr) +static bool smcr_lgr_match(struct smc_link_group *lgr, +			   struct smc_clc_msg_local *lcl, +			   enum smc_lgr_role role)  { -	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; -	struct ib_gid_attr gattr; -	union ib_gid gid; -	int i; - -	if (!lgr->vlan_id) { -		lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1]; -		return 0; -	} +	return !memcmp(lgr->peer_systemid, lcl->id_for_peer, +		       SMC_SYSTEMID_LEN) && +		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid, +			SMC_GID_SIZE) && +		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac, +			sizeof(lcl->mac)) && +		lgr->role == role; +} -	for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len; -	     i++) { -		if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, -				 &gattr)) -			continue; -		if (gattr.ndev) { -			if (is_vlan_dev(gattr.ndev) && -			    vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) { -				lnk->gid = gid; -				dev_put(gattr.ndev); -				return 0; -			} -			dev_put(gattr.ndev); -		} -	} -	return -ENODEV; +static bool smcd_lgr_match(struct smc_link_group *lgr, +			   struct smcd_dev *smcismdev, u64 peer_gid) +{ +	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;  }  /* create a new SMC connection (and a new link group if necessary) */ -int smc_conn_create(struct smc_sock *smc, +int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,  		    struct smc_ib_device *smcibdev, u8 ibport, -		    struct smc_clc_msg_local *lcl, int srv_first_contact) +		    struct smc_clc_msg_local *lcl, struct smcd_dev *smcd, +		    u64 peer_gid)  {  	struct smc_connection *conn = &smc->conn;  	int local_contact = SMC_FIRST_CONTACT; @@ -502,17 +603,12 @@ int smc_conn_create(struct smc_sock *smc,  	spin_lock_bh(&smc_lgr_list.lock);  	list_for_each_entry(lgr, &smc_lgr_list.list, list) {  		write_lock_bh(&lgr->conns_lock); -		if (!memcmp(lgr->peer_systemid, lcl->id_for_peer, -			    SMC_SYSTEMID_LEN) && -		    !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid, -			    SMC_GID_SIZE) && -		    !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac, -			    sizeof(lcl->mac)) && +		if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) : +		     smcr_lgr_match(lgr, lcl, role)) &&  		    !lgr->sync_err && -		    (lgr->role == role) && -		    (lgr->vlan_id == vlan_id) && -		    ((role == SMC_CLNT) || -		     (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) { +		    lgr->vlan_id == vlan_id && +		    (role == SMC_CLNT || +		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {  			/* link group found */  			local_contact = SMC_REUSE_CONTACT;  			conn->lgr = lgr; @@ -535,16 +631,19 @@ int smc_conn_create(struct smc_sock *smc,  create:  	if (local_contact == SMC_FIRST_CONTACT) { -		rc = smc_lgr_create(smc, smcibdev, ibport, -				    lcl->id_for_peer, vlan_id); +		rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport, +				    lcl->id_for_peer, vlan_id, smcd, peer_gid);  		if (rc)  			goto out;  		smc_lgr_register_conn(conn); /* add smc conn to lgr */ -		rc = smc_link_determine_gid(conn->lgr);  	}  	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;  	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;  	conn->urg_state = SMC_URG_READ; +	if (is_smcd) { +		conn->rx_off = sizeof(struct smcd_cdc_msg); +		smcd_cdc_rx_init(conn); /* init tasklet for this conn */ +	}  #ifndef KERNEL_HAS_ATOMIC64  	spin_lock_init(&conn->acurs_lock);  #endif @@ -609,8 +708,8 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)  	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);  } -static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, -					       bool is_rmb, int bufsize) +static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, +						bool is_rmb, int bufsize)  {  	struct smc_buf_desc *buf_desc;  	struct smc_link *lnk; @@ -668,7 +767,44 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,  	return buf_desc;  } -static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) +#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ + +static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, +						bool is_dmb, int bufsize) +{ +	struct smc_buf_desc *buf_desc; +	int rc; + +	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES) +		return ERR_PTR(-EAGAIN); + +	/* try to alloc a new DMB */ +	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); +	if (!buf_desc) +		return ERR_PTR(-ENOMEM); +	if (is_dmb) { +		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc); +		if (rc) { +			kfree(buf_desc); +			return ERR_PTR(-EAGAIN); +		} +		buf_desc->pages = virt_to_page(buf_desc->cpu_addr); +		/* CDC header stored in buf. So, pretend it was smaller */ +		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg); +	} else { +		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL | +					     __GFP_NOWARN | __GFP_NORETRY | +					     __GFP_NOMEMALLOC); +		if (!buf_desc->cpu_addr) { +			kfree(buf_desc); +			return ERR_PTR(-EAGAIN); +		} +		buf_desc->len = bufsize; +	} +	return buf_desc; +} + +static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)  {  	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);  	struct smc_connection *conn = &smc->conn; @@ -706,7 +842,11 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)  			break; /* found reusable slot */  		} -		buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize); +		if (is_smcd) +			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize); +		else +			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize); +  		if (PTR_ERR(buf_desc) == -ENOMEM)  			break;  		if (IS_ERR(buf_desc)) @@ -727,7 +867,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)  		conn->rmbe_size_short = bufsize_short;  		smc->sk.sk_rcvbuf = bufsize * 2;  		atomic_set(&conn->bytes_to_rcv, 0); -		conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize); +		conn->rmbe_update_limit = +			smc_rmb_wnd_update_limit(buf_desc->len); +		if (is_smcd) +			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */  	} else {  		conn->sndbuf_desc = buf_desc;  		smc->sk.sk_sndbuf = bufsize * 2; @@ -740,6 +883,8 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)  {  	struct smc_link_group *lgr = conn->lgr; +	if (!conn->lgr || conn->lgr->is_smcd) +		return;  	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,  			       conn->sndbuf_desc, DMA_TO_DEVICE);  } @@ -748,6 +893,8 @@ void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)  {  	struct smc_link_group *lgr = conn->lgr; +	if (!conn->lgr || conn->lgr->is_smcd) +		return;  	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,  				  conn->sndbuf_desc, DMA_TO_DEVICE);  } @@ -756,6 +903,8 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)  {  	struct smc_link_group *lgr = conn->lgr; +	if (!conn->lgr || conn->lgr->is_smcd) +		return;  	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,  			       conn->rmb_desc, DMA_FROM_DEVICE);  } @@ -764,6 +913,8 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)  {  	struct smc_link_group *lgr = conn->lgr; +	if (!conn->lgr || conn->lgr->is_smcd) +		return;  	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,  				  conn->rmb_desc, DMA_FROM_DEVICE);  } @@ -774,16 +925,16 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)   * the Linux implementation uses just one RMB-element per RMB, i.e. uses an   * extra RMB for every connection in a link group   */ -int smc_buf_create(struct smc_sock *smc) +int smc_buf_create(struct smc_sock *smc, bool is_smcd)  {  	int rc;  	/* create send buffer */ -	rc = __smc_buf_create(smc, false); +	rc = __smc_buf_create(smc, is_smcd, false);  	if (rc)  		return rc;  	/* create rmb */ -	rc = __smc_buf_create(smc, true); +	rc = __smc_buf_create(smc, is_smcd, true);  	if (rc)  		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);  	return rc; @@ -865,7 +1016,14 @@ void smc_core_exit(void)  	spin_unlock_bh(&smc_lgr_list.lock);  	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {  		list_del_init(&lgr->list); -		smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); +		if (!lgr->is_smcd) { +			struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; + +			if (lnk->state == SMC_LNK_ACTIVE) +				smc_llc_send_delete_link(lnk, SMC_LLC_REQ, +							 false); +			smc_llc_link_inactive(lnk); +		}  		cancel_delayed_work_sync(&lgr->free_work);  		smc_lgr_free(lgr); /* free link group */  	} diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 93cb3523bf50..c156674733c9 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -34,7 +34,8 @@ enum smc_lgr_role {		/* possible roles of a link group */  enum smc_link_state {			/* possible states of a link */  	SMC_LNK_INACTIVE,	/* link is inactive */  	SMC_LNK_ACTIVATING,	/* link is being activated */ -	SMC_LNK_ACTIVE		/* link is active */ +	SMC_LNK_ACTIVE,		/* link is active */ +	SMC_LNK_DELETING,	/* link is being deleted */  };  #define SMC_WR_BUF_SIZE		48	/* size of work request buffer */ @@ -84,14 +85,15 @@ struct smc_link {  	wait_queue_head_t	wr_reg_wait;	/* wait for wr_reg result */  	enum smc_wr_reg_state	wr_reg_state;	/* state of wr_reg request */ -	union ib_gid		gid;		/* gid matching used vlan id */ +	u8			gid[SMC_GID_SIZE];/* gid matching used vlan id*/ +	u8			sgid_index;	/* gid index for vlan id      */  	u32			peer_qpn;	/* QP number of peer */  	enum ib_mtu		path_mtu;	/* used mtu */  	enum ib_mtu		peer_mtu;	/* mtu size of peer */  	u32			psn_initial;	/* QP tx initial packet seqno */  	u32			peer_psn;	/* QP rx initial packet seqno */  	u8			peer_mac[ETH_ALEN];	/* = gid[8:10||13:15] */ -	u8			peer_gid[sizeof(union ib_gid)];	/* gid of peer*/ +	u8			peer_gid[SMC_GID_SIZE];	/* gid of peer*/  	u8			link_id;	/* unique # within link group */  	enum smc_link_state	state;		/* state of link */ @@ -124,15 +126,28 @@ struct smc_buf_desc {  	void			*cpu_addr;	/* virtual address of buffer */  	struct page		*pages;  	int			len;		/* length of buffer */ -	struct sg_table		sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */ -	struct ib_mr		*mr_rx[SMC_LINKS_PER_LGR_MAX]; -						/* for rmb only: memory region -						 * incl. rkey provided to peer -						 */ -	u32			order;		/* allocation order */  	u32			used;		/* currently used / unused */  	u8			reused	: 1;	/* new created / reused */  	u8			regerr	: 1;	/* err during registration */ +	union { +		struct { /* SMC-R */ +			struct sg_table		sgt[SMC_LINKS_PER_LGR_MAX]; +						/* virtual buffer */ +			struct ib_mr		*mr_rx[SMC_LINKS_PER_LGR_MAX]; +						/* for rmb only: memory region +						 * incl. rkey provided to peer +						 */ +			u32			order;	/* allocation order */ +		}; +		struct { /* SMC-D */ +			unsigned short		sba_idx; +						/* SBA index number */ +			u64			token; +						/* DMB token number */ +			dma_addr_t		dma_addr; +						/* DMA address */ +		}; +	};  };  struct smc_rtoken {				/* address/key of remote RMB */ @@ -148,12 +163,10 @@ struct smc_rtoken {				/* address/key of remote RMB */   * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)   */ +struct smcd_dev; +  struct smc_link_group {  	struct list_head	list; -	enum smc_lgr_role	role;		/* client or server */ -	struct smc_link		lnk[SMC_LINKS_PER_LGR_MAX];	/* smc link */ -	char			peer_systemid[SMC_SYSTEMID_LEN]; -						/* unique system_id of peer */  	struct rb_root		conns_all;	/* connection tree */  	rwlock_t		conns_lock;	/* protects conns_all */  	unsigned int		conns_num;	/* current # of connections */ @@ -163,17 +176,34 @@ struct smc_link_group {  	rwlock_t		sndbufs_lock;	/* protects tx buffers */  	struct list_head	rmbs[SMC_RMBE_SIZES];	/* rx buffers */  	rwlock_t		rmbs_lock;	/* protects rx buffers */ -	struct smc_rtoken	rtokens[SMC_RMBS_PER_LGR_MAX] -				       [SMC_LINKS_PER_LGR_MAX]; -						/* remote addr/key pairs */ -	unsigned long		rtokens_used_mask[BITS_TO_LONGS( -							SMC_RMBS_PER_LGR_MAX)]; -						/* used rtoken elements */  	u8			id[SMC_LGR_ID_SIZE];	/* unique lgr id */  	struct delayed_work	free_work;	/* delayed freeing of an lgr */  	u8			sync_err : 1;	/* lgr no longer fits to peer */  	u8			terminating : 1;/* lgr is terminating */ + +	bool			is_smcd;	/* SMC-R or SMC-D */ +	union { +		struct { /* SMC-R */ +			enum smc_lgr_role	role; +						/* client or server */ +			struct smc_link		lnk[SMC_LINKS_PER_LGR_MAX]; +						/* smc link */ +			char			peer_systemid[SMC_SYSTEMID_LEN]; +						/* unique system_id of peer */ +			struct smc_rtoken	rtokens[SMC_RMBS_PER_LGR_MAX] +						[SMC_LINKS_PER_LGR_MAX]; +						/* remote addr/key pairs */ +			DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX); +						/* used rtoken elements */ +		}; +		struct { /* SMC-D */ +			u64			peer_gid; +						/* Peer GID (remote) */ +			struct smcd_dev		*smcd; +						/* ISM device for VLAN reg. */ +		}; +	};  };  /* Find the connection associated with the given alert token in the link group. @@ -217,7 +247,8 @@ void smc_lgr_free(struct smc_link_group *lgr);  void smc_lgr_forget(struct smc_link_group *lgr);  void smc_lgr_terminate(struct smc_link_group *lgr);  void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport); -int smc_buf_create(struct smc_sock *smc); +void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid); +int smc_buf_create(struct smc_sock *smc, bool is_smcd);  int smc_uncompress_bufsize(u8 compressed);  int smc_rmb_rtoken_handling(struct smc_connection *conn,  			    struct smc_clc_msg_accept_confirm *clc); @@ -227,9 +258,19 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);  void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);  void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);  void smc_rmb_sync_sg_for_device(struct smc_connection *conn); +int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id); +  void smc_conn_free(struct smc_connection *conn); -int smc_conn_create(struct smc_sock *smc, +int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,  		    struct smc_ib_device *smcibdev, u8 ibport, -		    struct smc_clc_msg_local *lcl, int srv_first_contact); +		    struct smc_clc_msg_local *lcl, struct smcd_dev *smcd, +		    u64 peer_gid); +void smcd_conn_free(struct smc_connection *conn); +void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);  void smc_core_exit(void); + +static inline struct smc_link_group *smc_get_lgr(struct smc_link *link) +{ +	return container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); +}  #endif diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 839354402215..dbf64a93d68a 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -79,6 +79,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,  			   struct nlattr *bc)  {  	struct smc_sock *smc = smc_sk(sk); +	struct smc_diag_fallback fallback;  	struct user_namespace *user_ns;  	struct smc_diag_msg *r;  	struct nlmsghdr *nlh; @@ -91,11 +92,21 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,  	r = nlmsg_data(nlh);  	smc_diag_msg_common_fill(r, sk);  	r->diag_state = sk->sk_state; -	r->diag_fallback = smc->use_fallback; +	if (smc->use_fallback) +		r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP; +	else if (smc->conn.lgr && smc->conn.lgr->is_smcd) +		r->diag_mode = SMC_DIAG_MODE_SMCD; +	else +		r->diag_mode = SMC_DIAG_MODE_SMCR;  	user_ns = sk_user_ns(NETLINK_CB(cb->skb).sk);  	if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))  		goto errout; +	fallback.reason = smc->fallback_rsn; +	fallback.peer_diagnosis = smc->peer_diagnosis; +	if (nla_put(skb, SMC_DIAG_FALLBACK, sizeof(fallback), &fallback) < 0) +		goto errout; +  	if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&  	    smc->conn.alert_token_local) {  		struct smc_connection *conn = &smc->conn; @@ -136,7 +147,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,  			goto errout;  	} -	if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr && +	if (smc->conn.lgr && !smc->conn.lgr->is_smcd && +	    (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&  	    !list_empty(&smc->conn.lgr->list)) {  		struct smc_diag_lgrinfo linfo = {  			.role = smc->conn.lgr->role, @@ -148,13 +160,28 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,  		       smc->conn.lgr->lnk[0].smcibdev->ibdev->name,  		       sizeof(smc->conn.lgr->lnk[0].smcibdev->ibdev->name));  		smc_gid_be16_convert(linfo.lnk[0].gid, -				     smc->conn.lgr->lnk[0].gid.raw); +				     smc->conn.lgr->lnk[0].gid);  		smc_gid_be16_convert(linfo.lnk[0].peer_gid,  				     smc->conn.lgr->lnk[0].peer_gid);  		if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)  			goto errout;  	} +	if (smc->conn.lgr && smc->conn.lgr->is_smcd && +	    (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && +	    !list_empty(&smc->conn.lgr->list)) { +		struct smc_connection *conn = &smc->conn; +		struct smcd_diag_dmbinfo dinfo = { +			.linkid = *((u32 *)conn->lgr->id), +			.peer_gid = conn->lgr->peer_gid, +			.my_gid = conn->lgr->smcd->local_gid, +			.token = conn->rmb_desc->token, +			.peer_token = conn->peer_token +		}; + +		if (nla_put(skb, SMC_DIAG_DMBINFO, sizeof(dinfo), &dinfo) < 0) +			goto errout; +	}  	nlmsg_end(skb, nlh);  	return 0; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 0eed7ab9f28b..e519ef29c0ff 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -16,6 +16,7 @@  #include <linux/workqueue.h>  #include <linux/scatterlist.h>  #include <rdma/ib_verbs.h> +#include <rdma/ib_cache.h>  #include "smc_pnet.h"  #include "smc_ib.h" @@ -68,7 +69,7 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk)  	qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu);  	qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;  	rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport); -	rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, 0, 1, 0); +	rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0);  	rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid);  	memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,  	       sizeof(lnk->peer_mac)); @@ -112,8 +113,7 @@ int smc_ib_modify_qp_reset(struct smc_link *lnk)  int smc_ib_ready_link(struct smc_link *lnk)  { -	struct smc_link_group *lgr = -		container_of(lnk, struct smc_link_group, lnk[0]); +	struct smc_link_group *lgr = smc_get_lgr(lnk);  	int rc = 0;  	rc = smc_ib_modify_qp_init(lnk); @@ -143,6 +143,95 @@ out:  	return rc;  } +static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport) +{ +	const struct ib_gid_attr *attr; +	int rc = 0; + +	attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0); +	if (IS_ERR(attr)) +		return -ENODEV; + +	if (attr->ndev) +		memcpy(smcibdev->mac[ibport - 1], attr->ndev->dev_addr, +		       ETH_ALEN); +	else +		rc = -ENODEV; + +	rdma_put_gid_attr(attr); +	return rc; +} + +/* Create an identifier unique for this instance of SMC-R. + * The MAC-address of the first active registered IB device + * plus a random 2-byte number is used to create this identifier. + * This name is delivered to the peer during connection initialization. + */ +static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev, +						u8 ibport) +{ +	memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1], +	       sizeof(smcibdev->mac[ibport - 1])); +	get_random_bytes(&local_systemid[0], 2); +} + +bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) +{ +	return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; +} + +/* determine the gid for an ib-device port and vlan id */ +int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, +			 unsigned short vlan_id, u8 gid[], u8 *sgid_index) +{ +	const struct ib_gid_attr *attr; +	int i; + +	for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) { +		attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i); +		if (IS_ERR(attr)) +			continue; + +		if (attr->ndev && +		    ((!vlan_id && !is_vlan_dev(attr->ndev)) || +		     (vlan_id && is_vlan_dev(attr->ndev) && +		      vlan_dev_vlan_id(attr->ndev) == vlan_id)) && +		    attr->gid_type == IB_GID_TYPE_ROCE) { +			if (gid) +				memcpy(gid, &attr->gid, SMC_GID_SIZE); +			if (sgid_index) +				*sgid_index = attr->index; +			rdma_put_gid_attr(attr); +			return 0; +		} +		rdma_put_gid_attr(attr); +	} +	return -ENODEV; +} + +static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) +{ +	int rc; + +	memset(&smcibdev->pattr[ibport - 1], 0, +	       sizeof(smcibdev->pattr[ibport - 1])); +	rc = ib_query_port(smcibdev->ibdev, ibport, +			   &smcibdev->pattr[ibport - 1]); +	if (rc) +		goto out; +	/* the SMC protocol requires specification of the RoCE MAC address */ +	rc = smc_ib_fill_mac(smcibdev, ibport); +	if (rc) +		goto out; +	if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET, +		     sizeof(local_systemid)) && +	    smc_ib_port_active(smcibdev, ibport)) +		/* create unique system identifier */ +		smc_ib_define_local_systemid(smcibdev, ibport); +out: +	return rc; +} +  /* process context wrapper for might_sleep smc_ib_remember_port_attr */  static void smc_ib_port_event_work(struct work_struct *work)  { @@ -370,62 +459,6 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,  	buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;  } -static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) -{ -	struct ib_gid_attr gattr; -	int rc; - -	rc = ib_query_gid(smcibdev->ibdev, ibport, 0, -			  &smcibdev->gid[ibport - 1], &gattr); -	if (rc || !gattr.ndev) -		return -ENODEV; - -	memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN); -	dev_put(gattr.ndev); -	return 0; -} - -/* Create an identifier unique for this instance of SMC-R. - * The MAC-address of the first active registered IB device - * plus a random 2-byte number is used to create this identifier. - * This name is delivered to the peer during connection initialization. - */ -static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev, -						u8 ibport) -{ -	memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1], -	       sizeof(smcibdev->mac[ibport - 1])); -	get_random_bytes(&local_systemid[0], 2); -} - -bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) -{ -	return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; -} - -int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) -{ -	int rc; - -	memset(&smcibdev->pattr[ibport - 1], 0, -	       sizeof(smcibdev->pattr[ibport - 1])); -	rc = ib_query_port(smcibdev->ibdev, ibport, -			   &smcibdev->pattr[ibport - 1]); -	if (rc) -		goto out; -	/* the SMC protocol requires specification of the RoCE MAC address */ -	rc = smc_ib_fill_gid_and_mac(smcibdev, ibport); -	if (rc) -		goto out; -	if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET, -		     sizeof(local_systemid)) && -	    smc_ib_port_active(smcibdev, ibport)) -		/* create unique system identifier */ -		smc_ib_define_local_systemid(smcibdev, ibport); -out: -	return rc; -} -  long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)  {  	struct ib_cq_init_attr cqattr =	{ @@ -454,9 +487,6 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)  		smcibdev->roce_cq_recv = NULL;  		goto err;  	} -	INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev, -			      smc_ib_global_event_handler); -	ib_register_event_handler(&smcibdev->event_handler);  	smc_wr_add_dev(smcibdev);  	smcibdev->initialized = 1;  	return rc; @@ -472,7 +502,6 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)  		return;  	smcibdev->initialized = 0;  	smc_wr_remove_dev(smcibdev); -	ib_unregister_event_handler(&smcibdev->event_handler);  	ib_destroy_cq(smcibdev->roce_cq_recv);  	ib_destroy_cq(smcibdev->roce_cq_send);  } @@ -483,6 +512,8 @@ static struct ib_client smc_ib_client;  static void smc_ib_add_dev(struct ib_device *ibdev)  {  	struct smc_ib_device *smcibdev; +	u8 port_cnt; +	int i;  	if (ibdev->node_type != RDMA_NODE_IB_CA)  		return; @@ -498,6 +529,21 @@ static void smc_ib_add_dev(struct ib_device *ibdev)  	list_add_tail(&smcibdev->list, &smc_ib_devices.list);  	spin_unlock(&smc_ib_devices.lock);  	ib_set_client_data(ibdev, &smc_ib_client, smcibdev); +	INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev, +			      smc_ib_global_event_handler); +	ib_register_event_handler(&smcibdev->event_handler); + +	/* trigger reading of the port attributes */ +	port_cnt = smcibdev->ibdev->phys_port_cnt; +	for (i = 0; +	     i < min_t(size_t, port_cnt, SMC_MAX_PORTS); +	     i++) { +		set_bit(i, &smcibdev->port_event_mask); +		/* determine pnetids of the port */ +		smc_pnetid_by_dev_port(ibdev->dev.parent, i, +				       smcibdev->pnetid[i]); +	} +	schedule_work(&smcibdev->port_event_work);  }  /* callback function for ib_register_client() */ @@ -512,6 +558,7 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)  	spin_unlock(&smc_ib_devices.lock);  	smc_pnet_remove_by_ibdev(smcibdev);  	smc_ib_cleanup_per_ibdev(smcibdev); +	ib_unregister_event_handler(&smcibdev->event_handler);  	kfree(smcibdev);  } diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index e90630dadf8e..bac7fd65a4c0 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -15,6 +15,7 @@  #include <linux/interrupt.h>  #include <linux/if_ether.h>  #include <rdma/ib_verbs.h> +#include <net/smc.h>  #define SMC_MAX_PORTS			2	/* Max # of ports */  #define SMC_GID_SIZE			sizeof(union ib_gid) @@ -39,7 +40,8 @@ struct smc_ib_device {				/* ib-device infos for smc */  	struct tasklet_struct	recv_tasklet;	/* called by recv cq handler */  	char			mac[SMC_MAX_PORTS][ETH_ALEN];  						/* mac address per port*/ -	union ib_gid		gid[SMC_MAX_PORTS]; /* gid per port */ +	u8			pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN]; +						/* pnetid per port */  	u8			initialized : 1; /* ib dev CQ, evthdl done */  	struct work_struct	port_event_work;  	unsigned long		port_event_mask; @@ -51,7 +53,6 @@ struct smc_link;  int smc_ib_register_client(void) __init;  void smc_ib_unregister_client(void);  bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport); -int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport);  int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,  		      struct smc_buf_desc *buf_slot,  		      enum dma_data_direction data_direction); @@ -75,4 +76,6 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,  void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,  			       struct smc_buf_desc *buf_slot,  			       enum dma_data_direction data_direction); +int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, +			 unsigned short vlan_id, u8 gid[], u8 *sgid_index);  #endif diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c new file mode 100644 index 000000000000..e36f21ce7252 --- /dev/null +++ b/net/smc/smc_ism.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Shared Memory Communications Direct over ISM devices (SMC-D) + * + * Functions for ISM device. + * + * Copyright IBM Corp. 2018 + */ + +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <asm/page.h> + +#include "smc.h" +#include "smc_core.h" +#include "smc_ism.h" +#include "smc_pnet.h" + +struct smcd_dev_list smcd_dev_list = { +	.list = LIST_HEAD_INIT(smcd_dev_list.list), +	.lock = __SPIN_LOCK_UNLOCKED(smcd_dev_list.lock) +}; + +/* Test if an ISM communication is possible. */ +int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd) +{ +	return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0, +					   vlan_id); +} + +int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos, +		  void *data, size_t len) +{ +	int rc; + +	rc = smcd->ops->move_data(smcd, pos->token, pos->index, pos->signal, +				  pos->offset, data, len); + +	return rc < 0 ? rc : 0; +} + +/* Set a connection using this DMBE. */ +void smc_ism_set_conn(struct smc_connection *conn) +{ +	unsigned long flags; + +	spin_lock_irqsave(&conn->lgr->smcd->lock, flags); +	conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = conn; +	spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags); +} + +/* Unset a connection using this DMBE. */ +void smc_ism_unset_conn(struct smc_connection *conn) +{ +	unsigned long flags; + +	if (!conn->rmb_desc) +		return; + +	spin_lock_irqsave(&conn->lgr->smcd->lock, flags); +	conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = NULL; +	spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags); +} + +/* Register a VLAN identifier with the ISM device. Use a reference count + * and add a VLAN identifier only when the first DMB using this VLAN is + * registered. + */ +int smc_ism_get_vlan(struct smcd_dev *smcd, unsigned short vlanid) +{ +	struct smc_ism_vlanid *new_vlan, *vlan; +	unsigned long flags; +	int rc = 0; + +	if (!vlanid)			/* No valid vlan id */ +		return -EINVAL; + +	/* create new vlan entry, in case we need it */ +	new_vlan = kzalloc(sizeof(*new_vlan), GFP_KERNEL); +	if (!new_vlan) +		return -ENOMEM; +	new_vlan->vlanid = vlanid; +	refcount_set(&new_vlan->refcnt, 1); + +	/* if there is an existing entry, increase count and return */ +	spin_lock_irqsave(&smcd->lock, flags); +	list_for_each_entry(vlan, &smcd->vlan, list) { +		if (vlan->vlanid == vlanid) { +			refcount_inc(&vlan->refcnt); +			kfree(new_vlan); +			goto out; +		} +	} + +	/* no existing entry found. +	 * add new entry to device; might fail, e.g., if HW limit reached +	 */ +	if (smcd->ops->add_vlan_id(smcd, vlanid)) { +		kfree(new_vlan); +		rc = -EIO; +		goto out; +	} +	list_add_tail(&new_vlan->list, &smcd->vlan); +out: +	spin_unlock_irqrestore(&smcd->lock, flags); +	return rc; +} + +/* Unregister a VLAN identifier with the ISM device. Use a reference count + * and remove a VLAN identifier only when the last DMB using this VLAN is + * unregistered. + */ +int smc_ism_put_vlan(struct smcd_dev *smcd, unsigned short vlanid) +{ +	struct smc_ism_vlanid *vlan; +	unsigned long flags; +	bool found = false; +	int rc = 0; + +	if (!vlanid)			/* No valid vlan id */ +		return -EINVAL; + +	spin_lock_irqsave(&smcd->lock, flags); +	list_for_each_entry(vlan, &smcd->vlan, list) { +		if (vlan->vlanid == vlanid) { +			if (!refcount_dec_and_test(&vlan->refcnt)) +				goto out; +			found = true; +			break; +		} +	} +	if (!found) { +		rc = -ENOENT; +		goto out;		/* VLAN id not in table */ +	} + +	/* Found and the last reference just gone */ +	if (smcd->ops->del_vlan_id(smcd, vlanid)) +		rc = -EIO; +	list_del(&vlan->list); +	kfree(vlan); +out: +	spin_unlock_irqrestore(&smcd->lock, flags); +	return rc; +} + +int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc) +{ +	struct smcd_dmb dmb; + +	memset(&dmb, 0, sizeof(dmb)); +	dmb.dmb_tok = dmb_desc->token; +	dmb.sba_idx = dmb_desc->sba_idx; +	dmb.cpu_addr = dmb_desc->cpu_addr; +	dmb.dma_addr = dmb_desc->dma_addr; +	dmb.dmb_len = dmb_desc->len; +	return smcd->ops->unregister_dmb(smcd, &dmb); +} + +int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len, +			 struct smc_buf_desc *dmb_desc) +{ +	struct smcd_dmb dmb; +	int rc; + +	memset(&dmb, 0, sizeof(dmb)); +	dmb.dmb_len = dmb_len; +	dmb.sba_idx = dmb_desc->sba_idx; +	dmb.vlan_id = lgr->vlan_id; +	dmb.rgid = lgr->peer_gid; +	rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb); +	if (!rc) { +		dmb_desc->sba_idx = dmb.sba_idx; +		dmb_desc->token = dmb.dmb_tok; +		dmb_desc->cpu_addr = dmb.cpu_addr; +		dmb_desc->dma_addr = dmb.dma_addr; +		dmb_desc->len = dmb.dmb_len; +	} +	return rc; +} + +struct smc_ism_event_work { +	struct work_struct work; +	struct smcd_dev *smcd; +	struct smcd_event event; +}; + +#define ISM_EVENT_REQUEST		0x0001 +#define ISM_EVENT_RESPONSE		0x0002 +#define ISM_EVENT_REQUEST_IR		0x00000001 +#define ISM_EVENT_CODE_TESTLINK		0x83 + +static void smcd_handle_sw_event(struct smc_ism_event_work *wrk) +{ +	union { +		u64	info; +		struct { +			u32		uid; +			unsigned short	vlanid; +			u16		code; +		}; +	} ev_info; + +	switch (wrk->event.code) { +	case ISM_EVENT_CODE_TESTLINK:	/* Activity timer */ +		ev_info.info = wrk->event.info; +		if (ev_info.code == ISM_EVENT_REQUEST) { +			ev_info.code = ISM_EVENT_RESPONSE; +			wrk->smcd->ops->signal_event(wrk->smcd, +						     wrk->event.tok, +						     ISM_EVENT_REQUEST_IR, +						     ISM_EVENT_CODE_TESTLINK, +						     ev_info.info); +			} +		break; +	} +} + +/* worker for SMC-D events */ +static void smc_ism_event_work(struct work_struct *work) +{ +	struct smc_ism_event_work *wrk = +		container_of(work, struct smc_ism_event_work, work); + +	switch (wrk->event.type) { +	case ISM_EVENT_GID:	/* GID event, token is peer GID */ +		smc_smcd_terminate(wrk->smcd, wrk->event.tok); +		break; +	case ISM_EVENT_DMB: +		break; +	case ISM_EVENT_SWR:	/* Software defined event */ +		smcd_handle_sw_event(wrk); +		break; +	} +	kfree(wrk); +} + +static void smcd_release(struct device *dev) +{ +	struct smcd_dev *smcd = container_of(dev, struct smcd_dev, dev); + +	kfree(smcd->conn); +	kfree(smcd); +} + +struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, +				const struct smcd_ops *ops, int max_dmbs) +{ +	struct smcd_dev *smcd; + +	smcd = kzalloc(sizeof(*smcd), GFP_KERNEL); +	if (!smcd) +		return NULL; +	smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *), +			     GFP_KERNEL); +	if (!smcd->conn) { +		kfree(smcd); +		return NULL; +	} + +	smcd->dev.parent = parent; +	smcd->dev.release = smcd_release; +	device_initialize(&smcd->dev); +	dev_set_name(&smcd->dev, name); +	smcd->ops = ops; +	smc_pnetid_by_dev_port(parent, 0, smcd->pnetid); + +	spin_lock_init(&smcd->lock); +	INIT_LIST_HEAD(&smcd->vlan); +	smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)", +						 WQ_MEM_RECLAIM, name); +	return smcd; +} +EXPORT_SYMBOL_GPL(smcd_alloc_dev); + +int smcd_register_dev(struct smcd_dev *smcd) +{ +	spin_lock(&smcd_dev_list.lock); +	list_add_tail(&smcd->list, &smcd_dev_list.list); +	spin_unlock(&smcd_dev_list.lock); + +	return device_add(&smcd->dev); +} +EXPORT_SYMBOL_GPL(smcd_register_dev); + +void smcd_unregister_dev(struct smcd_dev *smcd) +{ +	spin_lock(&smcd_dev_list.lock); +	list_del(&smcd->list); +	spin_unlock(&smcd_dev_list.lock); +	flush_workqueue(smcd->event_wq); +	destroy_workqueue(smcd->event_wq); +	smc_smcd_terminate(smcd, 0); + +	device_del(&smcd->dev); +} +EXPORT_SYMBOL_GPL(smcd_unregister_dev); + +void smcd_free_dev(struct smcd_dev *smcd) +{ +	put_device(&smcd->dev); +} +EXPORT_SYMBOL_GPL(smcd_free_dev); + +/* SMCD Device event handler. Called from ISM device interrupt handler. + * Parameters are smcd device pointer, + * - event->type (0 --> DMB, 1 --> GID), + * - event->code (event code), + * - event->tok (either DMB token when event type 0, or GID when event type 1) + * - event->time (time of day) + * - event->info (debug info). + * + * Context: + * - Function called in IRQ context from ISM device driver event handler. + */ +void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event) +{ +	struct smc_ism_event_work *wrk; + +	/* copy event to event work queue, and let it be handled there */ +	wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC); +	if (!wrk) +		return; +	INIT_WORK(&wrk->work, smc_ism_event_work); +	wrk->smcd = smcd; +	wrk->event = *event; +	queue_work(smcd->event_wq, &wrk->work); +} +EXPORT_SYMBOL_GPL(smcd_handle_event); + +/* SMCD Device interrupt handler. Called from ISM device interrupt handler. + * Parameters are smcd device pointer and DMB number. Find the connection and + * schedule the tasklet for this connection. + * + * Context: + * - Function called in IRQ context from ISM device driver IRQ handler. + */ +void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno) +{ +	struct smc_connection *conn = NULL; +	unsigned long flags; + +	spin_lock_irqsave(&smcd->lock, flags); +	conn = smcd->conn[dmbno]; +	if (conn) +		tasklet_schedule(&conn->rx_tsklet); +	spin_unlock_irqrestore(&smcd->lock, flags); +} +EXPORT_SYMBOL_GPL(smcd_handle_irq); diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h new file mode 100644 index 000000000000..aee45b860b79 --- /dev/null +++ b/net/smc/smc_ism.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Shared Memory Communications Direct over ISM devices (SMC-D) + * + * SMC-D ISM device structure definitions. + * + * Copyright IBM Corp. 2018 + */ + +#ifndef SMCD_ISM_H +#define SMCD_ISM_H + +#include <linux/uio.h> + +#include "smc.h" + +struct smcd_dev_list {	/* List of SMCD devices */ +	struct list_head list; +	spinlock_t lock;	/* Protects list of devices */ +}; + +extern struct smcd_dev_list	smcd_dev_list; /* list of smcd devices */ + +struct smc_ism_vlanid {			/* VLAN id set on ISM device */ +	struct list_head list; +	unsigned short vlanid;		/* Vlan id */ +	refcount_t refcnt;		/* Reference count */ +}; + +struct smc_ism_position {	/* ISM device position to write to */ +	u64 token;		/* Token of DMB */ +	u32 offset;		/* Offset into DMBE */ +	u8 index;		/* Index of DMBE */ +	u8 signal;		/* Generate interrupt on owner side */ +}; + +struct smcd_dev; + +int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev); +void smc_ism_set_conn(struct smc_connection *conn); +void smc_ism_unset_conn(struct smc_connection *conn); +int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id); +int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id); +int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size, +			 struct smc_buf_desc *dmb_desc); +int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc); +int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos, +		  void *data, size_t len); +#endif diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 5800a6b43d83..9c916c709ca7 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -182,12 +182,10 @@ static int smc_llc_add_pending_send(struct smc_link *link,  }  /* high-level API to send LLC confirm link */ -int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[], -			      union ib_gid *gid, +int smc_llc_send_confirm_link(struct smc_link *link,  			      enum smc_llc_reqresp reqresp)  { -	struct smc_link_group *lgr = container_of(link, struct smc_link_group, -						  lnk[SMC_SINGLE_LINK]); +	struct smc_link_group *lgr = smc_get_lgr(link);  	struct smc_llc_msg_confirm_link *confllc;  	struct smc_wr_tx_pend_priv *pend;  	struct smc_wr_buf *wr_buf; @@ -203,8 +201,9 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],  	confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;  	if (reqresp == SMC_LLC_RESP)  		confllc->hd.flags |= SMC_LLC_FLAG_RESP; -	memcpy(confllc->sender_mac, mac, ETH_ALEN); -	memcpy(confllc->sender_gid, gid, SMC_GID_SIZE); +	memcpy(confllc->sender_mac, link->smcibdev->mac[link->ibport - 1], +	       ETH_ALEN); +	memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE);  	hton24(confllc->sender_qp_num, link->roce_qp->qp_num);  	confllc->link_num = link->link_id;  	memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE); @@ -241,8 +240,7 @@ static int smc_llc_send_confirm_rkey(struct smc_link *link,  /* prepare an add link message */  static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc, -				  struct smc_link *link, u8 mac[], -				  union ib_gid *gid, +				  struct smc_link *link, u8 mac[], u8 gid[],  				  enum smc_llc_reqresp reqresp)  {  	memset(addllc, 0, sizeof(*addllc)); @@ -259,8 +257,7 @@ static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,  }  /* send ADD LINK request or response */ -int smc_llc_send_add_link(struct smc_link *link, u8 mac[], -			  union ib_gid *gid, +int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],  			  enum smc_llc_reqresp reqresp)  {  	struct smc_llc_msg_add_link *addllc; @@ -281,7 +278,7 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[],  /* prepare a delete link message */  static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,  				     struct smc_link *link, -				     enum smc_llc_reqresp reqresp) +				     enum smc_llc_reqresp reqresp, bool orderly)  {  	memset(delllc, 0, sizeof(*delllc));  	delllc->hd.common.type = SMC_LLC_DELETE_LINK; @@ -290,13 +287,14 @@ static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,  		delllc->hd.flags |= SMC_LLC_FLAG_RESP;  	/* DEL_LINK_ALL because only 1 link supported */  	delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; -	delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; +	if (orderly) +		delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;  	delllc->link_num = link->link_id;  }  /* send DELETE LINK request or response */  int smc_llc_send_delete_link(struct smc_link *link, -			     enum smc_llc_reqresp reqresp) +			     enum smc_llc_reqresp reqresp, bool orderly)  {  	struct smc_llc_msg_del_link *delllc;  	struct smc_wr_tx_pend_priv *pend; @@ -307,7 +305,7 @@ int smc_llc_send_delete_link(struct smc_link *link,  	if (rc)  		return rc;  	delllc = (struct smc_llc_msg_del_link *)wr_buf; -	smc_llc_prep_delete_link(delllc, link, reqresp); +	smc_llc_prep_delete_link(delllc, link, reqresp, orderly);  	/* send llc message */  	rc = smc_wr_tx_send(link, pend);  	return rc; @@ -381,11 +379,9 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)  static void smc_llc_rx_confirm_link(struct smc_link *link,  				    struct smc_llc_msg_confirm_link *llc)  { -	struct smc_link_group *lgr; +	struct smc_link_group *lgr = smc_get_lgr(link);  	int conf_rc; -	lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); -  	/* RMBE eyecatchers are not supported */  	if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)  		conf_rc = 0; @@ -411,8 +407,7 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,  static void smc_llc_rx_add_link(struct smc_link *link,  				struct smc_llc_msg_add_link *llc)  { -	struct smc_link_group *lgr = container_of(link, struct smc_link_group, -						  lnk[SMC_SINGLE_LINK]); +	struct smc_link_group *lgr = smc_get_lgr(link);  	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {  		if (link->state == SMC_LNK_ACTIVATING) @@ -426,14 +421,12 @@ static void smc_llc_rx_add_link(struct smc_link *link,  		if (lgr->role == SMC_SERV) {  			smc_llc_prep_add_link(llc, link,  					link->smcibdev->mac[link->ibport - 1], -					&link->smcibdev->gid[link->ibport - 1], -					SMC_LLC_REQ); +					link->gid, SMC_LLC_REQ);  		} else {  			smc_llc_prep_add_link(llc, link,  					link->smcibdev->mac[link->ibport - 1], -					&link->smcibdev->gid[link->ibport - 1], -					SMC_LLC_RESP); +					link->gid, SMC_LLC_RESP);  		}  		smc_llc_send_message(link, llc, sizeof(*llc));  	} @@ -442,22 +435,23 @@ static void smc_llc_rx_add_link(struct smc_link *link,  static void smc_llc_rx_delete_link(struct smc_link *link,  				   struct smc_llc_msg_del_link *llc)  { -	struct smc_link_group *lgr = container_of(link, struct smc_link_group, -						  lnk[SMC_SINGLE_LINK]); +	struct smc_link_group *lgr = smc_get_lgr(link);  	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {  		if (lgr->role == SMC_SERV) -			smc_lgr_terminate(lgr); +			smc_lgr_schedule_free_work_fast(lgr);  	} else { +		smc_lgr_forget(lgr); +		smc_llc_link_deleting(link);  		if (lgr->role == SMC_SERV) { -			smc_lgr_forget(lgr); -			smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ); -			smc_llc_send_message(link, llc, sizeof(*llc)); +			/* client asks to delete this link, send request */ +			smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);  		} else { -			smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP); -			smc_llc_send_message(link, llc, sizeof(*llc)); -			smc_lgr_terminate(lgr); +			/* server requests to delete this link, send response */ +			smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);  		} +		smc_llc_send_message(link, llc, sizeof(*llc)); +		smc_lgr_schedule_free_work_fast(lgr);  	}  } @@ -476,17 +470,14 @@ static void smc_llc_rx_test_link(struct smc_link *link,  static void smc_llc_rx_confirm_rkey(struct smc_link *link,  				    struct smc_llc_msg_confirm_rkey *llc)  { -	struct smc_link_group *lgr;  	int rc; -	lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); -  	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {  		link->llc_confirm_rkey_rc = llc->hd.flags &  					    SMC_LLC_FLAG_RKEY_NEG;  		complete(&link->llc_confirm_rkey);  	} else { -		rc = smc_rtoken_add(lgr, +		rc = smc_rtoken_add(smc_get_lgr(link),  				    llc->rtoken[0].rmb_vaddr,  				    llc->rtoken[0].rmb_key); @@ -514,18 +505,15 @@ static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,  static void smc_llc_rx_delete_rkey(struct smc_link *link,  				   struct smc_llc_msg_delete_rkey *llc)  { -	struct smc_link_group *lgr;  	u8 err_mask = 0;  	int i, max; -	lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); -  	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {  		/* unused as long as we don't send this type of msg */  	} else {  		max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);  		for (i = 0; i < max; i++) { -			if (smc_rtoken_delete(lgr, llc->rkey[i])) +			if (smc_rtoken_delete(smc_get_lgr(link), llc->rkey[i]))  				err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);  		} @@ -583,12 +571,10 @@ static void smc_llc_testlink_work(struct work_struct *work)  	struct smc_link *link = container_of(to_delayed_work(work),  					     struct smc_link, llc_testlink_wrk);  	unsigned long next_interval; -	struct smc_link_group *lgr;  	unsigned long expire_time;  	u8 user_data[16] = { 0 };  	int rc; -	lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);  	if (link->state != SMC_LNK_ACTIVE)  		return;		/* don't reschedule worker */  	expire_time = link->wr_rx_tstamp + link->llc_testlink_time; @@ -602,7 +588,7 @@ static void smc_llc_testlink_work(struct work_struct *work)  	rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,  						       SMC_LLC_WAIT_TIME);  	if (rc <= 0) { -		smc_lgr_terminate(lgr); +		smc_lgr_terminate(smc_get_lgr(link));  		return;  	}  	next_interval = link->llc_testlink_time; @@ -613,8 +599,7 @@ out:  int smc_llc_link_init(struct smc_link *link)  { -	struct smc_link_group *lgr = container_of(link, struct smc_link_group, -						  lnk[SMC_SINGLE_LINK]); +	struct smc_link_group *lgr = smc_get_lgr(link);  	link->llc_wq = alloc_ordered_workqueue("llc_wq-%x:%x)", WQ_MEM_RECLAIM,  					       *((u32 *)lgr->id),  					       link->link_id); @@ -640,6 +625,11 @@ void smc_llc_link_active(struct smc_link *link, int testlink_time)  	}  } +void smc_llc_link_deleting(struct smc_link *link) +{ +	link->state = SMC_LNK_DELETING; +} +  /* called in tasklet context */  void smc_llc_link_inactive(struct smc_link *link)  { diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 65c8645e96a1..9e2ff088e301 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -36,14 +36,15 @@ enum smc_llc_msg_type {  };  /* transmit */ -int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid, +int smc_llc_send_confirm_link(struct smc_link *lnk,  			      enum smc_llc_reqresp reqresp); -int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid, +int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],  			  enum smc_llc_reqresp reqresp);  int smc_llc_send_delete_link(struct smc_link *link, -			     enum smc_llc_reqresp reqresp); +			     enum smc_llc_reqresp reqresp, bool orderly);  int smc_llc_link_init(struct smc_link *link);  void smc_llc_link_active(struct smc_link *link, int testlink_time); +void smc_llc_link_deleting(struct smc_link *link);  void smc_llc_link_inactive(struct smc_link *link);  void smc_llc_link_clear(struct smc_link *link);  int smc_llc_do_confirm_rkey(struct smc_link *link, diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index d7b88b2d1b22..01c6ce042a1c 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -22,13 +22,12 @@  #include "smc_pnet.h"  #include "smc_ib.h" - -#define SMC_MAX_PNET_ID_LEN	16	/* Max. length of PNET id */ +#include "smc_ism.h"  static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {  	[SMC_PNETID_NAME] = {  		.type = NLA_NUL_STRING, -		.len = SMC_MAX_PNET_ID_LEN - 1 +		.len = SMC_MAX_PNETID_LEN - 1  	},  	[SMC_PNETID_ETHNAME] = {  		.type = NLA_NUL_STRING, @@ -65,7 +64,7 @@ static struct smc_pnettable {   */  struct smc_pnetentry {  	struct list_head list; -	char pnet_name[SMC_MAX_PNET_ID_LEN + 1]; +	char pnet_name[SMC_MAX_PNETID_LEN + 1];  	struct net_device *ndev;  	struct smc_ib_device *smcibdev;  	u8 ib_port; @@ -209,7 +208,7 @@ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)  		return false;  	while (--end >= bf && isspace(*end))  		; -	if (end - bf >= SMC_MAX_PNET_ID_LEN) +	if (end - bf >= SMC_MAX_PNETID_LEN)  		return false;  	while (bf <= end) {  		if (!isalnum(*bf)) @@ -358,9 +357,6 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)  		kfree(pnetelem);  		return rc;  	} -	rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port); -	if (rc) -		smc_pnet_remove_by_pnetid(pnetelem->pnet_name);  	return rc;  } @@ -485,10 +481,10 @@ static int smc_pnet_netdev_event(struct notifier_block *this,  	case NETDEV_REBOOT:  	case NETDEV_UNREGISTER:  		smc_pnet_remove_by_ndev(event_dev); +		return NOTIFY_OK;  	default: -		break; +		return NOTIFY_DONE;  	} -	return NOTIFY_DONE;  }  static struct notifier_block smc_netdev_notifier = { @@ -515,28 +511,104 @@ void smc_pnet_exit(void)  	genl_unregister_family(&smc_pnet_nl_family);  } -/* PNET table analysis for a given sock: - * determine ib_device and port belonging to used internal TCP socket - * ethernet interface. +/* Determine one base device for stacked net devices. + * If the lower device level contains more than one devices + * (for instance with bonding slaves), just the first device + * is used to reach a base device.   */ -void smc_pnet_find_roce_resource(struct sock *sk, -				 struct smc_ib_device **smcibdev, u8 *ibport) +static struct net_device *pnet_find_base_ndev(struct net_device *ndev)  { -	struct dst_entry *dst = sk_dst_get(sk); -	struct smc_pnetentry *pnetelem; +	int i, nest_lvl; -	*smcibdev = NULL; -	*ibport = 0; +	rtnl_lock(); +	nest_lvl = dev_get_nest_level(ndev); +	for (i = 0; i < nest_lvl; i++) { +		struct list_head *lower = &ndev->adj_list.lower; + +		if (list_empty(lower)) +			break; +		lower = lower->next; +		ndev = netdev_lower_get_next(ndev, &lower); +	} +	rtnl_unlock(); +	return ndev; +} + +/* Determine the corresponding IB device port based on the hardware PNETID. + * Searching stops at the first matching active IB device port with vlan_id + * configured. + */ +static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, +					 struct smc_ib_device **smcibdev, +					 u8 *ibport, unsigned short vlan_id, +					 u8 gid[]) +{ +	u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; +	struct smc_ib_device *ibdev; +	int i; + +	ndev = pnet_find_base_ndev(ndev); +	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, +				   ndev_pnetid)) +		return; /* pnetid could not be determined */ + +	spin_lock(&smc_ib_devices.lock); +	list_for_each_entry(ibdev, &smc_ib_devices.list, list) { +		for (i = 1; i <= SMC_MAX_PORTS; i++) { +			if (!rdma_is_port_valid(ibdev->ibdev, i)) +				continue; +			if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid, +				    SMC_MAX_PNETID_LEN) && +			    smc_ib_port_active(ibdev, i) && +			    !smc_ib_determine_gid(ibdev, i, vlan_id, gid, +						  NULL))  { +				*smcibdev = ibdev; +				*ibport = i; +				goto out; +			} +		} +	} +out: +	spin_unlock(&smc_ib_devices.lock); +} + +static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, +					struct smcd_dev **smcismdev) +{ +	u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; +	struct smcd_dev *ismdev; + +	ndev = pnet_find_base_ndev(ndev); +	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, +				   ndev_pnetid)) +		return; /* pnetid could not be determined */ + +	spin_lock(&smcd_dev_list.lock); +	list_for_each_entry(ismdev, &smcd_dev_list.list, list) { +		if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) { +			*smcismdev = ismdev; +			break; +		} +	} +	spin_unlock(&smcd_dev_list.lock); +} + +/* Lookup of coupled ib_device via SMC pnet table */ +static void smc_pnet_find_roce_by_table(struct net_device *netdev, +					struct smc_ib_device **smcibdev, +					u8 *ibport, unsigned short vlan_id, +					u8 gid[]) +{ +	struct smc_pnetentry *pnetelem; -	if (!dst) -		return; -	if (!dst->dev) -		goto out_rel;  	read_lock(&smc_pnettable.lock);  	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { -		if (dst->dev == pnetelem->ndev) { +		if (netdev == pnetelem->ndev) {  			if (smc_ib_port_active(pnetelem->smcibdev, -					       pnetelem->ib_port)) { +					       pnetelem->ib_port) && +			    !smc_ib_determine_gid(pnetelem->smcibdev, +						  pnetelem->ib_port, vlan_id, +						  gid, NULL)) {  				*smcibdev = pnetelem->smcibdev;  				*ibport = pnetelem->ib_port;  			} @@ -544,6 +616,55 @@ void smc_pnet_find_roce_resource(struct sock *sk,  		}  	}  	read_unlock(&smc_pnettable.lock); +} + +/* PNET table analysis for a given sock: + * determine ib_device and port belonging to used internal TCP socket + * ethernet interface. + */ +void smc_pnet_find_roce_resource(struct sock *sk, +				 struct smc_ib_device **smcibdev, u8 *ibport, +				 unsigned short vlan_id, u8 gid[]) +{ +	struct dst_entry *dst = sk_dst_get(sk); + +	*smcibdev = NULL; +	*ibport = 0; + +	if (!dst) +		goto out; +	if (!dst->dev) +		goto out_rel; + +	/* if possible, lookup via hardware-defined pnetid */ +	smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid); +	if (*smcibdev) +		goto out_rel; + +	/* lookup via SMC PNET table */ +	smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport, vlan_id, gid); + +out_rel: +	dst_release(dst); +out: +	return; +} + +void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev) +{ +	struct dst_entry *dst = sk_dst_get(sk); + +	*smcismdev = NULL; +	if (!dst) +		goto out; +	if (!dst->dev) +		goto out_rel; + +	/* if possible, lookup via hardware-defined pnetid */ +	smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev); +  out_rel:  	dst_release(dst); +out: +	return;  } diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 5a29519db976..8ff777636e32 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -12,12 +12,29 @@  #ifndef _SMC_PNET_H  #define _SMC_PNET_H +#if IS_ENABLED(CONFIG_HAVE_PNETID) +#include <asm/pnet.h> +#endif +  struct smc_ib_device; +struct smcd_dev; + +static inline int smc_pnetid_by_dev_port(struct device *dev, +					 unsigned short port, u8 *pnetid) +{ +#if IS_ENABLED(CONFIG_HAVE_PNETID) +	return pnet_id_by_dev_port(dev, port, pnetid); +#else +	return -ENOENT; +#endif +}  int smc_pnet_init(void) __init;  void smc_pnet_exit(void);  int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev);  void smc_pnet_find_roce_resource(struct sock *sk, -				 struct smc_ib_device **smcibdev, u8 *ibport); +				 struct smc_ib_device **smcibdev, u8 *ibport, +				 unsigned short vlan_id, u8 gid[]); +void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev);  #endif diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 3d77b383cccd..bbcf0fe4ae10 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -82,8 +82,7 @@ static int smc_rx_update_consumer(struct smc_sock *smc,  		}  	} -	smc_curs_write(&conn->local_tx_ctrl.cons, smc_curs_read(&cons, conn), -		       conn); +	smc_curs_copy(&conn->local_tx_ctrl.cons, &cons, conn);  	/* send consumer cursor update if required */  	/* similar to advertising new TCP rcv_wnd if required */ @@ -97,8 +96,7 @@ static void smc_rx_update_cons(struct smc_sock *smc, size_t len)  	struct smc_connection *conn = &smc->conn;  	union smc_host_cursor cons; -	smc_curs_write(&cons, smc_curs_read(&conn->local_tx_ctrl.cons, conn), -		       conn); +	smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);  	smc_rx_update_consumer(smc, cons, len);  } @@ -157,10 +155,8 @@ static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len,  	struct splice_pipe_desc spd;  	struct partial_page partial;  	struct smc_spd_priv *priv; -	struct page *page;  	int bytes; -	page = virt_to_page(smc->conn.rmb_desc->cpu_addr);  	priv = kzalloc(sizeof(*priv), GFP_KERNEL);  	if (!priv)  		return -ENOMEM; @@ -172,7 +168,7 @@ static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len,  	spd.nr_pages_max = 1;  	spd.nr_pages = 1; -	spd.pages = &page; +	spd.pages = &smc->conn.rmb_desc->pages;  	spd.partial = &partial;  	spd.ops = &smc_pipe_ops;  	spd.spd_release = smc_rx_spd_release; @@ -245,10 +241,7 @@ static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len,  			if (!(flags & MSG_TRUNC))  				rc = memcpy_to_msg(msg, &conn->urg_rx_byte, 1);  			len = 1; -			smc_curs_write(&cons, -				       smc_curs_read(&conn->local_tx_ctrl.cons, -						     conn), -				       conn); +			smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);  			if (smc_curs_diff(conn->rmb_desc->len, &cons,  					  &conn->urg_curs) > 1)  				conn->urg_rx_skip_pend = true; @@ -305,7 +298,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,  	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);  	/* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */ -	rcvbuf_base = conn->rmb_desc->cpu_addr; +	rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr;  	do { /* while (read_remaining) */  		if (read_done >= target || (pipe && read_done)) @@ -370,9 +363,7 @@ copy:  			continue;  		} -		smc_curs_write(&cons, -			       smc_curs_read(&conn->local_tx_ctrl.cons, conn), -			       conn); +		smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);  		/* subsequent splice() calls pick up where previous left */  		if (splbytes)  			smc_curs_add(conn->rmb_desc->len, &cons, splbytes); diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index f82886b7d1d8..d8366ed51757 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -24,6 +24,7 @@  #include "smc.h"  #include "smc_wr.h"  #include "smc_cdc.h" +#include "smc_ism.h"  #include "smc_tx.h"  #define SMC_TX_WORK_DELAY	HZ @@ -180,9 +181,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)  		copylen = min_t(size_t, send_remaining, writespace);  		/* determine start of sndbuf */  		sndbuf_base = conn->sndbuf_desc->cpu_addr; -		smc_curs_write(&prep, -			       smc_curs_read(&conn->tx_curs_prep, conn), -			       conn); +		smc_curs_copy(&prep, &conn->tx_curs_prep, conn);  		tx_cnt_prep = prep.count;  		/* determine chunks where to write into sndbuf */  		/* either unwrapped case, or 1st chunk of wrapped case */ @@ -213,9 +212,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)  		smc_sndbuf_sync_sg_for_device(conn);  		/* update cursors */  		smc_curs_add(conn->sndbuf_desc->len, &prep, copylen); -		smc_curs_write(&conn->tx_curs_prep, -			       smc_curs_read(&prep, conn), -			       conn); +		smc_curs_copy(&conn->tx_curs_prep, &prep, conn);  		/* increased in send tasklet smc_cdc_tx_handler() */  		smp_mb__before_atomic();  		atomic_sub(copylen, &conn->sndbuf_space); @@ -250,12 +247,29 @@ out_err:  /***************************** sndbuf consumer *******************************/ +/* sndbuf consumer: actual data transfer of one target chunk with ISM write */ +int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, +		      u32 offset, int signal) +{ +	struct smc_ism_position pos; +	int rc; + +	memset(&pos, 0, sizeof(pos)); +	pos.token = conn->peer_token; +	pos.index = conn->peer_rmbe_idx; +	pos.offset = conn->tx_off + offset; +	pos.signal = signal; +	rc = smc_ism_write(conn->lgr->smcd, &pos, data, len); +	if (rc) +		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; +	return rc; +} +  /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */  static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,  			     int num_sges, struct ib_sge sges[])  {  	struct smc_link_group *lgr = conn->lgr; -	struct ib_send_wr *failed_wr = NULL;  	struct ib_rdma_wr rdma_wr;  	struct smc_link *link;  	int rc; @@ -273,7 +287,7 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,  		/* offset within RMBE */  		peer_rmbe_offset;  	rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; -	rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr); +	rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL);  	if (rc) {  		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;  		smc_lgr_terminate(lgr); @@ -297,26 +311,109 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn,  	smc_curs_add(conn->sndbuf_desc->len, sent, len);  } +/* SMC-R helper for smc_tx_rdma_writes() */ +static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, +			       size_t src_off, size_t src_len, +			       size_t dst_off, size_t dst_len) +{ +	dma_addr_t dma_addr = +		sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); +	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; +	int src_len_sum = src_len, dst_len_sum = dst_len; +	struct ib_sge sges[SMC_IB_MAX_SEND_SGE]; +	int sent_count = src_off; +	int srcchunk, dstchunk; +	int num_sges; +	int rc; + +	for (dstchunk = 0; dstchunk < 2; dstchunk++) { +		num_sges = 0; +		for (srcchunk = 0; srcchunk < 2; srcchunk++) { +			sges[srcchunk].addr = dma_addr + src_off; +			sges[srcchunk].length = src_len; +			sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; +			num_sges++; + +			src_off += src_len; +			if (src_off >= conn->sndbuf_desc->len) +				src_off -= conn->sndbuf_desc->len; +						/* modulo in send ring */ +			if (src_len_sum == dst_len) +				break; /* either on 1st or 2nd iteration */ +			/* prepare next (== 2nd) iteration */ +			src_len = dst_len - src_len; /* remainder */ +			src_len_sum += src_len; +		} +		rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges); +		if (rc) +			return rc; +		if (dst_len_sum == len) +			break; /* either on 1st or 2nd iteration */ +		/* prepare next (== 2nd) iteration */ +		dst_off = 0; /* modulo offset in RMBE ring buffer */ +		dst_len = len - dst_len; /* remainder */ +		dst_len_sum += dst_len; +		src_len = min_t(int, dst_len, conn->sndbuf_desc->len - +				sent_count); +		src_len_sum = src_len; +	} +	return 0; +} + +/* SMC-D helper for smc_tx_rdma_writes() */ +static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len, +			       size_t src_off, size_t src_len, +			       size_t dst_off, size_t dst_len) +{ +	int src_len_sum = src_len, dst_len_sum = dst_len; +	int srcchunk, dstchunk; +	int rc; + +	for (dstchunk = 0; dstchunk < 2; dstchunk++) { +		for (srcchunk = 0; srcchunk < 2; srcchunk++) { +			void *data = conn->sndbuf_desc->cpu_addr + src_off; + +			rc = smcd_tx_ism_write(conn, data, src_len, dst_off + +					       sizeof(struct smcd_cdc_msg), 0); +			if (rc) +				return rc; +			dst_off += src_len; +			src_off += src_len; +			if (src_off >= conn->sndbuf_desc->len) +				src_off -= conn->sndbuf_desc->len; +						/* modulo in send ring */ +			if (src_len_sum == dst_len) +				break; /* either on 1st or 2nd iteration */ +			/* prepare next (== 2nd) iteration */ +			src_len = dst_len - src_len; /* remainder */ +			src_len_sum += src_len; +		} +		if (dst_len_sum == len) +			break; /* either on 1st or 2nd iteration */ +		/* prepare next (== 2nd) iteration */ +		dst_off = 0; /* modulo offset in RMBE ring buffer */ +		dst_len = len - dst_len; /* remainder */ +		dst_len_sum += dst_len; +		src_len = min_t(int, dst_len, conn->sndbuf_desc->len - src_off); +		src_len_sum = src_len; +	} +	return 0; +} +  /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;   * usable snd_wnd as max transmit   */  static int smc_tx_rdma_writes(struct smc_connection *conn)  { -	size_t src_off, src_len, dst_off, dst_len; /* current chunk values */ -	size_t len, dst_len_sum, src_len_sum, dstchunk, srcchunk; +	size_t len, src_len, dst_off, dst_len; /* current chunk values */  	union smc_host_cursor sent, prep, prod, cons; -	struct ib_sge sges[SMC_IB_MAX_SEND_SGE]; -	struct smc_link_group *lgr = conn->lgr;  	struct smc_cdc_producer_flags *pflags;  	int to_send, rmbespace; -	struct smc_link *link; -	dma_addr_t dma_addr; -	int num_sges;  	int rc;  	/* source: sndbuf */ -	smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn); -	smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn); +	smc_curs_copy(&sent, &conn->tx_curs_sent, conn); +	smc_curs_copy(&prep, &conn->tx_curs_prep, conn);  	/* cf. wmem_alloc - (snd_max - snd_una) */  	to_send = smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);  	if (to_send <= 0) @@ -327,12 +424,8 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)  	rmbespace = atomic_read(&conn->peer_rmbe_space);  	if (rmbespace <= 0)  		return 0; -	smc_curs_write(&prod, -		       smc_curs_read(&conn->local_tx_ctrl.prod, conn), -		       conn); -	smc_curs_write(&cons, -		       smc_curs_read(&conn->local_rx_ctrl.cons, conn), -		       conn); +	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn); +	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);  	/* if usable snd_wnd closes ask peer to advertise once it opens again */  	pflags = &conn->local_tx_ctrl.prod_flags; @@ -341,7 +434,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)  	len = min(to_send, rmbespace);  	/* initialize variables for first iteration of subsequent nested loop */ -	link = &lgr->lnk[SMC_SINGLE_LINK];  	dst_off = prod.count;  	if (prod.wrap == cons.wrap) {  		/* the filled destination area is unwrapped, @@ -358,8 +450,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)  		 */  		dst_len = len;  	} -	dst_len_sum = dst_len; -	src_off = sent.count;  	/* dst_len determines the maximum src_len */  	if (sent.count + dst_len <= conn->sndbuf_desc->len) {  		/* unwrapped src case: single chunk of entire dst_len */ @@ -368,51 +458,23 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)  		/* wrapped src case: 2 chunks of sum dst_len; start with 1st: */  		src_len = conn->sndbuf_desc->len - sent.count;  	} -	src_len_sum = src_len; -	dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); -	for (dstchunk = 0; dstchunk < 2; dstchunk++) { -		num_sges = 0; -		for (srcchunk = 0; srcchunk < 2; srcchunk++) { -			sges[srcchunk].addr = dma_addr + src_off; -			sges[srcchunk].length = src_len; -			sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; -			num_sges++; -			src_off += src_len; -			if (src_off >= conn->sndbuf_desc->len) -				src_off -= conn->sndbuf_desc->len; -						/* modulo in send ring */ -			if (src_len_sum == dst_len) -				break; /* either on 1st or 2nd iteration */ -			/* prepare next (== 2nd) iteration */ -			src_len = dst_len - src_len; /* remainder */ -			src_len_sum += src_len; -		} -		rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges); -		if (rc) -			return rc; -		if (dst_len_sum == len) -			break; /* either on 1st or 2nd iteration */ -		/* prepare next (== 2nd) iteration */ -		dst_off = 0; /* modulo offset in RMBE ring buffer */ -		dst_len = len - dst_len; /* remainder */ -		dst_len_sum += dst_len; -		src_len = min_t(int, -				dst_len, conn->sndbuf_desc->len - sent.count); -		src_len_sum = src_len; -	} + +	if (conn->lgr->is_smcd) +		rc = smcd_tx_rdma_writes(conn, len, sent.count, src_len, +					 dst_off, dst_len); +	else +		rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len, +					 dst_off, dst_len); +	if (rc) +		return rc;  	if (conn->urg_tx_pend && len == to_send)  		pflags->urg_data_present = 1;  	smc_tx_advance_cursors(conn, &prod, &sent, len);  	/* update connection's cursors with advanced local cursors */ -	smc_curs_write(&conn->local_tx_ctrl.prod, -		       smc_curs_read(&prod, conn), -		       conn); +	smc_curs_copy(&conn->local_tx_ctrl.prod, &prod, conn);  							/* dst: peer RMBE */ -	smc_curs_write(&conn->tx_curs_sent, -		       smc_curs_read(&sent, conn), -		       conn); -							/* src: local sndbuf */ +	smc_curs_copy(&conn->tx_curs_sent, &sent, conn);/* src: local sndbuf */  	return 0;  } @@ -420,7 +482,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)  /* Wakeup sndbuf consumers from any context (IRQ or process)   * since there is more data to transmit; usable snd_wnd as max transmit   */ -int smc_tx_sndbuf_nonempty(struct smc_connection *conn) +static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)  {  	struct smc_cdc_producer_flags *pflags;  	struct smc_cdc_tx_pend *pend; @@ -467,6 +529,37 @@ out_unlock:  	return rc;  } +static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn) +{ +	struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; +	int rc = 0; + +	spin_lock_bh(&conn->send_lock); +	if (!pflags->urg_data_present) +		rc = smc_tx_rdma_writes(conn); +	if (!rc) +		rc = smcd_cdc_msg_send(conn); + +	if (!rc && pflags->urg_data_present) { +		pflags->urg_data_pending = 0; +		pflags->urg_data_present = 0; +	} +	spin_unlock_bh(&conn->send_lock); +	return rc; +} + +int smc_tx_sndbuf_nonempty(struct smc_connection *conn) +{ +	int rc; + +	if (conn->lgr->is_smcd) +		rc = smcd_tx_sndbuf_nonempty(conn); +	else +		rc = smcr_tx_sndbuf_nonempty(conn); + +	return rc; +} +  /* Wakeup sndbuf consumers from process context   * since there is more data to transmit   */ @@ -499,17 +592,11 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)  	int sender_free = conn->rmb_desc->len;  	int to_confirm; -	smc_curs_write(&cons, -		       smc_curs_read(&conn->local_tx_ctrl.cons, conn), -		       conn); -	smc_curs_write(&cfed, -		       smc_curs_read(&conn->rx_curs_confirmed, conn), -		       conn); +	smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); +	smc_curs_copy(&cfed, &conn->rx_curs_confirmed, conn);  	to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons);  	if (to_confirm > conn->rmbe_update_limit) { -		smc_curs_write(&prod, -			       smc_curs_read(&conn->local_rx_ctrl.prod, conn), -			       conn); +		smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn);  		sender_free = conn->rmb_desc->len -  			      smc_curs_diff(conn->rmb_desc->len, &prod, &cfed);  	} @@ -525,9 +612,8 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)  					      SMC_TX_WORK_DELAY);  			return;  		} -		smc_curs_write(&conn->rx_curs_confirmed, -			       smc_curs_read(&conn->local_tx_ctrl.cons, conn), -			       conn); +		smc_curs_copy(&conn->rx_curs_confirmed, +			      &conn->local_tx_ctrl.cons, conn);  		conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;  	}  	if (conn->local_rx_ctrl.prod_flags.write_blocked && diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h index 9d2238909fa0..07e6ad76224a 100644 --- a/net/smc/smc_tx.h +++ b/net/smc/smc_tx.h @@ -22,8 +22,8 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn)  {  	union smc_host_cursor sent, prep; -	smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn); -	smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn); +	smc_curs_copy(&sent, &conn->tx_curs_sent, conn); +	smc_curs_copy(&prep, &conn->tx_curs_prep, conn);  	return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);  } @@ -33,5 +33,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len);  int smc_tx_sndbuf_nonempty(struct smc_connection *conn);  void smc_tx_sndbuf_nonfull(struct smc_sock *smc);  void smc_tx_consumer_update(struct smc_connection *conn, bool force); +int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, +		      u32 offset, int signal);  #endif /* SMC_TX_H */ diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index dbd2605d1962..3c458d279855 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -92,8 +92,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)  	if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))  		return;  	if (wc->status) { -		struct smc_link_group *lgr; -  		for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {  			/* clear full struct smc_wr_tx_pend including .priv */  			memset(&link->wr_tx_pends[i], 0, @@ -103,9 +101,7 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)  			clear_bit(i, link->wr_tx_mask);  		}  		/* terminate connections of this link group abnormally */ -		lgr = container_of(link, struct smc_link_group, -				   lnk[SMC_SINGLE_LINK]); -		smc_lgr_terminate(lgr); +		smc_lgr_terminate(smc_get_lgr(link));  	}  	if (pnd_snd.handler)  		pnd_snd.handler(&pnd_snd.priv, link, wc->status); @@ -186,18 +182,14 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,  		if (rc)  			return rc;  	} else { -		struct smc_link_group *lgr; - -		lgr = container_of(link, struct smc_link_group, -				   lnk[SMC_SINGLE_LINK]);  		rc = wait_event_timeout(  			link->wr_tx_wait, -			list_empty(&lgr->list) || /* lgr terminated */ +			link->state == SMC_LNK_INACTIVE ||  			(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),  			SMC_WR_TX_WAIT_FREE_SLOT_TIME);  		if (!rc) {  			/* timeout - terminate connections */ -			smc_lgr_terminate(lgr); +			smc_lgr_terminate(smc_get_lgr(link));  			return -EPIPE;  		}  		if (idx == link->wr_tx_cnt) @@ -240,22 +232,16 @@ int smc_wr_tx_put_slot(struct smc_link *link,   */  int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)  { -	struct ib_send_wr *failed_wr = NULL;  	struct smc_wr_tx_pend *pend;  	int rc;  	ib_req_notify_cq(link->smcibdev->roce_cq_send,  			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);  	pend = container_of(priv, struct smc_wr_tx_pend, priv); -	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], -			  &failed_wr); +	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);  	if (rc) { -		struct smc_link_group *lgr = -			container_of(link, struct smc_link_group, -				     lnk[SMC_SINGLE_LINK]); -  		smc_wr_tx_put_slot(link, priv); -		smc_lgr_terminate(lgr); +		smc_lgr_terminate(smc_get_lgr(link));  	}  	return rc;  } @@ -263,7 +249,6 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)  /* Register a memory region and wait for result. */  int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)  { -	struct ib_send_wr *failed_wr = NULL;  	int rc;  	ib_req_notify_cq(link->smcibdev->roce_cq_send, @@ -272,9 +257,7 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)  	link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;  	link->wr_reg.mr = mr;  	link->wr_reg.key = mr->rkey; -	failed_wr = &link->wr_reg.wr; -	rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr); -	WARN_ON(failed_wr != &link->wr_reg.wr); +	rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, NULL);  	if (rc)  		return rc; @@ -283,11 +266,7 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)  					      SMC_WR_REG_MR_WAIT_TIME);  	if (!rc) {  		/* timeout - terminate connections */ -		struct smc_link_group *lgr; - -		lgr = container_of(link, struct smc_link_group, -				   lnk[SMC_SINGLE_LINK]); -		smc_lgr_terminate(lgr); +		smc_lgr_terminate(smc_get_lgr(link));  		return -EPIPE;  	}  	if (rc == -ERESTARTSYS) @@ -380,8 +359,6 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)  			smc_wr_rx_demultiplex(&wc[i]);  			smc_wr_rx_post(link); /* refill WR RX */  		} else { -			struct smc_link_group *lgr; -  			/* handle status errors */  			switch (wc[i].status) {  			case IB_WC_RETRY_EXC_ERR: @@ -390,9 +367,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)  				/* terminate connections of this link group  				 * abnormally  				 */ -				lgr = container_of(link, struct smc_link_group, -						   lnk[SMC_SINGLE_LINK]); -				smc_lgr_terminate(lgr); +				smc_lgr_terminate(smc_get_lgr(link));  				break;  			default:  				smc_wr_rx_post(link); /* refill WR RX */ diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 210bec3c3ebe..1d85bb14fd6f 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -63,7 +63,6 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val)  /* post a new receive work request to fill a completed old work request entry */  static inline int smc_wr_rx_post(struct smc_link *link)  { -	struct ib_recv_wr *bad_recv_wr = NULL;  	int rc;  	u64 wr_id, temp_wr_id;  	u32 index; @@ -72,7 +71,7 @@ static inline int smc_wr_rx_post(struct smc_link *link)  	temp_wr_id = wr_id;  	index = do_div(temp_wr_id, link->wr_rx_cnt);  	link->wr_rx_ibs[index].wr_id = wr_id; -	rc = ib_post_recv(link->roce_qp, &link->wr_rx_ibs[index], &bad_recv_wr); +	rc = ib_post_recv(link->roce_qp, &link->wr_rx_ibs[index], NULL);  	return rc;  }  | 
