From be6a3f38ff2a2bfd2e591fdc566940a0d4d9428c Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 28 Jun 2018 19:05:04 +0200 Subject: net/smc: determine port attributes independent from pnet table For SMC it is important to know the current port state of RoCE devices. Monitoring port states has been triggered, when a RoCE device was added to the pnet table. To support future alternatives to the pnet table the monitoring of ports is made independent of the existence of a pnet table. It starts once the smc_ib_device is established. Due to this change smc_ib_remember_port_attr() is now a local function and shuffling its location and the location of its used functions makes any forward references obsolete. And the duplicate SMC_MAX_PORTS definition is removed. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc.h | 2 - net/smc/smc_ib.c | 130 ++++++++++++++++++++++++++++------------------------- net/smc/smc_ib.h | 1 - net/smc/smc_pnet.c | 7 +-- 4 files changed, 72 insertions(+), 68 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc.h b/net/smc/smc.h index 51ae1f10d81a..7c86f716a92e 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -21,8 +21,6 @@ #define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ #define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ -#define SMC_MAX_PORTS 2 /* Max # of ports */ - extern struct proto smc_proto; extern struct proto smc_proto6; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 0eed7ab9f28b..f8b159ced032 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -143,6 +143,62 @@ out: return rc; } +static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) +{ + struct ib_gid_attr gattr; + int rc; + + rc = ib_query_gid(smcibdev->ibdev, ibport, 0, + &smcibdev->gid[ibport - 1], &gattr); + if (rc || !gattr.ndev) + return -ENODEV; + + memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN); + dev_put(gattr.ndev); + return 0; +} + +/* Create an identifier unique for this instance of SMC-R. + * The MAC-address of the first active registered IB device + * plus a random 2-byte number is used to create this identifier. + * This name is delivered to the peer during connection initialization. + */ +static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev, + u8 ibport) +{ + memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1], + sizeof(smcibdev->mac[ibport - 1])); + get_random_bytes(&local_systemid[0], 2); +} + +bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) +{ + return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; +} + +static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) +{ + int rc; + + memset(&smcibdev->pattr[ibport - 1], 0, + sizeof(smcibdev->pattr[ibport - 1])); + rc = ib_query_port(smcibdev->ibdev, ibport, + &smcibdev->pattr[ibport - 1]); + if (rc) + goto out; + /* the SMC protocol requires specification of the RoCE MAC address */ + rc = smc_ib_fill_gid_and_mac(smcibdev, ibport); + if (rc) + goto out; + if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET, + sizeof(local_systemid)) && + smc_ib_port_active(smcibdev, ibport)) + /* create unique system identifier */ + smc_ib_define_local_systemid(smcibdev, ibport); +out: + return rc; +} + /* process context wrapper for might_sleep smc_ib_remember_port_attr */ static void smc_ib_port_event_work(struct work_struct *work) { @@ -370,62 +426,6 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev, buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0; } -static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) -{ - struct ib_gid_attr gattr; - int rc; - - rc = ib_query_gid(smcibdev->ibdev, ibport, 0, - &smcibdev->gid[ibport - 1], &gattr); - if (rc || !gattr.ndev) - return -ENODEV; - - memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN); - dev_put(gattr.ndev); - return 0; -} - -/* Create an identifier unique for this instance of SMC-R. - * The MAC-address of the first active registered IB device - * plus a random 2-byte number is used to create this identifier. - * This name is delivered to the peer during connection initialization. - */ -static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev, - u8 ibport) -{ - memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1], - sizeof(smcibdev->mac[ibport - 1])); - get_random_bytes(&local_systemid[0], 2); -} - -bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) -{ - return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; -} - -int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) -{ - int rc; - - memset(&smcibdev->pattr[ibport - 1], 0, - sizeof(smcibdev->pattr[ibport - 1])); - rc = ib_query_port(smcibdev->ibdev, ibport, - &smcibdev->pattr[ibport - 1]); - if (rc) - goto out; - /* the SMC protocol requires specification of the RoCE MAC address */ - rc = smc_ib_fill_gid_and_mac(smcibdev, ibport); - if (rc) - goto out; - if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET, - sizeof(local_systemid)) && - smc_ib_port_active(smcibdev, ibport)) - /* create unique system identifier */ - smc_ib_define_local_systemid(smcibdev, ibport); -out: - return rc; -} - long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) { struct ib_cq_init_attr cqattr = { @@ -454,9 +454,6 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) smcibdev->roce_cq_recv = NULL; goto err; } - INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev, - smc_ib_global_event_handler); - ib_register_event_handler(&smcibdev->event_handler); smc_wr_add_dev(smcibdev); smcibdev->initialized = 1; return rc; @@ -472,7 +469,6 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) return; smcibdev->initialized = 0; smc_wr_remove_dev(smcibdev); - ib_unregister_event_handler(&smcibdev->event_handler); ib_destroy_cq(smcibdev->roce_cq_recv); ib_destroy_cq(smcibdev->roce_cq_send); } @@ -483,6 +479,8 @@ static struct ib_client smc_ib_client; static void smc_ib_add_dev(struct ib_device *ibdev) { struct smc_ib_device *smcibdev; + u8 port_cnt; + int i; if (ibdev->node_type != RDMA_NODE_IB_CA) return; @@ -498,6 +496,17 @@ static void smc_ib_add_dev(struct ib_device *ibdev) list_add_tail(&smcibdev->list, &smc_ib_devices.list); spin_unlock(&smc_ib_devices.lock); ib_set_client_data(ibdev, &smc_ib_client, smcibdev); + INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev, + smc_ib_global_event_handler); + ib_register_event_handler(&smcibdev->event_handler); + + /* trigger reading of the port attributes */ + port_cnt = smcibdev->ibdev->phys_port_cnt; + for (i = 0; + i < min_t(size_t, port_cnt, SMC_MAX_PORTS); + i++) + set_bit(i, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); } /* callback function for ib_register_client() */ @@ -512,6 +521,7 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) spin_unlock(&smc_ib_devices.lock); smc_pnet_remove_by_ibdev(smcibdev); smc_ib_cleanup_per_ibdev(smcibdev); + ib_unregister_event_handler(&smcibdev->event_handler); kfree(smcibdev); } diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index e90630dadf8e..2c480b352928 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -51,7 +51,6 @@ struct smc_link; int smc_ib_register_client(void) __init; void smc_ib_unregister_client(void); bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport); -int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport); int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index d7b88b2d1b22..a82a5cad0282 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -358,9 +358,6 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) kfree(pnetelem); return rc; } - rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port); - if (rc) - smc_pnet_remove_by_pnetid(pnetelem->pnet_name); return rc; } @@ -485,10 +482,10 @@ static int smc_pnet_netdev_event(struct notifier_block *this, case NETDEV_REBOOT: case NETDEV_UNREGISTER: smc_pnet_remove_by_ndev(event_dev); + return NOTIFY_OK; default: - break; + return NOTIFY_DONE; } - return NOTIFY_DONE; } static struct notifier_block smc_netdev_notifier = { -- cgit v1.2.3-59-g8ed1b From 0afff91c6f5ecef27715ea71e34dc2baacba1060 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 28 Jun 2018 19:05:05 +0200 Subject: net/smc: add pnetid support s390 hardware supports the definition of a so-call Physical NETwork IDentifier (short PNETID) per network device port. These PNETIDS can be used to identify network devices that are attached to the same physical network (broadcast domain). On s390 try to use the PNETID of the ethernet device port used for initial connecting, and derive the IB device port used for SMC RDMA traffic. On platforms without PNETID support fall back to the existing solution of a configured pnet table. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- include/net/smc.h | 2 + net/smc/smc_ib.c | 6 ++- net/smc/smc_ib.h | 3 ++ net/smc/smc_pnet.c | 109 +++++++++++++++++++++++++++++++++++++++++++---------- net/smc/smc_pnet.h | 14 +++++++ 5 files changed, 114 insertions(+), 20 deletions(-) (limited to 'net/smc') diff --git a/include/net/smc.h b/include/net/smc.h index 8381d163fefa..2173932fab9d 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -11,6 +11,8 @@ #ifndef _SMC_H #define _SMC_H +#define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */ + struct smc_hashinfo { rwlock_t lock; struct hlist_head ht; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index f8b159ced032..36de2fd76170 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -504,8 +504,12 @@ static void smc_ib_add_dev(struct ib_device *ibdev) port_cnt = smcibdev->ibdev->phys_port_cnt; for (i = 0; i < min_t(size_t, port_cnt, SMC_MAX_PORTS); - i++) + i++) { set_bit(i, &smcibdev->port_event_mask); + /* determine pnetids of the port */ + smc_pnetid_by_dev_port(ibdev->dev.parent, i, + smcibdev->pnetid[i]); + } schedule_work(&smcibdev->port_event_work); } diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 2c480b352928..7c1223c91229 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -15,6 +15,7 @@ #include #include #include +#include #define SMC_MAX_PORTS 2 /* Max # of ports */ #define SMC_GID_SIZE sizeof(union ib_gid) @@ -40,6 +41,8 @@ struct smc_ib_device { /* ib-device infos for smc */ char mac[SMC_MAX_PORTS][ETH_ALEN]; /* mac address per port*/ union ib_gid gid[SMC_MAX_PORTS]; /* gid per port */ + u8 pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN]; + /* pnetid per port */ u8 initialized : 1; /* ib dev CQ, evthdl done */ struct work_struct port_event_work; unsigned long port_event_mask; diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index a82a5cad0282..cdc6e23b6ce1 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -23,12 +23,10 @@ #include "smc_pnet.h" #include "smc_ib.h" -#define SMC_MAX_PNET_ID_LEN 16 /* Max. length of PNET id */ - static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { [SMC_PNETID_NAME] = { .type = NLA_NUL_STRING, - .len = SMC_MAX_PNET_ID_LEN - 1 + .len = SMC_MAX_PNETID_LEN - 1 }, [SMC_PNETID_ETHNAME] = { .type = NLA_NUL_STRING, @@ -65,7 +63,7 @@ static struct smc_pnettable { */ struct smc_pnetentry { struct list_head list; - char pnet_name[SMC_MAX_PNET_ID_LEN + 1]; + char pnet_name[SMC_MAX_PNETID_LEN + 1]; struct net_device *ndev; struct smc_ib_device *smcibdev; u8 ib_port; @@ -209,7 +207,7 @@ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) return false; while (--end >= bf && isspace(*end)) ; - if (end - bf >= SMC_MAX_PNET_ID_LEN) + if (end - bf >= SMC_MAX_PNETID_LEN) return false; while (bf <= end) { if (!isalnum(*bf)) @@ -512,26 +510,70 @@ void smc_pnet_exit(void) genl_unregister_family(&smc_pnet_nl_family); } -/* PNET table analysis for a given sock: - * determine ib_device and port belonging to used internal TCP socket - * ethernet interface. +/* Determine one base device for stacked net devices. + * If the lower device level contains more than one devices + * (for instance with bonding slaves), just the first device + * is used to reach a base device. */ -void smc_pnet_find_roce_resource(struct sock *sk, - struct smc_ib_device **smcibdev, u8 *ibport) +static struct net_device *pnet_find_base_ndev(struct net_device *ndev) { - struct dst_entry *dst = sk_dst_get(sk); - struct smc_pnetentry *pnetelem; + int i, nest_lvl; - *smcibdev = NULL; - *ibport = 0; + rtnl_lock(); + nest_lvl = dev_get_nest_level(ndev); + for (i = 0; i < nest_lvl; i++) { + struct list_head *lower = &ndev->adj_list.lower; + + if (list_empty(lower)) + break; + lower = lower->next; + ndev = netdev_lower_get_next(ndev, &lower); + } + rtnl_unlock(); + return ndev; +} + +/* Determine the corresponding IB device port based on the hardware PNETID. + * Searching stops at the first matching active IB device port. + */ +static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, + struct smc_ib_device **smcibdev, + u8 *ibport) +{ + u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; + struct smc_ib_device *ibdev; + int i; + + ndev = pnet_find_base_ndev(ndev); + if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, + ndev_pnetid)) + return; /* pnetid could not be determined */ + + spin_lock(&smc_ib_devices.lock); + list_for_each_entry(ibdev, &smc_ib_devices.list, list) { + for (i = 1; i <= SMC_MAX_PORTS; i++) { + if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid, + SMC_MAX_PNETID_LEN) && + smc_ib_port_active(ibdev, i)) { + *smcibdev = ibdev; + *ibport = i; + break; + } + } + } + spin_unlock(&smc_ib_devices.lock); +} + +/* Lookup of coupled ib_device via SMC pnet table */ +static void smc_pnet_find_roce_by_table(struct net_device *netdev, + struct smc_ib_device **smcibdev, + u8 *ibport) +{ + struct smc_pnetentry *pnetelem; - if (!dst) - return; - if (!dst->dev) - goto out_rel; read_lock(&smc_pnettable.lock); list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { - if (dst->dev == pnetelem->ndev) { + if (netdev == pnetelem->ndev) { if (smc_ib_port_active(pnetelem->smcibdev, pnetelem->ib_port)) { *smcibdev = pnetelem->smcibdev; @@ -541,6 +583,35 @@ void smc_pnet_find_roce_resource(struct sock *sk, } } read_unlock(&smc_pnettable.lock); +} + +/* PNET table analysis for a given sock: + * determine ib_device and port belonging to used internal TCP socket + * ethernet interface. + */ +void smc_pnet_find_roce_resource(struct sock *sk, + struct smc_ib_device **smcibdev, u8 *ibport) +{ + struct dst_entry *dst = sk_dst_get(sk); + + *smcibdev = NULL; + *ibport = 0; + + if (!dst) + goto out; + if (!dst->dev) + goto out_rel; + + /* if possible, lookup via hardware-defined pnetid */ + smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport); + if (*smcibdev) + goto out_rel; + + /* lookup via SMC PNET table */ + smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport); + out_rel: dst_release(dst); +out: + return; } diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 5a29519db976..ad4455cde9e7 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -12,8 +12,22 @@ #ifndef _SMC_PNET_H #define _SMC_PNET_H +#if IS_ENABLED(CONFIG_HAVE_PNETID) +#include +#endif + struct smc_ib_device; +static inline int smc_pnetid_by_dev_port(struct device *dev, + unsigned short port, u8 *pnetid) +{ +#if IS_ENABLED(CONFIG_HAVE_PNETID) + return pnet_id_by_dev_port(dev, port, pnetid); +#else + return -ENOENT; +#endif +} + int smc_pnet_init(void) __init; void smc_pnet_exit(void); int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev); -- cgit v1.2.3-59-g8ed1b From e82f2e31f5597a3de44bd27b7427f577f637c552 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 28 Jun 2018 19:05:06 +0200 Subject: net/smc: optimize consumer cursor updates The SMC protocol requires to send a separate consumer cursor update, if it cannot be piggybacked to updates of the producer cursor. Currently the decision to send a separate consumer cursor update just considers the amount of data already received by the socket program. It does not consider the amount of data already arrived, but not yet consumed by the receiver. Basing the decision on the difference between already confirmed and already arrived data (instead of difference between already confirmed and already consumed data), may lead to a somewhat earlier consumer cursor update send in fast unidirectional traffic scenarios, and thus to better throughput. Signed-off-by: Ursula Braun Suggested-by: Thomas Richter Signed-off-by: David S. Miller --- net/smc/smc_tx.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index cee666400752..f82886b7d1d8 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -495,7 +495,8 @@ out: void smc_tx_consumer_update(struct smc_connection *conn, bool force) { - union smc_host_cursor cfed, cons; + union smc_host_cursor cfed, cons, prod; + int sender_free = conn->rmb_desc->len; int to_confirm; smc_curs_write(&cons, @@ -505,11 +506,18 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force) smc_curs_read(&conn->rx_curs_confirmed, conn), conn); to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons); + if (to_confirm > conn->rmbe_update_limit) { + smc_curs_write(&prod, + smc_curs_read(&conn->local_rx_ctrl.prod, conn), + conn); + sender_free = conn->rmb_desc->len - + smc_curs_diff(conn->rmb_desc->len, &prod, &cfed); + } if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || force || ((to_confirm > conn->rmbe_update_limit) && - ((to_confirm > (conn->rmb_desc->len / 2)) || + ((sender_free <= (conn->rmb_desc->len / 2)) || conn->local_rx_ctrl.prod_flags.write_blocked))) { if ((smc_cdc_get_slot_and_msg_send(conn) < 0) && conn->alert_token_local) { /* connection healthy */ -- cgit v1.2.3-59-g8ed1b From c6ba7c9ba43de1b57e9a53946e7ff988554c84ed Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 28 Jun 2018 19:05:07 +0200 Subject: net/smc: add base infrastructure for SMC-D and ISM SMC supports two variants: SMC-R and SMC-D. For data transport, SMC-R uses RDMA devices, SMC-D uses so-called Internal Shared Memory (ISM) devices. An ISM device only allows shared memory communication between SMC instances on the same machine. For example, this allows virtual machines on the same host to communicate via SMC without RDMA devices. This patch adds the base infrastructure for SMC-D and ISM devices to the existing SMC code. It contains the following: * ISM driver interface: This interface allows an ISM driver to register ISM devices in SMC. In the process, the driver provides a set of device ops for each device. SMC uses these ops to execute SMC specific operations on or transfer data over the device. * Core SMC-D link group, connection, and buffer support: Link groups, SMC connections and SMC buffers (in smc_core) are extended to support SMC-D. * SMC type checks: Some type checks are added to prevent using SMC-R specific code for SMC-D and vice versa. To actually use SMC-D, additional changes to pnetid, CLC, CDC, etc. are required. These are added in follow-up patches. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Suggested-by: Thomas Richter Signed-off-by: David S. Miller --- include/net/smc.h | 62 +++++++++++ net/smc/Makefile | 2 +- net/smc/af_smc.c | 11 +- net/smc/smc_core.c | 270 +++++++++++++++++++++++++++++++++++------------ net/smc/smc_core.h | 71 +++++++++---- net/smc/smc_diag.c | 3 +- net/smc/smc_ism.c | 304 +++++++++++++++++++++++++++++++++++++++++++++++++++++ net/smc/smc_ism.h | 48 +++++++++ 8 files changed, 679 insertions(+), 92 deletions(-) create mode 100644 net/smc/smc_ism.c create mode 100644 net/smc/smc_ism.h (limited to 'net/smc') diff --git a/include/net/smc.h b/include/net/smc.h index 2173932fab9d..824a7af8d654 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -20,4 +20,66 @@ struct smc_hashinfo { int smc_hash_sk(struct sock *sk); void smc_unhash_sk(struct sock *sk); + +/* SMCD/ISM device driver interface */ +struct smcd_dmb { + u64 dmb_tok; + u64 rgid; + u32 dmb_len; + u32 sba_idx; + u32 vlan_valid; + u32 vlan_id; + void *cpu_addr; + dma_addr_t dma_addr; +}; + +#define ISM_EVENT_DMB 0 +#define ISM_EVENT_GID 1 +#define ISM_EVENT_SWR 2 + +struct smcd_event { + u32 type; + u32 code; + u64 tok; + u64 time; + u64 info; +}; + +struct smcd_dev; + +struct smcd_ops { + int (*query_remote_gid)(struct smcd_dev *dev, u64 rgid, u32 vid_valid, + u32 vid); + int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb); + int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb); + int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id); + int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id); + int (*set_vlan_required)(struct smcd_dev *dev); + int (*reset_vlan_required)(struct smcd_dev *dev); + int (*signal_event)(struct smcd_dev *dev, u64 rgid, u32 trigger_irq, + u32 event_code, u64 info); + int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx, + bool sf, unsigned int offset, void *data, + unsigned int size); +}; + +struct smcd_dev { + const struct smcd_ops *ops; + struct device dev; + void *priv; + u64 local_gid; + struct list_head list; + spinlock_t lock; + struct smc_connection **conn; + struct list_head vlan; + struct workqueue_struct *event_wq; +}; + +struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, + const struct smcd_ops *ops, int max_dmbs); +int smcd_register_dev(struct smcd_dev *smcd); +void smcd_unregister_dev(struct smcd_dev *smcd); +void smcd_free_dev(struct smcd_dev *smcd); +void smcd_handle_event(struct smcd_dev *dev, struct smcd_event *event); +void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit); #endif /* _SMC_H */ diff --git a/net/smc/Makefile b/net/smc/Makefile index 188104654b54..4df96b4b8130 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_SMC) += smc.o obj-$(CONFIG_SMC_DIAG) += smc_diag.o smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o -smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o +smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index da7f02edcd37..8ce48799cf68 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -475,8 +475,8 @@ static int smc_connect_rdma(struct smc_sock *smc, int reason_code = 0; mutex_lock(&smc_create_lgr_pending); - local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl, - aclc->hdr.flag); + local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev, + ibport, &aclc->lcl, NULL, 0); if (local_contact < 0) { if (local_contact == -ENOMEM) reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ @@ -491,7 +491,7 @@ static int smc_connect_rdma(struct smc_sock *smc, smc_conn_save_peer_info(smc, aclc); /* create send buffer and rmb */ - if (smc_buf_create(smc)) + if (smc_buf_create(smc, false)) return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact); if (local_contact == SMC_FIRST_CONTACT) @@ -894,7 +894,8 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc, int *local_contact) { /* allocate connection / link group */ - *local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0); + *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, + &pclc->lcl, NULL, 0); if (*local_contact < 0) { if (*local_contact == -ENOMEM) return SMC_CLC_DECL_MEM;/* insufficient memory*/ @@ -902,7 +903,7 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc, } /* create send buffer and rmb */ - if (smc_buf_create(new_smc)) + if (smc_buf_create(new_smc, false)) return SMC_CLC_DECL_MEM; return 0; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index add82b0266f3..daa88db1841a 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -25,6 +25,7 @@ #include "smc_llc.h" #include "smc_cdc.h" #include "smc_close.h" +#include "smc_ism.h" #define SMC_LGR_NUM_INCR 256 #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) @@ -46,8 +47,8 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) * otherwise there is a risk of out-of-sync link groups. */ mod_delayed_work(system_wq, &lgr->free_work, - lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : - SMC_LGR_FREE_DELAY_SERV); + (!lgr->is_smcd && lgr->role == SMC_CLNT) ? + SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV); } /* Register connection's alert token in our lookup structure. @@ -153,16 +154,18 @@ static void smc_lgr_free_work(struct work_struct *work) free: spin_unlock_bh(&smc_lgr_list.lock); if (!delayed_work_pending(&lgr->free_work)) { - if (lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE) + if (!lgr->is_smcd && + lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE) smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); smc_lgr_free(lgr); } } /* create a new SMC link group */ -static int smc_lgr_create(struct smc_sock *smc, +static int smc_lgr_create(struct smc_sock *smc, bool is_smcd, struct smc_ib_device *smcibdev, u8 ibport, - char *peer_systemid, unsigned short vlan_id) + char *peer_systemid, unsigned short vlan_id, + struct smcd_dev *smcismdev, u64 peer_gid) { struct smc_link_group *lgr; struct smc_link *lnk; @@ -170,17 +173,23 @@ static int smc_lgr_create(struct smc_sock *smc, int rc = 0; int i; + if (is_smcd && vlan_id) { + rc = smc_ism_get_vlan(smcismdev, vlan_id); + if (rc) + goto out; + } + lgr = kzalloc(sizeof(*lgr), GFP_KERNEL); if (!lgr) { rc = -ENOMEM; goto out; } - lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; + lgr->is_smcd = is_smcd; lgr->sync_err = 0; - memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN); lgr->vlan_id = vlan_id; rwlock_init(&lgr->sndbufs_lock); rwlock_init(&lgr->rmbs_lock); + rwlock_init(&lgr->conns_lock); for (i = 0; i < SMC_RMBE_SIZES; i++) { INIT_LIST_HEAD(&lgr->sndbufs[i]); INIT_LIST_HEAD(&lgr->rmbs[i]); @@ -189,36 +198,44 @@ static int smc_lgr_create(struct smc_sock *smc, memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE); INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); lgr->conns_all = RB_ROOT; - - lnk = &lgr->lnk[SMC_SINGLE_LINK]; - /* initialize link */ - lnk->state = SMC_LNK_ACTIVATING; - lnk->link_id = SMC_SINGLE_LINK; - lnk->smcibdev = smcibdev; - lnk->ibport = ibport; - lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; - if (!smcibdev->initialized) - smc_ib_setup_per_ibdev(smcibdev); - get_random_bytes(rndvec, sizeof(rndvec)); - lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16); - rc = smc_llc_link_init(lnk); - if (rc) - goto free_lgr; - rc = smc_wr_alloc_link_mem(lnk); - if (rc) - goto clear_llc_lnk; - rc = smc_ib_create_protection_domain(lnk); - if (rc) - goto free_link_mem; - rc = smc_ib_create_queue_pair(lnk); - if (rc) - goto dealloc_pd; - rc = smc_wr_create_link(lnk); - if (rc) - goto destroy_qp; - + if (is_smcd) { + /* SMC-D specific settings */ + lgr->peer_gid = peer_gid; + lgr->smcd = smcismdev; + } else { + /* SMC-R specific settings */ + lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; + memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN); + + lnk = &lgr->lnk[SMC_SINGLE_LINK]; + /* initialize link */ + lnk->state = SMC_LNK_ACTIVATING; + lnk->link_id = SMC_SINGLE_LINK; + lnk->smcibdev = smcibdev; + lnk->ibport = ibport; + lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; + if (!smcibdev->initialized) + smc_ib_setup_per_ibdev(smcibdev); + get_random_bytes(rndvec, sizeof(rndvec)); + lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + + (rndvec[2] << 16); + rc = smc_llc_link_init(lnk); + if (rc) + goto free_lgr; + rc = smc_wr_alloc_link_mem(lnk); + if (rc) + goto clear_llc_lnk; + rc = smc_ib_create_protection_domain(lnk); + if (rc) + goto free_link_mem; + rc = smc_ib_create_queue_pair(lnk); + if (rc) + goto dealloc_pd; + rc = smc_wr_create_link(lnk); + if (rc) + goto destroy_qp; + } smc->conn.lgr = lgr; - rwlock_init(&lgr->conns_lock); spin_lock_bh(&smc_lgr_list.lock); list_add(&lgr->list, &smc_lgr_list.list); spin_unlock_bh(&smc_lgr_list.lock); @@ -264,7 +281,10 @@ void smc_conn_free(struct smc_connection *conn) { if (!conn->lgr) return; - smc_cdc_tx_dismiss_slots(conn); + if (conn->lgr->is_smcd) + smc_ism_unset_conn(conn); + else + smc_cdc_tx_dismiss_slots(conn); smc_lgr_unregister_conn(conn); smc_buf_unuse(conn); } @@ -280,8 +300,8 @@ static void smc_link_clear(struct smc_link *lnk) smc_wr_free_link_mem(lnk); } -static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, - struct smc_buf_desc *buf_desc) +static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, + struct smc_buf_desc *buf_desc) { struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; @@ -301,6 +321,25 @@ static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, kfree(buf_desc); } +static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb, + struct smc_buf_desc *buf_desc) +{ + if (is_dmb) + smc_ism_unregister_dmb(lgr->smcd, buf_desc); + else + kfree(buf_desc->cpu_addr); + kfree(buf_desc); +} + +static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, + struct smc_buf_desc *buf_desc) +{ + if (lgr->is_smcd) + smcd_buf_free(lgr, is_rmb, buf_desc); + else + smcr_buf_free(lgr, is_rmb, buf_desc); +} + static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) { struct smc_buf_desc *buf_desc, *bf_desc; @@ -332,7 +371,10 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr) void smc_lgr_free(struct smc_link_group *lgr) { smc_lgr_free_bufs(lgr); - smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); + if (lgr->is_smcd) + smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); + else + smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); kfree(lgr); } @@ -357,7 +399,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) lgr->terminating = 1; if (!list_empty(&lgr->list)) /* forget lgr */ list_del_init(&lgr->list); - smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); + if (!lgr->is_smcd) + smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); write_lock_bh(&lgr->conns_lock); node = rb_first(&lgr->conns_all); @@ -374,7 +417,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) node = rb_first(&lgr->conns_all); } write_unlock_bh(&lgr->conns_lock); - wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); + if (!lgr->is_smcd) + wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); smc_lgr_schedule_free_work(lgr); } @@ -392,13 +436,40 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) spin_lock_bh(&smc_lgr_list.lock); list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { - if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && + if (!lgr->is_smcd && + lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) __smc_lgr_terminate(lgr); } spin_unlock_bh(&smc_lgr_list.lock); } +/* Called when SMC-D device is terminated or peer is lost */ +void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid) +{ + struct smc_link_group *lgr, *l; + LIST_HEAD(lgr_free_list); + + /* run common cleanup function and build free list */ + spin_lock_bh(&smc_lgr_list.lock); + list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { + if (lgr->is_smcd && lgr->smcd == dev && + (!peer_gid || lgr->peer_gid == peer_gid) && + !list_empty(&lgr->list)) { + __smc_lgr_terminate(lgr); + list_move(&lgr->list, &lgr_free_list); + } + } + spin_unlock_bh(&smc_lgr_list.lock); + + /* cancel the regular free workers and actually free lgrs */ + list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { + list_del_init(&lgr->list); + cancel_delayed_work_sync(&lgr->free_work); + smc_lgr_free(lgr); + } +} + /* Determine vlan of internal TCP socket. * @vlan_id: address to store the determined vlan id into */ @@ -477,10 +548,30 @@ static int smc_link_determine_gid(struct smc_link_group *lgr) return -ENODEV; } +static bool smcr_lgr_match(struct smc_link_group *lgr, + struct smc_clc_msg_local *lcl, + enum smc_lgr_role role) +{ + return !memcmp(lgr->peer_systemid, lcl->id_for_peer, + SMC_SYSTEMID_LEN) && + !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid, + SMC_GID_SIZE) && + !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac, + sizeof(lcl->mac)) && + lgr->role == role; +} + +static bool smcd_lgr_match(struct smc_link_group *lgr, + struct smcd_dev *smcismdev, u64 peer_gid) +{ + return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev; +} + /* create a new SMC connection (and a new link group if necessary) */ -int smc_conn_create(struct smc_sock *smc, +int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, struct smc_ib_device *smcibdev, u8 ibport, - struct smc_clc_msg_local *lcl, int srv_first_contact) + struct smc_clc_msg_local *lcl, struct smcd_dev *smcd, + u64 peer_gid) { struct smc_connection *conn = &smc->conn; int local_contact = SMC_FIRST_CONTACT; @@ -502,17 +593,12 @@ int smc_conn_create(struct smc_sock *smc, spin_lock_bh(&smc_lgr_list.lock); list_for_each_entry(lgr, &smc_lgr_list.list, list) { write_lock_bh(&lgr->conns_lock); - if (!memcmp(lgr->peer_systemid, lcl->id_for_peer, - SMC_SYSTEMID_LEN) && - !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid, - SMC_GID_SIZE) && - !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac, - sizeof(lcl->mac)) && + if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) : + smcr_lgr_match(lgr, lcl, role)) && !lgr->sync_err && - (lgr->role == role) && - (lgr->vlan_id == vlan_id) && - ((role == SMC_CLNT) || - (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) { + lgr->vlan_id == vlan_id && + (role == SMC_CLNT || + lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { /* link group found */ local_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; @@ -535,12 +621,13 @@ int smc_conn_create(struct smc_sock *smc, create: if (local_contact == SMC_FIRST_CONTACT) { - rc = smc_lgr_create(smc, smcibdev, ibport, - lcl->id_for_peer, vlan_id); + rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport, + lcl->id_for_peer, vlan_id, smcd, peer_gid); if (rc) goto out; smc_lgr_register_conn(conn); /* add smc conn to lgr */ - rc = smc_link_determine_gid(conn->lgr); + if (!is_smcd) + rc = smc_link_determine_gid(conn->lgr); } conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; @@ -609,8 +696,8 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size) return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); } -static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, - bool is_rmb, int bufsize) +static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, + bool is_rmb, int bufsize) { struct smc_buf_desc *buf_desc; struct smc_link *lnk; @@ -668,7 +755,43 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, return buf_desc; } -static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) +#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ + +static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, + bool is_dmb, int bufsize) +{ + struct smc_buf_desc *buf_desc; + int rc; + + if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES) + return ERR_PTR(-EAGAIN); + + /* try to alloc a new DMB */ + buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); + if (!buf_desc) + return ERR_PTR(-ENOMEM); + if (is_dmb) { + rc = smc_ism_register_dmb(lgr, bufsize, buf_desc); + if (rc) { + kfree(buf_desc); + return ERR_PTR(-EAGAIN); + } + memset(buf_desc->cpu_addr, 0, bufsize); + buf_desc->len = bufsize; + } else { + buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL | + __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC); + if (!buf_desc->cpu_addr) { + kfree(buf_desc); + return ERR_PTR(-EAGAIN); + } + buf_desc->len = bufsize; + } + return buf_desc; +} + +static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) { struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); struct smc_connection *conn = &smc->conn; @@ -706,7 +829,11 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) break; /* found reusable slot */ } - buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize); + if (is_smcd) + buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize); + else + buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize); + if (PTR_ERR(buf_desc) == -ENOMEM) break; if (IS_ERR(buf_desc)) @@ -728,6 +855,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) smc->sk.sk_rcvbuf = bufsize * 2; atomic_set(&conn->bytes_to_rcv, 0); conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize); + if (is_smcd) + smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ } else { conn->sndbuf_desc = buf_desc; smc->sk.sk_sndbuf = bufsize * 2; @@ -740,6 +869,8 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; + if (!conn->lgr || conn->lgr->is_smcd) + return; smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, conn->sndbuf_desc, DMA_TO_DEVICE); } @@ -748,6 +879,8 @@ void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; + if (!conn->lgr || conn->lgr->is_smcd) + return; smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, conn->sndbuf_desc, DMA_TO_DEVICE); } @@ -756,6 +889,8 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; + if (!conn->lgr || conn->lgr->is_smcd) + return; smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, conn->rmb_desc, DMA_FROM_DEVICE); } @@ -764,6 +899,8 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; + if (!conn->lgr || conn->lgr->is_smcd) + return; smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, conn->rmb_desc, DMA_FROM_DEVICE); } @@ -774,16 +911,16 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn) * the Linux implementation uses just one RMB-element per RMB, i.e. uses an * extra RMB for every connection in a link group */ -int smc_buf_create(struct smc_sock *smc) +int smc_buf_create(struct smc_sock *smc, bool is_smcd) { int rc; /* create send buffer */ - rc = __smc_buf_create(smc, false); + rc = __smc_buf_create(smc, is_smcd, false); if (rc) return rc; /* create rmb */ - rc = __smc_buf_create(smc, true); + rc = __smc_buf_create(smc, is_smcd, true); if (rc) smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc); return rc; @@ -865,7 +1002,8 @@ void smc_core_exit(void) spin_unlock_bh(&smc_lgr_list.lock); list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { list_del_init(&lgr->list); - smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); + if (!lgr->is_smcd) + smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); cancel_delayed_work_sync(&lgr->free_work); smc_lgr_free(lgr); /* free link group */ } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 93cb3523bf50..cd9268a9570e 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -124,15 +124,28 @@ struct smc_buf_desc { void *cpu_addr; /* virtual address of buffer */ struct page *pages; int len; /* length of buffer */ - struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */ - struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; - /* for rmb only: memory region - * incl. rkey provided to peer - */ - u32 order; /* allocation order */ u32 used; /* currently used / unused */ u8 reused : 1; /* new created / reused */ u8 regerr : 1; /* err during registration */ + union { + struct { /* SMC-R */ + struct sg_table sgt[SMC_LINKS_PER_LGR_MAX]; + /* virtual buffer */ + struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; + /* for rmb only: memory region + * incl. rkey provided to peer + */ + u32 order; /* allocation order */ + }; + struct { /* SMC-D */ + unsigned short sba_idx; + /* SBA index number */ + u64 token; + /* DMB token number */ + dma_addr_t dma_addr; + /* DMA address */ + }; + }; }; struct smc_rtoken { /* address/key of remote RMB */ @@ -148,12 +161,10 @@ struct smc_rtoken { /* address/key of remote RMB */ * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15) */ +struct smcd_dev; + struct smc_link_group { struct list_head list; - enum smc_lgr_role role; /* client or server */ - struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */ - char peer_systemid[SMC_SYSTEMID_LEN]; - /* unique system_id of peer */ struct rb_root conns_all; /* connection tree */ rwlock_t conns_lock; /* protects conns_all */ unsigned int conns_num; /* current # of connections */ @@ -163,17 +174,35 @@ struct smc_link_group { rwlock_t sndbufs_lock; /* protects tx buffers */ struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */ rwlock_t rmbs_lock; /* protects rx buffers */ - struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX] - [SMC_LINKS_PER_LGR_MAX]; - /* remote addr/key pairs */ - unsigned long rtokens_used_mask[BITS_TO_LONGS( - SMC_RMBS_PER_LGR_MAX)]; - /* used rtoken elements */ u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */ struct delayed_work free_work; /* delayed freeing of an lgr */ u8 sync_err : 1; /* lgr no longer fits to peer */ u8 terminating : 1;/* lgr is terminating */ + + bool is_smcd; /* SMC-R or SMC-D */ + union { + struct { /* SMC-R */ + enum smc_lgr_role role; + /* client or server */ + struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; + /* smc link */ + char peer_systemid[SMC_SYSTEMID_LEN]; + /* unique system_id of peer */ + struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX] + [SMC_LINKS_PER_LGR_MAX]; + /* remote addr/key pairs */ + unsigned long rtokens_used_mask[BITS_TO_LONGS + (SMC_RMBS_PER_LGR_MAX)]; + /* used rtoken elements */ + }; + struct { /* SMC-D */ + u64 peer_gid; + /* Peer GID (remote) */ + struct smcd_dev *smcd; + /* ISM device for VLAN reg. */ + }; + }; }; /* Find the connection associated with the given alert token in the link group. @@ -217,7 +246,8 @@ void smc_lgr_free(struct smc_link_group *lgr); void smc_lgr_forget(struct smc_link_group *lgr); void smc_lgr_terminate(struct smc_link_group *lgr); void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport); -int smc_buf_create(struct smc_sock *smc); +void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid); +int smc_buf_create(struct smc_sock *smc, bool is_smcd); int smc_uncompress_bufsize(u8 compressed); int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_clc_msg_accept_confirm *clc); @@ -227,9 +257,12 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); void smc_rmb_sync_sg_for_device(struct smc_connection *conn); + void smc_conn_free(struct smc_connection *conn); -int smc_conn_create(struct smc_sock *smc, +int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, struct smc_ib_device *smcibdev, u8 ibport, - struct smc_clc_msg_local *lcl, int srv_first_contact); + struct smc_clc_msg_local *lcl, struct smcd_dev *smcd, + u64 peer_gid); +void smcd_conn_free(struct smc_connection *conn); void smc_core_exit(void); #endif diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 839354402215..64ce107c24d9 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -136,7 +136,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, goto errout; } - if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr && + if (smc->conn.lgr && !smc->conn.lgr->is_smcd && + (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c new file mode 100644 index 000000000000..ca1ce42fd49f --- /dev/null +++ b/net/smc/smc_ism.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Shared Memory Communications Direct over ISM devices (SMC-D) + * + * Functions for ISM device. + * + * Copyright IBM Corp. 2018 + */ + +#include +#include +#include + +#include "smc.h" +#include "smc_core.h" +#include "smc_ism.h" + +struct smcd_dev_list smcd_dev_list = { + .list = LIST_HEAD_INIT(smcd_dev_list.list), + .lock = __SPIN_LOCK_UNLOCKED(smcd_dev_list.lock) +}; + +/* Test if an ISM communication is possible. */ +int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd) +{ + return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0, + vlan_id); +} + +int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos, + void *data, size_t len) +{ + int rc; + + rc = smcd->ops->move_data(smcd, pos->token, pos->index, pos->signal, + pos->offset, data, len); + + return rc < 0 ? rc : 0; +} + +/* Set a connection using this DMBE. */ +void smc_ism_set_conn(struct smc_connection *conn) +{ + unsigned long flags; + + spin_lock_irqsave(&conn->lgr->smcd->lock, flags); + conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = conn; + spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags); +} + +/* Unset a connection using this DMBE. */ +void smc_ism_unset_conn(struct smc_connection *conn) +{ + unsigned long flags; + + if (!conn->rmb_desc) + return; + + spin_lock_irqsave(&conn->lgr->smcd->lock, flags); + conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = NULL; + spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags); +} + +/* Register a VLAN identifier with the ISM device. Use a reference count + * and add a VLAN identifier only when the first DMB using this VLAN is + * registered. + */ +int smc_ism_get_vlan(struct smcd_dev *smcd, unsigned short vlanid) +{ + struct smc_ism_vlanid *new_vlan, *vlan; + unsigned long flags; + int rc = 0; + + if (!vlanid) /* No valid vlan id */ + return -EINVAL; + + /* create new vlan entry, in case we need it */ + new_vlan = kzalloc(sizeof(*new_vlan), GFP_KERNEL); + if (!new_vlan) + return -ENOMEM; + new_vlan->vlanid = vlanid; + refcount_set(&new_vlan->refcnt, 1); + + /* if there is an existing entry, increase count and return */ + spin_lock_irqsave(&smcd->lock, flags); + list_for_each_entry(vlan, &smcd->vlan, list) { + if (vlan->vlanid == vlanid) { + refcount_inc(&vlan->refcnt); + kfree(new_vlan); + goto out; + } + } + + /* no existing entry found. + * add new entry to device; might fail, e.g., if HW limit reached + */ + if (smcd->ops->add_vlan_id(smcd, vlanid)) { + kfree(new_vlan); + rc = -EIO; + goto out; + } + list_add_tail(&new_vlan->list, &smcd->vlan); +out: + spin_unlock_irqrestore(&smcd->lock, flags); + return rc; +} + +/* Unregister a VLAN identifier with the ISM device. Use a reference count + * and remove a VLAN identifier only when the last DMB using this VLAN is + * unregistered. + */ +int smc_ism_put_vlan(struct smcd_dev *smcd, unsigned short vlanid) +{ + struct smc_ism_vlanid *vlan; + unsigned long flags; + bool found = false; + int rc = 0; + + if (!vlanid) /* No valid vlan id */ + return -EINVAL; + + spin_lock_irqsave(&smcd->lock, flags); + list_for_each_entry(vlan, &smcd->vlan, list) { + if (vlan->vlanid == vlanid) { + if (!refcount_dec_and_test(&vlan->refcnt)) + goto out; + found = true; + break; + } + } + if (!found) { + rc = -ENOENT; + goto out; /* VLAN id not in table */ + } + + /* Found and the last reference just gone */ + if (smcd->ops->del_vlan_id(smcd, vlanid)) + rc = -EIO; + list_del(&vlan->list); + kfree(vlan); +out: + spin_unlock_irqrestore(&smcd->lock, flags); + return rc; +} + +int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc) +{ + struct smcd_dmb dmb; + + memset(&dmb, 0, sizeof(dmb)); + dmb.dmb_tok = dmb_desc->token; + dmb.sba_idx = dmb_desc->sba_idx; + dmb.cpu_addr = dmb_desc->cpu_addr; + dmb.dma_addr = dmb_desc->dma_addr; + dmb.dmb_len = dmb_desc->len; + return smcd->ops->unregister_dmb(smcd, &dmb); +} + +int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len, + struct smc_buf_desc *dmb_desc) +{ + struct smcd_dmb dmb; + int rc; + + memset(&dmb, 0, sizeof(dmb)); + dmb.dmb_len = dmb_len; + dmb.sba_idx = dmb_desc->sba_idx; + dmb.vlan_id = lgr->vlan_id; + dmb.rgid = lgr->peer_gid; + rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb); + if (!rc) { + dmb_desc->sba_idx = dmb.sba_idx; + dmb_desc->token = dmb.dmb_tok; + dmb_desc->cpu_addr = dmb.cpu_addr; + dmb_desc->dma_addr = dmb.dma_addr; + dmb_desc->len = dmb.dmb_len; + } + return rc; +} + +struct smc_ism_event_work { + struct work_struct work; + struct smcd_dev *smcd; + struct smcd_event event; +}; + +/* worker for SMC-D events */ +static void smc_ism_event_work(struct work_struct *work) +{ + struct smc_ism_event_work *wrk = + container_of(work, struct smc_ism_event_work, work); + + switch (wrk->event.type) { + case ISM_EVENT_GID: /* GID event, token is peer GID */ + smc_smcd_terminate(wrk->smcd, wrk->event.tok); + break; + case ISM_EVENT_DMB: + break; + } + kfree(wrk); +} + +static void smcd_release(struct device *dev) +{ + struct smcd_dev *smcd = container_of(dev, struct smcd_dev, dev); + + kfree(smcd->conn); + kfree(smcd); +} + +struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, + const struct smcd_ops *ops, int max_dmbs) +{ + struct smcd_dev *smcd; + + smcd = kzalloc(sizeof(*smcd), GFP_KERNEL); + if (!smcd) + return NULL; + smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *), + GFP_KERNEL); + if (!smcd->conn) { + kfree(smcd); + return NULL; + } + + smcd->dev.parent = parent; + smcd->dev.release = smcd_release; + device_initialize(&smcd->dev); + dev_set_name(&smcd->dev, name); + smcd->ops = ops; + + spin_lock_init(&smcd->lock); + INIT_LIST_HEAD(&smcd->vlan); + smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)", + WQ_MEM_RECLAIM, name); + return smcd; +} +EXPORT_SYMBOL_GPL(smcd_alloc_dev); + +int smcd_register_dev(struct smcd_dev *smcd) +{ + spin_lock(&smcd_dev_list.lock); + list_add_tail(&smcd->list, &smcd_dev_list.list); + spin_unlock(&smcd_dev_list.lock); + + return device_add(&smcd->dev); +} +EXPORT_SYMBOL_GPL(smcd_register_dev); + +void smcd_unregister_dev(struct smcd_dev *smcd) +{ + spin_lock(&smcd_dev_list.lock); + list_del(&smcd->list); + spin_unlock(&smcd_dev_list.lock); + flush_workqueue(smcd->event_wq); + destroy_workqueue(smcd->event_wq); + smc_smcd_terminate(smcd, 0); + + device_del(&smcd->dev); +} +EXPORT_SYMBOL_GPL(smcd_unregister_dev); + +void smcd_free_dev(struct smcd_dev *smcd) +{ + put_device(&smcd->dev); +} +EXPORT_SYMBOL_GPL(smcd_free_dev); + +/* SMCD Device event handler. Called from ISM device interrupt handler. + * Parameters are smcd device pointer, + * - event->type (0 --> DMB, 1 --> GID), + * - event->code (event code), + * - event->tok (either DMB token when event type 0, or GID when event type 1) + * - event->time (time of day) + * - event->info (debug info). + * + * Context: + * - Function called in IRQ context from ISM device driver event handler. + */ +void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event) +{ + struct smc_ism_event_work *wrk; + + /* copy event to event work queue, and let it be handled there */ + wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC); + if (!wrk) + return; + INIT_WORK(&wrk->work, smc_ism_event_work); + wrk->smcd = smcd; + wrk->event = *event; + queue_work(smcd->event_wq, &wrk->work); +} +EXPORT_SYMBOL_GPL(smcd_handle_event); + +/* SMCD Device interrupt handler. Called from ISM device interrupt handler. + * Parameters are smcd device pointer and DMB number. Find the connection and + * schedule the tasklet for this connection. + * + * Context: + * - Function called in IRQ context from ISM device driver IRQ handler. + */ +void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno) +{ +} +EXPORT_SYMBOL_GPL(smcd_handle_irq); diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h new file mode 100644 index 000000000000..aee45b860b79 --- /dev/null +++ b/net/smc/smc_ism.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Shared Memory Communications Direct over ISM devices (SMC-D) + * + * SMC-D ISM device structure definitions. + * + * Copyright IBM Corp. 2018 + */ + +#ifndef SMCD_ISM_H +#define SMCD_ISM_H + +#include + +#include "smc.h" + +struct smcd_dev_list { /* List of SMCD devices */ + struct list_head list; + spinlock_t lock; /* Protects list of devices */ +}; + +extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */ + +struct smc_ism_vlanid { /* VLAN id set on ISM device */ + struct list_head list; + unsigned short vlanid; /* Vlan id */ + refcount_t refcnt; /* Reference count */ +}; + +struct smc_ism_position { /* ISM device position to write to */ + u64 token; /* Token of DMB */ + u32 offset; /* Offset into DMBE */ + u8 index; /* Index of DMBE */ + u8 signal; /* Generate interrupt on owner side */ +}; + +struct smcd_dev; + +int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev); +void smc_ism_set_conn(struct smc_connection *conn); +void smc_ism_unset_conn(struct smc_connection *conn); +int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id); +int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id); +int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size, + struct smc_buf_desc *dmb_desc); +int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc); +int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos, + void *data, size_t len); +#endif -- cgit v1.2.3-59-g8ed1b From 1619f770589a183af56f248de261534b255122de Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 28 Jun 2018 19:05:08 +0200 Subject: net/smc: add pnetid support for SMC-D and ISM SMC-D relies on PNETIDs to find usable SMC-D/ISM devices for a SMC connection. This patch adds SMC-D/ISM support to the current PNETID implementation. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Suggested-by: Thomas Richter Signed-off-by: David S. Miller --- include/net/smc.h | 1 + net/smc/smc_ism.c | 2 ++ net/smc/smc_pnet.c | 41 +++++++++++++++++++++++++++++++++++++++++ net/smc/smc_pnet.h | 2 ++ 4 files changed, 46 insertions(+) (limited to 'net/smc') diff --git a/include/net/smc.h b/include/net/smc.h index 824a7af8d654..9ef49f8b1002 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -73,6 +73,7 @@ struct smcd_dev { struct smc_connection **conn; struct list_head vlan; struct workqueue_struct *event_wq; + u8 pnetid[SMC_MAX_PNETID_LEN]; }; struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index ca1ce42fd49f..f44e4dff244a 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -13,6 +13,7 @@ #include "smc.h" #include "smc_core.h" #include "smc_ism.h" +#include "smc_pnet.h" struct smcd_dev_list smcd_dev_list = { .list = LIST_HEAD_INIT(smcd_dev_list.list), @@ -227,6 +228,7 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, device_initialize(&smcd->dev); dev_set_name(&smcd->dev, name); smcd->ops = ops; + smc_pnetid_by_dev_port(parent, 0, smcd->pnetid); spin_lock_init(&smcd->lock); INIT_LIST_HEAD(&smcd->vlan); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index cdc6e23b6ce1..1b6c066d3495 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -22,6 +22,7 @@ #include "smc_pnet.h" #include "smc_ib.h" +#include "smc_ism.h" static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { [SMC_PNETID_NAME] = { @@ -564,6 +565,27 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, spin_unlock(&smc_ib_devices.lock); } +static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, + struct smcd_dev **smcismdev) +{ + u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; + struct smcd_dev *ismdev; + + ndev = pnet_find_base_ndev(ndev); + if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, + ndev_pnetid)) + return; /* pnetid could not be determined */ + + spin_lock(&smcd_dev_list.lock); + list_for_each_entry(ismdev, &smcd_dev_list.list, list) { + if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) { + *smcismdev = ismdev; + break; + } + } + spin_unlock(&smcd_dev_list.lock); +} + /* Lookup of coupled ib_device via SMC pnet table */ static void smc_pnet_find_roce_by_table(struct net_device *netdev, struct smc_ib_device **smcibdev, @@ -615,3 +637,22 @@ out_rel: out: return; } + +void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev) +{ + struct dst_entry *dst = sk_dst_get(sk); + + *smcismdev = NULL; + if (!dst) + goto out; + if (!dst->dev) + goto out_rel; + + /* if possible, lookup via hardware-defined pnetid */ + smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev); + +out_rel: + dst_release(dst); +out: + return; +} diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index ad4455cde9e7..1e94fd4df7bc 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -17,6 +17,7 @@ #endif struct smc_ib_device; +struct smcd_dev; static inline int smc_pnetid_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid) @@ -33,5 +34,6 @@ void smc_pnet_exit(void); int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev); void smc_pnet_find_roce_resource(struct sock *sk, struct smc_ib_device **smcibdev, u8 *ibport); +void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev); #endif -- cgit v1.2.3-59-g8ed1b From c758dfddc1b5b1c9b8c64e5e4bb9bf24b74f4a59 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 28 Jun 2018 19:05:09 +0200 Subject: net/smc: add SMC-D support in CLC messages There are two types of SMC: SMC-R and SMC-D. These types are signaled within the CLC messages during the CLC handshake. This patch adds support for and checks of the SMC type. Also, SMC-R and SMC-D need to exchange different information during the CLC handshake. So, this patch extends the current message formats to support the SMC-D header fields. The Proposal message can contain both SMC-R and SMC-D information. The Accept and Confirm messages contain either SMC-R or SMC-D information. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Suggested-by: Thomas Richter Signed-off-by: David S. Miller --- net/smc/af_smc.c | 9 +-- net/smc/smc_clc.c | 193 ++++++++++++++++++++++++++++++++++++++---------------- net/smc/smc_clc.h | 81 ++++++++++++++++++----- 3 files changed, 205 insertions(+), 78 deletions(-) (limited to 'net/smc') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 8ce48799cf68..20afa94be8bb 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -451,14 +451,14 @@ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev, } /* CLC handshake during connect */ -static int smc_connect_clc(struct smc_sock *smc, +static int smc_connect_clc(struct smc_sock *smc, int smc_type, struct smc_clc_msg_accept_confirm *aclc, struct smc_ib_device *ibdev, u8 ibport) { int rc = 0; /* do inband token exchange */ - rc = smc_clc_send_proposal(smc, ibdev, ibport); + rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, NULL); if (rc) return rc; /* receive SMC Accept CLC message */ @@ -564,7 +564,7 @@ static int __smc_connect(struct smc_sock *smc) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR); /* perform CLC handshake */ - rc = smc_connect_clc(smc, &aclc, ibdev, ibport); + rc = smc_connect_clc(smc, SMC_TYPE_R, &aclc, ibdev, ibport); if (rc) return smc_connect_decline_fallback(smc, rc); @@ -1008,7 +1008,8 @@ static void smc_listen_work(struct work_struct *work) smc_tx_init(new_smc); /* check if RDMA is available */ - if (smc_check_rdma(new_smc, &ibdev, &ibport) || + if ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) || + smc_check_rdma(new_smc, &ibdev, &ibport) || smc_listen_rdma_check(new_smc, pclc) || smc_listen_rdma_init(new_smc, pclc, ibdev, ibport, &local_contact) || diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 717449b1da0b..038d70ef7892 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -23,9 +23,15 @@ #include "smc_core.h" #include "smc_clc.h" #include "smc_ib.h" +#include "smc_ism.h" + +#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 +#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 /* eye catcher "SMCR" EBCDIC for CLC messages */ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; +/* eye catcher "SMCD" EBCDIC for CLC messages */ +static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'}; /* check if received message has a correct header length and contains valid * heading and trailing eyecatchers @@ -38,10 +44,14 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) struct smc_clc_msg_decline *dclc; struct smc_clc_msg_trail *trl; - if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && + memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) return false; switch (clcm->type) { case SMC_CLC_PROPOSAL: + if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D && + clcm->path != SMC_TYPE_B) + return false; pclc = (struct smc_clc_msg_proposal *)clcm; pclc_prfx = smc_clc_proposal_get_prefix(pclc); if (ntohs(pclc->hdr.length) != @@ -56,10 +66,16 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) break; case SMC_CLC_ACCEPT: case SMC_CLC_CONFIRM: + if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D) + return false; clc = (struct smc_clc_msg_accept_confirm *)clcm; - if (ntohs(clc->hdr.length) != sizeof(*clc)) + if ((clcm->path == SMC_TYPE_R && + ntohs(clc->hdr.length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) || + (clcm->path == SMC_TYPE_D && + ntohs(clc->hdr.length) != SMCD_CLC_ACCEPT_CONFIRM_LEN)) return false; - trl = &clc->trl; + trl = (struct smc_clc_msg_trail *) + ((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl)); break; case SMC_CLC_DECLINE: dclc = (struct smc_clc_msg_decline *)clcm; @@ -70,7 +86,8 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) default: return false; } - if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && + memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) return false; return true; } @@ -295,6 +312,9 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, datlen = ntohs(clcm->length); if ((len < sizeof(struct smc_clc_msg_hdr)) || (datlen > buflen) || + (clcm->version != SMC_CLC_V1) || + (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D && + clcm->path != SMC_TYPE_B) || ((clcm->type != SMC_CLC_DECLINE) && (clcm->type != expected_type))) { smc->sk.sk_err = EPROTO; @@ -356,17 +376,18 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) } /* send CLC PROPOSAL message across internal TCP socket */ -int smc_clc_send_proposal(struct smc_sock *smc, - struct smc_ib_device *smcibdev, - u8 ibport) +int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, + struct smc_ib_device *ibdev, u8 ibport, + struct smcd_dev *ismdev) { struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX]; struct smc_clc_msg_proposal_prefix pclc_prfx; + struct smc_clc_msg_smcd pclc_smcd; struct smc_clc_msg_proposal pclc; struct smc_clc_msg_trail trl; int len, i, plen, rc; int reason_code = 0; - struct kvec vec[4]; + struct kvec vec[5]; struct msghdr msg; /* retrieve ip prefixes for CLC proposal msg */ @@ -381,18 +402,34 @@ int smc_clc_send_proposal(struct smc_sock *smc, memset(&pclc, 0, sizeof(pclc)); memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); pclc.hdr.type = SMC_CLC_PROPOSAL; - pclc.hdr.length = htons(plen); pclc.hdr.version = SMC_CLC_V1; /* SMC version */ - memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); - memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); - pclc.iparea_offset = htons(0); + pclc.hdr.path = smc_type; + if (smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B) { + /* add SMC-R specifics */ + memcpy(pclc.lcl.id_for_peer, local_systemid, + sizeof(local_systemid)); + memcpy(&pclc.lcl.gid, &ibdev->gid[ibport - 1], SMC_GID_SIZE); + memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], ETH_ALEN); + pclc.iparea_offset = htons(0); + } + if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) { + /* add SMC-D specifics */ + memset(&pclc_smcd, 0, sizeof(pclc_smcd)); + plen += sizeof(pclc_smcd); + pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET); + pclc_smcd.gid = ismdev->local_gid; + } + pclc.hdr.length = htons(plen); memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memset(&msg, 0, sizeof(msg)); i = 0; vec[i].iov_base = &pclc; vec[i++].iov_len = sizeof(pclc); + if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) { + vec[i].iov_base = &pclc_smcd; + vec[i++].iov_len = sizeof(pclc_smcd); + } vec[i].iov_base = &pclc_prfx; vec[i++].iov_len = sizeof(pclc_prfx); if (pclc_prfx.ipv6_prefixes_cnt > 0) { @@ -428,35 +465,56 @@ int smc_clc_send_confirm(struct smc_sock *smc) struct kvec vec; int len; - link = &conn->lgr->lnk[SMC_SINGLE_LINK]; /* send SMC Confirm CLC msg */ memset(&cclc, 0, sizeof(cclc)); - memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); cclc.hdr.type = SMC_CLC_CONFIRM; - cclc.hdr.length = htons(sizeof(cclc)); cclc.hdr.version = SMC_CLC_V1; /* SMC version */ - memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], - SMC_GID_SIZE); - memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); - hton24(cclc.qpn, link->roce_qp->qp_num); - cclc.rmb_rkey = - htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); - cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ - cclc.rmbe_alert_token = htonl(conn->alert_token_local); - cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); - cclc.rmbe_size = conn->rmbe_size_short; - cclc.rmb_dma_addr = cpu_to_be64( - (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); - hton24(cclc.psn, link->psn_initial); - - memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + if (smc->conn.lgr->is_smcd) { + /* SMC-D specific settings */ + memcpy(cclc.hdr.eyecatcher, SMCD_EYECATCHER, + sizeof(SMCD_EYECATCHER)); + cclc.hdr.path = SMC_TYPE_D; + cclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); + cclc.gid = conn->lgr->smcd->local_gid; + cclc.token = conn->rmb_desc->token; + cclc.dmbe_size = conn->rmbe_size_short; + cclc.dmbe_idx = 0; + memcpy(&cclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); + memcpy(cclc.smcd_trl.eyecatcher, SMCD_EYECATCHER, + sizeof(SMCD_EYECATCHER)); + } else { + /* SMC-R specific settings */ + link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + cclc.hdr.path = SMC_TYPE_R; + cclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); + memcpy(cclc.lcl.id_for_peer, local_systemid, + sizeof(local_systemid)); + memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], + SMC_GID_SIZE); + memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], + ETH_ALEN); + hton24(cclc.qpn, link->roce_qp->qp_num); + cclc.rmb_rkey = + htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ + cclc.rmbe_alert_token = htonl(conn->alert_token_local); + cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); + cclc.rmbe_size = conn->rmbe_size_short; + cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address + (conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); + hton24(cclc.psn, link->psn_initial); + memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + } memset(&msg, 0, sizeof(msg)); vec.iov_base = &cclc; - vec.iov_len = sizeof(cclc); - len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc)); - if (len < sizeof(cclc)) { + vec.iov_len = ntohs(cclc.hdr.length); + len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, + ntohs(cclc.hdr.length)); + if (len < ntohs(cclc.hdr.length)) { if (len >= 0) { reason_code = -ENETUNREACH; smc->sk.sk_err = -reason_code; @@ -479,35 +537,58 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) int rc = 0; int len; - link = &conn->lgr->lnk[SMC_SINGLE_LINK]; memset(&aclc, 0, sizeof(aclc)); - memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); aclc.hdr.type = SMC_CLC_ACCEPT; - aclc.hdr.length = htons(sizeof(aclc)); aclc.hdr.version = SMC_CLC_V1; /* SMC version */ if (srv_first_contact) aclc.hdr.flag = 1; - memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], - SMC_GID_SIZE); - memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); - hton24(aclc.qpn, link->roce_qp->qp_num); - aclc.rmb_rkey = - htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); - aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */ - aclc.rmbe_alert_token = htonl(conn->alert_token_local); - aclc.qp_mtu = link->path_mtu; - aclc.rmbe_size = conn->rmbe_size_short, - aclc.rmb_dma_addr = cpu_to_be64( - (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); - hton24(aclc.psn, link->psn_initial); - memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + + if (new_smc->conn.lgr->is_smcd) { + /* SMC-D specific settings */ + aclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); + memcpy(aclc.hdr.eyecatcher, SMCD_EYECATCHER, + sizeof(SMCD_EYECATCHER)); + aclc.hdr.path = SMC_TYPE_D; + aclc.gid = conn->lgr->smcd->local_gid; + aclc.token = conn->rmb_desc->token; + aclc.dmbe_size = conn->rmbe_size_short; + aclc.dmbe_idx = 0; + memcpy(&aclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); + memcpy(aclc.smcd_trl.eyecatcher, SMCD_EYECATCHER, + sizeof(SMCD_EYECATCHER)); + } else { + /* SMC-R specific settings */ + aclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); + memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + aclc.hdr.path = SMC_TYPE_R; + link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + memcpy(aclc.lcl.id_for_peer, local_systemid, + sizeof(local_systemid)); + memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], + SMC_GID_SIZE); + memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], + ETH_ALEN); + hton24(aclc.qpn, link->roce_qp->qp_num); + aclc.rmb_rkey = + htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */ + aclc.rmbe_alert_token = htonl(conn->alert_token_local); + aclc.qp_mtu = link->path_mtu; + aclc.rmbe_size = conn->rmbe_size_short, + aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address + (conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); + hton24(aclc.psn, link->psn_initial); + memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + } memset(&msg, 0, sizeof(msg)); vec.iov_base = &aclc; - vec.iov_len = sizeof(aclc); - len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc)); - if (len < sizeof(aclc)) { + vec.iov_len = ntohs(aclc.hdr.length); + len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, + ntohs(aclc.hdr.length)); + if (len < ntohs(aclc.hdr.length)) { if (len >= 0) new_smc->sk.sk_err = EPROTO; else diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 41ff9ea96139..100e988ad1a8 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -23,6 +23,9 @@ #define SMC_CLC_DECLINE 0x04 #define SMC_CLC_V1 0x1 /* SMC version */ +#define SMC_TYPE_R 0 /* SMC-R only */ +#define SMC_TYPE_D 1 /* SMC-D only */ +#define SMC_TYPE_B 3 /* SMC-R and SMC-D */ #define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */ #define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */ #define SMC_CLC_DECL_TIMEOUT 0x02000000 /* timeout */ @@ -42,9 +45,11 @@ struct smc_clc_msg_hdr { /* header1 of clc messages */ #if defined(__BIG_ENDIAN_BITFIELD) u8 version : 4, flag : 1, - rsvd : 3; + rsvd : 1, + path : 2; #elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 rsvd : 3, + u8 path : 2, + rsvd : 1, flag : 1, version : 4; #endif @@ -77,6 +82,11 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */ } __aligned(4); +struct smc_clc_msg_smcd { /* SMC-D GID information */ + u64 gid; /* ISM GID of requestor */ + u8 res[32]; +}; + struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ struct smc_clc_msg_hdr hdr; struct smc_clc_msg_local lcl; @@ -94,23 +104,45 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; - struct smc_clc_msg_local lcl; - u8 qpn[3]; /* QP number */ - __be32 rmb_rkey; /* RMB rkey */ - u8 rmbe_idx; /* Index of RMBE in RMB */ - __be32 rmbe_alert_token;/* unique connection id */ + union { + struct { /* SMC-R */ + struct smc_clc_msg_local lcl; + u8 qpn[3]; /* QP number */ + __be32 rmb_rkey; /* RMB rkey */ + u8 rmbe_idx; /* Index of RMBE in RMB */ + __be32 rmbe_alert_token;/* unique connection id */ #if defined(__BIG_ENDIAN_BITFIELD) - u8 rmbe_size : 4, /* RMBE buf size (compressed notation) */ - qp_mtu : 4; /* QP mtu */ + u8 rmbe_size : 4, /* buf size (compressed) */ + qp_mtu : 4; /* QP mtu */ #elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 qp_mtu : 4, - rmbe_size : 4; + u8 qp_mtu : 4, + rmbe_size : 4; #endif - u8 reserved; - __be64 rmb_dma_addr; /* RMB virtual address */ - u8 reserved2; - u8 psn[3]; /* initial packet sequence number */ - struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ + u8 reserved; + __be64 rmb_dma_addr; /* RMB virtual address */ + u8 reserved2; + u8 psn[3]; /* packet sequence number */ + struct smc_clc_msg_trail smcr_trl; + /* eye catcher "SMCR" EBCDIC */ + } __packed; + struct { /* SMC-D */ + u64 gid; /* Sender GID */ + u64 token; /* DMB token */ + u8 dmbe_idx; /* DMBE index */ +#if defined(__BIG_ENDIAN_BITFIELD) + u8 dmbe_size : 4, /* buf size (compressed) */ + reserved3 : 4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 reserved3 : 4, + dmbe_size : 4; +#endif + u16 reserved4; + u32 linkid; /* Link identifier */ + u32 reserved5[3]; + struct smc_clc_msg_trail smcd_trl; + /* eye catcher "SMCD" EBCDIC */ + } __packed; + }; } __packed; /* format defined in RFC7609 */ struct smc_clc_msg_decline { /* clc decline message */ @@ -129,13 +161,26 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); } +/* get SMC-D info from proposal message */ +static inline struct smc_clc_msg_smcd * +smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop) +{ + if (ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd)) + return NULL; + + return (struct smc_clc_msg_smcd *)(prop + 1); +} + +struct smcd_dev; + int smc_clc_prfx_match(struct socket *clcsock, struct smc_clc_msg_proposal_prefix *prop); int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type); int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); -int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev, - u8 ibport); +int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, + struct smc_ib_device *smcibdev, u8 ibport, + struct smcd_dev *ismdev); int smc_clc_send_confirm(struct smc_sock *smc); int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact); -- cgit v1.2.3-59-g8ed1b From be244f28d22f77d939ba2b973c102ad2b49d3496 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 28 Jun 2018 19:05:10 +0200 Subject: net/smc: add SMC-D support in data transfer The data transfer and CDC message headers differ in SMC-R and SMC-D. This patch adds support for the SMC-D data transfer to the existing SMC code. It consists of the following: * SMC-D CDC support * SMC-D tx support * SMC-D rx support The CDC header is stored at the beginning of the receive buffer. Thus, a rx_offset variable is added for the CDC header offset within the buffer (0 for SMC-R). Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Suggested-by: Thomas Richter Signed-off-by: David S. Miller --- net/smc/smc.h | 5 ++ net/smc/smc_cdc.c | 86 +++++++++++++++++++++++- net/smc/smc_cdc.h | 43 +++++++++++- net/smc/smc_core.c | 25 +++++-- net/smc/smc_ism.c | 8 +++ net/smc/smc_rx.c | 2 +- net/smc/smc_tx.c | 193 +++++++++++++++++++++++++++++++++++++++++------------ net/smc/smc_tx.h | 2 + 8 files changed, 308 insertions(+), 56 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc.h b/net/smc/smc.h index 7c86f716a92e..8c6231011779 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -183,6 +183,11 @@ struct smc_connection { spinlock_t acurs_lock; /* protect cursors */ #endif struct work_struct close_work; /* peer sent some closing */ + struct tasklet_struct rx_tsklet; /* Receiver tasklet for SMC-D */ + u8 rx_off; /* receive offset: + * 0 for SMC-R, 32 for SMC-D + */ + u64 peer_token; /* SMC-D token of peer */ }; struct smc_sock { /* smc sock container */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index a7e8d63fc8ae..621d8cca570b 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -117,7 +117,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, return rc; } -int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) +static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) { struct smc_cdc_tx_pend *pend; struct smc_wr_buf *wr_buf; @@ -130,6 +130,21 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) return smc_cdc_msg_send(conn, wr_buf, pend); } +int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) +{ + int rc; + + if (conn->lgr->is_smcd) { + spin_lock_bh(&conn->send_lock); + rc = smcd_cdc_msg_send(conn); + spin_unlock_bh(&conn->send_lock); + } else { + rc = smcr_cdc_get_slot_and_msg_send(conn); + } + + return rc; +} + static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend, unsigned long data) { @@ -157,6 +172,45 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn) (unsigned long)conn); } +/* Send a SMC-D CDC header. + * This increments the free space available in our send buffer. + * Also update the confirmed receive buffer with what was sent to the peer. + */ +int smcd_cdc_msg_send(struct smc_connection *conn) +{ + struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + struct smcd_cdc_msg cdc; + int rc, diff; + + memset(&cdc, 0, sizeof(cdc)); + cdc.common.type = SMC_CDC_MSG_TYPE; + cdc.prod_wrap = conn->local_tx_ctrl.prod.wrap; + cdc.prod_count = conn->local_tx_ctrl.prod.count; + + cdc.cons_wrap = conn->local_tx_ctrl.cons.wrap; + cdc.cons_count = conn->local_tx_ctrl.cons.count; + cdc.prod_flags = conn->local_tx_ctrl.prod_flags; + cdc.conn_state_flags = conn->local_tx_ctrl.conn_state_flags; + rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1); + if (rc) + return rc; + smc_curs_write(&conn->rx_curs_confirmed, + smc_curs_read(&conn->local_tx_ctrl.cons, conn), conn); + /* Calculate transmitted data and increment free send buffer space */ + diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin, + &conn->tx_curs_sent); + /* increased by confirmed number of bytes */ + smp_mb__before_atomic(); + atomic_add(diff, &conn->sndbuf_space); + /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ + smp_mb__after_atomic(); + smc_curs_write(&conn->tx_curs_fin, + smc_curs_read(&conn->tx_curs_sent, conn), conn); + + smc_tx_sndbuf_nonfull(smc); + return rc; +} + /********************************* receive ***********************************/ static inline bool smc_cdc_before(u16 seq1, u16 seq2) @@ -178,7 +232,7 @@ static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc, if (!sock_flag(&smc->sk, SOCK_URGINLINE)) /* we'll skip the urgent byte, so don't account for it */ (*diff_prod)--; - base = (char *)conn->rmb_desc->cpu_addr; + base = (char *)conn->rmb_desc->cpu_addr + conn->rx_off; if (conn->urg_curs.count) conn->urg_rx_byte = *(base + conn->urg_curs.count - 1); else @@ -276,6 +330,34 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc) sock_put(&smc->sk); /* no free sk in softirq-context */ } +/* Schedule a tasklet for this connection. Triggered from the ISM device IRQ + * handler to indicate update in the DMBE. + * + * Context: + * - tasklet context + */ +static void smcd_cdc_rx_tsklet(unsigned long data) +{ + struct smc_connection *conn = (struct smc_connection *)data; + struct smcd_cdc_msg cdc; + struct smc_sock *smc; + + if (!conn) + return; + + memcpy(&cdc, conn->rmb_desc->cpu_addr, sizeof(cdc)); + smc = container_of(conn, struct smc_sock, conn); + smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc); +} + +/* Initialize receive tasklet. Called from ISM device IRQ handler to start + * receiver side. + */ +void smcd_cdc_rx_init(struct smc_connection *conn) +{ + tasklet_init(&conn->rx_tsklet, smcd_cdc_rx_tsklet, (unsigned long)conn); +} + /***************************** init, exit, misc ******************************/ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf) diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index f60082fee5b8..8fbce4fee3e4 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -50,6 +50,20 @@ struct smc_cdc_msg { u8 reserved[18]; } __packed; /* format defined in RFC7609 */ +/* CDC message for SMC-D */ +struct smcd_cdc_msg { + struct smc_wr_rx_hdr common; /* Type = 0xFE */ + u8 res1[7]; + u16 prod_wrap; + u32 prod_count; + u8 res2[2]; + u16 cons_wrap; + u32 cons_count; + struct smc_cdc_producer_flags prod_flags; + struct smc_cdc_conn_state_flags conn_state_flags; + u8 res3[8]; +} __packed; + static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn) { return conn->local_rx_ctrl.conn_state_flags.peer_conn_abort || @@ -204,9 +218,9 @@ static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local, smc_curs_write(local, smc_curs_read(&temp, conn), conn); } -static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, - struct smc_cdc_msg *peer, - struct smc_connection *conn) +static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local, + struct smc_cdc_msg *peer, + struct smc_connection *conn) { local->common.type = peer->common.type; local->len = peer->len; @@ -218,6 +232,27 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, local->conn_state_flags = peer->conn_state_flags; } +static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local, + struct smcd_cdc_msg *peer) +{ + local->prod.wrap = peer->prod_wrap; + local->prod.count = peer->prod_count; + local->cons.wrap = peer->cons_wrap; + local->cons.count = peer->cons_count; + local->prod_flags = peer->prod_flags; + local->conn_state_flags = peer->conn_state_flags; +} + +static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, + struct smc_cdc_msg *peer, + struct smc_connection *conn) +{ + if (conn->lgr->is_smcd) + smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer); + else + smcr_cdc_msg_to_host(local, peer, conn); +} + struct smc_cdc_tx_pend; int smc_cdc_get_free_slot(struct smc_connection *conn, @@ -227,6 +262,8 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); +int smcd_cdc_msg_send(struct smc_connection *conn); int smc_cdc_init(void) __init; +void smcd_cdc_rx_init(struct smc_connection *conn); #endif /* SMC_CDC_H */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index daa88db1841a..434c028162a4 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -281,10 +281,12 @@ void smc_conn_free(struct smc_connection *conn) { if (!conn->lgr) return; - if (conn->lgr->is_smcd) + if (conn->lgr->is_smcd) { smc_ism_unset_conn(conn); - else + tasklet_kill(&conn->rx_tsklet); + } else { smc_cdc_tx_dismiss_slots(conn); + } smc_lgr_unregister_conn(conn); smc_buf_unuse(conn); } @@ -324,10 +326,13 @@ static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb, struct smc_buf_desc *buf_desc) { - if (is_dmb) + if (is_dmb) { + /* restore original buf len */ + buf_desc->len += sizeof(struct smcd_cdc_msg); smc_ism_unregister_dmb(lgr->smcd, buf_desc); - else + } else { kfree(buf_desc->cpu_addr); + } kfree(buf_desc); } @@ -632,6 +637,10 @@ create: conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; conn->urg_state = SMC_URG_READ; + if (is_smcd) { + conn->rx_off = sizeof(struct smcd_cdc_msg); + smcd_cdc_rx_init(conn); /* init tasklet for this conn */ + } #ifndef KERNEL_HAS_ATOMIC64 spin_lock_init(&conn->acurs_lock); #endif @@ -776,8 +785,9 @@ static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, kfree(buf_desc); return ERR_PTR(-EAGAIN); } - memset(buf_desc->cpu_addr, 0, bufsize); - buf_desc->len = bufsize; + buf_desc->pages = virt_to_page(buf_desc->cpu_addr); + /* CDC header stored in buf. So, pretend it was smaller */ + buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg); } else { buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | @@ -854,7 +864,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) conn->rmbe_size_short = bufsize_short; smc->sk.sk_rcvbuf = bufsize * 2; atomic_set(&conn->bytes_to_rcv, 0); - conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize); + conn->rmbe_update_limit = + smc_rmb_wnd_update_limit(buf_desc->len); if (is_smcd) smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ } else { diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index f44e4dff244a..cfade7fdcc6d 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -302,5 +302,13 @@ EXPORT_SYMBOL_GPL(smcd_handle_event); */ void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno) { + struct smc_connection *conn = NULL; + unsigned long flags; + + spin_lock_irqsave(&smcd->lock, flags); + conn = smcd->conn[dmbno]; + if (conn) + tasklet_schedule(&conn->rx_tsklet); + spin_unlock_irqrestore(&smcd->lock, flags); } EXPORT_SYMBOL_GPL(smcd_handle_irq); diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 3d77b383cccd..b329803c8339 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -305,7 +305,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); /* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */ - rcvbuf_base = conn->rmb_desc->cpu_addr; + rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr; do { /* while (read_remaining) */ if (read_done >= target || (pipe && read_done)) diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index f82886b7d1d8..142bcb134dd6 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -24,6 +24,7 @@ #include "smc.h" #include "smc_wr.h" #include "smc_cdc.h" +#include "smc_ism.h" #include "smc_tx.h" #define SMC_TX_WORK_DELAY HZ @@ -250,6 +251,24 @@ out_err: /***************************** sndbuf consumer *******************************/ +/* sndbuf consumer: actual data transfer of one target chunk with ISM write */ +int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, + u32 offset, int signal) +{ + struct smc_ism_position pos; + int rc; + + memset(&pos, 0, sizeof(pos)); + pos.token = conn->peer_token; + pos.index = conn->peer_rmbe_idx; + pos.offset = conn->tx_off + offset; + pos.signal = signal; + rc = smc_ism_write(conn->lgr->smcd, &pos, data, len); + if (rc) + conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; + return rc; +} + /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, int num_sges, struct ib_sge sges[]) @@ -297,21 +316,104 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn, smc_curs_add(conn->sndbuf_desc->len, sent, len); } +/* SMC-R helper for smc_tx_rdma_writes() */ +static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, + size_t src_off, size_t src_len, + size_t dst_off, size_t dst_len) +{ + dma_addr_t dma_addr = + sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); + struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + int src_len_sum = src_len, dst_len_sum = dst_len; + struct ib_sge sges[SMC_IB_MAX_SEND_SGE]; + int sent_count = src_off; + int srcchunk, dstchunk; + int num_sges; + int rc; + + for (dstchunk = 0; dstchunk < 2; dstchunk++) { + num_sges = 0; + for (srcchunk = 0; srcchunk < 2; srcchunk++) { + sges[srcchunk].addr = dma_addr + src_off; + sges[srcchunk].length = src_len; + sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; + num_sges++; + + src_off += src_len; + if (src_off >= conn->sndbuf_desc->len) + src_off -= conn->sndbuf_desc->len; + /* modulo in send ring */ + if (src_len_sum == dst_len) + break; /* either on 1st or 2nd iteration */ + /* prepare next (== 2nd) iteration */ + src_len = dst_len - src_len; /* remainder */ + src_len_sum += src_len; + } + rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges); + if (rc) + return rc; + if (dst_len_sum == len) + break; /* either on 1st or 2nd iteration */ + /* prepare next (== 2nd) iteration */ + dst_off = 0; /* modulo offset in RMBE ring buffer */ + dst_len = len - dst_len; /* remainder */ + dst_len_sum += dst_len; + src_len = min_t(int, dst_len, conn->sndbuf_desc->len - + sent_count); + src_len_sum = src_len; + } + return 0; +} + +/* SMC-D helper for smc_tx_rdma_writes() */ +static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len, + size_t src_off, size_t src_len, + size_t dst_off, size_t dst_len) +{ + int src_len_sum = src_len, dst_len_sum = dst_len; + int srcchunk, dstchunk; + int rc; + + for (dstchunk = 0; dstchunk < 2; dstchunk++) { + for (srcchunk = 0; srcchunk < 2; srcchunk++) { + void *data = conn->sndbuf_desc->cpu_addr + src_off; + + rc = smcd_tx_ism_write(conn, data, src_len, dst_off + + sizeof(struct smcd_cdc_msg), 0); + if (rc) + return rc; + dst_off += src_len; + src_off += src_len; + if (src_off >= conn->sndbuf_desc->len) + src_off -= conn->sndbuf_desc->len; + /* modulo in send ring */ + if (src_len_sum == dst_len) + break; /* either on 1st or 2nd iteration */ + /* prepare next (== 2nd) iteration */ + src_len = dst_len - src_len; /* remainder */ + src_len_sum += src_len; + } + if (dst_len_sum == len) + break; /* either on 1st or 2nd iteration */ + /* prepare next (== 2nd) iteration */ + dst_off = 0; /* modulo offset in RMBE ring buffer */ + dst_len = len - dst_len; /* remainder */ + dst_len_sum += dst_len; + src_len = min_t(int, dst_len, conn->sndbuf_desc->len - src_off); + src_len_sum = src_len; + } + return 0; +} + /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit; * usable snd_wnd as max transmit */ static int smc_tx_rdma_writes(struct smc_connection *conn) { - size_t src_off, src_len, dst_off, dst_len; /* current chunk values */ - size_t len, dst_len_sum, src_len_sum, dstchunk, srcchunk; + size_t len, src_len, dst_off, dst_len; /* current chunk values */ union smc_host_cursor sent, prep, prod, cons; - struct ib_sge sges[SMC_IB_MAX_SEND_SGE]; - struct smc_link_group *lgr = conn->lgr; struct smc_cdc_producer_flags *pflags; int to_send, rmbespace; - struct smc_link *link; - dma_addr_t dma_addr; - int num_sges; int rc; /* source: sndbuf */ @@ -341,7 +443,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) len = min(to_send, rmbespace); /* initialize variables for first iteration of subsequent nested loop */ - link = &lgr->lnk[SMC_SINGLE_LINK]; dst_off = prod.count; if (prod.wrap == cons.wrap) { /* the filled destination area is unwrapped, @@ -358,8 +459,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) */ dst_len = len; } - dst_len_sum = dst_len; - src_off = sent.count; /* dst_len determines the maximum src_len */ if (sent.count + dst_len <= conn->sndbuf_desc->len) { /* unwrapped src case: single chunk of entire dst_len */ @@ -368,38 +467,15 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) /* wrapped src case: 2 chunks of sum dst_len; start with 1st: */ src_len = conn->sndbuf_desc->len - sent.count; } - src_len_sum = src_len; - dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); - for (dstchunk = 0; dstchunk < 2; dstchunk++) { - num_sges = 0; - for (srcchunk = 0; srcchunk < 2; srcchunk++) { - sges[srcchunk].addr = dma_addr + src_off; - sges[srcchunk].length = src_len; - sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; - num_sges++; - src_off += src_len; - if (src_off >= conn->sndbuf_desc->len) - src_off -= conn->sndbuf_desc->len; - /* modulo in send ring */ - if (src_len_sum == dst_len) - break; /* either on 1st or 2nd iteration */ - /* prepare next (== 2nd) iteration */ - src_len = dst_len - src_len; /* remainder */ - src_len_sum += src_len; - } - rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges); - if (rc) - return rc; - if (dst_len_sum == len) - break; /* either on 1st or 2nd iteration */ - /* prepare next (== 2nd) iteration */ - dst_off = 0; /* modulo offset in RMBE ring buffer */ - dst_len = len - dst_len; /* remainder */ - dst_len_sum += dst_len; - src_len = min_t(int, - dst_len, conn->sndbuf_desc->len - sent.count); - src_len_sum = src_len; - } + + if (conn->lgr->is_smcd) + rc = smcd_tx_rdma_writes(conn, len, sent.count, src_len, + dst_off, dst_len); + else + rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len, + dst_off, dst_len); + if (rc) + return rc; if (conn->urg_tx_pend && len == to_send) pflags->urg_data_present = 1; @@ -420,7 +496,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) /* Wakeup sndbuf consumers from any context (IRQ or process) * since there is more data to transmit; usable snd_wnd as max transmit */ -int smc_tx_sndbuf_nonempty(struct smc_connection *conn) +static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_cdc_producer_flags *pflags; struct smc_cdc_tx_pend *pend; @@ -467,6 +543,37 @@ out_unlock: return rc; } +static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn) +{ + struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; + int rc = 0; + + spin_lock_bh(&conn->send_lock); + if (!pflags->urg_data_present) + rc = smc_tx_rdma_writes(conn); + if (!rc) + rc = smcd_cdc_msg_send(conn); + + if (!rc && pflags->urg_data_present) { + pflags->urg_data_pending = 0; + pflags->urg_data_present = 0; + } + spin_unlock_bh(&conn->send_lock); + return rc; +} + +int smc_tx_sndbuf_nonempty(struct smc_connection *conn) +{ + int rc; + + if (conn->lgr->is_smcd) + rc = smcd_tx_sndbuf_nonempty(conn); + else + rc = smcr_tx_sndbuf_nonempty(conn); + + return rc; +} + /* Wakeup sndbuf consumers from process context * since there is more data to transmit */ diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h index 9d2238909fa0..b22bdc5694c4 100644 --- a/net/smc/smc_tx.h +++ b/net/smc/smc_tx.h @@ -33,5 +33,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len); int smc_tx_sndbuf_nonempty(struct smc_connection *conn); void smc_tx_sndbuf_nonfull(struct smc_sock *smc); void smc_tx_consumer_update(struct smc_connection *conn, bool force); +int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, + u32 offset, int signal); #endif /* SMC_TX_H */ -- cgit v1.2.3-59-g8ed1b From 413498440e30bfe381ac99dfc31628a3d8d4382a Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 28 Jun 2018 19:05:11 +0200 Subject: net/smc: add SMC-D support in af_smc This patch ties together the previous SMC-D patches. It adds support for SMC-D to the listen and connect functions and, thus, enables SMC-D support in the SMC code. If a connection supports both SMC-R and SMC-D, SMC-D is preferred. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Suggested-by: Thomas Richter Signed-off-by: David S. Miller --- net/smc/af_smc.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++++----- net/smc/smc_core.c | 2 +- net/smc/smc_core.h | 1 + 3 files changed, 200 insertions(+), 19 deletions(-) (limited to 'net/smc') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 20afa94be8bb..cbbb947dbfcf 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,7 @@ #include "smc_cdc.h" #include "smc_core.h" #include "smc_ib.h" +#include "smc_ism.h" #include "smc_pnet.h" #include "smc_tx.h" #include "smc_rx.h" @@ -372,8 +374,8 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) return 0; } -static void smc_conn_save_peer_info(struct smc_sock *smc, - struct smc_clc_msg_accept_confirm *clc) +static void smcr_conn_save_peer_info(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *clc) { int bufsize = smc_uncompress_bufsize(clc->rmbe_size); @@ -384,6 +386,28 @@ static void smc_conn_save_peer_info(struct smc_sock *smc, smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1); } +static void smcd_conn_save_peer_info(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *clc) +{ + int bufsize = smc_uncompress_bufsize(clc->dmbe_size); + + smc->conn.peer_rmbe_idx = clc->dmbe_idx; + smc->conn.peer_token = clc->token; + /* msg header takes up space in the buffer */ + smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg); + atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); + smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx; +} + +static void smc_conn_save_peer_info(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *clc) +{ + if (smc->conn.lgr->is_smcd) + smcd_conn_save_peer_info(smc, clc); + else + smcr_conn_save_peer_info(smc, clc); +} + static void smc_link_save_peer_info(struct smc_link *link, struct smc_clc_msg_accept_confirm *clc) { @@ -450,15 +474,51 @@ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev, return reason_code; } +/* check if there is an ISM device available for this connection. */ +/* called for connect and listen */ +static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev) +{ + /* Find ISM device with same PNETID as connecting interface */ + smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev); + if (!(*ismdev)) + return SMC_CLC_DECL_CNFERR; /* configuration error */ + return 0; +} + +/* Check for VLAN ID and register it on ISM device just for CLC handshake */ +static int smc_connect_ism_vlan_setup(struct smc_sock *smc, + struct smcd_dev *ismdev, + unsigned short vlan_id) +{ + if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id)) + return SMC_CLC_DECL_CNFERR; + return 0; +} + +/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is + * used, the VLAN ID will be registered again during the connection setup. + */ +static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd, + struct smcd_dev *ismdev, + unsigned short vlan_id) +{ + if (!is_smcd) + return 0; + if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id)) + return SMC_CLC_DECL_CNFERR; + return 0; +} + /* CLC handshake during connect */ static int smc_connect_clc(struct smc_sock *smc, int smc_type, struct smc_clc_msg_accept_confirm *aclc, - struct smc_ib_device *ibdev, u8 ibport) + struct smc_ib_device *ibdev, u8 ibport, + struct smcd_dev *ismdev) { int rc = 0; /* do inband token exchange */ - rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, NULL); + rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, ismdev); if (rc) return rc; /* receive SMC Accept CLC message */ @@ -538,11 +598,50 @@ static int smc_connect_rdma(struct smc_sock *smc, return 0; } +/* setup for ISM connection of client */ +static int smc_connect_ism(struct smc_sock *smc, + struct smc_clc_msg_accept_confirm *aclc, + struct smcd_dev *ismdev) +{ + int local_contact = SMC_FIRST_CONTACT; + int rc = 0; + + mutex_lock(&smc_create_lgr_pending); + local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, + NULL, ismdev, aclc->gid); + if (local_contact < 0) + return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0); + + /* Create send and receive buffers */ + if (smc_buf_create(smc, true)) + return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact); + + smc_conn_save_peer_info(smc, aclc); + smc_close_init(smc); + smc_rx_init(smc); + smc_tx_init(smc); + + rc = smc_clc_send_confirm(smc); + if (rc) + return smc_connect_abort(smc, rc, local_contact); + mutex_unlock(&smc_create_lgr_pending); + + smc_copy_sock_settings_to_clc(smc); + if (smc->sk.sk_state == SMC_INIT) + smc->sk.sk_state = SMC_ACTIVE; + + return 0; +} + /* perform steps before actually connecting */ static int __smc_connect(struct smc_sock *smc) { + bool ism_supported = false, rdma_supported = false; struct smc_clc_msg_accept_confirm aclc; struct smc_ib_device *ibdev; + struct smcd_dev *ismdev; + unsigned short vlan; + int smc_type; int rc = 0; u8 ibport; @@ -559,20 +658,52 @@ static int __smc_connect(struct smc_sock *smc) if (using_ipsec(smc)) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC); - /* check if a RDMA device is available; if not, fall back */ - if (smc_check_rdma(smc, &ibdev, &ibport)) + /* check for VLAN ID */ + if (smc_vlan_by_tcpsk(smc->clcsock, &vlan)) + return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR); + + /* check if there is an ism device available */ + if (!smc_check_ism(smc, &ismdev) && + !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) { + /* ISM is supported for this connection */ + ism_supported = true; + smc_type = SMC_TYPE_D; + } + + /* check if there is a rdma device available */ + if (!smc_check_rdma(smc, &ibdev, &ibport)) { + /* RDMA is supported for this connection */ + rdma_supported = true; + if (ism_supported) + smc_type = SMC_TYPE_B; /* both */ + else + smc_type = SMC_TYPE_R; /* only RDMA */ + } + + /* if neither ISM nor RDMA are supported, fallback */ + if (!rdma_supported && !ism_supported) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR); /* perform CLC handshake */ - rc = smc_connect_clc(smc, SMC_TYPE_R, &aclc, ibdev, ibport); - if (rc) + rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, ismdev); + if (rc) { + smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); return smc_connect_decline_fallback(smc, rc); + } - /* connect using rdma */ - rc = smc_connect_rdma(smc, &aclc, ibdev, ibport); - if (rc) + /* depending on previous steps, connect using rdma or ism */ + if (rdma_supported && aclc.hdr.path == SMC_TYPE_R) + rc = smc_connect_rdma(smc, &aclc, ibdev, ibport); + else if (ism_supported && aclc.hdr.path == SMC_TYPE_D) + rc = smc_connect_ism(smc, &aclc, ismdev); + else + rc = SMC_CLC_DECL_CNFERR; + if (rc) { + smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); return smc_connect_decline_fallback(smc, rc); + } + smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); return 0; } @@ -909,6 +1040,44 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc, return 0; } +/* listen worker: initialize connection and buffers for SMC-D */ +static int smc_listen_ism_init(struct smc_sock *new_smc, + struct smc_clc_msg_proposal *pclc, + struct smcd_dev *ismdev, + int *local_contact) +{ + struct smc_clc_msg_smcd *pclc_smcd; + + pclc_smcd = smc_get_clc_msg_smcd(pclc); + *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL, + ismdev, pclc_smcd->gid); + if (*local_contact < 0) { + if (*local_contact == -ENOMEM) + return SMC_CLC_DECL_MEM;/* insufficient memory*/ + return SMC_CLC_DECL_INTERR; /* other error */ + } + + /* Check if peer can be reached via ISM device */ + if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid, + new_smc->conn.lgr->vlan_id, + new_smc->conn.lgr->smcd)) { + if (*local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); + smc_conn_free(&new_smc->conn); + return SMC_CLC_DECL_CNFERR; + } + + /* Create send and receive buffers */ + if (smc_buf_create(new_smc, true)) { + if (*local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); + smc_conn_free(&new_smc->conn); + return SMC_CLC_DECL_MEM; + } + + return 0; +} + /* listen worker: register buffers */ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) { @@ -967,6 +1136,8 @@ static void smc_listen_work(struct work_struct *work) struct smc_clc_msg_accept_confirm cclc; struct smc_clc_msg_proposal *pclc; struct smc_ib_device *ibdev; + bool ism_supported = false; + struct smcd_dev *ismdev; u8 buf[SMC_CLC_MAX_LEN]; int local_contact = 0; int reason_code = 0; @@ -1007,13 +1178,21 @@ static void smc_listen_work(struct work_struct *work) smc_rx_init(new_smc); smc_tx_init(new_smc); + /* check if ISM is available */ + if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) && + !smc_check_ism(new_smc, &ismdev) && + !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) { + ism_supported = true; + } + /* check if RDMA is available */ - if ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) || - smc_check_rdma(new_smc, &ibdev, &ibport) || - smc_listen_rdma_check(new_smc, pclc) || - smc_listen_rdma_init(new_smc, pclc, ibdev, ibport, - &local_contact) || - smc_listen_rdma_reg(new_smc, local_contact)) { + if (!ism_supported && + ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) || + smc_check_rdma(new_smc, &ibdev, &ibport) || + smc_listen_rdma_check(new_smc, pclc) || + smc_listen_rdma_init(new_smc, pclc, ibdev, ibport, + &local_contact) || + smc_listen_rdma_reg(new_smc, local_contact))) { /* SMC not supported, decline */ mutex_unlock(&smc_create_lgr_pending); smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact); @@ -1038,7 +1217,8 @@ static void smc_listen_work(struct work_struct *work) } /* finish worker */ - smc_listen_rdma_finish(new_smc, &cclc, local_contact); + if (!ism_supported) + smc_listen_rdma_finish(new_smc, &cclc, local_contact); smc_conn_save_peer_info(new_smc, &cclc); mutex_unlock(&smc_create_lgr_pending); smc_listen_out_connected(new_smc); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 434c028162a4..66741e61a3b0 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -478,7 +478,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid) /* Determine vlan of internal TCP socket. * @vlan_id: address to store the determined vlan id into */ -static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) +int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) { struct dst_entry *dst = sk_dst_get(clcsock->sk); struct net_device *ndev; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index cd9268a9570e..8b47e0168fc3 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -257,6 +257,7 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); void smc_rmb_sync_sg_for_device(struct smc_connection *conn); +int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id); void smc_conn_free(struct smc_connection *conn); int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, -- cgit v1.2.3-59-g8ed1b From 4b1b7d3b30a6d32ac1a1dcede284e76ef8a8542d Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 28 Jun 2018 19:05:12 +0200 Subject: net/smc: add SMC-D diag support This patch adds diag support for SMC-D. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Suggested-by: Thomas Richter Signed-off-by: David S. Miller --- include/uapi/linux/smc_diag.h | 10 ++++++++++ net/smc/smc_diag.c | 15 +++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'net/smc') diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index 0ae5d4685ba3..92be255e534c 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -35,6 +35,7 @@ enum { SMC_DIAG_CONNINFO, SMC_DIAG_LGRINFO, SMC_DIAG_SHUTDOWN, + SMC_DIAG_DMBINFO, __SMC_DIAG_MAX, }; @@ -83,4 +84,13 @@ struct smc_diag_lgrinfo { struct smc_diag_linkinfo lnk[1]; __u8 role; }; + +struct smcd_diag_dmbinfo { /* SMC-D Socket internals */ + __u32 linkid; /* Link identifier */ + __u64 peer_gid; /* Peer GID */ + __u64 my_gid; /* My GID */ + __u64 token; /* Token of DMB */ + __u64 peer_token; /* Token of remote DMBE */ +}; + #endif /* _UAPI_SMC_DIAG_H_ */ diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 64ce107c24d9..6d83eef1b743 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -156,6 +156,21 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0) goto errout; } + if (smc->conn.lgr && smc->conn.lgr->is_smcd && + (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && + !list_empty(&smc->conn.lgr->list)) { + struct smc_connection *conn = &smc->conn; + struct smcd_diag_dmbinfo dinfo = { + .linkid = *((u32 *)conn->lgr->id), + .peer_gid = conn->lgr->peer_gid, + .my_gid = conn->lgr->smcd->local_gid, + .token = conn->rmb_desc->token, + .peer_token = conn->peer_token + }; + + if (nla_put(skb, SMC_DIAG_DMBINFO, sizeof(dinfo), &dinfo) < 0) + goto errout; + } nlmsg_end(skb, nlh); return 0; -- cgit v1.2.3-59-g8ed1b From c601171d7a60b5b09d7c2fe0579953323a80744e Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 23 Jul 2018 13:53:08 +0200 Subject: net/smc: provide smc mode in smc_diag.c Rename field diag_fallback into diag_mode and set the smc mode of a connection explicitly. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- include/uapi/linux/smc_diag.h | 9 ++++++++- net/smc/smc_diag.c | 7 ++++++- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'net/smc') diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index 92be255e534c..48ae3ee22b2d 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -20,7 +20,7 @@ struct smc_diag_req { struct smc_diag_msg { __u8 diag_family; __u8 diag_state; - __u8 diag_fallback; + __u8 diag_mode; __u8 diag_shutdown; struct inet_diag_sockid id; @@ -28,6 +28,13 @@ struct smc_diag_msg { __u64 diag_inode; }; +/* Mode of a connection */ +enum { + SMC_DIAG_MODE_SMCR, + SMC_DIAG_MODE_FALLBACK_TCP, + SMC_DIAG_MODE_SMCD, +}; + /* Extensions */ enum { diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 6d83eef1b743..d772cd10297e 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -91,7 +91,12 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, r = nlmsg_data(nlh); smc_diag_msg_common_fill(r, sk); r->diag_state = sk->sk_state; - r->diag_fallback = smc->use_fallback; + if (smc->use_fallback) + r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP; + else if (smc->conn.lgr && smc->conn.lgr->is_smcd) + r->diag_mode = SMC_DIAG_MODE_SMCD; + else + r->diag_mode = SMC_DIAG_MODE_SMCR; user_ns = sk_user_ns(NETLINK_CB(cb->skb).sk); if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) goto errout; -- cgit v1.2.3-59-g8ed1b From bac6de7b637018f4caacfdf2b4ad8c8749de7420 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 23 Jul 2018 13:53:09 +0200 Subject: net/smc: eliminate cursor read and write calls The functions to read and write cursors are exclusively used to copy cursors. Therefore switch to a respective function instead. Signed-off-by: Stefan Raspl Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 8 ++------ net/smc/smc_cdc.c | 33 ++++++++++++--------------------- net/smc/smc_cdc.h | 43 +++++++++++++++---------------------------- net/smc/smc_rx.c | 15 ++++----------- net/smc/smc_tx.c | 46 +++++++++++++--------------------------------- net/smc/smc_tx.h | 4 ++-- 6 files changed, 48 insertions(+), 101 deletions(-) (limited to 'net/smc') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 143b2220c0c8..7fc810ec31c5 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1755,12 +1755,8 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd, smc->sk.sk_state == SMC_CLOSED) { answ = 0; } else { - smc_curs_write(&cons, - smc_curs_read(&conn->local_tx_ctrl.cons, conn), - conn); - smc_curs_write(&urg, - smc_curs_read(&conn->urg_curs, conn), - conn); + smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); + smc_curs_copy(&urg, &conn->urg_curs, conn); answ = smc_curs_diff(conn->rmb_desc->len, &cons, &urg) == 1; } diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 621d8cca570b..f3a1497953ee 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -34,14 +34,15 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, enum ib_wc_status wc_status) { struct smc_cdc_tx_pend *cdcpend = (struct smc_cdc_tx_pend *)pnd_snd; + struct smc_connection *conn = cdcpend->conn; struct smc_sock *smc; int diff; - if (!cdcpend->conn) + if (!conn) /* already dismissed */ return; - smc = container_of(cdcpend->conn, struct smc_sock, conn); + smc = container_of(conn, struct smc_sock, conn); bh_lock_sock(&smc->sk); if (!wc_status) { diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len, @@ -52,9 +53,7 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, atomic_add(diff, &cdcpend->conn->sndbuf_space); /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ smp_mb__after_atomic(); - smc_curs_write(&cdcpend->conn->tx_curs_fin, - smc_curs_read(&cdcpend->cursor, cdcpend->conn), - cdcpend->conn); + smc_curs_copy(&conn->tx_curs_fin, &cdcpend->cursor, conn); } smc_tx_sndbuf_nonfull(smc); bh_unlock_sock(&smc->sk); @@ -110,9 +109,8 @@ int smc_cdc_msg_send(struct smc_connection *conn, &conn->local_tx_ctrl, conn); rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); if (!rc) - smc_curs_write(&conn->rx_curs_confirmed, - smc_curs_read(&conn->local_tx_ctrl.cons, conn), - conn); + smc_curs_copy(&conn->rx_curs_confirmed, + &conn->local_tx_ctrl.cons, conn); return rc; } @@ -194,8 +192,8 @@ int smcd_cdc_msg_send(struct smc_connection *conn) rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1); if (rc) return rc; - smc_curs_write(&conn->rx_curs_confirmed, - smc_curs_read(&conn->local_tx_ctrl.cons, conn), conn); + smc_curs_copy(&conn->rx_curs_confirmed, &conn->local_tx_ctrl.cons, + conn); /* Calculate transmitted data and increment free send buffer space */ diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin, &conn->tx_curs_sent); @@ -204,8 +202,7 @@ int smcd_cdc_msg_send(struct smc_connection *conn) atomic_add(diff, &conn->sndbuf_space); /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ smp_mb__after_atomic(); - smc_curs_write(&conn->tx_curs_fin, - smc_curs_read(&conn->tx_curs_sent, conn), conn); + smc_curs_copy(&conn->tx_curs_fin, &conn->tx_curs_sent, conn); smc_tx_sndbuf_nonfull(smc); return rc; @@ -225,9 +222,7 @@ static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc, char *base; /* new data included urgent business */ - smc_curs_write(&conn->urg_curs, - smc_curs_read(&conn->local_rx_ctrl.prod, conn), - conn); + smc_curs_copy(&conn->urg_curs, &conn->local_rx_ctrl.prod, conn); conn->urg_state = SMC_URG_VALID; if (!sock_flag(&smc->sk, SOCK_URGINLINE)) /* we'll skip the urgent byte, so don't account for it */ @@ -247,12 +242,8 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, struct smc_connection *conn = &smc->conn; int diff_cons, diff_prod; - smc_curs_write(&prod_old, - smc_curs_read(&conn->local_rx_ctrl.prod, conn), - conn); - smc_curs_write(&cons_old, - smc_curs_read(&conn->local_rx_ctrl.cons, conn), - conn); + smc_curs_copy(&prod_old, &conn->local_rx_ctrl.prod, conn); + smc_curs_copy(&cons_old, &conn->local_rx_ctrl.cons, conn); smc_cdc_msg_to_host(&conn->local_rx_ctrl, cdc, conn); diff_cons = smc_curs_diff(conn->peer_rmbe_size, &cons_old, diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 8fbce4fee3e4..934df4473a7c 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -104,47 +104,34 @@ static inline u64 smc_curs_read(union smc_host_cursor *curs, #endif } -static inline u64 smc_curs_read_net(union smc_cdc_cursor *curs, - struct smc_connection *conn) -{ -#ifndef KERNEL_HAS_ATOMIC64 - unsigned long flags; - u64 ret; - - spin_lock_irqsave(&conn->acurs_lock, flags); - ret = curs->acurs; - spin_unlock_irqrestore(&conn->acurs_lock, flags); - return ret; -#else - return atomic64_read(&curs->acurs); -#endif -} - -static inline void smc_curs_write(union smc_host_cursor *curs, u64 val, - struct smc_connection *conn) +/* Copy cursor src into tgt */ +static inline void smc_curs_copy(union smc_host_cursor *tgt, + union smc_host_cursor *src, + struct smc_connection *conn) { #ifndef KERNEL_HAS_ATOMIC64 unsigned long flags; spin_lock_irqsave(&conn->acurs_lock, flags); - curs->acurs = val; + tgt->acurs = src->acurs; spin_unlock_irqrestore(&conn->acurs_lock, flags); #else - atomic64_set(&curs->acurs, val); + atomic64_set(&tgt->acurs, atomic64_read(&src->acurs)); #endif } -static inline void smc_curs_write_net(union smc_cdc_cursor *curs, u64 val, - struct smc_connection *conn) +static inline void smc_curs_copy_net(union smc_cdc_cursor *tgt, + union smc_cdc_cursor *src, + struct smc_connection *conn) { #ifndef KERNEL_HAS_ATOMIC64 unsigned long flags; spin_lock_irqsave(&conn->acurs_lock, flags); - curs->acurs = val; + tgt->acurs = src->acurs; spin_unlock_irqrestore(&conn->acurs_lock, flags); #else - atomic64_set(&curs->acurs, val); + atomic64_set(&tgt->acurs, atomic64_read(&src->acurs)); #endif } @@ -179,7 +166,7 @@ static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer, { union smc_host_cursor temp; - smc_curs_write(&temp, smc_curs_read(local, conn), conn); + smc_curs_copy(&temp, local, conn); peer->count = htonl(temp.count); peer->wrap = htons(temp.wrap); /* peer->reserved = htons(0); must be ensured by caller */ @@ -206,8 +193,8 @@ static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local, union smc_host_cursor temp, old; union smc_cdc_cursor net; - smc_curs_write(&old, smc_curs_read(local, conn), conn); - smc_curs_write_net(&net, smc_curs_read_net(peer, conn), conn); + smc_curs_copy(&old, local, conn); + smc_curs_copy_net(&net, peer, conn); temp.count = ntohl(net.count); temp.wrap = ntohs(net.wrap); if ((old.wrap > temp.wrap) && temp.wrap) @@ -215,7 +202,7 @@ static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local, if ((old.wrap == temp.wrap) && (old.count > temp.count)) return; - smc_curs_write(local, smc_curs_read(&temp, conn), conn); + smc_curs_copy(local, &temp, conn); } static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local, diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index b329803c8339..c99c987097b1 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -82,8 +82,7 @@ static int smc_rx_update_consumer(struct smc_sock *smc, } } - smc_curs_write(&conn->local_tx_ctrl.cons, smc_curs_read(&cons, conn), - conn); + smc_curs_copy(&conn->local_tx_ctrl.cons, &cons, conn); /* send consumer cursor update if required */ /* similar to advertising new TCP rcv_wnd if required */ @@ -97,8 +96,7 @@ static void smc_rx_update_cons(struct smc_sock *smc, size_t len) struct smc_connection *conn = &smc->conn; union smc_host_cursor cons; - smc_curs_write(&cons, smc_curs_read(&conn->local_tx_ctrl.cons, conn), - conn); + smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); smc_rx_update_consumer(smc, cons, len); } @@ -245,10 +243,7 @@ static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len, if (!(flags & MSG_TRUNC)) rc = memcpy_to_msg(msg, &conn->urg_rx_byte, 1); len = 1; - smc_curs_write(&cons, - smc_curs_read(&conn->local_tx_ctrl.cons, - conn), - conn); + smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); if (smc_curs_diff(conn->rmb_desc->len, &cons, &conn->urg_curs) > 1) conn->urg_rx_skip_pend = true; @@ -370,9 +365,7 @@ copy: continue; } - smc_curs_write(&cons, - smc_curs_read(&conn->local_tx_ctrl.cons, conn), - conn); + smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); /* subsequent splice() calls pick up where previous left */ if (splbytes) smc_curs_add(conn->rmb_desc->len, &cons, splbytes); diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 142bcb134dd6..2f5e324e54b9 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -181,9 +181,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) copylen = min_t(size_t, send_remaining, writespace); /* determine start of sndbuf */ sndbuf_base = conn->sndbuf_desc->cpu_addr; - smc_curs_write(&prep, - smc_curs_read(&conn->tx_curs_prep, conn), - conn); + smc_curs_copy(&prep, &conn->tx_curs_prep, conn); tx_cnt_prep = prep.count; /* determine chunks where to write into sndbuf */ /* either unwrapped case, or 1st chunk of wrapped case */ @@ -214,9 +212,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) smc_sndbuf_sync_sg_for_device(conn); /* update cursors */ smc_curs_add(conn->sndbuf_desc->len, &prep, copylen); - smc_curs_write(&conn->tx_curs_prep, - smc_curs_read(&prep, conn), - conn); + smc_curs_copy(&conn->tx_curs_prep, &prep, conn); /* increased in send tasklet smc_cdc_tx_handler() */ smp_mb__before_atomic(); atomic_sub(copylen, &conn->sndbuf_space); @@ -417,8 +413,8 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) int rc; /* source: sndbuf */ - smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn); - smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn); + smc_curs_copy(&sent, &conn->tx_curs_sent, conn); + smc_curs_copy(&prep, &conn->tx_curs_prep, conn); /* cf. wmem_alloc - (snd_max - snd_una) */ to_send = smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep); if (to_send <= 0) @@ -429,12 +425,8 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) rmbespace = atomic_read(&conn->peer_rmbe_space); if (rmbespace <= 0) return 0; - smc_curs_write(&prod, - smc_curs_read(&conn->local_tx_ctrl.prod, conn), - conn); - smc_curs_write(&cons, - smc_curs_read(&conn->local_rx_ctrl.cons, conn), - conn); + smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn); + smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn); /* if usable snd_wnd closes ask peer to advertise once it opens again */ pflags = &conn->local_tx_ctrl.prod_flags; @@ -481,14 +473,9 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) pflags->urg_data_present = 1; smc_tx_advance_cursors(conn, &prod, &sent, len); /* update connection's cursors with advanced local cursors */ - smc_curs_write(&conn->local_tx_ctrl.prod, - smc_curs_read(&prod, conn), - conn); + smc_curs_copy(&conn->local_tx_ctrl.prod, &prod, conn); /* dst: peer RMBE */ - smc_curs_write(&conn->tx_curs_sent, - smc_curs_read(&sent, conn), - conn); - /* src: local sndbuf */ + smc_curs_copy(&conn->tx_curs_sent, &sent, conn);/* src: local sndbuf */ return 0; } @@ -606,17 +593,11 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force) int sender_free = conn->rmb_desc->len; int to_confirm; - smc_curs_write(&cons, - smc_curs_read(&conn->local_tx_ctrl.cons, conn), - conn); - smc_curs_write(&cfed, - smc_curs_read(&conn->rx_curs_confirmed, conn), - conn); + smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); + smc_curs_copy(&cfed, &conn->rx_curs_confirmed, conn); to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons); if (to_confirm > conn->rmbe_update_limit) { - smc_curs_write(&prod, - smc_curs_read(&conn->local_rx_ctrl.prod, conn), - conn); + smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn); sender_free = conn->rmb_desc->len - smc_curs_diff(conn->rmb_desc->len, &prod, &cfed); } @@ -632,9 +613,8 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force) SMC_TX_WORK_DELAY); return; } - smc_curs_write(&conn->rx_curs_confirmed, - smc_curs_read(&conn->local_tx_ctrl.cons, conn), - conn); + smc_curs_copy(&conn->rx_curs_confirmed, + &conn->local_tx_ctrl.cons, conn); conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; } if (conn->local_rx_ctrl.prod_flags.write_blocked && diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h index b22bdc5694c4..07e6ad76224a 100644 --- a/net/smc/smc_tx.h +++ b/net/smc/smc_tx.h @@ -22,8 +22,8 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn) { union smc_host_cursor sent, prep; - smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn); - smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn); + smc_curs_copy(&sent, &conn->tx_curs_sent, conn); + smc_curs_copy(&prep, &conn->tx_curs_prep, conn); return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep); } -- cgit v1.2.3-59-g8ed1b From 00e5fb263f9f5f2af60754b79b7dcec0d5e88154 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 23 Jul 2018 13:53:10 +0200 Subject: net/smc: add function to get link group from link Replace a frequently used construct with a more readable variant, reducing the code. Also might come handy when we start to support more than a single per link group. Signed-off-by: Stefan Raspl Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 2 +- net/smc/smc_core.h | 5 +++++ net/smc/smc_ib.c | 3 +-- net/smc/smc_llc.c | 30 ++++++++---------------------- net/smc/smc_wr.c | 27 +++++---------------------- 5 files changed, 20 insertions(+), 47 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index f3a1497953ee..a7af2289cdff 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -365,7 +365,7 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf) return; /* invalid message */ /* lookup connection */ - lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); + lgr = smc_get_lgr(link); read_lock_bh(&lgr->conns_lock); conn = smc_lgr_find_conn(ntohl(cdc->token), lgr); read_unlock_bh(&lgr->conns_lock); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 8b47e0168fc3..8807865483bb 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -266,4 +266,9 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, u64 peer_gid); void smcd_conn_free(struct smc_connection *conn); void smc_core_exit(void); + +static inline struct smc_link_group *smc_get_lgr(struct smc_link *link) +{ + return container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); +} #endif diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 36de2fd76170..4706ab7092a9 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -112,8 +112,7 @@ int smc_ib_modify_qp_reset(struct smc_link *lnk) int smc_ib_ready_link(struct smc_link *lnk) { - struct smc_link_group *lgr = - container_of(lnk, struct smc_link_group, lnk[0]); + struct smc_link_group *lgr = smc_get_lgr(lnk); int rc = 0; rc = smc_ib_modify_qp_init(lnk); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 5800a6b43d83..b7944aa1ffc3 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -186,8 +186,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[], union ib_gid *gid, enum smc_llc_reqresp reqresp) { - struct smc_link_group *lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); + struct smc_link_group *lgr = smc_get_lgr(link); struct smc_llc_msg_confirm_link *confllc; struct smc_wr_tx_pend_priv *pend; struct smc_wr_buf *wr_buf; @@ -381,11 +380,9 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen) static void smc_llc_rx_confirm_link(struct smc_link *link, struct smc_llc_msg_confirm_link *llc) { - struct smc_link_group *lgr; + struct smc_link_group *lgr = smc_get_lgr(link); int conf_rc; - lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); - /* RMBE eyecatchers are not supported */ if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC) conf_rc = 0; @@ -411,8 +408,7 @@ static void smc_llc_rx_confirm_link(struct smc_link *link, static void smc_llc_rx_add_link(struct smc_link *link, struct smc_llc_msg_add_link *llc) { - struct smc_link_group *lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); + struct smc_link_group *lgr = smc_get_lgr(link); if (llc->hd.flags & SMC_LLC_FLAG_RESP) { if (link->state == SMC_LNK_ACTIVATING) @@ -442,8 +438,7 @@ static void smc_llc_rx_add_link(struct smc_link *link, static void smc_llc_rx_delete_link(struct smc_link *link, struct smc_llc_msg_del_link *llc) { - struct smc_link_group *lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); + struct smc_link_group *lgr = smc_get_lgr(link); if (llc->hd.flags & SMC_LLC_FLAG_RESP) { if (lgr->role == SMC_SERV) @@ -476,17 +471,14 @@ static void smc_llc_rx_test_link(struct smc_link *link, static void smc_llc_rx_confirm_rkey(struct smc_link *link, struct smc_llc_msg_confirm_rkey *llc) { - struct smc_link_group *lgr; int rc; - lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); - if (llc->hd.flags & SMC_LLC_FLAG_RESP) { link->llc_confirm_rkey_rc = llc->hd.flags & SMC_LLC_FLAG_RKEY_NEG; complete(&link->llc_confirm_rkey); } else { - rc = smc_rtoken_add(lgr, + rc = smc_rtoken_add(smc_get_lgr(link), llc->rtoken[0].rmb_vaddr, llc->rtoken[0].rmb_key); @@ -514,18 +506,15 @@ static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link, static void smc_llc_rx_delete_rkey(struct smc_link *link, struct smc_llc_msg_delete_rkey *llc) { - struct smc_link_group *lgr; u8 err_mask = 0; int i, max; - lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); - if (llc->hd.flags & SMC_LLC_FLAG_RESP) { /* unused as long as we don't send this type of msg */ } else { max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX); for (i = 0; i < max; i++) { - if (smc_rtoken_delete(lgr, llc->rkey[i])) + if (smc_rtoken_delete(smc_get_lgr(link), llc->rkey[i])) err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i); } @@ -583,12 +572,10 @@ static void smc_llc_testlink_work(struct work_struct *work) struct smc_link *link = container_of(to_delayed_work(work), struct smc_link, llc_testlink_wrk); unsigned long next_interval; - struct smc_link_group *lgr; unsigned long expire_time; u8 user_data[16] = { 0 }; int rc; - lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); if (link->state != SMC_LNK_ACTIVE) return; /* don't reschedule worker */ expire_time = link->wr_rx_tstamp + link->llc_testlink_time; @@ -602,7 +589,7 @@ static void smc_llc_testlink_work(struct work_struct *work) rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp, SMC_LLC_WAIT_TIME); if (rc <= 0) { - smc_lgr_terminate(lgr); + smc_lgr_terminate(smc_get_lgr(link)); return; } next_interval = link->llc_testlink_time; @@ -613,8 +600,7 @@ out: int smc_llc_link_init(struct smc_link *link) { - struct smc_link_group *lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); + struct smc_link_group *lgr = smc_get_lgr(link); link->llc_wq = alloc_ordered_workqueue("llc_wq-%x:%x)", WQ_MEM_RECLAIM, *((u32 *)lgr->id), link->link_id); diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index dbd2605d1962..b6df69756bef 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -92,8 +92,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask)) return; if (wc->status) { - struct smc_link_group *lgr; - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { /* clear full struct smc_wr_tx_pend including .priv */ memset(&link->wr_tx_pends[i], 0, @@ -103,9 +101,7 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) clear_bit(i, link->wr_tx_mask); } /* terminate connections of this link group abnormally */ - lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); - smc_lgr_terminate(lgr); + smc_lgr_terminate(smc_get_lgr(link)); } if (pnd_snd.handler) pnd_snd.handler(&pnd_snd.priv, link, wc->status); @@ -188,8 +184,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, } else { struct smc_link_group *lgr; - lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); + lgr = smc_get_lgr(link); rc = wait_event_timeout( link->wr_tx_wait, list_empty(&lgr->list) || /* lgr terminated */ @@ -250,12 +245,8 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], &failed_wr); if (rc) { - struct smc_link_group *lgr = - container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); - smc_wr_tx_put_slot(link, priv); - smc_lgr_terminate(lgr); + smc_lgr_terminate(smc_get_lgr(link)); } return rc; } @@ -283,11 +274,7 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) SMC_WR_REG_MR_WAIT_TIME); if (!rc) { /* timeout - terminate connections */ - struct smc_link_group *lgr; - - lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); - smc_lgr_terminate(lgr); + smc_lgr_terminate(smc_get_lgr(link)); return -EPIPE; } if (rc == -ERESTARTSYS) @@ -380,8 +367,6 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num) smc_wr_rx_demultiplex(&wc[i]); smc_wr_rx_post(link); /* refill WR RX */ } else { - struct smc_link_group *lgr; - /* handle status errors */ switch (wc[i].status) { case IB_WC_RETRY_EXC_ERR: @@ -390,9 +375,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num) /* terminate connections of this link group * abnormally */ - lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); - smc_lgr_terminate(lgr); + smc_lgr_terminate(smc_get_lgr(link)); break; default: smc_wr_rx_post(link); /* refill WR RX */ -- cgit v1.2.3-59-g8ed1b From 144ce4b9b5a788953b5373162a1921267497fb38 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 23 Jul 2018 13:53:11 +0200 Subject: net/smc: use DECLARE_BITMAP for rtokens_used_mask Link group field tokens_used_mask is a bitmap. Use macro DECLARE_BITMAP for its definition. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 8807865483bb..1e8974c50550 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -192,8 +192,7 @@ struct smc_link_group { struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX] [SMC_LINKS_PER_LGR_MAX]; /* remote addr/key pairs */ - unsigned long rtokens_used_mask[BITS_TO_LONGS - (SMC_RMBS_PER_LGR_MAX)]; + DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX); /* used rtoken elements */ }; struct { /* SMC-D */ -- cgit v1.2.3-59-g8ed1b From 48bf5231771c7e3961c8326353b6027b1bed6eb5 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 23 Jul 2018 13:53:12 +0200 Subject: net/smc: remove local variable page in smc_rx_splice() The page map address is already stored in the RMB descriptor. There is no need to derive it from the cpu_addr value. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_rx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index c99c987097b1..bbcf0fe4ae10 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -155,10 +155,8 @@ static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len, struct splice_pipe_desc spd; struct partial_page partial; struct smc_spd_priv *priv; - struct page *page; int bytes; - page = virt_to_page(smc->conn.rmb_desc->cpu_addr); priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; @@ -170,7 +168,7 @@ static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len, spd.nr_pages_max = 1; spd.nr_pages = 1; - spd.pages = &page; + spd.pages = &smc->conn.rmb_desc->pages; spd.partial = &partial; spd.ops = &smc_pipe_ops; spd.spd_release = smc_rx_spd_release; -- cgit v1.2.3-59-g8ed1b From 947541f36c561b5e0ca639ffc450a8c5221de467 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 25 Jul 2018 16:35:30 +0200 Subject: net/smc: fewer parameters for smc_llc_send_confirm_link() Link confirmation will always be sent across the new link being confirmed. This allows to shrink the parameter list. No functional change. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 10 ++-------- net/smc/smc_llc.c | 9 +++++---- net/smc/smc_llc.h | 2 +- 3 files changed, 8 insertions(+), 13 deletions(-) (limited to 'net/smc') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 7fc810ec31c5..7883f70f7c6d 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -352,10 +352,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) return SMC_CLC_DECL_INTERR; /* send CONFIRM LINK response over RoCE fabric */ - rc = smc_llc_send_confirm_link(link, - link->smcibdev->mac[link->ibport - 1], - &link->smcibdev->gid[link->ibport - 1], - SMC_LLC_RESP); + rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP); if (rc < 0) return SMC_CLC_DECL_TCL; @@ -951,10 +948,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) return SMC_CLC_DECL_INTERR; /* send CONFIRM LINK request to client over the RoCE fabric */ - rc = smc_llc_send_confirm_link(link, - link->smcibdev->mac[link->ibport - 1], - &link->smcibdev->gid[link->ibport - 1], - SMC_LLC_REQ); + rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ); if (rc < 0) return SMC_CLC_DECL_TCL; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index b7944aa1ffc3..f2ba99c2e69a 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -182,8 +182,7 @@ static int smc_llc_add_pending_send(struct smc_link *link, } /* high-level API to send LLC confirm link */ -int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[], - union ib_gid *gid, +int smc_llc_send_confirm_link(struct smc_link *link, enum smc_llc_reqresp reqresp) { struct smc_link_group *lgr = smc_get_lgr(link); @@ -202,8 +201,10 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[], confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC; if (reqresp == SMC_LLC_RESP) confllc->hd.flags |= SMC_LLC_FLAG_RESP; - memcpy(confllc->sender_mac, mac, ETH_ALEN); - memcpy(confllc->sender_gid, gid, SMC_GID_SIZE); + memcpy(confllc->sender_mac, link->smcibdev->mac[link->ibport - 1], + ETH_ALEN); + memcpy(confllc->sender_gid, &link->smcibdev->gid[link->ibport - 1], + SMC_GID_SIZE); hton24(confllc->sender_qp_num, link->roce_qp->qp_num); confllc->link_num = link->link_id; memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE); diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 65c8645e96a1..9a29fcbbcea8 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -36,7 +36,7 @@ enum smc_llc_msg_type { }; /* transmit */ -int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid, +int smc_llc_send_confirm_link(struct smc_link *lnk, enum smc_llc_reqresp reqresp); int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid, enum smc_llc_reqresp reqresp); -- cgit v1.2.3-59-g8ed1b From 7005ada68d1774d7c1109deaba0c2cd8e46f5091 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 25 Jul 2018 16:35:31 +0200 Subject: net/smc: use correct vlan gid of RoCE device SMC code uses the base gid for VLAN traffic. The gids exchanged in the CLC handshake and the gid index used for the QP have to switch from the base gid to the appropriate vlan gid. When searching for a matching IB device port for a certain vlan device, it does not make sense to return an IB device port, which is not enabled for the used vlan_id. Add another check whether a vlan gid exists for a certain IB device port. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 24 +++++++++++++----------- net/smc/smc_clc.c | 10 ++++------ net/smc/smc_clc.h | 2 +- net/smc/smc_core.c | 37 ++++--------------------------------- net/smc/smc_core.h | 5 +++-- net/smc/smc_diag.c | 2 +- net/smc/smc_ib.c | 41 ++++++++++++++++++++++++++++++++++++----- net/smc/smc_ib.h | 3 ++- net/smc/smc_llc.c | 15 +++++---------- net/smc/smc_llc.h | 2 +- net/smc/smc_pnet.c | 30 +++++++++++++++++++++--------- net/smc/smc_pnet.h | 3 ++- 12 files changed, 93 insertions(+), 81 deletions(-) (limited to 'net/smc') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 7883f70f7c6d..b81797103260 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -370,8 +370,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) /* send add link reject message, only one link supported for now */ rc = smc_llc_send_add_link(link, link->smcibdev->mac[link->ibport - 1], - &link->smcibdev->gid[link->ibport - 1], - SMC_LLC_RESP); + link->gid, SMC_LLC_RESP); if (rc < 0) return SMC_CLC_DECL_TCL; @@ -469,7 +468,7 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code, /* check if there is a rdma device available for this connection. */ /* called for connect and listen */ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev, - u8 *ibport) + u8 *ibport, unsigned short vlan_id, u8 gid[]) { int reason_code = 0; @@ -477,7 +476,8 @@ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev, * within same PNETID that also contains the ethernet device * used for the internal TCP socket */ - smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport); + smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id, + gid); if (!(*ibdev)) reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ @@ -523,12 +523,12 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd, static int smc_connect_clc(struct smc_sock *smc, int smc_type, struct smc_clc_msg_accept_confirm *aclc, struct smc_ib_device *ibdev, u8 ibport, - struct smcd_dev *ismdev) + u8 gid[], struct smcd_dev *ismdev) { int rc = 0; /* do inband token exchange */ - rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, ismdev); + rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev); if (rc) return rc; /* receive SMC Accept CLC message */ @@ -650,6 +650,7 @@ static int __smc_connect(struct smc_sock *smc) struct smc_clc_msg_accept_confirm aclc; struct smc_ib_device *ibdev; struct smcd_dev *ismdev; + u8 gid[SMC_GID_SIZE]; unsigned short vlan; int smc_type; int rc = 0; @@ -681,7 +682,7 @@ static int __smc_connect(struct smc_sock *smc) } /* check if there is a rdma device available */ - if (!smc_check_rdma(smc, &ibdev, &ibport)) { + if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) { /* RDMA is supported for this connection */ rdma_supported = true; if (ism_supported) @@ -695,7 +696,7 @@ static int __smc_connect(struct smc_sock *smc) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR); /* perform CLC handshake */ - rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, ismdev); + rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev); if (rc) { smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); return smc_connect_decline_fallback(smc, rc); @@ -970,8 +971,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) /* send ADD LINK request to client over the RoCE fabric */ rc = smc_llc_send_add_link(link, link->smcibdev->mac[link->ibport - 1], - &link->smcibdev->gid[link->ibport - 1], - SMC_LLC_REQ); + link->gid, SMC_LLC_REQ); if (rc < 0) return SMC_CLC_DECL_TCL; @@ -1193,6 +1193,7 @@ static void smc_listen_work(struct work_struct *work) struct smcd_dev *ismdev; u8 buf[SMC_CLC_MAX_LEN]; int local_contact = 0; + unsigned short vlan; int reason_code = 0; int rc = 0; u8 ibport; @@ -1241,7 +1242,8 @@ static void smc_listen_work(struct work_struct *work) /* check if RDMA is available */ if (!ism_supported && ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) || - smc_check_rdma(new_smc, &ibdev, &ibport) || + smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) || + smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) || smc_listen_rdma_check(new_smc, pclc) || smc_listen_rdma_init(new_smc, pclc, ibdev, ibport, &local_contact) || diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index ad39efdb4f1c..78d74938a9d9 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -378,7 +378,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) /* send CLC PROPOSAL message across internal TCP socket */ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, - struct smc_ib_device *ibdev, u8 ibport, + struct smc_ib_device *ibdev, u8 ibport, u8 gid[], struct smcd_dev *ismdev) { struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX]; @@ -409,7 +409,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, /* add SMC-R specifics */ memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&pclc.lcl.gid, &ibdev->gid[ibport - 1], SMC_GID_SIZE); + memcpy(&pclc.lcl.gid, gid, SMC_GID_SIZE); memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], ETH_ALEN); pclc.iparea_offset = htons(0); } @@ -492,8 +492,7 @@ int smc_clc_send_confirm(struct smc_sock *smc) cclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], - SMC_GID_SIZE); + memcpy(&cclc.lcl.gid, link->gid, SMC_GID_SIZE); memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(cclc.qpn, link->roce_qp->qp_num); @@ -566,8 +565,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) link = &conn->lgr->lnk[SMC_SINGLE_LINK]; memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); - memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], - SMC_GID_SIZE); + memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE); memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(aclc.qpn, link->roce_qp->qp_num); diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 100e988ad1a8..6bdc63352d6a 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -179,7 +179,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type); int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, - struct smc_ib_device *smcibdev, u8 ibport, + struct smc_ib_device *smcibdev, u8 ibport, u8 gid[], struct smcd_dev *ismdev); int smc_clc_send_confirm(struct smc_sock *smc); int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 66741e61a3b0..90c10ae9ae09 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -219,6 +219,10 @@ static int smc_lgr_create(struct smc_sock *smc, bool is_smcd, get_random_bytes(rndvec, sizeof(rndvec)); lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16); + rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport, + vlan_id, lnk->gid, &lnk->sgid_index); + if (rc) + goto free_lgr; rc = smc_llc_link_init(lnk); if (rc) goto free_lgr; @@ -522,37 +526,6 @@ out: return rc; } -/* determine the link gid matching the vlan id of the link group */ -static int smc_link_determine_gid(struct smc_link_group *lgr) -{ - struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; - struct ib_gid_attr gattr; - union ib_gid gid; - int i; - - if (!lgr->vlan_id) { - lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1]; - return 0; - } - - for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len; - i++) { - if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, - &gattr)) - continue; - if (gattr.ndev) { - if (is_vlan_dev(gattr.ndev) && - vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) { - lnk->gid = gid; - dev_put(gattr.ndev); - return 0; - } - dev_put(gattr.ndev); - } - } - return -ENODEV; -} - static bool smcr_lgr_match(struct smc_link_group *lgr, struct smc_clc_msg_local *lcl, enum smc_lgr_role role) @@ -631,8 +604,6 @@ create: if (rc) goto out; smc_lgr_register_conn(conn); /* add smc conn to lgr */ - if (!is_smcd) - rc = smc_link_determine_gid(conn->lgr); } conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 1e8974c50550..a4f0cc4e0270 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -84,14 +84,15 @@ struct smc_link { wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */ - union ib_gid gid; /* gid matching used vlan id */ + u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/ + u8 sgid_index; /* gid index for vlan id */ u32 peer_qpn; /* QP number of peer */ enum ib_mtu path_mtu; /* used mtu */ enum ib_mtu peer_mtu; /* mtu size of peer */ u32 psn_initial; /* QP tx initial packet seqno */ u32 peer_psn; /* QP rx initial packet seqno */ u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */ - u8 peer_gid[sizeof(union ib_gid)]; /* gid of peer*/ + u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/ u8 link_id; /* unique # within link group */ enum smc_link_state state; /* state of link */ diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index d772cd10297e..a3cf7313a2d3 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -154,7 +154,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, smc->conn.lgr->lnk[0].smcibdev->ibdev->name, sizeof(smc->conn.lgr->lnk[0].smcibdev->ibdev->name)); smc_gid_be16_convert(linfo.lnk[0].gid, - smc->conn.lgr->lnk[0].gid.raw); + smc->conn.lgr->lnk[0].gid); smc_gid_be16_convert(linfo.lnk[0].peer_gid, smc->conn.lgr->lnk[0].peer_gid); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 4706ab7092a9..2cc64bc8ae20 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -68,7 +68,7 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk) qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu); qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport); - rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, 0, 1, 0); + rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0); rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid); memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac, sizeof(lnk->peer_mac)); @@ -142,13 +142,13 @@ out: return rc; } -static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) +static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport) { struct ib_gid_attr gattr; + union ib_gid gid; int rc; - rc = ib_query_gid(smcibdev->ibdev, ibport, 0, - &smcibdev->gid[ibport - 1], &gattr); + rc = ib_query_gid(smcibdev->ibdev, ibport, 0, &gid, &gattr); if (rc || !gattr.ndev) return -ENODEV; @@ -175,6 +175,37 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; } +/* determine the gid for an ib-device port and vlan id */ +int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, + unsigned short vlan_id, u8 gid[], u8 *sgid_index) +{ + struct ib_gid_attr gattr; + union ib_gid _gid; + int i; + + for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) { + memset(&_gid, 0, SMC_GID_SIZE); + memset(&gattr, 0, sizeof(gattr)); + if (ib_query_gid(smcibdev->ibdev, ibport, i, &_gid, &gattr)) + continue; + if (!gattr.ndev) + continue; + if (((!vlan_id && !is_vlan_dev(gattr.ndev)) || + (vlan_id && is_vlan_dev(gattr.ndev) && + vlan_dev_vlan_id(gattr.ndev) == vlan_id)) && + gattr.gid_type == IB_GID_TYPE_IB) { + if (gid) + memcpy(gid, &_gid, SMC_GID_SIZE); + if (sgid_index) + *sgid_index = i; + dev_put(gattr.ndev); + return 0; + } + dev_put(gattr.ndev); + } + return -ENODEV; +} + static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) { int rc; @@ -186,7 +217,7 @@ static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) if (rc) goto out; /* the SMC protocol requires specification of the RoCE MAC address */ - rc = smc_ib_fill_gid_and_mac(smcibdev, ibport); + rc = smc_ib_fill_mac(smcibdev, ibport); if (rc) goto out; if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET, diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 7c1223c91229..bac7fd65a4c0 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -40,7 +40,6 @@ struct smc_ib_device { /* ib-device infos for smc */ struct tasklet_struct recv_tasklet; /* called by recv cq handler */ char mac[SMC_MAX_PORTS][ETH_ALEN]; /* mac address per port*/ - union ib_gid gid[SMC_MAX_PORTS]; /* gid per port */ u8 pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN]; /* pnetid per port */ u8 initialized : 1; /* ib dev CQ, evthdl done */ @@ -77,4 +76,6 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev, void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction); +int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, + unsigned short vlan_id, u8 gid[], u8 *sgid_index); #endif diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index f2ba99c2e69a..a88c01029fa6 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -203,8 +203,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, confllc->hd.flags |= SMC_LLC_FLAG_RESP; memcpy(confllc->sender_mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); - memcpy(confllc->sender_gid, &link->smcibdev->gid[link->ibport - 1], - SMC_GID_SIZE); + memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE); hton24(confllc->sender_qp_num, link->roce_qp->qp_num); confllc->link_num = link->link_id; memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE); @@ -241,8 +240,7 @@ static int smc_llc_send_confirm_rkey(struct smc_link *link, /* prepare an add link message */ static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc, - struct smc_link *link, u8 mac[], - union ib_gid *gid, + struct smc_link *link, u8 mac[], u8 gid[], enum smc_llc_reqresp reqresp) { memset(addllc, 0, sizeof(*addllc)); @@ -259,8 +257,7 @@ static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc, } /* send ADD LINK request or response */ -int smc_llc_send_add_link(struct smc_link *link, u8 mac[], - union ib_gid *gid, +int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], enum smc_llc_reqresp reqresp) { struct smc_llc_msg_add_link *addllc; @@ -423,14 +420,12 @@ static void smc_llc_rx_add_link(struct smc_link *link, if (lgr->role == SMC_SERV) { smc_llc_prep_add_link(llc, link, link->smcibdev->mac[link->ibport - 1], - &link->smcibdev->gid[link->ibport - 1], - SMC_LLC_REQ); + link->gid, SMC_LLC_REQ); } else { smc_llc_prep_add_link(llc, link, link->smcibdev->mac[link->ibport - 1], - &link->smcibdev->gid[link->ibport - 1], - SMC_LLC_RESP); + link->gid, SMC_LLC_RESP); } smc_llc_send_message(link, llc, sizeof(*llc)); } diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 9a29fcbbcea8..95a7f3662e59 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -38,7 +38,7 @@ enum smc_llc_msg_type { /* transmit */ int smc_llc_send_confirm_link(struct smc_link *lnk, enum smc_llc_reqresp reqresp); -int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid, +int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], enum smc_llc_reqresp reqresp); int smc_llc_send_delete_link(struct smc_link *link, enum smc_llc_reqresp reqresp); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 1b6c066d3495..01c6ce042a1c 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -535,11 +535,13 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev) } /* Determine the corresponding IB device port based on the hardware PNETID. - * Searching stops at the first matching active IB device port. + * Searching stops at the first matching active IB device port with vlan_id + * configured. */ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, struct smc_ib_device **smcibdev, - u8 *ibport) + u8 *ibport, unsigned short vlan_id, + u8 gid[]) { u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; struct smc_ib_device *ibdev; @@ -553,15 +555,20 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, spin_lock(&smc_ib_devices.lock); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { for (i = 1; i <= SMC_MAX_PORTS; i++) { + if (!rdma_is_port_valid(ibdev->ibdev, i)) + continue; if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid, SMC_MAX_PNETID_LEN) && - smc_ib_port_active(ibdev, i)) { + smc_ib_port_active(ibdev, i) && + !smc_ib_determine_gid(ibdev, i, vlan_id, gid, + NULL)) { *smcibdev = ibdev; *ibport = i; - break; + goto out; } } } +out: spin_unlock(&smc_ib_devices.lock); } @@ -589,7 +596,8 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, /* Lookup of coupled ib_device via SMC pnet table */ static void smc_pnet_find_roce_by_table(struct net_device *netdev, struct smc_ib_device **smcibdev, - u8 *ibport) + u8 *ibport, unsigned short vlan_id, + u8 gid[]) { struct smc_pnetentry *pnetelem; @@ -597,7 +605,10 @@ static void smc_pnet_find_roce_by_table(struct net_device *netdev, list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { if (netdev == pnetelem->ndev) { if (smc_ib_port_active(pnetelem->smcibdev, - pnetelem->ib_port)) { + pnetelem->ib_port) && + !smc_ib_determine_gid(pnetelem->smcibdev, + pnetelem->ib_port, vlan_id, + gid, NULL)) { *smcibdev = pnetelem->smcibdev; *ibport = pnetelem->ib_port; } @@ -612,7 +623,8 @@ static void smc_pnet_find_roce_by_table(struct net_device *netdev, * ethernet interface. */ void smc_pnet_find_roce_resource(struct sock *sk, - struct smc_ib_device **smcibdev, u8 *ibport) + struct smc_ib_device **smcibdev, u8 *ibport, + unsigned short vlan_id, u8 gid[]) { struct dst_entry *dst = sk_dst_get(sk); @@ -625,12 +637,12 @@ void smc_pnet_find_roce_resource(struct sock *sk, goto out_rel; /* if possible, lookup via hardware-defined pnetid */ - smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport); + smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid); if (*smcibdev) goto out_rel; /* lookup via SMC PNET table */ - smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport); + smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport, vlan_id, gid); out_rel: dst_release(dst); diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 1e94fd4df7bc..8ff777636e32 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -33,7 +33,8 @@ int smc_pnet_init(void) __init; void smc_pnet_exit(void); int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev); void smc_pnet_find_roce_resource(struct sock *sk, - struct smc_ib_device **smcibdev, u8 *ibport); + struct smc_ib_device **smcibdev, u8 *ibport, + unsigned short vlan_id, u8 gid[]); void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev); #endif -- cgit v1.2.3-59-g8ed1b From 603cc1498455cf57f5ca4483b600efb37ea2c56c Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 25 Jul 2018 16:35:32 +0200 Subject: net/smc: provide fallback reason code Remember the fallback reason code and the peer diagnosis code for smc sockets, and provide them in smc_diag.c to the netlink interface. And add more detailed reason codes. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- include/uapi/linux/smc_diag.h | 6 +++++ net/smc/af_smc.c | 52 +++++++++++++++++++++++++------------------ net/smc/smc.h | 2 ++ net/smc/smc_clc.c | 6 ++++- net/smc/smc_clc.h | 18 ++++++++++----- net/smc/smc_diag.c | 6 +++++ 6 files changed, 61 insertions(+), 29 deletions(-) (limited to 'net/smc') diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index 48ae3ee22b2d..ac9e8c96d9bd 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -43,6 +43,7 @@ enum { SMC_DIAG_LGRINFO, SMC_DIAG_SHUTDOWN, SMC_DIAG_DMBINFO, + SMC_DIAG_FALLBACK, __SMC_DIAG_MAX, }; @@ -92,6 +93,11 @@ struct smc_diag_lgrinfo { __u8 role; }; +struct smc_diag_fallback { + __u32 reason; + __u32 peer_diagnosis; +}; + struct smcd_diag_dmbinfo { /* SMC-D Socket internals */ __u32 linkid; /* Link identifier */ __u64 peer_gid; /* Peer GID */ diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index b81797103260..fce7e4751151 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -344,17 +344,17 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) rc = smc_ib_modify_qp_rts(link); if (rc) - return SMC_CLC_DECL_INTERR; + return SMC_CLC_DECL_ERR_RDYLNK; smc_wr_remember_qp_attr(link); if (smc_reg_rmb(link, smc->conn.rmb_desc, false)) - return SMC_CLC_DECL_INTERR; + return SMC_CLC_DECL_ERR_REGRMB; /* send CONFIRM LINK response over RoCE fabric */ rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP); if (rc < 0) - return SMC_CLC_DECL_TCL; + return SMC_CLC_DECL_TIMEOUT_CL; /* receive ADD LINK request from server over RoCE fabric */ rest = wait_for_completion_interruptible_timeout(&link->llc_add, @@ -372,7 +372,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) link->smcibdev->mac[link->ibport - 1], link->gid, SMC_LLC_RESP); if (rc < 0) - return SMC_CLC_DECL_TCL; + return SMC_CLC_DECL_TIMEOUT_AL; smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time); @@ -424,9 +424,10 @@ static void smc_link_save_peer_info(struct smc_link *link, } /* fall back during connect */ -static int smc_connect_fallback(struct smc_sock *smc) +static int smc_connect_fallback(struct smc_sock *smc, int reason_code) { smc->use_fallback = true; + smc->fallback_rsn = reason_code; smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) smc->sk.sk_state = SMC_ACTIVE; @@ -443,7 +444,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code) sock_put(&smc->sk); /* passive closing */ return reason_code; } - if (reason_code != SMC_CLC_DECL_REPLY) { + if (reason_code != SMC_CLC_DECL_PEERDECL) { rc = smc_clc_send_decline(smc, reason_code); if (rc < 0) { if (smc->sk.sk_state == SMC_INIT) @@ -451,7 +452,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code) return rc; } } - return smc_connect_fallback(smc); + return smc_connect_fallback(smc, reason_code); } /* abort connecting */ @@ -568,7 +569,7 @@ static int smc_connect_rdma(struct smc_sock *smc, smc_link_save_peer_info(link, aclc); if (smc_rmb_rtoken_handling(&smc->conn, aclc)) - return smc_connect_abort(smc, SMC_CLC_DECL_INTERR, + return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK, local_contact); smc_close_init(smc); @@ -576,12 +577,12 @@ static int smc_connect_rdma(struct smc_sock *smc, if (local_contact == SMC_FIRST_CONTACT) { if (smc_ib_ready_link(link)) - return smc_connect_abort(smc, SMC_CLC_DECL_INTERR, + return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK, local_contact); } else { if (!smc->conn.rmb_desc->reused && smc_reg_rmb(link, smc->conn.rmb_desc, true)) - return smc_connect_abort(smc, SMC_CLC_DECL_INTERR, + return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB, local_contact); } smc_rmb_sync_sg_for_device(&smc->conn); @@ -659,11 +660,11 @@ static int __smc_connect(struct smc_sock *smc) sock_hold(&smc->sk); /* sock put in passive closing */ if (smc->use_fallback) - return smc_connect_fallback(smc); + return smc_connect_fallback(smc, smc->fallback_rsn); /* if peer has not signalled SMC-capability, fall back */ if (!tcp_sk(smc->clcsock->sk)->syn_smc) - return smc_connect_fallback(smc); + return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC); /* IPSec connections opt out of SMC-R optimizations */ if (using_ipsec(smc)) @@ -693,7 +694,7 @@ static int __smc_connect(struct smc_sock *smc) /* if neither ISM nor RDMA are supported, fallback */ if (!rdma_supported && !ism_supported) - return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR); + return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV); /* perform CLC handshake */ rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev); @@ -708,7 +709,7 @@ static int __smc_connect(struct smc_sock *smc) else if (ism_supported && aclc.hdr.path == SMC_TYPE_D) rc = smc_connect_ism(smc, &aclc, ismdev); else - rc = SMC_CLC_DECL_CNFERR; + rc = SMC_CLC_DECL_MODEUNSUPP; if (rc) { smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan); return smc_connect_decline_fallback(smc, rc); @@ -946,12 +947,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) link = &lgr->lnk[SMC_SINGLE_LINK]; if (smc_reg_rmb(link, smc->conn.rmb_desc, false)) - return SMC_CLC_DECL_INTERR; + return SMC_CLC_DECL_ERR_REGRMB; /* send CONFIRM LINK request to client over the RoCE fabric */ rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ); if (rc < 0) - return SMC_CLC_DECL_TCL; + return SMC_CLC_DECL_TIMEOUT_CL; /* receive CONFIRM LINK response from client over the RoCE fabric */ rest = wait_for_completion_interruptible_timeout( @@ -973,7 +974,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) link->smcibdev->mac[link->ibport - 1], link->gid, SMC_LLC_REQ); if (rc < 0) - return SMC_CLC_DECL_TCL; + return SMC_CLC_DECL_TIMEOUT_AL; /* receive ADD LINK response from client over the RoCE fabric */ rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp, @@ -1048,7 +1049,8 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, } smc_conn_free(&new_smc->conn); new_smc->use_fallback = true; - if (reason_code && reason_code != SMC_CLC_DECL_REPLY) { + new_smc->fallback_rsn = reason_code; + if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { if (smc_clc_send_decline(new_smc, reason_code) < 0) { smc_listen_out_err(new_smc); return; @@ -1139,7 +1141,7 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) if (local_contact != SMC_FIRST_CONTACT) { if (!new_smc->conn.rmb_desc->reused) { if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true)) - return SMC_CLC_DECL_INTERR; + return SMC_CLC_DECL_ERR_REGRMB; } } smc_rmb_sync_sg_for_device(&new_smc->conn); @@ -1159,13 +1161,13 @@ static void smc_listen_rdma_finish(struct smc_sock *new_smc, smc_link_save_peer_info(link, cclc); if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) { - reason_code = SMC_CLC_DECL_INTERR; + reason_code = SMC_CLC_DECL_ERR_RTOK; goto decline; } if (local_contact == SMC_FIRST_CONTACT) { if (smc_ib_ready_link(link)) { - reason_code = SMC_CLC_DECL_INTERR; + reason_code = SMC_CLC_DECL_ERR_RDYLNK; goto decline; } /* QP confirmation over RoCE fabric */ @@ -1206,6 +1208,7 @@ static void smc_listen_work(struct work_struct *work) /* check if peer is smc capable */ if (!tcp_sk(newclcsock->sk)->syn_smc) { new_smc->use_fallback = true; + new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC; smc_listen_out_connected(new_smc); return; } @@ -1250,7 +1253,8 @@ static void smc_listen_work(struct work_struct *work) smc_listen_rdma_reg(new_smc, local_contact))) { /* SMC not supported, decline */ mutex_unlock(&smc_create_lgr_pending); - smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact); + smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP, + local_contact); return; } @@ -1297,6 +1301,7 @@ static void smc_tcp_listen_work(struct work_struct *work) new_smc->listen_smc = lsmc; new_smc->use_fallback = lsmc->use_fallback; + new_smc->fallback_rsn = lsmc->fallback_rsn; sock_hold(lsk); /* sock_put in smc_listen_work */ INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); smc_copy_sock_settings_to_smc(new_smc); @@ -1451,6 +1456,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_FASTOPEN) { if (sk->sk_state == SMC_INIT) { smc->use_fallback = true; + smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; } else { rc = -EINVAL; goto out; @@ -1648,6 +1654,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, /* option not supported by SMC */ if (sk->sk_state == SMC_INIT) { smc->use_fallback = true; + smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; } else { if (!smc->use_fallback) rc = -EINVAL; @@ -1885,6 +1892,7 @@ static int smc_create(struct net *net, struct socket *sock, int protocol, /* create internal TCP socket for CLC handshake and fallback */ smc = smc_sk(sk); smc->use_fallback = false; /* assume rdma capability first */ + smc->fallback_rsn = 0; rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &smc->clcsock); if (rc) { diff --git a/net/smc/smc.h b/net/smc/smc.h index be20acd7b5ab..08786ace6010 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -208,6 +208,8 @@ struct smc_sock { /* smc sock container */ struct list_head accept_q; /* sockets to be accepted */ spinlock_t accept_q_lock; /* protects accept_q */ bool use_fallback; /* fallback to tcp */ + int fallback_rsn; /* reason for fallback */ + u32 peer_diagnosis; /* decline reason from peer */ int sockopt_defer_accept; /* sockopt TCP_DEFER_ACCEPT * value diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 78d74938a9d9..83aba9ade060 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -334,7 +334,11 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, goto out; } if (clcm->type == SMC_CLC_DECLINE) { - reason_code = SMC_CLC_DECL_REPLY; + struct smc_clc_msg_decline *dclc; + + dclc = (struct smc_clc_msg_decline *)clcm; + reason_code = SMC_CLC_DECL_PEERDECL; + smc->peer_diagnosis = ntohl(dclc->peer_diagnosis); if (((struct smc_clc_msg_decline *)buf)->hdr.flag) { smc->conn.lgr->sync_err = 1; smc_lgr_terminate(smc->conn.lgr); diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 6bdc63352d6a..18da89b681c2 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -28,15 +28,21 @@ #define SMC_TYPE_B 3 /* SMC-R and SMC-D */ #define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */ #define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */ -#define SMC_CLC_DECL_TIMEOUT 0x02000000 /* timeout */ +#define SMC_CLC_DECL_TIMEOUT_CL 0x02010000 /* timeout w4 QP confirm link */ +#define SMC_CLC_DECL_TIMEOUT_AL 0x02020000 /* timeout w4 QP add link */ #define SMC_CLC_DECL_CNFERR 0x03000000 /* configuration error */ -#define SMC_CLC_DECL_IPSEC 0x03030000 /* IPsec usage */ +#define SMC_CLC_DECL_PEERNOSMC 0x03010000 /* peer did not indicate SMC */ +#define SMC_CLC_DECL_IPSEC 0x03020000 /* IPsec usage */ +#define SMC_CLC_DECL_NOSMCDEV 0x03030000 /* no SMC device found */ +#define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/ +#define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */ +#define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */ #define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */ -#define SMC_CLC_DECL_REPLY 0x06000000 /* reply to a received decline */ +#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */ #define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */ -#define SMC_CLC_DECL_TCL 0x02040000 /* timeout w4 QP confirm */ -#define SMC_CLC_DECL_SEND 0x07000000 /* sending problem */ -#define SMC_CLC_DECL_RMBE_EC 0x08000000 /* peer has eyecatcher in RMBE */ +#define SMC_CLC_DECL_ERR_RTOK 0x99990001 /* rtoken handling failed */ +#define SMC_CLC_DECL_ERR_RDYLNK 0x99990002 /* ib ready link failed */ +#define SMC_CLC_DECL_ERR_REGRMB 0x99990003 /* reg rmb failed */ struct smc_clc_msg_hdr { /* header1 of clc messages */ u8 eyecatcher[4]; /* eye catcher */ diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index a3cf7313a2d3..dbf64a93d68a 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -79,6 +79,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, struct nlattr *bc) { struct smc_sock *smc = smc_sk(sk); + struct smc_diag_fallback fallback; struct user_namespace *user_ns; struct smc_diag_msg *r; struct nlmsghdr *nlh; @@ -101,6 +102,11 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) goto errout; + fallback.reason = smc->fallback_rsn; + fallback.peer_diagnosis = smc->peer_diagnosis; + if (nla_put(skb, SMC_DIAG_FALLBACK, sizeof(fallback), &fallback) < 0) + goto errout; + if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.alert_token_local) { struct smc_connection *conn = &smc->conn; -- cgit v1.2.3-59-g8ed1b From 0d18a0cb4b1585d9e5a3b300d5df9ed866561ffb Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 25 Jul 2018 16:35:33 +0200 Subject: net/smc: improve delete link processing Send an orderly DELETE LINK request before termination of a link group, add support for client triggered DELETE LINK processing. And send a disorderly DELETE LINK before module is unloaded. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 47 ++++++++++++++++++++++++++++++++++++++++++----- net/smc/smc_core.h | 4 +++- net/smc/smc_llc.c | 30 +++++++++++++++++++----------- net/smc/smc_llc.h | 3 ++- net/smc/smc_wr.c | 7 ++----- 5 files changed, 68 insertions(+), 23 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 90c10ae9ae09..a46418f45ecd 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -30,6 +30,7 @@ #define SMC_LGR_NUM_INCR 256 #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ) +#define SMC_LGR_FREE_DELAY_FAST (8 * HZ) static struct smc_lgr_list smc_lgr_list = { /* established link groups */ .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), @@ -51,6 +52,11 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV); } +void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr) +{ + mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST); +} + /* Register connection's alert token in our lookup structure. * To use rbtrees we have to implement our own insert core. * Requires @conns_lock @@ -133,6 +139,20 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) smc_lgr_schedule_free_work(lgr); } +/* Send delete link, either as client to request the initiation + * of the DELETE LINK sequence from server; or as server to + * initiate the delete processing. See smc_llc_rx_delete_link(). + */ +static int smc_link_send_delete(struct smc_link *lnk) +{ + if (lnk->state == SMC_LNK_ACTIVE && + !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) { + smc_llc_link_deleting(lnk); + return 0; + } + return -ENOTCONN; +} + static void smc_lgr_free_work(struct work_struct *work) { struct smc_link_group *lgr = container_of(to_delayed_work(work), @@ -153,10 +173,21 @@ static void smc_lgr_free_work(struct work_struct *work) list_del_init(&lgr->list); /* remove from smc_lgr_list */ free: spin_unlock_bh(&smc_lgr_list.lock); + + if (!lgr->is_smcd && !lgr->terminating) { + /* try to send del link msg, on error free lgr immediately */ + if (!smc_link_send_delete(&lgr->lnk[SMC_SINGLE_LINK])) { + /* reschedule in case we never receive a response */ + smc_lgr_schedule_free_work(lgr); + return; + } + } + if (!delayed_work_pending(&lgr->free_work)) { - if (!lgr->is_smcd && - lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE) - smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); + struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; + + if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE) + smc_llc_link_inactive(lnk); smc_lgr_free(lgr); } } @@ -984,8 +1015,14 @@ void smc_core_exit(void) spin_unlock_bh(&smc_lgr_list.lock); list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { list_del_init(&lgr->list); - if (!lgr->is_smcd) - smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); + if (!lgr->is_smcd) { + struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; + + if (lnk->state == SMC_LNK_ACTIVE) + smc_llc_send_delete_link(lnk, SMC_LLC_REQ, + false); + smc_llc_link_inactive(lnk); + } cancel_delayed_work_sync(&lgr->free_work); smc_lgr_free(lgr); /* free link group */ } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index a4f0cc4e0270..c156674733c9 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -34,7 +34,8 @@ enum smc_lgr_role { /* possible roles of a link group */ enum smc_link_state { /* possible states of a link */ SMC_LNK_INACTIVE, /* link is inactive */ SMC_LNK_ACTIVATING, /* link is being activated */ - SMC_LNK_ACTIVE /* link is active */ + SMC_LNK_ACTIVE, /* link is active */ + SMC_LNK_DELETING, /* link is being deleted */ }; #define SMC_WR_BUF_SIZE 48 /* size of work request buffer */ @@ -265,6 +266,7 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, struct smc_clc_msg_local *lcl, struct smcd_dev *smcd, u64 peer_gid); void smcd_conn_free(struct smc_connection *conn); +void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr); void smc_core_exit(void); static inline struct smc_link_group *smc_get_lgr(struct smc_link *link) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index a88c01029fa6..9c916c709ca7 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -278,7 +278,7 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], /* prepare a delete link message */ static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc, struct smc_link *link, - enum smc_llc_reqresp reqresp) + enum smc_llc_reqresp reqresp, bool orderly) { memset(delllc, 0, sizeof(*delllc)); delllc->hd.common.type = SMC_LLC_DELETE_LINK; @@ -287,13 +287,14 @@ static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc, delllc->hd.flags |= SMC_LLC_FLAG_RESP; /* DEL_LINK_ALL because only 1 link supported */ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; - delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; + if (orderly) + delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; delllc->link_num = link->link_id; } /* send DELETE LINK request or response */ int smc_llc_send_delete_link(struct smc_link *link, - enum smc_llc_reqresp reqresp) + enum smc_llc_reqresp reqresp, bool orderly) { struct smc_llc_msg_del_link *delllc; struct smc_wr_tx_pend_priv *pend; @@ -304,7 +305,7 @@ int smc_llc_send_delete_link(struct smc_link *link, if (rc) return rc; delllc = (struct smc_llc_msg_del_link *)wr_buf; - smc_llc_prep_delete_link(delllc, link, reqresp); + smc_llc_prep_delete_link(delllc, link, reqresp, orderly); /* send llc message */ rc = smc_wr_tx_send(link, pend); return rc; @@ -438,17 +439,19 @@ static void smc_llc_rx_delete_link(struct smc_link *link, if (llc->hd.flags & SMC_LLC_FLAG_RESP) { if (lgr->role == SMC_SERV) - smc_lgr_terminate(lgr); + smc_lgr_schedule_free_work_fast(lgr); } else { + smc_lgr_forget(lgr); + smc_llc_link_deleting(link); if (lgr->role == SMC_SERV) { - smc_lgr_forget(lgr); - smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ); - smc_llc_send_message(link, llc, sizeof(*llc)); + /* client asks to delete this link, send request */ + smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true); } else { - smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP); - smc_llc_send_message(link, llc, sizeof(*llc)); - smc_lgr_terminate(lgr); + /* server requests to delete this link, send response */ + smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true); } + smc_llc_send_message(link, llc, sizeof(*llc)); + smc_lgr_schedule_free_work_fast(lgr); } } @@ -622,6 +625,11 @@ void smc_llc_link_active(struct smc_link *link, int testlink_time) } } +void smc_llc_link_deleting(struct smc_link *link) +{ + link->state = SMC_LNK_DELETING; +} + /* called in tasklet context */ void smc_llc_link_inactive(struct smc_link *link) { diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 95a7f3662e59..9e2ff088e301 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -41,9 +41,10 @@ int smc_llc_send_confirm_link(struct smc_link *lnk, int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], enum smc_llc_reqresp reqresp); int smc_llc_send_delete_link(struct smc_link *link, - enum smc_llc_reqresp reqresp); + enum smc_llc_reqresp reqresp, bool orderly); int smc_llc_link_init(struct smc_link *link); void smc_llc_link_active(struct smc_link *link, int testlink_time); +void smc_llc_link_deleting(struct smc_link *link); void smc_llc_link_inactive(struct smc_link *link); void smc_llc_link_clear(struct smc_link *link); int smc_llc_do_confirm_rkey(struct smc_link *link, diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index b6df69756bef..f856b8402b3f 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -182,17 +182,14 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, if (rc) return rc; } else { - struct smc_link_group *lgr; - - lgr = smc_get_lgr(link); rc = wait_event_timeout( link->wr_tx_wait, - list_empty(&lgr->list) || /* lgr terminated */ + link->state == SMC_LNK_INACTIVE || (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); if (!rc) { /* timeout - terminate connections */ - smc_lgr_terminate(lgr); + smc_lgr_terminate(smc_get_lgr(link)); return -EPIPE; } if (idx == link->wr_tx_cnt) -- cgit v1.2.3-59-g8ed1b From dd979b4df817e9976f18fb6f9d134d6bc4a3c317 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 30 Jul 2018 09:42:10 +0200 Subject: net: simplify sock_poll_wait The wait_address argument is always directly derived from the filp argument, so remove it. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- crypto/af_alg.c | 2 +- include/net/sock.h | 11 ++++++----- net/atm/common.c | 2 +- net/caif/caif_socket.c | 2 +- net/core/datagram.c | 2 +- net/dccp/proto.c | 2 +- net/ipv4/tcp.c | 2 +- net/iucv/af_iucv.c | 2 +- net/nfc/llcp_sock.c | 2 +- net/rxrpc/af_rxrpc.c | 2 +- net/smc/af_smc.c | 2 +- net/tipc/socket.c | 2 +- net/unix/af_unix.c | 4 ++-- 13 files changed, 19 insertions(+), 18 deletions(-) (limited to 'net/smc') diff --git a/crypto/af_alg.c b/crypto/af_alg.c index c166f424871c..b053179e0bc5 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1071,7 +1071,7 @@ __poll_t af_alg_poll(struct file *file, struct socket *sock, struct af_alg_ctx *ctx = ask->private; __poll_t mask; - sock_poll_wait(file, sk_sleep(sk), wait); + sock_poll_wait(file, wait); mask = 0; if (!ctx->more || ctx->used) diff --git a/include/net/sock.h b/include/net/sock.h index 83b747538bd0..0518f61926ec 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2057,16 +2057,17 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq) /** * sock_poll_wait - place memory barrier behind the poll_wait call. * @filp: file - * @wait_address: socket wait queue * @p: poll_table * * See the comments in the wq_has_sleeper function. */ -static inline void sock_poll_wait(struct file *filp, - wait_queue_head_t *wait_address, poll_table *p) +static inline void sock_poll_wait(struct file *filp, poll_table *p) { - if (!poll_does_not_wait(p) && wait_address) { - poll_wait(filp, wait_address, p); + struct socket *sock = filp->private_data; + wait_queue_head_t *wq = sk_sleep(sock->sk); + + if (!poll_does_not_wait(p) && wq) { + poll_wait(filp, wq, p); /* We need to be sure we are in sync with the * socket flags modification. * diff --git a/net/atm/common.c b/net/atm/common.c index a7a68e509628..9f8cb0d2e71e 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -653,7 +653,7 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait) struct atm_vcc *vcc; __poll_t mask; - sock_poll_wait(file, sk_sleep(sk), wait); + sock_poll_wait(file, wait); mask = 0; vcc = ATM_SD(sock); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index a6fb1b3bcad9..d18965f3291f 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -941,7 +941,7 @@ static __poll_t caif_poll(struct file *file, __poll_t mask; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - sock_poll_wait(file, sk_sleep(sk), wait); + sock_poll_wait(file, wait); mask = 0; /* exceptional events? */ diff --git a/net/core/datagram.c b/net/core/datagram.c index 9938952c5c78..9aac0d63d53e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -837,7 +837,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; __poll_t mask; - sock_poll_wait(file, sk_sleep(sk), wait); + sock_poll_wait(file, wait); mask = 0; /* exceptional events? */ diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 0d56e36a6db7..875858c8b059 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -325,7 +325,7 @@ __poll_t dccp_poll(struct file *file, struct socket *sock, __poll_t mask; struct sock *sk = sock->sk; - sock_poll_wait(file, sk_sleep(sk), wait); + sock_poll_wait(file, wait); if (sk->sk_state == DCCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 514aaac1626f..f3bfb9f29520 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -507,7 +507,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) const struct tcp_sock *tp = tcp_sk(sk); int state; - sock_poll_wait(file, sk_sleep(sk), wait); + sock_poll_wait(file, wait); state = inet_sk_state_load(sk); if (state == TCP_LISTEN) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 8d1c43f8fed4..92ee91e34395 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1494,7 +1494,7 @@ __poll_t iucv_sock_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; __poll_t mask = 0; - sock_poll_wait(file, sk_sleep(sk), wait); + sock_poll_wait(file, wait); if (sk->sk_state == IUCV_LISTEN) return iucv_accept_poll(sk); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index ea0c0c6f1874..dd4adf8b1167 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -556,7 +556,7 @@ static __poll_t llcp_sock_poll(struct file *file, struct socket *sock, pr_debug("%p\n", sk); - sock_poll_wait(file, sk_sleep(sk), wait); + sock_poll_wait(file, wait); if (sk->sk_state == LLCP_LISTEN) return llcp_accept_poll(sk); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 2b463047dd7b..ac44d8afffb1 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -741,7 +741,7 @@ static __poll_t rxrpc_poll(struct file *file, struct socket *sock, struct rxrpc_sock *rx = rxrpc_sk(sk); __poll_t mask; - sock_poll_wait(file, sk_sleep(sk), wait); + sock_poll_wait(file, wait); mask = 0; /* the socket is readable if there are any messages waiting on the Rx diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index fce7e4751151..0fc94f296e54 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1535,7 +1535,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, mask |= EPOLLERR; } else { if (sk->sk_state != SMC_CLOSED) - sock_poll_wait(file, sk_sleep(sk), wait); + sock_poll_wait(file, wait); if (sk->sk_err) mask |= EPOLLERR; if ((sk->sk_shutdown == SHUTDOWN_MASK) || diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3d21414ba357..3763bedecf5f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -716,7 +716,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, struct tipc_sock *tsk = tipc_sk(sk); __poll_t revents = 0; - sock_poll_wait(file, sk_sleep(sk), wait); + sock_poll_wait(file, wait); if (sk->sk_shutdown & RCV_SHUTDOWN) revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e5473c03d667..1772a0e32665 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2635,7 +2635,7 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa struct sock *sk = sock->sk; __poll_t mask; - sock_poll_wait(file, sk_sleep(sk), wait); + sock_poll_wait(file, wait); mask = 0; /* exceptional events? */ @@ -2672,7 +2672,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, unsigned int writable; __poll_t mask; - sock_poll_wait(file, sk_sleep(sk), wait); + sock_poll_wait(file, wait); mask = 0; /* exceptional events? */ -- cgit v1.2.3-59-g8ed1b From 0d86caff06363151df21603eb1f4e3207ea91bd2 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 10 Aug 2018 17:45:11 +0200 Subject: net/smc: send response to test link signal With SMC-D z/OS sends a test link signal every 10 seconds. Linux is supposed to answer, otherwise the SMC-D connection breaks. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_ism.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'net/smc') diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index cfade7fdcc6d..e36f21ce7252 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -184,6 +184,37 @@ struct smc_ism_event_work { struct smcd_event event; }; +#define ISM_EVENT_REQUEST 0x0001 +#define ISM_EVENT_RESPONSE 0x0002 +#define ISM_EVENT_REQUEST_IR 0x00000001 +#define ISM_EVENT_CODE_TESTLINK 0x83 + +static void smcd_handle_sw_event(struct smc_ism_event_work *wrk) +{ + union { + u64 info; + struct { + u32 uid; + unsigned short vlanid; + u16 code; + }; + } ev_info; + + switch (wrk->event.code) { + case ISM_EVENT_CODE_TESTLINK: /* Activity timer */ + ev_info.info = wrk->event.info; + if (ev_info.code == ISM_EVENT_REQUEST) { + ev_info.code = ISM_EVENT_RESPONSE; + wrk->smcd->ops->signal_event(wrk->smcd, + wrk->event.tok, + ISM_EVENT_REQUEST_IR, + ISM_EVENT_CODE_TESTLINK, + ev_info.info); + } + break; + } +} + /* worker for SMC-D events */ static void smc_ism_event_work(struct work_struct *work) { @@ -196,6 +227,9 @@ static void smc_ism_event_work(struct work_struct *work) break; case ISM_EVENT_DMB: break; + case ISM_EVENT_SWR: /* Software defined event */ + smcd_handle_sw_event(wrk); + break; } kfree(wrk); } -- cgit v1.2.3-59-g8ed1b