From 4fc8cd4919428f9b86f0b65e2f3245a1c186737f Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Tue, 27 Nov 2007 00:11:04 -0800 Subject: IB/mad: Report number of times a mad was retried To allow ULPs to tune timeout values and capture retry statistics, report the number of times that a mad send operation was retried. For RMPP mads, report the total number of times that the any portion (send window) of the send operation was retried. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/ib_mad.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 8ec3799e42e1..7228c056b9e9 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -230,7 +230,9 @@ struct ib_class_port_info * @seg_count: The number of RMPP segments allocated for this send. * @seg_size: Size of each RMPP segment. * @timeout_ms: Time to wait for a response. - * @retries: Number of times to retry a request for a response. + * @retries: Number of times to retry a request for a response. For MADs + * using RMPP, this applies per window. On completion, returns the number + * of retries needed to complete the transfer. * * Users are responsible for initializing the MAD buffer itself, with the * exception of any RMPP header. Additional segment buffer space allocated -- cgit v1.2.3-59-g8ed1b From 88314e4dda1e158aabce76429ef4d017b48f8b92 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 14 Nov 2007 00:29:50 -0800 Subject: RDMA/cma: add support for rdma_migrate_id() This is based on user feedback from Doug Ledford at RedHat: Events that occur on an rdma_cm_id are reported to userspace through an event channel. Connection request events are reported on the event channel associated with the listen. When the connection is accepted, a new rdma_cm_id is created and automatically uses the listen event channel. This is suboptimal where the user only wants listen events on that channel. Additionally, it may be desirable to have events related to connection establishment use a different event channel than those related to already established connections. Allow the user to migrate an rdma_cm_id between event channels. All pending events associated with the rdma_cm_id are moved to the new event channel. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/ucma.c | 92 ++++++++++++++++++++++++++++++++++++++++++ include/rdma/rdma_user_cm.h | 13 +++++- 2 files changed, 104 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 90d675ad9ec8..15937eb38aae 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -31,6 +31,7 @@ */ #include +#include #include #include #include @@ -991,6 +992,96 @@ out: return ret; } +static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2) +{ + /* Acquire mutex's based on pointer comparison to prevent deadlock. */ + if (file1 < file2) { + mutex_lock(&file1->mut); + mutex_lock(&file2->mut); + } else { + mutex_lock(&file2->mut); + mutex_lock(&file1->mut); + } +} + +static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2) +{ + if (file1 < file2) { + mutex_unlock(&file2->mut); + mutex_unlock(&file1->mut); + } else { + mutex_unlock(&file1->mut); + mutex_unlock(&file2->mut); + } +} + +static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file) +{ + struct ucma_event *uevent, *tmp; + + list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) + if (uevent->ctx == ctx) + list_move_tail(&uevent->list, &file->event_list); +} + +static ssize_t ucma_migrate_id(struct ucma_file *new_file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_migrate_id cmd; + struct rdma_ucm_migrate_resp resp; + struct ucma_context *ctx; + struct file *filp; + struct ucma_file *cur_file; + int ret = 0; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + /* Get current fd to protect against it being closed */ + filp = fget(cmd.fd); + if (!filp) + return -ENOENT; + + /* Validate current fd and prevent destruction of id. */ + ctx = ucma_get_ctx(filp->private_data, cmd.id); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto file_put; + } + + cur_file = ctx->file; + if (cur_file == new_file) { + resp.events_reported = ctx->events_reported; + goto response; + } + + /* + * Migrate events between fd's, maintaining order, and avoiding new + * events being added before existing events. + */ + ucma_lock_files(cur_file, new_file); + mutex_lock(&mut); + + list_move_tail(&ctx->list, &new_file->ctx_list); + ucma_move_events(ctx, new_file); + ctx->file = new_file; + resp.events_reported = ctx->events_reported; + + mutex_unlock(&mut); + ucma_unlock_files(cur_file, new_file); + +response: + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + + ucma_put_ctx(ctx); +file_put: + fput(filp); + return ret; +} + static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) = { @@ -1012,6 +1103,7 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, + [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id }; static ssize_t ucma_write(struct file *filp, const char __user *buf, diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h index 9749c1b34d00..c55705460b87 100644 --- a/include/rdma/rdma_user_cm.h +++ b/include/rdma/rdma_user_cm.h @@ -60,7 +60,8 @@ enum { RDMA_USER_CM_CMD_SET_OPTION, RDMA_USER_CM_CMD_NOTIFY, RDMA_USER_CM_CMD_JOIN_MCAST, - RDMA_USER_CM_CMD_LEAVE_MCAST + RDMA_USER_CM_CMD_LEAVE_MCAST, + RDMA_USER_CM_CMD_MIGRATE_ID }; /* @@ -230,4 +231,14 @@ struct rdma_ucm_set_option { __u32 optlen; }; +struct rdma_ucm_migrate_id { + __u64 response; + __u32 id; + __u32 fd; +}; + +struct rdma_ucm_migrate_resp { + __u32 events_reported; +}; + #endif /* RDMA_USER_CM_H */ -- cgit v1.2.3-59-g8ed1b From a9e527e3f9f4510e9f3450ca3bc51bc3ef2854fd Mon Sep 17 00:00:00 2001 From: Rolf Manderscheid Date: Mon, 10 Dec 2007 13:38:41 -0700 Subject: IPoIB: improve IPv4/IPv6 to IB mcast mapping functions An IPoIB subnet on an IB fabric that spans multiple IB subnets can't use link-local scope in multicast GIDs. The existing routines that map IP/IPv6 multicast addresses into IB link-level addresses hard-code the scope to link-local, and they also leave the partition key field uninitialised. This patch adds a parameter (the link-level broadcast address) to the mapping routines, allowing them to initialise both the scope and the P_Key appropriately, and fixes up the call sites. The next step will be to add a way to configure the scope for an IPoIB interface. Signed-off-by: Rolf Manderscheid Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 4 +--- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 4 ---- include/net/if_inet6.h | 11 +++++++---- include/net/ip.h | 10 ++++++---- net/ipv4/arp.c | 2 +- net/ipv6/ndisc.c | 2 +- 6 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 312ec74f3d18..982836e69f55 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2610,11 +2610,9 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, /* IPv6 address is an SA assigned MGID. */ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else { - ip_ib_mc_map(sin->sin_addr.s_addr, mc_map); + ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ - mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8; - mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr); *mgid = *(union ib_gid *) (mc_map + 4); } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 858ada17f980..2628339e3a99 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -788,10 +788,6 @@ void ipoib_mcast_restart_task(struct work_struct *work) memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid); - /* Add in the P_Key */ - mgid.raw[4] = (priv->pkey >> 8) & 0xff; - mgid.raw[5] = priv->pkey & 0xff; - mcast = __ipoib_mcast_find(dev, &mgid); if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { struct ipoib_mcast *nmcast; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 448eccb20638..b24508abb850 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -269,18 +269,21 @@ static inline void ipv6_arcnet_mc_map(const struct in6_addr *addr, char *buf) buf[0] = 0x00; } -static inline void ipv6_ib_mc_map(struct in6_addr *addr, char *buf) +static inline void ipv6_ib_mc_map(const struct in6_addr *addr, + const unsigned char *broadcast, char *buf) { + unsigned char scope = broadcast[5] & 0xF; + buf[0] = 0; /* Reserved */ buf[1] = 0xff; /* Multicast QPN */ buf[2] = 0xff; buf[3] = 0xff; buf[4] = 0xff; - buf[5] = 0x12; /* link local scope */ + buf[5] = 0x10 | scope; /* scope from broadcast address */ buf[6] = 0x60; /* IPv6 signature */ buf[7] = 0x1b; - buf[8] = 0; /* P_Key */ - buf[9] = 0; + buf[8] = broadcast[8]; /* P_Key */ + buf[9] = broadcast[9]; memcpy(buf + 10, addr->s6_addr + 6, 10); } #endif diff --git a/include/net/ip.h b/include/net/ip.h index 840dd91b513b..50c8889b1b8d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -266,20 +266,22 @@ static inline void ip_eth_mc_map(__be32 naddr, char *buf) * Leave P_Key as 0 to be filled in by driver. */ -static inline void ip_ib_mc_map(__be32 naddr, char *buf) +static inline void ip_ib_mc_map(__be32 naddr, const unsigned char *broadcast, char *buf) { __u32 addr; + unsigned char scope = broadcast[5] & 0xF; + buf[0] = 0; /* Reserved */ buf[1] = 0xff; /* Multicast QPN */ buf[2] = 0xff; buf[3] = 0xff; addr = ntohl(naddr); buf[4] = 0xff; - buf[5] = 0x12; /* link local scope */ + buf[5] = 0x10 | scope; /* scope from broadcast address */ buf[6] = 0x40; /* IPv4 signature */ buf[7] = 0x1b; - buf[8] = 0; /* P_Key */ - buf[9] = 0; + buf[8] = broadcast[8]; /* P_Key */ + buf[9] = broadcast[9]; buf[10] = 0; buf[11] = 0; buf[12] = 0; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 08174a2aa878..54a76b8b803a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -211,7 +211,7 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir) ip_tr_mc_map(addr, haddr); return 0; case ARPHRD_INFINIBAND: - ip_ib_mc_map(addr, haddr); + ip_ib_mc_map(addr, dev->broadcast, haddr); return 0; default: if (dir) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 777ed733b2d7..85947eae5bf7 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -337,7 +337,7 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d ipv6_arcnet_mc_map(addr, buf); return 0; case ARPHRD_INFINIBAND: - ipv6_ib_mc_map(addr, buf); + ipv6_ib_mc_map(addr, dev->broadcast, buf); return 0; default: if (dir) { -- cgit v1.2.3-59-g8ed1b