From bc3fe2e3769874dfa8674791e84c4a901ba9e48b Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 27 Jul 2015 18:10:12 -0500 Subject: svcrdma: Use max_sge_rd for destination read depths Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 12 +----------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 4 ++++ 2 files changed, 5 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 2e1348bde325..cb5174284074 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -115,15 +115,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, rqstp->rq_arg.tail[0].iov_len = 0; } -static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) -{ - if (!rdma_cap_read_multi_sge(xprt->sc_cm_id->device, - xprt->sc_cm_id->port_num)) - return 1; - else - return min_t(int, sge_count, xprt->sc_max_sge); -} - /* Issue an RDMA_READ using the local lkey to map the data sink */ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, @@ -144,8 +135,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, ctxt->direction = DMA_FROM_DEVICE; ctxt->read_hdr = head; - pages_needed = - min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed)); + pages_needed = min_t(int, pages_needed, xprt->sc_max_sge_rd); read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); for (pno = 0; pno < pages_needed; pno++) { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 6b36279e4288..fdc850ffc26c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -872,6 +872,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) * capabilities of this particular device */ newxprt->sc_max_sge = min((size_t)devattr.max_sge, (size_t)RPCSVC_MAXPAGES); + newxprt->sc_max_sge_rd = min_t(size_t, devattr.max_sge_rd, + RPCSVC_MAXPAGES); newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr, (size_t)svcrdma_max_requests); newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; @@ -1046,6 +1048,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) " remote_ip : %pI4\n" " remote_port : %d\n" " max_sge : %d\n" + " max_sge_rd : %d\n" " sq_depth : %d\n" " max_requests : %d\n" " ord : %d\n", @@ -1059,6 +1062,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id-> route.addr.dst_addr)->sin_port), newxprt->sc_max_sge, + newxprt->sc_max_sge_rd, newxprt->sc_sq_depth, newxprt->sc_max_requests, newxprt->sc_ord); -- cgit v1.2.3-59-g8ed1b From 7c1eb45a22d76bb99236e7485958f87ef7c449cf Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Thu, 30 Jul 2015 17:50:14 +0300 Subject: IB/core: lock client data with lists_rwsem An ib_client callback that is called with the lists_rwsem locked only for read is protected from changes to the IB client lists, but not from ib_unregister_device() freeing its client data. This is because ib_unregister_device() will remove the device from the device list with lists_rwsem locked for write, but perform the rest of the cleanup, including the call to remove() without that lock. Mark client data that is undergoing de-registration with a new going_down flag in the client data context. Lock the client data list with lists_rwsem for write in addition to using the spinlock, so that functions calling the callback would be able to lock only lists_rwsem for read and let callbacks sleep. Since ib_unregister_client() now marks the client data context, no need for remove() to search the context again, so pass the client data directly to remove() callbacks. Reviewed-by: Jason Gunthorpe Signed-off-by: Haggai Eran Signed-off-by: Doug Ledford --- drivers/infiniband/core/cache.c | 2 +- drivers/infiniband/core/cm.c | 7 ++-- drivers/infiniband/core/cma.c | 7 ++-- drivers/infiniband/core/device.c | 53 +++++++++++++++++++++++++------ drivers/infiniband/core/mad.c | 2 +- drivers/infiniband/core/multicast.c | 7 ++-- drivers/infiniband/core/sa_query.c | 6 ++-- drivers/infiniband/core/ucm.c | 6 ++-- drivers/infiniband/core/user_mad.c | 6 ++-- drivers/infiniband/core/uverbs_main.c | 6 ++-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 7 ++-- drivers/infiniband/ulp/srp/ib_srp.c | 6 ++-- drivers/infiniband/ulp/srpt/ib_srpt.c | 5 ++- include/rdma/ib_verbs.h | 4 ++- net/rds/ib.c | 5 ++- net/rds/iw.c | 5 ++- 16 files changed, 82 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 871da832d016..c93af66cc091 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -394,7 +394,7 @@ err: kfree(device->cache.lmc_cache); } -static void ib_cache_cleanup_one(struct ib_device *device) +static void ib_cache_cleanup_one(struct ib_device *device, void *client_data) { int p; diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 3a972ebf3c0d..82d5c4362aa8 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -58,7 +58,7 @@ MODULE_DESCRIPTION("InfiniBand CM"); MODULE_LICENSE("Dual BSD/GPL"); static void cm_add_one(struct ib_device *device); -static void cm_remove_one(struct ib_device *device); +static void cm_remove_one(struct ib_device *device, void *client_data); static struct ib_client cm_client = { .name = "cm", @@ -3886,9 +3886,9 @@ free: kfree(cm_dev); } -static void cm_remove_one(struct ib_device *ib_device) +static void cm_remove_one(struct ib_device *ib_device, void *client_data) { - struct cm_device *cm_dev; + struct cm_device *cm_dev = client_data; struct cm_port *port; struct ib_port_modify port_modify = { .clr_port_cap_mask = IB_PORT_CM_SUP @@ -3896,7 +3896,6 @@ static void cm_remove_one(struct ib_device *ib_device) unsigned long flags; int i; - cm_dev = ib_get_client_data(ib_device, &cm_client); if (!cm_dev) return; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 4e72e4c16cfe..9664131c4eeb 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -94,7 +94,7 @@ const char *rdma_event_msg(enum rdma_cm_event_type event) EXPORT_SYMBOL(rdma_event_msg); static void cma_add_one(struct ib_device *device); -static void cma_remove_one(struct ib_device *device); +static void cma_remove_one(struct ib_device *device, void *client_data); static struct ib_client cma_client = { .name = "cma", @@ -3554,11 +3554,10 @@ static void cma_process_remove(struct cma_device *cma_dev) wait_for_completion(&cma_dev->comp); } -static void cma_remove_one(struct ib_device *device) +static void cma_remove_one(struct ib_device *device, void *client_data) { - struct cma_device *cma_dev; + struct cma_device *cma_dev = client_data; - cma_dev = ib_get_client_data(device, &cma_client); if (!cma_dev) return; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 0c8fa781538b..ce317e623862 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -50,6 +50,9 @@ struct ib_client_data { struct list_head list; struct ib_client *client; void * data; + /* The device or client is going down. Do not call client or device + * callbacks other than remove(). */ + bool going_down; }; struct workqueue_struct *ib_wq; @@ -69,6 +72,8 @@ static LIST_HEAD(client_list); * to the lists must be done with a write lock. A special case is when the * device_mutex is locked. In this case locking the lists for read access is * not necessary as the device_mutex implies it. + * + * lists_rwsem also protects access to the client data list. */ static DEFINE_MUTEX(device_mutex); static DECLARE_RWSEM(lists_rwsem); @@ -210,10 +215,13 @@ static int add_client_context(struct ib_device *device, struct ib_client *client context->client = client; context->data = NULL; + context->going_down = false; + down_write(&lists_rwsem); spin_lock_irqsave(&device->client_data_lock, flags); list_add(&context->list, &device->client_data_list); spin_unlock_irqrestore(&device->client_data_lock, flags); + up_write(&lists_rwsem); return 0; } @@ -339,7 +347,6 @@ EXPORT_SYMBOL(ib_register_device); */ void ib_unregister_device(struct ib_device *device) { - struct ib_client *client; struct ib_client_data *context, *tmp; unsigned long flags; @@ -347,20 +354,29 @@ void ib_unregister_device(struct ib_device *device) down_write(&lists_rwsem); list_del(&device->core_list); - up_write(&lists_rwsem); + spin_lock_irqsave(&device->client_data_lock, flags); + list_for_each_entry_safe(context, tmp, &device->client_data_list, list) + context->going_down = true; + spin_unlock_irqrestore(&device->client_data_lock, flags); + downgrade_write(&lists_rwsem); - list_for_each_entry_reverse(client, &client_list, list) - if (client->remove) - client->remove(device); + list_for_each_entry_safe(context, tmp, &device->client_data_list, + list) { + if (context->client->remove) + context->client->remove(device, context->data); + } + up_read(&lists_rwsem); mutex_unlock(&device_mutex); ib_device_unregister_sysfs(device); + down_write(&lists_rwsem); spin_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry_safe(context, tmp, &device->client_data_list, list) kfree(context); spin_unlock_irqrestore(&device->client_data_lock, flags); + up_write(&lists_rwsem); device->reg_state = IB_DEV_UNREGISTERED; } @@ -420,16 +436,35 @@ void ib_unregister_client(struct ib_client *client) up_write(&lists_rwsem); list_for_each_entry(device, &device_list, core_list) { - if (client->remove) - client->remove(device); + struct ib_client_data *found_context = NULL; + down_write(&lists_rwsem); spin_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry_safe(context, tmp, &device->client_data_list, list) if (context->client == client) { - list_del(&context->list); - kfree(context); + context->going_down = true; + found_context = context; + break; } spin_unlock_irqrestore(&device->client_data_lock, flags); + up_write(&lists_rwsem); + + if (client->remove) + client->remove(device, found_context ? + found_context->data : NULL); + + if (!found_context) { + pr_warn("No client context found for %s/%s\n", + device->name, client->name); + continue; + } + + down_write(&lists_rwsem); + spin_lock_irqsave(&device->client_data_lock, flags); + list_del(&found_context->list); + kfree(found_context); + spin_unlock_irqrestore(&device->client_data_lock, flags); + up_write(&lists_rwsem); } mutex_unlock(&device_mutex); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 786fc51bf04b..66b4b3eb8f67 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3335,7 +3335,7 @@ error: } } -static void ib_mad_remove_device(struct ib_device *device) +static void ib_mad_remove_device(struct ib_device *device, void *client_data) { int i; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 2cb865c7ce7a..d38d8b2b2979 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -43,7 +43,7 @@ #include "sa.h" static void mcast_add_one(struct ib_device *device); -static void mcast_remove_one(struct ib_device *device); +static void mcast_remove_one(struct ib_device *device, void *client_data); static struct ib_client mcast_client = { .name = "ib_multicast", @@ -840,13 +840,12 @@ static void mcast_add_one(struct ib_device *device) ib_register_event_handler(&dev->event_handler); } -static void mcast_remove_one(struct ib_device *device) +static void mcast_remove_one(struct ib_device *device, void *client_data) { - struct mcast_device *dev; + struct mcast_device *dev = client_data; struct mcast_port *port; int i; - dev = ib_get_client_data(device, &mcast_client); if (!dev) return; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index ca919f429666..d40be3673b79 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -107,7 +107,7 @@ struct ib_sa_mcmember_query { }; static void ib_sa_add_one(struct ib_device *device); -static void ib_sa_remove_one(struct ib_device *device); +static void ib_sa_remove_one(struct ib_device *device, void *client_data); static struct ib_client sa_client = { .name = "sa", @@ -1221,9 +1221,9 @@ free: return; } -static void ib_sa_remove_one(struct ib_device *device) +static void ib_sa_remove_one(struct ib_device *device, void *client_data) { - struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_device *sa_dev = client_data; int i; if (!sa_dev) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 009481073644..8cde48b96f19 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -109,7 +109,7 @@ enum { #define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) static void ib_ucm_add_one(struct ib_device *device); -static void ib_ucm_remove_one(struct ib_device *device); +static void ib_ucm_remove_one(struct ib_device *device, void *client_data); static struct ib_client ucm_client = { .name = "ucm", @@ -1310,9 +1310,9 @@ err: return; } -static void ib_ucm_remove_one(struct ib_device *device) +static void ib_ucm_remove_one(struct ib_device *device, void *client_data) { - struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client); + struct ib_ucm_device *ucm_dev = client_data; if (!ucm_dev) return; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 35567fffaa4e..57f281f8d686 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -133,7 +133,7 @@ static DEFINE_SPINLOCK(port_lock); static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); static void ib_umad_add_one(struct ib_device *device); -static void ib_umad_remove_one(struct ib_device *device); +static void ib_umad_remove_one(struct ib_device *device, void *client_data); static void ib_umad_release_dev(struct kobject *kobj) { @@ -1322,9 +1322,9 @@ free: kobject_put(&umad_dev->kobj); } -static void ib_umad_remove_one(struct ib_device *device) +static void ib_umad_remove_one(struct ib_device *device, void *client_data) { - struct ib_umad_device *umad_dev = ib_get_client_data(device, &umad_client); + struct ib_umad_device *umad_dev = client_data; int i; if (!umad_dev) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index f6eef2da7097..46c92294afa5 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -128,7 +128,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, }; static void ib_uverbs_add_one(struct ib_device *device); -static void ib_uverbs_remove_one(struct ib_device *device); +static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); static void ib_uverbs_release_dev(struct kref *ref) { @@ -948,9 +948,9 @@ err: return; } -static void ib_uverbs_remove_one(struct ib_device *device) +static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) { - struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client); + struct ib_uverbs_device *uverbs_dev = client_data; if (!uverbs_dev) return; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index b2943c84a5dd..cca1a0c91ec4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -89,7 +89,7 @@ struct workqueue_struct *ipoib_workqueue; struct ib_sa_client ipoib_sa_client; static void ipoib_add_one(struct ib_device *device); -static void ipoib_remove_one(struct ib_device *device); +static void ipoib_remove_one(struct ib_device *device, void *client_data); static void ipoib_neigh_reclaim(struct rcu_head *rp); static struct ib_client ipoib_client = { @@ -1715,12 +1715,11 @@ static void ipoib_add_one(struct ib_device *device) ib_set_client_data(device, &ipoib_client, dev_list); } -static void ipoib_remove_one(struct ib_device *device) +static void ipoib_remove_one(struct ib_device *device, void *client_data) { struct ipoib_dev_priv *priv, *tmp; - struct list_head *dev_list; + struct list_head *dev_list = client_data; - dev_list = ib_get_client_data(device, &ipoib_client); if (!dev_list) return; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 31a20b462266..7755df444cfd 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -131,7 +131,7 @@ MODULE_PARM_DESC(ch_count, "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA."); static void srp_add_one(struct ib_device *device); -static void srp_remove_one(struct ib_device *device); +static void srp_remove_one(struct ib_device *device, void *client_data); static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr); static void srp_send_completion(struct ib_cq *cq, void *ch_ptr); static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); @@ -3460,13 +3460,13 @@ free_attr: kfree(dev_attr); } -static void srp_remove_one(struct ib_device *device) +static void srp_remove_one(struct ib_device *device, void *client_data) { struct srp_device *srp_dev; struct srp_host *host, *tmp_host; struct srp_target_port *target; - srp_dev = ib_get_client_data(device, &srp_client); + srp_dev = client_data; if (!srp_dev) return; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 60ff0a2390e5..4c59ceb40fff 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3326,12 +3326,11 @@ err: /** * srpt_remove_one() - InfiniBand device removal callback function. */ -static void srpt_remove_one(struct ib_device *device) +static void srpt_remove_one(struct ib_device *device, void *client_data) { - struct srpt_device *sdev; + struct srpt_device *sdev = client_data; int i; - sdev = ib_get_client_data(device, &srpt_client); if (!sdev) { pr_info("%s(%s): nothing to do.\n", __func__, device->name); return; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7448a2740287..449609b70928 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1550,6 +1550,8 @@ struct ib_device { spinlock_t client_data_lock; struct list_head core_list; + /* Access to the client_data_list is protected by the client_data_lock + * spinlock and the lists_rwsem read-write semaphore */ struct list_head client_data_list; struct ib_cache cache; @@ -1761,7 +1763,7 @@ struct ib_device { struct ib_client { char *name; void (*add) (struct ib_device *); - void (*remove)(struct ib_device *); + void (*remove)(struct ib_device *, void *client_data); struct list_head list; }; diff --git a/net/rds/ib.c b/net/rds/ib.c index ba2dffeff608..348ac37c1161 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -230,11 +230,10 @@ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device) * * This can be called at any time and can be racing with any other RDS path. */ -static void rds_ib_remove_one(struct ib_device *device) +static void rds_ib_remove_one(struct ib_device *device, void *client_data) { - struct rds_ib_device *rds_ibdev; + struct rds_ib_device *rds_ibdev = client_data; - rds_ibdev = ib_get_client_data(device, &rds_ib_client); if (!rds_ibdev) return; diff --git a/net/rds/iw.c b/net/rds/iw.c index 589935661d66..7cc2f32a0cb3 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -125,12 +125,11 @@ free_attr: kfree(dev_attr); } -static void rds_iw_remove_one(struct ib_device *device) +static void rds_iw_remove_one(struct ib_device *device, void *client_data) { - struct rds_iw_device *rds_iwdev; + struct rds_iw_device *rds_iwdev = client_data; struct rds_iw_cm_id *i_cm_id, *next; - rds_iwdev = ib_get_client_data(device, &rds_iw_client); if (!rds_iwdev) return; -- cgit v1.2.3-59-g8ed1b From 0410e38eca85e042f5d5a281dbcc792db701ed44 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 30 Jul 2015 10:32:39 +0300 Subject: xprtrdma, svcrdma: Convert to ib_alloc_mr Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- net/sunrpc/xprtrdma/frwr_ops.c | 6 +++--- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 04ea914201b2..5318951b3b53 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -117,7 +117,7 @@ __frwr_recovery_worker(struct work_struct *work) if (ib_dereg_mr(r->r.frmr.fr_mr)) goto out_fail; - r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(pd, depth); + r->r.frmr.fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); if (IS_ERR(r->r.frmr.fr_mr)) goto out_fail; @@ -148,7 +148,7 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, struct rpcrdma_frmr *f = &r->r.frmr; int rc; - f->fr_mr = ib_alloc_fast_reg_mr(pd, depth); + f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); if (IS_ERR(f->fr_mr)) goto out_mr_err; f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); @@ -158,7 +158,7 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, out_mr_err: rc = PTR_ERR(f->fr_mr); - dprintk("RPC: %s: ib_alloc_fast_reg_mr status %i\n", + dprintk("RPC: %s: ib_alloc_mr status %i\n", __func__, rc); return rc; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index fdc850ffc26c..8752a2d653b5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -738,7 +738,7 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) if (!frmr) goto err; - mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES); + mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_MEM_REG, RPCSVC_MAXPAGES); if (IS_ERR(mr)) goto err_free_frmr; -- cgit v1.2.3-59-g8ed1b From 9ac07501e1918b8d1140adcc360e8d8c7f5a2f7c Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 7 Aug 2015 11:11:20 -0500 Subject: svcrdma: limit FRMR page list lengths to device max Svcrdma was incorrectly allocating fastreg MRs and page lists using RPCSVC_MAXPAGES, which can exceed the device capabilities. So limit the depth to the minimum of RPCSVC_MAXPAGES and xprt->sc_frmr_pg_list_len. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 8752a2d653b5..11d5133c3f38 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -733,17 +733,19 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) struct ib_mr *mr; struct ib_fast_reg_page_list *pl; struct svc_rdma_fastreg_mr *frmr; + u32 num_sg; frmr = kmalloc(sizeof(*frmr), GFP_KERNEL); if (!frmr) goto err; - mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_MEM_REG, RPCSVC_MAXPAGES); + num_sg = min_t(u32, RPCSVC_MAXPAGES, xprt->sc_frmr_pg_list_len); + mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_MEM_REG, num_sg); if (IS_ERR(mr)) goto err_free_frmr; pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, - RPCSVC_MAXPAGES); + num_sg); if (IS_ERR(pl)) goto err_free_mr; -- cgit v1.2.3-59-g8ed1b From fc27995942960c894bc4725435dce8750e44cd64 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 30 Jul 2015 10:32:40 +0300 Subject: RDS: Convert to ib_alloc_mr Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- net/rds/iw_rdma.c | 5 +++-- net/rds/iw_send.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index dba8d0864f18..6a8fbd6e69e7 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -667,11 +667,12 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct ib_mr *mr; int err; - mr = ib_alloc_fast_reg_mr(rds_iwdev->pd, pool->max_message_size); + mr = ib_alloc_mr(rds_iwdev->pd, IB_MR_TYPE_MEM_REG, + pool->max_message_size); if (IS_ERR(mr)) { err = PTR_ERR(mr); - printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed (err=%d)\n", err); + printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed (err=%d)\n", err); return err; } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 334fe98c5084..86152ec3b887 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -153,9 +153,10 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic) sge->length = sizeof(struct rds_header); sge->lkey = 0; - send->s_mr = ib_alloc_fast_reg_mr(ic->i_pd, fastreg_message_size); + send->s_mr = ib_alloc_mr(ic->i_pd, IB_MR_TYPE_MEM_REG, + fastreg_message_size); if (IS_ERR(send->s_mr)) { - printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed\n"); + printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n"); break; } -- cgit v1.2.3-59-g8ed1b From 399e6f95811bd36fb64b3d30cf8529d633884b4c Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 30 Jul 2015 18:33:22 +0300 Subject: net/ipv6: Export addrconf_ifid_eui48 For loopback purposes, RoCE devices should have a default GID in the port GID table, even when the interface is down. In order to do so, we use the IPv6 link local address which would have been genenrated for the related Ethernet netdevice when it goes up as a default GID. addrconf_ifid_eui48 is used to gernerate this address, export it. Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- include/net/addrconf.h | 31 +++++++++++++++++++++++++++++++ net/ipv6/addrconf.c | 31 ------------------------------- 2 files changed, 31 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index def59d3a34d5..431fdfa3f56d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -91,6 +91,37 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); +static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) +{ + if (dev->addr_len != ETH_ALEN) + return -1; + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr + 3, 3); + + /* + * The zSeries OSA network cards can be shared among various + * OS instances, but the OSA cards have only one MAC address. + * This leads to duplicate address conflicts in conjunction + * with IPv6 if more than one instance uses the same card. + * + * The driver for these cards can deliver a unique 16-bit + * identifier for each instance sharing the same card. It is + * placed instead of 0xFFFE in the interface identifier. The + * "u" bit of the interface identifier is not inverted in this + * case. Hence the resulting interface identifier has local + * scope according to RFC2373. + */ + if (dev->dev_id) { + eui[3] = (dev->dev_id >> 8) & 0xFF; + eui[4] = dev->dev_id & 0xFF; + } else { + eui[3] = 0xFF; + eui[4] = 0xFE; + eui[0] ^= 2; + } + return 0; +} + static inline unsigned long addrconf_timeout_fixup(u32 timeout, unsigned int unit) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 21c2c818df3b..5b0c041323d7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1845,37 +1845,6 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) __ipv6_dev_ac_dec(ifp->idev, &addr); } -static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) -{ - if (dev->addr_len != ETH_ALEN) - return -1; - memcpy(eui, dev->dev_addr, 3); - memcpy(eui + 5, dev->dev_addr + 3, 3); - - /* - * The zSeries OSA network cards can be shared among various - * OS instances, but the OSA cards have only one MAC address. - * This leads to duplicate address conflicts in conjunction - * with IPv6 if more than one instance uses the same card. - * - * The driver for these cards can deliver a unique 16-bit - * identifier for each instance sharing the same card. It is - * placed instead of 0xFFFE in the interface identifier. The - * "u" bit of the interface identifier is not inverted in this - * case. Hence the resulting interface identifier has local - * scope according to RFC2373. - */ - if (dev->dev_id) { - eui[3] = (dev->dev_id >> 8) & 0xFF; - eui[4] = dev->dev_id & 0xFF; - } else { - eui[3] = 0xFF; - eui[4] = 0xFE; - eui[0] ^= 2; - } - return 0; -} - static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev) { if (dev->addr_len != IEEE802154_ADDR_LEN) -- cgit v1.2.3-59-g8ed1b From 816dd19b3d191da88bc034fb85e21ed09a3ed320 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 30 Jul 2015 18:33:23 +0300 Subject: net: Add info for NETDEV_CHANGEUPPER event Some consumers of NETDEV_CHANGEUPPER event would like to know which upper device was linked/unlinked and what operation was carried. Add information in the notifier info block for that purpose. Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- include/linux/netdevice.h | 14 ++++++++++++++ net/core/dev.c | 12 ++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e20979dfd6a9..2b7fe4e3146a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3556,6 +3556,20 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, netdev_features_t features); +enum netdev_changeupper_event { + NETDEV_CHANGEUPPER_LINK, + NETDEV_CHANGEUPPER_UNLINK, +}; + +struct netdev_changeupper_info { + struct netdev_notifier_info info; /* must be first */ + enum netdev_changeupper_event event; + struct net_device *upper; +}; + +void netdev_changeupper_info_change(struct net_device *dev, + struct netdev_changeupper_info *info); + struct netdev_bonding_info { ifslave slave; ifbond master; diff --git a/net/core/dev.c b/net/core/dev.c index a8e4dd430285..6e6f14e5d44f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5302,6 +5302,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, void *private) { struct netdev_adjacent *i, *j, *to_i, *to_j; + struct netdev_changeupper_info changeupper_info; int ret = 0; ASSERT_RTNL(); @@ -5357,7 +5358,10 @@ static int __netdev_upper_dev_link(struct net_device *dev, goto rollback_lower_mesh; } - call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); + changeupper_info.event = NETDEV_CHANGEUPPER_LINK; + changeupper_info.upper = upper_dev; + call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + &changeupper_info.info); return 0; rollback_lower_mesh: @@ -5453,6 +5457,7 @@ void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { struct netdev_adjacent *i, *j; + struct netdev_changeupper_info changeupper_info; ASSERT_RTNL(); __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); @@ -5474,7 +5479,10 @@ void netdev_upper_dev_unlink(struct net_device *dev, list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) __netdev_adjacent_dev_unlink(dev, i->dev); - call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); + changeupper_info.event = NETDEV_CHANGEUPPER_UNLINK; + changeupper_info.upper = upper_dev; + call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + &changeupper_info.info); } EXPORT_SYMBOL(netdev_upper_dev_unlink); -- cgit v1.2.3-59-g8ed1b From 2f31fa881fbe70808b945a6d23cae1ca8eadf1b3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 30 Jul 2015 17:22:25 -0600 Subject: net/9p: Remove ib_get_dma_mr calls The pd now has a local_dma_lkey member which completely replaces ib_get_dma_mr, use it instead. Signed-off-by: Jason Gunthorpe Tested-by: Dominique Martinet Signed-off-by: Doug Ledford --- net/9p/trans_rdma.c | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 37a78d20c0f6..ba1210253f5e 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -94,8 +94,6 @@ struct p9_trans_rdma { struct ib_pd *pd; struct ib_qp *qp; struct ib_cq *cq; - struct ib_mr *dma_mr; - u32 lkey; long timeout; int sq_depth; struct semaphore sq_sem; @@ -382,9 +380,6 @@ static void rdma_destroy_trans(struct p9_trans_rdma *rdma) if (!rdma) return; - if (rdma->dma_mr && !IS_ERR(rdma->dma_mr)) - ib_dereg_mr(rdma->dma_mr); - if (rdma->qp && !IS_ERR(rdma->qp)) ib_destroy_qp(rdma->qp); @@ -415,7 +410,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c) sge.addr = c->busa; sge.length = client->msize; - sge.lkey = rdma->lkey; + sge.lkey = rdma->pd->local_dma_lkey; wr.next = NULL; c->wc_op = IB_WC_RECV; @@ -506,7 +501,7 @@ dont_need_post_recv: sge.addr = c->busa; sge.length = c->req->tc->size; - sge.lkey = rdma->lkey; + sge.lkey = rdma->pd->local_dma_lkey; wr.next = NULL; c->wc_op = IB_WC_SEND; @@ -647,7 +642,6 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) struct p9_trans_rdma *rdma; struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; - struct ib_device_attr devattr; struct ib_cq_init_attr cq_attr = {}; /* Parse the transport specific mount options */ @@ -700,11 +694,6 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED)) goto error; - /* Query the device attributes */ - err = ib_query_device(rdma->cm_id->device, &devattr); - if (err) - goto error; - /* Create the Completion Queue */ cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1; rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, @@ -719,17 +708,6 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) if (IS_ERR(rdma->pd)) goto error; - /* Cache the DMA lkey in the transport */ - rdma->dma_mr = NULL; - if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) - rdma->lkey = rdma->cm_id->device->local_dma_lkey; - else { - rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(rdma->dma_mr)) - goto error; - rdma->lkey = rdma->dma_mr->lkey; - } - /* Create the Queue Pair */ memset(&qp_attr, 0, sizeof qp_attr); qp_attr.event_handler = qp_event_handler; -- cgit v1.2.3-59-g8ed1b From e5580242aa8fab292579a1661463f7479275f7ff Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 30 Jul 2015 17:22:26 -0600 Subject: rds/ib: Remove ib_get_dma_mr calls The pd now has a local_dma_lkey member which completely replaces ib_get_dma_mr, use it instead. Signed-off-by: Jason Gunthorpe Signed-off-by: Doug Ledford --- net/rds/ib.c | 8 -------- net/rds/ib.h | 2 -- net/rds/ib_cm.c | 4 +--- net/rds/ib_recv.c | 6 +++--- net/rds/ib_send.c | 8 ++++---- 5 files changed, 8 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/rds/ib.c b/net/rds/ib.c index 348ac37c1161..c6f95b9494ae 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -99,8 +99,6 @@ static void rds_ib_dev_free(struct work_struct *work) if (rds_ibdev->mr_pool) rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); - if (rds_ibdev->mr) - ib_dereg_mr(rds_ibdev->mr); if (rds_ibdev->pd) ib_dealloc_pd(rds_ibdev->pd); @@ -164,12 +162,6 @@ static void rds_ib_add_one(struct ib_device *device) goto put_dev; } - rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(rds_ibdev->mr)) { - rds_ibdev->mr = NULL; - goto put_dev; - } - rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev); if (IS_ERR(rds_ibdev->mr_pool)) { rds_ibdev->mr_pool = NULL; diff --git a/net/rds/ib.h b/net/rds/ib.h index 86d88ec5d556..36f7d808ffaa 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -100,7 +100,6 @@ struct rds_ib_connection { /* alphabet soup, IBTA style */ struct rdma_cm_id *i_cm_id; struct ib_pd *i_pd; - struct ib_mr *i_mr; struct ib_cq *i_send_cq; struct ib_cq *i_recv_cq; @@ -173,7 +172,6 @@ struct rds_ib_device { struct list_head conn_list; struct ib_device *dev; struct ib_pd *pd; - struct ib_mr *mr; struct rds_ib_mr_pool *mr_pool; unsigned int fmr_max_remaps; unsigned int max_fmrs; diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 0da2a45b33bd..a75e8832bc23 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -269,7 +269,6 @@ static int rds_ib_setup_qp(struct rds_connection *conn) /* Protection domain and memory range */ ic->i_pd = rds_ibdev->pd; - ic->i_mr = rds_ibdev->mr; cq_attr.cqe = ic->i_send_ring.w_nr + 1; ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler, @@ -375,7 +374,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) rds_ib_recv_init_ack(ic); - rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr, + rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd, ic->i_send_cq, ic->i_recv_cq); out: @@ -678,7 +677,6 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) ic->i_cm_id = NULL; ic->i_pd = NULL; - ic->i_mr = NULL; ic->i_send_cq = NULL; ic->i_recv_cq = NULL; ic->i_send_hdrs = NULL; diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index cac5b4506ee3..0ceb4c60d2a3 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -62,12 +62,12 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic) sge = &recv->r_sge[0]; sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); sge->length = sizeof(struct rds_header); - sge->lkey = ic->i_mr->lkey; + sge->lkey = ic->i_pd->local_dma_lkey; sge = &recv->r_sge[1]; sge->addr = 0; sge->length = RDS_FRAG_SIZE; - sge->lkey = ic->i_mr->lkey; + sge->lkey = ic->i_pd->local_dma_lkey; } } @@ -520,7 +520,7 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic) sge->addr = ic->i_ack_dma; sge->length = sizeof(struct rds_header); - sge->lkey = ic->i_mr->lkey; + sge->lkey = ic->i_pd->local_dma_lkey; wr->sg_list = sge; wr->num_sge = 1; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 5d0a704fa039..f6c829d43373 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -202,9 +202,9 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic) sge = &send->s_sge[0]; sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); sge->length = sizeof(struct rds_header); - sge->lkey = ic->i_mr->lkey; + sge->lkey = ic->i_pd->local_dma_lkey; - send->s_sge[1].lkey = ic->i_mr->lkey; + send->s_sge[1].lkey = ic->i_pd->local_dma_lkey; } } @@ -813,7 +813,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) /* Convert our struct scatterlist to struct ib_sge */ send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg); send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg); - send->s_sge[0].lkey = ic->i_mr->lkey; + send->s_sge[0].lkey = ic->i_pd->local_dma_lkey; rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr, send->s_sge[0].addr, send->s_sge[0].length); @@ -927,7 +927,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat); send->s_sge[j].length = len; - send->s_sge[j].lkey = ic->i_mr->lkey; + send->s_sge[j].lkey = ic->i_pd->local_dma_lkey; sent += len; rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr); -- cgit v1.2.3-59-g8ed1b From 7dd78647a2c2c224e376fc72797d411a3a0bb047 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 5 Aug 2015 14:34:31 -0600 Subject: IB/core: Make ib_dealloc_pd return void The majority of callers never check the return value, and even if they did, they can't do anything about a failure. All possible failure cases represent a bug in the caller, so just WARN_ON inside the function instead. This fixes a few random errors: net/rd/iw.c infinite loops while it fails. (racing with EBUSY?) This also lays the ground work to get rid of error return from the drivers. Most drivers do not error, the few that do are broken since it cannot be handled. Since uverbs can legitimately make use of EBUSY, open code the check. Signed-off-by: Jason Gunthorpe Reviewed-by: Chuck Lever Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 22 ++++++++++++++++------ drivers/infiniband/core/verbs.c | 26 ++++++++++++++++++++------ drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 4 +--- drivers/infiniband/ulp/iser/iser_verbs.c | 2 +- include/rdma/ib_verbs.h | 6 +----- net/rds/iw.c | 5 +---- net/sunrpc/xprtrdma/verbs.c | 2 +- 7 files changed, 41 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 258485ee46b2..4c98696e3626 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -606,6 +606,7 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, { struct ib_uverbs_dealloc_pd cmd; struct ib_uobject *uobj; + struct ib_pd *pd; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -614,15 +615,20 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext); if (!uobj) return -EINVAL; + pd = uobj->object; - ret = ib_dealloc_pd(uobj->object); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + if (atomic_read(&pd->usecnt)) { + ret = -EBUSY; + goto err_put; + } + ret = pd->device->dealloc_pd(uobj->object); + WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); if (ret) - return ret; + goto err_put; + + uobj->live = 0; + put_uobj_write(uobj); idr_remove_uobj(&ib_uverbs_pd_idr, uobj); @@ -633,6 +639,10 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, put_uobj(uobj); return in_len; + +err_put: + put_uobj_write(uobj); + return ret; } struct xrcd_table_entry { diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 2e5fd89a8929..aad8b3ce66cc 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -260,18 +260,32 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device) } EXPORT_SYMBOL(ib_alloc_pd); -int ib_dealloc_pd(struct ib_pd *pd) +/** + * ib_dealloc_pd - Deallocates a protection domain. + * @pd: The protection domain to deallocate. + * + * It is an error to call this function while any resources in the pd still + * exist. The caller is responsible to synchronously destroy them and + * guarantee no new allocations will happen. + */ +void ib_dealloc_pd(struct ib_pd *pd) { + int ret; + if (pd->local_mr) { - if (ib_dereg_mr(pd->local_mr)) - return -EBUSY; + ret = ib_dereg_mr(pd->local_mr); + WARN_ON(ret); pd->local_mr = NULL; } - if (atomic_read(&pd->usecnt)) - return -EBUSY; + /* uverbs manipulates usecnt with proper locking, while the kabi + requires the caller to guarantee we can't race here. */ + WARN_ON(atomic_read(&pd->usecnt)); - return pd->device->dealloc_pd(pd); + /* Making delalloc_pd a void return is a WIP, no driver should return + an error here. */ + ret = pd->device->dealloc_pd(pd); + WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); } EXPORT_SYMBOL(ib_dealloc_pd); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 8c451983d8a5..78845b6e8b81 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -280,9 +280,7 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) priv->wq = NULL; } - if (ib_dealloc_pd(priv->pd)) - ipoib_warn(priv, "ib_dealloc_pd failed\n"); - + ib_dealloc_pd(priv->pd); } void ipoib_event(struct ib_event_handler *handler, diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ad2d2b50cd7f..ae70cc1463ac 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -185,7 +185,7 @@ static void iser_free_device_ib_res(struct iser_device *device) (void)ib_unregister_event_handler(&device->event_handler); (void)ib_dereg_mr(device->mr); - (void)ib_dealloc_pd(device->pd); + ib_dealloc_pd(device->pd); kfree(device->comps); device->comps = NULL; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 09400512d579..128abf2888ab 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2196,11 +2196,7 @@ int ib_find_pkey(struct ib_device *device, struct ib_pd *ib_alloc_pd(struct ib_device *device); -/** - * ib_dealloc_pd - Deallocates a protection domain. - * @pd: The protection domain to deallocate. - */ -int ib_dealloc_pd(struct ib_pd *pd); +void ib_dealloc_pd(struct ib_pd *pd); /** * ib_create_ah - Creates an address handle for the given address vector. diff --git a/net/rds/iw.c b/net/rds/iw.c index 7cc2f32a0cb3..c7dcddbf17cb 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -148,10 +148,7 @@ static void rds_iw_remove_one(struct ib_device *device, void *client_data) if (rds_iwdev->mr) ib_dereg_mr(rds_iwdev->mr); - while (ib_dealloc_pd(rds_iwdev->pd)) { - rdsdebug("Failed to dealloc pd %p\n", rds_iwdev->pd); - msleep(1); - } + ib_dealloc_pd(rds_iwdev->pd); list_del(&rds_iwdev->list); kfree(rds_iwdev); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 891c4ede2c20..afd504375a9a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -624,7 +624,7 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) /* If the pd is still busy, xprtrdma missed freeing a resource */ if (ia->ri_pd && !IS_ERR(ia->ri_pd)) - WARN_ON(ib_dealloc_pd(ia->ri_pd)); + ib_dealloc_pd(ia->ri_pd); } /* -- cgit v1.2.3-59-g8ed1b