diff options
157 files changed, 2415 insertions, 1468 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 0a3ec7c726ec..a1fb840de45d 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -89,6 +89,7 @@ config INFINIBAND_ADDR_TRANS_CONFIGFS This allows the user to config the default GID type that the CM uses for each device, when initiaing new connections. +if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/qib/Kconfig" source "drivers/infiniband/hw/cxgb3/Kconfig" @@ -101,6 +102,12 @@ source "drivers/infiniband/hw/ocrdma/Kconfig" source "drivers/infiniband/hw/vmw_pvrdma/Kconfig" source "drivers/infiniband/hw/usnic/Kconfig" source "drivers/infiniband/hw/hns/Kconfig" +source "drivers/infiniband/hw/bnxt_re/Kconfig" +source "drivers/infiniband/hw/hfi1/Kconfig" +source "drivers/infiniband/hw/qedr/Kconfig" +source "drivers/infiniband/sw/rdmavt/Kconfig" +source "drivers/infiniband/sw/rxe/Kconfig" +endif source "drivers/infiniband/ulp/ipoib/Kconfig" @@ -111,13 +118,5 @@ source "drivers/infiniband/ulp/iser/Kconfig" source "drivers/infiniband/ulp/isert/Kconfig" source "drivers/infiniband/ulp/opa_vnic/Kconfig" -source "drivers/infiniband/sw/rdmavt/Kconfig" -source "drivers/infiniband/sw/rxe/Kconfig" - -source "drivers/infiniband/hw/hfi1/Kconfig" - -source "drivers/infiniband/hw/qedr/Kconfig" - -source "drivers/infiniband/hw/bnxt_re/Kconfig" endif # INFINIBAND diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 69dee36e0e89..313f2349b518 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -15,8 +15,6 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ nldev.o restrack.o ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o -ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o -ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o ib_cm-y := cm.o @@ -39,3 +37,5 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ uverbs_std_types_mr.o uverbs_std_types_counters.o \ uverbs_uapi.o uverbs_std_types_device.o +ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o +ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c index 126ac5f99db7..388fd04e5f63 100644 --- a/drivers/infiniband/core/cgroup.c +++ b/drivers/infiniband/core/cgroup.c @@ -21,12 +21,11 @@ * Register with the rdma cgroup. Should be called before * exposing rdma device to user space applications to avoid * resource accounting leak. - * Returns 0 on success or otherwise failure code. */ -int ib_device_register_rdmacg(struct ib_device *device) +void ib_device_register_rdmacg(struct ib_device *device) { device->cg_device.name = device->name; - return rdmacg_register_device(&device->cg_device); + rdmacg_register_device(&device->cg_device); } /** diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 37980c7564c0..b9416a6fca36 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4052,8 +4052,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, atomic_long_inc(&port->counter_group[CM_RECV]. counter[attr_id - CM_ATTR_ID_OFFSET]); - work = kmalloc(sizeof(*work) + sizeof(struct sa_path_rec) * paths, - GFP_KERNEL); + work = kmalloc(struct_size(work, path, paths), GFP_KERNEL); if (!work) { ib_free_recv_mad(mad_recv_wc); return; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 84f077b2b90a..81bded0d37d1 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2966,13 +2966,22 @@ static void addr_handler(int status, struct sockaddr *src_addr, { struct rdma_id_private *id_priv = context; struct rdma_cm_event event = {}; + struct sockaddr *addr; + struct sockaddr_storage old_addr; mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_RESOLVED)) goto out; - memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); + /* + * Store the previous src address, so that if we fail to acquire + * matching rdma device, old address can be restored back, which helps + * to cancel the cma listen operation correctly. + */ + addr = cma_src_addr(id_priv); + memcpy(&old_addr, addr, rdma_addr_size(addr)); + memcpy(addr, src_addr, rdma_addr_size(src_addr)); if (!status && !id_priv->cma_dev) { status = cma_acquire_dev_by_src_ip(id_priv); if (status) @@ -2983,6 +2992,8 @@ static void addr_handler(int status, struct sockaddr *src_addr, } if (status) { + memcpy(addr, &old_addr, + rdma_addr_size((struct sockaddr *)&old_addr)); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ADDR_BOUND)) goto out; diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 616734313f0c..bcb3e3029a9b 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -54,9 +54,7 @@ struct pkey_index_qp_list { struct list_head qp_list; }; -int ib_device_register_sysfs(struct ib_device *device, - int (*port_callback)(struct ib_device *, - u8, struct kobject *)); +int ib_device_register_sysfs(struct ib_device *device); void ib_device_unregister_sysfs(struct ib_device *device); int ib_device_rename(struct ib_device *ibdev, const char *name); @@ -117,7 +115,7 @@ void ib_cache_cleanup_one(struct ib_device *device); void ib_cache_release_one(struct ib_device *device); #ifdef CONFIG_CGROUP_RDMA -int ib_device_register_rdmacg(struct ib_device *device); +void ib_device_register_rdmacg(struct ib_device *device); void ib_device_unregister_rdmacg(struct ib_device *device); int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj, @@ -128,21 +126,26 @@ void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj, struct ib_device *device, enum rdmacg_resource_type resource_index); #else -static inline int ib_device_register_rdmacg(struct ib_device *device) -{ return 0; } +static inline void ib_device_register_rdmacg(struct ib_device *device) +{ +} static inline void ib_device_unregister_rdmacg(struct ib_device *device) -{ } +{ +} static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj, struct ib_device *device, enum rdmacg_resource_type resource_index) -{ return 0; } +{ + return 0; +} static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj, struct ib_device *device, enum rdmacg_resource_type resource_index) -{ } +{ +} #endif static inline bool rdma_is_upper_dev_rcu(struct net_device *dev, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 238ec42778ef..55221990d946 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -121,13 +121,12 @@ static int ib_device_check_mandatory(struct ib_device *device) }; int i; + device->kverbs_provider = true; for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { if (!*(void **) ((void *) &device->ops + mandatory_table[i].offset)) { - dev_warn(&device->dev, - "Device is missing mandatory function %s\n", - mandatory_table[i].name); - return -EINVAL; + device->kverbs_provider = false; + break; } } @@ -190,18 +189,15 @@ static struct ib_device *__ib_device_get_by_name(const char *name) int ib_device_rename(struct ib_device *ibdev, const char *name) { - struct ib_device *device; int ret = 0; if (!strcmp(name, dev_name(&ibdev->dev))) return ret; mutex_lock(&device_mutex); - list_for_each_entry(device, &device_list, core_list) { - if (!strcmp(name, dev_name(&device->dev))) { - ret = -EEXIST; - goto out; - } + if (__ib_device_get_by_name(name)) { + ret = -EEXIST; + goto out; } ret = device_rename(&ibdev->dev, name); @@ -278,7 +274,7 @@ static struct class ib_class = { }; /** - * ib_alloc_device - allocate an IB device struct + * _ib_alloc_device - allocate an IB device struct * @size:size of structure to allocate * * Low-level drivers should use ib_alloc_device() to allocate &struct @@ -287,7 +283,7 @@ static struct class ib_class = { * ib_dealloc_device() must be used to free structures allocated with * ib_alloc_device(). */ -struct ib_device *ib_alloc_device(size_t size) +struct ib_device *_ib_alloc_device(size_t size) { struct ib_device *device; @@ -298,13 +294,11 @@ struct ib_device *ib_alloc_device(size_t size) if (!device) return NULL; - rdma_restrack_init(&device->res); + rdma_restrack_init(device); device->dev.class = &ib_class; device_initialize(&device->dev); - dev_set_drvdata(&device->dev, device); - INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->event_handler_lock); rwlock_init(&device->client_data_lock); @@ -314,7 +308,7 @@ struct ib_device *ib_alloc_device(size_t size) return device; } -EXPORT_SYMBOL(ib_alloc_device); +EXPORT_SYMBOL(_ib_alloc_device); /** * ib_dealloc_device - free an IB device struct @@ -327,7 +321,7 @@ void ib_dealloc_device(struct ib_device *device) WARN_ON(!list_empty(&device->client_data_list)); WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && device->reg_state != IB_DEV_UNINITIALIZED); - rdma_restrack_clean(&device->res); + rdma_restrack_clean(device); put_device(&device->dev); } EXPORT_SYMBOL(ib_dealloc_device); @@ -336,6 +330,9 @@ static int add_client_context(struct ib_device *device, struct ib_client *client { struct ib_client_data *context; + if (!device->kverbs_provider && !client->no_kverbs_req) + return -EOPNOTSUPP; + context = kmalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM; @@ -580,9 +577,7 @@ port_cleanup: * callback for each device that is added. @device must be allocated * with ib_alloc_device(). */ -int ib_register_device(struct ib_device *device, const char *name, - int (*port_callback)(struct ib_device *, u8, - struct kobject *)) +int ib_register_device(struct ib_device *device, const char *name) { int ret; struct ib_client *client; @@ -612,14 +607,9 @@ int ib_register_device(struct ib_device *device, const char *name, device->index = __dev_new_index(); - ret = ib_device_register_rdmacg(device); - if (ret) { - dev_warn(&device->dev, - "Couldn't register device with rdma cgroup\n"); - goto dev_cleanup; - } + ib_device_register_rdmacg(device); - ret = ib_device_register_sysfs(device, port_callback); + ret = ib_device_register_sysfs(device); if (ret) { dev_warn(&device->dev, "Couldn't register device with driver model\n"); @@ -641,7 +631,6 @@ int ib_register_device(struct ib_device *device, const char *name, cg_cleanup: ib_device_unregister_rdmacg(device); -dev_cleanup: cleanup_device(device); out: mutex_unlock(&device_mutex); @@ -1281,6 +1270,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, disassociate_ucontext); SET_DEVICE_OP(dev_ops, drain_rq); SET_DEVICE_OP(dev_ops, drain_sq); + SET_DEVICE_OP(dev_ops, fill_res_entry); SET_DEVICE_OP(dev_ops, get_dev_fw_str); SET_DEVICE_OP(dev_ops, get_dma_mr); SET_DEVICE_OP(dev_ops, get_hw_stats); @@ -1290,6 +1280,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, get_vector_affinity); SET_DEVICE_OP(dev_ops, get_vf_config); SET_DEVICE_OP(dev_ops, get_vf_stats); + SET_DEVICE_OP(dev_ops, init_port); SET_DEVICE_OP(dev_ops, map_mr_sg); SET_DEVICE_OP(dev_ops, map_phys_fmr); SET_DEVICE_OP(dev_ops, mmap); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 3c97a8b6bf1e..5601fa968244 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -314,7 +314,6 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) [RDMA_RESTRACK_CTX] = "ctx", }; - struct rdma_restrack_root *res = &device->res; struct nlattr *table_attr; int ret, i, curr; @@ -328,7 +327,8 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) for (i = 0; i < RDMA_RESTRACK_MAX; i++) { if (!names[i]) continue; - curr = rdma_restrack_count(res, i, task_active_pid_ns(current)); + curr = rdma_restrack_count(device, i, + task_active_pid_ns(current)); ret = fill_res_info_entry(msg, names[i], curr); if (ret) goto err; @@ -361,11 +361,19 @@ static int fill_res_name_pid(struct sk_buff *msg, return 0; } -static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb, +static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (!dev->ops.fill_res_entry) + return false; + return dev->ops.fill_res_entry(msg, res); +} + +static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_qp *qp = container_of(res, struct ib_qp, res); - struct rdma_restrack_root *resroot = &qp->device->res; + struct ib_device *dev = qp->device; struct ib_qp_init_attr qp_init_attr; struct nlattr *entry_attr; struct ib_qp_attr qp_attr; @@ -415,7 +423,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb, if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); @@ -427,13 +435,12 @@ out: return -EMSGSIZE; } -static int fill_res_cm_id_entry(struct sk_buff *msg, - struct netlink_callback *cb, +static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct rdma_id_private *id_priv = container_of(res, struct rdma_id_private, res); - struct rdma_restrack_root *resroot = &id_priv->id.device->res; + struct ib_device *dev = id_priv->id.device; struct rdma_cm_id *cm_id = &id_priv->id; struct nlattr *entry_attr; @@ -475,7 +482,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); @@ -487,11 +494,11 @@ out: return -EMSGSIZE; } -static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb, +static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_cq *cq = container_of(res, struct ib_cq, res); - struct rdma_restrack_root *resroot = &cq->device->res; + struct ib_device *dev = cq->device; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY); @@ -512,7 +519,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb, if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); @@ -524,18 +531,18 @@ out: return -EMSGSIZE; } -static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb, +static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_mr *mr = container_of(res, struct ib_mr, res); - struct rdma_restrack_root *resroot = &mr->pd->device->res; + struct ib_device *dev = mr->pd->device; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY); if (!entry_attr) goto out; - if (netlink_capable(cb->skb, CAP_NET_ADMIN)) { + if (has_cap_net_admin) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey)) goto err; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey)) @@ -549,7 +556,7 @@ static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb, if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); @@ -561,18 +568,18 @@ out: return -EMSGSIZE; } -static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb, +static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_pd *pd = container_of(res, struct ib_pd, res); - struct rdma_restrack_root *resroot = &pd->device->res; + struct ib_device *dev = pd->device; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY); if (!entry_attr) goto out; - if (netlink_capable(cb->skb, CAP_NET_ADMIN)) { + if (has_cap_net_admin) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, pd->local_dma_lkey)) goto err; @@ -588,7 +595,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb, if (fill_res_name_pid(msg, res)) goto err; - if (resroot->fill_res_entry(msg, res)) + if (fill_res_entry(dev, msg, res)) goto err; nla_nest_end(msg, entry_attr); @@ -905,7 +912,7 @@ static int nldev_res_get_dumpit(struct sk_buff *skb, } struct nldev_fill_res_entry { - int (*fill_res_func)(struct sk_buff *msg, struct netlink_callback *cb, + int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, u32 port); enum rdma_nldev_attr nldev_attr; enum rdma_nldev_command nldev_cmd; @@ -939,6 +946,17 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { }, }; +static bool is_visible_in_pid_ns(struct rdma_restrack_entry *res) +{ + /* + * 1. Kern resources should be visible in init name space only + * 2. Present only resources visible in the current namespace + */ + if (rdma_is_kernel_res(res)) + return task_active_pid_ns(current) == &init_pid_ns; + return task_active_pid_ns(current) == task_active_pid_ns(res->task); +} + static int res_get_common_dumpit(struct sk_buff *skb, struct netlink_callback *cb, enum rdma_restrack_type res_type) @@ -950,6 +968,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, struct nlattr *table_attr; struct ib_device *device; int start = cb->args[0]; + bool has_cap_net_admin; struct nlmsghdr *nlh; u32 index, port = 0; bool filled = false; @@ -998,21 +1017,14 @@ static int res_get_common_dumpit(struct sk_buff *skb, goto err; } + has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN); + down_read(&device->res.rwsem); hash_for_each_possible(device->res.hash, res, node, res_type) { if (idx < start) goto next; - if ((rdma_is_kernel_res(res) && - task_active_pid_ns(current) != &init_pid_ns) || - (!rdma_is_kernel_res(res) && task_active_pid_ns(current) != - task_active_pid_ns(res->task))) - /* - * 1. Kern resources should be visible in init - * namspace only - * 2. Present only resources visible in the current - * namespace - */ + if (!is_visible_in_pid_ns(res)) goto next; if (!rdma_restrack_get(res)) @@ -1026,7 +1038,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, filled = true; up_read(&device->res.rwsem); - ret = fe->fill_res_func(skb, cb, res, port); + ret = fe->fill_res_func(skb, has_cap_net_admin, res, port); down_read(&device->res.rwsem); /* * Return resource back, but it won't be released till @@ -1073,35 +1085,18 @@ err_index: return ret; } -static int nldev_res_get_qp_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) -{ - return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP); -} - -static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) -{ - return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CM_ID); -} - -static int nldev_res_get_cq_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) -{ - return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CQ); -} - -static int nldev_res_get_mr_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) -{ - return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR); -} +#define RES_GET_FUNCS(name, type) \ + static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ + struct netlink_callback *cb) \ + { \ + return res_get_common_dumpit(skb, cb, type); \ + } -static int nldev_res_get_pd_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) -{ - return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD); -} +RES_GET_FUNCS(qp, RDMA_RESTRACK_QP); +RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID); +RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ); +RES_GET_FUNCS(pd, RDMA_RESTRACK_PD); +RES_GET_FUNCS(mr, RDMA_RESTRACK_MR); static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 6c4747e61d2b..a260d2f8e0b7 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -801,6 +801,7 @@ void uverbs_close_fd(struct file *f) /* Pairs with filp->private_data in alloc_begin_fd_uobject */ uverbs_uobject_put(uobj); } +EXPORT_SYMBOL(uverbs_close_fd); /* * Drop the ucontext off the ufile and completely disconnect it from the diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 46a5c553c624..f80b37d437ac 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -12,16 +12,15 @@ #include "cma_priv.h" -static int fill_res_noop(struct sk_buff *msg, - struct rdma_restrack_entry *entry) +/** + * rdma_restrack_init() - initialize resource tracking + * @dev: IB device + */ +void rdma_restrack_init(struct ib_device *dev) { - return 0; -} + struct rdma_restrack_root *res = &dev->res; -void rdma_restrack_init(struct rdma_restrack_root *res) -{ init_rwsem(&res->rwsem); - res->fill_res_entry = fill_res_noop; } static const char *type2str(enum rdma_restrack_type type) @@ -38,11 +37,15 @@ static const char *type2str(enum rdma_restrack_type type) return names[type]; }; -void rdma_restrack_clean(struct rdma_restrack_root *res) +/** + * rdma_restrack_clean() - clean resource tracking + * @dev: IB device + */ +void rdma_restrack_clean(struct ib_device *dev) { + struct rdma_restrack_root *res = &dev->res; struct rdma_restrack_entry *e; char buf[TASK_COMM_LEN]; - struct ib_device *dev; const char *owner; int bkt; @@ -72,10 +75,16 @@ void rdma_restrack_clean(struct rdma_restrack_root *res) pr_err("restrack: %s", CUT_HERE); } -int rdma_restrack_count(struct rdma_restrack_root *res, - enum rdma_restrack_type type, +/** + * rdma_restrack_count() - the current usage of specific object + * @dev: IB device + * @type: actual type of object to operate + * @ns: PID namespace + */ +int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, struct pid_namespace *ns) { + struct rdma_restrack_root *res = &dev->res; struct rdma_restrack_entry *e; u32 cnt = 0; @@ -161,17 +170,6 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res) if (!dev) return; - if (res->type != RDMA_RESTRACK_CM_ID || rdma_is_kernel_res(res)) - res->task = NULL; - - if (!rdma_is_kernel_res(res)) { - if (!res->task) - rdma_restrack_set_task(res, NULL); - res->kern_name = NULL; - } else { - set_kern_name(res); - } - kref_init(&res->kref); init_completion(&res->comp); res->valid = true; @@ -187,6 +185,8 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res) */ void rdma_restrack_kadd(struct rdma_restrack_entry *res) { + res->task = NULL; + set_kern_name(res); res->user = false; rdma_restrack_add(res); } @@ -198,6 +198,13 @@ EXPORT_SYMBOL(rdma_restrack_kadd); */ void rdma_restrack_uadd(struct rdma_restrack_entry *res) { + if (res->type != RDMA_RESTRACK_CM_ID) + res->task = NULL; + + if (!res->task) + rdma_restrack_set_task(res, NULL); + res->kern_name = NULL; + res->user = true; rdma_restrack_add(res); } diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index d22c4a2ebac6..89a5be3a2f97 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -179,7 +179,6 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, struct scatterlist *sg, u32 sg_cnt, u32 offset, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { - struct ib_device *dev = qp->pd->device; u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge : qp->max_read_sge; struct ib_sge *sge; @@ -209,8 +208,8 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, rdma_wr->wr.sg_list = sge; for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) { - sge->addr = ib_sg_dma_address(dev, sg) + offset; - sge->length = ib_sg_dma_len(dev, sg) - offset; + sge->addr = sg_dma_address(sg) + offset; + sge->length = sg_dma_len(sg) - offset; sge->lkey = qp->pd->local_dma_lkey; total_len += sge->length; @@ -236,14 +235,13 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp, struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { - struct ib_device *dev = qp->pd->device; struct ib_rdma_wr *rdma_wr = &ctx->single.wr; ctx->nr_ops = 1; ctx->single.sge.lkey = qp->pd->local_dma_lkey; - ctx->single.sge.addr = ib_sg_dma_address(dev, sg) + offset; - ctx->single.sge.length = ib_sg_dma_len(dev, sg) - offset; + ctx->single.sge.addr = sg_dma_address(sg) + offset; + ctx->single.sge.length = sg_dma_len(sg) - offset; memset(rdma_wr, 0, sizeof(*rdma_wr)); if (dir == DMA_TO_DEVICE) @@ -294,7 +292,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, * Skip to the S/G entry that sg_offset falls into: */ for (;;) { - u32 len = ib_sg_dma_len(dev, sg); + u32 len = sg_dma_len(sg); if (sg_offset < len) break; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 97e6d7b69abf..7925e45ea88a 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -2342,9 +2342,7 @@ static void ib_sa_add_one(struct ib_device *device) s = rdma_start_port(device); e = rdma_end_port(device); - sa_dev = kzalloc(sizeof *sa_dev + - (e - s + 1) * sizeof (struct ib_sa_port), - GFP_KERNEL); + sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL); if (!sa_dev) return; diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 80f68eb0ba5c..c75692802da8 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1015,9 +1015,7 @@ err_free_stats: return; } -static int add_port(struct ib_device *device, int port_num, - int (*port_callback)(struct ib_device *, - u8, struct kobject *)) +static int add_port(struct ib_device *device, int port_num) { struct ib_port *p; struct ib_port_attr attr; @@ -1113,8 +1111,8 @@ static int add_port(struct ib_device *device, int port_num, if (ret) goto err_free_pkey; - if (port_callback) { - ret = port_callback(device, port_num, &p->kobj); + if (device->ops.init_port) { + ret = device->ops.init_port(device, port_num, &p->kobj); if (ret) goto err_remove_pkey; } @@ -1189,7 +1187,7 @@ err_put: static ssize_t node_type_show(struct device *device, struct device_attribute *attr, char *buf) { - struct ib_device *dev = container_of(device, struct ib_device, dev); + struct ib_device *dev = rdma_device_to_ibdev(device); switch (dev->node_type) { case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); @@ -1206,7 +1204,7 @@ static DEVICE_ATTR_RO(node_type); static ssize_t sys_image_guid_show(struct device *device, struct device_attribute *dev_attr, char *buf) { - struct ib_device *dev = container_of(device, struct ib_device, dev); + struct ib_device *dev = rdma_device_to_ibdev(device); return sprintf(buf, "%04x:%04x:%04x:%04x\n", be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]), @@ -1219,7 +1217,7 @@ static DEVICE_ATTR_RO(sys_image_guid); static ssize_t node_guid_show(struct device *device, struct device_attribute *attr, char *buf) { - struct ib_device *dev = container_of(device, struct ib_device, dev); + struct ib_device *dev = rdma_device_to_ibdev(device); return sprintf(buf, "%04x:%04x:%04x:%04x\n", be16_to_cpu(((__be16 *) &dev->node_guid)[0]), @@ -1232,7 +1230,7 @@ static DEVICE_ATTR_RO(node_guid); static ssize_t node_desc_show(struct device *device, struct device_attribute *attr, char *buf) { - struct ib_device *dev = container_of(device, struct ib_device, dev); + struct ib_device *dev = rdma_device_to_ibdev(device); return sprintf(buf, "%.64s\n", dev->node_desc); } @@ -1241,7 +1239,7 @@ static ssize_t node_desc_store(struct device *device, struct device_attribute *attr, const char *buf, size_t count) { - struct ib_device *dev = container_of(device, struct ib_device, dev); + struct ib_device *dev = rdma_device_to_ibdev(device); struct ib_device_modify desc = {}; int ret; @@ -1260,7 +1258,7 @@ static DEVICE_ATTR_RW(node_desc); static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr, char *buf) { - struct ib_device *dev = container_of(device, struct ib_device, dev); + struct ib_device *dev = rdma_device_to_ibdev(device); ib_get_device_fw_str(dev, buf); strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX); @@ -1308,9 +1306,7 @@ static void free_port_list_attributes(struct ib_device *device) kobject_put(device->ports_kobj); } -int ib_device_register_sysfs(struct ib_device *device, - int (*port_callback)(struct ib_device *, - u8, struct kobject *)) +int ib_device_register_sysfs(struct ib_device *device) { struct device *class_dev = &device->dev; int ret; @@ -1330,12 +1326,12 @@ int ib_device_register_sysfs(struct ib_device *device, } if (rdma_cap_ib_switch(device)) { - ret = add_port(device, 0, port_callback); + ret = add_port(device, 0); if (ret) goto err_put; } else { for (i = 1; i <= device->phys_port_cnt; ++i) { - ret = add_port(device, i, port_callback); + ret = add_port(device, i); if (ret) goto err_put; } diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index c6144df47ea4..1efe0a74e06b 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -72,15 +72,16 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d * If access flags indicate ODP memory, avoid pinning. Instead, stores * the mm for future page fault handling in conjunction with MMU notifiers. * - * @context: userspace context to pin memory for + * @udata: userspace context to pin memory for * @addr: userspace virtual address to start at * @size: length of region to pin * @access: IB_ACCESS_xxx flags for memory being pinned * @dmasync: flush in-flight DMA when the memory region is written */ -struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, +struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, size_t size, int access, int dmasync) { + struct ib_ucontext *context; struct ib_umem *umem; struct page **page_list; struct vm_area_struct **vma_list; @@ -95,6 +96,10 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, struct scatterlist *sg, *sg_list_start; unsigned int gup_flags = FOLL_WRITE; + context = rdma_get_ucontext(udata); + if (IS_ERR(context)) + return ERR_CAST(context); + if (dmasync) dma_attrs |= DMA_ATTR_WRITE_BARRIER; diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index acb882f279cb..012044f16d1c 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -299,7 +299,7 @@ static void free_per_mm(struct rcu_head *rcu) kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu)); } -void put_per_mm(struct ib_umem_odp *umem_odp) +static void put_per_mm(struct ib_umem_odp *umem_odp) { struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; struct ib_ucontext *ctx = umem_odp->umem.context; @@ -332,9 +332,10 @@ void put_per_mm(struct ib_umem_odp *umem_odp) mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm); } -struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, +struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root, unsigned long addr, size_t size) { + struct ib_ucontext_per_mm *per_mm = root->per_mm; struct ib_ucontext *ctx = per_mm->context; struct ib_umem_odp *odp_data; struct ib_umem *umem; @@ -349,7 +350,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, umem->length = size; umem->address = addr; umem->page_shift = PAGE_SHIFT; - umem->writable = 1; + umem->writable = root->umem.writable; umem->is_odp = 1; odp_data->per_mm = per_mm; umem->owning_mm = per_mm->mm; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index de8d31ab8945..3ebd211a87ed 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -957,19 +957,22 @@ static int ib_umad_open(struct inode *inode, struct file *filp) { struct ib_umad_port *port; struct ib_umad_file *file; - int ret = -ENXIO; + int ret = 0; port = container_of(inode->i_cdev, struct ib_umad_port, cdev); mutex_lock(&port->file_mutex); - if (!port->ib_dev) + if (!port->ib_dev) { + ret = -ENXIO; goto out; + } - ret = -ENOMEM; - file = kzalloc(sizeof *file, GFP_KERNEL); - if (!file) + file = kzalloc(sizeof(*file), GFP_KERNEL); + if (!file) { + ret = -ENOMEM; goto out; + } mutex_init(&file->mutex); spin_lock_init(&file->send_lock); @@ -982,14 +985,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp) list_add_tail(&file->port_list, &port->file_list); - ret = nonseekable_open(inode, filp); - if (ret) { - list_del(&file->port_list); - kfree(file); - goto out; - } - - ib_umad_dev_get(port->umad_dev); + nonseekable_open(inode, filp); out: mutex_unlock(&port->file_mutex); return ret; @@ -998,7 +994,6 @@ out: static int ib_umad_close(struct inode *inode, struct file *filp) { struct ib_umad_file *file = filp->private_data; - struct ib_umad_device *dev = file->port->umad_dev; struct ib_umad_packet *packet, *tmp; int already_dead; int i; @@ -1027,7 +1022,6 @@ static int ib_umad_close(struct inode *inode, struct file *filp) mutex_unlock(&file->port->file_mutex); kfree(file); - ib_umad_dev_put(dev); return 0; } @@ -1073,17 +1067,9 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) filp->private_data = port; - ret = nonseekable_open(inode, filp); - if (ret) - goto err_clr_sm_cap; - - ib_umad_dev_get(port->umad_dev); + nonseekable_open(inode, filp); return 0; -err_clr_sm_cap: - swap(props.set_port_cap_mask, props.clr_port_cap_mask); - ib_modify_port(port->ib_dev, port->port_num, 0, &props); - err_up_sem: up(&port->sm_sem); @@ -1106,7 +1092,6 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) up(&port->sm_sem); - ib_umad_dev_put(port->umad_dev); return ret; } @@ -1283,10 +1268,12 @@ static void ib_umad_kill_port(struct ib_umad_port *port) mutex_unlock(&port->file_mutex); cdev_device_del(&port->sm_cdev, &port->sm_dev); - put_device(&port->sm_dev); cdev_device_del(&port->cdev, &port->dev); - put_device(&port->dev); ida_free(&umad_ida, port->dev_num); + + /* balances device_initialize() */ + put_device(&port->sm_dev); + put_device(&port->dev); } static void ib_umad_add_one(struct ib_device *device) @@ -1329,6 +1316,7 @@ err: ib_umad_kill_port(&umad_dev->ports[i - s]); } free: + /* balances kref_init */ ib_umad_dev_put(umad_dev); } @@ -1344,6 +1332,7 @@ static void ib_umad_remove_one(struct ib_device *device, void *client_data) if (rdma_cap_ib_mad(device, i + rdma_start_port(device))) ib_umad_kill_port(&umad_dev->ports[i]); } + /* balances kref_init() */ ib_umad_dev_put(umad_dev); } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 3317300ab036..aa260cafbd85 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -238,14 +238,11 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) ucontext->closing = false; ucontext->cleanup_retryable = false; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING mutex_init(&ucontext->per_mm_list_lock); INIT_LIST_HEAD(&ucontext->per_mm_list); if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) ucontext->invalidate_range = NULL; -#endif - resp.num_comp_vectors = file->device->num_comp_vectors; ret = get_unused_fd_flags(O_CLOEXEC); @@ -3618,7 +3615,6 @@ static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs) copy_query_dev_fields(ucontext, &resp.base, &attr); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING resp.odp_caps.general_caps = attr.odp_caps.general_caps; resp.odp_caps.per_transport_caps.rc_odp_caps = attr.odp_caps.per_transport_caps.rc_odp_caps; @@ -3626,7 +3622,7 @@ static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs) attr.odp_caps.per_transport_caps.uc_odp_caps; resp.odp_caps.per_transport_caps.ud_odp_caps = attr.odp_caps.per_transport_caps.ud_odp_caps; -#endif + resp.xrc_odp_caps = attr.odp_caps.per_transport_caps.xrc_odp_caps; resp.timestamp_mask = attr.timestamp_mask; resp.hca_core_clock = attr.hca_core_clock; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 5f366838b7ff..accc61cc93ac 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -101,6 +101,30 @@ struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile) } EXPORT_SYMBOL(ib_uverbs_get_ucontext_file); +/* rdma_get_ucontext - Return the ucontext from a udata + * @udata: The udata to get the context from + * + * This can only be called from within a uapi method that was passed ib_udata + * as a parameter. It returns the ucontext associated with the udata, or ERR_PTR + * if the udata is NULL or the ucontext has been disassociated. + */ +struct ib_ucontext *rdma_get_ucontext(struct ib_udata *udata) +{ + if (!udata) + return ERR_PTR(-EIO); + + /* + * FIXME: Really all cases that get here with a udata will have + * already called ib_uverbs_get_ucontext_file, or located a uobject + * that points to a ucontext. We could store that result in the udata + * so this function can't fail. + */ + return ib_uverbs_get_ucontext_file( + container_of(udata, struct uverbs_attr_bundle, driver_udata) + ->ufile); +} +EXPORT_SYMBOL(rdma_get_ucontext); + int uverbs_dealloc_mw(struct ib_mw *mw) { struct ib_pd *pd = mw->pd; @@ -1135,6 +1159,7 @@ static const struct file_operations uverbs_mmap_fops = { static struct ib_client uverbs_client = { .name = "uverbs", + .no_kverbs_req = true, .add = ib_uverbs_add_one, .remove = ib_uverbs_remove_one }; diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 9ae08e4b78a3..7a987acf0c0b 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -188,13 +188,18 @@ static int uapi_merge_obj_tree(struct uverbs_api *uapi, obj_elm->type_attrs = obj->type_attrs; obj_elm->type_class = obj->type_attrs->type_class; /* - * Today drivers are only permitted to use idr_class - * types. They cannot use FD types because we currently have - * no way to revoke the fops pointer after device - * disassociation. + * Today drivers are only permitted to use idr_class and + * fd_class types. We can revoke the IDR types during + * disassociation, and the FD types require the driver to use + * struct file_operations.owner to prevent the driver module + * code from unloading while the file is open. This provides + * enough safety that uverbs_close_fd() will continue to work. + * Drivers using FD are responsible to handle disassociation of + * the device on their own. */ if (WARN_ON(is_driver && - obj->type_attrs->type_class != &uverbs_idr_class)) + obj->type_attrs->type_class != &uverbs_idr_class && + obj->type_attrs->type_class != &uverbs_fd_class)) return -EINVAL; } diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index ac011836bb54..3220fb42ecce 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1106,8 +1106,8 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, } EXPORT_SYMBOL(ib_open_qp); -static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp, - struct ib_qp_init_attr *qp_init_attr) +static struct ib_qp *create_xrc_qp(struct ib_qp *qp, + struct ib_qp_init_attr *qp_init_attr) { struct ib_qp *real_qp = qp; @@ -1122,10 +1122,10 @@ static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp, qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, qp_init_attr->qp_context); - if (!IS_ERR(qp)) - __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); - else - real_qp->device->ops.destroy_qp(real_qp); + if (IS_ERR(qp)) + return qp; + + __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); return qp; } @@ -1156,10 +1156,8 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, return qp; ret = ib_create_qp_security(qp, device); - if (ret) { - ib_destroy_qp(qp); - return ERR_PTR(ret); - } + if (ret) + goto err; qp->real_qp = qp; qp->qp_type = qp_init_attr->qp_type; @@ -1172,8 +1170,15 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, INIT_LIST_HEAD(&qp->sig_mrs); qp->port = 0; - if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) - return ib_create_xrc_qp(qp, qp_init_attr); + if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { + struct ib_qp *xrc_qp = create_xrc_qp(qp, qp_init_attr); + + if (IS_ERR(xrc_qp)) { + ret = PTR_ERR(xrc_qp); + goto err; + } + return xrc_qp; + } qp->event_handler = qp_init_attr->event_handler; qp->qp_context = qp_init_attr->qp_context; @@ -1200,11 +1205,8 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, if (qp_init_attr->cap.max_rdma_ctxs) { ret = rdma_rw_init_mrs(qp, qp_init_attr); - if (ret) { - pr_err("failed to init MR pool ret= %d\n", ret); - ib_destroy_qp(qp); - return ERR_PTR(ret); - } + if (ret) + goto err; } /* @@ -1217,6 +1219,11 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, device->attrs.max_sge_rd); return qp; + +err: + ib_destroy_qp(qp); + return ERR_PTR(ret); + } EXPORT_SYMBOL(ib_create_qp); diff --git a/drivers/infiniband/hw/bnxt_re/Makefile b/drivers/infiniband/hw/bnxt_re/Makefile index 6e3bc25cc140..ee9bb1be61ea 100644 --- a/drivers/infiniband/hw/bnxt_re/Makefile +++ b/drivers/infiniband/hw/bnxt_re/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-y := -Idrivers/net/ethernet/broadcom/bnxt +ccflags-y := -I $(srctree)/drivers/net/ethernet/broadcom/bnxt obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re.o bnxt_re-y := main.o ib_verbs.o \ qplib_res.o qplib_rcfw.o \ diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 1e2515e2eb62..9bc637e49faa 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -895,8 +895,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) bytes += (qplib_qp->sq.max_wqe * sizeof(struct sq_psn_search)); bytes = PAGE_ALIGN(bytes); - umem = ib_umem_get(context, ureq.qpsva, bytes, - IB_ACCESS_LOCAL_WRITE, 1); + umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(umem)) return PTR_ERR(umem); @@ -908,7 +907,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, if (!qp->qplib_qp.srq) { bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); bytes = PAGE_ALIGN(bytes); - umem = ib_umem_get(context, ureq.qprva, bytes, + umem = ib_umem_get(udata, ureq.qprva, bytes, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(umem)) goto rqfail; @@ -1370,8 +1369,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); bytes = PAGE_ALIGN(bytes); - umem = ib_umem_get(context, ureq.srqva, bytes, - IB_ACCESS_LOCAL_WRITE, 1); + umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(umem)) return PTR_ERR(umem); @@ -2622,7 +2620,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, goto fail; } - cq->umem = ib_umem_get(context, req.cq_va, + cq->umem = ib_umem_get(udata, req.cq_va, entries * sizeof(struct cq_base), IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(cq->umem)) { @@ -3589,8 +3587,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, /* The fixed portion of the rkey is the same as the lkey */ mr->ib_mr.rkey = mr->qplib_mr.rkey; - umem = ib_umem_get(ib_pd->uobject->context, start, length, - mr_access_flags, 0); + umem = ib_umem_get(udata, start, length, mr_access_flags, 0); if (IS_ERR(umem)) { dev_err(rdev_to_dev(rdev), "Failed to get umem"); rc = -EFAULT; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index e7a997f2a537..08777506d0b1 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -538,7 +538,8 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev) static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { - struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev); + struct bnxt_re_dev *rdev = + rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor); } @@ -547,7 +548,8 @@ static DEVICE_ATTR_RO(hw_rev); static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { - struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev); + struct bnxt_re_dev *rdev = + rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc); } @@ -662,7 +664,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); ibdev->driver_id = RDMA_DRIVER_BNXT_RE; ib_set_device_ops(ibdev, &bnxt_re_dev_ops); - return ib_register_device(ibdev, "bnxt_re%d", NULL); + return ib_register_device(ibdev, "bnxt_re%d"); } static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev) @@ -686,7 +688,7 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev, struct bnxt_re_dev *rdev; /* Allocate bnxt_re_dev instance here */ - rdev = (struct bnxt_re_dev *)ib_alloc_device(sizeof(*rdev)); + rdev = ib_alloc_device(bnxt_re_dev, ibdev); if (!rdev) { dev_err(NULL, "%s: bnxt_re_dev allocation failure!", ROCE_DRV_MODULE_NAME); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 19551aa43850..65e17de220f6 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -684,8 +684,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, /* General */ rcfw->seq_num = 0; set_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags); - bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth * - sizeof(unsigned long)); + bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth) * sizeof(unsigned long); rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL); if (!rcfw->cmdq_bitmap) return -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile index 66fe0917aba0..34bb86a6ae3a 100644 --- a/drivers/infiniband/hw/cxgb3/Makefile +++ b/drivers/infiniband/hw/cxgb3/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb3 +ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb3 obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 591de319c178..fb03bc492ef7 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -146,7 +146,7 @@ static void open_rnic_dev(struct t3cdev *tdev) pr_debug("%s t3cdev %p\n", __func__, tdev); pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION); - rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp)); + rnicp = ib_alloc_device(iwch_dev, ibdev); if (!rnicp) { pr_err("Cannot allocate ib device\n"); return; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index b34b1a1bd94b..07c20cd07f33 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -540,7 +540,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mhp->rhp = rhp; - mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); + mhp->umem = ib_umem_get(udata, start, length, acc, 0); if (IS_ERR(mhp->umem)) { err = PTR_ERR(mhp->umem); kfree(mhp); @@ -1130,8 +1130,9 @@ static int iwch_query_port(struct ib_device *ibdev, static ssize_t hw_rev_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, - ibdev.dev); + struct iwch_dev *iwch_dev = + rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); + pr_debug("%s dev 0x%p\n", __func__, dev); return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); } @@ -1140,8 +1141,8 @@ static DEVICE_ATTR_RO(hw_rev); static ssize_t hca_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, - ibdev.dev); + struct iwch_dev *iwch_dev = + rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); struct ethtool_drvinfo info; struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; @@ -1154,8 +1155,9 @@ static DEVICE_ATTR_RO(hca_type); static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, - ibdev.dev); + struct iwch_dev *iwch_dev = + rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); + pr_debug("%s dev 0x%p\n", __func__, dev); return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor, iwch_dev->rdev.rnic_info.pdev->device); @@ -1409,7 +1411,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.driver_id = RDMA_DRIVER_CXGB3; rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); ib_set_device_ops(&dev->ibdev, &iwch_dev_ops); - ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL); + ret = ib_register_device(&dev->ibdev, "cxgb3_%d"); if (ret) kfree(dev->ibdev.iwcm); return ret; diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile index 9edd92023e18..31a87d90a40b 100644 --- a/drivers/infiniband/hw/cxgb4/Makefile +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -1,5 +1,5 @@ -ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 -ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb +ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4 +ccflags-y += -I $(srctree)/drivers/net/ethernet/chelsio/libcxgb obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 8221813219e5..59917eb124da 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2942,15 +2942,18 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) ep = get_ep_from_tid(dev, tid); - if (ep && ep->com.qp) { - pr_warn("TERM received tid %u qpid %u\n", - tid, ep->com.qp->wq.sq.qid); - attrs.next_state = C4IW_QP_STATE_TERMINATE; - c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + if (ep) { + if (ep->com.qp) { + pr_warn("TERM received tid %u qpid %u\n", tid, + ep->com.qp->wq.sq.qid); + attrs.next_state = C4IW_QP_STATE_TERMINATE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } + + c4iw_put_ep(&ep->com); } else pr_warn("TERM received tid %u no ep/qp\n", tid); - c4iw_put_ep(&ep->com); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index c13c0ba30f63..4b4e2464b705 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -720,11 +720,8 @@ static const struct file_operations ep_debugfs_fops = { .read = debugfs_read, }; -static int setup_debugfs(struct c4iw_dev *devp) +static void setup_debugfs(struct c4iw_dev *devp) { - if (!devp->debugfs_root) - return -1; - debugfs_create_file_size("qps", S_IWUSR, devp->debugfs_root, (void *)devp, &qp_debugfs_fops, 4096); @@ -740,7 +737,6 @@ static int setup_debugfs(struct c4iw_dev *devp) if (c4iw_wr_log) debugfs_create_file_size("wr_log", S_IWUSR, devp->debugfs_root, (void *)devp, &wr_log_debugfs_fops, 4096); - return 0; } void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, @@ -970,7 +966,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) pr_info("%s: On-Chip Queues not supported on this device\n", pci_name(infop->pdev)); - devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp)); + devp = ib_alloc_device(c4iw_dev, ibdev); if (!devp) { pr_err("Cannot allocate ib device\n"); return ERR_PTR(-ENOMEM); @@ -1553,8 +1549,6 @@ static int __init c4iw_init_module(void) return err; c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL); - if (!c4iw_debugfs_root) - pr_warn("could not create debugfs entry, continuing\n"); reg_workq = create_singlethread_workqueue("Register_iWARP_device"); if (!reg_workq) { diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 7b76e6f81aeb..96760a36b9fc 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -537,7 +537,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mhp->rhp = rhp; - mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); + mhp->umem = ib_umem_get(udata, start, length, acc, 0); if (IS_ERR(mhp->umem)) goto err_free_skb; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 586b0c37481f..cb5b713bbf39 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -376,8 +376,9 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port, static ssize_t hw_rev_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, - ibdev.dev); + struct c4iw_dev *c4iw_dev = + rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); + pr_debug("dev 0x%p\n", dev); return sprintf(buf, "%d\n", CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); @@ -387,8 +388,8 @@ static DEVICE_ATTR_RO(hw_rev); static ssize_t hca_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, - ibdev.dev); + struct c4iw_dev *c4iw_dev = + rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); struct ethtool_drvinfo info; struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0]; @@ -401,8 +402,9 @@ static DEVICE_ATTR_RO(hca_type); static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, - ibdev.dev); + struct c4iw_dev *c4iw_dev = + rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); + pr_debug("dev 0x%p\n", dev); return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, c4iw_dev->rdev.lldi.pdev->device); @@ -547,6 +549,7 @@ static const struct ib_device_ops c4iw_dev_ops = { .destroy_cq = c4iw_destroy_cq, .destroy_qp = c4iw_destroy_qp, .destroy_srq = c4iw_destroy_srq, + .fill_res_entry = fill_res_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = c4iw_get_dma_mr, .get_hw_stats = c4iw_get_mib, @@ -627,14 +630,13 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref; dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref; dev->ibdev.iwcm->get_qp = c4iw_get_qp; - dev->ibdev.res.fill_res_entry = fill_res_entry; memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name, sizeof(dev->ibdev.iwcm->ifname)); rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group); dev->ibdev.driver_id = RDMA_DRIVER_CXGB4; ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops); - ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL); + ret = ib_register_device(&dev->ibdev, "cxgb4_%d"); if (ret) goto err_kfree_iwcm; return; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 504cf525508f..0fe87b9c1e10 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -632,7 +632,10 @@ static void build_rdma_write_cmpl(struct t4_sq *sq, wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); - wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey); + if (wr->next->opcode == IB_WR_SEND) + wcwr->stag_inv = 0; + else + wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey); wcwr->r2 = 0; wcwr->r3 = 0; @@ -726,7 +729,10 @@ static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr) /* SEND_WITH_INV swsqe */ swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; - swsqe->opcode = FW_RI_SEND_WITH_INV; + if (wr->next->opcode == IB_WR_SEND) + swsqe->opcode = FW_RI_SEND; + else + swsqe->opcode = FW_RI_SEND_WITH_INV; swsqe->idx = qhp->wq.sq.pidx; swsqe->complete = 0; swsqe->signaled = send_signaled; @@ -1133,9 +1139,9 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, /* * Fastpath for NVMe-oF target WRITE + SEND_WITH_INV wr chain which is * the response for small NVMEe-oF READ requests. If the chain is - * exactly a WRITE->SEND_WITH_INV and the sgl depths and lengths - * meet the requirements of the fw_ri_write_cmpl_wr work request, - * then build and post the write_cmpl WR. If any of the tests + * exactly a WRITE->SEND_WITH_INV or a WRITE->SEND and the sgl depths + * and lengths meet the requirements of the fw_ri_write_cmpl_wr work + * request, then build and post the write_cmpl WR. If any of the tests * below are not true, then we continue on with the tradtional WRITE * and SEND WRs. */ @@ -1145,7 +1151,8 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, wr && wr->next && !wr->next->next && wr->opcode == IB_WR_RDMA_WRITE && wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL && - wr->next->opcode == IB_WR_SEND_WITH_INV && + (wr->next->opcode == IB_WR_SEND || + wr->next->opcode == IB_WR_SEND_WITH_INV) && wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE && wr->next->num_sge == 1 && num_wrs >= 2) { post_write_cmpl(qhp, wr); @@ -2589,7 +2596,7 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, /* build fw_ri_res_wr */ wr_len = sizeof(*res_wr) + sizeof(*res); - skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + skb = alloc_skb(wr_len, GFP_KERNEL); if (!skb) goto err_free_queue; set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 0a557795563c..427ba0ce74a5 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -1167,6 +1167,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) char link[10]; struct hfi1_devdata *dd = dd_from_dev(ibd); struct hfi1_pportdata *ppd; + struct dentry *root; int unit = dd->unit; int i, j; @@ -1174,31 +1175,29 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) return; snprintf(name, sizeof(name), "%s_%d", class_name(), unit); snprintf(link, sizeof(link), "%d", unit); - ibd->hfi1_ibdev_dbg = debugfs_create_dir(name, hfi1_dbg_root); - if (!ibd->hfi1_ibdev_dbg) { - pr_warn("create of %s failed\n", name); - return; - } + root = debugfs_create_dir(name, hfi1_dbg_root); + ibd->hfi1_ibdev_dbg = root; + ibd->hfi1_ibdev_link = debugfs_create_symlink(link, hfi1_dbg_root, name); - if (!ibd->hfi1_ibdev_link) { - pr_warn("create of %s symlink failed\n", name); - return; - } - DEBUGFS_SEQ_FILE_CREATE(opcode_stats, ibd->hfi1_ibdev_dbg, ibd); - DEBUGFS_SEQ_FILE_CREATE(tx_opcode_stats, ibd->hfi1_ibdev_dbg, ibd); - DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd); - DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd); - DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd); - DEBUGFS_SEQ_FILE_CREATE(rcds, ibd->hfi1_ibdev_dbg, ibd); - DEBUGFS_SEQ_FILE_CREATE(pios, ibd->hfi1_ibdev_dbg, ibd); - DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd); + + debugfs_create_file("opcode_stats", 0444, root, ibd, + &_opcode_stats_file_ops); + debugfs_create_file("tx_opcode_stats", 0444, root, ibd, + &_tx_opcode_stats_file_ops); + debugfs_create_file("ctx_stats", 0444, root, ibd, &_ctx_stats_file_ops); + debugfs_create_file("qp_stats", 0444, root, ibd, &_qp_stats_file_ops); + debugfs_create_file("sdes", 0444, root, ibd, &_sdes_file_ops); + debugfs_create_file("rcds", 0444, root, ibd, &_rcds_file_ops); + debugfs_create_file("pios", 0444, root, ibd, &_pios_file_ops); + debugfs_create_file("sdma_cpu_list", 0444, root, ibd, + &_sdma_cpu_list_file_ops); + /* dev counter files */ for (i = 0; i < ARRAY_SIZE(cntr_ops); i++) - DEBUGFS_FILE_CREATE(cntr_ops[i].name, - ibd->hfi1_ibdev_dbg, - dd, - &cntr_ops[i].ops, S_IRUGO); + debugfs_create_file(cntr_ops[i].name, 0444, root, dd, + &cntr_ops[i].ops); + /* per port files */ for (ppd = dd->pport, j = 0; j < dd->num_pports; j++, ppd++) for (i = 0; i < ARRAY_SIZE(port_cntr_ops); i++) { @@ -1206,12 +1205,11 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) sizeof(name), port_cntr_ops[i].name, j + 1); - DEBUGFS_FILE_CREATE(name, - ibd->hfi1_ibdev_dbg, - ppd, - &port_cntr_ops[i].ops, + debugfs_create_file(name, !port_cntr_ops[i].ops.write ? - S_IRUGO : S_IRUGO | S_IWUSR); + S_IRUGO : + S_IRUGO | S_IWUSR, + root, ppd, &port_cntr_ops[i].ops); } hfi1_fault_init_debugfs(ibd); @@ -1341,10 +1339,10 @@ DEBUGFS_FILE_OPS(driver_stats); void hfi1_dbg_init(void) { hfi1_dbg_root = debugfs_create_dir(DRIVER_NAME, NULL); - if (!hfi1_dbg_root) - pr_warn("init of debugfs failed\n"); - DEBUGFS_SEQ_FILE_CREATE(driver_stats_names, hfi1_dbg_root, NULL); - DEBUGFS_SEQ_FILE_CREATE(driver_stats, hfi1_dbg_root, NULL); + debugfs_create_file("driver_stats_names", 0444, hfi1_dbg_root, NULL, + &_driver_stats_names_file_ops); + debugfs_create_file("driver_stats", 0444, hfi1_dbg_root, NULL, + &_driver_stats_file_ops); } void hfi1_dbg_exit(void) diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h index d5d824459fcc..57e582caa5eb 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.h +++ b/drivers/infiniband/hw/hfi1/debugfs.h @@ -49,16 +49,6 @@ struct hfi1_ibdev; -#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \ -do { \ - struct dentry *ent; \ - const char *__name = name; \ - ent = debugfs_create_file(__name, mode, parent, \ - data, ops); \ - if (!ent) \ - pr_warn("create of %s failed\n", __name); \ -} while (0) - #define DEBUGFS_SEQ_FILE_OPS(name) \ static const struct seq_operations _##name##_seq_ops = { \ .start = _##name##_seq_start, \ @@ -89,8 +79,6 @@ static const struct file_operations _##name##_file_ops = { \ .release = seq_release \ } -#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \ - DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, 0444) ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos); diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c index e2290f32c8d9..3fd3315d0fb0 100644 --- a/drivers/infiniband/hw/hfi1/fault.c +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -250,6 +250,7 @@ void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd) int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd) { struct dentry *parent = ibd->hfi1_ibdev_dbg; + struct dentry *fault_dir; ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL); if (!ibd->fault) @@ -269,45 +270,31 @@ int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd) bitmap_zero(ibd->fault->opcodes, sizeof(ibd->fault->opcodes) * BITS_PER_BYTE); - ibd->fault->dir = - fault_create_debugfs_attr("fault", parent, - &ibd->fault->attr); - if (IS_ERR(ibd->fault->dir)) { + fault_dir = + fault_create_debugfs_attr("fault", parent, &ibd->fault->attr); + if (IS_ERR(fault_dir)) { kfree(ibd->fault); ibd->fault = NULL; return -ENOENT; } - - DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault->dir, ibd); - if (!debugfs_create_bool("enable", 0600, ibd->fault->dir, - &ibd->fault->enable)) - goto fail; - if (!debugfs_create_bool("suppress_err", 0600, - ibd->fault->dir, - &ibd->fault->suppress_err)) - goto fail; - if (!debugfs_create_bool("opcode_mode", 0600, ibd->fault->dir, - &ibd->fault->opcode)) - goto fail; - if (!debugfs_create_file("opcodes", 0600, ibd->fault->dir, - ibd->fault, &__fault_opcodes_fops)) - goto fail; - if (!debugfs_create_u64("skip_pkts", 0600, - ibd->fault->dir, - &ibd->fault->fault_skip)) - goto fail; - if (!debugfs_create_u64("skip_usec", 0600, - ibd->fault->dir, - &ibd->fault->fault_skip_usec)) - goto fail; - if (!debugfs_create_u8("direction", 0600, ibd->fault->dir, - &ibd->fault->direction)) - goto fail; + ibd->fault->dir = fault_dir; + + debugfs_create_file("fault_stats", 0444, fault_dir, ibd, + &_fault_stats_file_ops); + debugfs_create_bool("enable", 0600, fault_dir, &ibd->fault->enable); + debugfs_create_bool("suppress_err", 0600, fault_dir, + &ibd->fault->suppress_err); + debugfs_create_bool("opcode_mode", 0600, fault_dir, + &ibd->fault->opcode); + debugfs_create_file("opcodes", 0600, fault_dir, ibd->fault, + &__fault_opcodes_fops); + debugfs_create_u64("skip_pkts", 0600, fault_dir, + &ibd->fault->fault_skip); + debugfs_create_u64("skip_usec", 0600, fault_dir, + &ibd->fault->fault_skip_usec); + debugfs_create_u8("direction", 0600, fault_dir, &ibd->fault->direction); return 0; -fail: - hfi1_fault_exit_debugfs(ibd); - return -ENOMEM; } bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 2be513d4c9da..90f62c4bddba 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -498,7 +498,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); } @@ -508,7 +508,7 @@ static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); int ret; @@ -524,7 +524,7 @@ static ssize_t boardversion_show(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ @@ -536,7 +536,7 @@ static ssize_t nctxts_show(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); /* @@ -555,7 +555,7 @@ static ssize_t nfreectxts_show(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. */ @@ -567,7 +567,7 @@ static ssize_t serial_show(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); return scnprintf(buf, PAGE_SIZE, "%s", dd->serial); @@ -579,7 +579,7 @@ static ssize_t chip_reset_store(struct device *device, size_t count) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); int ret; @@ -609,7 +609,7 @@ static ssize_t tempsense_show(struct device *device, struct device_attribute *attr, char *buf) { struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); struct hfi1_temp temp; int ret; diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index bf96067876c9..f88ad425664a 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -222,31 +222,11 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) ssge.num_sge = swqe->wr.num_sge; sge = &ssge.sge; while (length) { - u32 len = sge->length; + u32 len = rvt_get_sge_length(sge, length); - if (len > length) - len = length; - if (len > sge->sge_length) - len = sge->sge_length; WARN_ON_ONCE(len == 0); rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false); - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (--ssge.num_sge) - *sge = *ssge.sg_list++; - } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= RVT_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } + rvt_update_sge(&ssge, len, false); length -= len; } rvt_put_ss(&qp->r_sge); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index ec582d86025f..c980345cf1e1 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -553,11 +553,7 @@ static noinline int build_verbs_ulp_payload( int ret = 0; while (length) { - len = ss->sge.length; - if (len > length) - len = length; - if (len > ss->sge.sge_length) - len = ss->sge.sge_length; + len = rvt_get_sge_length(&ss->sge, length); WARN_ON_ONCE(len == 0); ret = sdma_txadd_kvaddr( sde->dd, @@ -914,12 +910,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (ss) { while (len) { void *addr = ss->sge.vaddr; - u32 slen = ss->sge.length; + u32 slen = rvt_get_sge_length(&ss->sge, len); - if (slen > len) - slen = len; - if (slen > ss->sge.sge_length) - slen = ss->sge.sge_length; rvt_update_sge(ss, slen, false); seg_pio_copy_mid(pbuf, addr, slen); len -= slen; @@ -1622,6 +1614,7 @@ static const struct ib_device_ops hfi1_dev_ops = { .alloc_rdma_netdev = hfi1_vnic_alloc_rn, .get_dev_fw_str = hfi1_get_dev_fw_str, .get_hw_stats = get_hw_stats, + .init_port = hfi1_create_port_files, .modify_device = modify_device, /* keep process mad in the driver */ .process_mad = hfi1_process_mad, @@ -1679,7 +1672,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) /* * Fill in rvt info object. */ - dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files; dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev; dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah; dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah; diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index 21c2100b2ea9..fddb5fdf92de 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -1,7 +1,6 @@ config INFINIBAND_HNS tristate "HNS RoCE Driver" depends on NET_VENDOR_HISILICON - depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS depends on ARM64 || (COMPILE_TEST && 64BIT) ---help--- This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index 004c88b32e13..e2a7f1488f76 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -2,7 +2,7 @@ # Makefile for the Hisilicon RoCE drivers. # -ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 +ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 927701df5eff..059fd1da493e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -75,6 +75,10 @@ enum { HNS_ROCE_CMD_DESTROY_MPT_BT1 = 0x29, HNS_ROCE_CMD_DESTROY_MPT_BT2 = 0x2a, + /* CQC TIMER commands */ + HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0 = 0x23, + HNS_ROCE_CMD_READ_CQC_TIMER_BT0 = 0x27, + /* MPT commands */ HNS_ROCE_CMD_QUERY_MPT = 0x62, @@ -89,6 +93,10 @@ enum { HNS_ROCE_CMD_DESTROY_SRQC_BT1 = 0x39, HNS_ROCE_CMD_DESTROY_SRQC_BT2 = 0x3a, + /* QPC TIMER commands */ + HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0 = 0x33, + HNS_ROCE_CMD_READ_QPC_TIMER_BT0 = 0x37, + /* EQC commands */ HNS_ROCE_CMD_CREATE_AEQC = 0x80, HNS_ROCE_CMD_MODIFY_AEQC = 0x81, @@ -98,6 +106,10 @@ enum { HNS_ROCE_CMD_MODIFY_CEQC = 0x91, HNS_ROCE_CMD_QUERY_CEQC = 0x92, HNS_ROCE_CMD_DESTROY_CEQC = 0x93, + + /* SCC CTX BT commands */ + HNS_ROCE_CMD_READ_SCCC_BT0 = 0xa4, + HNS_ROCE_CMD_WRITE_SCCC_BT0 = 0xa5, }; enum { diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 3a485f50fede..1dfe5627006c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -215,7 +215,7 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) EXPORT_SYMBOL_GPL(hns_roce_free_cq); static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, - struct ib_ucontext *context, + struct ib_udata *udata, struct hns_roce_cq_buf *buf, struct ib_umem **umem, u64 buf_addr, int cqe) { @@ -223,7 +223,7 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, u32 page_shift; u32 npages; - *umem = ib_umem_get(context, buf_addr, cqe * hr_dev->caps.cq_entry_sz, + *umem = ib_umem_get(udata, buf_addr, cqe * hr_dev->caps.cq_entry_sz, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(*umem)) return PTR_ERR(*umem); @@ -347,7 +347,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, } /* Get user space address, write it into mtt table */ - ret = hns_roce_ib_get_cq_umem(hr_dev, context, &hr_cq->hr_buf, + ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf, &hr_cq->umem, ucmd.buf_addr, cq_entries); if (ret) { @@ -358,7 +358,8 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && (udata->outlen >= sizeof(resp))) { ret = hns_roce_db_map_user(to_hr_ucontext(context), - ucmd.db_addr, &hr_cq->db); + udata, ucmd.db_addr, + &hr_cq->db); if (ret) { dev_err(dev, "cq record doorbell map failed!\n"); goto err_mtt; diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index e2f93c1ce86a..0c6c1fe87705 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -8,7 +8,8 @@ #include <rdma/ib_umem.h> #include "hns_roce_device.h" -int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, +int hns_roce_db_map_user(struct hns_roce_ucontext *context, + struct ib_udata *udata, unsigned long virt, struct hns_roce_db *db) { struct hns_roce_user_db_page *page; @@ -28,8 +29,7 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, refcount_set(&page->refcount, 1); page->user_virt = (virt & PAGE_MASK); - page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK, - PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); if (IS_ERR(page->umem)) { ret = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 509e467843f6..6fde434b22fd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -202,6 +202,7 @@ enum { HNS_ROCE_CAP_FLAG_SRQ = BIT(5), HNS_ROCE_CAP_FLAG_MW = BIT(7), HNS_ROCE_CAP_FLAG_FRMR = BIT(8), + HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), }; @@ -482,6 +483,8 @@ struct hns_roce_qp_table { struct hns_roce_hem_table qp_table; struct hns_roce_hem_table irrl_table; struct hns_roce_hem_table trrl_table; + struct hns_roce_hem_table sccc_table; + struct mutex scc_mutex; }; struct hns_roce_cq_table { @@ -729,6 +732,8 @@ struct hns_roce_caps { u32 max_extend_sg; int num_qps; /* 256k */ int reserved_qps; + int num_qpc_timer; + int num_cqc_timer; u32 max_srq_sg; int num_srqs; u32 max_wqes; /* 16k */ @@ -768,6 +773,9 @@ struct hns_roce_caps { int irrl_entry_sz; int trrl_entry_sz; int cqc_entry_sz; + int sccc_entry_sz; + int qpc_timer_entry_sz; + int cqc_timer_entry_sz; int srqc_entry_sz; int idx_entry_sz; u32 pbl_ba_pg_sz; @@ -777,9 +785,12 @@ struct hns_roce_caps { int ceqe_depth; enum ib_mtu max_mtu; u32 qpc_bt_num; + u32 qpc_timer_bt_num; u32 srqc_bt_num; u32 cqc_bt_num; + u32 cqc_timer_bt_num; u32 mpt_bt_num; + u32 sccc_bt_num; u32 qpc_ba_pg_sz; u32 qpc_buf_pg_sz; u32 qpc_hop_num; @@ -795,6 +806,15 @@ struct hns_roce_caps { u32 mtt_ba_pg_sz; u32 mtt_buf_pg_sz; u32 mtt_hop_num; + u32 sccc_ba_pg_sz; + u32 sccc_buf_pg_sz; + u32 sccc_hop_num; + u32 qpc_timer_ba_pg_sz; + u32 qpc_timer_buf_pg_sz; + u32 qpc_timer_hop_num; + u32 cqc_timer_ba_pg_sz; + u32 cqc_timer_buf_pg_sz; + u32 cqc_timer_hop_num; u32 cqe_ba_pg_sz; u32 cqe_buf_pg_sz; u32 cqe_hop_num; @@ -861,6 +881,8 @@ struct hns_roce_hw { int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state); int (*destroy_qp)(struct ib_qp *ibqp); + int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp); int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, @@ -922,6 +944,8 @@ struct hns_roce_dev { struct hns_roce_srq_table srq_table; struct hns_roce_qp_table qp_table; struct hns_roce_eq_table eq_table; + struct hns_roce_hem_table qpc_timer_table; + struct hns_roce_hem_table cqc_timer_table; int cmd_mod; int loop_idc; @@ -1133,7 +1157,8 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq); void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); -int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, +int hns_roce_db_map_user(struct hns_roce_ucontext *context, + struct ib_udata *udata, unsigned long virt, struct hns_roce_db *db); void hns_roce_db_unmap_user(struct hns_roce_ucontext *context, struct hns_roce_db *db); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 4cdbcafa5915..f1fec56f3ff4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -45,6 +45,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) (hr_dev->caps.mpt_hop_num && type == HEM_TYPE_MTPT) || (hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) || (hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) || + (hr_dev->caps.sccc_hop_num && type == HEM_TYPE_SCCC) || + (hr_dev->caps.qpc_timer_hop_num && type == HEM_TYPE_QPC_TIMER) || + (hr_dev->caps.cqc_timer_hop_num && type == HEM_TYPE_CQC_TIMER) || (hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) || (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) || (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) || @@ -125,6 +128,30 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, mhop->ba_l0_num = hr_dev->caps.cqc_bt_num; mhop->hop_num = hr_dev->caps.cqc_hop_num; break; + case HEM_TYPE_SCCC: + mhop->buf_chunk_size = 1 << (hr_dev->caps.sccc_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.sccc_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = hr_dev->caps.sccc_bt_num; + mhop->hop_num = hr_dev->caps.sccc_hop_num; + break; + case HEM_TYPE_QPC_TIMER: + mhop->buf_chunk_size = 1 << (hr_dev->caps.qpc_timer_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.qpc_timer_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = hr_dev->caps.qpc_timer_bt_num; + mhop->hop_num = hr_dev->caps.qpc_timer_hop_num; + break; + case HEM_TYPE_CQC_TIMER: + mhop->buf_chunk_size = 1 << (hr_dev->caps.cqc_timer_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.cqc_timer_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = hr_dev->caps.cqc_timer_bt_num; + mhop->hop_num = hr_dev->caps.cqc_timer_hop_num; + break; case HEM_TYPE_SRQC: mhop->buf_chunk_size = 1 << (hr_dev->caps.srqc_buf_pg_sz + PAGE_SHIFT); @@ -175,7 +202,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, return 0; /* - * QPC/MTPT/CQC/SRQC alloc hem for buffer pages. + * QPC/MTPT/CQC/SRQC/SCCC alloc hem for buffer pages. * MTT/CQE alloc hem for bt pages. */ bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num); @@ -486,7 +513,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, } /* - * alloc buffer space chunk for QPC/MTPT/CQC/SRQC. + * alloc buffer space chunk for QPC/MTPT/CQC/SRQC/SCCC. * alloc bt space chunk for MTT/CQE. */ size = table->type < HEM_TYPE_MTT ? buf_chunk_size : bt_chunk_size; @@ -593,6 +620,7 @@ out: mutex_unlock(&table->mutex); return ret; } +EXPORT_SYMBOL_GPL(hns_roce_table_get); static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, @@ -658,7 +686,7 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, } /* - * free buffer space chunk for QPC/MTPT/CQC/SRQC. + * free buffer space chunk for QPC/MTPT/CQC/SRQC/SCCC. * free bt space chunk for MTT/CQE. */ hns_roce_free_hem(hr_dev, table->hem[hem_idx]); @@ -735,6 +763,7 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, mutex_unlock(&table->mutex); } +EXPORT_SYMBOL_GPL(hns_roce_table_put); void *hns_roce_table_find(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, @@ -904,6 +933,30 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, num_bt_l0 = hr_dev->caps.cqc_bt_num; hop_num = hr_dev->caps.cqc_hop_num; break; + case HEM_TYPE_SCCC: + buf_chunk_size = 1 << (hr_dev->caps.sccc_buf_pg_sz + + PAGE_SHIFT); + bt_chunk_size = 1 << (hr_dev->caps.sccc_ba_pg_sz + + PAGE_SHIFT); + num_bt_l0 = hr_dev->caps.sccc_bt_num; + hop_num = hr_dev->caps.sccc_hop_num; + break; + case HEM_TYPE_QPC_TIMER: + buf_chunk_size = 1 << (hr_dev->caps.qpc_timer_buf_pg_sz + + PAGE_SHIFT); + bt_chunk_size = 1 << (hr_dev->caps.qpc_timer_ba_pg_sz + + PAGE_SHIFT); + num_bt_l0 = hr_dev->caps.qpc_timer_bt_num; + hop_num = hr_dev->caps.qpc_timer_hop_num; + break; + case HEM_TYPE_CQC_TIMER: + buf_chunk_size = 1 << (hr_dev->caps.cqc_timer_buf_pg_sz + + PAGE_SHIFT); + bt_chunk_size = 1 << (hr_dev->caps.cqc_timer_ba_pg_sz + + PAGE_SHIFT); + num_bt_l0 = hr_dev->caps.cqc_timer_bt_num; + hop_num = hr_dev->caps.cqc_timer_hop_num; + break; case HEM_TYPE_SRQC: buf_chunk_size = 1 << (hr_dev->caps.srqc_buf_pg_sz + PAGE_SHIFT); @@ -1081,6 +1134,15 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); + if (hr_dev->caps.qpc_timer_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->qpc_timer_table); + if (hr_dev->caps.cqc_timer_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->cqc_timer_table); + if (hr_dev->caps.sccc_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->qp_table.sccc_table); if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.trrl_table); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index a650278c6fbd..d9d668992e49 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -44,6 +44,9 @@ enum { HEM_TYPE_MTPT, HEM_TYPE_CQC, HEM_TYPE_SRQC, + HEM_TYPE_SCCC, + HEM_TYPE_QPC_TIMER, + HEM_TYPE_CQC_TIMER, /* UNMAP HEM */ HEM_TYPE_MTT, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index b74c742b000c..fa08c22aad66 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -5002,7 +5002,7 @@ static int hns_roce_probe(struct platform_device *pdev) struct hns_roce_dev *hr_dev; struct device *dev = &pdev->dev; - hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev)); + hr_dev = ib_alloc_device(hns_roce_dev, ib_dev); if (!hr_dev) return -ENOMEM; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 543fa1504cd3..6a22db12fc3d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1078,6 +1078,44 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) hr_dev->caps.sl_num = roce_get_field(req_b->qid_idx_sl_num, PF_RES_DATA_3_PF_SL_NUM_M, PF_RES_DATA_3_PF_SL_NUM_S); + hr_dev->caps.sccc_bt_num = roce_get_field(req_b->sccc_bt_idx_num, + PF_RES_DATA_4_PF_SCCC_BT_NUM_M, + PF_RES_DATA_4_PF_SCCC_BT_NUM_S); + + return 0; +} + +static int hns_roce_query_pf_timer_resource(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_pf_timer_res_a *req_a; + struct hns_roce_cmq_desc desc[2]; + int ret, i; + + for (i = 0; i < 2; i++) { + hns_roce_cmq_setup_basic_desc(&desc[i], + HNS_ROCE_OPC_QUERY_PF_TIMER_RES, + true); + + if (i == 0) + desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + else + desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + } + + ret = hns_roce_cmq_send(hr_dev, desc, 2); + if (ret) + return ret; + + req_a = (struct hns_roce_pf_timer_res_a *)desc[0].data; + + hr_dev->caps.qpc_timer_bt_num = + roce_get_field(req_a->qpc_timer_bt_idx_num, + PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M, + PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S); + hr_dev->caps.cqc_timer_bt_num = + roce_get_field(req_a->cqc_timer_bt_idx_num, + PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M, + PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S); return 0; } @@ -1193,6 +1231,14 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) VF_RES_B_DATA_3_VF_SL_NUM_M, VF_RES_B_DATA_3_VF_SL_NUM_S, HNS_ROCE_VF_SL_NUM); + + roce_set_field(req_b->vf_sccc_idx_num, + VF_RES_B_DATA_4_VF_SCCC_BT_IDX_M, + VF_RES_B_DATA_4_VF_SCCC_BT_IDX_S, 0); + roce_set_field(req_b->vf_sccc_idx_num, + VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M, + VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S, + HNS_ROCE_VF_SCCC_BT_NUM); } } @@ -1205,6 +1251,7 @@ static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev) u8 qpc_hop_num = hr_dev->caps.qpc_hop_num; u8 cqc_hop_num = hr_dev->caps.cqc_hop_num; u8 mpt_hop_num = hr_dev->caps.mpt_hop_num; + u8 sccc_hop_num = hr_dev->caps.sccc_hop_num; struct hns_roce_cfg_bt_attr *req; struct hns_roce_cmq_desc desc; @@ -1252,6 +1299,20 @@ static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev) CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S, mpt_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : mpt_hop_num); + roce_set_field(req->vf_sccc_cfg, + CFG_BT_ATTR_DATA_4_VF_SCCC_BA_PGSZ_M, + CFG_BT_ATTR_DATA_4_VF_SCCC_BA_PGSZ_S, + hr_dev->caps.sccc_ba_pg_sz + PG_SHIFT_OFFSET); + roce_set_field(req->vf_sccc_cfg, + CFG_BT_ATTR_DATA_4_VF_SCCC_BUF_PGSZ_M, + CFG_BT_ATTR_DATA_4_VF_SCCC_BUF_PGSZ_S, + hr_dev->caps.sccc_buf_pg_sz + PG_SHIFT_OFFSET); + roce_set_field(req->vf_sccc_cfg, + CFG_BT_ATTR_DATA_4_VF_SCCC_HOPNUM_M, + CFG_BT_ATTR_DATA_4_VF_SCCC_HOPNUM_S, + sccc_hop_num == + HNS_ROCE_HOP_NUM_0 ? 0 : sccc_hop_num); + return hns_roce_cmq_send(hr_dev, &desc, 1); } @@ -1289,6 +1350,16 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; } + if (hr_dev->pci_dev->revision == 0x21) { + ret = hns_roce_query_pf_timer_resource(hr_dev); + if (ret) { + dev_err(hr_dev->dev, + "Query pf timer resource fail, ret = %d.\n", + ret); + return ret; + } + } + ret = hns_roce_alloc_vf_resource(hr_dev); if (ret) { dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n", @@ -1366,7 +1437,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->mpt_ba_pg_sz = 0; caps->mpt_buf_pg_sz = 0; caps->mpt_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; - caps->pbl_ba_pg_sz = 0; + caps->pbl_ba_pg_sz = 2; caps->pbl_buf_pg_sz = 0; caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM; caps->mtt_ba_pg_sz = 0; @@ -1408,9 +1479,27 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR; caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE; - if (hr_dev->pci_dev->revision == 0x21) + if (hr_dev->pci_dev->revision == 0x21) { caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | - HNS_ROCE_CAP_FLAG_SRQ; + HNS_ROCE_CAP_FLAG_SRQ | + HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL; + + caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; + caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ; + caps->qpc_timer_ba_pg_sz = 0; + caps->qpc_timer_buf_pg_sz = 0; + caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; + caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; + caps->cqc_timer_ba_pg_sz = 0; + caps->cqc_timer_buf_pg_sz = 0; + caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + + caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; + caps->sccc_ba_pg_sz = 0; + caps->sccc_buf_pg_sz = 0; + caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; + } ret = hns_roce_v2_set_bt(hr_dev); if (ret) @@ -1611,7 +1700,8 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev, static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) { struct hns_roce_v2_priv *priv = hr_dev->priv; - int ret; + int qpc_count, cqc_count; + int ret, i; /* TSQ includes SQ doorbell and ack doorbell */ ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); @@ -1626,8 +1716,40 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) goto err_tpq_init_failed; } + /* Alloc memory for QPC Timer buffer space chunk*/ + for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num; + qpc_count++) { + ret = hns_roce_table_get(hr_dev, &hr_dev->qpc_timer_table, + qpc_count); + if (ret) { + dev_err(hr_dev->dev, "QPC Timer get failed\n"); + goto err_qpc_timer_failed; + } + } + + /* Alloc memory for CQC Timer buffer space chunk*/ + for (cqc_count = 0; cqc_count < hr_dev->caps.cqc_timer_bt_num; + cqc_count++) { + ret = hns_roce_table_get(hr_dev, &hr_dev->cqc_timer_table, + cqc_count); + if (ret) { + dev_err(hr_dev->dev, "CQC Timer get failed\n"); + goto err_cqc_timer_failed; + } + } + return 0; +err_cqc_timer_failed: + for (i = 0; i < cqc_count; i++) + hns_roce_table_put(hr_dev, &hr_dev->cqc_timer_table, i); + +err_qpc_timer_failed: + for (i = 0; i < qpc_count; i++) + hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i); + + hns_roce_free_link_table(hr_dev, &priv->tpq); + err_tpq_init_failed: hns_roce_free_link_table(hr_dev, &priv->tsq); @@ -2663,11 +2785,24 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, case HEM_TYPE_SRQC: op = HNS_ROCE_CMD_WRITE_SRQC_BT0; break; + case HEM_TYPE_SCCC: + op = HNS_ROCE_CMD_WRITE_SCCC_BT0; + break; + case HEM_TYPE_QPC_TIMER: + op = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0; + break; + case HEM_TYPE_CQC_TIMER: + op = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0; + break; default: dev_warn(dev, "Table %d not to be written by mailbox!\n", table->type); return 0; } + + if (table->type == HEM_TYPE_SCCC && step_idx) + return 0; + op += step_idx; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -2722,6 +2857,10 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, case HEM_TYPE_CQC: op = HNS_ROCE_CMD_DESTROY_CQC_BT0; break; + case HEM_TYPE_SCCC: + case HEM_TYPE_QPC_TIMER: + case HEM_TYPE_CQC_TIMER: + break; case HEM_TYPE_SRQC: op = HNS_ROCE_CMD_DESTROY_SRQC_BT0; break; @@ -2730,6 +2869,12 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, table->type); return 0; } + + if (table->type == HEM_TYPE_SCCC || + table->type == HEM_TYPE_QPC_TIMER || + table->type == HEM_TYPE_CQC_TIMER) + return 0; + op += step_idx; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -3686,10 +3831,16 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, V2_QPC_BYTE_212_LSN_S, 0); if (attr_mask & IB_QP_TIMEOUT) { - roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_AT_M, - V2_QPC_BYTE_28_AT_S, attr->timeout); - roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_AT_M, - V2_QPC_BYTE_28_AT_S, 0); + if (attr->timeout < 31) { + roce_set_field(context->byte_28_at_fl, + V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, + attr->timeout); + roce_set_field(qpc_mask->byte_28_at_fl, + V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, + 0); + } else { + dev_warn(dev, "Local ACK timeout shall be 0 to 30.\n"); + } } roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, @@ -3789,13 +3940,16 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_160_sq_ci_pi, V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0); - roce_set_field(context->byte_84_rq_ci_pi, + + if (!ibqp->srq) { + roce_set_field(context->byte_84_rq_ci_pi, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, hr_qp->rq.head); - roce_set_field(qpc_mask->byte_84_rq_ci_pi, + roce_set_field(qpc_mask->byte_84_rq_ci_pi, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0); + } } if (attr_mask & IB_QP_AV) { @@ -4224,6 +4378,59 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp) return 0; } +static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + struct hns_roce_sccc_clr_done *resp; + struct hns_roce_sccc_clr *clr; + struct hns_roce_cmq_desc desc; + int ret, i; + + mutex_lock(&hr_dev->qp_table.scc_mutex); + + /* set scc ctx clear done flag */ + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) { + dev_err(hr_dev->dev, "Reset SCC ctx failed(%d)\n", ret); + goto out; + } + + /* clear scc context */ + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CLR_SCCC, false); + clr = (struct hns_roce_sccc_clr *)desc.data; + clr->qpn = cpu_to_le32(hr_qp->qpn); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) { + dev_err(hr_dev->dev, "Clear SCC ctx failed(%d)\n", ret); + goto out; + } + + /* query scc context clear is done or not */ + resp = (struct hns_roce_sccc_clr_done *)desc.data; + for (i = 0; i <= HNS_ROCE_CMQ_SCC_CLR_DONE_CNT; i++) { + hns_roce_cmq_setup_basic_desc(&desc, + HNS_ROCE_OPC_QUERY_SCCC, true); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) { + dev_err(hr_dev->dev, "Query clr cmq failed(%d)\n", ret); + goto out; + } + + if (resp->clr_done) + goto out; + + msleep(20); + } + + dev_err(hr_dev->dev, "Query SCC clr done flag overtime.\n"); + ret = -ETIMEDOUT; + +out: + mutex_unlock(&hr_dev->qp_table.scc_mutex); + return ret; +} + static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) { struct hns_roce_dev *hr_dev = to_hr_dev(cq->device); @@ -4281,7 +4488,8 @@ static void hns_roce_set_qps_to_err(struct hns_roce_dev *hr_dev, u32 qpn) if (hr_qp->ibqp.uobject) { if (hr_qp->sdb_en == 1) { hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr); - hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); + if (hr_qp->rdb_en == 1) + hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); } else { dev_warn(hr_dev->dev, "flush cqe is unsupported in userspace!\n"); return; @@ -4568,7 +4776,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n", event_type, eq->eqn, eq->cons_index); break; - }; + } eq->event_type = event_type; eq->sub_type = sub_type; @@ -4692,11 +4900,22 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id) int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG); if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) { + struct pci_dev *pdev = hr_dev->pci_dev; + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); + const struct hnae3_ae_ops *ops = ae_dev->ops; + dev_err(dev, "AEQ overflow!\n"); roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S, 1); roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st); + /* Set reset level for reset_event() */ + if (ops->set_default_reset_request) + ops->set_default_reset_request(ae_dev, + HNAE3_FUNC_RESET); + if (ops->reset_event) + ops->reset_event(pdev, NULL); + roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1); roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en); @@ -5770,6 +5989,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .modify_qp = hns_roce_v2_modify_qp, .query_qp = hns_roce_v2_query_qp, .destroy_qp = hns_roce_v2_destroy_qp, + .qp_flow_control_init = hns_roce_v2_qp_flow_control_init, .modify_cq = hns_roce_v2_modify_cq, .post_send = hns_roce_v2_post_send, .post_recv = hns_roce_v2_post_recv, @@ -5838,7 +6058,7 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) struct hns_roce_dev *hr_dev; int ret; - hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev)); + hr_dev = ib_alloc_device(hns_roce_dev, ib_dev); if (!hr_dev) return -ENOMEM; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index b72d0443c835..242eeaeba761 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -36,6 +36,7 @@ #include <linux/bitops.h> #define HNS_ROCE_VF_QPC_BT_NUM 256 +#define HNS_ROCE_VF_SCCC_BT_NUM 64 #define HNS_ROCE_VF_SRQC_BT_NUM 64 #define HNS_ROCE_VF_CQC_BT_NUM 64 #define HNS_ROCE_VF_MPT_BT_NUM 64 @@ -45,11 +46,13 @@ #define HNS_ROCE_VF_SL_NUM 8 #define HNS_ROCE_V2_MAX_QP_NUM 0x2000 +#define HNS_ROCE_V2_MAX_QPC_TIMER_NUM 0x200 #define HNS_ROCE_V2_MAX_WQE_NUM 0x8000 #define HNS_ROCE_V2_MAX_SRQ 0x100000 #define HNS_ROCE_V2_MAX_SRQ_WR 0x8000 #define HNS_ROCE_V2_MAX_SRQ_SGE 0x100 #define HNS_ROCE_V2_MAX_CQ_NUM 0x8000 +#define HNS_ROCE_V2_MAX_CQC_TIMER_NUM 0x100 #define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000 #define HNS_ROCE_V2_MAX_CQE_NUM 0x10000 #define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000 @@ -83,6 +86,9 @@ #define HNS_ROCE_V2_MTPT_ENTRY_SZ 64 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 #define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 +#define HNS_ROCE_V2_SCCC_ENTRY_SZ 32 +#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ 4096 +#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ 4096 #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 #define HNS_ROCE_INVALID_LKEY 0x100 @@ -91,6 +97,7 @@ #define HNS_ROCE_V2_RSV_QPS 8 #define HNS_ROCE_CONTEXT_HOP_NUM 1 +#define HNS_ROCE_SCCC_HOP_NUM 1 #define HNS_ROCE_MTT_HOP_NUM 1 #define HNS_ROCE_CQE_HOP_NUM 1 #define HNS_ROCE_SRQWQE_HOP_NUM 1 @@ -120,6 +127,8 @@ #define HNS_ROCE_CMQ_EN_B 16 #define HNS_ROCE_CMQ_ENABLE BIT(HNS_ROCE_CMQ_EN_B) +#define HNS_ROCE_CMQ_SCC_CLR_DONE_CNT 5 + #define check_whether_last_step(hop_num, step_idx) \ ((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \ (step_idx == 1 && hop_num == 1) || \ @@ -224,11 +233,15 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401, HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403, HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, + HNS_ROCE_OPC_QUERY_PF_TIMER_RES = 0x8406, HNS_ROCE_OPC_CFG_SGID_TB = 0x8500, HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501, HNS_ROCE_OPC_POST_MB = 0x8504, HNS_ROCE_OPC_QUERY_MB_ST = 0x8505, HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, + HNS_ROCE_OPC_CLR_SCCC = 0x8509, + HNS_ROCE_OPC_QUERY_SCCC = 0x850a, + HNS_ROCE_OPC_RESET_SCCC = 0x850b, HNS_SWITCH_PARAMETER_CFG = 0x1033, }; @@ -1300,7 +1313,8 @@ struct hns_roce_pf_res_b { __le32 smac_idx_num; __le32 sgid_idx_num; __le32 qid_idx_sl_num; - __le32 rsv[2]; + __le32 sccc_bt_idx_num; + __le32 rsv; }; #define PF_RES_DATA_1_PF_SMAC_IDX_S 0 @@ -1321,6 +1335,31 @@ struct hns_roce_pf_res_b { #define PF_RES_DATA_3_PF_SL_NUM_S 16 #define PF_RES_DATA_3_PF_SL_NUM_M GENMASK(26, 16) +#define PF_RES_DATA_4_PF_SCCC_BT_IDX_S 0 +#define PF_RES_DATA_4_PF_SCCC_BT_IDX_M GENMASK(8, 0) + +#define PF_RES_DATA_4_PF_SCCC_BT_NUM_S 9 +#define PF_RES_DATA_4_PF_SCCC_BT_NUM_M GENMASK(17, 9) + +struct hns_roce_pf_timer_res_a { + __le32 rsv0; + __le32 qpc_timer_bt_idx_num; + __le32 cqc_timer_bt_idx_num; + __le32 rsv[3]; +}; + +#define PF_RES_DATA_1_PF_QPC_TIMER_BT_IDX_S 0 +#define PF_RES_DATA_1_PF_QPC_TIMER_BT_IDX_M GENMASK(11, 0) + +#define PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S 16 +#define PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M GENMASK(28, 16) + +#define PF_RES_DATA_2_PF_CQC_TIMER_BT_IDX_S 0 +#define PF_RES_DATA_2_PF_CQC_TIMER_BT_IDX_M GENMASK(10, 0) + +#define PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S 16 +#define PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M GENMASK(27, 16) + struct hns_roce_vf_res_a { __le32 vf_id; __le32 vf_qpc_bt_idx_num; @@ -1365,7 +1404,8 @@ struct hns_roce_vf_res_b { __le32 vf_smac_idx_num; __le32 vf_sgid_idx_num; __le32 vf_qid_idx_sl_num; - __le32 rsv[2]; + __le32 vf_sccc_idx_num; + __le32 rsv1; }; #define VF_RES_B_DATA_0_VF_ID_S 0 @@ -1389,6 +1429,12 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_3_VF_SL_NUM_S 16 #define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16) +#define VF_RES_B_DATA_4_VF_SCCC_BT_IDX_S 0 +#define VF_RES_B_DATA_4_VF_SCCC_BT_IDX_M GENMASK(8, 0) + +#define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S 9 +#define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M GENMASK(17, 9) + struct hns_roce_vf_switch { __le32 rocee_sel; __le32 fun_id; @@ -1424,7 +1470,8 @@ struct hns_roce_cfg_bt_attr { __le32 vf_srqc_cfg; __le32 vf_cqc_cfg; __le32 vf_mpt_cfg; - __le32 rsv[2]; + __le32 vf_sccc_cfg; + __le32 rsv; }; #define CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_S 0 @@ -1463,6 +1510,15 @@ struct hns_roce_cfg_bt_attr { #define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S 8 #define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M GENMASK(9, 8) +#define CFG_BT_ATTR_DATA_4_VF_SCCC_BA_PGSZ_S 0 +#define CFG_BT_ATTR_DATA_4_VF_SCCC_BA_PGSZ_M GENMASK(3, 0) + +#define CFG_BT_ATTR_DATA_4_VF_SCCC_BUF_PGSZ_S 4 +#define CFG_BT_ATTR_DATA_4_VF_SCCC_BUF_PGSZ_M GENMASK(7, 4) + +#define CFG_BT_ATTR_DATA_4_VF_SCCC_HOPNUM_S 8 +#define CFG_BT_ATTR_DATA_4_VF_SCCC_HOPNUM_M GENMASK(9, 8) + struct hns_roce_cfg_sgid_tb { __le32 table_idx_rsv; __le32 vf_sgid_l; @@ -1730,4 +1786,14 @@ struct hns_roce_wqe_atomic_seg { __le64 cmp_data; }; +struct hns_roce_sccc_clr { + __le32 qpn; + __le32 rsv[5]; +}; + +struct hns_roce_sccc_clr_done { + __le32 clr_done; + __le32 rsv[5]; +}; + #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index c79054ba9495..67a8c4333f4f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -343,7 +343,7 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev, resp.qp_tab_size = hr_dev->caps.num_qps; - context = kmalloc(sizeof(*context), GFP_KERNEL); + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return ERR_PTR(-ENOMEM); @@ -564,7 +564,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->driver_id = RDMA_DRIVER_HNS; ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops); ib_set_device_ops(ib_dev, &hns_roce_dev_ops); - ret = ib_register_device(ib_dev, "hns_%d", NULL); + ret = ib_register_device(ib_dev, "hns_%d"); if (ret) { dev_err(dev, "ib_register_device failed!\n"); return ret; @@ -702,8 +702,62 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } + if (hr_dev->caps.sccc_entry_sz) { + ret = hns_roce_init_hem_table(hr_dev, + &hr_dev->qp_table.sccc_table, + HEM_TYPE_SCCC, + hr_dev->caps.sccc_entry_sz, + hr_dev->caps.num_qps, 1); + if (ret) { + dev_err(dev, + "Failed to init SCC context memory, aborting.\n"); + goto err_unmap_idx; + } + } + + if (hr_dev->caps.qpc_timer_entry_sz) { + ret = hns_roce_init_hem_table(hr_dev, + &hr_dev->qpc_timer_table, + HEM_TYPE_QPC_TIMER, + hr_dev->caps.qpc_timer_entry_sz, + hr_dev->caps.num_qpc_timer, 1); + if (ret) { + dev_err(dev, + "Failed to init QPC timer memory, aborting.\n"); + goto err_unmap_ctx; + } + } + + if (hr_dev->caps.cqc_timer_entry_sz) { + ret = hns_roce_init_hem_table(hr_dev, + &hr_dev->cqc_timer_table, + HEM_TYPE_CQC_TIMER, + hr_dev->caps.cqc_timer_entry_sz, + hr_dev->caps.num_cqc_timer, 1); + if (ret) { + dev_err(dev, + "Failed to init CQC timer memory, aborting.\n"); + goto err_unmap_qpc_timer; + } + } + return 0; +err_unmap_qpc_timer: + if (hr_dev->caps.qpc_timer_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->qpc_timer_table); + +err_unmap_ctx: + if (hr_dev->caps.sccc_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->qp_table.sccc_table); + +err_unmap_idx: + if (hr_dev->caps.num_idx_segs) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->mr_table.mtt_idx_table); + err_unmap_srqwqe: if (hr_dev->caps.num_srqwqe_segs) hns_roce_cleanup_hem_table(hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index ee5991bd4171..da4fffedb879 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1110,8 +1110,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(pd->uobject->context, start, length, - access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, access_flags, 0); if (IS_ERR(mr->umem)) { ret = PTR_ERR(mr->umem); goto err_free; @@ -1220,8 +1219,8 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, } ib_umem_release(mr->umem); - mr->umem = ib_umem_get(ibmr->uobject->context, start, length, - mr_access_flags, 0); + mr->umem = + ib_umem_get(udata, start, length, mr_access_flags, 0); if (IS_ERR(mr->umem)) { ret = PTR_ERR(mr->umem); mr->umem = NULL; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index e11c149da04d..4a29b2cb9bab 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -66,7 +66,7 @@ struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, struct hns_roce_pd *pd; int ret; - pd = kmalloc(sizeof(*pd), GFP_KERNEL); + pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 54031c5b53fa..73066bf38e47 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -209,13 +209,23 @@ static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, } } + if (hr_dev->caps.sccc_entry_sz) { + /* Alloc memory for SCC CTX */ + ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table, + hr_qp->qpn); + if (ret) { + dev_err(dev, "SCC CTX table get failed\n"); + goto err_put_trrl; + } + } + spin_lock_irq(&qp_table->lock); ret = radix_tree_insert(&hr_dev->qp_table_tree, hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp); spin_unlock_irq(&qp_table->lock); if (ret) { dev_err(dev, "QPC radix_tree_insert failed\n"); - goto err_put_trrl; + goto err_put_sccc; } atomic_set(&hr_qp->refcount, 1); @@ -223,6 +233,11 @@ static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, return 0; +err_put_sccc: + if (hr_dev->caps.sccc_entry_sz) + hns_roce_table_put(hr_dev, &qp_table->sccc_table, + hr_qp->qpn); + err_put_trrl: if (hr_dev->caps.trrl_entry_sz) hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn); @@ -258,6 +273,9 @@ void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) wait_for_completion(&hr_qp->free); if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) { + if (hr_dev->caps.sccc_entry_sz) + hns_roce_table_put(hr_dev, &qp_table->sccc_table, + hr_qp->qpn); if (hr_dev->caps.trrl_entry_sz) hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn); @@ -526,7 +544,8 @@ static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr) static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr) { if (attr->qp_type == IB_QPT_XRC_INI || - attr->qp_type == IB_QPT_XRC_TGT || attr->srq) + attr->qp_type == IB_QPT_XRC_TGT || attr->srq || + !attr->cap.max_recv_wr) return 0; return 1; @@ -612,9 +631,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, goto err_rq_sge_list; } - hr_qp->umem = ib_umem_get(ib_pd->uobject->context, - ucmd.buf_addr, hr_qp->buff_size, 0, - 0); + hr_qp->umem = ib_umem_get(udata, ucmd.buf_addr, + hr_qp->buff_size, 0, 0); if (IS_ERR(hr_qp->umem)) { dev_err(dev, "ib_umem_get error for create qp\n"); ret = PTR_ERR(hr_qp->umem); @@ -653,8 +671,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, (udata->outlen >= sizeof(resp)) && hns_roce_qp_has_sq(init_attr)) { ret = hns_roce_db_map_user( - to_hr_ucontext(ib_pd->uobject->context), - ucmd.sdb_addr, &hr_qp->sdb); + to_hr_ucontext(ib_pd->uobject->context), udata, + ucmd.sdb_addr, &hr_qp->sdb); if (ret) { dev_err(dev, "sq record doorbell map failed!\n"); goto err_mtt; @@ -669,12 +687,16 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, (udata->outlen >= sizeof(resp)) && hns_roce_qp_has_rq(init_attr)) { ret = hns_roce_db_map_user( - to_hr_ucontext(ib_pd->uobject->context), - ucmd.db_addr, &hr_qp->rdb); + to_hr_ucontext(ib_pd->uobject->context), udata, + ucmd.db_addr, &hr_qp->rdb); if (ret) { dev_err(dev, "rq record doorbell map failed!\n"); goto err_sq_dbmap; } + + /* indicate kernel supports rq record db */ + resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB; + hr_qp->rdb_en = 1; } } else { if (init_attr->create_flags & @@ -783,17 +805,19 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, else hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn); - if (udata && (udata->outlen >= sizeof(resp)) && - (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) { - - /* indicate kernel supports rq record db */ - resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB; - ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (udata) { + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); if (ret) goto err_qp; + } - hr_qp->rdb_en = 1; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) { + ret = hr_dev->hw->qp_flow_control_init(hr_dev, hr_qp); + if (ret) + goto err_qp; } + hr_qp->event = hns_roce_ib_qp_event; return 0; @@ -969,7 +993,9 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) { if (hr_qp->sdb_en == 1) { hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr); - hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); + + if (hr_qp->rdb_en == 1) + hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); } else { dev_warn(dev, "flush cqe is not supported in userspace!\n"); goto out; @@ -1133,6 +1159,7 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) int reserved_from_bot; int ret; + mutex_init(&qp_table->scc_mutex); spin_lock_init(&qp_table->lock); INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 12deacf442cf..11be2dbf6486 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -253,8 +253,8 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, goto err_srq; } - srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, - srq_buf_size, 0, 0); + srq->umem = + ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0); if (IS_ERR(srq->umem)) { ret = PTR_ERR(srq->umem); goto err_srq; @@ -281,8 +281,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, goto err_srq_mtt; /* config index queue BA */ - srq->idx_que.umem = ib_umem_get(pd->uobject->context, - ucmd.que_addr, + srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr, srq->idx_que.buf_size, 0, 0); if (IS_ERR(srq->idx_que.umem)) { dev_err(hr_dev->dev, diff --git a/drivers/infiniband/hw/i40iw/Makefile b/drivers/infiniband/hw/i40iw/Makefile index 5a8a7a3f28ae..8942f8229945 100644 --- a/drivers/infiniband/hw/i40iw/Makefile +++ b/drivers/infiniband/hw/i40iw/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-y := -Idrivers/net/ethernet/intel/i40e +ccflags-y := -I $(srctree)/drivers/net/ethernet/intel/i40e obj-$(CONFIG_INFINIBAND_I40IW) += i40iw.o diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 0b675b0742c2..d4ab46dd9e6c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1852,7 +1852,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, if (length > I40IW_MAX_MR_SIZE) return ERR_PTR(-EINVAL); - region = ib_umem_get(pd->uobject->context, start, length, acc, 0); + region = ib_umem_get(udata, start, length, acc, 0); if (IS_ERR(region)) return (struct ib_mr *)region; @@ -2139,9 +2139,8 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr) static ssize_t hw_rev_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct i40iw_ib_device *iwibdev = container_of(dev, - struct i40iw_ib_device, - ibdev.dev); + struct i40iw_ib_device *iwibdev = + rdma_device_to_drv_device(dev, struct i40iw_ib_device, ibdev); u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev; return sprintf(buf, "%x\n", hw_rev); @@ -2763,7 +2762,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev struct net_device *netdev = iwdev->netdev; struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context; - iwibdev = (struct i40iw_ib_device *)ib_alloc_device(sizeof(*iwibdev)); + iwibdev = ib_alloc_device(i40iw_ib_device, ibdev); if (!iwibdev) { i40iw_pr_err("iwdev == NULL\n"); return NULL; @@ -2868,7 +2867,7 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) iwibdev = iwdev->iwibdev; rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group); iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW; - ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", NULL); + ret = ib_register_device(&iwibdev->ibdev, "i40iw%d"); if (ret) goto error; diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig index d1de3285fd88..db4aa13ebae0 100644 --- a/drivers/infiniband/hw/mlx4/Kconfig +++ b/drivers/infiniband/hw/mlx4/Kconfig @@ -1,7 +1,6 @@ config MLX4_INFINIBAND tristate "Mellanox ConnectX HCA support" depends on NETDEVICES && ETHERNET && PCI && INET - depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS depends on MAY_USE_DEVLINK select NET_VENDOR_MELLANOX select MLX4_CORE diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 43512347b4f0..03ac72339dd2 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -134,16 +134,16 @@ static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf); } -static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context, - struct mlx4_ib_cq_buf *buf, struct ib_umem **umem, - u64 buf_addr, int cqe) +static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata, + struct mlx4_ib_cq_buf *buf, + struct ib_umem **umem, u64 buf_addr, int cqe) { int err; int cqe_size = dev->dev->caps.cqe_size; int shift; int n; - *umem = ib_umem_get(context, buf_addr, cqe * cqe_size, + *umem = ib_umem_get(udata, buf_addr, cqe * cqe_size, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(*umem)) return PTR_ERR(*umem); @@ -190,7 +190,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED) return ERR_PTR(-EINVAL); - cq = kmalloc(sizeof *cq, GFP_KERNEL); + cq = kzalloc(sizeof(*cq), GFP_KERNEL); if (!cq) return ERR_PTR(-ENOMEM); @@ -213,14 +213,13 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, } buf_addr = (void *)(unsigned long)ucmd.buf_addr; - - err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem, + err = mlx4_ib_get_cq_umem(dev, udata, &cq->buf, &cq->umem, ucmd.buf_addr, entries); if (err) goto err_cq; - err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr, - &cq->db); + err = mlx4_ib_db_map_user(to_mucontext(context), udata, + ucmd.db_addr, &cq->db); if (err) goto err_mtt; @@ -336,7 +335,7 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq if (!cq->resize_buf) return -ENOMEM; - err = mlx4_ib_get_cq_umem(dev, cq->umem->context, &cq->resize_buf->buf, + err = mlx4_ib_get_cq_umem(dev, udata, &cq->resize_buf->buf, &cq->resize_umem, ucmd.buf_addr, entries); if (err) { kfree(cq->resize_buf); diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index c51740986367..3aab71b29ce8 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -41,7 +41,8 @@ struct mlx4_ib_user_db_page { int refcnt; }; -int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, +int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, + struct ib_udata *udata, unsigned long virt, struct mlx4_db *db) { struct mlx4_ib_user_db_page *page; @@ -61,8 +62,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK, - PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1f15ec3e2b83..d66002a31000 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2043,7 +2043,7 @@ static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = - container_of(device, struct mlx4_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -2052,7 +2052,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = - container_of(device, struct mlx4_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); return sprintf(buf, "%x\n", dev->dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -2061,7 +2061,8 @@ static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = - container_of(device, struct mlx4_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); + return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id); } @@ -2634,7 +2635,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (num_ports == 0) return NULL; - ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); + ibdev = ib_alloc_device(mlx4_ib_dev, ib_dev); if (!ibdev) { dev_err(&dev->persist->pdev->dev, "Device struct alloc failed\n"); @@ -2856,7 +2857,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group); ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4; - if (ib_register_device(&ibdev->ib_dev, "mlx4_%d", NULL)) + if (ib_register_device(&ibdev->ib_dev, "mlx4_%d")) goto err_diag_counters; if (mlx4_ib_mad_init(ibdev)) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e491f3eda6e7..60dc1347c5ab 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -722,7 +722,8 @@ static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev) int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev); void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev); -int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, +int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, + struct ib_udata *udata, unsigned long virt, struct mlx4_db *db); void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index c7c85c22e4e3..56639ecd53ad 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -367,7 +367,8 @@ end: return block_shift; } -static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start, +static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, + struct ib_udata *udata, u64 start, u64 length, u64 virt_addr, int access_flags) { @@ -398,7 +399,7 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start, up_read(¤t->mm->mmap_sem); } - return ib_umem_get(context, start, length, access_flags, 0); + return ib_umem_get(udata, start, length, access_flags, 0); } struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, @@ -415,7 +416,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = mlx4_get_umem_mr(pd->uobject->context, start, length, + mr->umem = mlx4_get_umem_mr(pd->uobject->context, udata, start, length, virt_addr, access_flags); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); @@ -506,8 +507,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); ib_umem_release(mmr->umem); mmr->umem = - mlx4_get_umem_mr(mr->uobject->context, start, length, - virt_addr, mr_access_flags); + mlx4_get_umem_mr(mr->uobject->context, udata, start, + length, virt_addr, mr_access_flags); if (IS_ERR(mmr->umem)) { err = PTR_ERR(mmr->umem); /* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 971e9a9ebdaf..e38bab50cecf 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1015,9 +1015,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, (qp->sq.wqe_cnt << qp->sq.wqe_shift); } - qp->umem = ib_umem_get(pd->uobject->context, - (src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr : - ucmd.wq.buf_addr, qp->buf_size, 0, 0); + qp->umem = + ib_umem_get(udata, + (src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr : + ucmd.wq.buf_addr, + qp->buf_size, 0, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; @@ -1035,9 +1037,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_mtt; if (qp_has_rq(init_attr)) { - err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), + err = mlx4_ib_db_map_user( + to_mucontext(pd->uobject->context), udata, (src == MLX4_IB_QP_SRC) ? ucmd.qp.db_addr : - ucmd.wq.db_addr, &qp->db); + ucmd.wq.db_addr, + &qp->db); if (err) goto err_mtt; } diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 4456f1b8921d..498588eac051 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -113,8 +113,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, goto err_srq; } - srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, - buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); if (IS_ERR(srq->umem)) { err = PTR_ERR(srq->umem); goto err_srq; @@ -130,7 +129,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, goto err_mtt; err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), - ucmd.db_addr, &srq->db); + udata, ucmd.db_addr, &srq->db); if (err) goto err_mtt; } else { diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig index 0440966bc6ec..8d651c05de62 100644 --- a/drivers/infiniband/hw/mlx5/Kconfig +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -1,7 +1,6 @@ config MLX5_INFINIBAND tristate "Mellanox 5th generation network adapters (ConnectX series) support" depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE - depends on INFINIBAND_USER_ACCESS || INFINIBAND_USER_ACCESS=n ---help--- This driver provides low-level InfiniBand support for Mellanox Connect-IB PCI Express host channel adapters (HCAs). diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 356bccc715ee..6bcc63aaa50b 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -345,3 +345,40 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, counter_set_id); return err; } + +int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port) +{ + int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); + int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); + int err = -ENOMEM; + void *data; + void *resp; + u32 *out; + u32 *in; + + in = kzalloc(inlen, GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); + if (!in || !out) + goto out; + + MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); + MLX5_SET(mad_ifc_in, in, op_mod, opmod); + MLX5_SET(mad_ifc_in, in, port, port); + + data = MLX5_ADDR_OF(mad_ifc_in, in, mad); + memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); + + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + if (err) + goto out; + + resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); + memcpy(outb, resp, + MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); + +out: + kfree(out); + kfree(in); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 1e76dc67a369..923a7b93f507 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -63,4 +63,6 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, u16 uid); +int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index 7e4e358a4fd8..8ba439fabf7f 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -389,19 +389,19 @@ void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) dev->port[port_num].dbg_cc_params = NULL; } -int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) +void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) { struct mlx5_ib_dbg_cc_params *dbg_cc_params; struct mlx5_core_dev *mdev; int i; if (!mlx5_debugfs_root) - goto out; + return; /* Takes a 1-based port number */ mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL); if (!mdev) - goto out; + return; if (!MLX5_CAP_GEN(mdev, cc_query_allowed) || !MLX5_CAP_GEN(mdev, cc_modify_allowed)) @@ -415,8 +415,6 @@ int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) dbg_cc_params->root = debugfs_create_dir("cc_params", mdev->priv.dbg_root); - if (!dbg_cc_params->root) - goto err; for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) { dbg_cc_params->params[i].offset = i; @@ -427,14 +425,11 @@ int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num) 0600, dbg_cc_params->root, &dbg_cc_params->params[i], &dbg_cc_fops); - if (!dbg_cc_params->params[i].dentry) - goto err; } put_mdev: mlx5_ib_put_native_port_mdev(dev, port_num + 1); -out: - return 0; + return; err: mlx5_ib_warn(dev, "cong debugfs failure\n"); @@ -445,5 +440,5 @@ err: * We don't want to fail driver if debugfs failed to initialize, * so we are not forwarding error to the user. */ - return 0; + return; } diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 90f1b0bae5b5..18704e503508 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -187,8 +187,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, wqe_ctr = be16_to_cpu(cqe->wqe_counter); wc->wr_id = srq->wrid[wqe_ctr]; mlx5_ib_free_srq_wqe(srq, wqe_ctr); - if (msrq && atomic_dec_and_test(&msrq->refcount)) - complete(&msrq->free); + if (msrq) + mlx5_core_res_put(&msrq->common); } } else { wq = &qp->rq; @@ -707,15 +707,15 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, *cqe_size = ucmd.cqe_size; - cq->buf.umem = ib_umem_get(context, ucmd.buf_addr, - entries * ucmd.cqe_size, - IB_ACCESS_LOCAL_WRITE, 1); + cq->buf.umem = + ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size, + IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(cq->buf.umem)) { err = PTR_ERR(cq->buf.umem); return err; } - err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr, + err = mlx5_ib_db_map_user(to_mucontext(context), udata, ucmd.db_addr, &cq->db); if (err) goto err_umem; @@ -1111,7 +1111,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, struct ib_umem *umem; int err; int npages; - struct ib_ucontext *context = cq->buf.umem->context; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) @@ -1124,7 +1123,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1) return -EINVAL; - umem = ib_umem_get(context, ucmd.buf_addr, + umem = ib_umem_get(udata, ucmd.buf_addr, (size_t)ucmd.cqe_size * entries, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(umem)) { diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 5a588f3cfb1b..cd43e39ced87 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -8,6 +8,7 @@ #include <rdma/uverbs_types.h> #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/mlx5_user_ioctl_verbs.h> #include <rdma/ib_umem.h> #include <rdma/uverbs_std_types.h> #include <linux/mlx5/driver.h> @@ -17,12 +18,28 @@ #define UVERBS_MODULE_NAME mlx5_ib #include <rdma/uverbs_named_ioctl.h> +enum devx_obj_flags { + DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0, +}; + +struct devx_async_data { + struct mlx5_ib_dev *mdev; + struct list_head list; + struct ib_uobject *fd_uobj; + struct mlx5_async_work cb_work; + u16 cmd_out_len; + /* must be last field in this structure */ + struct mlx5_ib_uapi_devx_async_cmd_hdr hdr; +}; + #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) struct devx_obj { struct mlx5_core_dev *mdev; u64 obj_id; u32 dinlen; /* destroy inbox length */ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; + u32 flags; + struct mlx5_ib_devx_mr devx_mr; }; struct devx_umem { @@ -1011,6 +1028,92 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, } } +static int devx_handle_mkey_indirect(struct devx_obj *obj, + struct mlx5_ib_dev *dev, + void *in, void *out) +{ + struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table; + struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr; + unsigned long flags; + struct mlx5_core_mkey *mkey; + void *mkc; + u8 key; + int err; + + mkey = &devx_mr->mmkey; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + key = MLX5_GET(mkc, mkc, mkey_7_0); + mkey->key = mlx5_idx_to_mkey( + MLX5_GET(create_mkey_out, out, mkey_index)) | key; + mkey->type = MLX5_MKEY_INDIRECT_DEVX; + mkey->iova = MLX5_GET64(mkc, mkc, start_addr); + mkey->size = MLX5_GET64(mkc, mkc, len); + mkey->pd = MLX5_GET(mkc, mkc, pd); + devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); + + write_lock_irqsave(&table->lock, flags); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), + mkey); + write_unlock_irqrestore(&table->lock, flags); + return err; +} + +static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, + struct devx_obj *obj, + void *in, int in_len) +{ + int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) + + MLX5_FLD_SZ_BYTES(create_mkey_in, + memory_key_mkey_entry); + void *mkc; + u8 access_mode; + + if (in_len < min_len) + return -EINVAL; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + access_mode = MLX5_GET(mkc, mkc, access_mode_1_0); + access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2; + + if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS || + access_mode == MLX5_MKC_ACCESS_MODE_KSM) { + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + obj->flags |= DEVX_OBJ_FLAGS_INDIRECT_MKEY; + return 0; + } + + MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1); + return 0; +} + +static void devx_free_indirect_mkey(struct rcu_head *rcu) +{ + kfree(container_of(rcu, struct devx_obj, devx_mr.rcu)); +} + +/* This function to delete from the radix tree needs to be called before + * destroying the underlying mkey. Otherwise a race might occur in case that + * other thread will get the same mkey before this one will be deleted, + * in that case it will fail via inserting to the tree its own data. + * + * Note: + * An error in the destroy is not expected unless there is some other indirect + * mkey which points to this one. In a kernel cleanup flow it will be just + * destroyed in the iterative destruction call. In a user flow, in case + * the application didn't close in the expected order it's its own problem, + * the mkey won't be part of the tree, in both cases the kernel is safe. + */ +static void devx_cleanup_mkey(struct devx_obj *obj) +{ + struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table; + unsigned long flags; + + write_lock_irqsave(&table->lock, flags); + radix_tree_delete(&table->tree, mlx5_base_mkey(obj->devx_mr.mmkey.key)); + write_unlock_irqrestore(&table->lock, flags); +} + static int devx_obj_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why) { @@ -1018,10 +1121,21 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, struct devx_obj *obj = uobject->object; int ret; + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) + devx_cleanup_mkey(obj); + ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + struct mlx5_ib_dev *dev = to_mdev(uobject->context->device); + + call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, + devx_free_indirect_mkey); + return ret; + } + kfree(obj); return ret; } @@ -1032,6 +1146,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT); + int cmd_in_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); void *cmd_out; struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); @@ -1060,10 +1176,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( return -ENOMEM; MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); - devx_set_umem_valid(cmd_in); + if (opcode == MLX5_CMD_OP_CREATE_MKEY) { + err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len); + if (err) + goto obj_free; + } else { + devx_set_umem_valid(cmd_in); + } err = mlx5_cmd_exec(dev->mdev, cmd_in, - uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN), + cmd_in_len, cmd_out, cmd_out_len); if (err) goto obj_free; @@ -1074,6 +1196,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( &obj_id); WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); + if (err) + goto obj_destroy; + } + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) goto obj_destroy; @@ -1082,6 +1210,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( return 0; obj_destroy: + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) + devx_cleanup_mkey(obj); mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); obj_free: kfree(obj); @@ -1168,6 +1298,153 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( cmd_out, cmd_out_len); } +struct devx_async_event_queue { + spinlock_t lock; + wait_queue_head_t poll_wait; + struct list_head event_list; + atomic_t bytes_in_use; + u8 is_destroyed:1; +}; + +struct devx_async_cmd_event_file { + struct ib_uobject uobj; + struct devx_async_event_queue ev_queue; + struct mlx5_async_ctx async_ctx; +}; + +static void devx_init_event_queue(struct devx_async_event_queue *ev_queue) +{ + spin_lock_init(&ev_queue->lock); + INIT_LIST_HEAD(&ev_queue->event_list); + init_waitqueue_head(&ev_queue->poll_wait); + atomic_set(&ev_queue->bytes_in_use, 0); + ev_queue->is_destroyed = 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)( + struct uverbs_attr_bundle *attrs) +{ + struct devx_async_cmd_event_file *ev_file; + + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE); + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + + ev_file = container_of(uobj, struct devx_async_cmd_event_file, + uobj); + devx_init_event_queue(&ev_file->ev_queue); + mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx); + return 0; +} + +static void devx_query_callback(int status, struct mlx5_async_work *context) +{ + struct devx_async_data *async_data = + container_of(context, struct devx_async_data, cb_work); + struct ib_uobject *fd_uobj = async_data->fd_uobj; + struct devx_async_cmd_event_file *ev_file; + struct devx_async_event_queue *ev_queue; + unsigned long flags; + + ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file, + uobj); + ev_queue = &ev_file->ev_queue; + + spin_lock_irqsave(&ev_queue->lock, flags); + list_add_tail(&async_data->list, &ev_queue->event_list); + spin_unlock_irqrestore(&ev_queue->lock, flags); + + wake_up_interruptible(&ev_queue->poll_wait); + fput(fd_uobj->object); +} + +#define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */ + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)( + struct uverbs_attr_bundle *attrs) +{ + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN); + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE); + u16 cmd_out_len; + struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct ib_uobject *fd_uobj; + int err; + int uid; + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + struct devx_async_cmd_event_file *ev_file; + struct devx_async_data *async_data; + + uid = devx_get_uid(c, cmd_in); + if (uid < 0) + return uid; + + if (!devx_is_obj_query_cmd(cmd_in)) + return -EINVAL; + + err = uverbs_get_const(&cmd_out_len, attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN); + if (err) + return err; + + if (!devx_is_valid_obj_id(uobj, cmd_in)) + return -EINVAL; + + fd_uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD); + if (IS_ERR(fd_uobj)) + return PTR_ERR(fd_uobj); + + ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file, + uobj); + + if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) > + MAX_ASYNC_BYTES_IN_USE) { + atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use); + return -EAGAIN; + } + + async_data = kvzalloc(struct_size(async_data, hdr.out_data, + cmd_out_len), GFP_KERNEL); + if (!async_data) { + err = -ENOMEM; + goto sub_bytes; + } + + err = uverbs_copy_from(&async_data->hdr.wr_id, attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID); + if (err) + goto free_async; + + async_data->cmd_out_len = cmd_out_len; + async_data->mdev = mdev; + async_data->fd_uobj = fd_uobj; + + get_file(fd_uobj->object); + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); + err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in, + uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN), + async_data->hdr.out_data, + async_data->cmd_out_len, + devx_query_callback, &async_data->cb_work); + + if (err) + goto cb_err; + + return 0; + +cb_err: + fput(fd_uobj->object); +free_async: + kvfree(async_data); +sub_bytes: + atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use); + return err; +} + static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, struct uverbs_attr_bundle *attrs, struct devx_umem *obj) @@ -1195,7 +1472,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - obj->umem = ib_umem_get(ucontext, addr, size, access, 0); + obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); @@ -1313,6 +1590,123 @@ static int devx_umem_cleanup(struct ib_uobject *uobject, return 0; } +static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct devx_async_cmd_event_file *comp_ev_file = filp->private_data; + struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + struct devx_async_data *event; + int ret = 0; + size_t eventsz; + + spin_lock_irq(&ev_queue->lock); + + while (list_empty(&ev_queue->event_list)) { + spin_unlock_irq(&ev_queue->lock); + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible( + ev_queue->poll_wait, + (!list_empty(&ev_queue->event_list) || + ev_queue->is_destroyed))) { + return -ERESTARTSYS; + } + + if (list_empty(&ev_queue->event_list) && + ev_queue->is_destroyed) + return -EIO; + + spin_lock_irq(&ev_queue->lock); + } + + event = list_entry(ev_queue->event_list.next, + struct devx_async_data, list); + eventsz = event->cmd_out_len + + sizeof(struct mlx5_ib_uapi_devx_async_cmd_hdr); + + if (eventsz > count) { + spin_unlock_irq(&ev_queue->lock); + return -ENOSPC; + } + + list_del(ev_queue->event_list.next); + spin_unlock_irq(&ev_queue->lock); + + if (copy_to_user(buf, &event->hdr, eventsz)) + ret = -EFAULT; + else + ret = eventsz; + + atomic_sub(event->cmd_out_len, &ev_queue->bytes_in_use); + kvfree(event); + return ret; +} + +static int devx_async_cmd_event_close(struct inode *inode, struct file *filp) +{ + struct ib_uobject *uobj = filp->private_data; + struct devx_async_cmd_event_file *comp_ev_file = container_of( + uobj, struct devx_async_cmd_event_file, uobj); + struct devx_async_data *entry, *tmp; + + spin_lock_irq(&comp_ev_file->ev_queue.lock); + list_for_each_entry_safe(entry, tmp, + &comp_ev_file->ev_queue.event_list, list) + kvfree(entry); + spin_unlock_irq(&comp_ev_file->ev_queue.lock); + + uverbs_close_fd(filp); + return 0; +} + +static __poll_t devx_async_cmd_event_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct devx_async_cmd_event_file *comp_ev_file = filp->private_data; + struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + __poll_t pollflags = 0; + + poll_wait(filp, &ev_queue->poll_wait, wait); + + spin_lock_irq(&ev_queue->lock); + if (ev_queue->is_destroyed) + pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + else if (!list_empty(&ev_queue->event_list)) + pollflags = EPOLLIN | EPOLLRDNORM; + spin_unlock_irq(&ev_queue->lock); + + return pollflags; +} + +const struct file_operations devx_async_cmd_event_fops = { + .owner = THIS_MODULE, + .read = devx_async_cmd_event_read, + .poll = devx_async_cmd_event_poll, + .release = devx_async_cmd_event_close, + .llseek = no_llseek, +}; + +static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + struct devx_async_cmd_event_file *comp_ev_file = + container_of(uobj, struct devx_async_cmd_event_file, + uobj); + struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + + spin_lock_irq(&ev_queue->lock); + ev_queue->is_destroyed = 1; + spin_unlock_irq(&ev_queue->lock); + + if (why == RDMA_REMOVE_DRIVER_REMOVE) + wake_up_interruptible(&ev_queue->poll_wait); + + mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx); + return 0; +}; + DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_UMEM_REG, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE, @@ -1423,6 +1817,27 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), UA_MANDATORY)); +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, + UVERBS_IDR_ANY_OBJECT, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN, + u16, UA_MANDATORY), + UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD, + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY)); + DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR), @@ -1433,13 +1848,30 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY), - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY)); + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)); DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC, + UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE, + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UVERBS_ACCESS_NEW, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file), + devx_hot_unplug_async_cmd_event_file, + &devx_async_cmd_event_fops, "[devx_async_cmd]", + O_RDONLY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)); + static bool devx_is_supported(struct ib_device *device) { struct mlx5_ib_dev *dev = to_mdev(device); @@ -1457,5 +1889,8 @@ const struct uapi_definition mlx5_ib_devx_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_DEVX_UMEM, UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), {}, }; diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index a0e4e6ddb71a..8f4e5f22b84c 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -43,7 +43,8 @@ struct mlx5_ib_user_db_page { int refcnt; }; -int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, +int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, + struct ib_udata *udata, unsigned long virt, struct mlx5_db *db) { struct mlx5_ib_user_db_page *page; @@ -63,8 +64,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK, - PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 46a9ddc8ca56..6d7b8bad4b61 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -70,7 +70,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { struct mlx5_ib_dev *ibdev; - ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev)); + ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!ibdev) return -ENOMEM; diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 558638468edb..6c529e6f3a01 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -36,6 +36,7 @@ #include <rdma/ib_smi.h> #include <rdma/ib_pma.h> #include "mlx5_ib.h" +#include "cmd.h" enum { MLX5_IB_VENDOR_CLASS1 = 0x9, @@ -51,9 +52,10 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num, return dev->mdev->port_caps[port_num - 1].has_smi; } -int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const void *in_mad, void *response_mad) +static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, + int ignore_bkey, u8 port, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const void *in_mad, + void *response_mad) { u8 op_modifier = 0; @@ -68,7 +70,8 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, if (ignore_bkey || !in_wc) op_modifier |= 0x2; - return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port); + return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, + port); } static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 94fe253d4956..76d6c2557d0c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -476,24 +476,51 @@ out: return err; } +struct mlx5_ib_vlan_info { + u16 vlan_id; + bool vlan; +}; + +static int get_lower_dev_vlan(struct net_device *lower_dev, void *data) +{ + struct mlx5_ib_vlan_info *vlan_info = data; + + if (is_vlan_dev(lower_dev)) { + vlan_info->vlan = true; + vlan_info->vlan_id = vlan_dev_vlan_id(lower_dev); + } + /* We are interested only in first level vlan device, so + * always return 1 to stop iterating over next level devices. + */ + return 1; +} + static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr) { enum ib_gid_type gid_type = IB_GID_TYPE_IB; + struct mlx5_ib_vlan_info vlan_info = { }; u8 roce_version = 0; u8 roce_l3_type = 0; - bool vlan = false; u8 mac[ETH_ALEN]; - u16 vlan_id = 0; if (gid) { gid_type = attr->gid_type; ether_addr_copy(mac, attr->ndev->dev_addr); if (is_vlan_dev(attr->ndev)) { - vlan = true; - vlan_id = vlan_dev_vlan_id(attr->ndev); + vlan_info.vlan = true; + vlan_info.vlan_id = vlan_dev_vlan_id(attr->ndev); + } else { + /* If the netdev is upper device and if it's lower + * lower device is vlan device, consider vlan id of + * the lower vlan device for this gid entry. + */ + rcu_read_lock(); + netdev_walk_all_lower_dev_rcu(attr->ndev, + get_lower_dev_vlan, &vlan_info); + rcu_read_unlock(); } } @@ -514,8 +541,9 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, } return mlx5_core_roce_gid_set(dev->mdev, index, roce_version, - roce_l3_type, gid->raw, mac, vlan, - vlan_id, port_num); + roce_l3_type, gid->raw, mac, + vlan_info.vlan, vlan_info.vlan_id, + port_num); } static int mlx5_ib_add_gid(const struct ib_gid_attr *attr, @@ -923,11 +951,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(mdev, pg)) - props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; - props->odp_caps = dev->odp_caps; -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + if (MLX5_CAP_GEN(mdev, pg)) + props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; + props->odp_caps = dev->odp_caps; + } if (MLX5_CAP_GEN(mdev, cd)) props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; @@ -1763,9 +1791,9 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (err) goto out_sys_pages; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; -#endif + if (ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING) + context->ibucontext.invalidate_range = + &mlx5_ib_invalidate_range; if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { err = mlx5_ib_devx_create(dev, true); @@ -1897,12 +1925,10 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); struct mlx5_bfreg_info *bfregi; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* All umem's must be destroyed before destroying the ucontext. */ mutex_lock(&ibcontext->per_mm_list_lock); WARN_ON(!list_empty(&ibcontext->per_mm_list)); mutex_unlock(&ibcontext->per_mm_list_lock); -#endif bfregi = &context->bfregi; mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); @@ -2265,7 +2291,7 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; u16 uid = 0; - pd = kmalloc(sizeof(*pd), GFP_KERNEL); + pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) return ERR_PTR(-ENOMEM); @@ -2335,10 +2361,29 @@ static u8 get_match_criteria_enable(u32 *match_criteria) return match_criteria_enable; } -static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) +static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) { - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); + u8 entry_mask; + u8 entry_val; + int err = 0; + + if (!mask) + goto out; + + entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c, + ip_protocol); + entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v, + ip_protocol); + if (!entry_mask) { + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); + goto out; + } + /* Don't override existing ip protocol */ + if (mask != entry_mask || val != entry_val) + err = -EINVAL; +out: + return err; } static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, @@ -2572,8 +2617,10 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, set_tos(headers_c, headers_v, ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); - set_proto(headers_c, headers_v, - ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto); + if (set_proto(headers_c, headers_v, + ib_spec->ipv4.mask.proto, + ib_spec->ipv4.val.proto)) + return -EINVAL; break; case IB_FLOW_SPEC_IPV6: if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) @@ -2612,9 +2659,10 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, ib_spec->ipv6.mask.traffic_class, ib_spec->ipv6.val.traffic_class); - set_proto(headers_c, headers_v, - ib_spec->ipv6.mask.next_hdr, - ib_spec->ipv6.val.next_hdr); + if (set_proto(headers_c, headers_v, + ib_spec->ipv6.mask.next_hdr, + ib_spec->ipv6.val.next_hdr)) + return -EINVAL; set_flow_label(misc_params_c, misc_params_v, ntohl(ib_spec->ipv6.mask.flow_label), @@ -2635,10 +2683,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, LAST_TCP_UDP_FIELD)) return -EOPNOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, - 0xff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, - IPPROTO_TCP); + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP)) + return -EINVAL; MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, ntohs(ib_spec->tcp_udp.mask.src_port)); @@ -2655,10 +2701,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, LAST_TCP_UDP_FIELD)) return -EOPNOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, - 0xff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, - IPPROTO_UDP); + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP)) + return -EINVAL; MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(ib_spec->tcp_udp.mask.src_port)); @@ -2674,6 +2718,9 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, if (ib_spec->gre.mask.c_ks_res0_ver) return -EOPNOTSUPP; + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE)) + return -EINVAL; + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 0xff); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, @@ -3825,7 +3872,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); - dst = kzalloc(sizeof(*dst) * 2, GFP_KERNEL); + dst = kcalloc(2, sizeof(*dst), GFP_KERNEL); if (!dst) return ERR_PTR(-ENOMEM); @@ -4106,7 +4153,7 @@ static ssize_t fw_pages_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); } @@ -4116,7 +4163,7 @@ static ssize_t reg_pages_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); } @@ -4126,7 +4173,8 @@ static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); + return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -4135,7 +4183,8 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); + return sprintf(buf, "%x\n", dev->mdev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -4144,7 +4193,8 @@ static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { struct mlx5_ib_dev *dev = - container_of(device, struct mlx5_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); + return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, dev->mdev->board_id); } @@ -5508,9 +5558,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port; mlx5_notifier_register(mpi->mdev, &mpi->mdev_events); - err = mlx5_ib_init_cong_debugfs(ibdev, port_num); - if (err) - goto unbind; + mlx5_ib_init_cong_debugfs(ibdev, port_num); return true; @@ -5722,11 +5770,12 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - cleanup_srcu_struct(&dev->mr_srcu); - drain_workqueue(dev->advise_mr_wq); - destroy_workqueue(dev->advise_mr_wq); -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + srcu_barrier(&dev->mr_srcu); + cleanup_srcu_struct(&dev->mr_srcu); + drain_workqueue(dev->advise_mr_wq); + destroy_workqueue(dev->advise_mr_wq); + } kfree(dev->port); } @@ -5779,19 +5828,20 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->memic.memic_lock); dev->memic.dev = mdev; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - dev->advise_mr_wq = alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0); - if (!dev->advise_mr_wq) { - err = -ENOMEM; - goto err_mp; - } + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + dev->advise_mr_wq = + alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0); + if (!dev->advise_mr_wq) { + err = -ENOMEM; + goto err_mp; + } - err = init_srcu_struct(&dev->mr_srcu); - if (err) { - destroy_workqueue(dev->advise_mr_wq); - goto err_mp; + err = init_srcu_struct(&dev->mr_srcu); + if (err) { + destroy_workqueue(dev->advise_mr_wq); + goto err_mp; + } } -#endif return 0; err_mp: @@ -6154,7 +6204,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) return mlx5_ib_odp_init_one(dev); } -void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_odp_cleanup_one(dev); } @@ -6183,8 +6233,9 @@ void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev) { - return mlx5_ib_init_cong_debugfs(dev, - mlx5_core_native_port_num(dev->mdev) - 1); + mlx5_ib_init_cong_debugfs(dev, + mlx5_core_native_port_num(dev->mdev) - 1); + return 0; } static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev) @@ -6234,7 +6285,7 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) name = "mlx5_%d"; else name = "mlx5_bond_%d"; - return ib_register_device(&dev->ib_dev, name, NULL); + return ib_register_device(&dev->ib_dev, name); } void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) @@ -6485,7 +6536,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) return mlx5_ib_add_slave_port(mdev); - dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); + dev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!dev) return NULL; diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 549234988bb4..9f90be296ee0 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -111,7 +111,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, *count = i; } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING static u64 umem_dma_to_mtt(dma_addr_t umem_dma) { u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK; @@ -123,7 +122,6 @@ static u64 umem_dma_to_mtt(dma_addr_t umem_dma) return mtt_entry; } -#endif /* * Populate the given array with bus addresses from the umem. @@ -151,7 +149,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int len; struct scatterlist *sg; int entry; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (umem->is_odp) { WARN_ON(shift != 0); WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)); @@ -164,7 +162,6 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, } return; } -#endif i = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b06d3b1efea8..f9817284c7a3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -36,6 +36,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <rdma/ib_verbs.h> +#include <rdma/ib_umem.h> #include <rdma/ib_smi.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/cq.h> @@ -587,14 +588,27 @@ struct mlx5_ib_mr { struct mlx5_ib_mr *parent; atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; + struct mlx5_async_work cb_work; }; +static inline bool is_odp_mr(struct mlx5_ib_mr *mr) +{ + return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && + mr->umem->is_odp; +} + struct mlx5_ib_mw { struct ib_mw ibmw; struct mlx5_core_mkey mmkey; int ndescs; }; +struct mlx5_ib_devx_mr { + struct mlx5_core_mkey mmkey; + int ndescs; + struct rcu_head rcu; +}; + struct mlx5_ib_umr_context { struct ib_cqe cqe; enum ib_wc_status status; @@ -623,7 +637,6 @@ struct mlx5_cache_ent { spinlock_t lock; - struct dentry *dir; char name[4]; u32 order; u32 xlt; @@ -635,11 +648,6 @@ struct mlx5_cache_ent { u32 miss; u32 limit; - struct dentry *fsize; - struct dentry *fcur; - struct dentry *fmiss; - struct dentry *flimit; - struct mlx5_ib_dev *dev; struct work_struct work; struct delayed_work dwork; @@ -911,7 +919,6 @@ struct mlx5_ib_dev { /* Prevents soft lock on massive reg MRs */ struct mutex slow_path_mutex; int fill_delay; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_odp_caps odp_caps; u64 odp_max_size; struct mlx5_ib_pf_eq odp_pf_eq; @@ -923,7 +930,6 @@ struct mlx5_ib_dev { struct srcu_struct mr_srcu; u32 null_mkey; struct workqueue_struct *advise_mr_wq; -#endif struct mlx5_ib_flow_db *flow_db; /* protect resources needed as part of reset flow */ spinlock_t reset_flow_resource_lock; @@ -944,6 +950,7 @@ struct mlx5_ib_dev { struct mlx5_memic memic; u16 devx_whitelist_uid; struct mlx5_srq_table srq_table; + struct mlx5_async_ctx async_ctx; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1032,15 +1039,13 @@ to_mflow_act(struct ib_flow_action *ibact) return container_of(ibact, struct mlx5_ib_flow_action, ib_action); } -int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, +int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, + struct ib_udata *udata, unsigned long virt, struct mlx5_db *db); void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); -int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const void *in_mad, void *response_mad); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); @@ -1070,9 +1075,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); -int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, - void *buffer, u32 length, - struct mlx5_ib_qp_base *base); +int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, + int buflen, size_t *bc); +int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, + int buflen, size_t *bc); +int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, + void *buffer, int buflen, size_t *bc); struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, @@ -1098,6 +1106,7 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags); struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, + struct ib_udata *udata, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, @@ -1215,6 +1224,9 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { return -EOPNOTSUPP; } +static inline void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, + unsigned long start, + unsigned long end){}; #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ /* Needed for rep profile */ @@ -1254,7 +1266,7 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, const struct ib_gid_attr *attr); void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); -int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); +void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); /* GSI QP helper functions */ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index fd6ea1f75085..705a79cd21da 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -71,10 +71,9 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - /* Wait until all page fault handlers using the mr complete. */ - synchronize_srcu(&dev->mr_srcu); -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + /* Wait until all page fault handlers using the mr complete. */ + synchronize_srcu(&dev->mr_srcu); return err; } @@ -95,10 +94,9 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING static void update_odp_mr(struct mlx5_ib_mr *mr) { - if (mr->umem->is_odp) { + if (is_odp_mr(mr)) { /* * This barrier prevents the compiler from moving the * setting of umem->odp_data->private to point to our @@ -121,11 +119,11 @@ static void update_odp_mr(struct mlx5_ib_mr *mr) smp_wmb(); } } -#endif -static void reg_mr_callback(int status, void *context) +static void reg_mr_callback(int status, struct mlx5_async_work *context) { - struct mlx5_ib_mr *mr = context; + struct mlx5_ib_mr *mr = + container_of(context, struct mlx5_ib_mr, cb_work); struct mlx5_ib_dev *dev = mr->dev; struct mlx5_mr_cache *cache = &dev->cache; int c = order2idx(dev, mr->order); @@ -216,9 +214,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) ent->pending++; spin_unlock_irq(&ent->lock); err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, - in, inlen, + &dev->async_ctx, in, inlen, mr->out, sizeof(mr->out), - reg_mr_callback, mr); + reg_mr_callback, &mr->cb_work); if (err) { spin_lock_irq(&ent->lock); ent->pending--; @@ -256,9 +254,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + synchronize_srcu(&dev->mr_srcu); list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { list_del(&mr->list); @@ -610,52 +607,27 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) dev->cache.root = NULL; } -static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) +static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; + struct dentry *dir; int i; if (!mlx5_debugfs_root || dev->rep) - return 0; + return; cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); - if (!cache->root) - return -ENOMEM; for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { ent = &cache->ent[i]; sprintf(ent->name, "%d", ent->order); - ent->dir = debugfs_create_dir(ent->name, cache->root); - if (!ent->dir) - goto err; - - ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent, - &size_fops); - if (!ent->fsize) - goto err; - - ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent, - &limit_fops); - if (!ent->flimit) - goto err; - - ent->fcur = debugfs_create_u32("cur", 0400, ent->dir, - &ent->cur); - if (!ent->fcur) - goto err; - - ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir, - &ent->miss); - if (!ent->fmiss) - goto err; + dir = debugfs_create_dir(ent->name, cache->root); + debugfs_create_file("size", 0600, dir, ent, &size_fops); + debugfs_create_file("limit", 0600, dir, ent, &limit_fops); + debugfs_create_u32("cur", 0400, dir, &ent->cur); + debugfs_create_u32("miss", 0600, dir, &ent->miss); } - - return 0; -err: - mlx5_mr_cache_debugfs_cleanup(dev); - - return -ENOMEM; } static void delay_time_func(struct timer_list *t) @@ -669,7 +641,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; - int err; int i; mutex_init(&dev->slow_path_mutex); @@ -679,6 +650,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return -ENOMEM; } + mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); timer_setup(&dev->delay_timer, delay_time_func, 0); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { ent = &cache->ent[i]; @@ -713,45 +685,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) queue_work(cache->wq, &ent->work); } - err = mlx5_mr_cache_debugfs_init(dev); - if (err) - mlx5_ib_warn(dev, "cache debugfs failure\n"); - - /* - * We don't want to fail driver if debugfs failed to initialize, - * so we are not forwarding error to the user. - */ + mlx5_mr_cache_debugfs_init(dev); return 0; } -static void wait_for_async_commands(struct mlx5_ib_dev *dev) -{ - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; - int total = 0; - int i; - int j; - - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - for (j = 0 ; j < 1000; j++) { - if (!ent->pending) - break; - msleep(50); - } - } - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - total += ent->pending; - } - - if (total) - mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total); - else - mlx5_ib_warn(dev, "done with all pending requests\n"); -} - int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { int i; @@ -763,12 +701,12 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) flush_workqueue(dev->cache.wq); mlx5_mr_cache_debugfs_cleanup(dev); + mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) clean_keys(dev, i); destroy_workqueue(dev->cache.wq); - wait_for_async_commands(dev); del_timer_sync(&dev->delay_timer); return 0; @@ -847,18 +785,17 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev) return MLX5_MAX_UMR_SHIFT; } -static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, - int access_flags, struct ib_umem **umem, - int *npages, int *page_shift, int *ncont, - int *order) +static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, + u64 start, u64 length, int access_flags, + struct ib_umem **umem, int *npages, int *page_shift, + int *ncont, int *order) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); struct ib_umem *u; int err; *umem = NULL; - u = ib_umem_get(pd->uobject->context, start, length, access_flags, 0); + u = ib_umem_get(udata, start, length, access_flags, 0); err = PTR_ERR_OR_ZERO(u); if (err) { mlx5_ib_dbg(dev, "umem get failed (%d)\n", err); @@ -1331,21 +1268,20 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (!start && length == U64_MAX) { + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && + length == U64_MAX) { if (!(access_flags & IB_ACCESS_ON_DEMAND) || !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) return ERR_PTR(-EINVAL); - mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags); + mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); if (IS_ERR(mr)) return ERR_CAST(mr); return &mr->ibmr; } -#endif - err = mr_umem_get(pd, start, length, access_flags, &umem, &npages, - &page_shift, &ncont, &order); + err = mr_umem_get(dev, udata, start, length, access_flags, &umem, + &npages, &page_shift, &ncont, &order); if (err < 0) return ERR_PTR(err); @@ -1386,9 +1322,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->umem = umem; set_mr_fields(dev, mr, npages, length, access_flags); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); -#endif if (!populate_mtts) { int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; @@ -1405,9 +1339,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - mr->live = 1; -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + mr->live = 1; + return &mr->ibmr; error: ib_umem_release(umem); @@ -1495,8 +1429,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, flags |= IB_MR_REREG_TRANS; ib_umem_release(mr->umem); mr->umem = NULL; - err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, - &npages, &page_shift, &ncont, &order); + err = mr_umem_get(dev, udata, addr, len, access_flags, + &mr->umem, &npages, &page_shift, &ncont, + &order); if (err) goto err; } @@ -1522,9 +1457,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } mr->allocated_from_cache = 0; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - mr->live = 1; -#endif + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + mr->live = 1; } else { /* * Send a UMR WQE @@ -1553,9 +1487,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, set_mr_fields(dev, mr, npages, len, access_flags); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); -#endif return 0; err: @@ -1641,8 +1573,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) int npages = mr->npages; struct ib_umem *umem = mr->umem; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (umem && umem->is_odp) { + if (is_odp_mr(mr)) { struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem); /* Prevent new page faults from succeeding */ @@ -1666,7 +1597,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Avoid double-freeing the umem. */ umem = NULL; } -#endif + clean_mr(dev, mr); /* diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 4ee32964e1dd..335fd0c6ea2a 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -101,9 +101,9 @@ static int check_parent(struct ib_umem_odp *odp, return mr && mr->parent == parent && !odp->dying; } -struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr) +static struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr) { - if (WARN_ON(!mr || !mr->umem || !mr->umem->is_odp)) + if (WARN_ON(!mr || !is_odp_mr(mr))) return NULL; return to_ib_umem_odp(mr->umem)->per_mm; @@ -315,6 +315,9 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send)) caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND; + if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.srq_receive)) + caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.send)) caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND; @@ -330,6 +333,27 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic)) caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC; + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.srq_receive)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.send)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SEND; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.receive)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_RECV; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.write)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_WRITE; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.read)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_READ; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.atomic)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_ATOMIC; + + if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.srq_receive)) + caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV; + if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) && MLX5_CAP_GEN(dev->mdev, null_mkey) && MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) @@ -439,7 +463,7 @@ next_mr: if (nentries) nentries++; } else { - odp = ib_alloc_odp_umem(odp_mr->per_mm, addr, + odp = ib_alloc_odp_umem(odp_mr, addr, MLX5_IMR_MTT_SIZE); if (IS_ERR(odp)) { mutex_unlock(&odp_mr->umem_mutex); @@ -492,13 +516,13 @@ next_mr: } struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, + struct ib_udata *udata, int access_flags) { - struct ib_ucontext *ctx = pd->ibpd.uobject->context; struct mlx5_ib_mr *imr; struct ib_umem *umem; - umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0); + umem = ib_umem_get(udata, 0, 0, IB_ACCESS_ON_DEMAND, 0); if (IS_ERR(umem)) return ERR_CAST(umem); @@ -685,6 +709,21 @@ struct pf_frame { int depth; }; +static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey) +{ + struct mlx5_ib_mw *mw; + struct mlx5_ib_devx_mr *devx_mr; + + if (mmkey->type == MLX5_MKEY_MW) { + mw = container_of(mmkey, struct mlx5_ib_mw, mmkey); + return mw->ndescs; + } + + devx_mr = container_of(mmkey, struct mlx5_ib_devx_mr, + mmkey); + return devx_mr->ndescs; +} + /* * Handle a single data segment in a page-fault WQE or RDMA region. * @@ -705,11 +744,11 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key, bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mw *mw; struct mlx5_ib_mr *mr; struct mlx5_klm *pklm; u32 *out = NULL; size_t offset; + int ndescs; srcu_key = srcu_read_lock(&dev->mr_srcu); @@ -739,12 +778,12 @@ next_mr: goto srcu_unlock; } - if (prefetch && !mr->umem->is_odp) { + if (prefetch && !is_odp_mr(mr)) { ret = -EINVAL; goto srcu_unlock; } - if (!mr->umem->is_odp) { + if (!is_odp_mr(mr)) { mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", key); if (bytes_mapped) @@ -762,7 +801,8 @@ next_mr: break; case MLX5_MKEY_MW: - mw = container_of(mmkey, struct mlx5_ib_mw, mmkey); + case MLX5_MKEY_INDIRECT_DEVX: + ndescs = get_indirect_num_descs(mmkey); if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) { mlx5_ib_dbg(dev, "indirection level exceeded\n"); @@ -771,7 +811,7 @@ next_mr: } outlen = MLX5_ST_SZ_BYTES(query_mkey_out) + - sizeof(*pklm) * (mw->ndescs - 2); + sizeof(*pklm) * (ndescs - 2); if (outlen > cur_outlen) { kfree(out); @@ -786,14 +826,14 @@ next_mr: pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out, bsf0_klm0_pas_mtt0_1); - ret = mlx5_core_query_mkey(dev->mdev, &mw->mmkey, out, outlen); + ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen); if (ret) goto srcu_unlock; offset = io_virt - MLX5_GET64(query_mkey_out, out, memory_key_mkey_entry.start_addr); - for (i = 0; bcnt && i < mw->ndescs; i++, pklm++) { + for (i = 0; bcnt && i < ndescs; i++, pklm++) { if (offset >= be32_to_cpu(pklm->bcount)) { offset -= be32_to_cpu(pklm->bcount); continue; @@ -853,7 +893,6 @@ srcu_unlock: /** * Parse a series of data segments for page fault handling. * - * @qp the QP on which the fault occurred. * @pfault contains page fault information. * @wqe points at the first data segment in the WQE. * @wqe_end points after the end of the WQE. @@ -870,7 +909,7 @@ srcu_unlock: */ static int pagefault_data_segments(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault, - struct mlx5_ib_qp *qp, void *wqe, + void *wqe, void *wqe_end, u32 *bytes_mapped, u32 *total_wqe_bytes, int receive_queue) { @@ -881,10 +920,6 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, size_t bcnt; int inline_segment; - /* Skip SRQ next-WQE segment. */ - if (receive_queue && qp->ibqp.srq) - wqe += sizeof(struct mlx5_wqe_srq_next_seg); - if (bytes_mapped) *bytes_mapped = 0; if (total_wqe_bytes) @@ -1009,6 +1044,10 @@ static int mlx5_ib_mr_initiator_pfault_handler( MLX5_WQE_CTRL_OPCODE_MASK; switch (qp->ibqp.qp_type) { + case IB_QPT_XRC_INI: + *wqe += sizeof(struct mlx5_wqe_xrc_seg); + transport_caps = dev->odp_caps.per_transport_caps.xrc_odp_caps; + break; case IB_QPT_RC: transport_caps = dev->odp_caps.per_transport_caps.rc_odp_caps; break; @@ -1028,7 +1067,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( return -EFAULT; } - if (qp->ibqp.qp_type != IB_QPT_RC) { + if (qp->ibqp.qp_type == IB_QPT_UD) { av = *wqe; if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV)) *wqe += sizeof(struct mlx5_av); @@ -1053,21 +1092,34 @@ static int mlx5_ib_mr_initiator_pfault_handler( } /* - * Parse responder WQE. Advances the wqe pointer to point at the - * scatter-gather list, and set wqe_end to the end of the WQE. + * Parse responder WQE and set wqe_end to the end of the WQE. */ -static int mlx5_ib_mr_responder_pfault_handler( - struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault, - struct mlx5_ib_qp *qp, void **wqe, void **wqe_end, int wqe_length) +static int mlx5_ib_mr_responder_pfault_handler_srq(struct mlx5_ib_dev *dev, + struct mlx5_ib_srq *srq, + void **wqe, void **wqe_end, + int wqe_length) { - struct mlx5_ib_wq *wq = &qp->rq; - int wqe_size = 1 << wq->wqe_shift; + int wqe_size = 1 << srq->msrq.wqe_shift; - if (qp->ibqp.srq) { - mlx5_ib_err(dev, "ODP fault on SRQ is not supported\n"); + if (wqe_size > wqe_length) { + mlx5_ib_err(dev, "Couldn't read all of the receive WQE's content\n"); return -EFAULT; } + *wqe_end = *wqe + wqe_size; + *wqe += sizeof(struct mlx5_wqe_srq_next_seg); + + return 0; +} + +static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + void *wqe, void **wqe_end, + int wqe_length) +{ + struct mlx5_ib_wq *wq = &qp->rq; + int wqe_size = 1 << wq->wqe_shift; + if (qp->wq_sig) { mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n"); return -EFAULT; @@ -1091,7 +1143,7 @@ invalid_transport_or_opcode: return -EFAULT; } - *wqe_end = *wqe + wqe_size; + *wqe_end = wqe + wqe_size; return 0; } @@ -1099,22 +1151,25 @@ invalid_transport_or_opcode: static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev, u32 wq_num, int pf_type) { - enum mlx5_res_type res_type; + struct mlx5_core_rsc_common *common = NULL; + struct mlx5_core_srq *srq; switch (pf_type) { case MLX5_WQE_PF_TYPE_RMP: - res_type = MLX5_RES_SRQ; + srq = mlx5_cmd_get_srq(dev, wq_num); + if (srq) + common = &srq->common; break; case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE: case MLX5_WQE_PF_TYPE_RESP: case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC: - res_type = MLX5_RES_QP; + common = mlx5_core_res_hold(dev->mdev, wq_num, MLX5_RES_QP); break; default: - return NULL; + break; } - return mlx5_core_res_hold(dev->mdev, wq_num, res_type); + return common; } static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res) @@ -1124,6 +1179,14 @@ static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res) return to_mibqp(mqp); } +static inline struct mlx5_ib_srq *res_to_srq(struct mlx5_core_rsc_common *res) +{ + struct mlx5_core_srq *msrq = + container_of(res, struct mlx5_core_srq, common); + + return to_mibsrq(msrq); +} + static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault) { @@ -1134,8 +1197,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, int resume_with_error = 1; u16 wqe_index = pfault->wqe.wqe_index; int requestor = pfault->type & MLX5_PFAULT_REQUESTOR; - struct mlx5_core_rsc_common *res; - struct mlx5_ib_qp *qp; + struct mlx5_core_rsc_common *res = NULL; + struct mlx5_ib_qp *qp = NULL; + struct mlx5_ib_srq *srq = NULL; + size_t bytes_copied; res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type); if (!res) { @@ -1147,6 +1212,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, case MLX5_RES_QP: qp = res_to_qp(res); break; + case MLX5_RES_SRQ: + case MLX5_RES_XSRQ: + srq = res_to_srq(res); + break; default: mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type); goto resolve_page_fault; @@ -1158,9 +1227,23 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, goto resolve_page_fault; } - ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer, - PAGE_SIZE, &qp->trans_qp.base); - if (ret < 0) { + if (qp) { + if (requestor) { + ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, + buffer, PAGE_SIZE, + &bytes_copied); + } else { + ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index, + buffer, PAGE_SIZE, + &bytes_copied); + } + } else { + ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index, + buffer, PAGE_SIZE, + &bytes_copied); + } + + if (ret) { mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%d, wqe_index=%x, qpn=%x\n", ret, wqe_index, pfault->token); goto resolve_page_fault; @@ -1168,11 +1251,18 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, wqe = buffer; if (requestor) - ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, &wqe, - &wqe_end, ret); + ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, + &wqe, &wqe_end, + bytes_copied); + else if (qp) + ret = mlx5_ib_mr_responder_pfault_handler_rq(dev, qp, + wqe, &wqe_end, + bytes_copied); else - ret = mlx5_ib_mr_responder_pfault_handler(dev, pfault, qp, &wqe, - &wqe_end, ret); + ret = mlx5_ib_mr_responder_pfault_handler_srq(dev, srq, + &wqe, &wqe_end, + bytes_copied); + if (ret < 0) goto resolve_page_fault; @@ -1181,7 +1271,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, goto resolve_page_fault; } - ret = pagefault_data_segments(dev, pfault, qp, wqe, wqe_end, + ret = pagefault_data_segments(dev, pfault, wqe, wqe_end, &bytes_mapped, &total_wqe_bytes, !requestor); if (ret == -EAGAIN) { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7db778d96ef5..c6ccd4d36a90 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -109,75 +109,173 @@ static int is_sqp(enum ib_qp_type qp_type) } /** - * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space. + * mlx5_ib_read_user_wqe_common() - Copy a WQE (or part of) from user WQ + * to kernel buffer * - * @qp: QP to copy from. - * @send: copy from the send queue when non-zero, use the receive queue - * otherwise. - * @wqe_index: index to start copying from. For send work queues, the - * wqe_index is in units of MLX5_SEND_WQE_BB. - * For receive work queue, it is the number of work queue - * element in the queue. - * @buffer: destination buffer. - * @length: maximum number of bytes to copy. + * @umem: User space memory where the WQ is + * @buffer: buffer to copy to + * @buflen: buffer length + * @wqe_index: index of WQE to copy from + * @wq_offset: offset to start of WQ + * @wq_wqe_cnt: number of WQEs in WQ + * @wq_wqe_shift: log2 of WQE size + * @bcnt: number of bytes to copy + * @bytes_copied: number of bytes to copy (return value) * - * Copies at least a single WQE, but may copy more data. + * Copies from start of WQE bcnt or less bytes. + * Does not gurantee to copy the entire WQE. * - * Return: the number of bytes copied, or an error code. + * Return: zero on success, or an error code. */ -int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, - void *buffer, u32 length, - struct mlx5_ib_qp_base *base) +static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem, + void *buffer, + u32 buflen, + int wqe_index, + int wq_offset, + int wq_wqe_cnt, + int wq_wqe_shift, + int bcnt, + size_t *bytes_copied) +{ + size_t offset = wq_offset + ((wqe_index % wq_wqe_cnt) << wq_wqe_shift); + size_t wq_end = wq_offset + (wq_wqe_cnt << wq_wqe_shift); + size_t copy_length; + int ret; + + /* don't copy more than requested, more than buffer length or + * beyond WQ end + */ + copy_length = min_t(u32, buflen, wq_end - offset); + copy_length = min_t(u32, copy_length, bcnt); + + ret = ib_umem_copy_from(buffer, umem, offset, copy_length); + if (ret) + return ret; + + if (!ret && bytes_copied) + *bytes_copied = copy_length; + + return 0; +} + +int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, + int wqe_index, + void *buffer, + int buflen, + size_t *bc) { - struct ib_device *ibdev = qp->ibqp.device; - struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq; - size_t offset; - size_t wq_end; + struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct ib_umem *umem = base->ubuffer.umem; - u32 first_copy_length; - int wqe_length; + struct mlx5_ib_wq *wq = &qp->sq; + struct mlx5_wqe_ctrl_seg *ctrl; + size_t bytes_copied; + size_t bytes_copied2; + size_t wqe_length; int ret; + int ds; - if (wq->wqe_cnt == 0) { - mlx5_ib_dbg(dev, "mlx5_ib_read_user_wqe for a QP with wqe_cnt == 0. qp_type: 0x%x\n", - qp->ibqp.qp_type); + if (buflen < sizeof(*ctrl)) return -EINVAL; - } - offset = wq->offset + ((wqe_index % wq->wqe_cnt) << wq->wqe_shift); - wq_end = wq->offset + (wq->wqe_cnt << wq->wqe_shift); + /* at first read as much as possible */ + ret = mlx5_ib_read_user_wqe_common(umem, + buffer, + buflen, + wqe_index, + wq->offset, + wq->wqe_cnt, + wq->wqe_shift, + buflen, + &bytes_copied); + if (ret) + return ret; - if (send && length < sizeof(struct mlx5_wqe_ctrl_seg)) + /* we need at least control segment size to proceed */ + if (bytes_copied < sizeof(*ctrl)) return -EINVAL; - if (offset > umem->length || - (send && offset + sizeof(struct mlx5_wqe_ctrl_seg) > umem->length)) - return -EINVAL; + ctrl = buffer; + ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK; + wqe_length = ds * MLX5_WQE_DS_UNITS; + + /* if we copied enough then we are done */ + if (bytes_copied >= wqe_length) { + *bc = bytes_copied; + return 0; + } + + /* otherwise this a wrapped around wqe + * so read the remaining bytes starting + * from wqe_index 0 + */ + ret = mlx5_ib_read_user_wqe_common(umem, + buffer + bytes_copied, + buflen - bytes_copied, + 0, + wq->offset, + wq->wqe_cnt, + wq->wqe_shift, + wqe_length - bytes_copied, + &bytes_copied2); - first_copy_length = min_t(u32, offset + length, wq_end) - offset; - ret = ib_umem_copy_from(buffer, umem, offset, first_copy_length); if (ret) return ret; + *bc = bytes_copied + bytes_copied2; + return 0; +} - if (send) { - struct mlx5_wqe_ctrl_seg *ctrl = buffer; - int ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK; - - wqe_length = ds * MLX5_WQE_DS_UNITS; - } else { - wqe_length = 1 << wq->wqe_shift; - } +int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, + int wqe_index, + void *buffer, + int buflen, + size_t *bc) +{ + struct mlx5_ib_qp_base *base = &qp->trans_qp.base; + struct ib_umem *umem = base->ubuffer.umem; + struct mlx5_ib_wq *wq = &qp->rq; + size_t bytes_copied; + int ret; - if (wqe_length <= first_copy_length) - return first_copy_length; + ret = mlx5_ib_read_user_wqe_common(umem, + buffer, + buflen, + wqe_index, + wq->offset, + wq->wqe_cnt, + wq->wqe_shift, + buflen, + &bytes_copied); - ret = ib_umem_copy_from(buffer + first_copy_length, umem, wq->offset, - wqe_length - first_copy_length); if (ret) return ret; + *bc = bytes_copied; + return 0; +} + +int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, + int wqe_index, + void *buffer, + int buflen, + size_t *bc) +{ + struct ib_umem *umem = srq->umem; + size_t bytes_copied; + int ret; + + ret = mlx5_ib_read_user_wqe_common(umem, + buffer, + buflen, + wqe_index, + 0, + srq->msrq.max, + srq->msrq.wqe_shift, + buflen, + &bytes_copied); - return wqe_length; + if (ret) + return ret; + *bc = bytes_copied; + return 0; } static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) @@ -645,16 +743,14 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, return bfregi->sys_pages[index_of_sys_page] + offset; } -static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, - struct ib_pd *pd, +static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, unsigned long addr, size_t size, - struct ib_umem **umem, - int *npages, int *page_shift, int *ncont, - u32 *offset) + struct ib_umem **umem, int *npages, int *page_shift, + int *ncont, u32 *offset) { int err; - *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0); + *umem = ib_umem_get(udata, addr, size, 0, 0); if (IS_ERR(*umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); return PTR_ERR(*umem); @@ -695,10 +791,9 @@ static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, } static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct mlx5_ib_rwq *rwq, + struct ib_udata *udata, struct mlx5_ib_rwq *rwq, struct mlx5_ib_create_wq *ucmd) { - struct mlx5_ib_ucontext *context; int page_shift = 0; int npages; u32 offset = 0; @@ -708,9 +803,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (!ucmd->buf_addr) return -EINVAL; - context = to_mucontext(pd->uobject->context); - rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr, - rwq->buf_size, 0, 0); + rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0, 0); if (IS_ERR(rwq->umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); err = PTR_ERR(rwq->umem); @@ -735,7 +828,8 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, (unsigned long long)ucmd->buf_addr, rwq->buf_size, npages, page_shift, ncont, offset); - err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db); + err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), udata, + ucmd->db_addr, &rwq->db); if (err) { mlx5_ib_dbg(dev, "map failed\n"); goto err_umem; @@ -819,10 +913,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd.buf_addr && ubuffer->buf_size) { ubuffer->buf_addr = ucmd.buf_addr; - err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, - ubuffer->buf_size, - &ubuffer->umem, &npages, &page_shift, - &ncont, &offset); + err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, + ubuffer->buf_size, &ubuffer->umem, + &npages, &page_shift, &ncont, &offset); if (err) goto err_bfreg; } else { @@ -856,7 +949,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, resp->bfreg_index = MLX5_IB_INVALID_BFREG; qp->bfregn = bfregn; - err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db); + err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &qp->db); if (err) { mlx5_ib_dbg(dev, "map failed\n"); goto err_free; @@ -1119,6 +1212,7 @@ static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev, } static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, + struct ib_udata *udata, struct mlx5_ib_sq *sq, void *qpin, struct ib_pd *pd) { @@ -1135,9 +1229,9 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, int ncont = 0; u32 offset = 0; - err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size, - &sq->ubuffer.umem, &npages, &page_shift, - &ncont, &offset); + err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, + &sq->ubuffer.umem, &npages, &page_shift, &ncont, + &offset); if (err) return err; @@ -1374,7 +1468,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (err) return err; - err = create_raw_packet_qp_sq(dev, sq, in, pd); + err = create_raw_packet_qp_sq(dev, udata, sq, in, pd); if (err) goto err_destroy_tis; @@ -5795,7 +5889,7 @@ static int prepare_user_rq(struct ib_pd *pd, return err; } - err = create_user_rq(dev, pd, rwq, &ucmd); + err = create_user_rq(dev, pd, udata, rwq, &ucmd); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); return err; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 4e8d18009f58..22bd774e0b4e 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -79,8 +79,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); - srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size, - 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); if (IS_ERR(srq->umem)) { mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size); err = PTR_ERR(srq->umem); @@ -104,7 +103,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0); - err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), + err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), udata, ucmd.db_addr, &srq->db); if (err) { mlx5_ib_dbg(dev, "map doorbell failed\n"); diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index 75eb5839ae95..c330af35ff10 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -46,8 +46,6 @@ struct mlx5_core_srq { int wqe_shift; void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e); - atomic_t refcount; - struct completion free; u16 uid; }; diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 7aaaffbd4afa..63ac38bb3498 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -87,7 +87,7 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) srq = radix_tree_lookup(&table->tree, srqn); if (srq) - atomic_inc(&srq->refcount); + atomic_inc(&srq->common.refcount); spin_unlock(&table->lock); @@ -594,8 +594,8 @@ int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, if (err) return err; - atomic_set(&srq->refcount, 1); - init_completion(&srq->free); + atomic_set(&srq->common.refcount, 1); + init_completion(&srq->common.free); spin_lock_irq(&table->lock); err = radix_tree_insert(&table->tree, srq->srqn, srq); @@ -627,9 +627,8 @@ int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) if (err) return err; - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); - wait_for_completion(&srq->free); + mlx5_core_res_put(&srq->common); + wait_for_completion(&srq->common.free); return 0; } @@ -685,7 +684,7 @@ static int srq_event_notifier(struct notifier_block *nb, srq = radix_tree_lookup(&table->tree, srqn); if (srq) - atomic_inc(&srq->refcount); + atomic_inc(&srq->common.refcount); spin_unlock(&table->lock); @@ -694,8 +693,7 @@ static int srq_event_notifier(struct notifier_block *nb, srq->event(srq, eqe->type); - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); + mlx5_core_res_put(&srq->common); return NOTIFY_OK; } diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 92c49bff22bc..fe9654a7af71 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -961,7 +961,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) /* We can handle large RDMA requests, so allow larger segments. */ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); - mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev); + mdev = ib_alloc_device(mthca_dev, ib_dev); if (!mdev) { dev_err(&pdev->dev, "Device struct alloc failed, " "aborting.\n"); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index e3e9dd54caa2..1bb67562c8c8 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -318,7 +318,7 @@ static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, else uresp.uarc_size = 0; - context = kmalloc(sizeof *context, GFP_KERNEL); + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return ERR_PTR(-ENOMEM); @@ -381,7 +381,7 @@ static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, struct mthca_pd *pd; int err; - pd = kmalloc(sizeof *pd, GFP_KERNEL); + pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) return ERR_PTR(-ENOMEM); @@ -684,7 +684,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, goto err_unmap_set; } - cq = kmalloc(sizeof *cq, GFP_KERNEL); + cq = kzalloc(sizeof(*cq), GFP_KERNEL); if (!cq) { err = -ENOMEM; goto err_unmap_arm; @@ -931,7 +931,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, + mr->umem = ib_umem_get(udata, start, length, acc, ucmd.mr_attrs & MTHCA_MR_DMASYNC); if (IS_ERR(mr->umem)) { @@ -1081,7 +1081,8 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { struct mthca_dev *dev = - container_of(device, struct mthca_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); + return sprintf(buf, "%x\n", dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -1090,7 +1091,8 @@ static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { struct mthca_dev *dev = - container_of(device, struct mthca_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); + switch (dev->pdev->device) { case PCI_DEVICE_ID_MELLANOX_TAVOR: return sprintf(buf, "MT23108\n"); @@ -1111,7 +1113,8 @@ static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { struct mthca_dev *dev = - container_of(device, struct mthca_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); + return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); } static DEVICE_ATTR_RO(board_id); @@ -1338,7 +1341,7 @@ int mthca_register_device(struct mthca_dev *dev) rdma_set_device_sysfs_group(&dev->ib_dev, &mthca_attr_group); dev->ib_dev.driver_id = RDMA_DRIVER_MTHCA; - ret = ib_register_device(&dev->ib_dev, "mthca%d", NULL); + ret = ib_register_device(&dev->ib_dev, "mthca%d"); if (ret) return ret; diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig index 7964eba8e7ed..52caae954e4a 100644 --- a/drivers/infiniband/hw/nes/Kconfig +++ b/drivers/infiniband/hw/nes/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_NES tristate "NetEffect RNIC Driver" - depends on PCI && INET && INFINIBAND + depends on PCI && INET select LIBCRC32C ---help--- This is the RDMA Network Interface Card (RNIC) driver for diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 4e7f08ee1907..6eb991d40035 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -2134,7 +2134,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u8 stag_key; int first_page = 1; - region = ib_umem_get(pd->uobject->context, start, length, acc, 0); + region = ib_umem_get(udata, start, length, acc, 0); if (IS_ERR(region)) { return (struct ib_mr *)region; } @@ -2560,7 +2560,7 @@ static ssize_t hw_rev_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nes_ib_device *nesibdev = - container_of(dev, struct nes_ib_device, ibdev.dev); + rdma_device_to_drv_device(dev, struct nes_ib_device, ibdev); struct nes_vnic *nesvnic = nesibdev->nesvnic; nes_debug(NES_DBG_INIT, "\n"); @@ -3669,7 +3669,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; - nesibdev = (struct nes_ib_device *)ib_alloc_device(sizeof(struct nes_ib_device)); + nesibdev = ib_alloc_device(nes_ib_device, ibdev); if (nesibdev == NULL) { return NULL; } @@ -3801,7 +3801,7 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev) rdma_set_device_sysfs_group(&nesvnic->nesibdev->ibdev, &nes_attr_group); nesvnic->nesibdev->ibdev.driver_id = RDMA_DRIVER_NES; - ret = ib_register_device(&nesvnic->nesibdev->ibdev, "nes%d", NULL); + ret = ib_register_device(&nesvnic->nesibdev->ibdev, "nes%d"); if (ret) { return ret; } diff --git a/drivers/infiniband/hw/ocrdma/Makefile b/drivers/infiniband/hw/ocrdma/Makefile index d1bfd4f4cdde..e3f20ca15462 100644 --- a/drivers/infiniband/hw/ocrdma/Makefile +++ b/drivers/infiniband/hw/ocrdma/Makefile @@ -1,4 +1,4 @@ -ccflags-y := -Idrivers/net/ethernet/emulex/benet +ccflags-y := -I $(srctree)/drivers/net/ethernet/emulex/benet obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma.o diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 1f393842453a..88970a6bb555 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -118,7 +118,8 @@ static void get_dev_fw_str(struct ib_device *device, char *str) static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { - struct ocrdma_dev *dev = dev_get_drvdata(device); + struct ocrdma_dev *dev = + rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); } @@ -127,7 +128,8 @@ static DEVICE_ATTR_RO(hw_rev); static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { - struct ocrdma_dev *dev = dev_get_drvdata(device); + struct ocrdma_dev *dev = + rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]); } @@ -243,7 +245,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) } rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA; - return ib_register_device(&dev->ibdev, "ocrdma%d", NULL); + return ib_register_device(&dev->ibdev, "ocrdma%d"); } static int ocrdma_alloc_resources(struct ocrdma_dev *dev) @@ -295,7 +297,7 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) u8 lstate = 0; struct ocrdma_dev *dev; - dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev)); + dev = ib_alloc_device(ocrdma_dev, ibdev); if (!dev) { pr_err("Unable to allocate ib device\n"); return NULL; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 6be0ea109138..a902942adb5d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -767,88 +767,65 @@ void ocrdma_add_port_stats(struct ocrdma_dev *dev) /* Create post stats base dir */ dev->dir = debugfs_create_dir(pci_name(pdev), ocrdma_dbgfs_dir); - if (!dev->dir) - goto err; dev->rsrc_stats.type = OCRDMA_RSRC_STATS; dev->rsrc_stats.dev = dev; - if (!debugfs_create_file("resource_stats", S_IRUSR, dev->dir, - &dev->rsrc_stats, &ocrdma_dbg_ops)) - goto err; + debugfs_create_file("resource_stats", S_IRUSR, dev->dir, + &dev->rsrc_stats, &ocrdma_dbg_ops); dev->rx_stats.type = OCRDMA_RXSTATS; dev->rx_stats.dev = dev; - if (!debugfs_create_file("rx_stats", S_IRUSR, dev->dir, - &dev->rx_stats, &ocrdma_dbg_ops)) - goto err; + debugfs_create_file("rx_stats", S_IRUSR, dev->dir, &dev->rx_stats, + &ocrdma_dbg_ops); dev->wqe_stats.type = OCRDMA_WQESTATS; dev->wqe_stats.dev = dev; - if (!debugfs_create_file("wqe_stats", S_IRUSR, dev->dir, - &dev->wqe_stats, &ocrdma_dbg_ops)) - goto err; + debugfs_create_file("wqe_stats", S_IRUSR, dev->dir, &dev->wqe_stats, + &ocrdma_dbg_ops); dev->tx_stats.type = OCRDMA_TXSTATS; dev->tx_stats.dev = dev; - if (!debugfs_create_file("tx_stats", S_IRUSR, dev->dir, - &dev->tx_stats, &ocrdma_dbg_ops)) - goto err; + debugfs_create_file("tx_stats", S_IRUSR, dev->dir, &dev->tx_stats, + &ocrdma_dbg_ops); dev->db_err_stats.type = OCRDMA_DB_ERRSTATS; dev->db_err_stats.dev = dev; - if (!debugfs_create_file("db_err_stats", S_IRUSR, dev->dir, - &dev->db_err_stats, &ocrdma_dbg_ops)) - goto err; - + debugfs_create_file("db_err_stats", S_IRUSR, dev->dir, + &dev->db_err_stats, &ocrdma_dbg_ops); dev->tx_qp_err_stats.type = OCRDMA_TXQP_ERRSTATS; dev->tx_qp_err_stats.dev = dev; - if (!debugfs_create_file("tx_qp_err_stats", S_IRUSR, dev->dir, - &dev->tx_qp_err_stats, &ocrdma_dbg_ops)) - goto err; + debugfs_create_file("tx_qp_err_stats", S_IRUSR, dev->dir, + &dev->tx_qp_err_stats, &ocrdma_dbg_ops); dev->rx_qp_err_stats.type = OCRDMA_RXQP_ERRSTATS; dev->rx_qp_err_stats.dev = dev; - if (!debugfs_create_file("rx_qp_err_stats", S_IRUSR, dev->dir, - &dev->rx_qp_err_stats, &ocrdma_dbg_ops)) - goto err; - + debugfs_create_file("rx_qp_err_stats", S_IRUSR, dev->dir, + &dev->rx_qp_err_stats, &ocrdma_dbg_ops); dev->tx_dbg_stats.type = OCRDMA_TX_DBG_STATS; dev->tx_dbg_stats.dev = dev; - if (!debugfs_create_file("tx_dbg_stats", S_IRUSR, dev->dir, - &dev->tx_dbg_stats, &ocrdma_dbg_ops)) - goto err; + debugfs_create_file("tx_dbg_stats", S_IRUSR, dev->dir, + &dev->tx_dbg_stats, &ocrdma_dbg_ops); dev->rx_dbg_stats.type = OCRDMA_RX_DBG_STATS; dev->rx_dbg_stats.dev = dev; - if (!debugfs_create_file("rx_dbg_stats", S_IRUSR, dev->dir, - &dev->rx_dbg_stats, &ocrdma_dbg_ops)) - goto err; + debugfs_create_file("rx_dbg_stats", S_IRUSR, dev->dir, + &dev->rx_dbg_stats, &ocrdma_dbg_ops); dev->driver_stats.type = OCRDMA_DRV_STATS; dev->driver_stats.dev = dev; - if (!debugfs_create_file("driver_dbg_stats", S_IRUSR, dev->dir, - &dev->driver_stats, &ocrdma_dbg_ops)) - goto err; + debugfs_create_file("driver_dbg_stats", S_IRUSR, dev->dir, + &dev->driver_stats, &ocrdma_dbg_ops); dev->reset_stats.type = OCRDMA_RESET_STATS; dev->reset_stats.dev = dev; - if (!debugfs_create_file("reset_stats", 0200, dev->dir, - &dev->reset_stats, &ocrdma_dbg_ops)) - goto err; - - - return; -err: - debugfs_remove_recursive(dev->dir); - dev->dir = NULL; + debugfs_create_file("reset_stats", 0200, dev->dir, &dev->reset_stats, + &ocrdma_dbg_ops); } void ocrdma_rem_port_stats(struct ocrdma_dev *dev) { - if (!dev->dir) - return; debugfs_remove_recursive(dev->dir); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 287c332ff0e6..2a62936bef4d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -55,7 +55,7 @@ int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { - if (index > 1) + if (index > 0) return -EINVAL; *pkey = 0xffff; @@ -916,7 +916,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(status); - mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0); + mr->umem = ib_umem_get(udata, start, len, acc, 0); if (IS_ERR(mr->umem)) { status = -EFAULT; goto umem_err; diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 75940e2a8791..878e9e23652b 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -137,7 +137,8 @@ static int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num, static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { - struct qedr_dev *dev = dev_get_drvdata(device); + struct qedr_dev *dev = + rdma_device_to_drv_device(device, struct qedr_dev, ibdev); return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor); } @@ -290,7 +291,7 @@ static int qedr_register_device(struct qedr_dev *dev) ib_set_device_ops(&dev->ibdev, &qedr_dev_ops); dev->ibdev.driver_id = RDMA_DRIVER_QEDR; - return ib_register_device(&dev->ibdev, "qedr%d", NULL); + return ib_register_device(&dev->ibdev, "qedr%d"); } /* This function allocates fast-path status block memory */ @@ -852,7 +853,7 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev, struct qedr_dev *dev; int rc = 0; - dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev)); + dev = ib_alloc_device(qedr_dev, ibdev); if (!dev) { pr_err("Unable to allocate ib device\n"); return NULL; diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 93b16237b767..0555e5a8c9ed 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -349,7 +349,7 @@ qedr_iw_event_handler(void *context, struct qed_iwarp_cm_event_params *params) default: DP_NOTICE(dev, "Unknown event received %d\n", params->event); break; - }; + } return 0; } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index e1ccf32b1c3d..989f08633fbe 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -67,7 +67,7 @@ static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { - if (index > QEDR_ROCE_PKEY_TABLE_LEN) + if (index >= QEDR_ROCE_PKEY_TABLE_LEN) return -EINVAL; *pkey = QEDR_ROCE_PKEY_DEFAULT; @@ -736,11 +736,10 @@ static inline int qedr_align_cq_entries(int entries) return aligned_size / QEDR_CQE_SIZE; } -static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx, +static inline int qedr_init_user_queue(struct ib_udata *udata, struct qedr_dev *dev, - struct qedr_userq *q, - u64 buf_addr, size_t buf_len, - int access, int dmasync, + struct qedr_userq *q, u64 buf_addr, + size_t buf_len, int access, int dmasync, int alloc_and_init) { u32 fw_pages; @@ -748,7 +747,7 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx, q->buf_addr = buf_addr; q->buf_len = buf_len; - q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync); + q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access, dmasync); if (IS_ERR(q->umem)) { DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n", PTR_ERR(q->umem)); @@ -905,9 +904,9 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev, cq->cq_type = QEDR_CQ_TYPE_USER; - rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr, - ureq.len, IB_ACCESS_LOCAL_WRITE, - 1, 1); + rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr, + ureq.len, IB_ACCESS_LOCAL_WRITE, 1, + 1); if (rc) goto err0; @@ -1344,7 +1343,7 @@ static void qedr_free_srq_kernel_params(struct qedr_srq *srq) hw_srq->phy_prod_pair_addr); } -static int qedr_init_srq_user_params(struct ib_ucontext *ib_ctx, +static int qedr_init_srq_user_params(struct ib_udata *udata, struct qedr_srq *srq, struct qedr_create_srq_ureq *ureq, int access, int dmasync) @@ -1352,14 +1351,14 @@ static int qedr_init_srq_user_params(struct ib_ucontext *ib_ctx, struct scatterlist *sg; int rc; - rc = qedr_init_user_queue(ib_ctx, srq->dev, &srq->usrq, ureq->srq_addr, + rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr, ureq->srq_len, access, dmasync, 1); if (rc) return rc; - srq->prod_umem = ib_umem_get(ib_ctx, ureq->prod_pair_addr, - sizeof(struct rdma_srq_producers), - access, dmasync); + srq->prod_umem = + ib_umem_get(udata, ureq->prod_pair_addr, + sizeof(struct rdma_srq_producers), access, dmasync); if (IS_ERR(srq->prod_umem)) { qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); ib_umem_release(srq->usrq.umem); @@ -1434,7 +1433,6 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, struct qedr_pd *pd = get_qedr_pd(ibpd); struct qedr_create_srq_ureq ureq = {}; u64 pbl_base_addr, phy_prod_pair_addr; - struct ib_ucontext *ib_ctx = NULL; struct qedr_srq_hwq_info *hw_srq; u32 page_cnt, page_size; struct qedr_srq *srq; @@ -1460,15 +1458,13 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, hw_srq->max_sges = init_attr->attr.max_sge; if (udata && ibpd->uobject && ibpd->uobject->context) { - ib_ctx = ibpd->uobject->context; - if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { DP_ERR(dev, "create srq: problem copying data from user space\n"); goto err0; } - rc = qedr_init_srq_user_params(ib_ctx, srq, &ureq, 0, 0); + rc = qedr_init_srq_user_params(udata, srq, &ureq, 0, 0); if (rc) goto err0; @@ -1699,13 +1695,10 @@ static int qedr_create_user_qp(struct qedr_dev *dev, struct qed_rdma_create_qp_in_params in_params; struct qed_rdma_create_qp_out_params out_params; struct qedr_pd *pd = get_qedr_pd(ibpd); - struct ib_ucontext *ib_ctx = NULL; struct qedr_create_qp_ureq ureq; int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; - ib_ctx = ibpd->uobject->context; - memset(&ureq, 0, sizeof(ureq)); rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); if (rc) { @@ -1714,14 +1707,14 @@ static int qedr_create_user_qp(struct qedr_dev *dev, } /* SQ - read access only (0), dma sync not required (0) */ - rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr, + rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr, ureq.sq_len, 0, 0, alloc_and_init); if (rc) return rc; if (!qp->srq) { /* RQ - read access only (0), dma sync not required (0) */ - rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr, + rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, ureq.rq_len, 0, 0, alloc_and_init); if (rc) return rc; @@ -2117,7 +2110,7 @@ static int qedr_update_qp_state(struct qedr_dev *dev, default: status = -EINVAL; break; - }; + } break; case QED_ROCE_QP_STATE_INIT: switch (new_state) { @@ -2138,7 +2131,7 @@ static int qedr_update_qp_state(struct qedr_dev *dev, /* Invalid state change. */ status = -EINVAL; break; - }; + } break; case QED_ROCE_QP_STATE_RTR: /* RTR->XXX */ @@ -2151,7 +2144,7 @@ static int qedr_update_qp_state(struct qedr_dev *dev, /* Invalid state change. */ status = -EINVAL; break; - }; + } break; case QED_ROCE_QP_STATE_RTS: /* RTS->XXX */ @@ -2164,7 +2157,7 @@ static int qedr_update_qp_state(struct qedr_dev *dev, /* Invalid state change. */ status = -EINVAL; break; - }; + } break; case QED_ROCE_QP_STATE_SQD: /* SQD->XXX */ @@ -2176,7 +2169,7 @@ static int qedr_update_qp_state(struct qedr_dev *dev, /* Invalid state change. */ status = -EINVAL; break; - }; + } break; case QED_ROCE_QP_STATE_ERR: /* ERR->XXX */ @@ -2194,12 +2187,12 @@ static int qedr_update_qp_state(struct qedr_dev *dev, default: status = -EINVAL; break; - }; + } break; default: status = -EINVAL; break; - }; + } return status; } @@ -2719,7 +2712,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr->type = QEDR_MR_USER; - mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0); + mr->umem = ib_umem_get(udata, start, len, acc, 0); if (IS_ERR(mr->umem)) { rc = -EFAULT; goto err0; diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c index 5ed1ed93380f..caeb77d07a58 100644 --- a/drivers/infiniband/hw/qib/qib_debugfs.c +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -66,15 +66,6 @@ static const struct file_operations _##name##_file_ops = { \ .release = seq_release \ }; -#define DEBUGFS_FILE_CREATE(name) \ -do { \ - struct dentry *ent; \ - ent = debugfs_create_file(#name , 0400, ibd->qib_ibdev_dbg, \ - ibd, &_##name##_file_ops); \ - if (!ent) \ - pr_warn("create of " #name " failed\n"); \ -} while (0) - static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) { struct qib_opcode_stats_perctx *opstats; @@ -249,17 +240,17 @@ DEBUGFS_FILE(qp_stats) void qib_dbg_ibdev_init(struct qib_ibdev *ibd) { + struct dentry *root; char name[10]; snprintf(name, sizeof(name), "qib%d", dd_from_dev(ibd)->unit); - ibd->qib_ibdev_dbg = debugfs_create_dir(name, qib_dbg_root); - if (!ibd->qib_ibdev_dbg) { - pr_warn("create of %s failed\n", name); - return; - } - DEBUGFS_FILE_CREATE(opcode_stats); - DEBUGFS_FILE_CREATE(ctx_stats); - DEBUGFS_FILE_CREATE(qp_stats); + root = debugfs_create_dir(name, qib_dbg_root); + ibd->qib_ibdev_dbg = root; + + debugfs_create_file("opcode_stats", 0400, root, ibd, + &_opcode_stats_file_ops); + debugfs_create_file("ctx_stats", 0400, root, ibd, &_ctx_stats_file_ops); + debugfs_create_file("qp_stats", 0400, root, ibd, &_qp_stats_file_ops); } void qib_dbg_ibdev_exit(struct qib_ibdev *ibd) @@ -274,8 +265,6 @@ out: void qib_dbg_init(void) { qib_dbg_root = debugfs_create_dir(QIB_DRV_NAME, NULL); - if (!qib_dbg_root) - pr_warn("init of debugfs failed\n"); } void qib_dbg_exit(void) diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 3d64081c4819..99e11c347130 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -565,13 +565,8 @@ retry: sge = &ss->sge; while (dwords) { u32 dw; - u32 len; + u32 len = rvt_get_sge_length(sge, dwords << 2); - len = dwords << 2; - if (len > sge->length) - len = sge->length; - if (len > sge->sge_length) - len = sge->sge_length; dw = (len + 3) >> 2; addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr, dw << 2, DMA_TO_DEVICE); @@ -594,24 +589,7 @@ retry: descqp = &ppd->sdma_descq[0].qw[0]; ++ppd->sdma_generation; } - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= RVT_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } - + rvt_update_sge(ss, len, false); dwoffset += dw; dwords -= dw; } diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 1cf4ca3f23e3..905206a0c2d5 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -555,7 +555,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); } @@ -565,7 +565,7 @@ static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); int ret; @@ -590,7 +590,7 @@ static ssize_t boardversion_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ @@ -602,7 +602,7 @@ static ssize_t localbus_info_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ @@ -614,7 +614,7 @@ static ssize_t nctxts_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of user ports (contexts) available. */ @@ -630,7 +630,7 @@ static ssize_t nfreectxts_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. */ @@ -642,7 +642,7 @@ static ssize_t serial_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); buf[sizeof(dd->serial)] = '\0'; @@ -657,7 +657,7 @@ static ssize_t chip_reset_store(struct device *device, size_t count) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); int ret; @@ -679,7 +679,7 @@ static ssize_t tempsense_show(struct device *device, struct device_attribute *attr, char *buf) { struct qib_ibdev *dev = - container_of(device, struct qib_ibdev, rdi.ibdev.dev); + rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); int ret; int idx; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 445ea19a2ec8..5cdedba2d164 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -172,12 +172,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) ssge.num_sge = swqe->wr.num_sge; sge = &ssge.sge; while (length) { - u32 len = sge->length; + u32 len = rvt_get_sge_length(sge, length); - if (len > length) - len = length; - if (len > sge->sge_length) - len = sge->sge_length; rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false); sge->vaddr += len; sge->length -= len; diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index 16543d5e80c3..075f09fb7ce3 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -67,10 +67,10 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages, } for (got = 0; got < num_pages; got += ret) { - ret = get_user_pages(start_page + got * PAGE_SIZE, - num_pages - got, - FOLL_WRITE | FOLL_FORCE, - p + got, NULL); + ret = get_user_pages_longterm(start_page + got * PAGE_SIZE, + num_pages - got, + FOLL_WRITE | FOLL_FORCE, + p + got, NULL); if (ret < 0) goto bail_release; } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 276304f611ab..5ff32d32c61c 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -144,12 +144,8 @@ static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length) u32 ndesc = 1; /* count the header */ while (length) { - u32 len = sge.length; + u32 len = rvt_get_sge_length(&sge, length); - if (len > length) - len = length; - if (len > sge.sge_length) - len = sge.sge_length; if (((long) sge.vaddr & (sizeof(u32) - 1)) || (len != length && (len & (sizeof(u32) - 1)))) { ndesc = 0; @@ -186,12 +182,8 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length) struct rvt_sge *sge = &ss->sge; while (length) { - u32 len = sge->length; + u32 len = rvt_get_sge_length(sge, length); - if (len > length) - len = length; - if (len > sge->sge_length) - len = sge->sge_length; memcpy(data, sge->vaddr, len); sge->vaddr += len; sge->length -= len; @@ -440,13 +432,9 @@ static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss, u32 last; while (1) { - u32 len = ss->sge.length; + u32 len = rvt_get_sge_length(&ss->sge, length); u32 off; - if (len > length) - len = length; - if (len > ss->sge.sge_length) - len = ss->sge.sge_length; /* If the source address is not aligned, try to align it. */ off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); if (off) { @@ -1494,6 +1482,7 @@ static void qib_fill_device_attr(struct qib_devdata *dd) } static const struct ib_device_ops qib_dev_ops = { + .init_port = qib_create_port_files, .modify_device = qib_modify_device, .process_mad = qib_process_mad, }; @@ -1567,7 +1556,6 @@ int qib_register_ib_device(struct qib_devdata *dd) /* * Fill in rvt info object. */ - dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files; dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah; dd->verbs_dev.rdi.driver_f.setup_wqe = qib_check_send_wqe; diff --git a/drivers/infiniband/hw/usnic/Makefile b/drivers/infiniband/hw/usnic/Makefile index 94ae7a1a6950..f12a4938ffd2 100644 --- a/drivers/infiniband/hw/usnic/Makefile +++ b/drivers/infiniband/hw/usnic/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-y := -Idrivers/net/ethernet/cisco/enic +ccflags-y := -I $(srctree)/drivers/net/ethernet/cisco/enic obj-$(CONFIG_INFINIBAND_USNIC)+= usnic_verbs.o diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c index a3115709fb03..e5a3f02fb078 100644 --- a/drivers/infiniband/hw/usnic/usnic_debugfs.c +++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c @@ -113,42 +113,21 @@ static const struct file_operations flowinfo_ops = { void usnic_debugfs_init(void) { debugfs_root = debugfs_create_dir(DRV_NAME, NULL); - if (IS_ERR(debugfs_root)) { - usnic_err("Failed to create debugfs root dir, check if debugfs is enabled in kernel configuration\n"); - goto out_clear_root; - } flows_dentry = debugfs_create_dir("flows", debugfs_root); - if (IS_ERR_OR_NULL(flows_dentry)) { - usnic_err("Failed to create debugfs flow dir with err %ld\n", - PTR_ERR(flows_dentry)); - goto out_free_root; - } debugfs_create_file("build-info", S_IRUGO, debugfs_root, NULL, &usnic_debugfs_buildinfo_ops); - return; - -out_free_root: - debugfs_remove_recursive(debugfs_root); -out_clear_root: - debugfs_root = NULL; } void usnic_debugfs_exit(void) { - if (!debugfs_root) - return; - debugfs_remove_recursive(debugfs_root); debugfs_root = NULL; } void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow) { - if (IS_ERR_OR_NULL(flows_dentry)) - return; - scnprintf(qp_flow->dentry_name, sizeof(qp_flow->dentry_name), "%u", qp_flow->flow->flow_id); qp_flow->dbgfs_dentry = debugfs_create_file(qp_flow->dentry_name, @@ -156,11 +135,6 @@ void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow) flows_dentry, qp_flow, &flowinfo_ops); - if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) { - usnic_err("Failed to create dbg fs entry for flow %u with error %ld\n", - qp_flow->flow->flow_id, - PTR_ERR(qp_flow->dbgfs_dentry)); - } } void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index b2323a52a0dd..c4a4cfe4b567 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -333,32 +333,25 @@ static void usnic_get_dev_fw_str(struct ib_device *device, char *str) static const struct ib_device_ops usnic_dev_ops = { .alloc_pd = usnic_ib_alloc_pd, .alloc_ucontext = usnic_ib_alloc_ucontext, - .create_ah = usnic_ib_create_ah, .create_cq = usnic_ib_create_cq, .create_qp = usnic_ib_create_qp, .dealloc_pd = usnic_ib_dealloc_pd, .dealloc_ucontext = usnic_ib_dealloc_ucontext, .dereg_mr = usnic_ib_dereg_mr, - .destroy_ah = usnic_ib_destroy_ah, .destroy_cq = usnic_ib_destroy_cq, .destroy_qp = usnic_ib_destroy_qp, .get_dev_fw_str = usnic_get_dev_fw_str, - .get_dma_mr = usnic_ib_get_dma_mr, .get_link_layer = usnic_ib_port_link_layer, .get_netdev = usnic_get_netdev, .get_port_immutable = usnic_port_immutable, .mmap = usnic_ib_mmap, .modify_qp = usnic_ib_modify_qp, - .poll_cq = usnic_ib_poll_cq, - .post_recv = usnic_ib_post_recv, - .post_send = usnic_ib_post_send, .query_device = usnic_ib_query_device, .query_gid = usnic_ib_query_gid, .query_pkey = usnic_ib_query_pkey, .query_port = usnic_ib_query_port, .query_qp = usnic_ib_query_qp, .reg_user_mr = usnic_ib_reg_mr, - .req_notify_cq = usnic_ib_req_notify_cq, }; /* Start of PF discovery section */ @@ -372,7 +365,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) usnic_dbg("\n"); netdev = pci_get_drvdata(dev); - us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev)); + us_ibdev = ib_alloc_device(usnic_ib_dev, ib_dev); if (!us_ibdev) { usnic_err("Device %s context alloc failed\n", netdev_name(pci_get_drvdata(dev))); @@ -422,7 +415,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC; rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group); - if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d", NULL)) + if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d")) goto err_fwd_dealloc; usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index a7e4b2ccfaf8..c85d48ae7442 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -50,7 +50,7 @@ static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { struct usnic_ib_dev *us_ibdev = - container_of(device, struct usnic_ib_dev, ib_dev.dev); + rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); unsigned short subsystem_device_id; mutex_lock(&us_ibdev->usdev_lock); @@ -67,14 +67,13 @@ static DEVICE_ATTR_RO(board_id); static ssize_t config_show(struct device *device, struct device_attribute *attr, char *buf) { - struct usnic_ib_dev *us_ibdev; + struct usnic_ib_dev *us_ibdev = + rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); char *ptr; unsigned left; unsigned n; enum usnic_vnic_res_type res_type; - us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); - /* Buffer space limit is 1 page */ ptr = buf; left = PAGE_SIZE; @@ -130,9 +129,8 @@ static DEVICE_ATTR_RO(config); static ssize_t iface_show(struct device *device, struct device_attribute *attr, char *buf) { - struct usnic_ib_dev *us_ibdev; - - us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + struct usnic_ib_dev *us_ibdev = + rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); return scnprintf(buf, PAGE_SIZE, "%s\n", netdev_name(us_ibdev->netdev)); @@ -142,9 +140,8 @@ static DEVICE_ATTR_RO(iface); static ssize_t max_vf_show(struct device *device, struct device_attribute *attr, char *buf) { - struct usnic_ib_dev *us_ibdev; - - us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + struct usnic_ib_dev *us_ibdev = + rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); return scnprintf(buf, PAGE_SIZE, "%u\n", kref_read(&us_ibdev->vf_cnt)); @@ -154,10 +151,10 @@ static DEVICE_ATTR_RO(max_vf); static ssize_t qp_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) { - struct usnic_ib_dev *us_ibdev; + struct usnic_ib_dev *us_ibdev = + rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); int qp_per_vf; - us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); @@ -169,9 +166,8 @@ static DEVICE_ATTR_RO(qp_per_vf); static ssize_t cq_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) { - struct usnic_ib_dev *us_ibdev; - - us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + struct usnic_ib_dev *us_ibdev = + rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); return scnprintf(buf, PAGE_SIZE, "%d\n", us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 1d4abef17e38..9dea18106247 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -449,7 +449,7 @@ struct net_device *usnic_get_netdev(struct ib_device *device, u8 port_num) int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { - if (index > 1) + if (index > 0) return -EINVAL; *pkey = 0xffff; @@ -683,7 +683,7 @@ struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev, struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); usnic_dbg("\n"); - context = kmalloc(sizeof(*context), GFP_KERNEL); + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return ERR_PTR(-ENOMEM); @@ -760,57 +760,4 @@ int usnic_ib_mmap(struct ib_ucontext *context, return -EINVAL; } -/* In ib callbacks section - Start of stub funcs */ -struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - u32 flags, - struct ib_udata *udata) - -{ - usnic_dbg("\n"); - return ERR_PTR(-EPERM); -} - -int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags) -{ - usnic_dbg("\n"); - return -EINVAL; -} - -int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr) -{ - usnic_dbg("\n"); - return -EINVAL; -} - -int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr) -{ - usnic_dbg("\n"); - return -EINVAL; -} - -int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries, - struct ib_wc *wc) -{ - usnic_dbg("\n"); - return -EINVAL; -} - -int usnic_ib_req_notify_cq(struct ib_cq *cq, - enum ib_cq_notify_flags flags) -{ - usnic_dbg("\n"); - return -EINVAL; -} - -struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc) -{ - usnic_dbg("\n"); - return ERR_PTR(-ENOMEM); -} - - -/* In ib callbacks section - End of stub funcs */ /* End of ib callbacks section */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index e33144261b9a..99a6d81c2bcd 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -75,19 +75,4 @@ struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev, int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); int usnic_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); -struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - u32 flags, - struct ib_udata *udata); - -int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags); -int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr); -int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr); -int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries, - struct ib_wc *wc); -int usnic_ib_req_notify_cq(struct ib_cq *cq, - enum ib_cq_notify_flags flags); -struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc); #endif /* !USNIC_IB_VERBS_H */ diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 49275a548751..ce01a59fccc4 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -157,9 +157,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, off = 0; while (ret) { - chunk = kmalloc(sizeof(*chunk) + - sizeof(struct scatterlist) * - min_t(int, ret, USNIC_UIOM_PAGE_CHUNK), + chunk = kmalloc(struct_size(chunk, page_list, + min_t(int, ret, USNIC_UIOM_PAGE_CHUNK)), GFP_KERNEL); if (!chunk) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 0f004c737620..104c7db4704f 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -141,7 +141,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, goto err_cq; } - cq->umem = ib_umem_get(context, ucmd.buf_addr, ucmd.buf_size, + cq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(cq->umem)) { ret = PTR_ERR(cq->umem); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index 6fd5a8f4e2f6..8f9749d54688 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -57,7 +57,8 @@ #define PVRDMA_ROCEV1_VERSION 17 #define PVRDMA_ROCEV2_VERSION 18 -#define PVRDMA_VERSION PVRDMA_ROCEV2_VERSION +#define PVRDMA_PPN64_VERSION 19 +#define PVRDMA_VERSION PVRDMA_PPN64_VERSION #define PVRDMA_BOARD_ID 1 #define PVRDMA_REV_ID 1 @@ -279,8 +280,10 @@ struct pvrdma_device_shared_region { /* W: Async ring page info. */ struct pvrdma_ring_page_info cq_ring_pages; /* W: CQ ring page info. */ - u32 uar_pfn; /* W: UAR pageframe. */ - u32 pad2; /* Pad to 8-byte align. */ + union { + u32 uar_pfn; /* W: UAR pageframe. */ + u64 uar_pfn64; /* W: 64-bit UAR page frame. */ + }; struct pvrdma_device_caps caps; /* R: Device capabilities. */ }; @@ -411,8 +414,10 @@ struct pvrdma_cmd_query_pkey_resp { struct pvrdma_cmd_create_uc { struct pvrdma_cmd_hdr hdr; - u32 pfn; /* UAR page frame number */ - u8 reserved[4]; + union { + u32 pfn; /* UAR page frame number */ + u64 pfn64; /* 64-bit UAR page frame number */ + }; }; struct pvrdma_cmd_create_uc_resp { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 39c37b6fd715..e582beaf9430 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -278,7 +278,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) spin_lock_init(&dev->srq_tbl_lock); rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group); - ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", NULL); + ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d"); if (ret) goto err_srq_free; @@ -795,7 +795,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev)); /* Allocate zero-out device */ - dev = (struct pvrdma_dev *)ib_alloc_device(sizeof(*dev)); + dev = ib_alloc_device(pvrdma_dev, ib_dev); if (!dev) { dev_err(&pdev->dev, "failed to allocate IB device\n"); return -ENOMEM; @@ -905,7 +905,11 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, PVRDMA_GOS_BITS_64; dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX; dev->dsr->gos_info.gos_ver = 1; - dev->dsr->uar_pfn = dev->driver_uar.pfn; + + if (dev->dsr_version < PVRDMA_PPN64_VERSION) + dev->dsr->uar_pfn = dev->driver_uar.pfn; + else + dev->dsr->uar_pfn64 = dev->driver_uar.pfn; /* Command slot. */ dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index fa96fa4fb829..a85884e90e84 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -126,8 +126,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(-EINVAL); } - umem = ib_umem_get(pd->uobject->context, start, - length, access_flags, 0); + umem = ib_umem_get(udata, start, length, access_flags, 0); if (IS_ERR(umem)) { dev_warn(&dev->pdev->dev, "could not get umem for mem region\n"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 1ec3646087ba..08f4257169bd 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -262,8 +262,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, if (!is_srq) { /* set qp->sq.wqe_cnt, shift, buf_size.. */ - qp->rumem = ib_umem_get(pd->uobject->context, - ucmd.rbuf_addr, + qp->rumem = ib_umem_get(udata, ucmd.rbuf_addr, ucmd.rbuf_size, 0, 0); if (IS_ERR(qp->rumem)) { ret = PTR_ERR(qp->rumem); @@ -275,8 +274,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, qp->srq = to_vsrq(init_attr->srq); } - qp->sumem = ib_umem_get(pd->uobject->context, - ucmd.sbuf_addr, + qp->sumem = ib_umem_get(udata, ucmd.sbuf_addr, ucmd.sbuf_size, 0, 0); if (IS_ERR(qp->sumem)) { if (!is_srq) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 06ba7c7a2235..951d9d68107a 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -153,9 +153,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, goto err_srq; } - srq->umem = ib_umem_get(pd->uobject->context, - ucmd.buf_addr, - ucmd.buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0, 0); if (IS_ERR(srq->umem)) { ret = PTR_ERR(srq->umem); goto err_srq; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 4d238d0e484b..fafb2add3b44 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -327,7 +327,7 @@ struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev, if (!vdev->ib_active) return ERR_PTR(-EAGAIN); - context = kmalloc(sizeof(*context), GFP_KERNEL); + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return ERR_PTR(-ENOMEM); @@ -340,7 +340,12 @@ struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev, /* get ctx_handle from host */ memset(cmd, 0, sizeof(*cmd)); - cmd->pfn = context->uar.pfn; + + if (vdev->dsr_version < PVRDMA_PPN64_VERSION) + cmd->pfn = context->uar.pfn; + else + cmd->pfn64 = context->uar.pfn; + cmd->hdr.cmd = PVRDMA_CMD_CREATE_UC; ret = pvrdma_cmd_post(vdev, &req, &rsp, PVRDMA_CMD_CREATE_UC_RESP); if (ret < 0) { @@ -457,7 +462,7 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd)) return ERR_PTR(-ENOMEM); - pd = kmalloc(sizeof(*pd), GFP_KERNEL); + pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) { ptr = ERR_PTR(-ENOMEM); goto err; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 49c9541050d4..8b1c1e8dd7ef 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -388,8 +388,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (length == 0) return ERR_PTR(-EINVAL); - umem = ib_umem_get(pd->uobject->context, start, length, - mr_access_flags, 0); + umem = ib_umem_get(udata, start, length, mr_access_flags, 0); if (IS_ERR(umem)) return (void *)umem; diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index 8a89afff3363..dcc1870b8d23 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -66,7 +66,7 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, struct rvt_pd *pd; struct ib_pd *ret; - pd = kmalloc(sizeof(*pd), GFP_KERNEL); + pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) { ret = ERR_PTR(-ENOMEM); goto bail; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index c6cc3e4ab71d..80c994cffd42 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -2988,34 +2988,12 @@ do_write: sge = &sqp->s_sge.sge; while (sqp->s_len) { - u32 len = sqp->s_len; + u32 len = rvt_get_sge_length(sge, sqp->s_len); - if (len > sge->length) - len = sge->length; - if (len > sge->sge_length) - len = sge->sge_length; WARN_ON_ONCE(len == 0); rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, release, copy_last); - sge->vaddr += len; - sge->length -= len; - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (!release) - rvt_put_mr(sge->mr); - if (--sqp->s_sge.num_sge) - *sge = *sqp->s_sge.sg_list++; - } else if (sge->length == 0 && sge->mr->lkey) { - if (++sge->n >= RVT_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - break; - sge->n = 0; - } - sge->vaddr = - sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = - sge->mr->map[sge->m]->segs[sge->n].length; - } + rvt_update_sge(&sqp->s_sge, len, !release); sqp->s_len -= len; } if (release) diff --git a/drivers/infiniband/sw/rdmavt/trace_cq.h b/drivers/infiniband/sw/rdmavt/trace_cq.h index df8e1adbef9d..e3c416c6f900 100644 --- a/drivers/infiniband/sw/rdmavt/trace_cq.h +++ b/drivers/infiniband/sw/rdmavt/trace_cq.h @@ -105,7 +105,7 @@ DEFINE_EVENT(rvt_cq_template, rvt_create_cq, TP_ARGS(cq, attr)); #define CQ_PRN \ -"[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x" +"[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x flags %x imm %x" DECLARE_EVENT_CLASS( rvt_cq_entry_template, @@ -119,6 +119,8 @@ DECLARE_EVENT_CLASS( __field(u32, qpn) __field(u32, length) __field(u32, idx) + __field(u32, flags) + __field(u32, imm) ), TP_fast_assign( RDI_DEV_ASSIGN(cq->rdi) @@ -128,6 +130,8 @@ DECLARE_EVENT_CLASS( __entry->length = wc->byte_len; __entry->qpn = wc->qp->qp_num; __entry->idx = idx; + __entry->flags = wc->wc_flags; + __entry->imm = be32_to_cpu(wc->ex.imm_data); ), TP_printk( CQ_PRN, @@ -137,7 +141,9 @@ DECLARE_EVENT_CLASS( __entry->status, __entry->opcode, show_wc_opcode(__entry->opcode), __entry->length, - __entry->qpn + __entry->qpn, + __entry->flags, + __entry->imm ) ); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index aef3aa3fe667..b3f0c5578925 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -91,7 +91,7 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports) { struct rvt_dev_info *rdi; - rdi = (struct rvt_dev_info *)ib_alloc_device(size); + rdi = container_of(_ib_alloc_device(size), struct rvt_dev_info, ibdev); if (!rdi) return rdi; @@ -304,7 +304,7 @@ static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev, { struct rvt_ucontext *context; - context = kmalloc(sizeof(*context), GFP_KERNEL); + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return ERR_PTR(-ENOMEM); return &context->ibucontext; @@ -446,7 +446,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) * These functions are not part of verbs specifically but are * required for rdmavt to function. */ - if ((!rdi->driver_f.port_callback) || + if ((!rdi->ibdev.ops.init_port) || (!rdi->driver_f.get_pci_dev)) return -EINVAL; break; @@ -644,8 +644,7 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id) rdi->ibdev.driver_id = driver_id; /* We are now good to announce we exist */ - ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev), - rdi->driver_f.port_callback); + ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev)); if (ret) { rvt_pr_err(rdi, "Failed to register driver with ib core.\n"); goto bail_wss; diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index e996da67a851..00eb99d3df86 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -146,8 +146,7 @@ void retransmit_timer(struct timer_list *t) } } -void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp, - struct sk_buff *skb) +void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) { int must_sched; @@ -155,7 +154,8 @@ void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp, must_sched = skb_queue_len(&qp->resp_pkts) > 1; if (must_sched != 0) - rxe_counter_inc(rxe, RXE_CNT_COMPLETER_SCHED); + rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED); + rxe_run_task(&qp->comp.task, must_sched); } diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 01b74597b36a..a65fbd0ff42e 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -239,11 +239,9 @@ int rxe_responder(void *arg); u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb); -void rxe_resp_queue_pkt(struct rxe_dev *rxe, - struct rxe_qp *qp, struct sk_buff *skb); +void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb); -void rxe_comp_queue_pkt(struct rxe_dev *rxe, - struct rxe_qp *qp, struct sk_buff *skb); +void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb); static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp) { diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 9d3916b93f23..2438093776a0 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -171,7 +171,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, void *vaddr; int err; - umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0); + umem = ib_umem_get(udata, start, length, access, 0); if (IS_ERR(umem)) { pr_warn("err %d from rxe_umem_get\n", (int)PTR_ERR(umem)); diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 8fd03ae20efc..19f3c69916b1 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -555,7 +555,7 @@ struct rxe_dev *rxe_net_add(struct net_device *ndev) int err; struct rxe_dev *rxe = NULL; - rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe)); + rxe = ib_alloc_device(rxe_dev, ib_dev); if (!rxe) return NULL; diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 5c29a1bb575a..f9a492ed900b 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -266,14 +266,12 @@ err1: return -EINVAL; } -static inline void rxe_rcv_pkt(struct rxe_dev *rxe, - struct rxe_pkt_info *pkt, - struct sk_buff *skb) +static inline void rxe_rcv_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb) { if (pkt->mask & RXE_REQ_MASK) - rxe_resp_queue_pkt(rxe, pkt->qp, skb); + rxe_resp_queue_pkt(pkt->qp, skb); else - rxe_comp_queue_pkt(rxe, pkt->qp, skb); + rxe_comp_queue_pkt(pkt->qp, skb); } static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) @@ -319,7 +317,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) pkt->qp = qp; rxe_add_ref(qp); - rxe_rcv_pkt(rxe, pkt, skb); + rxe_rcv_pkt(pkt, skb); } spin_unlock_bh(&mcg->mcg_lock); @@ -411,7 +409,7 @@ void rxe_rcv(struct sk_buff *skb) if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN)) rxe_rcv_mcast_pkt(rxe, skb); else - rxe_rcv_pkt(rxe, pkt, skb); + rxe_rcv_pkt(pkt, skb); return; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 231528188250..aca9f60f9b21 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -104,8 +104,7 @@ static char *resp_state_name[] = { }; /* rxe_recv calls here to add a request packet to the input queue */ -void rxe_resp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp, - struct sk_buff *skb) +void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) { int must_sched; struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index b20e6e0415f5..3d01247a28db 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1129,8 +1129,8 @@ static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) static ssize_t parent_show(struct device *device, struct device_attribute *attr, char *buf) { - struct rxe_dev *rxe = container_of(device, struct rxe_dev, - ib_dev.dev); + struct rxe_dev *rxe = + rdma_device_to_drv_device(device, struct rxe_dev, ib_dev); return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1)); } @@ -1258,7 +1258,7 @@ int rxe_register_device(struct rxe_dev *rxe) rdma_set_device_sysfs_group(dev, &rxe_attr_group); dev->driver_id = RDMA_DRIVER_RXE; - err = ib_register_device(dev, "rxe%d", NULL); + err = ib_register_device(dev, "rxe%d"); if (err) { pr_warn("%s failed with error %d\n", __func__, err); goto err1; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 73e808c1e6ad..2aa3457a30ce 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -780,12 +780,12 @@ static inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *w #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG void ipoib_create_debug_files(struct net_device *dev); void ipoib_delete_debug_files(struct net_device *dev); -int ipoib_register_debugfs(void); +void ipoib_register_debugfs(void); void ipoib_unregister_debugfs(void); #else static inline void ipoib_create_debug_files(struct net_device *dev) { } static inline void ipoib_delete_debug_files(struct net_device *dev) { } -static inline int ipoib_register_debugfs(void) { return 0; } +static inline void ipoib_register_debugfs(void) { } static inline void ipoib_unregister_debugfs(void) { } #endif diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 178488028734..64c19f6fa931 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -267,14 +267,10 @@ void ipoib_create_debug_files(struct net_device *dev) snprintf(name, sizeof(name), "%s_mcg", dev->name); priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, ipoib_root, dev, &ipoib_mcg_fops); - if (!priv->mcg_dentry) - ipoib_warn(priv, "failed to create mcg debug file\n"); snprintf(name, sizeof(name), "%s_path", dev->name); priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, ipoib_root, dev, &ipoib_path_fops); - if (!priv->path_dentry) - ipoib_warn(priv, "failed to create path debug file\n"); } void ipoib_delete_debug_files(struct net_device *dev) @@ -286,10 +282,9 @@ void ipoib_delete_debug_files(struct net_device *dev) priv->mcg_dentry = priv->path_dentry = NULL; } -int ipoib_register_debugfs(void) +void ipoib_register_debugfs(void) { ipoib_root = debugfs_create_dir("ipoib", NULL); - return ipoib_root ? 0 : -ENOMEM; } void ipoib_unregister_debugfs(void) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index d932f99201d1..3d3407d1feb4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2411,7 +2411,7 @@ static ssize_t dev_id_show(struct device *dev, } static DEVICE_ATTR_RO(dev_id); -int ipoib_intercept_dev_id_attr(struct net_device *dev) +static int ipoib_intercept_dev_id_attr(struct net_device *dev) { device_remove_file(&dev->dev, &dev_attr_dev_id); return device_create_file(&dev->dev, &dev_attr_dev_id); @@ -2577,9 +2577,7 @@ static int __init ipoib_init_module(void) */ BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE); - ret = ipoib_register_debugfs(); - if (ret) - return ret; + ipoib_register_debugfs(); /* * We create a global workqueue here that is used for all flush diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index e9b7efc302d0..2ba70729d7b0 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -145,9 +145,8 @@ static void iser_data_buf_dump(struct iser_data_buf *data, for_each_sg(data->sg, sg, data->dma_nents, i) iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p " "off:0x%x sz:0x%x dma_len:0x%x\n", - i, (unsigned long)ib_sg_dma_address(ibdev, sg), - sg_page(sg), sg->offset, - sg->length, ib_sg_dma_len(ibdev, sg)); + i, (unsigned long)sg_dma_address(sg), + sg_page(sg), sg->offset, sg->length, sg_dma_len(sg)); } static void iser_dump_page_vec(struct iser_page_vec *page_vec) @@ -204,8 +203,8 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem, reg->rkey = device->pd->unsafe_global_rkey; else reg->rkey = 0; - reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]); - reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]); + reg->sge.addr = sg_dma_address(&sg[0]); + reg->sge.length = sg_dma_len(&sg[0]); iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx," " length=0x%x\n", reg->sge.lkey, reg->rkey, @@ -240,8 +239,8 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, page_vec->npages = 0; page_vec->fake_mr.page_size = SIZE_4K; plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg, - mem->size, NULL, iser_set_page); - if (unlikely(plen < mem->size)) { + mem->dma_nents, NULL, iser_set_page); + if (unlikely(plen < mem->dma_nents)) { iser_err("page vec too short to hold this SG\n"); iser_data_buf_dump(mem, device->ib_device); iser_dump_page_vec(page_vec); @@ -448,10 +447,10 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); - n = ib_map_mr_sg(mr, mem->sg, mem->size, NULL, SIZE_4K); - if (unlikely(n != mem->size)) { + n = ib_map_mr_sg(mr, mem->sg, mem->dma_nents, NULL, SIZE_4K); + if (unlikely(n != mem->dma_nents)) { iser_err("failed to map sg (%d/%d)\n", - n, mem->size); + n, mem->dma_nents); return n < 0 ? n : -EINVAL; } diff --git a/drivers/infiniband/ulp/isert/Makefile b/drivers/infiniband/ulp/isert/Makefile index c8bf2421f5bc..a4a4766e3e18 100644 --- a/drivers/infiniband/ulp/isert/Makefile +++ b/drivers/infiniband/ulp/isert/Makefile @@ -1,2 +1 @@ -ccflags-y := -Idrivers/target -Idrivers/target/iscsi obj-$(CONFIG_INFINIBAND_ISERT) += ib_isert.o diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 31d91538bbf4..84184910f038 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -443,8 +443,7 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, if (pool_size <= 0) goto err; ret = -ENOMEM; - pool = kzalloc(sizeof(struct srp_fr_pool) + - pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL); + pool = kzalloc(struct_size(pool, desc, pool_size), GFP_KERNEL); if (!pool) goto err; pool->size = pool_size; @@ -1601,9 +1600,8 @@ static int srp_map_sg_entry(struct srp_map_state *state, { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; - struct ib_device *ibdev = dev->dev; - dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg); - unsigned int dma_len = ib_sg_dma_len(ibdev, sg); + dma_addr_t dma_addr = sg_dma_address(sg); + unsigned int dma_len = sg_dma_len(sg); unsigned int len = 0; int ret; @@ -1697,13 +1695,11 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, int count) { struct srp_target_port *target = ch->target; - struct srp_device *dev = target->srp_host->srp_dev; struct scatterlist *sg; int i; for_each_sg(scat, sg, count, i) { - srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), - ib_sg_dma_len(dev->dev, sg), + srp_map_desc(state, sg_dma_address(sg), sg_dma_len(sg), target->global_rkey); } @@ -1853,8 +1849,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, buf->len = cpu_to_be32(data_len); WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len); for_each_sg(scat, sg, count, i) { - sge[i].addr = ib_sg_dma_address(ibdev, sg); - sge[i].length = ib_sg_dma_len(ibdev, sg); + sge[i].addr = sg_dma_address(sg); + sge[i].length = sg_dma_len(sg); sge[i].lkey = target->lkey; } req->cmd->num_sge += count; @@ -1875,9 +1871,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, struct srp_direct_buf *buf; buf = (void *)cmd->add_data + cmd->add_cdb_len; - buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); + buf->va = cpu_to_be64(sg_dma_address(scat)); buf->key = cpu_to_be32(target->global_rkey); - buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); + buf->len = cpu_to_be32(sg_dma_len(scat)); req->nmdesc = 0; goto map_complete; @@ -3824,6 +3820,7 @@ static ssize_t srp_create_target(struct device *dev, target_host->max_id = 1; target_host->max_lun = -1LL; target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; + target_host->max_segment_size = ib_dma_max_seg_size(ibdev); target = host_to_target(target_host); diff --git a/drivers/infiniband/ulp/srpt/Makefile b/drivers/infiniband/ulp/srpt/Makefile index e3ee4bdfffa5..43fbde42c58b 100644 --- a/drivers/infiniband/ulp/srpt/Makefile +++ b/drivers/infiniband/ulp/srpt/Makefile @@ -1,2 +1 @@ -ccflags-y := -Idrivers/target obj-$(CONFIG_INFINIBAND_SRPT) += ib_srpt.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9de9abacf7f6..0257731e6d42 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 3e0fa8a8077b..a25a8c6f938e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1711,12 +1711,57 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, } EXPORT_SYMBOL(mlx5_cmd_exec); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context) +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx) { - return cmd_exec(dev, in, in_size, out, out_size, callback, context, - false); + ctx->dev = dev; + /* Starts at 1 to avoid doing wake_up if we are not cleaning up */ + atomic_set(&ctx->num_inflight, 1); + init_waitqueue_head(&ctx->wait); +} +EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); + +/** + * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx + * @ctx: The ctx to clean + * + * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The + * caller must ensure that mlx5_cmd_exec_cb() is not called during or after + * the call mlx5_cleanup_async_ctx(). + */ +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx) +{ + atomic_dec(&ctx->num_inflight); + wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0); +} +EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); + +static void mlx5_cmd_exec_cb_handler(int status, void *_work) +{ + struct mlx5_async_work *work = _work; + struct mlx5_async_ctx *ctx = work->ctx; + + work->user_callback(status, work); + if (atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); +} + +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work) +{ + int ret; + + work->ctx = ctx; + work->user_callback = callback; + if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) + return -EIO; + ret = cmd_exec(ctx->dev, in, in_size, out, out_size, + mlx5_cmd_exec_cb_handler, work, false); + if (ret && atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); + + return ret; } EXPORT_SYMBOL(mlx5_cmd_exec_cb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c deleted file mode 100644 index 3a3b0005fd2b..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mlx5/driver.h> -#include <linux/mlx5/cmd.h> -#include "mlx5_core.h" - -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, - u16 opmod, u8 port) -{ - int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); - int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); - int err = -ENOMEM; - void *data; - void *resp; - u32 *out; - u32 *in; - - in = kzalloc(inlen, GFP_KERNEL); - out = kzalloc(outlen, GFP_KERNEL); - if (!in || !out) - goto out; - - MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); - MLX5_SET(mad_ifc_in, in, op_mod, opmod); - MLX5_SET(mad_ifc_in, in, port, port); - - data = MLX5_ADDR_OF(mad_ifc_in, in, mad); - memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); - - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); - if (err) - goto out; - - resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); - memcpy(outb, resp, - MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); - -out: - kfree(out); - kfree(in); - return err; -} -EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index be81b319b0dc..e38aa206ab6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -459,6 +459,53 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) return err; } +static int handle_hca_cap_odp(struct mlx5_core_dev *dev) +{ + void *set_ctx; + void *set_hca_cap; + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + int err; + + if (!MLX5_CAP_GEN(dev, pg)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); + if (err) + return err; + + /** + * If all bits are cleared we shouldn't try to set it + * or we might fail while trying to access a reserved bit. + */ + if (!(MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive) || + MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive) || + MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))) + return 0; + + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + return -ENOMEM; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP], + MLX5_ST_SZ_BYTES(odp_cap)); + + /* set ODP SRQ support for RC/UD and XRC transports */ + MLX5_SET(odp_cap, set_hca_cap, ud_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive))); + + MLX5_SET(odp_cap, set_hca_cap, rc_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive))); + + MLX5_SET(odp_cap, set_hca_cap, xrc_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))); + + err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ODP); + + kfree(set_ctx); + return err; +} + static int handle_hca_cap(struct mlx5_core_dev *dev) { void *set_ctx = NULL; @@ -693,6 +740,11 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_clr_master; } + if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128)) + mlx5_core_dbg(dev, "Enabling pci atomics failed\n"); + dev->iseg_base = pci_resource_start(dev->pdev, 0); dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg)); if (!dev->iseg) { @@ -926,6 +978,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto reclaim_boot_pages; } + err = handle_hca_cap_odp(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap_odp failed\n"); + goto reclaim_boot_pages; + } + err = mlx5_satisfy_startup_pages(dev, 0); if (err) { dev_err(&pdev->dev, "failed to allocate init pages\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 0670165afd5f..ea744d8466ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -51,9 +51,10 @@ void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context) + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) { struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; @@ -71,7 +72,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, MLX5_SET(mkc, mkc, mkey_7_0, key); if (callback) - return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen, + return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, callback, context); err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); @@ -105,7 +106,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen) { - return mlx5_core_create_mkey_cb(dev, mkey, in, inlen, + return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, NULL, 0, NULL, NULL); } EXPORT_SYMBOL(mlx5_core_create_mkey); diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h index e94290b29e99..ef1bae2983f3 100644 --- a/include/linux/cgroup_rdma.h +++ b/include/linux/cgroup_rdma.h @@ -39,7 +39,7 @@ struct rdmacg_device { * APIs for RDMA/IB stack to publish when a device wants to * participate in resource accounting */ -int rdmacg_register_device(struct rdmacg_device *device); +void rdmacg_register_device(struct rdmacg_device *device); void rdmacg_unregister_device(struct rdmacg_device *device); /* APIs for RDMA/IB stack to charge/uncharge pool specific resources */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8c4a820bd4c1..0845a227a7b2 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1201,6 +1201,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca_cur[MLX5_CAP_ODP], cap) +#define MLX5_CAP_ODP_MAX(mdev, cap)\ + MLX5_GET(odp_cap, mdev->caps.hca_max[MLX5_CAP_ODP], cap) + #define MLX5_CAP_VECTOR_CALC(mdev, cap) \ MLX5_GET(vector_calc_cap, \ mdev->caps.hca_cur[MLX5_CAP_VECTOR_CALC], cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 54299251d40d..f62a78e40865 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -364,6 +364,7 @@ struct mlx5_core_sig_ctx { enum { MLX5_MKEY_MR = 1, MLX5_MKEY_MW, + MLX5_MKEY_INDIRECT_DEVX, }; struct mlx5_core_mkey { @@ -850,11 +851,30 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); +struct mlx5_async_ctx { + struct mlx5_core_dev *dev; + atomic_t num_inflight; + struct wait_queue_head wait; +}; + +struct mlx5_async_work; + +typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context); + +struct mlx5_async_work { + struct mlx5_async_ctx *ctx; + mlx5_async_cbk_t user_callback; +}; + +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx); +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx); +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work); + int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context); int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); @@ -885,9 +905,10 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev); void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context); + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen); @@ -897,8 +918,6 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *out, int outlen); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, - u16 opmod, u8 port); int mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); void mlx5_pagealloc_start(struct mlx5_core_dev *dev); @@ -939,10 +958,6 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *odp_caps); int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, void *out, size_t sz); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, - u32 wq_num, u8 type, int error); -#endif int mlx5_init_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 35fe5217b244..c5c679390fbd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -72,6 +72,7 @@ enum { enum { MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, + MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, }; @@ -831,7 +832,9 @@ struct mlx5_ifc_odp_cap_bits { struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps; - u8 reserved_at_e0[0x720]; + struct mlx5_ifc_odp_per_transport_service_cap_bits xrc_odp_caps; + + u8 reserved_at_100[0x700]; }; struct mlx5_ifc_calc_op { diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 5d3755ec5afa..73af05db04c7 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -36,6 +36,7 @@ #include <linux/list.h> #include <linux/scatterlist.h> #include <linux/workqueue.h> +#include <rdma/ib_verbs.h> struct ib_ucontext; struct ib_umem_odp; @@ -80,7 +81,7 @@ static inline size_t ib_umem_num_pages(struct ib_umem *umem) #ifdef CONFIG_INFINIBAND_USER_MEM -struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, +struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, size_t size, int access, int dmasync); void ib_umem_release(struct ib_umem *umem); int ib_umem_page_count(struct ib_umem *umem); @@ -91,9 +92,10 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, #include <linux/err.h> -static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context, +static inline struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, size_t size, - int access, int dmasync) { + int access, int dmasync) +{ return ERR_PTR(-EINVAL); } static inline void ib_umem_release(struct ib_umem *umem) { } diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 0b1446fe2fab..dadc96dea39c 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -83,6 +83,19 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) return container_of(umem, struct ib_umem_odp, umem); } +/* + * The lower 2 bits of the DMA address signal the R/W permissions for + * the entry. To upgrade the permissions, provide the appropriate + * bitmask to the map_dma_pages function. + * + * Be aware that upgrading a mapped address might result in change of + * the DMA address for the page. + */ +#define ODP_READ_ALLOWED_BIT (1<<0ULL) +#define ODP_WRITE_ALLOWED_BIT (1<<1ULL) + +#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_ucontext_per_mm { @@ -103,23 +116,10 @@ struct ib_ucontext_per_mm { }; int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access); -struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm, +struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root_umem, unsigned long addr, size_t size); void ib_umem_odp_release(struct ib_umem_odp *umem_odp); -/* - * The lower 2 bits of the DMA address signal the R/W permissions for - * the entry. To upgrade the permissions, provide the appropriate - * bitmask to the map_dma_pages function. - * - * Be aware that upgrading a mapped address might result in change of - * the DMA address for the page. - */ -#define ODP_READ_ALLOWED_BIT (1<<0ULL) -#define ODP_WRITE_ALLOWED_BIT (1<<1ULL) - -#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) - int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, u64 bcnt, u64 access_mask, unsigned long current_seq); @@ -169,12 +169,6 @@ static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) return -EINVAL; } -static inline struct ib_umem_odp * -ib_alloc_odp_umem(struct ib_ucontext *context, unsigned long addr, size_t size) -{ - return ERR_PTR(-EINVAL); -} - static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 80debf5982ac..2e1f1e885ee5 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -268,6 +268,7 @@ enum ib_odp_transport_cap_bits { IB_ODP_SUPPORT_WRITE = 1 << 2, IB_ODP_SUPPORT_READ = 1 << 3, IB_ODP_SUPPORT_ATOMIC = 1 << 4, + IB_ODP_SUPPORT_SRQ_RECV = 1 << 5, }; struct ib_odp_caps { @@ -276,6 +277,7 @@ struct ib_odp_caps { uint32_t rc_odp_caps; uint32_t uc_odp_caps; uint32_t ud_odp_caps; + uint32_t xrc_odp_caps; } per_transport_caps; }; @@ -1504,12 +1506,10 @@ struct ib_ucontext { bool cleanup_retryable; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING void (*invalidate_range)(struct ib_umem_odp *umem_odp, unsigned long start, unsigned long end); struct mutex per_mm_list_lock; struct list_head per_mm_list; -#endif struct ib_rdmacg_object cg_obj; /* @@ -2506,6 +2506,17 @@ struct ib_device_ops { */ int (*get_hw_stats)(struct ib_device *device, struct rdma_hw_stats *stats, u8 port, int index); + /* + * This function is called once for each port when a ib device is + * registered. + */ + int (*init_port)(struct ib_device *device, u8 port_num, + struct kobject *port_sysfs); + /** + * Allows rdma drivers to add their own restrack attributes. + */ + int (*fill_res_entry)(struct sk_buff *msg, + struct rdma_restrack_entry *entry); }; struct ib_device { @@ -2561,6 +2572,8 @@ struct ib_device { __be64 node_guid; u32 local_dma_lkey; u16 is_switch:1; + /* Indicates kernel verbs support, should not be used in drivers */ + u16 kverbs_provider:1; u8 node_type; u8 phys_port_cnt; struct ib_device_attr attrs; @@ -2616,16 +2629,23 @@ struct ib_client { const struct sockaddr *addr, void *client_data); struct list_head list; + + /* kverbs are not required by the client */ + u8 no_kverbs_req:1; }; -struct ib_device *ib_alloc_device(size_t size); +struct ib_device *_ib_alloc_device(size_t size); +#define ib_alloc_device(drv_struct, member) \ + container_of(_ib_alloc_device(sizeof(struct drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof( \ + struct drv_struct, member))), \ + struct drv_struct, member) + void ib_dealloc_device(struct ib_device *device); void ib_get_device_fw_str(struct ib_device *device, char *str); -int ib_register_device(struct ib_device *device, const char *name, - int (*port_callback)(struct ib_device *, u8, - struct kobject *)); +int ib_register_device(struct ib_device *device, const char *name); void ib_unregister_device(struct ib_device *device); int ib_register_client (struct ib_client *client); @@ -3686,32 +3706,18 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, { dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); } -/** - * ib_sg_dma_address - Return the DMA address from a scatter/gather entry - * @dev: The device for which the DMA addresses were created - * @sg: The scatter/gather entry - * - * Note: this function is obsolete. To do: change all occurrences of - * ib_sg_dma_address() into sg_dma_address(). - */ -static inline u64 ib_sg_dma_address(struct ib_device *dev, - struct scatterlist *sg) -{ - return sg_dma_address(sg); -} /** - * ib_sg_dma_len - Return the DMA length from a scatter/gather entry - * @dev: The device for which the DMA addresses were created - * @sg: The scatter/gather entry + * ib_dma_max_seg_size - Return the size limit of a single DMA transfer + * @dev: The device to query * - * Note: this function is obsolete. To do: change all occurrences of - * ib_sg_dma_len() into sg_dma_len(). + * The returned value represents a size in bytes. */ -static inline unsigned int ib_sg_dma_len(struct ib_device *dev, - struct scatterlist *sg) +static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) { - return sg_dma_len(sg); + struct device_dma_parameters *p = dev->dma_device->dma_parms; + + return p ? p->max_segment_size : UINT_MAX; } /** @@ -4222,6 +4228,7 @@ void rdma_roce_rescan_device(struct ib_device *ibdev); struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); +struct ib_ucontext *rdma_get_ucontext(struct ib_udata *udata); int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); @@ -4258,4 +4265,27 @@ rdma_set_device_sysfs_group(struct ib_device *dev, dev->groups[1] = group; } +/** + * rdma_device_to_ibdev - Get ib_device pointer from device pointer + * + * @device: device pointer for which ib_device pointer to retrieve + * + * rdma_device_to_ibdev() retrieves ib_device pointer from device. + * + */ +static inline struct ib_device *rdma_device_to_ibdev(struct device *device) +{ + return container_of(device, struct ib_device, dev); +} + +/** + * rdma_device_to_drv_device - Helper macro to reach back to driver's + * ib_device holder structure from device pointer. + * + * NOTE: New drivers should not make use of this API; This API is only for + * existing drivers who have exposed sysfs entries using + * rdma_set_device_sysfs_group(). + */ +#define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \ + container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member) #endif /* IB_VERBS_H */ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index dd0ed8048bb4..acb3bc96dfa7 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -250,9 +250,6 @@ struct rvt_driver_provided { */ void (*do_send)(struct rvt_qp *qp); - /* Passed to ib core registration. Callback to create syfs files */ - int (*port_callback)(struct ib_device *, u8, struct kobject *); - /* * Returns a pointer to the undelying hardware's PCI device. This is * used to display information as to what hardware is being referenced diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 8f179be9d9a9..cc66cc7a11d3 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -49,6 +49,7 @@ enum rdma_restrack_type { }; #define RDMA_RESTRACK_HASH_BITS 8 +struct ib_device; struct rdma_restrack_entry; /** @@ -64,13 +65,6 @@ struct rdma_restrack_root { * @hash: global database for all resources per-device */ DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS); - /** - * @fill_res_entry: driver-specific fill function - * - * Allows rdma drivers to add their own restrack attributes. - */ - int (*fill_res_entry)(struct sk_buff *msg, - struct rdma_restrack_entry *entry); }; /** @@ -122,25 +116,9 @@ struct rdma_restrack_entry { bool user; }; -/** - * rdma_restrack_init() - initialize resource tracking - * @res: resource tracking root - */ -void rdma_restrack_init(struct rdma_restrack_root *res); - -/** - * rdma_restrack_clean() - clean resource tracking - * @res: resource tracking root - */ -void rdma_restrack_clean(struct rdma_restrack_root *res); - -/** - * rdma_restrack_count() - the current usage of specific object - * @res: resource entry - * @type: actual type of object to operate - * @ns: PID namespace - */ -int rdma_restrack_count(struct rdma_restrack_root *res, +void rdma_restrack_init(struct ib_device *dev); +void rdma_restrack_clean(struct ib_device *dev); +int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, struct pid_namespace *ns); diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index acb1bfa3cc99..175d761695e1 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -157,6 +157,7 @@ struct uverbs_obj_fd_type { extern const struct uverbs_obj_type_class uverbs_idr_class; extern const struct uverbs_obj_type_class uverbs_fd_class; +void uverbs_close_fd(struct file *f); #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \ sizeof(char)) diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 480d9a60b68e..0474c7400268 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -270,6 +270,8 @@ struct ib_uverbs_ex_query_device_resp { struct ib_uverbs_tm_caps tm_caps; struct ib_uverbs_cq_moderation_caps cq_moderation_caps; __aligned_u64 max_dm_size; + __u32 xrc_odp_caps; + __u32 reserved; }; struct ib_uverbs_query_port { diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index b8d121d457f1..8149d224030b 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -84,6 +84,14 @@ enum mlx5_ib_devx_obj_query_attrs { MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, }; +enum mlx5_ib_devx_obj_query_async_attrs { + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN, +}; + enum mlx5_ib_devx_query_eqn_attrs { MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, @@ -94,6 +102,7 @@ enum mlx5_ib_devx_obj_methods { MLX5_IB_METHOD_DEVX_OBJ_DESTROY, MLX5_IB_METHOD_DEVX_OBJ_MODIFY, MLX5_IB_METHOD_DEVX_OBJ_QUERY, + MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY, }; enum mlx5_ib_devx_umem_reg_attrs { @@ -113,11 +122,20 @@ enum mlx5_ib_devx_umem_methods { MLX5_IB_METHOD_DEVX_UMEM_DEREG, }; +enum mlx5_ib_devx_async_cmd_fd_alloc_attrs { + MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum mlx5_ib_devx_async_cmd_fd_methods { + MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC = (1U << UVERBS_ID_NS_SHIFT), +}; + enum mlx5_ib_objects { MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_OBJECT_DEVX_OBJ, MLX5_IB_OBJECT_DEVX_UMEM, MLX5_IB_OBJECT_FLOW_MATCHER, + MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, }; enum mlx5_ib_flow_matcher_create_attrs { diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index 4ef62c0e8452..4a701033b93f 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -51,5 +51,10 @@ enum mlx5_ib_uapi_flow_action_packet_reformat_type { MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3, }; +struct mlx5_ib_uapi_devx_async_cmd_hdr { + __aligned_u64 wr_id; + __u8 out_data[]; +}; + #endif diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c index d3bbb757ee49..1d75ae7f1cb7 100644 --- a/kernel/cgroup/rdma.c +++ b/kernel/cgroup/rdma.c @@ -313,10 +313,8 @@ EXPORT_SYMBOL(rdmacg_try_charge); * If IB stack wish a device to participate in rdma cgroup resource * tracking, it must invoke this API to register with rdma cgroup before * any user space application can start using the RDMA resources. - * Returns 0 on success or EINVAL when table length given is beyond - * supported size. */ -int rdmacg_register_device(struct rdmacg_device *device) +void rdmacg_register_device(struct rdmacg_device *device) { INIT_LIST_HEAD(&device->dev_node); INIT_LIST_HEAD(&device->rpools); @@ -324,7 +322,6 @@ int rdmacg_register_device(struct rdmacg_device *device) mutex_lock(&rdmacg_mutex); list_add_tail(&device->dev_node, &rdmacg_devices); mutex_unlock(&rdmacg_mutex); - return 0; } EXPORT_SYMBOL(rdmacg_register_device); diff --git a/net/rds/ib.h b/net/rds/ib.h index 71ff356ee702..1fd1cac85da2 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -331,10 +331,8 @@ static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev, unsigned int i; for_each_sg(sglist, sg, sg_dma_len, i) { - ib_dma_sync_single_for_cpu(dev, - ib_sg_dma_address(dev, sg), - ib_sg_dma_len(dev, sg), - direction); + ib_dma_sync_single_for_cpu(dev, sg_dma_address(sg), + sg_dma_len(sg), direction); } } #define ib_dma_sync_sg_for_cpu rds_ib_dma_sync_sg_for_cpu @@ -348,10 +346,8 @@ static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev, unsigned int i; for_each_sg(sglist, sg, sg_dma_len, i) { - ib_dma_sync_single_for_device(dev, - ib_sg_dma_address(dev, sg), - ib_sg_dma_len(dev, sg), - direction); + ib_dma_sync_single_for_device(dev, sg_dma_address(sg), + sg_dma_len(sg), direction); } } #define ib_dma_sync_sg_for_device rds_ib_dma_sync_sg_for_device diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c index e0f70c4051b6..31cf37da4510 100644 --- a/net/rds/ib_fmr.c +++ b/net/rds/ib_fmr.c @@ -108,8 +108,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, page_cnt = 0; for (i = 0; i < sg_dma_len; ++i) { - unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]); - u64 dma_addr = ib_sg_dma_address(dev, &scat[i]); + unsigned int dma_len = sg_dma_len(&scat[i]); + u64 dma_addr = sg_dma_address(&scat[i]); if (dma_addr & ~PAGE_MASK) { if (i > 0) { @@ -148,8 +148,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, page_cnt = 0; for (i = 0; i < sg_dma_len; ++i) { - unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]); - u64 dma_addr = ib_sg_dma_address(dev, &scat[i]); + unsigned int dma_len = sg_dma_len(&scat[i]); + u64 dma_addr = sg_dma_address(&scat[i]); for (j = 0; j < dma_len; j += PAGE_SIZE) dma_pages[page_cnt++] = diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 6431a023ac89..688dcd68d4ea 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -181,8 +181,8 @@ static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev, ret = -EINVAL; for (i = 0; i < ibmr->sg_dma_len; ++i) { - unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]); - u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]); + unsigned int dma_len = sg_dma_len(&ibmr->sg[i]); + u64 dma_addr = sg_dma_address(&ibmr->sg[i]); frmr->sg_byte_len += dma_len; if (dma_addr & ~PAGE_MASK) { diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 2f16146e4ec9..672b91a9e207 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -346,8 +346,8 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, sge->length = sizeof(struct rds_header); sge = &recv->r_sge[1]; - sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg); - sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg); + sge->addr = sg_dma_address(&recv->r_frag->f_sg); + sge->length = sg_dma_len(&recv->r_frag->f_sg); ret = 0; out: @@ -409,9 +409,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv, recv->r_ibinc, sg_page(&recv->r_frag->f_sg), - (long) ib_sg_dma_address( - ic->i_cm_id->device, - &recv->r_frag->f_sg)); + (long)sg_dma_address(&recv->r_frag->f_sg)); /* XXX when can this fail? */ ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL); diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 4e0c36acf866..f2d2d92306e2 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -645,16 +645,16 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, if (i < work_alloc && scat != &rm->data.op_sg[rm->data.op_count]) { len = min(RDS_FRAG_SIZE, - ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff); + sg_dma_len(scat) - rm->data.op_dmaoff); send->s_wr.num_sge = 2; - send->s_sge[1].addr = ib_sg_dma_address(dev, scat); + send->s_sge[1].addr = sg_dma_address(scat); send->s_sge[1].addr += rm->data.op_dmaoff; send->s_sge[1].length = len; bytes_sent += len; rm->data.op_dmaoff += len; - if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) { + if (rm->data.op_dmaoff == sg_dma_len(scat)) { scat++; rm->data.op_dmasg++; rm->data.op_dmaoff = 0; @@ -808,8 +808,8 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) } /* Convert our struct scatterlist to struct ib_sge */ - send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg); - send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg); + send->s_sge[0].addr = sg_dma_address(op->op_sg); + send->s_sge[0].length = sg_dma_len(op->op_sg); send->s_sge[0].lkey = ic->i_pd->local_dma_lkey; rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr, @@ -921,9 +921,8 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) for (j = 0; j < send->s_rdma_wr.wr.num_sge && scat != &op->op_sg[op->op_count]; j++) { - len = ib_sg_dma_len(ic->i_cm_id->device, scat); - send->s_sge[j].addr = - ib_sg_dma_address(ic->i_cm_id->device, scat); + len = sg_dma_len(scat); + send->s_sge[j].addr = sg_dma_address(scat); send->s_sge[j].length = len; send->s_sge[j].lkey = ic->i_pd->local_dma_lkey; |