diff options
Diffstat (limited to 'drivers/infiniband/core/nldev.c')
-rw-r--r-- | drivers/infiniband/core/nldev.c | 708 |
1 files changed, 548 insertions, 160 deletions
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index e0b0a91da696..12dc97067ed2 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -92,7 +92,9 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_CTX] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_CTX_ENTRY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_DST_ADDR] = { .len = sizeof(struct __kernel_sockaddr_storage) }, [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 }, @@ -114,6 +116,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY }, [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 }, @@ -129,6 +132,11 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_SRQN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_SRQ_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_MIN_RANGE] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_MAX_RANGE] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 }, @@ -145,6 +153,9 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 }, [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, + [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -241,7 +252,7 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) { char fw[IB_FW_VERSION_NAME_MAX]; int ret = 0; - u8 port; + u32 port; if (fill_nldev_handle(msg, device)) return -EMSGSIZE; @@ -384,6 +395,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) [RDMA_RESTRACK_CM_ID] = "cm_id", [RDMA_RESTRACK_MR] = "mr", [RDMA_RESTRACK_CTX] = "ctx", + [RDMA_RESTRACK_SRQ] = "srq", }; struct nlattr *table_attr; @@ -446,27 +458,11 @@ static int fill_res_name_pid(struct sk_buff *msg, return err ? -EMSGSIZE : 0; } -static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, - struct rdma_restrack_entry *res) -{ - if (!dev->ops.fill_res_entry) - return false; - return dev->ops.fill_res_entry(msg, res); -} - -static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg, - struct rdma_restrack_entry *res) -{ - if (!dev->ops.fill_stat_entry) - return false; - return dev->ops.fill_stat_entry(msg, res); -} - -static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, - struct rdma_restrack_entry *res, uint32_t port) +static int fill_res_qp_entry_query(struct sk_buff *msg, + struct rdma_restrack_entry *res, + struct ib_device *dev, + struct ib_qp *qp) { - struct ib_qp *qp = container_of(res, struct ib_qp, res); - struct ib_device *dev = qp->device; struct ib_qp_init_attr qp_init_attr; struct ib_qp_attr qp_attr; int ret; @@ -475,16 +471,6 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, if (ret) return ret; - if (port && port != qp_attr.port_num) - return -EAGAIN; - - /* In create_qp() port is not set yet */ - if (qp_attr.port_num && - nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num)) - goto err; - - if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num)) - goto err; if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN, qp_attr.dest_qp_num)) @@ -508,19 +494,53 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state)) goto err; + if (dev->ops.fill_res_qp_entry) + return dev->ops.fill_res_qp_entry(msg, qp); + return 0; + +err: return -EMSGSIZE; +} + +static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_qp *qp = container_of(res, struct ib_qp, res); + struct ib_device *dev = qp->device; + int ret; + + if (port && port != qp->port) + return -EAGAIN; + + /* In create_qp() port is not set yet */ + if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port)) + return -EINVAL; + + ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num); + if (ret) + return -EMSGSIZE; + if (!rdma_is_kernel_res(res) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id)) - goto err; + return -EMSGSIZE; - if (fill_res_name_pid(msg, res)) - goto err; + ret = fill_res_name_pid(msg, res); + if (ret) + return -EMSGSIZE; - if (fill_res_entry(dev, msg, res)) - goto err; + return fill_res_qp_entry_query(msg, res, dev, qp); +} - return 0; +static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_qp *qp = container_of(res, struct ib_qp, res); + struct ib_device *dev = qp->device; -err: return -EMSGSIZE; + if (port && port != qp->port) + return -EAGAIN; + if (!dev->ops.fill_res_qp_entry_raw) + return -EINVAL; + return dev->ops.fill_res_qp_entry_raw(msg, qp); } static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, @@ -568,9 +588,8 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, if (fill_res_name_pid(msg, res)) goto err; - if (fill_res_entry(dev, msg, res)) - goto err; - + if (dev->ops.fill_res_cm_id_entry) + return dev->ops.fill_res_cm_id_entry(msg, cm_id); return 0; err: return -EMSGSIZE; @@ -583,35 +602,42 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, struct ib_device *dev = cq->device; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe)) - goto err; + return -EMSGSIZE; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD)) - goto err; + return -EMSGSIZE; /* Poll context is only valid for kernel CQs */ if (rdma_is_kernel_res(res) && nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx)) - goto err; + return -EMSGSIZE; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL))) - goto err; + return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id)) - goto err; + return -EMSGSIZE; if (!rdma_is_kernel_res(res) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, cq->uobject->uevent.uobject.context->res.id)) - goto err; + return -EMSGSIZE; if (fill_res_name_pid(msg, res)) - goto err; + return -EMSGSIZE; - if (fill_res_entry(dev, msg, res)) - goto err; + return (dev->ops.fill_res_cq_entry) ? + dev->ops.fill_res_cq_entry(msg, cq) : 0; +} - return 0; +static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_cq *cq = container_of(res, struct ib_cq, res); + struct ib_device *dev = cq->device; -err: return -EMSGSIZE; + if (!dev->ops.fill_res_cq_entry_raw) + return -EINVAL; + return dev->ops.fill_res_cq_entry_raw(msg, cq); } static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, @@ -622,38 +648,45 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, if (has_cap_net_admin) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey)) - goto err; + return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey)) - goto err; + return -EMSGSIZE; } if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, RDMA_NLDEV_ATTR_PAD)) - goto err; + return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) - goto err; + return -EMSGSIZE; if (!rdma_is_kernel_res(res) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id)) - goto err; + return -EMSGSIZE; if (fill_res_name_pid(msg, res)) - goto err; + return -EMSGSIZE; - if (fill_res_entry(dev, msg, res)) - goto err; + return (dev->ops.fill_res_mr_entry) ? + dev->ops.fill_res_mr_entry(msg, mr) : + 0; +} - return 0; +static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_mr *mr = container_of(res, struct ib_mr, res); + struct ib_device *dev = mr->pd->device; -err: return -EMSGSIZE; + if (!dev->ops.fill_res_mr_entry_raw) + return -EINVAL; + return dev->ops.fill_res_mr_entry_raw(msg, mr); } static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_pd *pd = container_of(res, struct ib_pd, res); - struct ib_device *dev = pd->device; if (has_cap_net_admin) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, @@ -676,15 +709,138 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, pd->uobject->context->res.id)) goto err; - if (fill_res_name_pid(msg, res)) - goto err; + return fill_res_name_pid(msg, res); + +err: return -EMSGSIZE; +} + +static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res); + + if (rdma_is_kernel_res(res)) + return 0; - if (fill_res_entry(dev, msg, res)) + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id)) + return -EMSGSIZE; + + return fill_res_name_pid(msg, res); +} + +static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range, + uint32_t max_range) +{ + struct nlattr *entry_attr; + + if (!min_range) + return 0; + + entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY); + if (!entry_attr) + return -EMSGSIZE; + + if (min_range == max_range) { + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range)) + goto err; + } else { + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range)) + goto err; + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range)) + goto err; + } + nla_nest_end(msg, entry_attr); + return 0; + +err: + nla_nest_cancel(msg, entry_attr); + return -EMSGSIZE; +} + +static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq) +{ + uint32_t min_range = 0, prev = 0; + struct rdma_restrack_entry *res; + struct rdma_restrack_root *rt; + struct nlattr *table_attr; + struct ib_qp *qp = NULL; + unsigned long id = 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP); + if (!table_attr) + return -EMSGSIZE; + + rt = &srq->device->res[RDMA_RESTRACK_QP]; + xa_lock(&rt->xa); + xa_for_each(&rt->xa, id, res) { + if (!rdma_restrack_get(res)) + continue; + + qp = container_of(res, struct ib_qp, res); + if (!qp->srq || (qp->srq->res.id != srq->res.id)) { + rdma_restrack_put(res); + continue; + } + + if (qp->qp_num < prev) + /* qp_num should be ascending */ + goto err_loop; + + if (min_range == 0) { + min_range = qp->qp_num; + } else if (qp->qp_num > (prev + 1)) { + if (fill_res_range_qp_entry(msg, min_range, prev)) + goto err_loop; + + min_range = qp->qp_num; + } + prev = qp->qp_num; + rdma_restrack_put(res); + } + + xa_unlock(&rt->xa); + + if (fill_res_range_qp_entry(msg, min_range, prev)) goto err; + nla_nest_end(msg, table_attr); return 0; -err: return -EMSGSIZE; +err_loop: + rdma_restrack_put(res); + xa_unlock(&rt->xa); +err: + nla_nest_cancel(msg, table_attr); + return -EMSGSIZE; +} + +static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_srq *srq = container_of(res, struct ib_srq, res); + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id)) + goto err; + + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type)) + goto err; + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id)) + goto err; + + if (ib_srq_has_cq(srq->srq_type)) { + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, + srq->ext.cq->res.id)) + goto err; + } + + if (fill_res_srq_qps(msg, srq)) + goto err; + + return fill_res_name_pid(msg, res); + +err: + return -EMSGSIZE; } static int fill_stat_counter_mode(struct sk_buff *msg, @@ -695,11 +851,16 @@ static int fill_stat_counter_mode(struct sk_buff *msg, if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode)) return -EMSGSIZE; - if (m->mode == RDMA_COUNTER_MODE_AUTO) + if (m->mode == RDMA_COUNTER_MODE_AUTO) { if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) && nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type)) return -EMSGSIZE; + if ((m->mask & RDMA_COUNTER_MASK_PID) && + fill_res_name_pid(msg, &counter->res)) + return -EMSGSIZE; + } + return 0; } @@ -738,9 +899,6 @@ static int fill_stat_counter_qps(struct sk_buff *msg, xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { qp = container_of(res, struct ib_qp, res); - if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) - continue; - if (!qp->counter || (qp->counter->id != counter->id)) continue; @@ -793,9 +951,8 @@ static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) goto err; - if (fill_stat_entry(dev, msg, res)) - goto err; - + if (dev->ops.fill_stat_mr_entry) + return dev->ops.fill_stat_mr_entry(msg, mr); return 0; err: @@ -813,14 +970,21 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg, if (!table_attr) return -EMSGSIZE; - for (i = 0; i < st->num_counters; i++) - if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i])) + mutex_lock(&st->lock); + for (i = 0; i < st->num_counters; i++) { + if (test_bit(i, st->is_disabled)) + continue; + if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name, + st->value[i])) goto err; + } + mutex_unlock(&st->lock); nla_nest_end(msg, table_attr); return 0; err: + mutex_unlock(&st->lock); nla_nest_cancel(msg, table_attr); return -EMSGSIZE; } @@ -840,7 +1004,6 @@ static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin, if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) || nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) || - fill_res_name_pid(msg, &counter->res) || fill_stat_counter_mode(msg, counter) || fill_stat_counter_qps(msg, counter) || fill_stat_counter_hwcounters(msg, counter)) @@ -916,8 +1079,12 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) { char name[IB_DEVICE_NAME_MAX] = {}; - nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], + nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], IB_DEVICE_NAME_MAX); + if (strlen(name) == 0) { + err = -EINVAL; + goto done; + } err = ib_device_rename(device, name); goto done; } @@ -1173,7 +1340,6 @@ static int nldev_res_get_dumpit(struct sk_buff *skb, struct nldev_fill_res_entry { enum rdma_nldev_attr nldev_attr; - enum rdma_nldev_command nldev_cmd; u8 flags; u32 entry; u32 id; @@ -1185,44 +1351,51 @@ enum nldev_res_flags { static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { [RDMA_RESTRACK_QP] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_QP, .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY, .id = RDMA_NLDEV_ATTR_RES_LQPN, }, [RDMA_RESTRACK_CM_ID] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID, .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, .id = RDMA_NLDEV_ATTR_RES_CM_IDN, }, [RDMA_RESTRACK_CQ] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ, .flags = NLDEV_PER_DEV, .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY, .id = RDMA_NLDEV_ATTR_RES_CQN, }, [RDMA_RESTRACK_MR] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_MR, .flags = NLDEV_PER_DEV, .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY, .id = RDMA_NLDEV_ATTR_RES_MRN, }, [RDMA_RESTRACK_PD] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_PD, .flags = NLDEV_PER_DEV, .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY, .id = RDMA_NLDEV_ATTR_RES_PDN, }, [RDMA_RESTRACK_COUNTER] = { - .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET, .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER, .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID, }, + [RDMA_RESTRACK_CTX] = { + .nldev_attr = RDMA_NLDEV_ATTR_RES_CTX, + .flags = NLDEV_PER_DEV, + .entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY, + .id = RDMA_NLDEV_ATTR_RES_CTXN, + }, + [RDMA_RESTRACK_SRQ] = { + .nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ, + .flags = NLDEV_PER_DEV, + .entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY, + .id = RDMA_NLDEV_ATTR_RES_SRQN, + }, + }; static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1277,7 +1450,8 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, - RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd), + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NL_GET_OP(nlh->nlmsg_type)), 0, 0); if (fill_nldev_handle(msg, device)) { @@ -1288,11 +1462,10 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN); ret = fill_func(msg, has_cap_net_admin, res, port); - - rdma_restrack_put(res); if (ret) goto err_free; + rdma_restrack_put(res); nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); @@ -1356,7 +1529,8 @@ static int res_get_common_dumpit(struct sk_buff *skb, } nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd), + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NL_GET_OP(cb->nlh->nlmsg_type)), 0, NLM_F_MULTI); if (fill_nldev_handle(skb, device)) { @@ -1438,27 +1612,32 @@ err_index: return ret; } -#define RES_GET_FUNCS(name, type) \ - static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ +#define RES_GET_FUNCS(name, type) \ + static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ struct netlink_callback *cb) \ - { \ - return res_get_common_dumpit(skb, cb, type, \ - fill_res_##name##_entry); \ - } \ - static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ - struct nlmsghdr *nlh, \ + { \ + return res_get_common_dumpit(skb, cb, type, \ + fill_res_##name##_entry); \ + } \ + static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ + struct nlmsghdr *nlh, \ struct netlink_ext_ack *extack) \ - { \ - return res_get_common_doit(skb, nlh, extack, type, \ - fill_res_##name##_entry); \ + { \ + return res_get_common_doit(skb, nlh, extack, type, \ + fill_res_##name##_entry); \ } RES_GET_FUNCS(qp, RDMA_RESTRACK_QP); +RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP); RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID); RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ); +RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ); RES_GET_FUNCS(pd, RDMA_RESTRACK_PD); RES_GET_FUNCS(mr, RDMA_RESTRACK_MR); +RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR); RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER); +RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX); +RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ); static LIST_HEAD(link_ops); static DECLARE_RWSEM(link_ops_rwsem); @@ -1512,13 +1691,13 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME]) return -EINVAL; - nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], + nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(ibdev_name)); - if (strchr(ibdev_name, '%')) + if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0) return -EINVAL; - nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); - nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], + nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); + nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], sizeof(ndev_name)); ndev = dev_get_by_name(sock_net(skb->sk), ndev_name); @@ -1560,7 +1739,7 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (!device) return -EINVAL; - if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) { + if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) { ib_device_put(device); return -EINVAL; } @@ -1585,7 +1764,7 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh, if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE]) return -EINVAL; - nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE], + nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE], sizeof(client_name)); if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) { @@ -1680,6 +1859,19 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_free(msg); return err; } + + /* + * Copy-on-fork is supported. + * See commits: + * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes") + * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm") + * for more details. Don't backport this without them. + * + * Return value ignored on purpose, assume copy-on-fork is not + * supported in case of failure. + */ + nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1); + nlmsg_end(msg, nlh); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); } @@ -1705,24 +1897,113 @@ static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } +static int nldev_stat_set_mode_doit(struct sk_buff *msg, + struct netlink_ext_ack *extack, + struct nlattr *tb[], + struct ib_device *device, u32 port) +{ + u32 mode, mask = 0, qpn, cntn = 0; + int ret; + + /* Currently only counter for QP is supported */ + if (!tb[RDMA_NLDEV_ATTR_STAT_RES] || + nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) + return -EINVAL; + + mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); + if (mode == RDMA_COUNTER_MODE_AUTO) { + if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) + mask = nla_get_u32( + tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); + return rdma_counter_set_auto_mode(device, port, mask, extack); + } + + if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) + return -EINVAL; + + qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); + if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { + cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); + ret = rdma_counter_bind_qpn(device, port, qpn, cntn); + if (ret) + return ret; + } else { + ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn); + if (ret) + return ret; + } + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { + ret = -EMSGSIZE; + goto err_fill; + } + + return 0; + +err_fill: + rdma_counter_unbind_qpn(device, port, qpn, cntn); + return ret; +} + +static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[], + struct ib_device *device, + u32 port) +{ + struct rdma_hw_stats *stats; + struct nlattr *entry_attr; + unsigned long *target; + int rem, i, ret = 0; + u32 index; + + stats = ib_get_hw_stats_port(device, port); + if (!stats) + return -EINVAL; + + target = kcalloc(BITS_TO_LONGS(stats->num_counters), + sizeof(*stats->is_disabled), GFP_KERNEL); + if (!target) + return -ENOMEM; + + nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS], + rem) { + index = nla_get_u32(entry_attr); + if ((index >= stats->num_counters) || + !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) { + ret = -EINVAL; + goto out; + } + + set_bit(index, target); + } + + for (i = 0; i < stats->num_counters; i++) { + if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL)) + continue; + + ret = rdma_counter_modify(device, port, i, test_bit(i, target)); + if (ret) + goto out; + } + +out: + kfree(target); + return ret; +} + static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - u32 index, port, mode, mask = 0, qpn, cntn = 0; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct ib_device *device; struct sk_buff *msg; + u32 index, port; int ret; - ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); - /* Currently only counter for QP is supported */ - if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] || - !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || - !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE]) - return -EINVAL; - - if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, + extack); + if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || + !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); @@ -1733,61 +2014,49 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); if (!rdma_is_port_valid(device, port)) { ret = -EINVAL; - goto err; + goto err_put_device; + } + + if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] && + !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { + ret = -EINVAL; + goto err_put_device; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto err; + goto err_put_device; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_SET), 0, 0); + if (fill_nldev_handle(msg, device) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { + ret = -EMSGSIZE; + goto err_free_msg; + } - mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); - if (mode == RDMA_COUNTER_MODE_AUTO) { - if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) - mask = nla_get_u32( - tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); - - ret = rdma_counter_set_auto_mode(device, port, - mask ? true : false, mask); - if (ret) - goto err_msg; - } else { - if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) - goto err_msg; - qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); - if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { - cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); - ret = rdma_counter_bind_qpn(device, port, qpn, cntn); - } else { - ret = rdma_counter_bind_qpn_alloc(device, port, - qpn, &cntn); - } + if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) { + ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port); if (ret) - goto err_msg; + goto err_free_msg; + } - if (fill_nldev_handle(msg, device) || - nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || - nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || - nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { - ret = -EMSGSIZE; - goto err_fill; - } + if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { + ret = nldev_stat_set_counter_dynamic_doit(tb, device, port); + if (ret) + goto err_free_msg; } nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); -err_fill: - rdma_counter_unbind_qpn(device, port, qpn, cntn); -err_msg: +err_free_msg: nlmsg_free(msg); -err: +err_put_device: ib_device_put(device); return ret; } @@ -1879,13 +2148,14 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, if (!device) return -EINVAL; - if (!device->ops.alloc_hw_stats || !device->ops.get_hw_stats) { + if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) { ret = -EINVAL; goto err; } port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); - if (!rdma_is_port_valid(device, port)) { + stats = ib_get_hw_stats_port(device, port); + if (!stats) { ret = -EINVAL; goto err; } @@ -1907,11 +2177,6 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, goto err_msg; } - stats = device->port_data ? device->port_data[port].hw_stats : NULL; - if (stats == NULL) { - ret = -EINVAL; - goto err_msg; - } mutex_lock(&stats->lock); num_cnts = device->ops.get_hw_stats(device, stats, port, 0); @@ -1926,9 +2191,13 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, goto err_stats; } for (i = 0; i < num_cnts; i++) { + if (test_bit(i, stats->is_disabled)) + continue; + v = stats->value[i] + rdma_counter_get_hwstat_value(device, port, i); - if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) { + if (rdma_nl_stat_hwcounter_entry(msg, + stats->descs[i].name, v)) { ret = -EMSGSIZE; goto err_table; } @@ -2076,6 +2345,99 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb, return ret; } +static int nldev_stat_get_counter_status_doit(struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry; + struct rdma_hw_stats *stats; + struct ib_device *device; + struct sk_buff *msg; + u32 devid, port; + int ret, i; + + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || + !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) + return -EINVAL; + + devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + device = ib_device_get_by_index(sock_net(skb->sk), devid); + if (!device) + return -EINVAL; + + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (!rdma_is_port_valid(device, port)) { + ret = -EINVAL; + goto err; + } + + stats = ib_get_hw_stats_port(device, port); + if (!stats) { + ret = -EINVAL; + goto err; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto err; + } + + nlh = nlmsg_put( + msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS), + 0, 0); + + ret = -EMSGSIZE; + if (fill_nldev_handle(msg, device) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) + goto err_msg; + + table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); + if (!table) + goto err_msg; + + mutex_lock(&stats->lock); + for (i = 0; i < stats->num_counters; i++) { + entry = nla_nest_start(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); + if (!entry) + goto err_msg_table; + + if (nla_put_string(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, + stats->descs[i].name) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i)) + goto err_msg_entry; + + if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) && + (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, + !test_bit(i, stats->is_disabled)))) + goto err_msg_entry; + + nla_nest_end(msg, entry); + } + mutex_unlock(&stats->lock); + + nla_nest_end(msg, table); + nlmsg_end(msg, nlh); + ib_device_put(device); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); + +err_msg_entry: + nla_nest_cancel(msg, entry); +err_msg_table: + mutex_unlock(&stats->lock); + nla_nest_cancel(msg, table); +err_msg: + nlmsg_free(msg); +err: + ib_device_put(device); + return ret; +} + static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, @@ -2124,6 +2486,14 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { .doit = nldev_res_get_pd_doit, .dump = nldev_res_get_pd_dumpit, }, + [RDMA_NLDEV_CMD_RES_CTX_GET] = { + .doit = nldev_res_get_ctx_doit, + .dump = nldev_res_get_ctx_dumpit, + }, + [RDMA_NLDEV_CMD_RES_SRQ_GET] = { + .doit = nldev_res_get_srq_doit, + .dump = nldev_res_get_srq_dumpit, + }, [RDMA_NLDEV_CMD_SYS_GET] = { .doit = nldev_sys_get_doit, }, @@ -2142,6 +2512,24 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { .doit = nldev_stat_del_doit, .flags = RDMA_NL_ADMIN_PERM, }, + [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = { + .doit = nldev_res_get_qp_raw_doit, + .dump = nldev_res_get_qp_raw_dumpit, + .flags = RDMA_NL_ADMIN_PERM, + }, + [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = { + .doit = nldev_res_get_cq_raw_doit, + .dump = nldev_res_get_cq_raw_dumpit, + .flags = RDMA_NL_ADMIN_PERM, + }, + [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = { + .doit = nldev_res_get_mr_raw_doit, + .dump = nldev_res_get_mr_raw_dumpit, + .flags = RDMA_NL_ADMIN_PERM, + }, + [RDMA_NLDEV_CMD_STAT_GET_STATUS] = { + .doit = nldev_stat_get_counter_status_doit, + }, }; void __init nldev_init(void) @@ -2149,7 +2537,7 @@ void __init nldev_init(void) rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table); } -void __exit nldev_exit(void) +void nldev_exit(void) { rdma_nl_unregister(RDMA_NL_NLDEV); } |