aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/bnxt_re/main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/bnxt_re/main.c')
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c263
1 files changed, 217 insertions, 46 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index aafc19aa5de1..33a448036c2e 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -80,6 +80,79 @@ static DEFINE_MUTEX(bnxt_re_dev_lock);
static struct workqueue_struct *bnxt_re_wq;
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait);
+/* SR-IOV helper functions */
+
+static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
+{
+ struct bnxt *bp;
+
+ bp = netdev_priv(rdev->en_dev->net);
+ if (BNXT_VF(bp))
+ rdev->is_virtfn = 1;
+}
+
+/* Set the maximum number of each resource that the driver actually wants
+ * to allocate. This may be up to the maximum number the firmware has
+ * reserved for the function. The driver may choose to allocate fewer
+ * resources than the firmware maximum.
+ */
+static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
+{
+ u32 vf_qps = 0, vf_srqs = 0, vf_cqs = 0, vf_mrws = 0, vf_gids = 0;
+ u32 i;
+ u32 vf_pct;
+ u32 num_vfs;
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+
+ rdev->qplib_ctx.qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
+ dev_attr->max_qp);
+
+ rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
+ /* Use max_mr from fw since max_mrw does not get set */
+ rdev->qplib_ctx.mrw_count = min_t(u32, rdev->qplib_ctx.mrw_count,
+ dev_attr->max_mr);
+ rdev->qplib_ctx.srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
+ dev_attr->max_srq);
+ rdev->qplib_ctx.cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT,
+ dev_attr->max_cq);
+
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+ rdev->qplib_ctx.tqm_count[i] =
+ rdev->dev_attr.tqm_alloc_reqs[i];
+
+ if (rdev->num_vfs) {
+ /*
+ * Reserve a set of resources for the PF. Divide the remaining
+ * resources among the VFs
+ */
+ vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
+ num_vfs = 100 * rdev->num_vfs;
+ vf_qps = (rdev->qplib_ctx.qpc_count * vf_pct) / num_vfs;
+ vf_srqs = (rdev->qplib_ctx.srqc_count * vf_pct) / num_vfs;
+ vf_cqs = (rdev->qplib_ctx.cq_count * vf_pct) / num_vfs;
+ /*
+ * The driver allows many more MRs than other resources. If the
+ * firmware does also, then reserve a fixed amount for the PF
+ * and divide the rest among VFs. VFs may use many MRs for NFS
+ * mounts, ISER, NVME applications, etc. If the firmware
+ * severely restricts the number of MRs, then let PF have
+ * half and divide the rest among VFs, as for the other
+ * resource types.
+ */
+ if (rdev->qplib_ctx.mrw_count < BNXT_RE_MAX_MRW_COUNT_64K)
+ vf_mrws = rdev->qplib_ctx.mrw_count * vf_pct / num_vfs;
+ else
+ vf_mrws = (rdev->qplib_ctx.mrw_count -
+ BNXT_RE_RESVD_MR_FOR_PF) / rdev->num_vfs;
+ vf_gids = BNXT_RE_MAX_GID_PER_VF;
+ }
+ rdev->qplib_ctx.vf_res.max_mrw_per_vf = vf_mrws;
+ rdev->qplib_ctx.vf_res.max_gid_per_vf = vf_gids;
+ rdev->qplib_ctx.vf_res.max_qp_per_vf = vf_qps;
+ rdev->qplib_ctx.vf_res.max_srq_per_vf = vf_srqs;
+ rdev->qplib_ctx.vf_res.max_cq_per_vf = vf_cqs;
+}
+
/* for handling bnxt_en callbacks later */
static void bnxt_re_stop(void *p)
{
@@ -91,6 +164,15 @@ static void bnxt_re_start(void *p)
static void bnxt_re_sriov_config(void *p, int num_vfs)
{
+ struct bnxt_re_dev *rdev = p;
+
+ if (!rdev)
+ return;
+
+ rdev->num_vfs = num_vfs;
+ bnxt_re_set_resource_limits(rdev);
+ bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
+ &rdev->qplib_ctx);
}
static void bnxt_re_shutdown(void *p)
@@ -417,7 +499,7 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
return ERR_PTR(-EINVAL);
if (!(en_dev->flags & BNXT_EN_FLAG_ROCE_CAP)) {
- dev_dbg(&pdev->dev,
+ dev_info(&pdev->dev,
"%s: probe error: RoCE is not supported on this device",
ROCE_DRV_MODULE_NAME);
return ERR_PTR(-ENODEV);
@@ -490,6 +572,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->query_port = bnxt_re_query_port;
ibdev->get_port_immutable = bnxt_re_get_port_immutable;
+ ibdev->get_dev_fw_str = bnxt_re_query_fw_str;
ibdev->query_pkey = bnxt_re_query_pkey;
ibdev->query_gid = bnxt_re_query_gid;
ibdev->get_netdev = bnxt_re_get_netdev;
@@ -505,6 +588,12 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->query_ah = bnxt_re_query_ah;
ibdev->destroy_ah = bnxt_re_destroy_ah;
+ ibdev->create_srq = bnxt_re_create_srq;
+ ibdev->modify_srq = bnxt_re_modify_srq;
+ ibdev->query_srq = bnxt_re_query_srq;
+ ibdev->destroy_srq = bnxt_re_destroy_srq;
+ ibdev->post_srq_recv = bnxt_re_post_srq_recv;
+
ibdev->create_qp = bnxt_re_create_qp;
ibdev->modify_qp = bnxt_re_modify_qp;
ibdev->query_qp = bnxt_re_query_qp;
@@ -541,14 +630,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
}
-static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
-
- return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->dev_attr.fw_ver);
-}
-
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
{
@@ -558,12 +639,10 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
}
static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
-static DEVICE_ATTR(fw_rev, 0444, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
static struct device_attribute *bnxt_re_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_rev,
&dev_attr_hca_type
};
@@ -577,7 +656,6 @@ static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
mutex_unlock(&bnxt_re_dev_lock);
synchronize_rcu();
- flush_workqueue(bnxt_re_wq);
ib_dealloc_device(&rdev->ibdev);
/* rdev is gone */
@@ -616,10 +694,10 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
return rdev;
}
-static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
- struct creq_func_event *aeqe)
+static int bnxt_re_handle_unaffi_async_event(struct creq_func_event
+ *unaffi_async)
{
- switch (aeqe->event) {
+ switch (unaffi_async->event) {
case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
break;
case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
@@ -648,6 +726,93 @@ static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
return 0;
}
+static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
+ struct bnxt_re_qp *qp)
+{
+ struct ib_event event;
+
+ memset(&event, 0, sizeof(event));
+ if (qp->qplib_qp.srq) {
+ event.device = &qp->rdev->ibdev;
+ event.element.qp = &qp->ib_qp;
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ }
+
+ if (event.device && qp->ib_qp.event_handler)
+ qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+
+ return 0;
+}
+
+static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
+ void *obj)
+{
+ int rc = 0;
+ u8 event;
+
+ if (!obj)
+ return rc; /* QP was already dead, still return success */
+
+ event = affi_async->event;
+ if (event == CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION) {
+ struct bnxt_qplib_qp *lib_qp = obj;
+ struct bnxt_re_qp *qp = container_of(lib_qp, struct bnxt_re_qp,
+ qplib_qp);
+ rc = bnxt_re_handle_qp_async_event(affi_async, qp);
+ }
+ return rc;
+}
+
+static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
+ void *aeqe, void *obj)
+{
+ struct creq_qp_event *affi_async;
+ struct creq_func_event *unaffi_async;
+ u8 type;
+ int rc;
+
+ type = ((struct creq_base *)aeqe)->type;
+ if (type == CREQ_BASE_TYPE_FUNC_EVENT) {
+ unaffi_async = aeqe;
+ rc = bnxt_re_handle_unaffi_async_event(unaffi_async);
+ } else {
+ affi_async = aeqe;
+ rc = bnxt_re_handle_affi_async_event(affi_async, obj);
+ }
+
+ return rc;
+}
+
+static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_srq *handle, u8 event)
+{
+ struct bnxt_re_srq *srq = container_of(handle, struct bnxt_re_srq,
+ qplib_srq);
+ struct ib_event ib_event;
+ int rc = 0;
+
+ if (!srq) {
+ dev_err(NULL, "%s: SRQ is NULL, SRQN not handled",
+ ROCE_DRV_MODULE_NAME);
+ rc = -EINVAL;
+ goto done;
+ }
+ ib_event.device = &srq->rdev->ibdev;
+ ib_event.element.srq = &srq->ib_srq;
+ if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
+ ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ else
+ ib_event.event = IB_EVENT_SRQ_ERR;
+
+ if (srq->ib_srq.event_handler) {
+ /* Lock event_handler? */
+ (*srq->ib_srq.event_handler)(&ib_event,
+ srq->ib_srq.srq_context);
+ }
+done:
+ return rc;
+}
+
static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
struct bnxt_qplib_cq *handle)
{
@@ -690,7 +855,8 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1],
i - 1, rdev->msix_entries[i].vector,
rdev->msix_entries[i].db_offset,
- &bnxt_re_cqn_handler, NULL);
+ &bnxt_re_cqn_handler,
+ &bnxt_re_srqn_handler);
if (rc) {
dev_err(rdev_to_dev(rdev),
@@ -734,7 +900,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
/* Configure and allocate resources for qplib */
rdev->qplib_res.rcfw = &rdev->rcfw;
- rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
+ rdev->is_virtfn);
if (rc)
goto fail;
@@ -1035,19 +1202,6 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
}
}
-static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
-{
- u32 i;
-
- rdev->qplib_ctx.qpc_count = BNXT_RE_MAX_QPC_COUNT;
- rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT;
- rdev->qplib_ctx.srqc_count = BNXT_RE_MAX_SRQC_COUNT;
- rdev->qplib_ctx.cq_count = BNXT_RE_MAX_CQ_COUNT;
- for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
- rdev->qplib_ctx.tqm_count[i] =
- rdev->dev_attr.tqm_alloc_reqs[i];
-}
-
/* worker thread for polling periodic events. Now used for QoS programming*/
static void bnxt_re_worker(struct work_struct *work)
{
@@ -1070,6 +1224,9 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
}
set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ /* Check whether VF or PF */
+ bnxt_re_get_sriov_func_type(rdev);
+
rc = bnxt_re_request_msix(rdev);
if (rc) {
pr_err("Failed to get MSI-X vectors: %#x\n", rc);
@@ -1101,16 +1258,18 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
(rdev->en_dev->pdev, &rdev->rcfw,
rdev->msix_entries[BNXT_RE_AEQ_IDX].vector,
rdev->msix_entries[BNXT_RE_AEQ_IDX].db_offset,
- 0, &bnxt_re_aeq_handler);
+ rdev->is_virtfn, &bnxt_re_aeq_handler);
if (rc) {
pr_err("Failed to enable RCFW channel: %#x\n", rc);
goto free_ring;
}
- rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
+ rdev->is_virtfn);
if (rc)
goto disable_rcfw;
- bnxt_re_set_resource_limits(rdev);
+ if (!rdev->is_virtfn)
+ bnxt_re_set_resource_limits(rdev);
rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0);
if (rc) {
@@ -1125,7 +1284,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
goto free_ctx;
}
- rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx, 0);
+ rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx,
+ rdev->is_virtfn);
if (rc) {
pr_err("Failed to initialize RCFW: %#x\n", rc);
goto free_sctx;
@@ -1144,13 +1304,15 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
goto fail;
}
- rc = bnxt_re_setup_qos(rdev);
- if (rc)
- pr_info("RoCE priority not yet configured\n");
+ if (!rdev->is_virtfn) {
+ rc = bnxt_re_setup_qos(rdev);
+ if (rc)
+ pr_info("RoCE priority not yet configured\n");
- INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
- set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
- schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+ INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
+ set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
+ schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+ }
/* Register ib dev */
rc = bnxt_re_register_ib(rdev);
@@ -1176,6 +1338,7 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
&rdev->active_width);
+ set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
@@ -1277,7 +1440,7 @@ static void bnxt_re_task(struct work_struct *work)
break;
}
smp_mb__before_atomic();
- clear_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags);
+ atomic_dec(&rdev->sched_count);
kfree(re_work);
}
@@ -1339,7 +1502,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
/* netdev notifier will call NETDEV_UNREGISTER again later since
* we are still holding the reference to the netdev
*/
- if (test_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags))
+ if (atomic_read(&rdev->sched_count) > 0)
goto exit;
bnxt_re_ib_unreg(rdev, false);
bnxt_re_remove_one(rdev);
@@ -1359,7 +1522,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
re_work->vlan_dev = (real_dev == netdev ?
NULL : netdev);
INIT_WORK(&re_work->work, bnxt_re_task);
- set_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags);
+ atomic_inc(&rdev->sched_count);
queue_work(bnxt_re_wq, &re_work->work);
}
}
@@ -1400,7 +1563,7 @@ err_netdev:
static void __exit bnxt_re_mod_exit(void)
{
- struct bnxt_re_dev *rdev;
+ struct bnxt_re_dev *rdev, *next;
LIST_HEAD(to_be_deleted);
mutex_lock(&bnxt_re_dev_lock);
@@ -1408,9 +1571,17 @@ static void __exit bnxt_re_mod_exit(void)
if (!list_empty(&bnxt_re_dev_list))
list_splice_init(&bnxt_re_dev_list, &to_be_deleted);
mutex_unlock(&bnxt_re_dev_lock);
-
- list_for_each_entry(rdev, &to_be_deleted, list) {
+ /*
+ * Cleanup the devices in reverse order so that the VF device
+ * cleanup is done before PF cleanup
+ */
+ list_for_each_entry_safe_reverse(rdev, next, &to_be_deleted, list) {
dev_info(rdev_to_dev(rdev), "Unregistering Device");
+ /*
+ * Flush out any scheduled tasks before destroying the
+ * resources
+ */
+ flush_workqueue(bnxt_re_wq);
bnxt_re_dev_stop(rdev);
bnxt_re_ib_unreg(rdev, true);
bnxt_re_remove_one(rdev);