Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/Kconfig | 1
-rw-r--r--  drivers/infiniband/core/Makefile | 3
-rw-r--r--  drivers/infiniband/core/addr.c | 6
-rw-r--r--  drivers/infiniband/core/agent.c | 4
-rw-r--r--  drivers/infiniband/core/cm.c | 119
-rw-r--r--  drivers/infiniband/core/cma.c | 132
-rw-r--r--  drivers/infiniband/core/device.c | 33
-rw-r--r--  drivers/infiniband/core/fmr_pool.c | 49
-rw-r--r--  drivers/infiniband/core/mad.c | 28
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c | 16
-rw-r--r--  drivers/infiniband/core/multicast.c | 27
-rw-r--r--  drivers/infiniband/core/rdma_core.c | 627
-rw-r--r--  drivers/infiniband/core/rdma_core.h | 78
-rw-r--r--  drivers/infiniband/core/sa_query.c | 886
-rw-r--r--  drivers/infiniband/core/sysfs.c | 6
-rw-r--r--  drivers/infiniband/core/ucm.c | 41
-rw-r--r--  drivers/infiniband/core/ucma.c | 24
-rw-r--r--  drivers/infiniband/core/umem.c | 17
-rw-r--r--  drivers/infiniband/core/umem_odp.c | 81
-rw-r--r--  drivers/infiniband/core/user_mad.c | 48
-rw-r--r--  drivers/infiniband/core/uverbs.h | 69
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 1582
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 510
-rw-r--r--  drivers/infiniband/core/uverbs_marshall.c | 81
-rw-r--r--  drivers/infiniband/core/uverbs_std_types.c | 275
-rw-r--r--  drivers/infiniband/core/verbs.c | 86
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 118
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.h | 6
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_dbg.c | 35
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.c | 201
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.h | 7
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_resource.c | 25
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch.c | 19
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.c | 269
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.h | 18
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cq.c | 21
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_ev.c | 26
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_mem.c | 2
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c | 118
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.h | 9
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_qp.c | 67
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c | 393
-rw-r--r--  drivers/infiniband/hw/cxgb4/cq.c | 79
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c | 141
-rw-r--r--  drivers/infiniband/hw/cxgb4/ev.c | 39
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 48
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c | 44
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c | 44
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c | 96
-rw-r--r--  drivers/infiniband/hw/cxgb4/resource.c | 64
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h | 24
-rw-r--r--  drivers/infiniband/hw/hfi1/Makefile | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/aspm.h | 15
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c | 641
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.h | 30
-rw-r--r--  drivers/infiniband/hw/hfi1/common.h | 15
-rw-r--r--  drivers/infiniband/hw/hfi1/debugfs.c | 238
-rw-r--r--  drivers/infiniband/hw/hfi1/debugfs.h | 62
-rw-r--r--  drivers/infiniband/hw/hfi1/device.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/driver.c | 170
-rw-r--r--  drivers/infiniband/hw/hfi1/file_ops.c | 442
-rw-r--r--  drivers/infiniband/hw/hfi1/firmware.c | 14
-rw-r--r--  drivers/infiniband/hw/hfi1/hfi.h | 201
-rw-r--r--  drivers/infiniband/hw/hfi1/init.c | 102
-rw-r--r--  drivers/infiniband/hw/hfi1/intr.c | 30
-rw-r--r--  drivers/infiniband/hw/hfi1/mad.c | 95
-rw-r--r--  drivers/infiniband/hw/hfi1/pcie.c | 32
-rw-r--r--  drivers/infiniband/hw/hfi1/pio.c | 19
-rw-r--r--  drivers/infiniband/hw/hfi1/pio.h | 34
-rw-r--r--  drivers/infiniband/hw/hfi1/qp.c | 16
-rw-r--r--  drivers/infiniband/hw/hfi1/rc.c | 68
-rw-r--r--  drivers/infiniband/hw/hfi1/ruc.c | 203
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.c | 43
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.h | 46
-rw-r--r--  drivers/infiniband/hw/hfi1/sysfs.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/trace.c | 5
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_ctxts.h | 17
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 8
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_misc.h | 48
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_rc.h | 7
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_tx.h | 77
-rw-r--r--  drivers/infiniband/hw/hfi1/uc.c | 14
-rw-r--r--  drivers/infiniband/hw/hfi1/ud.c | 73
-rw-r--r--  drivers/infiniband/hw/hfi1/user_exp_rcv.c | 196
-rw-r--r--  drivers/infiniband/hw/hfi1/user_exp_rcv.h | 17
-rw-r--r--  drivers/infiniband/hw/hfi1/user_pages.c | 5
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.c | 201
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.h | 18
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.c | 153
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.h | 20
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic.h | 184
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic_main.c | 903
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic_sdma.c | 323
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_ah.c | 62
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cmd.c | 6
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c | 3
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h | 5
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 121
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c | 22
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c | 3
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_cm.c | 5
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_utils.c | 10
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_verbs.c | 12
-rw-r--r--  drivers/infiniband/hw/mlx4/ah.c | 134
-rw-r--r--  drivers/infiniband/hw/mlx4/cm.c | 10
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx4/mad.c | 87
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 33
-rw-r--r--  drivers/infiniband/hw/mlx4/mcg.c | 11
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 6
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c | 6
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 112
-rw-r--r--  drivers/infiniband/hw/mlx4/srq.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/ah.c | 74
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.c | 11
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.h | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c | 21
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 419
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c | 13
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 12
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 8
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 344
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 107
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_av.c | 69
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.c | 12
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_dev.h | 4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mad.c | 17
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c | 7
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c | 94
-rw-r--r--  drivers/infiniband/hw/nes/nes_hw.c | 7
-rw-r--r--  drivers/infiniband/hw/nes/nes_mgt.c | 5
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c | 12
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma.h | 6
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 64
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 10
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 23
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 2
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 54
-rw-r--r--  drivers/infiniband/hw/qedr/main.c | 89
-rw-r--r--  drivers/infiniband/hw/qedr/qedr.h | 20
-rw-r--r--  drivers/infiniband/hw/qedr/qedr_cm.c | 19
-rw-r--r--  drivers/infiniband/hw/qedr/qedr_cm.h | 2
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c | 240
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.h | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_fs.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba6120.c | 10
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7220.c | 5
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c | 10
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c | 15
-rw-r--r--  drivers/infiniband/hw/qib/qib_mad.c | 7
-rw-r--r--  drivers/infiniband/hw/qib/qib_qp.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_rc.c | 31
-rw-r--r--  drivers/infiniband/hw/qib/qib_ruc.c | 77
-rw-r--r--  drivers/infiniband/hw/qib/qib_uc.c | 6
-rw-r--r--  drivers/infiniband/hw/qib/qib_ud.c | 57
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c | 37
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h | 4
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_common_util.h | 38
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 1
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 48
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 2
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 8
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c | 43
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 8
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 30
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 2
-rw-r--r--  drivers/infiniband/sw/rdmavt/ah.c | 39
-rw-r--r--  drivers/infiniband/sw/rdmavt/ah.h | 6
-rw-r--r--  drivers/infiniband/sw/rdmavt/cq.c | 3
-rw-r--r--  drivers/infiniband/sw/rdmavt/mad.c | 2
-rw-r--r--  drivers/infiniband/sw/rdmavt/mcast.c | 61
-rw-r--r--  drivers/infiniband/sw/rdmavt/mr.c | 57
-rw-r--r--  drivers/infiniband/sw/rdmavt/qp.c | 44
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace.h | 4
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace_cq.h | 127
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace_rc.h | 109
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace_tx.h | 34
-rw-r--r--  drivers/infiniband/sw/rxe/Kconfig | 1
-rw-r--r--  drivers/infiniband/sw/rxe/Makefile | 3
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.c | 6
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.h | 20
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_av.c | 32
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_comp.c | 14
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_hw_counters.c | 78
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_hw_counters.h | 61
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_icrc.c | 6
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h | 12
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mr.c | 14
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c | 83
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_param.h | 1
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c | 33
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_recv.c | 7
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c | 4
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c | 7
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c | 35
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h | 9
-rw-r--r--  drivers/infiniband/ulp/Makefile | 1
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h | 44
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c | 73
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 65
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_fs.c | 13
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c | 364
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c | 456
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 120
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 13
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 64
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 12
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c | 2
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/Kconfig | 8
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/Makefile | 7
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c | 475
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h | 489
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c | 187
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h | 329
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c | 389
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 1056
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c | 390
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 13
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h | 2
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c | 13
221 files changed, 13877 insertions, 6021 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 66f86027ed47..234fe01904e7 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -85,6 +85,7 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
source "drivers/infiniband/ulp/iser/Kconfig"
source "drivers/infiniband/ulp/isert/Kconfig"
+source "drivers/infiniband/ulp/opa_vnic/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
source "drivers/infiniband/sw/rxe/Kconfig"
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index e426ac877d19..6ebd9ad95010 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -29,4 +29,5 @@ ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
-ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
+ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
+ rdma_core.o uverbs_std_types.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 329d08c884f6..02971e239a18 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -444,9 +444,9 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
fl6.saddr = src_in->sin6_addr;
fl6.flowi6_oif = addr->bound_dev_if;
- dst = ip6_route_output(addr->net, NULL, &fl6);
- if ((ret = dst->error))
- goto put;
+ ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
+ if (ret < 0)
+ return ret;
rt = (struct rt6_info *)dst;
if (ipv6_addr_any(&fl6.saddr)) {
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 11dacd97a667..324ef85a13b6 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -137,13 +137,13 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
err2:
ib_free_send_mad(send_buf);
err1:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
}
static void agent_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
- ib_destroy_ah(mad_send_wc->send_buf->ah);
+ rdma_destroy_ah(mad_send_wc->send_buf->ah);
ib_free_send_mad(mad_send_wc->send_buf);
}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 6535f09dc575..1844770f3ae8 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -228,7 +228,7 @@ struct cm_device {
struct cm_av {
struct cm_port *port;
union ib_gid dgid;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
u16 pkey_index;
u8 timeout;
};
@@ -241,7 +241,7 @@ struct cm_work {
__be32 local_id; /* Established / timewait */
__be32 remote_id;
struct ib_cm_event cm_event;
- struct ib_sa_path_rec path[0];
+ struct sa_path_rec path[0];
};
struct cm_timewait_info {
@@ -343,7 +343,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
ret = -ENODEV;
goto out;
}
- ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
+ ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto out;
@@ -355,7 +355,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
ret = PTR_ERR(m);
goto out;
}
@@ -390,7 +390,7 @@ static int cm_alloc_response_msg(struct cm_port *port,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
return PTR_ERR(m);
}
m->ah = ah;
@@ -400,7 +400,7 @@ static int cm_alloc_response_msg(struct cm_port *port,
static void cm_free_msg(struct ib_mad_send_buf *msg)
{
- ib_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah);
if (msg->context[0])
cm_deref_id(msg->context[0]);
ib_free_send_mad(msg);
@@ -440,7 +440,7 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}
-static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
+static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
struct cm_id_private *cm_id_priv)
{
struct cm_device *cm_dev;
@@ -453,7 +453,8 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
read_lock_irqsave(&cm.device_lock, flags);
list_for_each_entry(cm_dev, &cm.device_list, list) {
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
- path->gid_type, ndev, &p, NULL)) {
+ sa_conv_pathrec_to_gid_type(path),
+ ndev, &p, NULL)) {
port = cm_dev->port[p-1];
break;
}
@@ -1172,8 +1173,8 @@ static void cm_format_req(struct cm_req_msg *req_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_req_param *param)
{
- struct ib_sa_path_rec *pri_path = param->primary_path;
- struct ib_sa_path_rec *alt_path = param->alternate_path;
+ struct sa_path_rec *pri_path = param->primary_path;
+ struct sa_path_rec *alt_path = param->alternate_path;
cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
@@ -1202,8 +1203,10 @@ static void cm_format_req(struct cm_req_msg *req_msg,
}
if (pri_path->hop_limit <= 1) {
- req_msg->primary_local_lid = pri_path->slid;
- req_msg->primary_remote_lid = pri_path->dlid;
+ req_msg->primary_local_lid =
+ htons(ntohl(sa_path_get_slid(pri_path)));
+ req_msg->primary_remote_lid =
+ htons(ntohl(sa_path_get_dlid(pri_path)));
} else {
/* Work-around until there's a way to obtain remote LID info */
req_msg->primary_local_lid = IB_LID_PERMISSIVE;
@@ -1223,8 +1226,10 @@ static void cm_format_req(struct cm_req_msg *req_msg,
if (alt_path) {
if (alt_path->hop_limit <= 1) {
- req_msg->alt_local_lid = alt_path->slid;
- req_msg->alt_remote_lid = alt_path->dlid;
+ req_msg->alt_local_lid =
+ htons(ntohl(sa_path_get_slid(alt_path)));
+ req_msg->alt_remote_lid =
+ htons(ntohl(sa_path_get_dlid(alt_path)));
} else {
req_msg->alt_local_lid = IB_LID_PERMISSIVE;
req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
@@ -1401,14 +1406,15 @@ static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
}
static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
- struct ib_sa_path_rec *primary_path,
- struct ib_sa_path_rec *alt_path)
+ struct sa_path_rec *primary_path,
+ struct sa_path_rec *alt_path)
{
- memset(primary_path, 0, sizeof *primary_path);
primary_path->dgid = req_msg->primary_local_gid;
primary_path->sgid = req_msg->primary_remote_gid;
- primary_path->dlid = req_msg->primary_local_lid;
- primary_path->slid = req_msg->primary_remote_lid;
+ sa_path_set_dlid(primary_path,
+ htonl(ntohs(req_msg->primary_local_lid)));
+ sa_path_set_slid(primary_path,
+ htonl(ntohs(req_msg->primary_remote_lid)));
primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
primary_path->hop_limit = req_msg->primary_hop_limit;
primary_path->traffic_class = req_msg->primary_traffic_class;
@@ -1423,14 +1429,15 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
primary_path->packet_life_time =
cm_req_get_primary_local_ack_timeout(req_msg);
primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
- primary_path->service_id = req_msg->service_id;
+ sa_path_set_service_id(primary_path, req_msg->service_id);
if (req_msg->alt_local_lid) {
- memset(alt_path, 0, sizeof *alt_path);
alt_path->dgid = req_msg->alt_local_gid;
alt_path->sgid = req_msg->alt_remote_gid;
- alt_path->dlid = req_msg->alt_local_lid;
- alt_path->slid = req_msg->alt_remote_lid;
+ sa_path_set_dlid(alt_path,
+ htonl(ntohs(req_msg->alt_local_lid)));
+ sa_path_set_slid(alt_path,
+ htonl(ntohs(req_msg->alt_remote_lid)));
alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
alt_path->hop_limit = req_msg->alt_hop_limit;
alt_path->traffic_class = req_msg->alt_traffic_class;
@@ -1445,7 +1452,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
alt_path->packet_life_time =
cm_req_get_alt_local_ack_timeout(req_msg);
alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
- alt_path->service_id = req_msg->service_id;
+ sa_path_set_service_id(alt_path, req_msg->service_id);
}
}
@@ -1722,6 +1729,7 @@ static int cm_req_handler(struct cm_work *work)
struct cm_req_msg *req_msg;
union ib_gid gid;
struct ib_gid_attr gid_attr;
+ const struct ib_global_route *grh;
int ret;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1758,21 +1766,34 @@ static int cm_req_handler(struct cm_work *work)
cm_id_priv->id.service_mask = ~cpu_to_be64(0);
cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
- cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
- memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
- work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
+ memset(&work->path[0], 0, sizeof(work->path[0]));
+ memset(&work->path[1], 0, sizeof(work->path[1]));
+ grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
work->port->port_num,
- cm_id_priv->av.ah_attr.grh.sgid_index,
+ grh->sgid_index,
&gid, &gid_attr);
if (!ret) {
if (gid_attr.ndev) {
- work->path[0].ifindex = gid_attr.ndev->ifindex;
- work->path[0].net = dev_net(gid_attr.ndev);
+ work->path[0].rec_type =
+ sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
+ sa_path_set_ifindex(&work->path[0],
+ gid_attr.ndev->ifindex);
+ sa_path_set_ndev(&work->path[0],
+ dev_net(gid_attr.ndev));
dev_put(gid_attr.ndev);
+ } else {
+ work->path[0].rec_type = SA_PATH_REC_TYPE_IB;
}
- work->path[0].gid_type = gid_attr.gid_type;
+ if (req_msg->alt_local_lid)
+ work->path[1].rec_type = work->path[0].rec_type;
+ cm_format_paths_from_req(req_msg, &work->path[0],
+ &work->path[1]);
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
+ sa_path_set_dmac(&work->path[0],
+ cm_id_priv->av.ah_attr.roce.dmac);
+ work->path[0].hop_limit = grh->hop_limit;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
cm_id_priv);
}
@@ -1782,11 +1803,18 @@ static int cm_req_handler(struct cm_work *work)
&work->path[0].sgid,
&gid_attr);
if (!err && gid_attr.ndev) {
- work->path[0].ifindex = gid_attr.ndev->ifindex;
- work->path[0].net = dev_net(gid_attr.ndev);
+ work->path[0].rec_type =
+ sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
+ sa_path_set_ifindex(&work->path[0],
+ gid_attr.ndev->ifindex);
+ sa_path_set_ndev(&work->path[0],
+ dev_net(gid_attr.ndev));
dev_put(gid_attr.ndev);
+ } else {
+ work->path[0].rec_type = SA_PATH_REC_TYPE_IB;
}
- work->path[0].gid_type = gid_attr.gid_type;
+ if (req_msg->alt_local_lid)
+ work->path[1].rec_type = work->path[0].rec_type;
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid, sizeof work->path[0].sgid,
NULL, 0);
@@ -2811,7 +2839,7 @@ out:
static void cm_format_lap(struct cm_lap_msg *lap_msg,
struct cm_id_private *cm_id_priv,
- struct ib_sa_path_rec *alternate_path,
+ struct sa_path_rec *alternate_path,
const void *private_data,
u8 private_data_len)
{
@@ -2822,8 +2850,10 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
/* todo: need remote CM response timeout */
cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
- lap_msg->alt_local_lid = alternate_path->slid;
- lap_msg->alt_remote_lid = alternate_path->dlid;
+ lap_msg->alt_local_lid =
+ htons(ntohl(sa_path_get_slid(alternate_path)));
+ lap_msg->alt_remote_lid =
+ htons(ntohl(sa_path_get_dlid(alternate_path)));
lap_msg->alt_local_gid = alternate_path->sgid;
lap_msg->alt_remote_gid = alternate_path->dgid;
cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
@@ -2841,7 +2871,7 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
}
int ib_send_cm_lap(struct ib_cm_id *cm_id,
- struct ib_sa_path_rec *alternate_path,
+ struct sa_path_rec *alternate_path,
const void *private_data,
u8 private_data_len)
{
@@ -2895,14 +2925,15 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
EXPORT_SYMBOL(ib_send_cm_lap);
static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
- struct ib_sa_path_rec *path,
+ struct sa_path_rec *path,
struct cm_lap_msg *lap_msg)
{
memset(path, 0, sizeof *path);
+ path->rec_type = SA_PATH_REC_TYPE_IB;
path->dgid = lap_msg->alt_local_gid;
path->sgid = lap_msg->alt_remote_gid;
- path->dlid = lap_msg->alt_local_lid;
- path->slid = lap_msg->alt_remote_lid;
+ sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid)));
+ sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid)));
path->flow_label = cm_lap_get_flow_label(lap_msg);
path->hop_limit = lap_msg->alt_hop_limit;
path->traffic_class = cm_lap_get_traffic_class(lap_msg);
@@ -3708,7 +3739,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
atomic_long_inc(&port->counter_group[CM_RECV].
counter[attr_id - CM_ATTR_ID_OFFSET]);
- work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
+ work = kmalloc(sizeof(*work) + sizeof(struct sa_path_rec) * paths,
GFP_KERNEL);
if (!work) {
ib_free_recv_mad(mad_recv_wc);
@@ -3800,7 +3831,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
cm_id_priv->responder_resources;
qp_attr->min_rnr_timer = 0;
}
- if (cm_id_priv->alt_av.ah_attr.dlid) {
+ if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
*qp_attr_mask |= IB_QP_ALT_PATH;
qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
@@ -3854,7 +3885,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
default:
break;
}
- if (cm_id_priv->alt_av.ah_attr.dlid) {
+ if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
qp_attr->path_mig_state = IB_MIG_REARM;
}
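
For reference only (not part of the patch): a minimal sketch of how a caller fills the new protocol-agnostic struct sa_path_rec with the sa_path_set_*() helpers that the cm.c hunks above convert to. The helper names and SA_PATH_REC_TYPE_* constants are the ones appearing in this diff; the surrounding demo_fill_ib_path() function, its argument list, and the header choice are illustrative assumptions.

#include <linux/string.h>
#include <rdma/ib_sa.h>

/* Hypothetical helper -- shows the accessor-based style only. */
static void demo_fill_ib_path(struct sa_path_rec *path,
			      const union ib_gid *sgid,
			      const union ib_gid *dgid,
			      u16 slid, u16 dlid, __be64 service_id)
{
	memset(path, 0, sizeof(*path));

	/*
	 * The record type is set first; the hunks above do the same so the
	 * LID and service-id helpers know whether this is an IB or OPA
	 * record.
	 */
	path->rec_type = SA_PATH_REC_TYPE_IB;

	path->sgid = *sgid;
	path->dgid = *dgid;

	/* In this series the LID accessors take __be32 values. */
	sa_path_set_slid(path, cpu_to_be32(slid));
	sa_path_set_dlid(path, cpu_to_be32(dlid));
	sa_path_set_service_id(path, service_id);
}
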
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index acd10d666f1c..91b7a2fe5a55 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -929,7 +929,8 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
goto out;
ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
- qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
+ rdma_ah_read_grh(&qp_attr.ah_attr)->sgid_index,
+ &sgid, NULL);
if (ret)
goto out;
@@ -1127,7 +1128,7 @@ static inline int cma_any_port(struct sockaddr *addr)
static void cma_save_ib_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
struct rdma_cm_id *listen_id,
- struct ib_sa_path_rec *path)
+ struct sa_path_rec *path)
{
struct sockaddr_ib *listen_ib, *ib;
@@ -1139,7 +1140,7 @@ static void cma_save_ib_info(struct sockaddr *src_addr,
ib->sib_pkey = path->pkey;
ib->sib_flowinfo = path->flow_label;
memcpy(&ib->sib_addr, &path->sgid, 16);
- ib->sib_sid = path->service_id;
+ ib->sib_sid = sa_path_get_service_id(path);
ib->sib_scope_id = 0;
} else {
ib->sib_pkey = listen_ib->sib_pkey;
@@ -1273,7 +1274,8 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event,
memcpy(&req->local_gid, &req_param->primary_path->sgid,
sizeof(req->local_gid));
req->has_gid = true;
- req->service_id = req_param->primary_path->service_id;
+ req->service_id =
+ sa_path_get_service_id(req_param->primary_path);
req->pkey = be16_to_cpu(req_param->primary_path->pkey);
if (req->pkey != req_param->bth_pkey)
pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
@@ -1755,6 +1757,9 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
event.status = -ETIMEDOUT;
break;
case IB_CM_REP_RECEIVED:
+ if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
+ (id_priv->id.qp_type != IB_QPT_UD))
+ ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
if (id_priv->id.qp) {
event.status = cma_rep_recv(id_priv);
event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
@@ -1821,8 +1826,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
struct rdma_cm_id *id;
struct rdma_route *rt;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
- const __be64 service_id =
- ib_event->param.req_rcvd.primary_path->service_id;
+ struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path;
+ const __be64 service_id = sa_path_get_service_id(path);
int ret;
id = rdma_create_id(listen_id->route.addr.dev_addr.net,
@@ -1844,7 +1849,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
if (!rt->path_rec)
goto err;
- rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
+ rt->path_rec[0] = *path;
if (rt->num_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
@@ -2297,7 +2302,7 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
}
EXPORT_SYMBOL(rdma_set_service_type);
-static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
+static void cma_query_handler(int status, struct sa_path_rec *path_rec,
void *context)
{
struct cma_work *work = context;
@@ -2324,18 +2329,25 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
struct cma_work *work)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- struct ib_sa_path_rec path_rec;
+ struct sa_path_rec path_rec;
ib_sa_comp_mask comp_mask;
struct sockaddr_in6 *sin6;
struct sockaddr_ib *sib;
memset(&path_rec, 0, sizeof path_rec);
+
+ if (rdma_cap_opa_ah(id_priv->id.device, id_priv->id.port_num))
+ path_rec.rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ path_rec.rec_type = SA_PATH_REC_TYPE_IB;
rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
path_rec.numb_path = 1;
path_rec.reversible = 1;
- path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
+ sa_path_set_service_id(&path_rec,
+ rdma_get_service_id(&id_priv->id,
+ cma_dst_addr(id_priv)));
comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
@@ -2449,7 +2461,7 @@ err1:
}
int rdma_set_ib_paths(struct rdma_cm_id *id,
- struct ib_sa_path_rec *path_rec, int num_paths)
+ struct sa_path_rec *path_rec, int num_paths)
{
struct rdma_id_private *id_priv;
int ret;
@@ -2528,6 +2540,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
struct cma_work *work;
int ret;
struct net_device *ndev = NULL;
+ enum ib_gid_type gid_type = IB_GID_TYPE_IB;
u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
@@ -2572,21 +2585,22 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
}
}
- route->path_rec->net = &init_net;
- route->path_rec->ifindex = ndev->ifindex;
supported_gids = roce_gid_type_mask_support(id_priv->id.device,
id_priv->id.port_num);
- route->path_rec->gid_type =
- cma_route_gid_type(addr->dev_addr.network,
- supported_gids,
- id_priv->gid_type);
+ gid_type = cma_route_gid_type(addr->dev_addr.network,
+ supported_gids,
+ id_priv->gid_type);
+ route->path_rec->rec_type =
+ sa_conv_gid_to_pathrec_type(gid_type);
+ sa_path_set_ndev(route->path_rec, &init_net);
+ sa_path_set_ifindex(route->path_rec, ndev->ifindex);
}
if (!ndev) {
ret = -ENODEV;
goto err2;
}
- memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
+ sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&route->path_rec->sgid);
@@ -2594,8 +2608,10 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
&route->path_rec->dgid);
/* Use the hint from IP Stack to select GID Type */
- if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
- route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
+ gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
+
if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
/* TODO: get the hoplimit from the inet/inet6 device */
route->path_rec->hop_limit = addr->dev_addr.hoplimit;
@@ -3941,63 +3957,10 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
}
}
-static void cma_query_sa_classport_info_cb(int status,
- struct ib_class_port_info *rec,
- void *context)
-{
- struct class_port_info_context *cb_ctx = context;
-
- WARN_ON(!context);
-
- if (status || !rec) {
- pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n",
- cb_ctx->device->name, cb_ctx->port_num, status);
- goto out;
- }
-
- memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info));
-
-out:
- complete(&cb_ctx->done);
-}
-
-static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num,
- struct ib_class_port_info *class_port_info)
-{
- struct class_port_info_context *cb_ctx;
- int ret;
-
- cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL);
- if (!cb_ctx)
- return -ENOMEM;
-
- cb_ctx->device = device;
- cb_ctx->class_port_info = class_port_info;
- cb_ctx->port_num = port_num;
- init_completion(&cb_ctx->done);
-
- ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num,
- CMA_QUERY_CLASSPORT_INFO_TIMEOUT,
- GFP_KERNEL, cma_query_sa_classport_info_cb,
- cb_ctx, &cb_ctx->sa_query);
- if (ret < 0) {
- pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n",
- device->name, port_num, ret);
- goto out;
- }
-
- wait_for_completion(&cb_ctx->done);
-
-out:
- kfree(cb_ctx);
- return ret;
-}
-
static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
struct ib_sa_mcmember_rec rec;
- struct ib_class_port_info class_port_info;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
ib_sa_comp_mask comp_mask;
int ret;
@@ -4018,21 +3981,14 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = mc->join_state;
- if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) {
- ret = cma_query_sa_classport_info(id_priv->id.device,
- id_priv->id.port_num,
- &class_port_info);
-
- if (ret)
- return ret;
-
- if (!(ib_get_cpi_capmask2(&class_port_info) &
- IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
- pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
- "RDMA CM: SM doesn't support Send Only Full Member option\n",
- id_priv->id.device->name, id_priv->id.port_num);
- return -EOPNOTSUPP;
- }
+ if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
+ (!ib_sa_sendonly_fullmem_support(&sa_client,
+ id_priv->id.device,
+ id_priv->id.port_num))) {
+ pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
+ "RDMA CM: SM doesn't support Send Only Full Member option\n",
+ id_priv->id.device->name, id_priv->id.port_num);
+ return -EOPNOTSUPP;
}
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 7c9e34d679d3..81d447da0048 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -172,8 +172,16 @@ static void ib_device_release(struct device *device)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
- ib_cache_release_one(dev);
- kfree(dev->port_immutable);
+ WARN_ON(dev->reg_state == IB_DEV_REGISTERED);
+ if (dev->reg_state == IB_DEV_UNREGISTERED) {
+ /*
+ * In IB_DEV_UNINITIALIZED state, cache or port table
+ * is not even created. Free cache and port table only when
+ * device reaches UNREGISTERED state.
+ */
+ ib_cache_release_one(dev);
+ kfree(dev->port_immutable);
+ }
kfree(dev);
}
@@ -380,32 +388,27 @@ int ib_register_device(struct ib_device *device,
ret = ib_cache_setup_one(device);
if (ret) {
pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
- goto out;
+ goto port_cleanup;
}
ret = ib_device_register_rdmacg(device);
if (ret) {
pr_warn("Couldn't register device with rdma cgroup\n");
- ib_cache_cleanup_one(device);
- goto out;
+ goto cache_cleanup;
}
memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
pr_warn("Couldn't query the device attributes\n");
- ib_device_unregister_rdmacg(device);
- ib_cache_cleanup_one(device);
- goto out;
+ goto cache_cleanup;
}
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
pr_warn("Couldn't register device %s with driver model\n",
device->name);
- ib_device_unregister_rdmacg(device);
- ib_cache_cleanup_one(device);
- goto out;
+ goto cache_cleanup;
}
device->reg_state = IB_DEV_REGISTERED;
@@ -417,6 +420,14 @@ int ib_register_device(struct ib_device *device,
down_write(&lists_rwsem);
list_add_tail(&device->core_list, &device_list);
up_write(&lists_rwsem);
+ mutex_unlock(&device_mutex);
+ return 0;
+
+cache_cleanup:
+ ib_cache_cleanup_one(device);
+ ib_cache_release_one(device);
+port_cleanup:
+ kfree(device->port_immutable);
out:
mutex_unlock(&device_mutex);
return ret;
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index cdfad5f26212..84d2615b5d4b 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -96,7 +96,8 @@ struct ib_fmr_pool {
void * arg);
void *flush_arg;
- struct task_struct *thread;
+ struct kthread_worker *worker;
+ struct kthread_work work;
atomic_t req_ser;
atomic_t flush_ser;
@@ -174,29 +175,19 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
spin_unlock_irq(&pool->pool_lock);
}
-static int ib_fmr_cleanup_thread(void *pool_ptr)
+static void ib_fmr_cleanup_func(struct kthread_work *work)
{
- struct ib_fmr_pool *pool = pool_ptr;
+ struct ib_fmr_pool *pool = container_of(work, struct ib_fmr_pool, work);
- do {
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
- ib_fmr_batch_release(pool);
-
- atomic_inc(&pool->flush_ser);
- wake_up_interruptible(&pool->force_wait);
-
- if (pool->flush_function)
- pool->flush_function(pool, pool->flush_arg);
- }
+ ib_fmr_batch_release(pool);
+ atomic_inc(&pool->flush_ser);
+ wake_up_interruptible(&pool->force_wait);
- set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
- !kthread_should_stop())
- schedule();
- __set_current_state(TASK_RUNNING);
- } while (!kthread_should_stop());
+ if (pool->flush_function)
+ pool->flush_function(pool, pool->flush_arg);
- return 0;
+ if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0)
+ kthread_queue_work(pool->worker, &pool->work);
}
/**
@@ -265,15 +256,13 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
atomic_set(&pool->flush_ser, 0);
init_waitqueue_head(&pool->force_wait);
- pool->thread = kthread_run(ib_fmr_cleanup_thread,
- pool,
- "ib_fmr(%s)",
- device->name);
- if (IS_ERR(pool->thread)) {
- pr_warn(PFX "couldn't start cleanup thread\n");
- ret = PTR_ERR(pool->thread);
+ pool->worker = kthread_create_worker(0, "ib_fmr(%s)", device->name);
+ if (IS_ERR(pool->worker)) {
+ pr_warn(PFX "couldn't start cleanup kthread worker\n");
+ ret = PTR_ERR(pool->worker);
goto out_free_pool;
}
+ kthread_init_work(&pool->work, ib_fmr_cleanup_func);
{
struct ib_pool_fmr *fmr;
@@ -338,7 +327,7 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
LIST_HEAD(fmr_list);
int i;
- kthread_stop(pool->thread);
+ kthread_destroy_worker(pool->worker);
ib_fmr_batch_release(pool);
i = 0;
@@ -388,7 +377,7 @@ int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
spin_unlock_irq(&pool->pool_lock);
serial = atomic_inc_return(&pool->req_ser);
- wake_up_process(pool->thread);
+ kthread_queue_work(pool->worker, &pool->work);
if (wait_event_interruptible(pool->force_wait,
atomic_read(&pool->flush_ser) - serial >= 0))
@@ -502,7 +491,7 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
list_add_tail(&fmr->list, &pool->dirty_list);
if (++pool->dirty_len >= pool->dirty_watermark) {
atomic_inc(&pool->req_ser);
- wake_up_process(pool->thread);
+ kthread_queue_work(pool->worker, &pool->work);
}
}
}
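
For reference only (not part of the patch): the fmr_pool change above replaces a hand-rolled cleanup kthread with the kthread_worker API. Below is a minimal sketch of that pattern using only the <linux/kthread.h> interface seen in the hunks (kthread_create_worker, kthread_init_work, kthread_queue_work, kthread_destroy_worker); the demo_* names are hypothetical.

#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/kthread.h>

struct demo_ctx {
	struct kthread_worker *worker;
	struct kthread_work work;
};

/* Runs in the worker thread each time the work item is queued. */
static void demo_work_fn(struct kthread_work *work)
{
	struct demo_ctx *ctx = container_of(work, struct demo_ctx, work);

	/* ... do the deferred work; requeue with kthread_queue_work() if more is pending ... */
	(void)ctx;
}

static int demo_start(struct demo_ctx *ctx, const char *name)
{
	ctx->worker = kthread_create_worker(0, "demo(%s)", name);
	if (IS_ERR(ctx->worker))
		return PTR_ERR(ctx->worker);

	kthread_init_work(&ctx->work, demo_work_fn);
	kthread_queue_work(ctx->worker, &ctx->work);	/* kick the first run */
	return 0;
}

static void demo_stop(struct demo_ctx *ctx)
{
	/* Flushes any queued work and stops the worker thread. */
	kthread_destroy_worker(ctx->worker);
}
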
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 57f231f1c721..192ee3dafb80 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -605,7 +605,7 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
/*
* ib_unregister_mad_agent - Unregisters a client from using MAD services
*/
-int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
+void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_snoop_private *mad_snoop_priv;
@@ -622,7 +622,6 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
agent);
unregister_mad_snoop(mad_snoop_priv);
}
- return 0;
}
EXPORT_SYMBOL(ib_unregister_mad_agent);
@@ -1834,12 +1833,13 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
const struct ib_mad_send_wr_private *wr,
const struct ib_mad_recv_wc *rwc )
{
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
u8 send_resp, rcv_resp;
union ib_gid sgid;
struct ib_device *device = mad_agent_priv->agent.device;
u8 port_num = mad_agent_priv->agent.port_num;
u8 lmc;
+ bool has_grh;
send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad);
rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr);
@@ -1848,36 +1848,40 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
/* both requests, or both responses. GIDs different */
return 0;
- if (ib_query_ah(wr->send_buf.ah, &attr))
+ if (rdma_query_ah(wr->send_buf.ah, &attr))
/* Assume not equal, to avoid false positives. */
return 0;
- if (!!(attr.ah_flags & IB_AH_GRH) !=
- !!(rwc->wc->wc_flags & IB_WC_GRH))
+ has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH);
+ if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH))
/* one has GID, other does not. Assume different */
return 0;
if (!send_resp && rcv_resp) {
/* is request/response. */
- if (!(attr.ah_flags & IB_AH_GRH)) {
+ if (!has_grh) {
if (ib_get_cached_lmc(device, port_num, &lmc))
return 0;
- return (!lmc || !((attr.src_path_bits ^
+ return (!lmc || !((rdma_ah_get_path_bits(&attr) ^
rwc->wc->dlid_path_bits) &
((1 << lmc) - 1)));
} else {
+ const struct ib_global_route *grh =
+ rdma_ah_read_grh(&attr);
+
if (ib_get_cached_gid(device, port_num,
- attr.grh.sgid_index, &sgid, NULL))
+ grh->sgid_index, &sgid, NULL))
return 0;
return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
16);
}
}
- if (!(attr.ah_flags & IB_AH_GRH))
- return attr.dlid == rwc->wc->slid;
+ if (!has_grh)
+ return rdma_ah_get_dlid(&attr) == rwc->wc->slid;
else
- return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw,
+ return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw,
+ rwc->recv_buf.grh->sgid.raw,
16);
}
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 382941b46e43..0d3cca0a8890 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -81,7 +81,7 @@ static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
deref_rmpp_recv(rmpp_recv);
wait_for_completion(&rmpp_recv->comp);
- ib_destroy_ah(rmpp_recv->ah);
+ rdma_destroy_ah(rmpp_recv->ah);
kfree(rmpp_recv);
}
@@ -171,7 +171,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
hdr_len, 0, GFP_KERNEL,
IB_MGMT_BASE_VERSION);
if (IS_ERR(msg))
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
else {
msg->ah = ah;
msg->context[0] = ah;
@@ -201,7 +201,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent,
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- ib_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah);
ib_free_send_mad(msg);
}
}
@@ -209,7 +209,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent,
void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah)
- ib_destroy_ah(mad_send_wc->send_buf->ah);
+ rdma_destroy_ah(mad_send_wc->send_buf->ah);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -237,7 +237,7 @@ static void nack_recv(struct ib_mad_agent_private *agent,
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- ib_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah);
ib_free_send_mad(msg);
}
}
@@ -852,7 +852,7 @@ static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
struct mad_rmpp_recv *rmpp_recv;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
int newwin = 1;
@@ -867,10 +867,10 @@ static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
(rmpp_recv->method & IB_MGMT_METHOD_RESP))
continue;
- if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
+ if (rdma_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
continue;
- if (rmpp_recv->slid == ah_attr.dlid) {
+ if (rmpp_recv->slid == rdma_ah_get_dlid(&ah_attr)) {
newwin = rmpp_recv->repwin;
break;
}
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 322cb67b07a9..45f2f095f793 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -720,7 +720,7 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
@@ -743,19 +743,18 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
return ret;
memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->dlid = be16_to_cpu(rec->mlid);
- ah_attr->sl = rec->sl;
- ah_attr->port_num = port_num;
- ah_attr->static_rate = rec->rate;
-
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = rec->mgid;
-
- ah_attr->grh.sgid_index = (u8) gid_index;
- ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
- ah_attr->grh.hop_limit = rec->hop_limit;
- ah_attr->grh.traffic_class = rec->traffic_class;
-
+ ah_attr->type = rdma_ah_find_type(device, port_num);
+
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid));
+ rdma_ah_set_sl(ah_attr, rec->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+ rdma_ah_set_static_rate(ah_attr, rec->rate);
+
+ rdma_ah_set_grh(ah_attr, &rec->mgid,
+ be32_to_cpu(rec->flow_label),
+ (u8)gid_index,
+ rec->hop_limit,
+ rec->traffic_class);
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_mcmember);
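
For reference only (not part of the patch): the multicast.c hunk above is typical of the series-wide move from open-coded struct ib_ah_attr fields to struct rdma_ah_attr plus rdma_ah_set_*()/rdma_ah_read_grh() accessors, and from ib_create_ah()/ib_destroy_ah() to rdma_create_ah()/rdma_destroy_ah(). A minimal sketch of that calling style follows; demo_create_ah(), its arguments, and the GRH parameter values are illustrative assumptions.

#include <linux/string.h>
#include <rdma/ib_verbs.h>

/* Hypothetical helper -- shows the accessor-based attribute setup only. */
static struct ib_ah *demo_create_ah(struct ib_pd *pd, struct ib_device *device,
				    u8 port_num, u16 dlid, u8 sl,
				    union ib_gid *dgid, u8 sgid_index)
{
	struct rdma_ah_attr attr;

	memset(&attr, 0, sizeof(attr));

	/* The attribute type (IB/RoCE/OPA) is now carried explicitly. */
	attr.type = rdma_ah_find_type(device, port_num);

	rdma_ah_set_dlid(&attr, dlid);
	rdma_ah_set_sl(&attr, sl);
	rdma_ah_set_port_num(&attr, port_num);

	/* dgid, flow_label, sgid_index, hop_limit, traffic_class */
	rdma_ah_set_grh(&attr, dgid, 0, sgid_index, 1, 0);

	return rdma_create_ah(pd, &attr);	/* later freed with rdma_destroy_ah() */
}
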
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
new file mode 100644
index 000000000000..41c31a2bf093
--- /dev/null
+++ b/drivers/infiniband/core/rdma_core.c
@@ -0,0 +1,627 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/file.h>
+#include <linux/anon_inodes.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <linux/rcupdate.h>
+#include "uverbs.h"
+#include "core_priv.h"
+#include "rdma_core.h"
+
+void uverbs_uobject_get(struct ib_uobject *uobject)
+{
+ kref_get(&uobject->ref);
+}
+
+static void uverbs_uobject_free(struct kref *ref)
+{
+ struct ib_uobject *uobj =
+ container_of(ref, struct ib_uobject, ref);
+
+ if (uobj->type->type_class->needs_kfree_rcu)
+ kfree_rcu(uobj, rcu);
+ else
+ kfree(uobj);
+}
+
+void uverbs_uobject_put(struct ib_uobject *uobject)
+{
+ kref_put(&uobject->ref, uverbs_uobject_free);
+}
+
+static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
+{
+ /*
+ * When a shared access is required, we use a positive counter. Each
+ * shared access request checks that the value != -1 and increment it.
+ * Exclusive access is required for operations like write or destroy.
+ * In exclusive access mode, we check that the counter is zero (nobody
+ * claimed this object) and we set it to -1. Releasing a shared access
+ * lock is done simply by decreasing the counter. As for exclusive
+ * access locks, since only a single one of them is allowed
+ * concurrently, setting the counter to zero is enough for releasing
+ * this lock.
+ */
+ if (!exclusive)
+ return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
+ -EBUSY : 0;
+
+ /* lock is either WRITE or DESTROY - should be exclusive */
+ return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+}
+
+static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
+ const struct uverbs_obj_type *type)
+{
+ struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL);
+
+ if (!uobj)
+ return ERR_PTR(-ENOMEM);
+ /*
+ * user_handle should be filled by the handler,
+ * The object is added to the list in the commit stage.
+ */
+ uobj->context = context;
+ uobj->type = type;
+ atomic_set(&uobj->usecnt, 0);
+ kref_init(&uobj->ref);
+
+ return uobj;
+}
+
+static int idr_add_uobj(struct ib_uobject *uobj)
+{
+ int ret;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&uobj->context->ufile->idr_lock);
+
+ /*
+ * We start with allocating an idr pointing to NULL. This represents an
+ * object which isn't initialized yet. We'll replace it later on with
+ * the real object once we commit.
+ */
+ ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0,
+ min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
+ if (ret >= 0)
+ uobj->id = ret;
+
+ spin_unlock(&uobj->context->ufile->idr_lock);
+ idr_preload_end();
+
+ return ret < 0 ? ret : 0;
+}
+
+/*
+ * It only removes it from the uobjects list, uverbs_uobject_put() is still
+ * required.
+ */
+static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
+{
+ spin_lock(&uobj->context->ufile->idr_lock);
+ idr_remove(&uobj->context->ufile->idr, uobj->id);
+ spin_unlock(&uobj->context->ufile->idr_lock);
+}
+
+/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
+static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct ib_uobject *uobj;
+
+ rcu_read_lock();
+ /* object won't be released as we're protected in rcu */
+ uobj = idr_find(&ucontext->ufile->idr, id);
+ if (!uobj) {
+ uobj = ERR_PTR(-ENOENT);
+ goto free;
+ }
+
+ uverbs_uobject_get(uobj);
+free:
+ rcu_read_unlock();
+ return uobj;
+}
+
+static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct file *f;
+ struct ib_uobject *uobject;
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(type, struct uverbs_obj_fd_type, type);
+
+ if (exclusive)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ f = fget(id);
+ if (!f)
+ return ERR_PTR(-EBADF);
+
+ uobject = f->private_data;
+ /*
+ * fget(id) ensures we are not currently running uverbs_close_fd,
+ * and the caller is expected to ensure that uverbs_close_fd is never
+ * done while a call to lookup is possible.
+ */
+ if (f->f_op != fd_type->fops) {
+ fput(f);
+ return ERR_PTR(-EBADF);
+ }
+
+ uverbs_uobject_get(uobject);
+ return uobject;
+}
+
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool exclusive)
+{
+ struct ib_uobject *uobj;
+ int ret;
+
+ uobj = type->type_class->lookup_get(type, ucontext, id, exclusive);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ if (uobj->type != type) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ ret = uverbs_try_lock_object(uobj, exclusive);
+ if (ret) {
+ WARN(ucontext->cleanup_reason,
+ "ib_uverbs: Trying to lookup_get while cleanup context\n");
+ goto free;
+ }
+
+ return uobj;
+free:
+ uobj->type->type_class->lookup_put(uobj, exclusive);
+ uverbs_uobject_put(uobj);
+ return ERR_PTR(ret);
+}
+
+static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ int ret;
+ struct ib_uobject *uobj;
+
+ uobj = alloc_uobj(ucontext, type);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ ret = idr_add_uobj(uobj);
+ if (ret)
+ goto uobj_put;
+
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto idr_remove;
+
+ return uobj;
+
+idr_remove:
+ uverbs_idr_remove_uobj(uobj);
+uobj_put:
+ uverbs_uobject_put(uobj);
+ return ERR_PTR(ret);
+}
+
+static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(type, struct uverbs_obj_fd_type, type);
+ int new_fd;
+ struct ib_uobject *uobj;
+ struct ib_uobject_file *uobj_file;
+ struct file *filp;
+
+ new_fd = get_unused_fd_flags(O_CLOEXEC);
+ if (new_fd < 0)
+ return ERR_PTR(new_fd);
+
+ uobj = alloc_uobj(ucontext, type);
+ if (IS_ERR(uobj)) {
+ put_unused_fd(new_fd);
+ return uobj;
+ }
+
+ uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
+ filp = anon_inode_getfile(fd_type->name,
+ fd_type->fops,
+ uobj_file,
+ fd_type->flags);
+ if (IS_ERR(filp)) {
+ put_unused_fd(new_fd);
+ uverbs_uobject_put(uobj);
+ return (void *)filp;
+ }
+
+ uobj_file->uobj.id = new_fd;
+ uobj_file->uobj.object = filp;
+ uobj_file->ufile = ucontext->ufile;
+ INIT_LIST_HEAD(&uobj->list);
+ kref_get(&uobj_file->ufile->ref);
+
+ return uobj;
+}
+
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ return type->type_class->alloc_begin(type, ucontext);
+}
+
+static void uverbs_uobject_add(struct ib_uobject *uobject)
+{
+ mutex_lock(&uobject->context->uobjects_lock);
+ list_add(&uobject->list, &uobject->context->uobjects);
+ mutex_unlock(&uobject->context->uobjects_lock);
+}
+
+static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ const struct uverbs_obj_idr_type *idr_type =
+ container_of(uobj->type, struct uverbs_obj_idr_type,
+ type);
+ int ret = idr_type->destroy_object(uobj, why);
+
+ /*
+ * We can only fail gracefully if the user requested to destroy the
+ * object. In the rest of the cases, just remove whatever you can.
+ */
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ return ret;
+
+ ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ uverbs_idr_remove_uobj(uobj);
+
+ return ret;
+}
+
+static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
+{
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+ struct file *filp = uobj->object;
+ int id = uobj_file->uobj.id;
+
+ /* Unsuccessful NEW */
+ fput(filp);
+ put_unused_fd(id);
+}
+
+static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(uobj->type, struct uverbs_obj_fd_type, type);
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+ int ret = fd_type->context_closed(uobj_file, why);
+
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ return ret;
+
+ if (why == RDMA_REMOVE_DURING_CLEANUP) {
+ alloc_abort_fd_uobject(uobj);
+ return ret;
+ }
+
+ uobj_file->uobj.context = NULL;
+ return ret;
+}
+
+static void lockdep_check(struct ib_uobject *uobj, bool exclusive)
+{
+#ifdef CONFIG_LOCKDEP
+ if (exclusive)
+ WARN_ON(atomic_read(&uobj->usecnt) > 0);
+ else
+ WARN_ON(atomic_read(&uobj->usecnt) == -1);
+#endif
+}
+
+static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ int ret;
+ struct ib_ucontext *ucontext = uobj->context;
+
+ ret = uobj->type->type_class->remove_commit(uobj, why);
+ if (ret && why == RDMA_REMOVE_DESTROY) {
+ /* We couldn't remove the object, so just unlock the uobject */
+ atomic_set(&uobj->usecnt, 0);
+ uobj->type->type_class->lookup_put(uobj, true);
+ } else {
+ mutex_lock(&ucontext->uobjects_lock);
+ list_del(&uobj->list);
+ mutex_unlock(&ucontext->uobjects_lock);
+ /* put the ref we took when we created the object */
+ uverbs_uobject_put(uobj);
+ }
+
+ return ret;
+}
+
+/* This is called only for user requested DESTROY reasons */
+int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
+{
+ int ret;
+ struct ib_ucontext *ucontext = uobj->context;
+
+ /* put the ref count we took at lookup_get */
+ uverbs_uobject_put(uobj);
+ /* Cleanup is running. Calling this should have been impossible */
+ if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
+ WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
+ return 0;
+ }
+ lockdep_check(uobj, true);
+ ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
+
+ up_read(&ucontext->cleanup_rwsem);
+ return ret;
+}
+
+static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
+{
+ uverbs_uobject_add(uobj);
+ spin_lock(&uobj->context->ufile->idr_lock);
+ /*
+ * We already allocated this IDR with a NULL object, so
+ * this shouldn't fail.
+ */
+ WARN_ON(idr_replace(&uobj->context->ufile->idr,
+ uobj, uobj->id));
+ spin_unlock(&uobj->context->ufile->idr_lock);
+}
+
+static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
+{
+ struct ib_uobject_file *uobj_file =
+ container_of(uobj, struct ib_uobject_file, uobj);
+
+ uverbs_uobject_add(&uobj_file->uobj);
+ fd_install(uobj_file->uobj.id, uobj->object);
+ /* This shouldn't be used anymore. Use the file object instead */
+ uobj_file->uobj.id = 0;
+ /* Get another reference as we export this to the fops */
+ uverbs_uobject_get(&uobj_file->uobj);
+}
+
+int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
+{
+ /* Cleanup is running. Calling this should have been impossible */
+ if (!down_read_trylock(&uobj->context->cleanup_rwsem)) {
+ int ret;
+
+ WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
+ ret = uobj->type->type_class->remove_commit(uobj,
+ RDMA_REMOVE_DURING_CLEANUP);
+ if (ret)
+ pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
+ uobj->id);
+ return ret;
+ }
+
+ uobj->type->type_class->alloc_commit(uobj);
+ up_read(&uobj->context->cleanup_rwsem);
+
+ return 0;
+}
+
+static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
+{
+ uverbs_idr_remove_uobj(uobj);
+ ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ uverbs_uobject_put(uobj);
+}
+
+void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
+{
+ uobj->type->type_class->alloc_abort(uobj);
+}
+
+static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+}
+
+static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+ struct file *filp = uobj->object;
+
+ WARN_ON(exclusive);
+ /* This indirectly calls uverbs_close_fd and frees the object */
+ fput(filp);
+}
+
+void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
+{
+ lockdep_check(uobj, exclusive);
+ uobj->type->type_class->lookup_put(uobj, exclusive);
+ /*
+ * In order to unlock an object, either decrease its usecnt for
+ * read access or zero it in case of exclusive access. See
+ * uverbs_try_lock_object for locking schema information.
+ */
+ if (!exclusive)
+ atomic_dec(&uobj->usecnt);
+ else
+ atomic_set(&uobj->usecnt, 0);
+
+ uverbs_uobject_put(uobj);
+}
+
+const struct uverbs_obj_type_class uverbs_idr_class = {
+ .alloc_begin = alloc_begin_idr_uobject,
+ .lookup_get = lookup_get_idr_uobject,
+ .alloc_commit = alloc_commit_idr_uobject,
+ .alloc_abort = alloc_abort_idr_uobject,
+ .lookup_put = lookup_put_idr_uobject,
+ .remove_commit = remove_commit_idr_uobject,
+ /*
+ * When we destroy an object, we first just lock it for WRITE and
+ * actually DESTROY it in the finalize stage. So, the problematic
+ * scenario is when we just started the finalize stage of the
+ * destruction (nothing was executed yet). Now, the other thread
+ * fetched the object for READ access, but it didn't lock it yet.
+ * The DESTROY thread continues and starts destroying the object.
+ * When the other thread continues - without RCU, it would
+ * access freed memory. However, rcu_read_lock delays the free
+ * until the READ operation's RCU read-side section ends. Since the
+ * exclusive lock of the object is still taken by the DESTROY flow, the
+ * READ operation will get -EBUSY and it'll just bail out.
+ */
+ .needs_kfree_rcu = true,
+};
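
The comment above relies on a usecnt-based try-lock that lives earlier in this file (uverbs_try_lock_object). Its protocol, as implied here and by lockdep_check()/rdma_lookup_put_uobject(), is roughly the following sketch -- illustrative only, the real helper may differ in detail:

/*
 * usecnt == 0: unlocked, usecnt > 0: that many concurrent readers,
 * usecnt == -1: exclusively locked for destruction.
 */
static int example_try_lock(struct ib_uobject *uobj, bool exclusive)
{
        if (exclusive)
                /* only an unlocked object may be taken exclusively */
                return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;

        /* shared: add a reader unless the object is exclusively locked */
        return atomic_add_unless(&uobj->usecnt, 1, -1) ? 0 : -EBUSY;
}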
+
+static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
+{
+ struct ib_ucontext *ucontext;
+ struct ib_uverbs_file *ufile = uobj_file->ufile;
+ int ret;
+
+ mutex_lock(&uobj_file->ufile->cleanup_mutex);
+
+ /* uobject was either already cleaned up or is being cleaned up right now anyway */
+ if (!uobj_file->uobj.context ||
+ !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
+ goto unlock;
+
+ ucontext = uobj_file->uobj.context;
+ ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE);
+ up_read(&ucontext->cleanup_rwsem);
+ if (ret)
+ pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
+unlock:
+ mutex_unlock(&ufile->cleanup_mutex);
+}
+
+void uverbs_close_fd(struct file *f)
+{
+ struct ib_uobject_file *uobj_file = f->private_data;
+ struct kref *uverbs_file_ref = &uobj_file->ufile->ref;
+
+ _uverbs_close_fd(uobj_file);
+ uverbs_uobject_put(&uobj_file->uobj);
+ kref_put(uverbs_file_ref, ib_uverbs_release_file);
+}
+
+void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
+{
+ enum rdma_remove_reason reason = device_removed ?
+ RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
+ unsigned int cur_order = 0;
+
+ ucontext->cleanup_reason = reason;
+ /*
+ * Waits for all remove_commit and alloc_commit to finish. Logically, we
+ * want to hold this forever as the context is going to be destroyed,
+ * but we'll release it since it causes a "held lock freed" BUG message.
+ */
+ down_write(&ucontext->cleanup_rwsem);
+
+ while (!list_empty(&ucontext->uobjects)) {
+ struct ib_uobject *obj, *next_obj;
+ unsigned int next_order = UINT_MAX;
+
+ /*
+ * This shouldn't run while executing other commands on this
+ * context. Thus, the only thing we should take care of is
+ * releasing a FD while traversing this list. The FD could be
+ * closed and released from the _release fop of this FD.
+ * In order to mitigate this, we add a lock.
+ * We take and release the lock per destroy_order traversal in
+ * order to give other threads (which might still use the FDs) a
+ * chance to run.
+ */
+ mutex_lock(&ucontext->uobjects_lock);
+ list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
+ list) {
+ if (obj->type->destroy_order == cur_order) {
+ int ret;
+
+ /*
+ * If we hit this WARN_ON, it means we are
+ * racing with a lookup_get.
+ */
+ WARN_ON(uverbs_try_lock_object(obj, true));
+ ret = obj->type->type_class->remove_commit(obj,
+ reason);
+ list_del(&obj->list);
+ if (ret)
+ pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
+ obj->id, cur_order);
+ /* put the ref we took when we created the object */
+ uverbs_uobject_put(obj);
+ } else {
+ next_order = min(next_order,
+ obj->type->destroy_order);
+ }
+ }
+ mutex_unlock(&ucontext->uobjects_lock);
+ cur_order = next_order;
+ }
+ up_write(&ucontext->cleanup_rwsem);
+}
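
For reference, an object type participates in the ordered teardown above roughly as sketched below (illustrative only; the field names follow the uses in this file, and the real type declarations live in the uverbs type headers added by this series). Lower destroy_order values are torn down in earlier passes:

static int example_destroy_object(struct ib_uobject *uobj,
                                  enum rdma_remove_reason why)
{
        /* release the hardware resource referenced by uobj->object */
        return 0;
}

static const struct uverbs_obj_idr_type example_type = {
        .type = {
                .type_class    = &uverbs_idr_class,
                .destroy_order = 0,     /* destroyed before higher orders */
        },
        .destroy_object = example_destroy_object,
};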
+
+void uverbs_initialize_ucontext(struct ib_ucontext *ucontext)
+{
+ ucontext->cleanup_reason = 0;
+ mutex_init(&ucontext->uobjects_lock);
+ INIT_LIST_HEAD(&ucontext->uobjects);
+ init_rwsem(&ucontext->cleanup_rwsem);
+}
+
+const struct uverbs_obj_type_class uverbs_fd_class = {
+ .alloc_begin = alloc_begin_fd_uobject,
+ .lookup_get = lookup_get_fd_uobject,
+ .alloc_commit = alloc_commit_fd_uobject,
+ .alloc_abort = alloc_abort_fd_uobject,
+ .lookup_put = lookup_put_fd_uobject,
+ .remove_commit = remove_commit_fd_uobject,
+ .needs_kfree_rcu = false,
+};
+
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
new file mode 100644
index 000000000000..1b82e7ff7fe8
--- /dev/null
+++ b/drivers/infiniband/core/rdma_core.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RDMA_CORE_H
+#define RDMA_CORE_H
+
+#include <linux/idr.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/ib_verbs.h>
+#include <linux/mutex.h>
+
+/*
+ * These functions initialize the context and clean up its uobjects.
+ * The context has a list of objects which is protected by a mutex
+ * on the context. initialize_ucontext should be called when we create
+ * a context.
+ * cleanup_ucontext removes all uobjects from the context and puts them.
+ */
+void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed);
+void uverbs_initialize_ucontext(struct ib_ucontext *ucontext);
+
+/*
+ * uverbs_uobject_get is called in order to increase the reference count on
+ * an uobject. This is useful when a handler wants to keep the uobject's memory
+ * alive, regardless of whether this uobject is still alive in the
+ * context's objects repository. Objects are put via uverbs_uobject_put.
+ */
+void uverbs_uobject_get(struct ib_uobject *uobject);
+
+/*
+ * In order to indicate we no longer need this uobject, uverbs_uobject_put
+ * is called. When the reference count drops to zero, the uobject is freed.
+ * For example, this is used when attaching a completion channel to a CQ.
+ */
+void uverbs_uobject_put(struct ib_uobject *uobject);
+
+/* Indicate this fd is no longer used by this consumer, but its memory isn't
+ * necessarily released yet. When the last reference is put, we release the
+ * memory. After this call is executed, calling uverbs_uobject_get isn't
+ * allowed.
+ * This must be called from the release file_operations of the file!
+ */
+void uverbs_close_fd(struct file *f);
+
+#endif /* RDMA_CORE_H */
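
As a usage note (illustrative, not part of this patch), the get/put pair above is what lets a handler stash a uobject for an asynchronous consumer without worrying about it disappearing from the context:

static void example_stash_for_async(struct ib_uobject *uobj,
                                    struct ib_uobject **slot)
{
        uverbs_uobject_get(uobj);       /* memory stays valid for the async user */
        *slot = uobj;
}

static void example_async_done(struct ib_uobject **slot)
{
        uverbs_uobject_put(*slot);      /* last put frees the uobject */
        *slot = NULL;
}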
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index ceae153997d0..e335b09c022e 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -56,6 +56,8 @@
#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000
+#define IB_SA_CPI_MAX_RETRY_CNT 3
+#define IB_SA_CPI_RETRY_WAIT 1000 /* msecs */
static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
struct ib_sa_sm_ah {
@@ -65,9 +67,23 @@ struct ib_sa_sm_ah {
u8 src_path_mask;
};
+enum rdma_class_port_info_type {
+ RDMA_CLASS_PORT_INFO_IB,
+ RDMA_CLASS_PORT_INFO_OPA
+};
+
+struct rdma_class_port_info {
+ enum rdma_class_port_info_type type;
+ union {
+ struct ib_class_port_info ib;
+ struct opa_class_port_info opa;
+ };
+};
+
struct ib_sa_classport_cache {
bool valid;
- struct ib_class_port_info data;
+ int retry_cnt;
+ struct rdma_class_port_info data;
};
struct ib_sa_port {
@@ -75,6 +91,7 @@ struct ib_sa_port {
struct ib_sa_sm_ah *sm_ah;
struct work_struct update_task;
struct ib_sa_classport_cache classport_info;
+ struct delayed_work ib_cpi_work;
spinlock_t classport_lock; /* protects class port info set */
spinlock_t ah_lock;
u8 port_num;
@@ -103,6 +120,7 @@ struct ib_sa_query {
#define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001
#define IB_SA_CANCEL 0x00000002
+#define IB_SA_QUERY_OPA 0x00000004
struct ib_sa_service_query {
void (*callback)(int, struct ib_sa_service_rec *, void *);
@@ -111,9 +129,10 @@ struct ib_sa_service_query {
};
struct ib_sa_path_query {
- void (*callback)(int, struct ib_sa_path_rec *, void *);
+ void (*callback)(int, struct sa_path_rec *, void *);
void *context;
struct ib_sa_query sa_query;
+ struct sa_path_rec *conv_pr;
};
struct ib_sa_guidinfo_query {
@@ -123,7 +142,7 @@ struct ib_sa_guidinfo_query {
};
struct ib_sa_classport_info_query {
- void (*callback)(int, struct ib_class_port_info *, void *);
+ void (*callback)(void *);
void *context;
struct ib_sa_query sa_query;
};
@@ -170,12 +189,12 @@ static DEFINE_SPINLOCK(tid_lock);
static u32 tid;
#define PATH_REC_FIELD(field) \
- .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \
+ .struct_offset_bytes = offsetof(struct sa_path_rec, field), \
+ .struct_size_bytes = sizeof((struct sa_path_rec *)0)->field, \
.field_name = "sa_path_rec:" #field
static const struct ib_field path_rec_table[] = {
- { PATH_REC_FIELD(service_id),
+ { PATH_REC_FIELD(ib.service_id),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 64 },
@@ -187,15 +206,15 @@ static const struct ib_field path_rec_table[] = {
.offset_words = 6,
.offset_bits = 0,
.size_bits = 128 },
- { PATH_REC_FIELD(dlid),
+ { PATH_REC_FIELD(ib.dlid),
.offset_words = 10,
.offset_bits = 0,
.size_bits = 16 },
- { PATH_REC_FIELD(slid),
+ { PATH_REC_FIELD(ib.slid),
.offset_words = 10,
.offset_bits = 16,
.size_bits = 16 },
- { PATH_REC_FIELD(raw_traffic),
+ { PATH_REC_FIELD(ib.raw_traffic),
.offset_words = 11,
.offset_bits = 0,
.size_bits = 1 },
@@ -269,6 +288,136 @@ static const struct ib_field path_rec_table[] = {
.size_bits = 48 },
};
+#define OPA_PATH_REC_FIELD(field) \
+ .struct_offset_bytes = \
+ offsetof(struct sa_path_rec, field), \
+ .struct_size_bytes = \
+ sizeof((struct sa_path_rec *)0)->field, \
+ .field_name = "sa_path_rec:" #field
+
+static const struct ib_field opa_path_rec_table[] = {
+ { OPA_PATH_REC_FIELD(opa.service_id),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 64 },
+ { OPA_PATH_REC_FIELD(dgid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_PATH_REC_FIELD(sgid),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_PATH_REC_FIELD(opa.dlid),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_PATH_REC_FIELD(opa.slid),
+ .offset_words = 11,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_PATH_REC_FIELD(opa.raw_traffic),
+ .offset_words = 12,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 12,
+ .offset_bits = 1,
+ .size_bits = 3 },
+ { OPA_PATH_REC_FIELD(flow_label),
+ .offset_words = 12,
+ .offset_bits = 4,
+ .size_bits = 20 },
+ { OPA_PATH_REC_FIELD(hop_limit),
+ .offset_words = 12,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(traffic_class),
+ .offset_words = 13,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(reversible),
+ .offset_words = 13,
+ .offset_bits = 8,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(numb_path),
+ .offset_words = 13,
+ .offset_bits = 9,
+ .size_bits = 7 },
+ { OPA_PATH_REC_FIELD(pkey),
+ .offset_words = 13,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_PATH_REC_FIELD(opa.l2_8B),
+ .offset_words = 14,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_10B),
+ .offset_words = 14,
+ .offset_bits = 1,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_9B),
+ .offset_words = 14,
+ .offset_bits = 2,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_16B),
+ .offset_words = 14,
+ .offset_bits = 3,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 4,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(opa.qos_type),
+ .offset_words = 14,
+ .offset_bits = 6,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(opa.qos_priority),
+ .offset_words = 14,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 16,
+ .size_bits = 3 },
+ { OPA_PATH_REC_FIELD(sl),
+ .offset_words = 14,
+ .offset_bits = 19,
+ .size_bits = 5 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(mtu_selector),
+ .offset_words = 15,
+ .offset_bits = 0,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(mtu),
+ .offset_words = 15,
+ .offset_bits = 2,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(rate_selector),
+ .offset_words = 15,
+ .offset_bits = 8,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(rate),
+ .offset_words = 15,
+ .offset_bits = 10,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(packet_life_time_selector),
+ .offset_words = 15,
+ .offset_bits = 16,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(packet_life_time),
+ .offset_words = 15,
+ .offset_bits = 18,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(preference),
+ .offset_words = 15,
+ .offset_bits = 24,
+ .size_bits = 8 },
+};
+
#define MCMEMBER_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \
.struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \
@@ -406,7 +555,7 @@ static const struct ib_field service_rec_table[] = {
.struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \
.field_name = "ib_class_port_info:" #field
-static const struct ib_field classport_info_rec_table[] = {
+static const struct ib_field ib_classport_info_rec_table[] = {
{ CLASSPORTINFO_REC_FIELD(base_version),
.offset_words = 0,
.offset_bits = 0,
@@ -477,6 +626,88 @@ static const struct ib_field classport_info_rec_table[] = {
.size_bits = 32 },
};
+#define OPA_CLASSPORTINFO_REC_FIELD(field) \
+ .struct_offset_bytes =\
+ offsetof(struct opa_class_port_info, field), \
+ .struct_size_bytes = \
+ sizeof((struct opa_class_port_info *)0)->field, \
+ .field_name = "opa_class_port_info:" #field
+
+static const struct ib_field opa_classport_info_rec_table[] = {
+ { OPA_CLASSPORTINFO_REC_FIELD(base_version),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { OPA_CLASSPORTINFO_REC_FIELD(class_version),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { OPA_CLASSPORTINFO_REC_FIELD(cap_mask),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_gid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_lid),
+ .offset_words = 7,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp),
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey),
+ .offset_words = 9,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_gid),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl),
+ .offset_words = 14,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_lid),
+ .offset_words = 15,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp),
+ .offset_words = 16,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_qkey),
+ .offset_words = 17,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_pkey),
+ .offset_words = 18,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey),
+ .offset_words = 18,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd),
+ .offset_words = 19,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { RESERVED,
+ .offset_words = 19,
+ .offset_bits = 8,
+ .size_bits = 24 },
+};
+
#define GUIDINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
.struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
@@ -518,7 +749,7 @@ static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
struct ib_sa_query *query)
{
- struct ib_sa_path_rec *sa_rec = query->mad_buf->context[1];
+ struct sa_path_rec *sa_rec = query->mad_buf->context[1];
struct ib_sa_mad *mad = query->mad_buf->mad;
ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
u16 val16;
@@ -543,7 +774,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
/* Now build the attributes */
if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
- val64 = be64_to_cpu(sa_rec->service_id);
+ val64 = be64_to_cpu(sa_path_get_service_id(sa_rec));
nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
sizeof(val64), &val64);
}
@@ -927,96 +1158,10 @@ static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
- ib_destroy_ah(sm_ah->ah);
+ rdma_destroy_ah(sm_ah->ah);
kfree(sm_ah);
}
-static void update_sm_ah(struct work_struct *work)
-{
- struct ib_sa_port *port =
- container_of(work, struct ib_sa_port, update_task);
- struct ib_sa_sm_ah *new_ah;
- struct ib_port_attr port_attr;
- struct ib_ah_attr ah_attr;
-
- if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
- pr_warn("Couldn't query port\n");
- return;
- }
-
- new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
- if (!new_ah) {
- return;
- }
-
- kref_init(&new_ah->ref);
- new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
-
- new_ah->pkey_index = 0;
- if (ib_find_pkey(port->agent->device, port->port_num,
- IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
- pr_err("Couldn't find index for default PKey\n");
-
- memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = port_attr.sm_lid;
- ah_attr.sl = port_attr.sm_sl;
- ah_attr.port_num = port->port_num;
- if (port_attr.grh_required) {
- ah_attr.ah_flags = IB_AH_GRH;
- ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix);
- ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
- }
-
- new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
- if (IS_ERR(new_ah->ah)) {
- pr_warn("Couldn't create new SM AH\n");
- kfree(new_ah);
- return;
- }
-
- spin_lock_irq(&port->ah_lock);
- if (port->sm_ah)
- kref_put(&port->sm_ah->ref, free_sm_ah);
- port->sm_ah = new_ah;
- spin_unlock_irq(&port->ah_lock);
-
-}
-
-static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
-{
- if (event->event == IB_EVENT_PORT_ERR ||
- event->event == IB_EVENT_PORT_ACTIVE ||
- event->event == IB_EVENT_LID_CHANGE ||
- event->event == IB_EVENT_PKEY_CHANGE ||
- event->event == IB_EVENT_SM_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER) {
- unsigned long flags;
- struct ib_sa_device *sa_dev =
- container_of(handler, typeof(*sa_dev), event_handler);
- struct ib_sa_port *port =
- &sa_dev->port[event->element.port_num - sa_dev->start_port];
-
- if (!rdma_cap_ib_sa(handler->device, port->port_num))
- return;
-
- spin_lock_irqsave(&port->ah_lock, flags);
- if (port->sm_ah)
- kref_put(&port->sm_ah->ref, free_sm_ah);
- port->sm_ah = NULL;
- spin_unlock_irqrestore(&port->ah_lock, flags);
-
- if (event->event == IB_EVENT_SM_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER ||
- event->event == IB_EVENT_LID_CHANGE) {
- spin_lock_irqsave(&port->classport_lock, flags);
- port->classport_info.valid = false;
- spin_unlock_irqrestore(&port->classport_lock, flags);
- }
- queue_work(ib_wq, &sa_dev->port[event->element.port_num -
- sa_dev->start_port].update_task);
- }
-}
-
void ib_sa_register_client(struct ib_sa_client *client)
{
atomic_set(&client->users, 1);
@@ -1085,7 +1230,8 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
}
int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
@@ -1093,21 +1239,26 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
struct net_device *ndev = NULL;
memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->dlid = be16_to_cpu(rec->dlid);
- ah_attr->sl = rec->sl;
- ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
- get_src_path_mask(device, port_num);
- ah_attr->port_num = port_num;
- ah_attr->static_rate = rec->rate;
-
+ ah_attr->type = rdma_ah_find_type(device, port_num);
+
+ rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
+ rdma_ah_set_sl(ah_attr, rec->sl);
+ rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) &
+ get_src_path_mask(device, port_num));
+ rdma_ah_set_port_num(ah_attr, port_num);
+ rdma_ah_set_static_rate(ah_attr, rec->rate);
use_roce = rdma_cap_eth_ah(device, port_num);
if (use_roce) {
struct net_device *idev;
struct net_device *resolved_dev;
- struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex,
- .net = rec->net ? rec->net :
- &init_net};
+ struct rdma_dev_addr dev_addr = {
+ .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
+ sa_path_get_ifindex(rec) : 0),
+ .net = sa_path_get_ndev(rec) ?
+ sa_path_get_ndev(rec) :
+ &init_net
+ };
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
@@ -1128,7 +1279,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
dev_addr.network == RDMA_NETWORK_IPV6) &&
- rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
return -EINVAL;
idev = device->get_netdev(device, port_num);
@@ -1159,28 +1310,31 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
}
if (rec->hop_limit > 0 || use_roce) {
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = rec->dgid;
+ enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
- ret = ib_find_cached_gid_by_port(device, &rec->sgid,
- rec->gid_type, port_num, ndev,
- &gid_index);
+ ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
+ port_num, ndev, &gid_index);
if (ret) {
if (ndev)
dev_put(ndev);
return ret;
}
- ah_attr->grh.sgid_index = gid_index;
- ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
- ah_attr->grh.hop_limit = rec->hop_limit;
- ah_attr->grh.traffic_class = rec->traffic_class;
+ rdma_ah_set_grh(ah_attr, &rec->dgid,
+ be32_to_cpu(rec->flow_label),
+ gid_index, rec->hop_limit,
+ rec->traffic_class);
if (ndev)
dev_put(ndev);
}
- if (use_roce)
- memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
+ if (use_roce) {
+ u8 *dmac = sa_path_get_dmac(rec);
+
+ if (!dmac)
+ return -EINVAL;
+ memcpy(ah_attr->roce.dmac, dmac, ETH_ALEN);
+ }
return 0;
}
@@ -1203,7 +1357,9 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
query->sm_ah->pkey_index,
0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
gfp_mask,
- IB_MGMT_BASE_VERSION);
+ ((query->flags & IB_SA_QUERY_OPA) ?
+ OPA_MGMT_BASE_VERSION :
+ IB_MGMT_BASE_VERSION));
if (IS_ERR(query->mad_buf)) {
kref_put(&query->sm_ah->ref, free_sm_ah);
return -ENOMEM;
@@ -1220,16 +1376,21 @@ static void free_mad(struct ib_sa_query *query)
kref_put(&query->sm_ah->ref, free_sm_ah);
}
-static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
+static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
{
+ struct ib_sa_mad *mad = query->mad_buf->mad;
unsigned long flags;
memset(mad, 0, sizeof *mad);
- mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ if (query->flags & IB_SA_QUERY_OPA) {
+ mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
+ mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
+ } else {
+ mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+ }
mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
- mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
-
spin_lock_irqsave(&tid_lock, flags);
mad->mad_hdr.tid =
cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
@@ -1258,7 +1419,8 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
query->mad_buf->context[0] = query;
query->id = id;
- if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) {
+ if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
+ (!(query->flags & IB_SA_QUERY_OPA))) {
if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
if (!ib_nl_make_request(query, gfp_mask))
return id;
@@ -1281,18 +1443,75 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
return ret ? ret : id;
}
-void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
+void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
{
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
}
EXPORT_SYMBOL(ib_sa_unpack_path);
-void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
+void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
{
ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
}
EXPORT_SYMBOL(ib_sa_pack_path);
+static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num)
+{
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ unsigned long flags;
+ bool ret = false;
+
+ if (!sa_dev)
+ return ret;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if (!port->classport_info.valid)
+ goto ret;
+
+ if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA)
+ ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) &
+ OPA_CLASS_PORT_INFO_PR_SUPPORT;
+ret:
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return ret;
+}
+
+enum opa_pr_supported {
+ PR_NOT_SUPPORTED,
+ PR_OPA_SUPPORTED,
+ PR_IB_SUPPORTED
+};
+
+/**
+ * Check if current PR query can be an OPA query.
+ * Returns PR_NOT_SUPPORTED if a path record query is not
+ * possible, PR_OPA_SUPPORTED if an OPA path record query
+ * is possible and PR_IB_SUPPORTED if an IB path record
+ * query is possible.
+ */
+static int opa_pr_query_possible(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num,
+ struct sa_path_rec *rec)
+{
+ struct ib_port_attr port_attr;
+
+ if (ib_query_port(device, port_num, &port_attr))
+ return PR_NOT_SUPPORTED;
+
+ if (ib_sa_opa_pathrecord_support(client, device, port_num))
+ return PR_OPA_SUPPORTED;
+
+ if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
+ return PR_NOT_SUPPORTED;
+ else
+ return PR_IB_SUPPORTED;
+}
+
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
@@ -1301,22 +1520,44 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
container_of(sa_query, struct ib_sa_path_query, sa_query);
if (mad) {
- struct ib_sa_path_rec rec;
-
- ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
- mad->data, &rec);
- rec.net = NULL;
- rec.ifindex = 0;
- rec.gid_type = IB_GID_TYPE_IB;
- eth_zero_addr(rec.dmac);
- query->callback(status, &rec, query->context);
+ struct sa_path_rec rec;
+
+ if (sa_query->flags & IB_SA_QUERY_OPA) {
+ ib_unpack(opa_path_rec_table,
+ ARRAY_SIZE(opa_path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_OPA;
+ query->callback(status, &rec, query->context);
+ } else {
+ ib_unpack(path_rec_table,
+ ARRAY_SIZE(path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_IB;
+ sa_path_set_ndev(&rec, NULL);
+ sa_path_set_ifindex(&rec, 0);
+ sa_path_set_dmac_zero(&rec);
+
+ if (query->conv_pr) {
+ struct sa_path_rec opa;
+
+ memset(&opa, 0, sizeof(struct sa_path_rec));
+ sa_convert_path_ib_to_opa(&opa, &rec);
+ query->callback(status, &opa, query->context);
+ } else {
+ query->callback(status, &rec, query->context);
+ }
+ }
} else
query->callback(status, NULL, query->context);
}
static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
- kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
+ struct ib_sa_path_query *query =
+ container_of(sa_query, struct ib_sa_path_query, sa_query);
+
+ kfree(query->conv_pr);
+ kfree(query);
}
/**
@@ -1346,11 +1587,11 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
*/
int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec,
+ struct sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
- struct ib_sa_path_rec *resp,
+ struct sa_path_rec *resp,
void *context),
void *context,
struct ib_sa_query **sa_query)
@@ -1360,11 +1601,16 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_sa_port *port;
struct ib_mad_agent *agent;
struct ib_sa_mad *mad;
+ enum opa_pr_supported status;
int ret;
if (!sa_dev)
return -ENODEV;
+ if ((rec->rec_type != SA_PATH_REC_TYPE_IB) &&
+ (rec->rec_type != SA_PATH_REC_TYPE_OPA))
+ return -EINVAL;
+
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
@@ -1373,9 +1619,26 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
return -ENOMEM;
query->sa_query.port = port;
+ if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
+ status = opa_pr_query_possible(client, device, port_num, rec);
+ if (status == PR_NOT_SUPPORTED) {
+ ret = -EINVAL;
+ goto err1;
+ } else if (status == PR_OPA_SUPPORTED) {
+ query->sa_query.flags |= IB_SA_QUERY_OPA;
+ } else {
+ query->conv_pr =
+ kmalloc(sizeof(*query->conv_pr), gfp_mask);
+ if (!query->conv_pr) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+ }
+ }
+
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
- goto err1;
+ goto err2;
ib_sa_client_get(client);
query->sa_query.client = client;
@@ -1383,7 +1646,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
query->sa_query.release = ib_sa_path_rec_release;
@@ -1391,24 +1654,36 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
mad->sa_hdr.comp_mask = comp_mask;
- ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
+ if (query->sa_query.flags & IB_SA_QUERY_OPA) {
+ ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
+ rec, mad->data);
+ } else if (query->conv_pr) {
+ sa_convert_path_opa_to_ib(query->conv_pr, rec);
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ query->conv_pr, mad->data);
+ } else {
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ rec, mad->data);
+ }
*sa_query = &query->sa_query;
query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
- query->sa_query.mad_buf->context[1] = rec;
+ query->sa_query.mad_buf->context[1] = (query->conv_pr) ?
+ query->conv_pr : rec;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
- goto err2;
+ goto err3;
return ret;
-err2:
+err3:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
-
+err2:
+ kfree(query->conv_pr);
err1:
kfree(query);
return ret;
@@ -1508,7 +1783,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
query->sa_query.release = ib_sa_service_rec_release;
@@ -1600,7 +1875,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
query->sa_query.release = ib_sa_mcmember_rec_release;
@@ -1697,7 +1972,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
query->sa_query.release = ib_sa_guidinfo_rec_release;
@@ -1728,7 +2003,42 @@ err1:
}
EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
-/* Support get SA ClassPortInfo */
+bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
+ struct ib_device *device,
+ u8 port_num)
+{
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ bool ret = false;
+ unsigned long flags;
+
+ if (!sa_dev)
+ return ret;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if ((port->classport_info.valid) &&
+ (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_IB))
+ ret = ib_get_cpi_capmask2(&port->classport_info.data.ib)
+ & IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT;
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(ib_sa_sendonly_fullmem_support);
+
+struct ib_classport_info_context {
+ struct completion done;
+ struct ib_sa_query *sa_query;
+};
+
+static void ib_classportinfo_cb(void *context)
+{
+ struct ib_classport_info_context *cb_ctx = context;
+
+ complete(&cb_ctx->done);
+}
+
static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
@@ -1736,91 +2046,91 @@ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
unsigned long flags;
struct ib_sa_classport_info_query *query =
container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
+ struct ib_sa_classport_cache *info = &sa_query->port->classport_info;
if (mad) {
- struct ib_class_port_info rec;
+ if (sa_query->flags & IB_SA_QUERY_OPA) {
+ struct opa_class_port_info rec;
- ib_unpack(classport_info_rec_table,
- ARRAY_SIZE(classport_info_rec_table),
- mad->data, &rec);
+ ib_unpack(opa_classport_info_rec_table,
+ ARRAY_SIZE(opa_classport_info_rec_table),
+ mad->data, &rec);
- spin_lock_irqsave(&sa_query->port->classport_lock, flags);
- if (!status && !sa_query->port->classport_info.valid) {
- memcpy(&sa_query->port->classport_info.data, &rec,
- sizeof(sa_query->port->classport_info.data));
+ spin_lock_irqsave(&sa_query->port->classport_lock,
+ flags);
+ if (!status && !info->valid) {
+ memcpy(&info->data.opa, &rec,
+ sizeof(info->data.opa));
- sa_query->port->classport_info.valid = true;
- }
- spin_unlock_irqrestore(&sa_query->port->classport_lock, flags);
+ info->valid = true;
+ info->data.type = RDMA_CLASS_PORT_INFO_OPA;
+ }
+ spin_unlock_irqrestore(&sa_query->port->classport_lock,
+ flags);
- query->callback(status, &rec, query->context);
- } else {
- query->callback(status, NULL, query->context);
+ } else {
+ struct ib_class_port_info rec;
+
+ ib_unpack(ib_classport_info_rec_table,
+ ARRAY_SIZE(ib_classport_info_rec_table),
+ mad->data, &rec);
+
+ spin_lock_irqsave(&sa_query->port->classport_lock,
+ flags);
+ if (!status && !info->valid) {
+ memcpy(&info->data.ib, &rec,
+ sizeof(info->data.ib));
+
+ info->valid = true;
+ info->data.type = RDMA_CLASS_PORT_INFO_IB;
+ }
+ spin_unlock_irqrestore(&sa_query->port->classport_lock,
+ flags);
+ }
}
+ query->callback(query->context);
}
-static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query)
+static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
{
kfree(container_of(sa_query, struct ib_sa_classport_info_query,
sa_query));
}
-int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_class_port_info *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query)
+static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
+ int timeout_ms,
+ void (*callback)(void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
{
- struct ib_sa_classport_info_query *query;
- struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port;
struct ib_mad_agent *agent;
+ struct ib_sa_classport_info_query *query;
struct ib_sa_mad *mad;
- struct ib_class_port_info cached_class_port_info;
+ gfp_t gfp_mask = GFP_KERNEL;
int ret;
- unsigned long flags;
- if (!sa_dev)
- return -ENODEV;
-
- port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
- /* Use cached ClassPortInfo attribute if valid instead of sending mad */
- spin_lock_irqsave(&port->classport_lock, flags);
- if (port->classport_info.valid && callback) {
- memcpy(&cached_class_port_info, &port->classport_info.data,
- sizeof(cached_class_port_info));
- spin_unlock_irqrestore(&port->classport_lock, flags);
- callback(0, &cached_class_port_info, context);
- return 0;
- }
- spin_unlock_irqrestore(&port->classport_lock, flags);
-
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
query->sa_query.port = port;
+ query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device,
+ port->port_num) ?
+ IB_SA_QUERY_OPA : 0;
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
- goto err1;
+ goto err_free;
- ib_sa_client_get(client);
- query->sa_query.client = client;
- query->callback = callback;
- query->context = context;
+ query->callback = callback;
+ query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
- query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL;
-
- query->sa_query.release = ib_sa_portclass_info_rec_release;
- /* support GET only */
+ query->sa_query.callback = ib_sa_classport_info_rec_callback;
+ query->sa_query.release = ib_sa_classport_info_rec_release;
mad->mad_hdr.method = IB_MGMT_METHOD_GET;
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
mad->sa_hdr.comp_mask = 0;
@@ -1828,20 +2138,71 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
- goto err2;
+ goto err_free_mad;
return ret;
-err2:
+err_free_mad:
*sa_query = NULL;
- ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
-err1:
+err_free:
kfree(query);
return ret;
}
-EXPORT_SYMBOL(ib_sa_classport_info_rec_query);
+
+static void update_ib_cpi(struct work_struct *work)
+{
+ struct ib_sa_port *port =
+ container_of(work, struct ib_sa_port, ib_cpi_work.work);
+ struct ib_classport_info_context *cb_context;
+ unsigned long flags;
+ int ret;
+
+ /* If the classport info is valid, nothing
+ * to do here.
+ */
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if (port->classport_info.valid) {
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+
+ cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL);
+ if (!cb_context)
+ goto err_nomem;
+
+ init_completion(&cb_context->done);
+
+ ret = ib_sa_classport_info_rec_query(port, 3000,
+ ib_classportinfo_cb, cb_context,
+ &cb_context->sa_query);
+ if (ret < 0)
+ goto free_cb_err;
+ wait_for_completion(&cb_context->done);
+free_cb_err:
+ kfree(cb_context);
+ spin_lock_irqsave(&port->classport_lock, flags);
+
+ /* If the classport info is still not valid, the query should have
+ * failed for some reason. Retry issuing the query.
+ */
+ if (!port->classport_info.valid) {
+ port->classport_info.retry_cnt++;
+ if (port->classport_info.retry_cnt <=
+ IB_SA_CPI_MAX_RETRY_CNT) {
+ unsigned long delay =
+ msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
+
+ queue_delayed_work(ib_wq, &port->ib_cpi_work, delay);
+ }
+ }
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+
+err_nomem:
+ return;
+}
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
@@ -1870,7 +2231,8 @@ static void send_handler(struct ib_mad_agent *agent,
spin_unlock_irqrestore(&idr_lock, flags);
free_mad(query);
- ib_sa_client_put(query->client);
+ if (query->client)
+ ib_sa_client_put(query->client);
query->release(query);
}
@@ -1897,6 +2259,102 @@ static void recv_handler(struct ib_mad_agent *mad_agent,
ib_free_recv_mad(mad_recv_wc);
}
+static void update_sm_ah(struct work_struct *work)
+{
+ struct ib_sa_port *port =
+ container_of(work, struct ib_sa_port, update_task);
+ struct ib_sa_sm_ah *new_ah;
+ struct ib_port_attr port_attr;
+ struct rdma_ah_attr ah_attr;
+
+ if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
+ pr_warn("Couldn't query port\n");
+ return;
+ }
+
+ new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL);
+ if (!new_ah)
+ return;
+
+ kref_init(&new_ah->ref);
+ new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
+
+ new_ah->pkey_index = 0;
+ if (ib_find_pkey(port->agent->device, port->port_num,
+ IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
+ pr_err("Couldn't find index for default PKey\n");
+
+ memset(&ah_attr, 0, sizeof(ah_attr));
+ ah_attr.type = rdma_ah_find_type(port->agent->device,
+ port->port_num);
+ rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
+ rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
+ rdma_ah_set_port_num(&ah_attr, port->port_num);
+ if (port_attr.grh_required) {
+ rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
+
+ rdma_ah_set_subnet_prefix(&ah_attr,
+ cpu_to_be64(port_attr.subnet_prefix));
+ rdma_ah_set_interface_id(&ah_attr,
+ cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
+ }
+
+ new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr);
+ if (IS_ERR(new_ah->ah)) {
+ pr_warn("Couldn't create new SM AH\n");
+ kfree(new_ah);
+ return;
+ }
+
+ spin_lock_irq(&port->ah_lock);
+ if (port->sm_ah)
+ kref_put(&port->sm_ah->ref, free_sm_ah);
+ port->sm_ah = new_ah;
+ spin_unlock_irq(&port->ah_lock);
+}
+
+static void ib_sa_event(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ if (event->event == IB_EVENT_PORT_ERR ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER) {
+ unsigned long flags;
+ struct ib_sa_device *sa_dev =
+ container_of(handler, typeof(*sa_dev), event_handler);
+ u8 port_num = event->element.port_num - sa_dev->start_port;
+ struct ib_sa_port *port = &sa_dev->port[port_num];
+
+ if (!rdma_cap_ib_sa(handler->device, port->port_num))
+ return;
+
+ spin_lock_irqsave(&port->ah_lock, flags);
+ if (port->sm_ah)
+ kref_put(&port->sm_ah->ref, free_sm_ah);
+ port->sm_ah = NULL;
+ spin_unlock_irqrestore(&port->ah_lock, flags);
+
+ if (event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PORT_ACTIVE) {
+ unsigned long delay =
+ msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
+
+ spin_lock_irqsave(&port->classport_lock, flags);
+ port->classport_info.valid = false;
+ port->classport_info.retry_cnt = 0;
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ queue_delayed_work(ib_wq,
+ &port->ib_cpi_work, delay);
+ }
+ queue_work(ib_wq, &sa_dev->port[port_num].update_task);
+ }
+}
+
static void ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
@@ -1934,6 +2392,8 @@ static void ib_sa_add_one(struct ib_device *device)
goto err;
INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
+ INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
+ update_ib_cpi);
count++;
}
@@ -1980,11 +2440,11 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
return;
ib_unregister_event_handler(&sa_dev->event_handler);
-
flush_workqueue(ib_wq);
for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
if (rdma_cap_ib_sa(device, i + 1)) {
+ cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work);
ib_unregister_mad_agent(sa_dev->port[i].agent);
if (sa_dev->port[i].sm_ah)
kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index daadf3130c9f..7ebe1ef23652 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -253,6 +253,10 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
speed = " EDR";
rate = 250;
break;
+ case IB_SPEED_HDR:
+ speed = " HDR";
+ rate = 500;
+ break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
rate = 25;
@@ -1301,7 +1305,7 @@ err_put:
free_port_list_attributes(device);
err_unregister:
- device_unregister(class_dev);
+ device_del(class_dev);
err:
return ret;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index cc0d51fb06e3..112099c86a19 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -702,10 +702,10 @@ static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
return 0;
}
-static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src)
+static int ib_ucm_path_get(struct sa_path_rec **path, u64 src)
{
struct ib_user_path_rec upath;
- struct ib_sa_path_rec *sa_path;
+ struct sa_path_rec *sa_path;
*path = NULL;
@@ -962,7 +962,7 @@ static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file,
int in_len, int out_len)
{
struct ib_ucm_context *ctx;
- struct ib_sa_path_rec *path = NULL;
+ struct sa_path_rec *path = NULL;
struct ib_ucm_lap cmd;
const void *data = NULL;
int result;
@@ -1205,12 +1205,15 @@ static void ib_ucm_release_dev(struct device *dev)
struct ib_ucm_device *ucm_dev;
ucm_dev = container_of(dev, struct ib_ucm_device, dev);
- cdev_del(&ucm_dev->cdev);
+ kfree(ucm_dev);
+}
+
+static void ib_ucm_free_dev(struct ib_ucm_device *ucm_dev)
+{
if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
clear_bit(ucm_dev->devnum, dev_map);
else
clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map);
- kfree(ucm_dev);
}
static const struct file_operations ucm_fops = {
@@ -1266,7 +1269,9 @@ static void ib_ucm_add_one(struct ib_device *device)
if (!ucm_dev)
return;
+ device_initialize(&ucm_dev->dev);
ucm_dev->ib_dev = device;
+ ucm_dev->dev.release = ib_ucm_release_dev;
devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
if (devnum >= IB_UCM_MAX_DEVICES) {
@@ -1286,16 +1291,14 @@ static void ib_ucm_add_one(struct ib_device *device)
cdev_init(&ucm_dev->cdev, &ucm_fops);
ucm_dev->cdev.owner = THIS_MODULE;
kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum);
- if (cdev_add(&ucm_dev->cdev, base, 1))
- goto err;
ucm_dev->dev.class = &cm_class;
ucm_dev->dev.parent = device->dev.parent;
- ucm_dev->dev.devt = ucm_dev->cdev.dev;
- ucm_dev->dev.release = ib_ucm_release_dev;
+ ucm_dev->dev.devt = base;
+
dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum);
- if (device_register(&ucm_dev->dev))
- goto err_cdev;
+ if (cdev_device_add(&ucm_dev->cdev, &ucm_dev->dev))
+ goto err_devnum;
if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev))
goto err_dev;
@@ -1304,15 +1307,11 @@ static void ib_ucm_add_one(struct ib_device *device)
return;
err_dev:
- device_unregister(&ucm_dev->dev);
-err_cdev:
- cdev_del(&ucm_dev->cdev);
- if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
- clear_bit(devnum, dev_map);
- else
- clear_bit(devnum, overflow_map);
+ cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev);
+err_devnum:
+ ib_ucm_free_dev(ucm_dev);
err:
- kfree(ucm_dev);
+ put_device(&ucm_dev->dev);
return;
}
@@ -1323,7 +1322,9 @@ static void ib_ucm_remove_one(struct ib_device *device, void *client_data)
if (!ucm_dev)
return;
- device_unregister(&ucm_dev->dev);
+ cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev);
+ ib_ucm_free_dev(ucm_dev);
+ put_device(&ucm_dev->dev);
}
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index e12f8faf8c23..276f0ef835bd 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -898,11 +898,18 @@ static ssize_t ucma_query_path(struct ucma_context *ctx,
for (i = 0, out_len -= sizeof(*resp);
i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
i++, out_len -= sizeof(struct ib_path_rec_data)) {
+ struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];
resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
IB_PATH_BIDIRECTIONAL;
- ib_sa_pack_path(&ctx->cm_id->route.path_rec[i],
- &resp->path_data[i].path_rec);
+ if (rec->rec_type == SA_PATH_REC_TYPE_IB) {
+ ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
+ } else {
+ struct sa_path_rec ib;
+
+ sa_convert_path_opa_to_ib(&ib, rec);
+ ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);
+ }
}
if (copy_to_user(response, resp,
@@ -1197,7 +1204,7 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
static int ucma_set_ib_path(struct ucma_context *ctx,
struct ib_path_rec_data *path_data, size_t optlen)
{
- struct ib_sa_path_rec sa_path;
+ struct sa_path_rec sa_path;
struct rdma_cm_event event;
int ret;
@@ -1215,8 +1222,17 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
memset(&sa_path, 0, sizeof(sa_path));
+ sa_path.rec_type = SA_PATH_REC_TYPE_IB;
ib_sa_unpack_path(path_data->path_rec, &sa_path);
- ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+
+ if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
+ struct sa_path_rec opa;
+
+ sa_convert_path_ib_to_opa(&opa, &sa_path);
+ ret = rdma_set_ib_paths(ctx->cm_id, &opa, 1);
+ } else {
+ ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+ }
if (ret)
return ret;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 27f155d2df8d..3dbf811d3c51 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -115,11 +115,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (!umem)
return ERR_PTR(-ENOMEM);
- umem->context = context;
- umem->length = size;
- umem->address = addr;
- umem->page_size = PAGE_SIZE;
- umem->pid = get_task_pid(current, PIDTYPE_PID);
+ umem->context = context;
+ umem->length = size;
+ umem->address = addr;
+ umem->page_shift = PAGE_SHIFT;
+ umem->pid = get_task_pid(current, PIDTYPE_PID);
/*
* We ask for writable memory if any of the following
* access flags are set. "Local write" and "remote write"
@@ -133,7 +133,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (access & IB_ACCESS_ON_DEMAND) {
put_pid(umem->pid);
- ret = ib_umem_odp_get(context, umem);
+ ret = ib_umem_odp_get(context, umem, access);
if (ret) {
kfree(umem);
return ERR_PTR(ret);
@@ -315,7 +315,6 @@ EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
- int shift;
int i;
int n;
struct scatterlist *sg;
@@ -323,11 +322,9 @@ int ib_umem_page_count(struct ib_umem *umem)
if (umem->odp_data)
return ib_umem_num_pages(umem);
- shift = ilog2(umem->page_size);
-
n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
- n += sg_dma_len(sg) >> shift;
+ n += sg_dma_len(sg) >> umem->page_shift;
return n;
}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index cb2742b548bb..0780b1afefa9 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -38,6 +38,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
+#include <linux/hugetlb.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
@@ -254,11 +255,11 @@ struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
if (!umem)
return ERR_PTR(-ENOMEM);
- umem->context = context;
- umem->length = size;
- umem->address = addr;
- umem->page_size = PAGE_SIZE;
- umem->writable = 1;
+ umem->context = context;
+ umem->length = size;
+ umem->address = addr;
+ umem->page_shift = PAGE_SHIFT;
+ umem->writable = 1;
odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
if (!odp_data) {
@@ -306,7 +307,8 @@ out_umem:
}
EXPORT_SYMBOL(ib_alloc_odp_umem);
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
+ int access)
{
int ret_val;
struct pid *our_pid;
@@ -315,6 +317,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
if (!mm)
return -EINVAL;
+ if (access & IB_ACCESS_HUGETLB) {
+ struct vm_area_struct *vma;
+ struct hstate *h;
+
+ vma = find_vma(mm, ib_umem_start(umem));
+ if (!vma || !is_vm_hugetlb_page(vma))
+ return -EINVAL;
+ h = hstate_vma(vma);
+ umem->page_shift = huge_page_shift(h);
+ umem->hugetlb = 1;
+ } else {
+ umem->hugetlb = 0;
+ }
+
/* Prevent creating ODP MRs in child processes */
rcu_read_lock();
our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
@@ -325,7 +341,6 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
goto out_mm;
}
- umem->hugetlb = 0;
umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
if (!umem->odp_data) {
ret_val = -ENOMEM;
@@ -504,7 +519,6 @@ out:
static int ib_umem_odp_map_dma_single_page(
struct ib_umem *umem,
int page_index,
- u64 base_virt_addr,
struct page *page,
u64 access_mask,
unsigned long current_seq)
@@ -527,7 +541,7 @@ static int ib_umem_odp_map_dma_single_page(
if (!(umem->odp_data->dma_list[page_index])) {
dma_addr = ib_dma_map_page(dev,
page,
- 0, PAGE_SIZE,
+ 0, BIT(umem->page_shift),
DMA_BIDIRECTIONAL);
if (ib_dma_mapping_error(dev, dma_addr)) {
ret = -EFAULT;
@@ -555,8 +569,9 @@ out:
if (remove_existing_mapping && umem->context->invalidate_range) {
invalidate_page_trampoline(
umem,
- base_virt_addr + (page_index * PAGE_SIZE),
- base_virt_addr + ((page_index+1)*PAGE_SIZE),
+ ib_umem_start(umem) + (page_index >> umem->page_shift),
+ ib_umem_start(umem) + ((page_index + 1) >>
+ umem->page_shift),
NULL);
ret = -EAGAIN;
}
@@ -595,10 +610,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL;
struct page **local_page_list = NULL;
- u64 off;
- int j, k, ret = 0, start_idx, npages = 0;
- u64 base_virt_addr;
+ u64 page_mask, off;
+ int j, k, ret = 0, start_idx, npages = 0, page_shift;
unsigned int flags = 0;
+ phys_addr_t p = 0;
if (access_mask == 0)
return -EINVAL;
@@ -611,9 +626,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
if (!local_page_list)
return -ENOMEM;
- off = user_virt & (~PAGE_MASK);
- user_virt = user_virt & PAGE_MASK;
- base_virt_addr = user_virt;
+ page_shift = umem->page_shift;
+ page_mask = ~(BIT(page_shift) - 1);
+ off = user_virt & (~page_mask);
+ user_virt = user_virt & page_mask;
bcnt += off; /* Charge for the first page offset as well. */
owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
@@ -631,13 +647,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
if (access_mask & ODP_WRITE_ALLOWED_BIT)
flags |= FOLL_WRITE;
- start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
+ start_idx = (user_virt - ib_umem_start(umem)) >> page_shift;
k = start_idx;
while (bcnt > 0) {
- const size_t gup_num_pages =
- min_t(size_t, ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
- PAGE_SIZE / sizeof(struct page *));
+ const size_t gup_num_pages = min_t(size_t,
+ (bcnt + BIT(page_shift) - 1) >> page_shift,
+ PAGE_SIZE / sizeof(struct page *));
down_read(&owning_mm->mmap_sem);
/*
@@ -656,14 +672,25 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
break;
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
- user_virt += npages << PAGE_SHIFT;
mutex_lock(&umem->odp_data->umem_mutex);
- for (j = 0; j < npages; ++j) {
+ for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
+ if (user_virt & ~page_mask) {
+ p += PAGE_SIZE;
+ if (page_to_phys(local_page_list[j]) != p) {
+ ret = -EFAULT;
+ break;
+ }
+ put_page(local_page_list[j]);
+ continue;
+ }
+
ret = ib_umem_odp_map_dma_single_page(
- umem, k, base_virt_addr, local_page_list[j],
- access_mask, current_seq);
+ umem, k, local_page_list[j],
+ access_mask, current_seq);
if (ret < 0)
break;
+
+ p = page_to_phys(local_page_list[j]);
k++;
}
mutex_unlock(&umem->odp_data->umem_mutex);
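
With a umem page larger than the system page, the fault loop above only DMA-maps the system page that sits on a umem-page boundary and merely verifies that every following system page inside that umem page is physically contiguous with it, dropping the extra references. A condensed sketch of that invariant (the helper name is illustrative; the real check is the one in the hunk, where p tracks the previous page's physical address and is advanced by PAGE_SIZE before the comparison):

#include <linux/mm.h>

/*
 * True if @page sits exactly PAGE_SIZE beyond @prev_phys, i.e. the huge
 * page behind the current umem page has not been split or relocated
 * between the boundary page and this one.
 */
static bool continues_huge_run(struct page *page, phys_addr_t prev_phys)
{
	return page_to_phys(page) == prev_phys + PAGE_SIZE;
}
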
@@ -707,8 +734,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
* invalidations, so we must make sure we free each page only
* once. */
mutex_lock(&umem->odp_data->umem_mutex);
- for (addr = virt; addr < bound; addr += (u64)umem->page_size) {
- idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+ for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
+ idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
if (umem->odp_data->page_list[idx]) {
struct page *page = umem->odp_data->page_list[idx];
dma_addr_t dma = umem->odp_data->dma_list[idx];
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index aca7ff7abedc..36a6f5c8914c 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -197,7 +197,7 @@ static void send_handler(struct ib_mad_agent *agent,
struct ib_umad_packet *packet = send_wc->send_buf->context[0];
dequeue_send(file, packet);
- ib_destroy_ah(packet->msg->ah);
+ rdma_destroy_ah(packet->msg->ah);
ib_free_send_mad(packet->msg);
if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
@@ -235,17 +235,19 @@ static void recv_handler(struct ib_mad_agent *agent,
packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
if (packet->mad.hdr.grh_present) {
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
+ const struct ib_global_route *grh;
ib_init_ah_from_wc(agent->device, agent->port_num,
mad_recv_wc->wc, mad_recv_wc->recv_buf.grh,
&ah_attr);
- packet->mad.hdr.gid_index = ah_attr.grh.sgid_index;
- packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit;
- packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class;
- memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16);
- packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label);
+ grh = rdma_ah_read_grh(&ah_attr);
+ packet->mad.hdr.gid_index = grh->sgid_index;
+ packet->mad.hdr.hop_limit = grh->hop_limit;
+ packet->mad.hdr.traffic_class = grh->traffic_class;
+ memcpy(packet->mad.hdr.gid, &grh->dgid, 16);
+ packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label);
}
if (queue_packet(file, agent, packet))
@@ -449,7 +451,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
struct ib_umad_file *file = filp->private_data;
struct ib_umad_packet *packet;
struct ib_mad_agent *agent;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
struct ib_ah *ah;
struct ib_rmpp_mad *rmpp_mad;
__be64 *tid;
@@ -489,20 +491,22 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
}
memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid);
- ah_attr.sl = packet->mad.hdr.sl;
- ah_attr.src_path_bits = packet->mad.hdr.path_bits;
- ah_attr.port_num = file->port->port_num;
+ ah_attr.type = rdma_ah_find_type(file->port->ib_dev,
+ file->port->port_num);
+ rdma_ah_set_dlid(&ah_attr, be16_to_cpu(packet->mad.hdr.lid));
+ rdma_ah_set_sl(&ah_attr, packet->mad.hdr.sl);
+ rdma_ah_set_path_bits(&ah_attr, packet->mad.hdr.path_bits);
+ rdma_ah_set_port_num(&ah_attr, file->port->port_num);
if (packet->mad.hdr.grh_present) {
- ah_attr.ah_flags = IB_AH_GRH;
- memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
- ah_attr.grh.sgid_index = packet->mad.hdr.gid_index;
- ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
- ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
- ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
+ rdma_ah_set_grh(&ah_attr, NULL,
+ be32_to_cpu(packet->mad.hdr.flow_label),
+ packet->mad.hdr.gid_index,
+ packet->mad.hdr.hop_limit,
+ packet->mad.hdr.traffic_class);
+ rdma_ah_set_dgid_raw(&ah_attr, packet->mad.hdr.gid);
}
- ah = ib_create_ah(agent->qp->pd, &ah_attr);
+ ah = rdma_create_ah(agent->qp->pd, &ah_attr);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_up;
@@ -596,7 +600,7 @@ err_send:
err_msg:
ib_free_send_mad(packet->msg);
err_ah:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
err_up:
mutex_unlock(&file->mutex);
err:
@@ -1183,7 +1187,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
cdev_init(&port->cdev, &umad_fops);
port->cdev.owner = THIS_MODULE;
- port->cdev.kobj.parent = &umad_dev->kobj;
+ cdev_set_parent(&port->cdev, &umad_dev->kobj);
kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
if (cdev_add(&port->cdev, base, 1))
goto err_cdev;
@@ -1202,7 +1206,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
base += IB_UMAD_MAX_PORTS;
cdev_init(&port->sm_cdev, &umad_sm_fops);
port->sm_cdev.owner = THIS_MODULE;
- port->sm_cdev.kobj.parent = &umad_dev->kobj;
+ cdev_set_parent(&port->sm_cdev, &umad_dev->kobj);
kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
if (cdev_add(&port->sm_cdev, base, 1))
goto err_sm_cdev;
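
user_mad.c now goes through the opaque rdma_ah_attr accessors instead of poking struct ib_ah_attr fields directly. A minimal sketch of the write/read pattern these hunks rely on, using the two-argument rdma_create_ah() shown in this series (all field values below are placeholders):

#include <rdma/ib_verbs.h>

/* Sketch: build an AH attribute and read its GRH back via the accessors. */
static struct ib_ah *make_ah_example(struct ib_pd *pd, struct ib_device *dev,
				     u8 port_num, u16 dlid, u8 sl)
{
	struct rdma_ah_attr attr = {};
	const struct ib_global_route *grh;

	attr.type = rdma_ah_find_type(dev, port_num);
	rdma_ah_set_dlid(&attr, dlid);
	rdma_ah_set_sl(&attr, sl);
	rdma_ah_set_port_num(&attr, port_num);

	/* GRH fields are written as a group and read back read-only. */
	rdma_ah_set_grh(&attr, NULL, /*flow_label*/ 0, /*sgid_index*/ 0,
			/*hop_limit*/ 64, /*traffic_class*/ 0);
	grh = rdma_ah_read_grh(&attr);
	(void)grh->hop_limit;

	return rdma_create_ah(pd, &attr);
}
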
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index e1bedf0bac04..64d494a64daf 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -76,12 +76,13 @@
* an asynchronous event queue file is created and released when the
* event file is closed.
*
- * struct ib_uverbs_event_file: One reference is held by the VFS and
- * released when the file is closed. For asynchronous event files,
- * another reference is held by the corresponding main context file
- * and released when that file is closed. For completion event files,
- * a reference is taken when a CQ is created that uses the file, and
- * released when the CQ is destroyed.
+ * struct ib_uverbs_event_queue: Base structure for
+ * struct ib_uverbs_async_event_file and struct ib_uverbs_completion_event_file.
+ * One reference is held by the VFS and released when the file is closed.
+ * For asynchronous event files, another reference is held by the corresponding
+ * main context file and released when that file is closed. For completion
+ * event files, a reference is taken when a CQ is created that uses the file,
+ * and released when the CQ is destroyed.
*/
struct ib_uverbs_device {
@@ -101,18 +102,26 @@ struct ib_uverbs_device {
struct list_head uverbs_events_file_list;
};
-struct ib_uverbs_event_file {
- struct kref ref;
- int is_async;
- struct ib_uverbs_file *uverbs_file;
+struct ib_uverbs_event_queue {
spinlock_t lock;
int is_closed;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
+};
+
+struct ib_uverbs_async_event_file {
+ struct ib_uverbs_event_queue ev_queue;
+ struct ib_uverbs_file *uverbs_file;
+ struct kref ref;
struct list_head list;
};
+struct ib_uverbs_completion_event_file {
+ struct ib_uobject_file uobj_file;
+ struct ib_uverbs_event_queue ev_queue;
+};
+
struct ib_uverbs_file {
struct kref ref;
struct mutex mutex;
@@ -120,9 +129,13 @@ struct ib_uverbs_file {
struct ib_uverbs_device *device;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
- struct ib_uverbs_event_file *async_file;
+ struct ib_uverbs_async_event_file *async_file;
struct list_head list;
int is_closed;
+
+ struct idr idr;
+ /* spinlock protects write access to idr */
+ spinlock_t idr_lock;
};
struct ib_uverbs_event {
@@ -159,6 +172,8 @@ struct ib_usrq_object {
struct ib_uqp_object {
struct ib_uevent_object uevent;
+ /* lock for mcast list */
+ struct mutex mcast_lock;
struct list_head mcast_list;
struct ib_uxrcd_object *uxrcd;
};
@@ -176,32 +191,18 @@ struct ib_ucq_object {
u32 async_events_reported;
};
-extern spinlock_t ib_uverbs_idr_lock;
-extern struct idr ib_uverbs_pd_idr;
-extern struct idr ib_uverbs_mr_idr;
-extern struct idr ib_uverbs_mw_idr;
-extern struct idr ib_uverbs_ah_idr;
-extern struct idr ib_uverbs_cq_idr;
-extern struct idr ib_uverbs_qp_idr;
-extern struct idr ib_uverbs_srq_idr;
-extern struct idr ib_uverbs_xrcd_idr;
-extern struct idr ib_uverbs_rule_idr;
-extern struct idr ib_uverbs_wq_idr;
-extern struct idr ib_uverbs_rwq_ind_tbl_idr;
-
-void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
-
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async);
+extern const struct file_operations uverbs_event_fops;
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
+struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
+ struct ib_device *ib_dev);
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
+ struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj);
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
struct ib_uevent_object *uobj);
+void ib_uverbs_release_file(struct kref *ref);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
@@ -210,9 +211,12 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
-void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd,
+ enum rdma_remove_reason why);
int uverbs_dealloc_mw(struct ib_mw *mw);
+void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj);
struct ib_uverbs_flow_spec {
union {
@@ -229,6 +233,7 @@ struct ib_uverbs_flow_spec {
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
struct ib_uverbs_flow_spec_action_tag flow_tag;
+ struct ib_uverbs_flow_spec_action_drop drop;
};
};
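
The split of ib_uverbs_event_file into an embedded ib_uverbs_event_queue lets the async and completion event files share the queue machinery and recover their outer structure with container_of. A small sketch of that recovery, mirroring the new layout declared above (the function name is illustrative):

#include <linux/kernel.h>

/* Sketch: from the shared queue back to the completion-event file. */
static struct ib_uverbs_completion_event_file *
comp_file_of(struct ib_uverbs_event_queue *ev_queue)
{
	return container_of(ev_queue,
			    struct ib_uverbs_completion_event_file,
			    ev_queue);
}

/*
 * The CQ stores &ev_file->ev_queue in cq->cq_context (see the uverbs_cmd.c
 * hunks below), so a completion handler can do:
 *
 *	struct ib_uverbs_event_queue *q = cq->cq_context;
 *	struct ib_uverbs_completion_event_file *f = comp_file_of(q);
 */
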
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 7b7a76e1279a..70b7fb156414 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -40,270 +40,29 @@
#include <linux/uaccess.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+
#include "uverbs.h"
#include "core_priv.h"
-struct uverbs_lock_class {
- struct lock_class_key key;
- char name[16];
-};
-
-static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" };
-static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" };
-static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" };
-static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" };
-static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
-static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
-static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
-static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
-static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
-static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" };
-static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" };
-
-/*
- * The ib_uobject locking scheme is as follows:
- *
- * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
- * needs to be held during all idr write operations. When an object is
- * looked up, a reference must be taken on the object's kref before
- * dropping this lock. For read operations, the rcu_read_lock()
- * and rcu_write_lock() but similarly the kref reference is grabbed
- * before the rcu_read_unlock().
- *
- * - Each object also has an rwsem. This rwsem must be held for
- * reading while an operation that uses the object is performed.
- * For example, while registering an MR, the associated PD's
- * uobject.mutex must be held for reading. The rwsem must be held
- * for writing while initializing or destroying an object.
- *
- * - In addition, each object has a "live" flag. If this flag is not
- * set, then lookups of the object will fail even if it is found in
- * the idr. This handles a reader that blocks and does not acquire
- * the rwsem until after the object is destroyed. The destroy
- * operation will set the live flag to 0 and then drop the rwsem;
- * this will allow the reader to acquire the rwsem, see that the
- * live flag is 0, and then drop the rwsem and its reference to
- * object. The underlying storage will not be freed until the last
- * reference to the object is dropped.
- */
-
-static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
- struct ib_ucontext *context, struct uverbs_lock_class *c)
-{
- uobj->user_handle = user_handle;
- uobj->context = context;
- kref_init(&uobj->ref);
- init_rwsem(&uobj->mutex);
- lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name);
- uobj->live = 0;
-}
-
-static void release_uobj(struct kref *kref)
-{
- kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
-}
-
-static void put_uobj(struct ib_uobject *uobj)
-{
- kref_put(&uobj->ref, release_uobj);
-}
-
-static void put_uobj_read(struct ib_uobject *uobj)
-{
- up_read(&uobj->mutex);
- put_uobj(uobj);
-}
-
-static void put_uobj_write(struct ib_uobject *uobj)
-{
- up_write(&uobj->mutex);
- put_uobj(uobj);
-}
-
-static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
-{
- int ret;
-
- idr_preload(GFP_KERNEL);
- spin_lock(&ib_uverbs_idr_lock);
-
- ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT);
- if (ret >= 0)
- uobj->id = ret;
-
- spin_unlock(&ib_uverbs_idr_lock);
- idr_preload_end();
-
- return ret < 0 ? ret : 0;
-}
-
-void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
+static struct ib_uverbs_completion_event_file *
+ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
{
- spin_lock(&ib_uverbs_idr_lock);
- idr_remove(idr, uobj->id);
- spin_unlock(&ib_uverbs_idr_lock);
-}
+ struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
+ fd, context);
+ struct ib_uobject_file *uobj_file;
-static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
- struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
+ if (IS_ERR(uobj))
+ return (void *)uobj;
- rcu_read_lock();
- uobj = idr_find(idr, id);
- if (uobj) {
- if (uobj->context == context)
- kref_get(&uobj->ref);
- else
- uobj = NULL;
- }
- rcu_read_unlock();
+ uverbs_uobject_get(uobj);
+ uobj_put_read(uobj);
- return uobj;
-}
-
-static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
- struct ib_ucontext *context, int nested)
-{
- struct ib_uobject *uobj;
-
- uobj = __idr_get_uobj(idr, id, context);
- if (!uobj)
- return NULL;
-
- if (nested)
- down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
- else
- down_read(&uobj->mutex);
- if (!uobj->live) {
- put_uobj_read(uobj);
- return NULL;
- }
-
- return uobj;
-}
-
-static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
- struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
-
- uobj = __idr_get_uobj(idr, id, context);
- if (!uobj)
- return NULL;
-
- down_write(&uobj->mutex);
- if (!uobj->live) {
- put_uobj_write(uobj);
- return NULL;
- }
-
- return uobj;
-}
-
-static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
- int nested)
-{
- struct ib_uobject *uobj;
-
- uobj = idr_read_uobj(idr, id, context, nested);
- return uobj ? uobj->object : NULL;
-}
-
-static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
-}
-
-static void put_pd_read(struct ib_pd *pd)
-{
- put_uobj_read(pd->uobject);
-}
-
-static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
-{
- return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
-}
-
-static void put_cq_read(struct ib_cq *cq)
-{
- put_uobj_read(cq->uobject);
-}
-
-static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
-}
-
-static void put_ah_read(struct ib_ah *ah)
-{
- put_uobj_read(ah->uobject);
-}
-
-static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
-}
-
-static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0);
-}
-
-static void put_wq_read(struct ib_wq *wq)
-{
- put_uobj_read(wq->uobject);
-}
-
-static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle,
- struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0);
-}
-
-static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table)
-{
- put_uobj_read(ind_table->uobject);
-}
-
-static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
-
- uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context);
- return uobj ? uobj->object : NULL;
-}
-
-static void put_qp_read(struct ib_qp *qp)
-{
- put_uobj_read(qp->uobject);
-}
-
-static void put_qp_write(struct ib_qp *qp)
-{
- put_uobj_write(qp->uobject);
-}
-
-static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
-}
-
-static void put_srq_read(struct ib_srq *srq)
-{
- put_uobj_read(srq->uobject);
-}
-
-static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
- struct ib_uobject **uobj)
-{
- *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
- return *uobj ? (*uobj)->object : NULL;
-}
-
-static void put_xrcd_read(struct ib_uobject *uobj)
-{
- put_uobj_read(uobj);
+ uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
+ return container_of(uobj_file, struct ib_uverbs_completion_event_file,
+ uobj_file);
}
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
@@ -348,17 +107,10 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ucontext->device = ib_dev;
ucontext->cg_obj = cg_obj;
- INIT_LIST_HEAD(&ucontext->pd_list);
- INIT_LIST_HEAD(&ucontext->mr_list);
- INIT_LIST_HEAD(&ucontext->mw_list);
- INIT_LIST_HEAD(&ucontext->cq_list);
- INIT_LIST_HEAD(&ucontext->qp_list);
- INIT_LIST_HEAD(&ucontext->srq_list);
- INIT_LIST_HEAD(&ucontext->ah_list);
- INIT_LIST_HEAD(&ucontext->wq_list);
- INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list);
- INIT_LIST_HEAD(&ucontext->xrcd_list);
- INIT_LIST_HEAD(&ucontext->rule_list);
+ /* ufile is required when some objects are released */
+ ucontext->ufile = file;
+ uverbs_initialize_ucontext(ucontext);
+
rcu_read_lock();
ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
rcu_read_unlock();
@@ -382,7 +134,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err_free;
resp.async_fd = ret;
- filp = ib_uverbs_alloc_event_file(file, ib_dev, 1);
+ filp = ib_uverbs_alloc_async_event_file(file, ib_dev);
if (IS_ERR(filp)) {
ret = PTR_ERR(filp);
goto err_fd;
@@ -565,19 +317,9 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
- ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret) {
- kfree(uobj);
- return ret;
- }
-
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(pd), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
if (IS_ERR(pd)) {
@@ -591,10 +333,6 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
atomic_set(&pd->usecnt, 0);
uobj->object = pd;
- ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
- if (ret)
- goto err_idr;
-
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
@@ -604,25 +342,15 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
goto err_copy;
}
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->pd_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
-
-err_idr:
ib_dealloc_pd(pd);
err:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -633,45 +361,19 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
{
struct ib_uverbs_dealloc_pd cmd;
struct ib_uobject *uobj;
- struct ib_pd *pd;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- pd = uobj->object;
-
- if (atomic_read(&pd->usecnt)) {
- ret = -EBUSY;
- goto err_put;
- }
-
- ret = pd->device->dealloc_pd(uobj->object);
- WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
- if (ret)
- goto err_put;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- uobj->live = 0;
- put_uobj_write(uobj);
+ uobj = uobj_get_write(uobj_get_type(pd), cmd.pd_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
+ ret = uobj_remove_commit(uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
-
-err_put:
- put_uobj_write(uobj);
- return ret;
+ return ret ?: in_len;
}
struct xrcd_table_entry {
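
The alloc_pd/dealloc_pd conversion above is the template applied to every object type in the rest of this file: the global idrs, per-object kref, rwsem and "live" flag are replaced by the rdma_core uobject helpers. A condensed sketch of the two lifecycles, assuming a generic uobject type "foo" registered elsewhere (uobj_get_type(foo) is a placeholder for a real declared type):

#include <rdma/uverbs_std_types.h>
#include "rdma_core.h"

/* Create path: alloc -> fill in ->object -> commit (or abort on error). */
static int create_foo(struct ib_uverbs_file *file, void *hw_obj)
{
	struct ib_uobject *uobj;

	uobj = uobj_alloc(uobj_get_type(foo), file->ucontext);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	uobj->object = hw_obj;		/* userspace handle is uobj->id */
	uobj_alloc_commit(uobj);	/* makes it visible to lookups  */
	return 0;
}

/* Destroy path: look up for write, remove_commit runs the type cleanup. */
static int destroy_foo(struct ib_uverbs_file *file, u32 handle)
{
	struct ib_uobject *uobj;

	uobj = uobj_get_write(uobj_get_type(foo), handle, file->ucontext);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	return uobj_remove_commit(uobj);
}
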
@@ -808,16 +510,13 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
}
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj) {
- ret = -ENOMEM;
+ obj = (struct ib_uxrcd_object *)uobj_alloc(uobj_get_type(xrcd),
+ file->ucontext);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
goto err_tree_mutex_unlock;
}
- init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
-
- down_write(&obj->uobject.mutex);
-
if (!xrcd) {
xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
if (IS_ERR(xrcd)) {
@@ -835,10 +534,6 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
atomic_set(&obj->refcnt, 0);
obj->uobject.object = xrcd;
- ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
- if (ret)
- goto err_idr;
-
memset(&resp, 0, sizeof resp);
resp.xrcd_handle = obj->uobject.id;
@@ -847,7 +542,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
/* create new inode/xrcd table entry */
ret = xrcd_table_insert(file->device, inode, xrcd);
if (ret)
- goto err_insert_xrcd;
+ goto err_dealloc_xrcd;
}
atomic_inc(&xrcd->usecnt);
}
@@ -861,12 +556,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
if (f.file)
fdput(f);
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
- up_write(&obj->uobject.mutex);
+ uobj_alloc_commit(&obj->uobject);
mutex_unlock(&file->device->xrcd_tree_mutex);
return in_len;
@@ -878,14 +568,11 @@ err_copy:
atomic_dec(&xrcd->usecnt);
}
-err_insert_xrcd:
- idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
-
-err_idr:
+err_dealloc_xrcd:
ib_dealloc_xrcd(xrcd);
err:
- put_uobj_write(&obj->uobject);
+ uobj_alloc_abort(&obj->uobject);
err_tree_mutex_unlock:
if (f.file)
@@ -903,75 +590,41 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
{
struct ib_uverbs_close_xrcd cmd;
struct ib_uobject *uobj;
- struct ib_xrcd *xrcd = NULL;
- struct inode *inode = NULL;
- struct ib_uxrcd_object *obj;
- int live;
int ret = 0;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- mutex_lock(&file->device->xrcd_tree_mutex);
- uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
- if (!uobj) {
- ret = -EINVAL;
- goto out;
- }
-
- xrcd = uobj->object;
- inode = xrcd->inode;
- obj = container_of(uobj, struct ib_uxrcd_object, uobject);
- if (atomic_read(&obj->refcnt)) {
- put_uobj_write(uobj);
- ret = -EBUSY;
- goto out;
- }
-
- if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
- ret = ib_dealloc_xrcd(uobj->object);
- if (!ret)
- uobj->live = 0;
+ uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle,
+ file->ucontext);
+ if (IS_ERR(uobj)) {
+ mutex_unlock(&file->device->xrcd_tree_mutex);
+ return PTR_ERR(uobj);
}
- live = uobj->live;
- if (inode && ret)
- atomic_inc(&xrcd->usecnt);
-
- put_uobj_write(uobj);
-
- if (ret)
- goto out;
-
- if (inode && !live)
- xrcd_table_delete(file->device, inode);
-
- idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
- ret = in_len;
-
-out:
- mutex_unlock(&file->device->xrcd_tree_mutex);
- return ret;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
-void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
- struct ib_xrcd *xrcd)
+int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+ struct ib_xrcd *xrcd,
+ enum rdma_remove_reason why)
{
struct inode *inode;
+ int ret;
inode = xrcd->inode;
if (inode && !atomic_dec_and_test(&xrcd->usecnt))
- return;
+ return 0;
- ib_dealloc_xrcd(xrcd);
+ ret = ib_dealloc_xrcd(xrcd);
- if (inode)
+ if (why == RDMA_REMOVE_DESTROY && ret)
+ atomic_inc(&xrcd->usecnt);
+ else if (inode)
xrcd_table_delete(dev, inode);
+
+ return ret;
}
ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
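
ib_uverbs_dealloc_xrcd() now takes the removal reason and reports failure, so the shared-inode bookkeeping behaves differently for an explicit destroy (which may fail and roll the usecnt back) than for forced context cleanup (which must proceed). A sketch of how a type cleanup callback might forward that, with an illustrative callback name and without the xrcd_tree_mutex locking the real code would need:

#include "uverbs.h"

/* Illustrative remove callback; 'why' is supplied by rdma_core. */
static int xrcd_cleanup_example(struct ib_uobject *uobj,
				enum rdma_remove_reason why)
{
	struct ib_xrcd *xrcd = uobj->object;
	struct ib_uverbs_device *dev = uobj->context->ufile->device;

	/*
	 * A non-zero return is only meaningful for RDMA_REMOVE_DESTROY;
	 * during context teardown the object goes away regardless.
	 */
	return ib_uverbs_dealloc_xrcd(dev, xrcd, why);
}
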
@@ -1004,14 +657,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(mr), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -1025,10 +675,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
goto err_put;
}
}
- ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_charge;
mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
cmd.access_flags, &udata);
@@ -1043,9 +689,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
atomic_inc(&pd->usecnt);
uobj->object = mr;
- ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
- if (ret)
- goto err_unreg;
memset(&resp, 0, sizeof resp);
resp.lkey = mr->lkey;
@@ -1058,32 +701,20 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mr_list);
- mutex_unlock(&file->mutex);
+ uobj_put_obj_read(pd);
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
-
-err_unreg:
ib_dereg_mr(mr);
err_put:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
-err_charge:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_free:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -1119,11 +750,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
(cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
return -EINVAL;
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
- file->ucontext);
-
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
mr = uobj->object;
@@ -1134,7 +764,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
if (cmd.flags & IB_MR_REREG_PD) {
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto put_uobjs;
@@ -1167,11 +797,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
put_uobj_pd:
if (cmd.flags & IB_MR_REREG_PD)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
put_uobjs:
-
- put_uobj_write(mr->uobject);
+ uobj_put_write(uobj);
return ret;
}
@@ -1182,39 +811,20 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_dereg_mr cmd;
- struct ib_mr *mr;
struct ib_uobject *uobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
-
- mr = uobj->object;
-
- ret = ib_dereg_mr(mr);
- if (!ret)
- uobj->live = 0;
+ uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
+ ret = uobj_remove_commit(uobj);
- put_uobj(uobj);
-
- return in_len;
+ return ret ?: in_len;
}
ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
@@ -1236,14 +846,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, 0, file->ucontext, &mw_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(mw), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -1254,11 +861,6 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_charge;
-
mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
if (IS_ERR(mw)) {
ret = PTR_ERR(mw);
@@ -1271,9 +873,6 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
atomic_inc(&pd->usecnt);
uobj->object = mw;
- ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj);
- if (ret)
- goto err_unalloc;
memset(&resp, 0, sizeof(resp));
resp.rkey = mw->rkey;
@@ -1285,32 +884,17 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mw_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-
-err_unalloc:
uverbs_dealloc_mw(mw);
-
err_put:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
-err_charge:
- put_pd_read(pd);
-
+ uobj_put_obj_read(pd);
err_free:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -1320,39 +904,19 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_dealloc_mw cmd;
- struct ib_mw *mw;
struct ib_uobject *uobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
-
- mw = uobj->object;
-
- ret = uverbs_dealloc_mw(mw);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
+ uobj = uobj_get_write(uobj_get_type(mw), cmd.mw_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
@@ -1362,8 +926,8 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
{
struct ib_uverbs_create_comp_channel cmd;
struct ib_uverbs_create_comp_channel_resp resp;
- struct file *filp;
- int ret;
+ struct ib_uobject *uobj;
+ struct ib_uverbs_completion_event_file *ev_file;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -1371,25 +935,23 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- ret = get_unused_fd_flags(O_CLOEXEC);
- if (ret < 0)
- return ret;
- resp.fd = ret;
+ uobj = uobj_alloc(uobj_get_type(comp_channel), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- filp = ib_uverbs_alloc_event_file(file, ib_dev, 0);
- if (IS_ERR(filp)) {
- put_unused_fd(resp.fd);
- return PTR_ERR(filp);
- }
+ resp.fd = uobj->id;
+
+ ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj_file.uobj);
+ ib_uverbs_init_event_queue(&ev_file->ev_queue);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
- put_unused_fd(resp.fd);
- fput(filp);
+ uobj_alloc_abort(uobj);
return -EFAULT;
}
- fd_install(resp.fd, filp);
+ uobj_alloc_commit(uobj);
return in_len;
}
@@ -1407,7 +969,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
void *context)
{
struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file = NULL;
+ struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_cq *cq;
int ret;
struct ib_uverbs_ex_create_cq_resp resp;
@@ -1416,21 +978,21 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd->comp_vector >= file->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return ERR_PTR(-ENOMEM);
-
- init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class);
- down_write(&obj->uobject.mutex);
+ obj = (struct ib_ucq_object *)uobj_alloc(uobj_get_type(cq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return obj;
if (cmd->comp_channel >= 0) {
- ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel);
- if (!ev_file) {
- ret = -EINVAL;
+ ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel,
+ file->ucontext);
+ if (IS_ERR(ev_file)) {
+ ret = PTR_ERR(ev_file);
goto err;
}
}
+ obj->uobject.user_handle = cmd->user_handle;
obj->uverbs_file = file;
obj->comp_events_reported = 0;
obj->async_events_reported = 0;
@@ -1443,13 +1005,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
attr.flags = cmd->flags;
- ret = ib_rdmacg_try_charge(&obj->uobject.cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_charge;
-
- cq = ib_dev->create_cq(ib_dev, &attr,
- file->ucontext, uhw);
+ cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_file;
@@ -1459,14 +1015,10 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
cq->uobject = &obj->uobject;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
- cq->cq_context = ev_file;
+ cq->cq_context = &ev_file->ev_queue;
atomic_set(&cq->usecnt, 0);
obj->uobject.object = cq;
- ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
- if (ret)
- goto err_free;
-
memset(&resp, 0, sizeof resp);
resp.base.cq_handle = obj->uobject.id;
resp.base.cqe = cq->cqe;
@@ -1478,32 +1030,19 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (ret)
goto err_cb;
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
-
- up_write(&obj->uobject.mutex);
+ uobj_alloc_commit(&obj->uobject);
return obj;
err_cb:
- idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
-
-err_free:
ib_destroy_cq(cq);
err_file:
- ib_rdmacg_uncharge(&obj->uobject.cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
-
-err_charge:
if (ev_file)
ib_uverbs_release_ucq(file, ev_file, obj);
err:
- put_uobj_write(&obj->uobject);
+ uobj_alloc_abort(&obj->uobject);
return ERR_PTR(ret);
}
@@ -1626,7 +1165,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1641,7 +1180,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
ret = -EFAULT;
out:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return ret ? ret : in_len;
}
@@ -1688,7 +1227,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1720,7 +1259,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
ret = in_len;
out_put:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return ret;
}
@@ -1735,14 +1274,14 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
ib_req_notify_cq(cq, cmd.solicited_only ?
IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
return in_len;
}
@@ -1757,44 +1296,38 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
struct ib_uobject *uobj;
struct ib_cq *cq;
struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file;
+ struct ib_uverbs_event_queue *ev_queue;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(cq), cmd.cq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * needs the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
cq = uobj->object;
- ev_file = cq->cq_context;
+ ev_queue = cq->cq_context;
obj = container_of(cq->uobject, struct ib_ucq_object, uobject);
- ret = ib_destroy_cq(cq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ memset(&resp, 0, sizeof(resp));
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
+ }
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_ucq(file, ev_file, obj);
-
- memset(&resp, 0, sizeof resp);
resp.comp_events_reported = obj->comp_events_reported;
resp.async_events_reported = obj->async_events_reported;
- put_uobj(uobj);
-
+ uverbs_uobject_put(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
return -EFAULT;
@@ -1816,7 +1349,7 @@ static int create_qp(struct ib_uverbs_file *file,
struct ib_device *device;
struct ib_pd *pd = NULL;
struct ib_xrcd *xrcd = NULL;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
+ struct ib_uobject *xrcd_uobj = ERR_PTR(-ENOENT);
struct ib_cq *scq = NULL, *rcq = NULL;
struct ib_srq *srq = NULL;
struct ib_qp *qp;
@@ -1830,18 +1363,20 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- obj = kzalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+ obj->uxrcd = NULL;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
+ mutex_init(&obj->mcast_lock);
- init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
- &qp_lock_class);
- down_write(&obj->uevent.uobject.mutex);
if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
sizeof(cmd->rwq_ind_tbl_handle) &&
(cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
- ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle,
- file->ucontext);
+ ind_tbl = uobj_get_obj_read(rwq_ind_table,
+ cmd->rwq_ind_tbl_handle,
+ file->ucontext);
if (!ind_tbl) {
ret = -EINVAL;
goto err_put;
@@ -1865,8 +1400,15 @@ static int create_qp(struct ib_uverbs_file *file,
has_sq = false;
if (cmd->qp_type == IB_QPT_XRC_TGT) {
- xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
- &xrcd_uobj);
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->pd_handle,
+ file->ucontext);
+
+ if (IS_ERR(xrcd_uobj)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
if (!xrcd) {
ret = -EINVAL;
goto err_put;
@@ -1878,8 +1420,8 @@ static int create_qp(struct ib_uverbs_file *file,
cmd->max_recv_sge = 0;
} else {
if (cmd->is_srq) {
- srq = idr_read_srq(cmd->srq_handle,
- file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd->srq_handle,
+ file->ucontext);
if (!srq || srq->srq_type != IB_SRQT_BASIC) {
ret = -EINVAL;
goto err_put;
@@ -1888,8 +1430,8 @@ static int create_qp(struct ib_uverbs_file *file,
if (!ind_tbl) {
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
- rcq = idr_read_cq(cmd->recv_cq_handle,
- file->ucontext, 0);
+ rcq = uobj_get_obj_read(cq, cmd->recv_cq_handle,
+ file->ucontext);
if (!rcq) {
ret = -EINVAL;
goto err_put;
@@ -1899,10 +1441,11 @@ static int create_qp(struct ib_uverbs_file *file,
}
if (has_sq)
- scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
+ scq = uobj_get_obj_read(cq, cmd->send_cq_handle,
+ file->ucontext);
if (!ind_tbl)
rcq = rcq ?: scq;
- pd = idr_read_pd(cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
@@ -1954,11 +1497,6 @@ static int create_qp(struct ib_uverbs_file *file,
goto err_put;
}
- ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, device,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_put;
-
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
@@ -1966,7 +1504,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
- goto err_create;
+ goto err_put;
}
if (cmd->qp_type != IB_QPT_XRC_TGT) {
@@ -1994,9 +1532,6 @@ static int create_qp(struct ib_uverbs_file *file,
qp->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = qp;
- ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
memset(&resp, 0, sizeof resp);
resp.base.qpn = qp->qp_num;
@@ -2018,54 +1553,41 @@ static int create_qp(struct ib_uverbs_file *file,
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
uobject);
atomic_inc(&obj->uxrcd->refcnt);
- put_xrcd_read(xrcd_uobj);
+ uobj_put_read(xrcd_uobj);
}
if (pd)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
if (scq)
- put_cq_read(scq);
+ uobj_put_obj_read(scq);
if (rcq && rcq != scq)
- put_cq_read(rcq);
+ uobj_put_obj_read(rcq);
if (srq)
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ind_tbl)
- put_rwq_indirection_table_read(ind_tbl);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
- mutex_unlock(&file->mutex);
+ uobj_put_obj_read(ind_tbl);
- obj->uevent.uobject.live = 1;
-
- up_write(&obj->uevent.uobject.mutex);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_cb:
- idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
-
-err_destroy:
ib_destroy_qp(qp);
-err_create:
- ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, device,
- RDMACG_RESOURCE_HCA_OBJECT);
-
err_put:
- if (xrcd)
- put_xrcd_read(xrcd_uobj);
+ if (!IS_ERR(xrcd_uobj))
+ uobj_put_read(xrcd_uobj);
if (pd)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
if (scq)
- put_cq_read(scq);
+ uobj_put_obj_read(scq);
if (rcq && rcq != scq)
- put_cq_read(rcq);
+ uobj_put_obj_read(rcq);
if (srq)
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ind_tbl)
- put_rwq_indirection_table_read(ind_tbl);
+ uobj_put_obj_read(ind_tbl);
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
@@ -2201,17 +1723,22 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
- down_write(&obj->uevent.uobject.mutex);
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd.pd_handle,
+ file->ucontext);
+ if (IS_ERR(xrcd_uobj)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
- xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
if (!xrcd) {
ret = -EINVAL;
- goto err_put;
+ goto err_xrcd;
}
attr.event_handler = ib_uverbs_qp_event_handler;
@@ -2226,15 +1753,11 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
qp = ib_open_qp(xrcd, &attr);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
- goto err_put;
+ goto err_xrcd;
}
- qp->uobject = &obj->uevent.uobject;
-
obj->uevent.uobject.object = qp;
- ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
+ obj->uevent.uobject.user_handle = cmd.user_handle;
memset(&resp, 0, sizeof resp);
resp.qpn = qp->qp_num;
@@ -2243,32 +1766,25 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_remove;
+ goto err_destroy;
}
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
- put_xrcd_read(xrcd_uobj);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
- mutex_unlock(&file->mutex);
+ qp->uobject = &obj->uevent.uobject;
+ uobj_put_read(xrcd_uobj);
- obj->uevent.uobject.live = 1;
- up_write(&obj->uevent.uobject.mutex);
+ uobj_alloc_commit(&obj->uevent.uobject);
return in_len;
-err_remove:
- idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
-
err_destroy:
ib_destroy_qp(qp);
-
+err_xrcd:
+ uobj_put_read(xrcd_uobj);
err_put:
- put_xrcd_read(xrcd_uobj);
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
@@ -2282,6 +1798,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
struct ib_qp *qp;
struct ib_qp_attr *attr;
struct ib_qp_init_attr *init_attr;
+ const struct ib_global_route *grh;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -2294,7 +1811,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
goto out;
}
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -2302,7 +1819,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
if (ret)
goto out;
@@ -2331,29 +1848,39 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
resp.alt_port_num = attr->alt_port_num;
resp.alt_timeout = attr->alt_timeout;
- memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
- resp.dest.flow_label = attr->ah_attr.grh.flow_label;
- resp.dest.sgid_index = attr->ah_attr.grh.sgid_index;
- resp.dest.hop_limit = attr->ah_attr.grh.hop_limit;
- resp.dest.traffic_class = attr->ah_attr.grh.traffic_class;
- resp.dest.dlid = attr->ah_attr.dlid;
- resp.dest.sl = attr->ah_attr.sl;
- resp.dest.src_path_bits = attr->ah_attr.src_path_bits;
- resp.dest.static_rate = attr->ah_attr.static_rate;
- resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
- resp.dest.port_num = attr->ah_attr.port_num;
-
- memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
- resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
- resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
- resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
- resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
- resp.alt_dest.dlid = attr->alt_ah_attr.dlid;
- resp.alt_dest.sl = attr->alt_ah_attr.sl;
- resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
- resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate;
- resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
- resp.alt_dest.port_num = attr->alt_ah_attr.port_num;
+ resp.dest.dlid = rdma_ah_get_dlid(&attr->ah_attr);
+ resp.dest.sl = rdma_ah_get_sl(&attr->ah_attr);
+ resp.dest.src_path_bits = rdma_ah_get_path_bits(&attr->ah_attr);
+ resp.dest.static_rate = rdma_ah_get_static_rate(&attr->ah_attr);
+ resp.dest.is_global = !!(rdma_ah_get_ah_flags(&attr->ah_attr) &
+ IB_AH_GRH);
+ if (resp.dest.is_global) {
+ grh = rdma_ah_read_grh(&attr->ah_attr);
+ memcpy(resp.dest.dgid, grh->dgid.raw, 16);
+ resp.dest.flow_label = grh->flow_label;
+ resp.dest.sgid_index = grh->sgid_index;
+ resp.dest.hop_limit = grh->hop_limit;
+ resp.dest.traffic_class = grh->traffic_class;
+ }
+ resp.dest.port_num = rdma_ah_get_port_num(&attr->ah_attr);
+
+ resp.alt_dest.dlid = rdma_ah_get_dlid(&attr->alt_ah_attr);
+ resp.alt_dest.sl = rdma_ah_get_sl(&attr->alt_ah_attr);
+ resp.alt_dest.src_path_bits = rdma_ah_get_path_bits(&attr->alt_ah_attr);
+ resp.alt_dest.static_rate
+ = rdma_ah_get_static_rate(&attr->alt_ah_attr);
+ resp.alt_dest.is_global
+ = !!(rdma_ah_get_ah_flags(&attr->alt_ah_attr) &
+ IB_AH_GRH);
+ if (resp.alt_dest.is_global) {
+ grh = rdma_ah_read_grh(&attr->alt_ah_attr);
+ memcpy(resp.alt_dest.dgid, grh->dgid.raw, 16);
+ resp.alt_dest.flow_label = grh->flow_label;
+ resp.alt_dest.sgid_index = grh->sgid_index;
+ resp.alt_dest.hop_limit = grh->hop_limit;
+ resp.alt_dest.traffic_class = grh->traffic_class;
+ }
+ resp.alt_dest.port_num = rdma_ah_get_port_num(&attr->alt_ah_attr);
resp.max_send_wr = init_attr->cap.max_send_wr;
resp.max_recv_wr = init_attr->cap.max_recv_wr;
@@ -2398,7 +1925,7 @@ static int modify_qp(struct ib_uverbs_file *file,
if (!attr)
return -ENOMEM;
- qp = idr_read_qp(cmd->base.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd->base.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -2427,31 +1954,47 @@ static int modify_qp(struct ib_uverbs_file *file,
attr->alt_timeout = cmd->base.alt_timeout;
attr->rate_limit = cmd->rate_limit;
- memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16);
- attr->ah_attr.grh.flow_label = cmd->base.dest.flow_label;
- attr->ah_attr.grh.sgid_index = cmd->base.dest.sgid_index;
- attr->ah_attr.grh.hop_limit = cmd->base.dest.hop_limit;
- attr->ah_attr.grh.traffic_class = cmd->base.dest.traffic_class;
- attr->ah_attr.dlid = cmd->base.dest.dlid;
- attr->ah_attr.sl = cmd->base.dest.sl;
- attr->ah_attr.src_path_bits = cmd->base.dest.src_path_bits;
- attr->ah_attr.static_rate = cmd->base.dest.static_rate;
- attr->ah_attr.ah_flags = cmd->base.dest.is_global ?
- IB_AH_GRH : 0;
- attr->ah_attr.port_num = cmd->base.dest.port_num;
-
- memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16);
- attr->alt_ah_attr.grh.flow_label = cmd->base.alt_dest.flow_label;
- attr->alt_ah_attr.grh.sgid_index = cmd->base.alt_dest.sgid_index;
- attr->alt_ah_attr.grh.hop_limit = cmd->base.alt_dest.hop_limit;
- attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class;
- attr->alt_ah_attr.dlid = cmd->base.alt_dest.dlid;
- attr->alt_ah_attr.sl = cmd->base.alt_dest.sl;
- attr->alt_ah_attr.src_path_bits = cmd->base.alt_dest.src_path_bits;
- attr->alt_ah_attr.static_rate = cmd->base.alt_dest.static_rate;
- attr->alt_ah_attr.ah_flags = cmd->base.alt_dest.is_global ?
- IB_AH_GRH : 0;
- attr->alt_ah_attr.port_num = cmd->base.alt_dest.port_num;
+ attr->ah_attr.type = rdma_ah_find_type(qp->device,
+ cmd->base.dest.port_num);
+ if (cmd->base.dest.is_global) {
+ rdma_ah_set_grh(&attr->ah_attr, NULL,
+ cmd->base.dest.flow_label,
+ cmd->base.dest.sgid_index,
+ cmd->base.dest.hop_limit,
+ cmd->base.dest.traffic_class);
+ rdma_ah_set_dgid_raw(&attr->ah_attr, cmd->base.dest.dgid);
+ } else {
+ rdma_ah_set_ah_flags(&attr->ah_attr, 0);
+ }
+ rdma_ah_set_dlid(&attr->ah_attr, cmd->base.dest.dlid);
+ rdma_ah_set_sl(&attr->ah_attr, cmd->base.dest.sl);
+ rdma_ah_set_path_bits(&attr->ah_attr, cmd->base.dest.src_path_bits);
+ rdma_ah_set_static_rate(&attr->ah_attr, cmd->base.dest.static_rate);
+ rdma_ah_set_port_num(&attr->ah_attr,
+ cmd->base.dest.port_num);
+
+ attr->alt_ah_attr.type = rdma_ah_find_type(qp->device,
+ cmd->base.dest.port_num);
+ if (cmd->base.alt_dest.is_global) {
+ rdma_ah_set_grh(&attr->alt_ah_attr, NULL,
+ cmd->base.alt_dest.flow_label,
+ cmd->base.alt_dest.sgid_index,
+ cmd->base.alt_dest.hop_limit,
+ cmd->base.alt_dest.traffic_class);
+ rdma_ah_set_dgid_raw(&attr->alt_ah_attr,
+ cmd->base.alt_dest.dgid);
+ } else {
+ rdma_ah_set_ah_flags(&attr->alt_ah_attr, 0);
+ }
+
+ rdma_ah_set_dlid(&attr->alt_ah_attr, cmd->base.alt_dest.dlid);
+ rdma_ah_set_sl(&attr->alt_ah_attr, cmd->base.alt_dest.sl);
+ rdma_ah_set_path_bits(&attr->alt_ah_attr,
+ cmd->base.alt_dest.src_path_bits);
+ rdma_ah_set_static_rate(&attr->alt_ah_attr,
+ cmd->base.alt_dest.static_rate);
+ rdma_ah_set_port_num(&attr->alt_ah_attr,
+ cmd->base.alt_dest.port_num);
if (qp->real_qp == qp) {
if (cmd->base.attr_mask & IB_QP_AV) {
@@ -2470,7 +2013,7 @@ static int modify_qp(struct ib_uverbs_file *file,
}
release_qp:
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
out:
kfree(attr);
@@ -2557,42 +2100,27 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
- uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(qp), cmd.qp_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
qp = uobj->object;
obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * needs the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- if (!list_empty(&obj->mcast_list)) {
- put_uobj_write(uobj);
- return -EBUSY;
- }
-
- ret = ib_destroy_qp(qp);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- if (obj->uxrcd)
- atomic_dec(&obj->uxrcd->refcnt);
-
- idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_uevent(file, &obj->uevent);
+ }
resp.events_reported = obj->uevent.events_reported;
-
- put_uobj(uobj);
+ uverbs_uobject_put(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
@@ -2603,9 +2131,13 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
static void *alloc_wr(size_t wr_size, __u32 num_sge)
{
+ if (num_sge >= (U32_MAX - ALIGN(wr_size, sizeof (struct ib_sge))) /
+ sizeof (struct ib_sge))
+ return NULL;
+
return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
num_sge * sizeof (struct ib_sge), GFP_KERNEL);
-};
+}
ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
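
The new guard in alloc_wr() bounds num_sge before it feeds the kmalloc() size computation; without it, num_sge * sizeof(struct ib_sge) could overflow the size arithmetic on a 32-bit size. The same check written standalone, with the derivation spelled out (the function name is illustrative):

#include <linux/kernel.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

/*
 * Total allocation is ALIGN(wr_size, sizeof(struct ib_sge))
 *                     + num_sge * sizeof(struct ib_sge).
 * It stays within U32_MAX as long as
 *   num_sge <= (U32_MAX - ALIGN(wr_size, sizeof(struct ib_sge)))
 *              / sizeof(struct ib_sge),
 * which is exactly what the '>=' rejection above enforces.
 */
static void *alloc_wr_checked(size_t wr_size, u32 num_sge)
{
	size_t hdr = ALIGN(wr_size, sizeof(struct ib_sge));

	if (num_sge >= (U32_MAX - hdr) / sizeof(struct ib_sge))
		return NULL;

	return kmalloc(hdr + num_sge * sizeof(struct ib_sge), GFP_KERNEL);
}
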
@@ -2636,7 +2168,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (!user_wr)
return -ENOMEM;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
@@ -2672,7 +2204,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put;
}
- ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
+ ud->ah = uobj_get_obj_read(ah, user_wr->wr.ud.ah,
+ file->ucontext);
if (!ud->ah) {
kfree(ud);
ret = -EINVAL;
@@ -2779,11 +2312,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
ret = -EFAULT;
out_put:
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
while (wr) {
if (is_ud && ud_wr(wr)->ah)
- put_ah_read(ud_wr(wr)->ah);
+ uobj_put_obj_read(ud_wr(wr)->ah);
next = wr->next;
kfree(wr);
wr = next;
@@ -2832,6 +2365,13 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
goto err;
}
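+ /* Same overflow guard as alloc_wr(): bail out before the receive WR allocation size below can wrap. */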
+ if (user_wr->num_sge >=
+ (U32_MAX - ALIGN(sizeof *next, sizeof (struct ib_sge))) /
+ sizeof (struct ib_sge)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
user_wr->num_sge * sizeof (struct ib_sge),
GFP_KERNEL);
@@ -2900,21 +2440,21 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
resp.bad_wr = 0;
ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
- put_qp_read(qp);
-
- if (ret)
+ uobj_put_obj_read(qp);
+ if (ret) {
for (next = wr; next; next = next->next) {
++resp.bad_wr;
if (next == bad_wr)
break;
}
+ }
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
@@ -2950,14 +2490,14 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
goto out;
resp.bad_wr = 0;
ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ret)
for (next = wr; next; next = next->next) {
@@ -2990,9 +2530,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_ah *ah;
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
int ret;
struct ib_udata udata;
+ u8 *dmac;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -3004,54 +2545,50 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
(unsigned long)cmd.response + sizeof(resp),
in_len - sizeof(cmd), out_len - sizeof(resp));
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
-
- init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(uobj_get_type(ah), file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err;
}
- attr.dlid = cmd.attr.dlid;
- attr.sl = cmd.attr.sl;
- attr.src_path_bits = cmd.attr.src_path_bits;
- attr.static_rate = cmd.attr.static_rate;
- attr.ah_flags = cmd.attr.is_global ? IB_AH_GRH : 0;
- attr.port_num = cmd.attr.port_num;
- attr.grh.flow_label = cmd.attr.grh.flow_label;
- attr.grh.sgid_index = cmd.attr.grh.sgid_index;
- attr.grh.hop_limit = cmd.attr.grh.hop_limit;
- attr.grh.traffic_class = cmd.attr.grh.traffic_class;
- memset(&attr.dmac, 0, sizeof(attr.dmac));
- memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
-
- ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_charge;
+ attr.type = rdma_ah_find_type(ib_dev, cmd.attr.port_num);
+ rdma_ah_set_dlid(&attr, cmd.attr.dlid);
+ rdma_ah_set_sl(&attr, cmd.attr.sl);
+ rdma_ah_set_path_bits(&attr, cmd.attr.src_path_bits);
+ rdma_ah_set_static_rate(&attr, cmd.attr.static_rate);
+ rdma_ah_set_port_num(&attr, cmd.attr.port_num);
+
+ if (cmd.attr.is_global) {
+ rdma_ah_set_grh(&attr, NULL, cmd.attr.grh.flow_label,
+ cmd.attr.grh.sgid_index,
+ cmd.attr.grh.hop_limit,
+ cmd.attr.grh.traffic_class);
+ rdma_ah_set_dgid_raw(&attr, cmd.attr.grh.dgid);
+ } else {
+ rdma_ah_set_ah_flags(&attr, 0);
+ }
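+ /* Zero the destination MAC for AH types that carry one; rdma_ah_retrieve_dmac() returns NULL otherwise. */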
+ dmac = rdma_ah_retrieve_dmac(&attr);
+ if (dmac)
+ memset(dmac, 0, ETH_ALEN);
ah = pd->device->create_ah(pd, &attr, &udata);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
- goto err_create;
+ goto err_put;
}
ah->device = pd->device;
ah->pd = pd;
atomic_inc(&pd->usecnt);
ah->uobject = uobj;
+ uobj->user_handle = cmd.user_handle;
uobj->object = ah;
- ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
- if (ret)
- goto err_destroy;
-
resp.ah_handle = uobj->id;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
@@ -3060,32 +2597,19 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
goto err_copy;
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->ah_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(uobj);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
-
-err_destroy:
- ib_destroy_ah(ah);
-
-err_create:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+ rdma_destroy_ah(ah);
-err_charge:
- put_pd_read(pd);
+err_put:
+ uobj_put_obj_read(pd);
err:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
return ret;
}
@@ -3094,38 +2618,19 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
const char __user *buf, int in_len, int out_len)
{
struct ib_uverbs_destroy_ah cmd;
- struct ib_ah *ah;
struct ib_uobject *uobj;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- ah = uobj->object;
-
- ret = ib_destroy_ah(ah);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
+ uobj = uobj_get_write(uobj_get_type(ah), cmd.ah_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ ret = uobj_remove_commit(uobj);
+ return ret ?: in_len;
}
ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
@@ -3142,12 +2647,13 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = idr_write_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
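+ /* The QP is only held for read now; mcast_list is protected by the per-object mcast_lock instead. */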
+ mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
if (cmd.mlid == mcast->lid &&
!memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
@@ -3171,7 +2677,8 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
kfree(mcast);
out_put:
- put_qp_write(qp);
+ mutex_unlock(&obj->mcast_lock);
+ uobj_put_obj_read(qp);
return ret ? ret : in_len;
}
@@ -3186,31 +2693,37 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
struct ib_qp *qp;
struct ib_uverbs_mcast_entry *mcast;
int ret = -EINVAL;
+ bool found = false;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = idr_write_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
- ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
- if (ret)
- goto out_put;
-
obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ mutex_lock(&obj->mcast_lock);
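+ /* Drop our bookkeeping entry first; only a group actually found on the list is detached from the hardware below. */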
list_for_each_entry(mcast, &obj->mcast_list, list)
if (cmd.mlid == mcast->lid &&
!memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
list_del(&mcast->list);
kfree(mcast);
+ found = true;
break;
}
-out_put:
- put_qp_write(qp);
+ if (!found) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+
+ ret = ib_detach_mcast(qp, (union ib_gid *)cmd.gid, cmd.mlid);
+out_put:
+ mutex_unlock(&obj->mcast_lock);
+ uobj_put_obj_read(qp);
return ret ? ret : in_len;
}
@@ -3227,6 +2740,13 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (kern_spec->drop.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_drop))
+ return -EINVAL;
+
+ ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop);
+ break;
default:
return -EINVAL;
}
@@ -3402,20 +2922,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = kmalloc(sizeof(*obj), GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uwq_object *)uobj_alloc(uobj_get_type(wq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext,
- &wq_lock_class);
- down_write(&obj->uevent.uobject.mutex);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+ cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
@@ -3450,9 +2968,6 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
atomic_inc(&cq->usecnt);
wq->uobject = &obj->uevent.uobject;
obj->uevent.uobject.object = wq;
- err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
- if (err)
- goto destroy_wq;
memset(&resp, 0, sizeof(resp));
resp.wq_handle = obj->uevent.uobject.id;
@@ -3465,27 +2980,19 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (err)
goto err_copy;
- put_pd_read(pd);
- put_cq_read(cq);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
- up_write(&obj->uevent.uobject.mutex);
+ uobj_put_obj_read(pd);
+ uobj_put_obj_read(cq);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
-destroy_wq:
ib_destroy_wq(wq);
err_put_cq:
- put_cq_read(cq);
+ uobj_put_obj_read(cq);
err_put_pd:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_uobj:
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return err;
}
@@ -3526,36 +3033,26 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
resp.response_length = required_resp_len;
- uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(wq), cmd.wq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
wq = uobj->object;
obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
- ret = ib_destroy_wq(wq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- ib_uverbs_release_uevent(file, &obj->uevent);
+ ret = uobj_remove_commit(uobj);
resp.events_reported = obj->uevent.events_reported;
- put_uobj(uobj);
-
- ret = ib_copy_to_udata(ucore, &resp, resp.response_length);
+ uverbs_uobject_put(uobj);
if (ret)
return ret;
- return 0;
+ return ib_copy_to_udata(ucore, &resp, resp.response_length);
}
int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
@@ -3588,7 +3085,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = idr_read_wq(cmd.wq_handle, file->ucontext);
+ wq = uobj_get_obj_read(wq, cmd.wq_handle, file->ucontext);
if (!wq)
return -EINVAL;
@@ -3599,7 +3096,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
wq_attr.flags_mask = cmd.flags_mask;
}
ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
- put_wq_read(wq);
+ uobj_put_obj_read(wq);
return ret;
}
@@ -3677,7 +3174,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
- wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext);
+ wq = uobj_get_obj_read(wq, wqs_handles[num_read_wqs],
+ file->ucontext);
if (!wq) {
err = -EINVAL;
goto put_wqs;
@@ -3686,14 +3184,12 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
wqs[num_read_wqs] = wq;
}
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
+ uobj = uobj_alloc(uobj_get_type(rwq_ind_table), file->ucontext);
+ if (IS_ERR(uobj)) {
+ err = PTR_ERR(uobj);
goto put_wqs;
}
- init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class);
- down_write(&uobj->mutex);
init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
init_attr.ind_tbl = wqs;
rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
@@ -3713,10 +3209,6 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (i = 0; i < num_wq_handles; i++)
atomic_inc(&wqs[i]->usecnt);
- err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
- if (err)
- goto destroy_ind_tbl;
-
resp.ind_tbl_handle = uobj->id;
resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
resp.response_length = required_resp_len;
@@ -3729,26 +3221,18 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
kfree(wqs_handles);
for (j = 0; j < num_read_wqs; j++)
- put_wq_read(wqs[j]);
+ uobj_put_obj_read(wqs[j]);
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_alloc_commit(uobj);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
-destroy_ind_tbl:
ib_destroy_rwq_ind_table(rwq_ind_tbl);
err_uobj:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
put_wqs:
for (j = 0; j < num_read_wqs; j++)
- put_wq_read(wqs[j]);
+ uobj_put_obj_read(wqs[j]);
err_free:
kfree(wqs_handles);
kfree(wqs);
@@ -3761,10 +3245,8 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
struct ib_udata *uhw)
{
struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
- struct ib_rwq_ind_table *rwq_ind_tbl;
struct ib_uobject *uobj;
int ret;
- struct ib_wq **ind_tbl;
size_t required_cmd_sz;
required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle);
@@ -3784,31 +3266,12 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
- rwq_ind_tbl = uobj->object;
- ind_tbl = rwq_ind_tbl->ind_tbl;
-
- ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ uobj = uobj_get_write(uobj_get_type(rwq_ind_table), cmd.ind_tbl_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
- kfree(ind_tbl);
- return ret;
+ return uobj_remove_commit(uobj);
}
int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
@@ -3882,15 +3345,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
kern_flow_attr = &cmd.flow_attr;
}
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
+ uobj = uobj_alloc(uobj_get_type(flow), file->ucontext);
+ if (IS_ERR(uobj)) {
+ err = PTR_ERR(uobj);
goto err_free_attr;
}
- init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
- down_write(&uobj->mutex);
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
if (!qp) {
err = -EINVAL;
goto err_uobj;
@@ -3931,24 +3392,14 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = -EINVAL;
goto err_free;
}
-
- err = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (err)
- goto err_free;
-
flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
- goto err_create;
+ goto err_free;
}
flow_id->uobject = uobj;
uobj->object = flow_id;
- err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
- if (err)
- goto destroy_flow;
-
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
@@ -3957,30 +3408,20 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
if (err)
goto err_copy;
- put_qp_read(qp);
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rule_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ uobj_put_obj_read(qp);
+ uobj_alloc_commit(uobj);
kfree(flow_attr);
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
-destroy_flow:
ib_destroy_flow(flow_id);
-err_create:
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
err_free:
kfree(flow_attr);
err_put:
- put_qp_read(qp);
+ uobj_put_obj_read(qp);
err_uobj:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj);
err_free_attr:
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
@@ -3993,7 +3434,6 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
struct ib_udata *uhw)
{
struct ib_uverbs_destroy_flow cmd;
- struct ib_flow *flow_id;
struct ib_uobject *uobj;
int ret;
@@ -4007,29 +3447,12 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EINVAL;
- uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
- flow_id = uobj->object;
-
- ret = ib_destroy_flow(flow_id);
- if (!ret) {
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- uobj->live = 0;
- }
-
- put_uobj_write(uobj);
-
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
+ uobj = uobj_get_write(uobj_get_type(flow), cmd.flow_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+ ret = uobj_remove_commit(uobj);
return ret;
}
@@ -4046,31 +3469,37 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_srq_init_attr attr;
int ret;
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
-
- init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
- down_write(&obj->uevent.uobject.mutex);
+ obj = (struct ib_usrq_object *)uobj_alloc(uobj_get_type(srq),
+ file->ucontext);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
if (cmd->srq_type == IB_SRQT_XRC) {
- attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
- if (!attr.ext.xrc.xrcd) {
+ xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->xrcd_handle,
+ file->ucontext);
+ if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err;
}
+ attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!attr.ext.xrc.xrcd) {
+ ret = -EINVAL;
+ goto err_put_xrcd;
+ }
+
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
- attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
+ attr.ext.xrc.cq = uobj_get_obj_read(cq, cmd->cq_handle,
+ file->ucontext);
if (!attr.ext.xrc.cq) {
ret = -EINVAL;
goto err_put_xrcd;
}
}
- pd = idr_read_pd(cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
@@ -4086,11 +3515,6 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
- ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- if (ret)
- goto err_put_cq;
-
srq = pd->device->create_srq(pd, &attr, udata);
if (IS_ERR(srq)) {
ret = PTR_ERR(srq);
@@ -4115,9 +3539,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
atomic_set(&srq->usecnt, 0);
obj->uevent.uobject.object = srq;
- ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
memset(&resp, 0, sizeof resp);
resp.srq_handle = obj->uevent.uobject.id;
@@ -4133,44 +3555,32 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
}
if (cmd->srq_type == IB_SRQT_XRC) {
- put_uobj_read(xrcd_uobj);
- put_cq_read(attr.ext.xrc.cq);
+ uobj_put_read(xrcd_uobj);
+ uobj_put_obj_read(attr.ext.xrc.cq);
}
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
-
- up_write(&obj->uevent.uobject.mutex);
+ uobj_put_obj_read(pd);
+ uobj_alloc_commit(&obj->uevent.uobject);
return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
-
-err_destroy:
ib_destroy_srq(srq);
err_put:
- ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, ib_dev,
- RDMACG_RESOURCE_HCA_OBJECT);
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_put_cq:
if (cmd->srq_type == IB_SRQT_XRC)
- put_cq_read(attr.ext.xrc.cq);
+ uobj_put_obj_read(attr.ext.xrc.cq);
err_put_xrcd:
if (cmd->srq_type == IB_SRQT_XRC) {
atomic_dec(&obj->uxrcd->refcnt);
- put_uobj_read(xrcd_uobj);
+ uobj_put_read(xrcd_uobj);
}
err:
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject);
return ret;
}
@@ -4255,7 +3665,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
out_len);
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
@@ -4264,7 +3674,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
return ret ? ret : in_len;
}
@@ -4286,13 +3696,13 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
ret = ib_query_srq(srq, &attr);
- put_srq_read(srq);
+ uobj_put_obj_read(srq);
if (ret)
return ret;
@@ -4321,53 +3731,39 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
struct ib_srq *srq;
struct ib_uevent_object *obj;
int ret = -EINVAL;
- struct ib_usrq_object *us;
enum ib_srq_type srq_type;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
+ uobj = uobj_get_write(uobj_get_type(srq), cmd.srq_handle,
+ file->ucontext);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
srq = uobj->object;
obj = container_of(uobj, struct ib_uevent_object, uobject);
srq_type = srq->srq_type;
+ /*
+ * Make sure we don't free the memory in remove_commit as we still
+ * need the uobject memory to create the response.
+ */
+ uverbs_uobject_get(uobj);
- ret = ib_destroy_srq(srq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ memset(&resp, 0, sizeof(resp));
- if (ret)
+ ret = uobj_remove_commit(uobj);
+ if (ret) {
+ uverbs_uobject_put(uobj);
return ret;
-
- ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
-
- if (srq_type == IB_SRQT_XRC) {
- us = container_of(obj, struct ib_usrq_object, uevent);
- atomic_dec(&us->uxrcd->refcnt);
}
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_uevent(file, obj);
-
- memset(&resp, 0, sizeof resp);
resp.events_reported = obj->events_reported;
+ uverbs_uobject_put(uobj);
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ return -EFAULT;
- put_uobj(uobj);
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
-
- return ret ? ret : in_len;
+ return in_len;
}
int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 35c788a32e26..3d2609608f58 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -52,6 +52,7 @@
#include "uverbs.h"
#include "core_priv.h"
+#include "rdma_core.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
@@ -67,19 +68,6 @@ enum {
static struct class *uverbs_class;
-DEFINE_SPINLOCK(ib_uverbs_idr_lock);
-DEFINE_IDR(ib_uverbs_pd_idr);
-DEFINE_IDR(ib_uverbs_mr_idr);
-DEFINE_IDR(ib_uverbs_mw_idr);
-DEFINE_IDR(ib_uverbs_ah_idr);
-DEFINE_IDR(ib_uverbs_cq_idr);
-DEFINE_IDR(ib_uverbs_qp_idr);
-DEFINE_IDR(ib_uverbs_srq_idr);
-DEFINE_IDR(ib_uverbs_xrcd_idr);
-DEFINE_IDR(ib_uverbs_rule_idr);
-DEFINE_IDR(ib_uverbs_wq_idr);
-DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr);
-
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -168,37 +156,37 @@ static struct kobj_type ib_uverbs_dev_ktype = {
.release = ib_uverbs_release_dev,
};
-static void ib_uverbs_release_event_file(struct kref *ref)
+static void ib_uverbs_release_async_event_file(struct kref *ref)
{
- struct ib_uverbs_event_file *file =
- container_of(ref, struct ib_uverbs_event_file, ref);
+ struct ib_uverbs_async_event_file *file =
+ container_of(ref, struct ib_uverbs_async_event_file, ref);
kfree(file);
}
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
+ struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj)
{
struct ib_uverbs_event *evt, *tmp;
if (ev_file) {
- spin_lock_irq(&ev_file->lock);
+ spin_lock_irq(&ev_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&ev_file->lock);
+ spin_unlock_irq(&ev_file->ev_queue.lock);
- kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ uverbs_uobject_put(&ev_file->uobj_file.uobj);
}
- spin_lock_irq(&file->async_file->lock);
+ spin_lock_irq(&file->async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&file->async_file->lock);
+ spin_unlock_irq(&file->async_file->ev_queue.lock);
}
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
@@ -206,16 +194,16 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
{
struct ib_uverbs_event *evt, *tmp;
- spin_lock_irq(&file->async_file->lock);
+ spin_lock_irq(&file->async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&file->async_file->lock);
+ spin_unlock_irq(&file->async_file->ev_queue.lock);
}
-static void ib_uverbs_detach_umcast(struct ib_qp *qp,
- struct ib_uqp_object *uobj)
+void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj)
{
struct ib_uverbs_mcast_entry *mcast, *tmp;
@@ -227,138 +215,11 @@ static void ib_uverbs_detach_umcast(struct ib_qp *qp,
}
static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
- struct ib_ucontext *context)
+ struct ib_ucontext *context,
+ bool device_removed)
{
- struct ib_uobject *uobj, *tmp;
-
context->closing = 1;
-
- list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
- struct ib_ah *ah = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
- ib_destroy_ah(ah);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
- /* Remove MWs before QPs, in order to support type 2A MWs. */
- list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
- struct ib_mw *mw = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
- uverbs_dealloc_mw(mw);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
- struct ib_flow *flow_id = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
- ib_destroy_flow(flow_id);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
- struct ib_qp *qp = uobj->object;
- struct ib_uqp_object *uqp =
- container_of(uobj, struct ib_uqp_object, uevent.uobject);
-
- idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
- if (qp == qp->real_qp)
- ib_uverbs_detach_umcast(qp, uqp);
- ib_destroy_qp(qp);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- ib_uverbs_release_uevent(file, &uqp->uevent);
- kfree(uqp);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) {
- struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object;
- struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
-
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
- ib_destroy_rwq_ind_table(rwq_ind_tbl);
- kfree(ind_tbl);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) {
- struct ib_wq *wq = uobj->object;
- struct ib_uwq_object *uwq =
- container_of(uobj, struct ib_uwq_object, uevent.uobject);
-
- idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
- ib_destroy_wq(wq);
- ib_uverbs_release_uevent(file, &uwq->uevent);
- kfree(uwq);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
- struct ib_srq *srq = uobj->object;
- struct ib_uevent_object *uevent =
- container_of(uobj, struct ib_uevent_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
- ib_destroy_srq(srq);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- ib_uverbs_release_uevent(file, uevent);
- kfree(uevent);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
- struct ib_cq *cq = uobj->object;
- struct ib_uverbs_event_file *ev_file = cq->cq_context;
- struct ib_ucq_object *ucq =
- container_of(uobj, struct ib_ucq_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
- ib_destroy_cq(cq);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- ib_uverbs_release_ucq(file, ev_file, ucq);
- kfree(ucq);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
- struct ib_mr *mr = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
- ib_dereg_mr(mr);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
- mutex_lock(&file->device->xrcd_tree_mutex);
- list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
- struct ib_xrcd *xrcd = uobj->object;
- struct ib_uxrcd_object *uxrcd =
- container_of(uobj, struct ib_uxrcd_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
- ib_uverbs_dealloc_xrcd(file->device, xrcd);
- kfree(uxrcd);
- }
- mutex_unlock(&file->device->xrcd_tree_mutex);
-
- list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
- struct ib_pd *pd = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
- ib_dealloc_pd(pd);
- ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
- RDMACG_RESOURCE_HCA_OBJECT);
- kfree(uobj);
- }
-
+ uverbs_cleanup_ucontext(context, device_removed);
put_pid(context->tgid);
ib_rdmacg_uncharge(&context->cg_obj, context->device,
@@ -372,7 +233,7 @@ static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
complete(&dev->comp);
}
-static void ib_uverbs_release_file(struct kref *ref)
+void ib_uverbs_release_file(struct kref *ref)
{
struct ib_uverbs_file *file =
container_of(ref, struct ib_uverbs_file, ref);
@@ -392,58 +253,54 @@ static void ib_uverbs_release_file(struct kref *ref)
kfree(file);
}
-static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
- size_t count, loff_t *pos)
+static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
+ struct ib_uverbs_file *uverbs_file,
+ struct file *filp, char __user *buf,
+ size_t count, loff_t *pos,
+ size_t eventsz)
{
- struct ib_uverbs_event_file *file = filp->private_data;
struct ib_uverbs_event *event;
- int eventsz;
int ret = 0;
- spin_lock_irq(&file->lock);
+ spin_lock_irq(&ev_queue->lock);
- while (list_empty(&file->event_list)) {
- spin_unlock_irq(&file->lock);
+ while (list_empty(&ev_queue->event_list)) {
+ spin_unlock_irq(&ev_queue->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
- if (wait_event_interruptible(file->poll_wait,
- (!list_empty(&file->event_list) ||
+ if (wait_event_interruptible(ev_queue->poll_wait,
+ (!list_empty(&ev_queue->event_list) ||
/* The barriers built into wait_event_interruptible()
* and wake_up() guarantee this will see the null set
* without using RCU
*/
- !file->uverbs_file->device->ib_dev)))
+ !uverbs_file->device->ib_dev)))
return -ERESTARTSYS;
/* If device was disassociated and no event exists set an error */
- if (list_empty(&file->event_list) &&
- !file->uverbs_file->device->ib_dev)
+ if (list_empty(&ev_queue->event_list) &&
+ !uverbs_file->device->ib_dev)
return -EIO;
- spin_lock_irq(&file->lock);
+ spin_lock_irq(&ev_queue->lock);
}
- event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
-
- if (file->is_async)
- eventsz = sizeof (struct ib_uverbs_async_event_desc);
- else
- eventsz = sizeof (struct ib_uverbs_comp_event_desc);
+ event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
if (eventsz > count) {
ret = -EINVAL;
event = NULL;
} else {
- list_del(file->event_list.next);
+ list_del(ev_queue->event_list.next);
if (event->counter) {
++(*event->counter);
list_del(&event->obj_list);
}
}
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&ev_queue->lock);
if (event) {
if (copy_to_user(buf, event, eventsz))
@@ -457,87 +314,158 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
return ret;
}
-static unsigned int ib_uverbs_event_poll(struct file *filp,
+static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+
+ return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
+ buf, count, pos,
+ sizeof(struct ib_uverbs_async_event_desc));
+}
+
+static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
+
+ return ib_uverbs_event_read(&comp_ev_file->ev_queue,
+ comp_ev_file->uobj_file.ufile, filp,
+ buf, count, pos,
+ sizeof(struct ib_uverbs_comp_event_desc));
+}
+
+static unsigned int ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
+ struct file *filp,
struct poll_table_struct *wait)
{
unsigned int pollflags = 0;
- struct ib_uverbs_event_file *file = filp->private_data;
- poll_wait(filp, &file->poll_wait, wait);
+ poll_wait(filp, &ev_queue->poll_wait, wait);
- spin_lock_irq(&file->lock);
- if (!list_empty(&file->event_list))
+ spin_lock_irq(&ev_queue->lock);
+ if (!list_empty(&ev_queue->event_list))
pollflags = POLLIN | POLLRDNORM;
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&ev_queue->lock);
return pollflags;
}
-static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
+static unsigned int ib_uverbs_async_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ return ib_uverbs_event_poll(filp->private_data, filp, wait);
+}
+
+static unsigned int ib_uverbs_comp_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
{
- struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
- return fasync_helper(fd, filp, on, &file->async_queue);
+ return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
}
-static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
+static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
- struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_event_queue *ev_queue = filp->private_data;
+
+ return fasync_helper(fd, filp, on, &ev_queue->async_queue);
+}
+
+static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
+{
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
+
+ return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
+}
+
+static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+ struct ib_uverbs_file *uverbs_file = file->uverbs_file;
struct ib_uverbs_event *entry, *tmp;
int closed_already = 0;
- mutex_lock(&file->uverbs_file->device->lists_mutex);
- spin_lock_irq(&file->lock);
- closed_already = file->is_closed;
- file->is_closed = 1;
- list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
+ mutex_lock(&uverbs_file->device->lists_mutex);
+ spin_lock_irq(&file->ev_queue.lock);
+ closed_already = file->ev_queue.is_closed;
+ file->ev_queue.is_closed = 1;
+ list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
if (entry->counter)
list_del(&entry->obj_list);
kfree(entry);
}
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&file->ev_queue.lock);
if (!closed_already) {
list_del(&file->list);
- if (file->is_async)
- ib_unregister_event_handler(&file->uverbs_file->
- event_handler);
+ ib_unregister_event_handler(&uverbs_file->event_handler);
+ }
+ mutex_unlock(&uverbs_file->device->lists_mutex);
+
+ kref_put(&uverbs_file->ref, ib_uverbs_release_file);
+ kref_put(&file->ref, ib_uverbs_release_async_event_file);
+
+ return 0;
+}
+
+static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_completion_event_file *file = filp->private_data;
+ struct ib_uverbs_event *entry, *tmp;
+
+ spin_lock_irq(&file->ev_queue.lock);
+ list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
+ if (entry->counter)
+ list_del(&entry->obj_list);
+ kfree(entry);
}
- mutex_unlock(&file->uverbs_file->device->lists_mutex);
+ spin_unlock_irq(&file->ev_queue.lock);
- kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&file->ref, ib_uverbs_release_event_file);
+ uverbs_close_fd(filp);
return 0;
}
-static const struct file_operations uverbs_event_fops = {
+const struct file_operations uverbs_event_fops = {
+ .owner = THIS_MODULE,
+ .read = ib_uverbs_comp_event_read,
+ .poll = ib_uverbs_comp_event_poll,
+ .release = ib_uverbs_comp_event_close,
+ .fasync = ib_uverbs_comp_event_fasync,
+ .llseek = no_llseek,
+};
+
+static const struct file_operations uverbs_async_event_fops = {
.owner = THIS_MODULE,
- .read = ib_uverbs_event_read,
- .poll = ib_uverbs_event_poll,
- .release = ib_uverbs_event_close,
- .fasync = ib_uverbs_event_fasync,
+ .read = ib_uverbs_async_event_read,
+ .poll = ib_uverbs_async_event_poll,
+ .release = ib_uverbs_async_event_close,
+ .fasync = ib_uverbs_async_event_fasync,
.llseek = no_llseek,
};
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
- struct ib_uverbs_event_file *file = cq_context;
+ struct ib_uverbs_event_queue *ev_queue = cq_context;
struct ib_ucq_object *uobj;
struct ib_uverbs_event *entry;
unsigned long flags;
- if (!file)
+ if (!ev_queue)
return;
- spin_lock_irqsave(&file->lock, flags);
- if (file->is_closed) {
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_lock_irqsave(&ev_queue->lock, flags);
+ if (ev_queue->is_closed) {
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
@@ -546,12 +474,12 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
entry->desc.comp.cq_handle = cq->uobject->user_handle;
entry->counter = &uobj->comp_events_reported;
- list_add_tail(&entry->list, &file->event_list);
+ list_add_tail(&entry->list, &ev_queue->event_list);
list_add_tail(&entry->obj_list, &uobj->comp_list);
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
- wake_up_interruptible(&file->poll_wait);
- kill_fasync(&file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&ev_queue->poll_wait);
+ kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
@@ -562,15 +490,15 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
struct ib_uverbs_event *entry;
unsigned long flags;
- spin_lock_irqsave(&file->async_file->lock, flags);
- if (file->async_file->is_closed) {
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
+ if (file->async_file->ev_queue.is_closed) {
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
}
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
}
@@ -579,13 +507,13 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
entry->desc.async.reserved = 0;
entry->counter = counter;
- list_add_tail(&entry->list, &file->async_file->event_list);
+ list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
if (obj_list)
list_add_tail(&entry->obj_list, obj_list);
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
- wake_up_interruptible(&file->async_file->poll_wait);
- kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
+ kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
@@ -603,7 +531,7 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
struct ib_uevent_object *uobj;
/* for XRC target qp's, check that qp is live */
- if (!event->element.qp->uobject || !event->element.qp->uobject->live)
+ if (!event->element.qp->uobject)
return;
uobj = container_of(event->element.qp->uobject,
@@ -648,15 +576,23 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
{
- kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
file->async_file = NULL;
}
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async)
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
{
- struct ib_uverbs_event_file *ev_file;
+ spin_lock_init(&ev_queue->lock);
+ INIT_LIST_HEAD(&ev_queue->event_list);
+ init_waitqueue_head(&ev_queue->poll_wait);
+ ev_queue->is_closed = 0;
+ ev_queue->async_queue = NULL;
+}
+
+struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
+ struct ib_device *ib_dev)
+{
+ struct ib_uverbs_async_event_file *ev_file;
struct file *filp;
int ret;
@@ -664,16 +600,11 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
if (!ev_file)
return ERR_PTR(-ENOMEM);
- kref_init(&ev_file->ref);
- spin_lock_init(&ev_file->lock);
- INIT_LIST_HEAD(&ev_file->event_list);
- init_waitqueue_head(&ev_file->poll_wait);
+ ib_uverbs_init_event_queue(&ev_file->ev_queue);
ev_file->uverbs_file = uverbs_file;
kref_get(&ev_file->uverbs_file->ref);
- ev_file->async_queue = NULL;
- ev_file->is_closed = 0;
-
- filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
+ kref_init(&ev_file->ref);
+ filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
ev_file, O_RDONLY);
if (IS_ERR(filp))
goto err_put_refs;
@@ -683,64 +614,33 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
&uverbs_file->device->uverbs_events_file_list);
mutex_unlock(&uverbs_file->device->lists_mutex);
- if (is_async) {
- WARN_ON(uverbs_file->async_file);
- uverbs_file->async_file = ev_file;
- kref_get(&uverbs_file->async_file->ref);
- INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
- ib_dev,
- ib_uverbs_event_handler);
- ret = ib_register_event_handler(&uverbs_file->event_handler);
- if (ret)
- goto err_put_file;
-
- /* At that point async file stuff was fully set */
- ev_file->is_async = 1;
- }
+ WARN_ON(uverbs_file->async_file);
+ uverbs_file->async_file = ev_file;
+ kref_get(&uverbs_file->async_file->ref);
+ INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
+ ib_dev,
+ ib_uverbs_event_handler);
+ ret = ib_register_event_handler(&uverbs_file->event_handler);
+ if (ret)
+ goto err_put_file;
+
+ /* At this point the async file setup is complete */
return filp;
err_put_file:
fput(filp);
- kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&uverbs_file->async_file->ref,
+ ib_uverbs_release_async_event_file);
uverbs_file->async_file = NULL;
return ERR_PTR(ret);
err_put_refs:
kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
return filp;
}
-/*
- * Look up a completion event file by FD. If lookup is successful,
- * takes a ref to the event file struct that it returns; if
- * unsuccessful, returns NULL.
- */
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
-{
- struct ib_uverbs_event_file *ev_file = NULL;
- struct fd f = fdget(fd);
-
- if (!f.file)
- return NULL;
-
- if (f.file->f_op != &uverbs_event_fops)
- goto out;
-
- ev_file = f.file->private_data;
- if (ev_file->is_async) {
- ev_file = NULL;
- goto out;
- }
-
- kref_get(&ev_file->ref);
-
-out:
- fdput(f);
- return ev_file;
-}
-
static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
{
u64 mask;
@@ -986,6 +886,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
}
file->device = dev;
+ spin_lock_init(&file->idr_lock);
+ idr_init(&file->idr);
file->ucontext = NULL;
file->async_file = NULL;
kref_init(&file->ref);
@@ -1019,10 +921,11 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
mutex_lock(&file->cleanup_mutex);
if (file->ucontext) {
- ib_uverbs_cleanup_ucontext(file, file->ucontext);
+ ib_uverbs_cleanup_ucontext(file, file->ucontext, false);
file->ucontext = NULL;
}
mutex_unlock(&file->cleanup_mutex);
+ idr_destroy(&file->idr);
mutex_lock(&file->device->lists_mutex);
if (!file->is_closed) {
@@ -1032,7 +935,8 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
mutex_unlock(&file->device->lists_mutex);
if (file->async_file)
- kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
+ kref_put(&file->async_file->ref,
+ ib_uverbs_release_async_event_file);
kref_put(&file->ref, ib_uverbs_release_file);
kobject_put(&dev->kobj);
@@ -1189,7 +1093,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
cdev_init(&uverbs_dev->cdev, NULL);
uverbs_dev->cdev.owner = THIS_MODULE;
uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
- uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
+ cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj);
kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev;
@@ -1231,7 +1135,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
struct ib_uverbs_file *file;
- struct ib_uverbs_event_file *event_file;
+ struct ib_uverbs_async_event_file *event_file;
struct ib_event event;
/* Pending running commands to terminate */
@@ -1268,7 +1172,9 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
* (e.g mmput).
*/
ib_dev->disassociate_ucontext(ucontext);
- ib_uverbs_cleanup_ucontext(file, ucontext);
+ mutex_lock(&file->cleanup_mutex);
+ ib_uverbs_cleanup_ucontext(file, ucontext, true);
+ mutex_unlock(&file->cleanup_mutex);
}
mutex_lock(&uverbs_dev->lists_mutex);
@@ -1278,21 +1184,20 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
event_file = list_first_entry(&uverbs_dev->
uverbs_events_file_list,
- struct ib_uverbs_event_file,
+ struct ib_uverbs_async_event_file,
list);
- spin_lock_irq(&event_file->lock);
- event_file->is_closed = 1;
- spin_unlock_irq(&event_file->lock);
+ spin_lock_irq(&event_file->ev_queue.lock);
+ event_file->ev_queue.is_closed = 1;
+ spin_unlock_irq(&event_file->ev_queue.lock);
list_del(&event_file->list);
- if (event_file->is_async) {
- ib_unregister_event_handler(&event_file->uverbs_file->
- event_handler);
- event_file->uverbs_file->event_handler.device = NULL;
- }
+ ib_unregister_event_handler(
+ &event_file->uverbs_file->event_handler);
+ event_file->uverbs_file->event_handler.device =
+ NULL;
- wake_up_interruptible(&event_file->poll_wait);
- kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&event_file->ev_queue.poll_wait);
+ kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
mutex_unlock(&uverbs_dev->lists_mutex);
}
@@ -1396,13 +1301,6 @@ static void __exit ib_uverbs_cleanup(void)
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
if (overflow_maj)
unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
- idr_destroy(&ib_uverbs_pd_idr);
- idr_destroy(&ib_uverbs_mr_idr);
- idr_destroy(&ib_uverbs_mw_idr);
- idr_destroy(&ib_uverbs_ah_idr);
- idr_destroy(&ib_uverbs_cq_idr);
- idr_destroy(&ib_uverbs_qp_idr);
- idr_destroy(&ib_uverbs_srq_idr);
}
module_init(ib_uverbs_init);
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index af020f80d50f..8b9587fe2303 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -34,20 +34,25 @@
#include <rdma/ib_marshall.h>
void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
- struct ib_ah_attr *src)
+ struct rdma_ah_attr *src)
{
- memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid);
- dst->grh.flow_label = src->grh.flow_label;
- dst->grh.sgid_index = src->grh.sgid_index;
- dst->grh.hop_limit = src->grh.hop_limit;
- dst->grh.traffic_class = src->grh.traffic_class;
memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
- dst->dlid = src->dlid;
- dst->sl = src->sl;
- dst->src_path_bits = src->src_path_bits;
- dst->static_rate = src->static_rate;
- dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0;
- dst->port_num = src->port_num;
+ dst->dlid = rdma_ah_get_dlid(src);
+ dst->sl = rdma_ah_get_sl(src);
+ dst->src_path_bits = rdma_ah_get_path_bits(src);
+ dst->static_rate = rdma_ah_get_static_rate(src);
+ dst->is_global = rdma_ah_get_ah_flags(src) &
+ IB_AH_GRH ? 1 : 0;
+ if (dst->is_global) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(src);
+
+ memcpy(dst->grh.dgid, grh->dgid.raw, sizeof(grh->dgid));
+ dst->grh.flow_label = grh->flow_label;
+ dst->grh.sgid_index = grh->sgid_index;
+ dst->grh.hop_limit = grh->hop_limit;
+ dst->grh.traffic_class = grh->traffic_class;
+ }
+ dst->port_num = rdma_ah_get_port_num(src);
dst->reserved = 0;
}
EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
@@ -91,15 +96,15 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
}
EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
-void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
- struct ib_sa_path_rec *src)
+void __ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+ struct sa_path_rec *src)
{
memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid);
memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid);
- dst->dlid = src->dlid;
- dst->slid = src->slid;
- dst->raw_traffic = src->raw_traffic;
+ dst->dlid = htons(ntohl(sa_path_get_dlid(src)));
+ dst->slid = htons(ntohl(sa_path_get_slid(src)));
+ dst->raw_traffic = sa_path_get_raw_traffic(src);
dst->flow_label = src->flow_label;
dst->hop_limit = src->hop_limit;
dst->traffic_class = src->traffic_class;
@@ -115,17 +120,43 @@ void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
}
+
+void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+ struct sa_path_rec *src)
+{
+ struct sa_path_rec rec;
+
+ if (src->rec_type == SA_PATH_REC_TYPE_OPA) {
+ sa_convert_path_opa_to_ib(&rec, src);
+ __ib_copy_path_rec_to_user(dst, &rec);
+ return;
+ }
+ __ib_copy_path_rec_to_user(dst, src);
+}
EXPORT_SYMBOL(ib_copy_path_rec_to_user);
-void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
+void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
struct ib_user_path_rec *src)
{
+ __be32 slid, dlid;
+
+ memset(dst, 0, sizeof(*dst));
+ if ((ib_is_opa_gid((union ib_gid *)src->sgid)) ||
+ (ib_is_opa_gid((union ib_gid *)src->dgid))) {
+ dst->rec_type = SA_PATH_REC_TYPE_OPA;
+ slid = htonl(opa_get_lid_from_gid((union ib_gid *)src->sgid));
+ dlid = htonl(opa_get_lid_from_gid((union ib_gid *)src->dgid));
+ } else {
+ dst->rec_type = SA_PATH_REC_TYPE_IB;
+ slid = htonl(ntohs(src->slid));
+ dlid = htonl(ntohs(src->dlid));
+ }
memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid);
memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid);
- dst->dlid = src->dlid;
- dst->slid = src->slid;
- dst->raw_traffic = src->raw_traffic;
+ sa_path_set_dlid(dst, dlid);
+ sa_path_set_slid(dst, slid);
+ sa_path_set_raw_traffic(dst, src->raw_traffic);
dst->flow_label = src->flow_label;
dst->hop_limit = src->hop_limit;
dst->traffic_class = src->traffic_class;
@@ -141,9 +172,9 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
- memset(dst->dmac, 0, sizeof(dst->dmac));
- dst->net = NULL;
- dst->ifindex = 0;
- dst->gid_type = IB_GID_TYPE_IB;
+ /* TODO: No need to set this */
+ sa_path_set_dmac_zero(dst);
+ sa_path_set_ndev(dst, NULL);
+ sa_path_set_ifindex(dst, 0);
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
new file mode 100644
index 000000000000..ef293379f37a
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <linux/bug.h>
+#include <linux/file.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_ah(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return rdma_destroy_ah((struct ib_ah *)uobject->object);
+}
+
+static int uverbs_free_flow(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return ib_destroy_flow((struct ib_flow *)uobject->object);
+}
+
+static int uverbs_free_mw(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return uverbs_dealloc_mw((struct ib_mw *)uobject->object);
+}
+
+static int uverbs_free_qp(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_qp *qp = uobject->object;
+ struct ib_uqp_object *uqp =
+ container_of(uobject, struct ib_uqp_object, uevent.uobject);
+ int ret;
+
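+ /* A user-requested destroy fails while multicast groups are still attached; forced cleanup detaches them here instead. */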
+ if (why == RDMA_REMOVE_DESTROY) {
+ if (!list_empty(&uqp->mcast_list))
+ return -EBUSY;
+ } else if (qp == qp->real_qp) {
+ ib_uverbs_detach_umcast(qp, uqp);
+ }
+
+ ret = ib_destroy_qp(qp);
+ if (ret && why == RDMA_REMOVE_DESTROY)
+ return ret;
+
+ if (uqp->uxrcd)
+ atomic_dec(&uqp->uxrcd->refcnt);
+
+ ib_uverbs_release_uevent(uobject->context->ufile, &uqp->uevent);
+ return ret;
+}
+
+static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
+ struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
+ int ret;
+
+ ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ kfree(ind_tbl);
+ return ret;
+}
+
+static int uverbs_free_wq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_wq *wq = uobject->object;
+ struct ib_uwq_object *uwq =
+ container_of(uobject, struct ib_uwq_object, uevent.uobject);
+ int ret;
+
+ ret = ib_destroy_wq(wq);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
+ return ret;
+}
+
+static int uverbs_free_srq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_srq *srq = uobject->object;
+ struct ib_uevent_object *uevent =
+ container_of(uobject, struct ib_uevent_object, uobject);
+ enum ib_srq_type srq_type = srq->srq_type;
+ int ret;
+
+ ret = ib_destroy_srq(srq);
+
+ if (ret && why == RDMA_REMOVE_DESTROY)
+ return ret;
+
+ if (srq_type == IB_SRQT_XRC) {
+ struct ib_usrq_object *us =
+ container_of(uevent, struct ib_usrq_object, uevent);
+
+ atomic_dec(&us->uxrcd->refcnt);
+ }
+
+ ib_uverbs_release_uevent(uobject->context->ufile, uevent);
+ return ret;
+}
+
+static int uverbs_free_cq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_cq *cq = uobject->object;
+ struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
+ struct ib_ucq_object *ucq =
+ container_of(uobject, struct ib_ucq_object, uobject);
+ int ret;
+
+ ret = ib_destroy_cq(cq);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
+ container_of(ev_queue,
+ struct ib_uverbs_completion_event_file,
+ ev_queue) : NULL,
+ ucq);
+ return ret;
+}
+
+static int uverbs_free_mr(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return ib_dereg_mr((struct ib_mr *)uobject->object);
+}
+
+static int uverbs_free_xrcd(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_xrcd *xrcd = uobject->object;
+ struct ib_uxrcd_object *uxrcd =
+ container_of(uobject, struct ib_uxrcd_object, uobject);
+ int ret;
+
+ mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex);
+ if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt))
+ ret = -EBUSY;
+ else
+ ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device,
+ xrcd, why);
+ mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex);
+
+ return ret;
+}
+
+static int uverbs_free_pd(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_pd *pd = uobject->object;
+
+ if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt))
+ return -EBUSY;
+
+ ib_dealloc_pd((struct ib_pd *)uobject->object);
+ return 0;
+}
+
+static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file,
+ enum rdma_remove_reason why)
+{
+ struct ib_uverbs_completion_event_file *comp_event_file =
+ container_of(uobj_file, struct ib_uverbs_completion_event_file,
+ uobj_file);
+ struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
+
+ spin_lock_irq(&event_queue->lock);
+ event_queue->is_closed = 1;
+ spin_unlock_irq(&event_queue->lock);
+
+ if (why == RDMA_REMOVE_DRIVER_REMOVE) {
+ wake_up_interruptible(&event_queue->poll_wait);
+ kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
+ }
+ return 0;
+};
+
+const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel = {
+ .type = UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), 0),
+ .context_closed = uverbs_hot_unplug_completion_event_file,
+ .fops = &uverbs_event_fops,
+ .name = "[infinibandevent]",
+ .flags = O_RDONLY,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_cq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0),
+ .destroy_object = uverbs_free_cq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_qp = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0),
+ .destroy_object = uverbs_free_qp,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_mw = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_mw,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_mr = {
+ /* 1 is used in order to free the MR after all the MWs */
+ .type = UVERBS_TYPE_ALLOC_IDR(1),
+ .destroy_object = uverbs_free_mr,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_srq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0),
+ .destroy_object = uverbs_free_srq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_ah = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_ah,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_flow = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_flow,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_wq = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0),
+ .destroy_object = uverbs_free_wq,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table = {
+ .type = UVERBS_TYPE_ALLOC_IDR(0),
+ .destroy_object = uverbs_free_rwq_ind_tbl,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd = {
+ .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0),
+ .destroy_object = uverbs_free_xrcd,
+};
+
+const struct uverbs_obj_idr_type uverbs_type_attrs_pd = {
+ /* 2 is used in order to free the PD after MRs */
+ .type = UVERBS_TYPE_ALLOC_IDR(2),
+ .destroy_object = uverbs_free_pd,
+};
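Note: the numeric argument to UVERBS_TYPE_ALLOC_IDR()/UVERBS_TYPE_ALLOC_IDR_SZ() is a destroy order used when a ucontext is torn down, which is why MWs (0) are freed before MRs (1) and MRs before PDs (2), matching the comments above. The sketch below only illustrates an order-driven cleanup pass under that assumption; the real loop lives in rdma_core.c and the demo_* names are placeholders.

struct demo_obj {
	unsigned int destroy_order;			/* lower order is destroyed first */
	int (*destroy)(struct demo_obj *obj);
};

static void demo_cleanup(struct demo_obj **objs, int n, unsigned int max_order)
{
	unsigned int order;
	int i;

	/* walk the orders from 0 upward so dependents go before their parents */
	for (order = 0; order <= max_order; order++)
		for (i = 0; i < n; i++)
			if (objs[i] && objs[i]->destroy_order == order) {
				objs[i]->destroy(objs[i]);
				objs[i] = NULL;
			}
}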
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 85ed5051fdfd..4792f5209ac2 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -311,7 +311,7 @@ EXPORT_SYMBOL(ib_dealloc_pd);
/* Address handles */
-struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
{
struct ib_ah *ah;
@@ -321,12 +321,13 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
ah->device = pd->device;
ah->pd = pd;
ah->uobject = NULL;
+ ah->type = ah_attr->type;
atomic_inc(&pd->usecnt);
}
return ah;
}
-EXPORT_SYMBOL(ib_create_ah);
+EXPORT_SYMBOL(rdma_create_ah);
int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
{
@@ -452,7 +453,7 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
u32 flow_class;
u16 gid_index;
@@ -464,6 +465,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
union ib_gid sgid;
memset(ah_attr, 0, sizeof *ah_attr);
+ ah_attr->type = rdma_ah_find_type(device, port_num);
if (rdma_cap_eth_ah(device, port_num)) {
if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
net_type = wc->network_hdr_type;
@@ -494,7 +496,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
return -ENODEV;
ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
- ah_attr->dmac,
+ ah_attr->roce.dmac,
wc->wc_flags & IB_WC_WITH_VLAN ?
NULL : &vlan_id,
&if_index, &hoplimit);
@@ -525,15 +527,12 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
return ret;
}
- ah_attr->dlid = wc->slid;
- ah_attr->sl = wc->sl;
- ah_attr->src_path_bits = wc->dlid_path_bits;
- ah_attr->port_num = port_num;
+ rdma_ah_set_dlid(ah_attr, wc->slid);
+ rdma_ah_set_sl(ah_attr, wc->sl);
+ rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
+ rdma_ah_set_port_num(ah_attr, port_num);
if (wc->wc_flags & IB_WC_GRH) {
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = sgid;
-
if (!rdma_cap_eth_ah(device, port_num)) {
if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
ret = ib_find_cached_gid_by_port(device, &dgid,
@@ -547,11 +546,12 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
}
}
- ah_attr->grh.sgid_index = (u8) gid_index;
flow_class = be32_to_cpu(grh->version_tclass_flow);
- ah_attr->grh.flow_label = flow_class & 0xFFFFF;
- ah_attr->grh.hop_limit = hoplimit;
- ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
+ rdma_ah_set_grh(ah_attr, &sgid,
+ flow_class & 0xFFFFF,
+ (u8)gid_index, hoplimit,
+ (flow_class >> 20) & 0xFF);
+
}
return 0;
}
@@ -560,34 +560,37 @@ EXPORT_SYMBOL(ib_init_ah_from_wc);
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
const struct ib_grh *grh, u8 port_num)
{
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
int ret;
ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
if (ret)
return ERR_PTR(ret);
- return ib_create_ah(pd, &ah_attr);
+ return rdma_create_ah(pd, &ah_attr);
}
EXPORT_SYMBOL(ib_create_ah_from_wc);
-int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
+ if (ah->type != ah_attr->type)
+ return -EINVAL;
+
return ah->device->modify_ah ?
ah->device->modify_ah(ah, ah_attr) :
-ENOSYS;
}
-EXPORT_SYMBOL(ib_modify_ah);
+EXPORT_SYMBOL(rdma_modify_ah);
-int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
return ah->device->query_ah ?
ah->device->query_ah(ah, ah_attr) :
-ENOSYS;
}
-EXPORT_SYMBOL(ib_query_ah);
+EXPORT_SYMBOL(rdma_query_ah);
-int ib_destroy_ah(struct ib_ah *ah)
+int rdma_destroy_ah(struct ib_ah *ah)
{
struct ib_pd *pd;
int ret;
@@ -599,7 +602,7 @@ int ib_destroy_ah(struct ib_ah *ah)
return ret;
}
-EXPORT_SYMBOL(ib_destroy_ah);
+EXPORT_SYMBOL(rdma_destroy_ah);
/* Shared receive queues */
@@ -1201,19 +1204,22 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
EXPORT_SYMBOL(ib_modify_qp_is_ok);
int ib_resolve_eth_dmac(struct ib_device *device,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
int ret = 0;
+ struct ib_global_route *grh;
- if (!rdma_is_port_valid(device, ah_attr->port_num))
+ if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
return -EINVAL;
- if (!rdma_cap_eth_ah(device, ah_attr->port_num))
+ if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE)
return 0;
- if (rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
- rdma_get_ll_mac((struct in6_addr *)ah_attr->grh.dgid.raw,
- ah_attr->dmac);
+ grh = rdma_ah_retrieve_grh(ah_attr);
+
+ if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) {
+ rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
+ ah_attr->roce.dmac);
} else {
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
@@ -1221,8 +1227,8 @@ int ib_resolve_eth_dmac(struct ib_device *device,
int hop_limit;
ret = ib_query_gid(device,
- ah_attr->port_num,
- ah_attr->grh.sgid_index,
+ rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index,
&sgid, &sgid_attr);
if (ret || !sgid_attr.ndev) {
@@ -1233,14 +1239,14 @@ int ib_resolve_eth_dmac(struct ib_device *device,
ifindex = sgid_attr.ndev->ifindex;
- ret = rdma_addr_find_l2_eth_by_grh(&sgid,
- &ah_attr->grh.dgid,
- ah_attr->dmac,
- NULL, &ifindex, &hop_limit);
+ ret =
+ rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ ah_attr->roce.dmac,
+ NULL, &ifindex, &hop_limit);
dev_put(sgid_attr.ndev);
- ah_attr->grh.hop_limit = hop_limit;
+ grh->hop_limit = hop_limit;
}
out:
return ret;
@@ -1519,7 +1525,9 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
if (!qp->device->attach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
+ lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ lid == be16_to_cpu(IB_LID_PERMISSIVE))
return -EINVAL;
ret = qp->device->attach_mcast(qp, gid, lid);
@@ -1535,7 +1543,9 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
if (!qp->device->detach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
+ lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ lid == be16_to_cpu(IB_LID_PERMISSIVE))
return -EINVAL;
ret = qp->device->detach_mcast(qp, gid, lid);
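Note: a minimal sketch of a caller adapted to the renamed AH API, using only helpers visible in this hunk (rdma_ah_find_type(), the rdma_ah_set_*() accessors, rdma_create_ah()); demo_make_ah() and its parameter values are illustrative, not code from the tree.

static struct ib_ah *demo_make_ah(struct ib_pd *pd, u8 port_num, u16 dlid, u8 sl)
{
	struct rdma_ah_attr ah_attr;

	memset(&ah_attr, 0, sizeof(ah_attr));
	/* the attribute type must match the port's link layer */
	ah_attr.type = rdma_ah_find_type(pd->device, port_num);
	rdma_ah_set_port_num(&ah_attr, port_num);
	rdma_ah_set_dlid(&ah_attr, dlid);
	rdma_ah_set_sl(&ah_attr, sl);

	return rdma_create_ah(pd, &ah_attr);
}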
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 33af2e3de399..7ba9e699d7ab 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -524,19 +524,20 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah)
}
struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
struct bnxt_re_ah *ah;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
int rc;
u16 vlan_tag;
u8 nw_type;
struct ib_gid_attr sgid_attr;
- if (!(ah_attr->ah_flags & IB_AH_GRH)) {
+ if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) {
dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set");
return ERR_PTR(-EINVAL);
}
@@ -548,33 +549,33 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
ah->qplib_ah.pd = &pd->qplib_pd;
/* Supply the configuration for the HW */
- memcpy(ah->qplib_ah.dgid.data, ah_attr->grh.dgid.raw,
+ memcpy(ah->qplib_ah.dgid.data, grh->dgid.raw,
sizeof(union ib_gid));
/*
* If RoCE V2 is enabled, stack will have two entries for
 * each GID entry. Avoiding this duplicate entry in HW. Dividing
* the GID index by 2 for RoCE V2
*/
- ah->qplib_ah.sgid_index = ah_attr->grh.sgid_index / 2;
- ah->qplib_ah.host_sgid_index = ah_attr->grh.sgid_index;
- ah->qplib_ah.traffic_class = ah_attr->grh.traffic_class;
- ah->qplib_ah.flow_label = ah_attr->grh.flow_label;
- ah->qplib_ah.hop_limit = ah_attr->grh.hop_limit;
- ah->qplib_ah.sl = ah_attr->sl;
+ ah->qplib_ah.sgid_index = grh->sgid_index / 2;
+ ah->qplib_ah.host_sgid_index = grh->sgid_index;
+ ah->qplib_ah.traffic_class = grh->traffic_class;
+ ah->qplib_ah.flow_label = grh->flow_label;
+ ah->qplib_ah.hop_limit = grh->hop_limit;
+ ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr);
if (ib_pd->uobject &&
!rdma_is_multicast_addr((struct in6_addr *)
- ah_attr->grh.dgid.raw) &&
+ grh->dgid.raw) &&
!rdma_link_local_addr((struct in6_addr *)
- ah_attr->grh.dgid.raw)) {
+ grh->dgid.raw)) {
union ib_gid sgid;
rc = ib_get_cached_gid(&rdev->ibdev, 1,
- ah_attr->grh.sgid_index, &sgid,
+ grh->sgid_index, &sgid,
&sgid_attr);
if (rc) {
dev_err(rdev_to_dev(rdev),
"Failed to query gid at index %d",
- ah_attr->grh.sgid_index);
+ grh->sgid_index);
goto fail;
}
if (sgid_attr.ndev) {
@@ -595,8 +596,8 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V1;
break;
}
- rc = rdma_addr_find_l2_eth_by_grh(&sgid, &ah_attr->grh.dgid,
- ah_attr->dmac, &vlan_tag,
+ rc = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ ah_attr->roce.dmac, &vlan_tag,
&sgid_attr.ndev->ifindex,
NULL);
if (rc) {
@@ -605,7 +606,7 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
}
}
- memcpy(ah->qplib_ah.dmac, ah_attr->dmac, ETH_ALEN);
+ memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN);
rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah);
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH");
@@ -634,24 +635,24 @@ fail:
return ERR_PTR(rc);
}
-int bnxt_re_modify_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr)
+int bnxt_re_modify_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
{
return 0;
}
-int bnxt_re_query_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr)
+int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
{
struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
- memcpy(ah_attr->grh.dgid.raw, ah->qplib_ah.dgid.data,
- sizeof(union ib_gid));
- ah_attr->grh.sgid_index = ah->qplib_ah.host_sgid_index;
- ah_attr->grh.traffic_class = ah->qplib_ah.traffic_class;
- ah_attr->sl = ah->qplib_ah.sl;
- memcpy(ah_attr->dmac, ah->qplib_ah.dmac, ETH_ALEN);
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->port_num = 1;
- ah_attr->static_rate = 0;
+ ah_attr->type = ib_ah->type;
+ rdma_ah_set_sl(ah_attr, ah->qplib_ah.sl);
+ memcpy(ah_attr->roce.dmac, ah->qplib_ah.dmac, ETH_ALEN);
+ rdma_ah_set_grh(ah_attr, NULL, 0,
+ ah->qplib_ah.host_sgid_index,
+ 0, ah->qplib_ah.traffic_class);
+ rdma_ah_set_dgid_raw(ah_attr, ah->qplib_ah.dgid.data);
+ rdma_ah_set_port_num(ah_attr, 1);
+ rdma_ah_set_static_rate(ah_attr, 0);
return 0;
}
@@ -692,9 +693,9 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
kfree(rdev->qp1_sqp);
}
- if (qp->rumem && !IS_ERR(qp->rumem))
+ if (!IS_ERR_OR_NULL(qp->rumem))
ib_umem_release(qp->rumem);
- if (qp->sumem && !IS_ERR(qp->sumem))
+ if (!IS_ERR_OR_NULL(qp->sumem))
ib_umem_release(qp->sumem);
mutex_lock(&rdev->qp_lock);
@@ -1258,6 +1259,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
qp->qplib_qp.qkey = qp_attr->qkey;
}
if (qp_attr_mask & IB_QP_AV) {
+ const struct ib_global_route *grh =
+ rdma_ah_read_grh(&qp_attr->ah_attr);
+
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX |
@@ -1265,25 +1269,23 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS |
CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC |
CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID;
- memcpy(qp->qplib_qp.ah.dgid.data, qp_attr->ah_attr.grh.dgid.raw,
+ memcpy(qp->qplib_qp.ah.dgid.data, grh->dgid.raw,
sizeof(qp->qplib_qp.ah.dgid.data));
- qp->qplib_qp.ah.flow_label = qp_attr->ah_attr.grh.flow_label;
+ qp->qplib_qp.ah.flow_label = grh->flow_label;
/* If RoCE V2 is enabled, stack will have two entries for
 * each GID entry. Avoiding this duplicate entry in HW. Dividing
* the GID index by 2 for RoCE V2
*/
- qp->qplib_qp.ah.sgid_index =
- qp_attr->ah_attr.grh.sgid_index / 2;
- qp->qplib_qp.ah.host_sgid_index =
- qp_attr->ah_attr.grh.sgid_index;
- qp->qplib_qp.ah.hop_limit = qp_attr->ah_attr.grh.hop_limit;
- qp->qplib_qp.ah.traffic_class =
- qp_attr->ah_attr.grh.traffic_class;
- qp->qplib_qp.ah.sl = qp_attr->ah_attr.sl;
- ether_addr_copy(qp->qplib_qp.ah.dmac, qp_attr->ah_attr.dmac);
+ qp->qplib_qp.ah.sgid_index = grh->sgid_index / 2;
+ qp->qplib_qp.ah.host_sgid_index = grh->sgid_index;
+ qp->qplib_qp.ah.hop_limit = grh->hop_limit;
+ qp->qplib_qp.ah.traffic_class = grh->traffic_class;
+ qp->qplib_qp.ah.sl = rdma_ah_get_sl(&qp_attr->ah_attr);
+ ether_addr_copy(qp->qplib_qp.ah.dmac,
+ qp_attr->ah_attr.roce.dmac);
status = ib_get_cached_gid(&rdev->ibdev, 1,
- qp_attr->ah_attr.grh.sgid_index,
+ grh->sgid_index,
&sgid, &sgid_attr);
if (!status && sgid_attr.ndev) {
memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr,
@@ -1423,14 +1425,14 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp.access);
qp_attr->pkey_index = qplib_qp.pkey_index;
qp_attr->qkey = qplib_qp.qkey;
- memcpy(qp_attr->ah_attr.grh.dgid.raw, qplib_qp.ah.dgid.data,
- sizeof(qplib_qp.ah.dgid.data));
- qp_attr->ah_attr.grh.flow_label = qplib_qp.ah.flow_label;
- qp_attr->ah_attr.grh.sgid_index = qplib_qp.ah.host_sgid_index;
- qp_attr->ah_attr.grh.hop_limit = qplib_qp.ah.hop_limit;
- qp_attr->ah_attr.grh.traffic_class = qplib_qp.ah.traffic_class;
- qp_attr->ah_attr.sl = qplib_qp.ah.sl;
- ether_addr_copy(qp_attr->ah_attr.dmac, qplib_qp.ah.dmac);
+ qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp.ah.flow_label,
+ qplib_qp.ah.host_sgid_index,
+ qplib_qp.ah.hop_limit,
+ qplib_qp.ah.traffic_class);
+ rdma_ah_set_dgid_raw(&qp_attr->ah_attr, qplib_qp.ah.dgid.data);
+ rdma_ah_set_sl(&qp_attr->ah_attr, qplib_qp.ah.sl);
+ ether_addr_copy(qp_attr->ah_attr.roce.dmac, qplib_qp.ah.dmac);
qp_attr->path_mtu = __to_ib_mtu(qplib_qp.path_mtu);
qp_attr->timeout = qplib_qp.timeout;
qp_attr->retry_cnt = qplib_qp.retry_cnt;
@@ -2116,7 +2118,7 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq)
dev_err(rdev_to_dev(rdev), "Failed to destroy HW CQ");
return rc;
}
- if (cq->umem && !IS_ERR(cq->umem))
+ if (!IS_ERR_OR_NULL(cq->umem))
ib_umem_release(cq->umem);
if (cq) {
@@ -2818,7 +2820,7 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
{
struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
struct bnxt_re_dev *rdev = mr->rdev;
- int rc = 0;
+ int rc;
if (mr->npages && mr->pages) {
rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
@@ -2829,7 +2831,7 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
}
rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
- if (!IS_ERR(mr->ib_umem) && mr->ib_umem)
+ if (!IS_ERR_OR_NULL(mr->ib_umem))
ib_umem_release(mr->ib_umem);
kfree(mr);
@@ -3016,7 +3018,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
struct bnxt_re_mr *mr;
struct ib_umem *umem;
u64 *pbl_tbl, *pbl_tbl_orig;
- int i, umem_pgs, pages, page_shift, rc;
+ int i, umem_pgs, pages, rc;
struct scatterlist *sg;
int entry;
@@ -3062,22 +3064,22 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
}
pbl_tbl_orig = pbl_tbl;
- page_shift = ilog2(umem->page_size);
if (umem->hugetlb) {
dev_err(rdev_to_dev(rdev), "umem hugetlb not supported!");
rc = -EFAULT;
goto fail;
}
- if (umem->page_size != PAGE_SIZE) {
- dev_err(rdev_to_dev(rdev), "umem page size unsupported!");
+
+ if (umem->page_shift != PAGE_SHIFT) {
+ dev_err(rdev_to_dev(rdev), "umem page shift unsupported!");
rc = -EFAULT;
goto fail;
}
/* Map umem buf ptrs to the PBL */
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- pages = sg_dma_len(sg) >> page_shift;
+ pages = sg_dma_len(sg) >> umem->page_shift;
for (i = 0; i < pages; i++, pbl_tbl++)
- *pbl_tbl = sg_dma_address(sg) + (i << page_shift);
+ *pbl_tbl = sg_dma_address(sg) + (i << umem->page_shift);
}
rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl_orig,
umem_pgs, false);
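Note: with struct ib_umem exposing page_shift instead of page_size, the ilog2() step removed above is unnecessary; the old value remains recoverable by shifting, as in this illustrative helper (demo_umem_page_size() is not a kernel function).

static inline unsigned long demo_umem_page_size(struct ib_umem *umem)
{
	return 1UL << umem->page_shift;		/* equivalent of the old umem->page_size */
}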
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index b4084c252f06..5c3d71765454 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -150,10 +150,10 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
struct ib_udata *udata);
int bnxt_re_dealloc_pd(struct ib_pd *pd);
struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata);
-int bnxt_re_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
-int bnxt_re_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int bnxt_re_destroy_ah(struct ib_ah *ah);
struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr,
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
index 445e89e5e7cf..97dbe728520a 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -51,17 +51,18 @@ void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag)
m->mem_id = MEM_PMRX;
m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
m->len = size;
- PDBG("%s TPT addr 0x%x len %d\n", __func__, m->addr, m->len);
+ pr_debug("%s TPT addr 0x%x len %d\n", __func__, m->addr, m->len);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
+ pr_debug("%s toectl returned error %d\n", __func__, rc);
kfree(m);
return;
}
data = (u64 *)m->buf;
while (size > 0) {
- PDBG("TPT %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ pr_debug("TPT %08x: %016llx\n",
+ m->addr, (unsigned long long)*data);
size -= 8;
data++;
m->addr += 8;
@@ -87,18 +88,19 @@ void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift)
m->mem_id = MEM_PMRX;
m->addr = pbl_addr;
m->len = size;
- PDBG("%s PBL addr 0x%x len %d depth %d\n",
- __func__, m->addr, m->len, npages);
+ pr_debug("%s PBL addr 0x%x len %d depth %d\n",
+ __func__, m->addr, m->len, npages);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
+ pr_debug("%s toectl returned error %d\n", __func__, rc);
kfree(m);
return;
}
data = (u64 *)m->buf;
while (size > 0) {
- PDBG("PBL %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ pr_debug("PBL %08x: %016llx\n",
+ m->addr, (unsigned long long)*data);
size -= 8;
data++;
m->addr += 8;
@@ -114,8 +116,8 @@ void cxio_dump_wqe(union t3_wr *wqe)
if (size == 0)
size = 8;
while (size > 0) {
- PDBG("WQE %p: %016llx\n", data,
- (unsigned long long) be64_to_cpu(*data));
+ pr_debug("WQE %p: %016llx\n",
+ data, (unsigned long long)be64_to_cpu(*data));
size--;
data++;
}
@@ -127,8 +129,8 @@ void cxio_dump_wce(struct t3_cqe *wce)
int size = sizeof(*wce);
while (size > 0) {
- PDBG("WCE %p: %016llx\n", data,
- (unsigned long long) be64_to_cpu(*data));
+ pr_debug("WCE %p: %016llx\n",
+ data, (unsigned long long)be64_to_cpu(*data));
size -= 8;
data++;
}
@@ -148,17 +150,18 @@ void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents)
m->mem_id = MEM_PMRX;
m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
m->len = size;
- PDBG("%s RQT addr 0x%x len %d\n", __func__, m->addr, m->len);
+ pr_debug("%s RQT addr 0x%x len %d\n", __func__, m->addr, m->len);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
+ pr_debug("%s toectl returned error %d\n", __func__, rc);
kfree(m);
return;
}
data = (u64 *)m->buf;
while (size > 0) {
- PDBG("RQT %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ pr_debug("RQT %08x: %016llx\n",
+ m->addr, (unsigned long long)*data);
size -= 8;
data++;
m->addr += 8;
@@ -180,10 +183,10 @@ void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid)
m->mem_id = MEM_CM;
m->addr = hwtid * size;
m->len = size;
- PDBG("%s TCB %d len %d\n", __func__, m->addr, m->len);
+ pr_debug("%s TCB %d len %d\n", __func__, m->addr, m->len);
rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
+ pr_debug("%s toectl returned error %d\n", __func__, rc);
kfree(m);
return;
}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index ada2e5009c86..558d6a03375d 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -110,8 +110,7 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
udelay(1);
if (i++ > 1000000) {
- printk(KERN_ERR "%s: stalled rnic\n",
- rdev_p->dev_name);
+ pr_err("%s: stalled rnic\n", rdev_p->dev_name);
return -EIO;
}
}
@@ -140,7 +139,7 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
struct t3_modify_qp_wr *wqe;
struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
if (!skb) {
- PDBG("%s alloc_skb failed\n", __func__);
+ pr_debug("%s alloc_skb failed\n", __func__);
return -ENOMEM;
}
wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
@@ -230,7 +229,7 @@ static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
}
out:
mutex_unlock(&uctx->lock);
- PDBG("%s qpid 0x%x\n", __func__, qpid);
+ pr_debug("%s qpid 0x%x\n", __func__, qpid);
return qpid;
}
@@ -242,7 +241,7 @@ static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid,
entry = kmalloc(sizeof *entry, GFP_KERNEL);
if (!entry)
return;
- PDBG("%s qpid 0x%x\n", __func__, qpid);
+ pr_debug("%s qpid 0x%x\n", __func__, qpid);
entry->qpid = qpid;
mutex_lock(&uctx->lock);
list_add_tail(&entry->entry, &uctx->qpids);
@@ -306,8 +305,8 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
(wq->qpid << rdev_p->qpshift);
wq->rdev = rdev_p;
- PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__,
- wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
+ pr_debug("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n",
+ __func__, wq->qpid, wq->doorbell, (unsigned long long)wq->udb);
return 0;
err4:
kfree(wq->sq);
@@ -351,8 +350,8 @@ static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
{
struct t3_cqe cqe;
- PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
- wq, cq, cq->sw_rptr, cq->sw_wptr);
+ pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
+ wq, cq, cq->sw_rptr, cq->sw_wptr);
memset(&cqe, 0, sizeof(cqe));
cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
V_CQE_OPCODE(T3_SEND) |
@@ -370,11 +369,11 @@ int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
u32 ptr;
int flushed = 0;
- PDBG("%s wq %p cq %p\n", __func__, wq, cq);
+ pr_debug("%s wq %p cq %p\n", __func__, wq, cq);
/* flush RQ */
- PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
- wq->rq_rptr, wq->rq_wptr, count);
+ pr_debug("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
+ wq->rq_rptr, wq->rq_wptr, count);
ptr = wq->rq_rptr + count;
while (ptr++ != wq->rq_wptr) {
insert_recv_cqe(wq, cq);
@@ -388,8 +387,8 @@ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
{
struct t3_cqe cqe;
- PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
- wq, cq, cq->sw_rptr, cq->sw_wptr);
+ pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
+ wq, cq, cq->sw_rptr, cq->sw_wptr);
memset(&cqe, 0, sizeof(cqe));
cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
V_CQE_OPCODE(sqp->opcode) |
@@ -429,11 +428,11 @@ void cxio_flush_hw_cq(struct t3_cq *cq)
{
struct t3_cqe *cqe, *swcqe;
- PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
+ pr_debug("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
cqe = cxio_next_hw_cqe(cq);
while (cqe) {
- PDBG("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
- __func__, cq->rptr, cq->sw_wptr);
+ pr_debug("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
+ __func__, cq->rptr, cq->sw_wptr);
swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2);
*swcqe = *cqe;
swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
@@ -476,7 +475,7 @@ void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
(*count)++;
ptr++;
}
- PDBG("%s cq %p count %d\n", __func__, cq, *count);
+ pr_debug("%s cq %p count %d\n", __func__, cq, *count);
}
void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
@@ -485,7 +484,7 @@ void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
u32 ptr;
*count = 0;
- PDBG("%s count zero %d\n", __func__, *count);
+ pr_debug("%s count zero %d\n", __func__, *count);
ptr = cq->sw_rptr;
while (!Q_EMPTY(ptr, cq->sw_wptr)) {
cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
@@ -494,7 +493,7 @@ void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
(*count)++;
ptr++;
}
- PDBG("%s cq %p count %d\n", __func__, cq, *count);
+ pr_debug("%s cq %p count %d\n", __func__, cq, *count);
}
static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
@@ -521,12 +520,12 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
if (!skb) {
- PDBG("%s alloc_skb failed\n", __func__);
+ pr_debug("%s alloc_skb failed\n", __func__);
return -ENOMEM;
}
err = cxio_hal_init_ctrl_cq(rdev_p);
if (err) {
- PDBG("%s err %d initializing ctrl_cq\n", __func__, err);
+ pr_debug("%s err %d initializing ctrl_cq\n", __func__, err);
goto err;
}
rdev_p->ctrl_qp.workq = dma_alloc_coherent(
@@ -536,7 +535,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
&(rdev_p->ctrl_qp.dma_addr),
GFP_KERNEL);
if (!rdev_p->ctrl_qp.workq) {
- PDBG("%s dma_alloc_coherent failed\n", __func__);
+ pr_debug("%s dma_alloc_coherent failed\n", __func__);
err = -ENOMEM;
goto err;
}
@@ -571,9 +570,9 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
wqe->sge_cmd = cpu_to_be64(sge_cmd);
wqe->ctx1 = cpu_to_be64(ctx1);
wqe->ctx0 = cpu_to_be64(ctx0);
- PDBG("CtrlQP dma_addr 0x%llx workq %p size %d\n",
- (unsigned long long) rdev_p->ctrl_qp.dma_addr,
- rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
+ pr_debug("CtrlQP dma_addr 0x%llx workq %p size %d\n",
+ (unsigned long long)rdev_p->ctrl_qp.dma_addr,
+ rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
skb->priority = CPL_PRIORITY_CONTROL;
return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
err:
@@ -605,26 +604,26 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
u64 utx_cmd;
addr &= 0x7FFFFFF;
nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */
- PDBG("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
- __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
- nr_wqe, data, addr);
+ pr_debug("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
+ __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
+ nr_wqe, data, addr);
utx_len = 3; /* in 32B unit */
for (i = 0; i < nr_wqe; i++) {
if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
T3_CTRL_QP_SIZE_LOG2)) {
- PDBG("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, "
- "wait for more space i %d\n", __func__,
- rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
+			pr_debug("%s ctrl_qp full wptr 0x%0x rptr 0x%0x, wait for more space i %d\n",
+ __func__,
+ rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
!Q_FULL(rdev_p->ctrl_qp.rptr,
rdev_p->ctrl_qp.wptr,
T3_CTRL_QP_SIZE_LOG2))) {
- PDBG("%s ctrl_qp workq interrupted\n",
- __func__);
+ pr_debug("%s ctrl_qp workq interrupted\n",
+ __func__);
return -ERESTARTSYS;
}
- PDBG("%s ctrl_qp wakeup, continue posting work request "
- "i %d\n", __func__, i);
+ pr_debug("%s ctrl_qp wakeup, continue posting work request i %d\n",
+ __func__, i);
}
wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
(1 << T3_CTRL_QP_SIZE_LOG2)));
@@ -645,7 +644,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
if ((i != 0) &&
(i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
flag = T3_COMPLETION_FLAG;
- PDBG("%s force completion at i %d\n", __func__, i);
+ pr_debug("%s force completion at i %d\n", __func__, i);
}
/* build the utx mem command */
@@ -717,8 +716,8 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
return -ENOMEM;
*stag = (stag_idx << 8) | ((*stag) & 0xFF);
}
- PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
- __func__, stag_state, type, pdid, stag_idx);
+ pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
+ __func__, stag_state, type, pdid, stag_idx);
mutex_lock(&rdev_p->ctrl_qp.lock);
@@ -767,9 +766,9 @@ int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
u32 wptr;
int err;
- PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
- __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
- pbl_size);
+ pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+ __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
+ pbl_size);
mutex_lock(&rdev_p->ctrl_qp.lock);
err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
@@ -837,7 +836,7 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC);
if (!skb)
return -ENOMEM;
- PDBG("%s rdev_p %p\n", __func__, rdev_p);
+ pr_debug("%s rdev_p %p\n", __func__, rdev_p);
wqe = (struct t3_rdma_init_wr *) __skb_put(skb, sizeof(*wqe));
wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT));
wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) |
@@ -880,22 +879,20 @@ static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb)
static int cnt;
struct cxio_rdev *rdev_p = NULL;
struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
- PDBG("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x"
- " se %0x notify %0x cqbranch %0x creditth %0x\n",
- cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
- RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
- RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
- RSPQ_CREDIT_THRESH(rsp_msg));
- PDBG("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d "
- "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
- CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
- CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
- CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+ pr_debug("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x se %0x notify %0x cqbranch %0x creditth %0x\n",
+ cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
+ RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
+ RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
+ RSPQ_CREDIT_THRESH(rsp_msg));
+ pr_debug("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
+ CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
+ CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
+ CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
if (!rdev_p) {
- PDBG("%s called by t3cdev %p with null ulp\n", __func__,
- t3cdev_p);
+ pr_debug("%s called by t3cdev %p with null ulp\n", __func__,
+ t3cdev_p);
return 0;
}
if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
@@ -934,13 +931,13 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
T3_MAX_DEV_NAME_LEN);
} else {
- PDBG("%s t3cdev_p or dev_name must be set\n", __func__);
+ pr_debug("%s t3cdev_p or dev_name must be set\n", __func__);
return -EINVAL;
}
list_add_tail(&rdev_p->entry, &rdev_list);
- PDBG("%s opening rnic dev %s\n", __func__, rdev_p->dev_name);
+ pr_debug("%s opening rnic dev %s\n", __func__, rdev_p->dev_name);
memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
if (!rdev_p->t3cdev_p)
rdev_p->t3cdev_p = dev2t3cdev(netdev_p);
@@ -949,13 +946,12 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO,
&(rdev_p->fw_info));
if (err) {
- printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
- __func__, rdev_p->t3cdev_p, err);
+ pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
+ __func__, rdev_p->t3cdev_p, err);
goto err1;
}
if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) {
- printk(KERN_ERR MOD "fatal firmware version mismatch: "
- "need version %u but adapter has version %u\n",
+ pr_err("fatal firmware version mismatch: need version %u but adapter has version %u\n",
CXIO_FW_MAJ,
G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers));
err = -EINVAL;
@@ -965,15 +961,15 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
&(rdev_p->rnic_info));
if (err) {
- printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
- __func__, rdev_p->t3cdev_p, err);
+ pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
+ __func__, rdev_p->t3cdev_p, err);
goto err1;
}
err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
&(rdev_p->port_info));
if (err) {
- printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
- __func__, rdev_p->t3cdev_p, err);
+ pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
+ __func__, rdev_p->t3cdev_p, err);
goto err1;
}
@@ -988,42 +984,39 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
PAGE_SHIFT));
rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
- PDBG("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d "
- "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
- __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
- rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
- rdev_p->rnic_info.pbl_base,
- rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
- rdev_p->rnic_info.rqt_top);
- PDBG("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu "
- "qpnr %d qpmask 0x%x\n",
- rdev_p->rnic_info.udbell_len,
- rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
- rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
+ pr_debug("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
+ __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
+ rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
+ rdev_p->rnic_info.pbl_base,
+ rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
+ rdev_p->rnic_info.rqt_top);
+ pr_debug("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu qpnr %d qpmask 0x%x\n",
+ rdev_p->rnic_info.udbell_len,
+ rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
+ rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
err = cxio_hal_init_ctrl_qp(rdev_p);
if (err) {
- printk(KERN_ERR "%s error %d initializing ctrl_qp.\n",
- __func__, err);
+ pr_err("%s error %d initializing ctrl_qp\n", __func__, err);
goto err1;
}
err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0,
0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ,
T3_MAX_NUM_PD);
if (err) {
- printk(KERN_ERR "%s error %d initializing hal resources.\n",
+ pr_err("%s error %d initializing hal resources\n",
__func__, err);
goto err2;
}
err = cxio_hal_pblpool_create(rdev_p);
if (err) {
- printk(KERN_ERR "%s error %d initializing pbl mem pool.\n",
+ pr_err("%s error %d initializing pbl mem pool\n",
__func__, err);
goto err3;
}
err = cxio_hal_rqtpool_create(rdev_p);
if (err) {
- printk(KERN_ERR "%s error %d initializing rqt mem pool.\n",
+ pr_err("%s error %d initializing rqt mem pool\n",
__func__, err);
goto err4;
}
@@ -1086,9 +1079,9 @@ static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
/*
* Insert this completed cqe into the swcq.
*/
- PDBG("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
- __func__, Q_PTR2IDX(ptr, wq->sq_size_log2),
- Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
+ pr_debug("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
+ __func__, Q_PTR2IDX(ptr, wq->sq_size_log2),
+ Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
sqp->cqe.header |= htonl(V_CQE_SWCQE(1));
*(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2))
= sqp->cqe;
@@ -1154,12 +1147,11 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
*credit = 0;
hw_cqe = cxio_next_cqe(cq);
- PDBG("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x"
- " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
- CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
- CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
- CQE_WRID_LOW(*hw_cqe));
+ pr_debug("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
+ CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
+ CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
+ CQE_WRID_LOW(*hw_cqe));
/*
* skip cqe's not affiliated with a QP.
@@ -1278,9 +1270,10 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) {
struct t3_swsq *sqp;
- PDBG("%s out of order completion going in swsq at idx %ld\n",
- __func__,
- Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2));
+ pr_debug("%s out of order completion going in swsq at idx %ld\n",
+ __func__,
+ Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe),
+ wq->sq_size_log2));
sqp = wq->sq +
Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2);
sqp->cqe = *hw_cqe;
@@ -1298,13 +1291,13 @@ proc_cqe:
*/
if (SQ_TYPE(*hw_cqe)) {
wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
- PDBG("%s completing sq idx %ld\n", __func__,
- Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
+ pr_debug("%s completing sq idx %ld\n", __func__,
+ Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
*cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
wq->sq_rptr++;
} else {
- PDBG("%s completing rq idx %ld\n", __func__,
- Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
+ pr_debug("%s completing rq idx %ld\n", __func__,
+ Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
*cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
cxio_hal_pblpool_free(wq->rdev,
@@ -1322,12 +1315,12 @@ flush_wq:
skip_cqe:
if (SW_CQE(*hw_cqe)) {
- PDBG("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
- __func__, cq, cq->cqid, cq->sw_rptr);
+ pr_debug("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
+ __func__, cq, cq->cqid, cq->sw_rptr);
++cq->sw_rptr;
} else {
- PDBG("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
- __func__, cq, cq->cqid, cq->rptr);
+ pr_debug("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
+ __func__, cq, cq->cqid, cq->rptr);
++cq->rptr;
/*
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 78fbe9ffe7f0..7e70c5492262 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -196,8 +196,11 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
u8 *cqe_flushed, u64 *cookie, u32 *credit);
int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb);
-#define MOD "iw_cxgb3: "
-#define PDBG(fmt, args...) pr_debug(MOD fmt, ## args)
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#ifdef DEBUG
void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag);
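Note: the pr_fmt() override above is what lets the bare pr_debug()/pr_err() calls elsewhere in this patch keep the "iw_cxgb3: " prefix the removed MOD macro used to supply. Roughly, and ignoring dynamic debug, a converted call expands as sketched below.

pr_debug("%s qpid 0x%x\n", __func__, qpid);
/* is compiled approximately as: */
printk(KERN_DEBUG KBUILD_MODNAME ": " "%s qpid 0x%x\n", __func__, qpid);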
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index c40088ecf9f3..c6e7bc4420b6 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -209,13 +209,13 @@ u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
{
u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo,
&rscp->qpid_fifo_lock);
- PDBG("%s qpid 0x%x\n", __func__, qpid);
+ pr_debug("%s qpid 0x%x\n", __func__, qpid);
return qpid;
}
void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
{
- PDBG("%s qpid 0x%x\n", __func__, qpid);
+ pr_debug("%s qpid 0x%x\n", __func__, qpid);
cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid);
}
@@ -257,13 +257,13 @@ void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
{
unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
return (u32)addr;
}
void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size);
gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size);
}
@@ -282,17 +282,18 @@ int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1,
pbl_chunk);
if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) {
- PDBG("%s failed to add PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
+ pr_debug("%s failed to add PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
- printk(KERN_WARNING MOD "%s: Failed to add all PBL chunks (%x/%x)\n",
- __func__, pbl_start, rdev_p->rnic_info.pbl_top - pbl_start);
+ pr_warn("%s: Failed to add all PBL chunks (%x/%x)\n",
+ __func__, pbl_start,
+ rdev_p->rnic_info.pbl_top - pbl_start);
return 0;
}
pbl_chunk >>= 1;
} else {
- PDBG("%s added PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
+ pr_debug("%s added PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
pbl_start += pbl_chunk;
}
}
@@ -315,13 +316,13 @@ void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size)
{
unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
return (u32)addr;
}
void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6);
gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6);
}
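Note: the PBL/RQT pools above are built on the kernel genalloc API (gen_pool_add()/gen_pool_alloc()/gen_pool_free()). Below is a self-contained sketch of that pattern, assuming a 64-byte minimum allocation granularity; demo_pool_create() and its sizes are placeholders, not code from this driver.

#include <linux/genalloc.h>

static struct gen_pool *demo_pool_create(unsigned long base, size_t len)
{
	/* 2^6 = 64-byte minimum allocation unit, no NUMA node preference */
	struct gen_pool *pool = gen_pool_create(6, -1);

	if (!pool)
		return NULL;
	if (gen_pool_add(pool, base, len, -1)) {	/* seed the pool with one chunk */
		gen_pool_destroy(pool);
		return NULL;
	}
	return pool;
}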
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index b3e11329801d..47b2ce2ef203 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -105,7 +105,7 @@ static void iwch_db_drop_task(struct work_struct *work)
static void rnic_init(struct iwch_dev *rnicp)
{
- PDBG("%s iwch_dev %p\n", __func__, rnicp);
+ pr_debug("%s iwch_dev %p\n", __func__, rnicp);
idr_init(&rnicp->cqidr);
idr_init(&rnicp->qpidr);
idr_init(&rnicp->mmidr);
@@ -145,12 +145,11 @@ static void open_rnic_dev(struct t3cdev *tdev)
{
struct iwch_dev *rnicp;
- PDBG("%s t3cdev %p\n", __func__, tdev);
- printk_once(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
- DRV_VERSION);
+ pr_debug("%s t3cdev %p\n", __func__, tdev);
+ pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION);
rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
if (!rnicp) {
- printk(KERN_ERR MOD "Cannot allocate ib device\n");
+ pr_err("Cannot allocate ib device\n");
return;
}
rnicp->rdev.ulp = rnicp;
@@ -160,7 +159,7 @@ static void open_rnic_dev(struct t3cdev *tdev)
if (cxio_rdev_open(&rnicp->rdev)) {
mutex_unlock(&dev_mutex);
- printk(KERN_ERR MOD "Unable to open CXIO rdev\n");
+ pr_err("Unable to open CXIO rdev\n");
ib_dealloc_device(&rnicp->ibdev);
return;
}
@@ -171,18 +170,18 @@ static void open_rnic_dev(struct t3cdev *tdev)
mutex_unlock(&dev_mutex);
if (iwch_register_device(rnicp)) {
- printk(KERN_ERR MOD "Unable to register device\n");
+ pr_err("Unable to register device\n");
close_rnic_dev(tdev);
}
- printk(KERN_INFO MOD "Initialized device %s\n",
- pci_name(rnicp->rdev.rnic_info.pdev));
+ pr_info("Initialized device %s\n",
+ pci_name(rnicp->rdev.rnic_info.pdev));
return;
}
static void close_rnic_dev(struct t3cdev *tdev)
{
struct iwch_dev *dev, *tmp;
- PDBG("%s t3cdev %p\n", __func__, tdev);
+ pr_debug("%s t3cdev %p\n", __func__, tdev);
mutex_lock(&dev_mutex);
list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
if (dev->rdev.t3cdev_p == tdev) {
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 65ee64400deb..b61630eba912 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -112,9 +112,9 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status);
static void start_ep_timer(struct iwch_ep *ep)
{
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
if (timer_pending(&ep->timer)) {
- PDBG("%s stopped / restarted timer ep %p\n", __func__, ep);
+ pr_debug("%s stopped / restarted timer ep %p\n", __func__, ep);
del_timer_sync(&ep->timer);
} else
get_ep(&ep->com);
@@ -126,7 +126,7 @@ static void start_ep_timer(struct iwch_ep *ep)
static void stop_ep_timer(struct iwch_ep *ep)
{
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
if (!timer_pending(&ep->timer)) {
WARN(1, "%s timer stopped when its not running! ep %p state %u\n",
__func__, ep, ep->com.state);
@@ -227,13 +227,13 @@ int iwch_resume_tid(struct iwch_ep *ep)
static void set_emss(struct iwch_ep *ep, u16 opt)
{
- PDBG("%s ep %p opt %u\n", __func__, ep, opt);
+ pr_debug("%s ep %p opt %u\n", __func__, ep, opt);
ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
if (G_TCPOPT_TSTAMP(opt))
ep->emss -= 12;
if (ep->emss < 128)
ep->emss = 128;
- PDBG("emss=%d\n", ep->emss);
+ pr_debug("emss=%d\n", ep->emss);
}
static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
@@ -257,7 +257,7 @@ static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
unsigned long flags;
spin_lock_irqsave(&epc->lock, flags);
- PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
+ pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
__state_set(epc, new);
spin_unlock_irqrestore(&epc->lock, flags);
return;
@@ -273,7 +273,7 @@ static void *alloc_ep(int size, gfp_t gfp)
spin_lock_init(&epc->lock);
init_waitqueue_head(&epc->waitq);
}
- PDBG("%s alloc ep %p\n", __func__, epc);
+ pr_debug("%s alloc ep %p\n", __func__, epc);
return epc;
}
@@ -282,7 +282,8 @@ void __free_ep(struct kref *kref)
struct iwch_ep *ep;
ep = container_of(container_of(kref, struct iwch_ep_common, kref),
struct iwch_ep, com);
- PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
+ pr_debug("%s ep %p state %s\n",
+ __func__, ep, states[state_read(&ep->com)]);
if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
dst_release(ep->dst);
@@ -293,7 +294,7 @@ void __free_ep(struct kref *kref)
static void release_ep_resources(struct iwch_ep *ep)
{
- PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
set_bit(RELEASE_RESOURCES, &ep->com.flags);
put_ep(&ep->com);
}
@@ -358,7 +359,7 @@ static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
{
- PDBG("%s t3cdev %p\n", __func__, dev);
+ pr_debug("%s t3cdev %p\n", __func__, dev);
kfree_skb(skb);
}
@@ -367,7 +368,7 @@ static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
*/
static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
{
- printk(KERN_ERR MOD "ARP failure during connect\n");
+ pr_err("ARP failure during connect\n");
kfree_skb(skb);
}
@@ -379,7 +380,7 @@ static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
{
struct cpl_abort_req *req = cplhdr(skb);
- PDBG("%s t3cdev %p\n", __func__, dev);
+ pr_debug("%s t3cdev %p\n", __func__, dev);
req->cmd = CPL_ABORT_NO_RST;
iwch_cxgb3_ofld_send(dev, skb);
}
@@ -389,10 +390,10 @@ static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
struct cpl_close_con_req *req;
struct sk_buff *skb;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(NULL, sizeof(*req), gfp);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
skb->priority = CPL_PRIORITY_DATA;
@@ -408,11 +409,10 @@ static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
{
struct cpl_abort_req *req;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(skb, sizeof(*req), gfp);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
- __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
skb->priority = CPL_PRIORITY_DATA;
@@ -434,12 +434,11 @@ static int send_connect(struct iwch_ep *ep)
unsigned int mtu_idx;
int wscale;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
- __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
@@ -478,7 +477,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
struct mpa_message *mpa;
int len;
- PDBG("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
+ pr_debug("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
BUG_ON(skb_cloned(skb));
@@ -538,13 +537,13 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
struct mpa_message *mpa;
struct sk_buff *skb;
- PDBG("%s ep %p plen %d\n", __func__, ep, plen);
+ pr_debug("%s ep %p plen %d\n", __func__, ep, plen);
mpalen = sizeof(*mpa) + plen;
skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ pr_err("%s - cannot alloc skb!\n", __func__);
return -ENOMEM;
}
skb_reserve(skb, sizeof(*req));
@@ -587,13 +586,13 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
int len;
struct sk_buff *skb;
- PDBG("%s ep %p plen %d\n", __func__, ep, plen);
+ pr_debug("%s ep %p plen %d\n", __func__, ep, plen);
mpalen = sizeof(*mpa) + plen;
skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ pr_err("%s - cannot alloc skb!\n", __func__);
return -ENOMEM;
}
skb->priority = CPL_PRIORITY_DATA;
@@ -636,7 +635,7 @@ static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct cpl_act_establish *req = cplhdr(skb);
unsigned int tid = GET_TID(req);
- PDBG("%s ep %p tid %d\n", __func__, ep, tid);
+ pr_debug("%s ep %p tid %d\n", __func__, ep, tid);
dst_confirm(ep->dst);
@@ -660,7 +659,7 @@ static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
{
- PDBG("%s ep %p\n", __FILE__, ep);
+	pr_debug("%s ep %p\n", __func__, ep);
state_set(&ep->com, ABORTING);
send_abort(ep, skb, gfp);
}
@@ -669,12 +668,12 @@ static void close_complete_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
if (ep->com.cm_id) {
- PDBG("close complete delivered ep %p cm_id %p tid %d\n",
- ep, ep->com.cm_id, ep->hwtid);
+ pr_debug("close complete delivered ep %p cm_id %p tid %d\n",
+ ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
ep->com.cm_id->rem_ref(ep->com.cm_id);
ep->com.cm_id = NULL;
@@ -686,12 +685,12 @@ static void peer_close_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_DISCONNECT;
if (ep->com.cm_id) {
- PDBG("peer close delivered ep %p cm_id %p tid %d\n",
- ep, ep->com.cm_id, ep->hwtid);
+ pr_debug("peer close delivered ep %p cm_id %p tid %d\n",
+ ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
}
}
@@ -700,13 +699,13 @@ static void peer_abort_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
event.status = -ECONNRESET;
if (ep->com.cm_id) {
- PDBG("abort delivered ep %p cm_id %p tid %d\n", ep,
- ep->com.cm_id, ep->hwtid);
+ pr_debug("abort delivered ep %p cm_id %p tid %d\n", ep,
+ ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
ep->com.cm_id->rem_ref(ep->com.cm_id);
ep->com.cm_id = NULL;
@@ -718,7 +717,7 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status)
{
struct iw_cm_event event;
- PDBG("%s ep %p status %d\n", __func__, ep, status);
+ pr_debug("%s ep %p status %d\n", __func__, ep, status);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REPLY;
event.status = status;
@@ -732,8 +731,8 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status)
event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
}
if (ep->com.cm_id) {
- PDBG("%s ep %p tid %d status %d\n", __func__, ep,
- ep->hwtid, status);
+ pr_debug("%s ep %p tid %d status %d\n", __func__, ep,
+ ep->hwtid, status);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
}
if (status < 0) {
@@ -747,7 +746,7 @@ static void connect_request_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REQUEST;
memcpy(&event.local_addr, &ep->com.local_addr,
@@ -776,7 +775,7 @@ static void established_upcall(struct iwch_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_ESTABLISHED;
/*
@@ -785,7 +784,7 @@ static void established_upcall(struct iwch_ep *ep)
*/
event.ird = event.ord = 8;
if (ep->com.cm_id) {
- PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
}
}
@@ -795,10 +794,10 @@ static int update_rx_credits(struct iwch_ep *ep, u32 credits)
struct cpl_rx_data_ack *req;
struct sk_buff *skb;
- PDBG("%s ep %p credits %u\n", __func__, ep, credits);
+ pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
+ pr_err("update_rx_credits - cannot alloc skb!\n");
return 0;
}
@@ -819,7 +818,7 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
enum iwch_qp_attr_mask mask;
int err;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
/*
* Stop mpa timer. If it expired, then the state has
@@ -906,10 +905,10 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
ep->mpa_attr.version = mpa_rev;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d\n", __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+ pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
+ __func__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
attrs.mpa_attr = ep->mpa_attr;
attrs.max_ird = ep->ird;
@@ -944,7 +943,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
struct mpa_message *mpa;
u16 plen;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
/*
* Stop mpa timer. If it expired, then the state has
@@ -964,7 +963,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
return;
}
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
/*
* Copy the new data into our accumulation buffer.
@@ -979,7 +978,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
*/
if (ep->mpa_pkt_len < sizeof(*mpa))
return;
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
mpa = (struct mpa_message *) ep->mpa_pkt;
/*
@@ -1029,10 +1028,10 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
ep->mpa_attr.version = mpa_rev;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d\n", __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+ pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
+ __func__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
state_set(&ep->com, MPA_REQ_RCVD);
@@ -1047,7 +1046,7 @@ static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct cpl_rx_data *hdr = cplhdr(skb);
unsigned int dlen = ntohs(hdr->len);
- PDBG("%s ep %p dlen %u\n", __func__, ep, dlen);
+ pr_debug("%s ep %p dlen %u\n", __func__, ep, dlen);
skb_pull(skb, sizeof(*hdr));
skb_trim(skb, dlen);
@@ -1065,8 +1064,7 @@ static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
case MPA_REP_SENT:
break;
default:
- printk(KERN_ERR MOD "%s Unexpected streaming data."
- " ep %p state %d tid %d\n",
+ pr_err("%s Unexpected streaming data. ep %p state %d tid %d\n",
__func__, ep, state_read(&ep->com), ep->hwtid);
/*
@@ -1095,11 +1093,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
unsigned long flags;
int post_zb = 0;
- PDBG("%s ep %p credits %u\n", __func__, ep, credits);
+ pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
if (credits == 0) {
- PDBG("%s 0 credit ack ep %p state %u\n",
- __func__, ep, state_read(&ep->com));
+ pr_debug("%s 0 credit ack ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
return CPL_RET_BUF_DONE;
}
@@ -1107,24 +1105,24 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
BUG_ON(credits != 1);
dst_confirm(ep->dst);
if (!ep->mpa_skb) {
- PDBG("%s rdma_init wr_ack ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s rdma_init wr_ack ep %p state %u\n",
+ __func__, ep, ep->com.state);
if (ep->mpa_attr.initiator) {
- PDBG("%s initiator ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s initiator ep %p state %u\n",
+ __func__, ep, ep->com.state);
if (peer2peer && ep->com.state == FPDU_MODE)
post_zb = 1;
} else {
- PDBG("%s responder ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s responder ep %p state %u\n",
+ __func__, ep, ep->com.state);
if (ep->com.state == MPA_REQ_RCVD) {
ep->com.rpl_done = 1;
wake_up(&ep->com.waitq);
}
}
} else {
- PDBG("%s lsm ack ep %p state %u freeing skb\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s lsm ack ep %p state %u freeing skb\n",
+ __func__, ep, ep->com.state);
kfree_skb(ep->mpa_skb);
ep->mpa_skb = NULL;
}
@@ -1140,7 +1138,7 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
unsigned long flags;
int release = 0;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
BUG_ON(!ep);
/*
@@ -1159,8 +1157,7 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
release = 1;
break;
default:
- printk(KERN_ERR "%s ep %p state %d\n",
- __func__, ep, ep->com.state);
+ pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
break;
}
spin_unlock_irqrestore(&ep->com.lock, flags);
@@ -1184,8 +1181,8 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct iwch_ep *ep = ctx;
struct cpl_act_open_rpl *rpl = cplhdr(skb);
- PDBG("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
- status2errno(rpl->status));
+ pr_debug("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
+ status2errno(rpl->status));
connect_reply_upcall(ep, status2errno(rpl->status));
state_set(&ep->com, DEAD);
if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status))
@@ -1202,10 +1199,10 @@ static int listen_start(struct iwch_listen_ep *ep)
struct sk_buff *skb;
struct cpl_pass_open_req *req;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "t3c_listen_start failed to alloc skb!\n");
+ pr_err("t3c_listen_start failed to alloc skb!\n");
return -ENOMEM;
}
@@ -1230,8 +1227,8 @@ static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct iwch_listen_ep *ep = ctx;
struct cpl_pass_open_rpl *rpl = cplhdr(skb);
- PDBG("%s ep %p status %d error %d\n", __func__, ep,
- rpl->status, status2errno(rpl->status));
+ pr_debug("%s ep %p status %d error %d\n", __func__, ep,
+ rpl->status, status2errno(rpl->status));
ep->com.rpl_err = status2errno(rpl->status);
ep->com.rpl_done = 1;
wake_up(&ep->com.waitq);
@@ -1244,10 +1241,10 @@ static int listen_stop(struct iwch_listen_ep *ep)
struct sk_buff *skb;
struct cpl_close_listserv_req *req;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req));
@@ -1264,7 +1261,7 @@ static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
struct iwch_listen_ep *ep = ctx;
struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
ep->com.rpl_err = status2errno(rpl->status);
ep->com.rpl_done = 1;
wake_up(&ep->com.waitq);
@@ -1278,7 +1275,7 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
u32 opt0h, opt0l, opt2;
int wscale;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
BUG_ON(skb_cloned(skb));
skb_trim(skb, sizeof(*rpl));
skb_get(skb);
@@ -1312,8 +1309,8 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
struct sk_buff *skb)
{
- PDBG("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
- peer_ip);
+ pr_debug("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
+ peer_ip);
BUG_ON(skb_cloned(skb));
skb_trim(skb, sizeof(struct cpl_tid_release));
skb_get(skb);
@@ -1347,11 +1344,10 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct rtable *rt;
struct iff_mac tim;
- PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
+ pr_debug("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
if (state_read(&parent_ep->com) != LISTEN) {
- printk(KERN_ERR "%s - listening ep not in LISTEN\n",
- __func__);
+ pr_err("%s - listening ep not in LISTEN\n", __func__);
goto reject;
}
@@ -1361,8 +1357,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
tim.mac_addr = req->dst_mac;
tim.vlan_tag = ntohs(req->vlan_tag);
if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
- printk(KERN_ERR "%s bad dst mac %pM\n",
- __func__, req->dst_mac);
+ pr_err("%s bad dst mac %pM\n", __func__, req->dst_mac);
goto reject;
}
@@ -1373,22 +1368,19 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
req->local_port,
req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
if (!rt) {
- printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
- __func__);
+ pr_err("%s - failed to find dst entry!\n", __func__);
goto reject;
}
dst = &rt->dst;
l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip);
if (!l2t) {
- printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
- __func__);
+ pr_err("%s - failed to allocate l2t entry!\n", __func__);
dst_release(dst);
goto reject;
}
child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
if (!child_ep) {
- printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
- __func__);
+ pr_err("%s - failed to allocate ep entry!\n", __func__);
l2t_release(tdev, l2t);
dst_release(dst);
goto reject;
@@ -1423,7 +1415,7 @@ static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct iwch_ep *ep = ctx;
struct cpl_pass_establish *req = cplhdr(skb);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
ep->snd_seq = ntohl(req->snd_isn);
ep->rcv_seq = ntohl(req->rcv_isn);
@@ -1444,7 +1436,7 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
int disconnect = 1;
int release = 0;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
dst_confirm(ep->dst);
spin_lock_irqsave(&ep->com.lock, flags);
@@ -1467,14 +1459,14 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
__state_set(&ep->com, CLOSING);
ep->com.rpl_done = 1;
ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
+ pr_debug("waking up ep %p\n", ep);
wake_up(&ep->com.waitq);
break;
case MPA_REP_SENT:
__state_set(&ep->com, CLOSING);
ep->com.rpl_done = 1;
ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
+ pr_debug("waking up ep %p\n", ep);
wake_up(&ep->com.waitq);
break;
case FPDU_MODE:
@@ -1539,8 +1531,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
unsigned long flags;
if (is_neg_adv_abort(req->status)) {
- PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
- ep->hwtid);
+ pr_debug("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
+ ep->hwtid);
t3_l2t_send_event(ep->com.tdev, ep->l2t);
return CPL_RET_BUF_DONE;
}
@@ -1554,7 +1546,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
}
spin_lock_irqsave(&ep->com.lock, flags);
- PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state);
+ pr_debug("%s ep %p state %u\n", __func__, ep, ep->com.state);
switch (ep->com.state) {
case CONNECTING:
break;
@@ -1568,7 +1560,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
case MPA_REP_SENT:
ep->com.rpl_done = 1;
ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
+ pr_debug("waking up ep %p\n", ep);
wake_up(&ep->com.waitq);
break;
case MPA_REQ_RCVD:
@@ -1581,7 +1573,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
*/
ep->com.rpl_done = 1;
ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
+ pr_debug("waking up ep %p\n", ep);
wake_up(&ep->com.waitq);
break;
case MORIBUND:
@@ -1595,16 +1587,14 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1);
if (ret)
- printk(KERN_ERR MOD
- "%s - qp <- error failed!\n",
- __func__);
+ pr_err("%s - qp <- error failed!\n", __func__);
}
peer_abort_upcall(ep);
break;
case ABORTING:
break;
case DEAD:
- PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
+ pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
spin_unlock_irqrestore(&ep->com.lock, flags);
return CPL_RET_BUF_DONE;
default:
@@ -1620,8 +1610,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
if (!rpl_skb) {
- printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
- __func__);
+ pr_err("%s - cannot allocate skb!\n", __func__);
release = 1;
goto out;
}
@@ -1645,7 +1634,7 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
unsigned long flags;
int release = 0;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
BUG_ON(!ep);
/* The cm_id may be null if we failed to connect */
@@ -1699,9 +1688,9 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
if (state_read(&ep->com) != FPDU_MODE)
return CPL_RET_BUF_DONE;
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
skb_pull(skb, sizeof(struct cpl_rdma_terminate));
- PDBG("%s saving %d bytes of term msg\n", __func__, skb->len);
+ pr_debug("%s saving %d bytes of term msg\n", __func__, skb->len);
skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
skb->len);
ep->com.qp->attr.terminate_msg_len = skb->len;
@@ -1714,12 +1703,12 @@ static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct cpl_rdma_ec_status *rep = cplhdr(skb);
struct iwch_ep *ep = ctx;
- PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
- rep->status);
+ pr_debug("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
+ rep->status);
if (rep->status) {
struct iwch_qp_attributes attrs;
- printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n",
+ pr_err("%s BAD CLOSE - Aborting tid %u\n",
__func__, ep->hwtid);
stop_ep_timer(ep);
attrs.next_state = IWCH_QP_STATE_ERROR;
@@ -1739,8 +1728,8 @@ static void ep_timeout(unsigned long arg)
int abort = 1;
spin_lock_irqsave(&ep->com.lock, flags);
- PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
- ep->com.state);
+ pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
+ ep->com.state);
switch (ep->com.state) {
case MPA_REQ_SENT:
__state_set(&ep->com, ABORTING);
@@ -1774,7 +1763,7 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
int err;
struct iwch_ep *ep = to_ep(cm_id);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (state_read(&ep->com) == DEAD) {
put_ep(&ep->com);
@@ -1800,7 +1789,7 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct iwch_dev *h = to_iwch_dev(cm_id->device);
struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (state_read(&ep->com) == DEAD) {
err = -ECONNRESET;
goto err;
@@ -1826,7 +1815,7 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (peer2peer && ep->ird == 0)
ep->ird = 1;
- PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
+ pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
/* bind QP to EP and move to RTS */
attrs.mpa_attr = ep->mpa_attr;
@@ -1907,7 +1896,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ pr_err("%s - cannot alloc ep\n", __func__);
err = -ENOMEM;
goto out;
}
@@ -1928,15 +1917,15 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->com.cm_id = cm_id;
ep->com.qp = get_qhp(h, conn_param->qpn);
BUG_ON(!ep->com.qp);
- PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
- ep->com.qp, cm_id);
+ pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
+ ep->com.qp, cm_id);
/*
* Allocate an active TID to initiate a TCP connection.
*/
ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
if (ep->atid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+ pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -1946,7 +1935,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
raddr->sin_addr.s_addr, laddr->sin_port,
raddr->sin_port, IPTOS_LOWDELAY);
if (!rt) {
- printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
+ pr_err("%s - cannot find route\n", __func__);
err = -EHOSTUNREACH;
goto fail3;
}
@@ -1954,7 +1943,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
&raddr->sin_addr.s_addr);
if (!ep->l2t) {
- printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
+ pr_err("%s - cannot alloc l2e\n", __func__);
err = -ENOMEM;
goto fail4;
}
@@ -1999,11 +1988,11 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ pr_err("%s - cannot alloc ep\n", __func__);
err = -ENOMEM;
goto fail1;
}
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
ep->com.tdev = h->rdev.t3cdev_p;
cm_id->add_ref(cm_id);
ep->com.cm_id = cm_id;
@@ -2016,7 +2005,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
*/
ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
if (ep->stid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+ pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -2048,7 +2037,7 @@ int iwch_destroy_listen(struct iw_cm_id *cm_id)
int err;
struct iwch_listen_ep *ep = to_listen_ep(cm_id);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
might_sleep();
state_set(&ep->com, DEAD);
@@ -2077,8 +2066,8 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
spin_lock_irqsave(&ep->com.lock, flags);
- PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
- states[ep->com.state], abrupt);
+ pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep,
+ states[ep->com.state], abrupt);
tdev = (struct t3cdev *)ep->com.tdev;
rdev = (struct cxio_rdev *)tdev->ulp;
@@ -2115,8 +2104,8 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
case MORIBUND:
case ABORTING:
case DEAD:
- PDBG("%s ignoring disconnect ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s ignoring disconnect ep %p state %u\n",
+ __func__, ep, ep->com.state);
break;
default:
BUG();
@@ -2145,8 +2134,8 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
if (ep->dst != old)
return 0;
- PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
- l2t);
+ pr_debug("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
+ l2t);
dst_hold(new);
l2t_release(ep->com.tdev, ep->l2t);
ep->l2t = l2t;
@@ -2225,8 +2214,8 @@ static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
if (rpl->status != CPL_ERR_NONE) {
- printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
- "for tid %u\n", rpl->status, GET_TID(rpl));
+ pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
+ rpl->status, GET_TID(rpl));
}
return CPL_RET_BUF_DONE;
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index e66e75921797..cc7fe644d260 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -53,17 +53,17 @@
#define MPA_MARKERS 0x80
#define MPA_FLAGS_MASK 0xE0
-#define put_ep(ep) { \
- PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
- ep, kref_read(&((ep)->kref))); \
- WARN_ON(kref_read(&((ep)->kref)) < 1); \
- kref_put(&((ep)->kref), __free_ep); \
+#define put_ep(ep) { \
+ pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n", \
+ __func__, __LINE__, ep, kref_read(&((ep)->kref))); \
+ WARN_ON(kref_read(&((ep)->kref)) < 1); \
+ kref_put(&((ep)->kref), __free_ep); \
}
-#define get_ep(ep) { \
- PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
- ep, kref_read(&((ep)->kref))); \
- kref_get(&((ep)->kref)); \
+#define get_ep(ep) { \
+ pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n", \
+ __func__, __LINE__, ep, kref_read(&((ep)->kref))); \
+ kref_get(&((ep)->kref)); \
}
struct mpa_message {
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index 97fbfd2c298e..dd5348e48806 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -67,8 +67,8 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
&credit);
if (t3a_device(chp->rhp) && credit) {
- PDBG("%s updating %d cq credits on id %d\n", __func__,
- credit, chp->cq.cqid);
+ pr_debug("%s updating %d cq credits on id %d\n", __func__,
+ credit, chp->cq.cqid);
cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit);
}
@@ -83,11 +83,11 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
wc->vendor_err = CQE_STATUS(cqe);
wc->wc_flags = 0;
- PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x "
- "lo 0x%x cookie 0x%llx\n", __func__,
- CQE_QPID(cqe), CQE_TYPE(cqe),
- CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
- CQE_WRID_LOW(cqe), (unsigned long long) cookie);
+ pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
+ __func__,
+ CQE_QPID(cqe), CQE_TYPE(cqe),
+ CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
+ CQE_WRID_LOW(cqe), (unsigned long long)cookie);
if (CQE_TYPE(cqe) == 0) {
if (!CQE_STATUS(cqe))
@@ -122,8 +122,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
wc->opcode = IB_WC_REG_MR;
break;
default:
- printk(KERN_ERR MOD "Unexpected opcode %d "
- "in the CQE received for QPID=0x%0x\n",
+ pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
CQE_OPCODE(cqe), CQE_QPID(cqe));
ret = -EINVAL;
goto out;
@@ -177,8 +176,8 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
wc->status = IB_WC_WR_FLUSH_ERR;
break;
default:
- printk(KERN_ERR MOD "Unexpected cqe_status 0x%x for "
- "QPID=0x%0x\n", CQE_STATUS(cqe), CQE_QPID(cqe));
+ pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
+ CQE_STATUS(cqe), CQE_QPID(cqe));
ret = -EINVAL;
}
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
index abcc9e76962b..4a0c82a8fb60 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -52,7 +52,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
if (!qhp) {
- printk(KERN_ERR "%s unaffiliated error 0x%x qpid 0x%x\n",
+ pr_err("%s unaffiliated error 0x%x qpid 0x%x\n",
__func__, CQE_STATUS(rsp_msg->cqe),
CQE_QPID(rsp_msg->cqe));
spin_unlock(&rnicp->lock);
@@ -61,15 +61,16 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
(qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
- PDBG("%s AE received after RTS - "
- "qp state %d qpid 0x%x status 0x%x\n", __func__,
- qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe));
+ pr_debug("%s AE received after RTS - qp state %d qpid 0x%x status 0x%x\n",
+ __func__,
+ qhp->attr.state, qhp->wq.qpid,
+ CQE_STATUS(rsp_msg->cqe));
spin_unlock(&rnicp->lock);
return;
}
- printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x "
- "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __func__,
+ pr_err("%s - AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ __func__,
CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
@@ -117,8 +118,7 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
chp = get_chp(rnicp, cqid);
qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
if (!chp || !qhp) {
- printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d "
- "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x \n",
+ pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
cqid, CQE_QPID(rsp_msg->cqe),
CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
@@ -137,12 +137,12 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) &&
(CQE_STATUS(rsp_msg->cqe) == 0)) {
if (SQ_TYPE(rsp_msg->cqe)) {
- PDBG("%s QPID 0x%x ep %p disconnecting\n",
- __func__, qhp->wq.qpid, qhp->ep);
+ pr_debug("%s QPID 0x%x ep %p disconnecting\n",
+ __func__, qhp->wq.qpid, qhp->ep);
iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
} else {
- PDBG("%s post REQ_ERR AE QPID 0x%x\n", __func__,
- qhp->wq.qpid);
+ pr_debug("%s post REQ_ERR AE QPID 0x%x\n", __func__,
+ qhp->wq.qpid);
post_qp_event(rnicp, chp, rsp_msg,
IB_EVENT_QP_REQ_ERR, 0);
iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
@@ -218,7 +218,7 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
break;
default:
- printk(KERN_ERR MOD "Unknown T3 status 0x%x QPID 0x%x\n",
+ pr_err("Unknown T3 status 0x%x QPID 0x%x\n",
CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid);
post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 1d04c872c9d5..12886b1b4b10 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -48,7 +48,7 @@ static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+ pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 86ecd3ea6a4b..29d30744d6c9 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -62,7 +62,7 @@
#include "common.h"
static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
return ERR_PTR(-ENOSYS);
@@ -103,7 +103,7 @@ static int iwch_dealloc_ucontext(struct ib_ucontext *context)
struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
struct iwch_mm_entry *mm, *tmp;
- PDBG("%s context %p\n", __func__, context);
+ pr_debug("%s context %p\n", __func__, context);
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
@@ -117,7 +117,7 @@ static struct ib_ucontext *iwch_alloc_ucontext(struct ib_device *ibdev,
struct iwch_ucontext *context;
struct iwch_dev *rhp = to_iwch_dev(ibdev);
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
@@ -131,7 +131,7 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq)
{
struct iwch_cq *chp;
- PDBG("%s ib_cq %p\n", __func__, ib_cq);
+ pr_debug("%s ib_cq %p\n", __func__, ib_cq);
chp = to_iwch_cq(ib_cq);
remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
@@ -157,7 +157,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
static int warned;
size_t resplen;
- PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
+ pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
if (attr->flags)
return ERR_PTR(-EINVAL);
@@ -227,8 +227,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
mm->addr = virt_to_phys(chp->cq.queue);
if (udata->outlen < sizeof uresp) {
if (!warned++)
- printk(KERN_WARNING MOD "Warning - "
- "downlevel libcxgb3 (non-fatal).\n");
+ pr_warn("Warning - downlevel libcxgb3 (non-fatal)\n");
mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
sizeof(struct t3_cqe));
resplen = sizeof(struct iwch_create_cq_resp_v0);
@@ -246,9 +245,9 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
}
insert_mmap(ucontext, mm);
}
- PDBG("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n",
- chp->cq.cqid, chp, (1 << chp->cq.size_log2),
- (unsigned long long) chp->cq.dma_addr);
+ pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n",
+ chp->cq.cqid, chp, (1 << chp->cq.size_log2),
+ (unsigned long long)chp->cq.dma_addr);
return &chp->ibcq;
}
@@ -259,7 +258,7 @@ static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
struct t3_cq oldcq, newcq;
int ret;
- PDBG("%s ib_cq %p cqe %d\n", __func__, cq, cqe);
+ pr_debug("%s ib_cq %p cqe %d\n", __func__, cq, cqe);
/* We don't downsize... */
if (cqe <= cq->cqe)
@@ -306,8 +305,7 @@ static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
oldcq.cqid = newcq.cqid;
ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
if (ret) {
- printk(KERN_ERR MOD "%s - cxio_destroy_cq failed %d\n",
- __func__, ret);
+ pr_err("%s - cxio_destroy_cq failed %d\n", __func__, ret);
}
/* add user hooks here */
@@ -342,12 +340,11 @@ static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
chp->cq.rptr = rptr;
} else
spin_lock_irqsave(&chp->lock, flag);
- PDBG("%s rptr 0x%x\n", __func__, chp->cq.rptr);
+ pr_debug("%s rptr 0x%x\n", __func__, chp->cq.rptr);
err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
spin_unlock_irqrestore(&chp->lock, flag);
if (err < 0)
- printk(KERN_ERR MOD "Error %d rearming CQID 0x%x\n", err,
- chp->cq.cqid);
+ pr_err("Error %d rearming CQID 0x%x\n", err, chp->cq.cqid);
if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
err = 0;
return err;
@@ -363,8 +360,8 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
struct iwch_ucontext *ucontext;
u64 addr;
- PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
- key, len);
+ pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
+ key, len);
if (vma->vm_start & (PAGE_SIZE-1)) {
return -EINVAL;
@@ -416,7 +413,7 @@ static int iwch_deallocate_pd(struct ib_pd *pd)
php = to_iwch_pd(pd);
rhp = php->rhp;
- PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
+ pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
kfree(php);
return 0;
@@ -430,7 +427,7 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
u32 pdid;
struct iwch_dev *rhp;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
rhp = (struct iwch_dev *) ibdev;
pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
if (!pdid)
@@ -448,7 +445,7 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
return ERR_PTR(-EFAULT);
}
}
- PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
+ pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
return &php->ibpd;
}
@@ -458,7 +455,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
struct iwch_mr *mhp;
u32 mmid;
- PDBG("%s ib_mr %p\n", __func__, ib_mr);
+ pr_debug("%s ib_mr %p\n", __func__, ib_mr);
mhp = to_iwch_mr(ib_mr);
kfree(mhp->pages);
@@ -472,7 +469,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
kfree((void *) (unsigned long) mhp->kva);
if (mhp->umem)
ib_umem_release(mhp->umem);
- PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
+ pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
kfree(mhp);
return 0;
}
@@ -487,13 +484,13 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
__be64 *page_list;
int shift = 26, npages, ret, i;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
/*
* T3 only supports 32 bits of size.
*/
if (sizeof(phys_addr_t) > 4) {
- pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
+ pr_warn_once("Cannot support dma_mrs on this platform\n");
return ERR_PTR(-ENOTSUPP);
}
@@ -518,8 +515,8 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
for (i = 0; i < npages; i++)
page_list[i] = cpu_to_be64((u64)i << shift);
- PDBG("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
- __func__, mask, shift, total_size, npages);
+ pr_debug("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
+ __func__, mask, shift, total_size, npages);
ret = iwch_alloc_pbl(mhp, npages);
if (ret) {
@@ -567,7 +564,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct iwch_mr *mhp;
struct iwch_reg_user_mr_resp uresp;
struct scatterlist *sg;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
php = to_iwch_pd(pd);
rhp = php->rhp;
@@ -584,7 +581,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(err);
}
- shift = ffs(mhp->umem->page_size) - 1;
+ shift = mhp->umem->page_shift;
n = mhp->umem->nmap;
@@ -604,7 +601,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(sg) +
- mhp->umem->page_size * k);
+ (k << shift));
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
if (err)
@@ -637,8 +634,8 @@ pbl_done:
if (udata && !t3a_device(rhp)) {
uresp.pbl_addr = (mhp->attr.pbl_addr -
rhp->rdev.rnic_info.pbl_base) >> 3;
- PDBG("%s user resp pbl_addr 0x%x\n", __func__,
- uresp.pbl_addr);
+ pr_debug("%s user resp pbl_addr 0x%x\n", __func__,
+ uresp.pbl_addr);
if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
iwch_dereg_mr(&mhp->ibmr);
@@ -692,7 +689,7 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
kfree(mhp);
return ERR_PTR(-ENOMEM);
}
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmw);
}
@@ -707,7 +704,7 @@ static int iwch_dealloc_mw(struct ib_mw *mw)
mmid = (mw->rkey) >> 8;
cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
remove_handle(rhp, &rhp->mmidr, mmid);
- PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
+ pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
kfree(mhp);
return 0;
}
@@ -757,7 +754,7 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
if (insert_handle(rhp, &rhp->mmidr, mhp, mmid))
goto err3;
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmr);
err3:
cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
@@ -818,8 +815,8 @@ static int iwch_destroy_qp(struct ib_qp *ib_qp)
cxio_destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
- PDBG("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__,
- ib_qp, qhp->wq.qpid, qhp);
+ pr_debug("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__,
+ ib_qp, qhp->wq.qpid, qhp);
kfree(qhp);
return 0;
}
@@ -837,7 +834,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
int wqsize, sqsize, rqsize;
struct iwch_ucontext *ucontext;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
if (attrs->qp_type != IB_QPT_RC)
return ERR_PTR(-EINVAL);
php = to_iwch_pd(pd);
@@ -878,8 +875,8 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
wqsize = roundup_pow_of_two(rqsize +
roundup_pow_of_two(attrs->cap.max_send_wr * 2));
- PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__,
- wqsize, sqsize, rqsize);
+ pr_debug("%s wqsize %d sqsize %d rqsize %d\n", __func__,
+ wqsize, sqsize, rqsize);
qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
if (!qhp)
return ERR_PTR(-ENOMEM);
@@ -974,11 +971,10 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
}
qhp->ibqp.qp_num = qhp->wq.qpid;
init_timer(&(qhp->timer));
- PDBG("%s sq_num_entries %d, rq_num_entries %d "
- "qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n",
- __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
- qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr,
- 1 << qhp->wq.size_log2, qhp->wq.rq_addr);
+ pr_debug("%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n",
+ __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
+ qhp->wq.qpid, qhp, (unsigned long long)qhp->wq.dma_addr,
+ 1 << qhp->wq.size_log2, qhp->wq.rq_addr);
return &qhp->ibqp;
}
@@ -990,7 +986,7 @@ static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
enum iwch_qp_attr_mask mask = 0;
struct iwch_qp_attributes attrs;
- PDBG("%s ib_qp %p\n", __func__, ibqp);
+ pr_debug("%s ib_qp %p\n", __func__, ibqp);
/* iwarp does not support the RTR state */
if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
@@ -1023,20 +1019,20 @@ static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
void iwch_qp_add_ref(struct ib_qp *qp)
{
- PDBG("%s ib_qp %p\n", __func__, qp);
+ pr_debug("%s ib_qp %p\n", __func__, qp);
atomic_inc(&(to_iwch_qp(qp)->refcnt));
}
void iwch_qp_rem_ref(struct ib_qp *qp)
{
- PDBG("%s ib_qp %p\n", __func__, qp);
+ pr_debug("%s ib_qp %p\n", __func__, qp);
if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt)))
wake_up(&(to_iwch_qp(qp)->wait));
}
static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
{
- PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
+ pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
}
@@ -1044,7 +1040,7 @@ static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
static int iwch_query_pkey(struct ib_device *ibdev,
u8 port, u16 index, u16 * pkey)
{
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
*pkey = 0;
return 0;
}
@@ -1054,8 +1050,8 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port,
{
struct iwch_dev *dev;
- PDBG("%s ibdev %p, port %d, index %d, gid %p\n",
- __func__, ibdev, port, index, gid);
+ pr_debug("%s ibdev %p, port %d, index %d, gid %p\n",
+ __func__, ibdev, port, index, gid);
dev = to_iwch_dev(ibdev);
BUG_ON(port == 0 || port > 2);
memset(&(gid->raw[0]), 0, sizeof(gid->raw));
@@ -1090,7 +1086,7 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
struct iwch_dev *dev;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
if (uhw->inlen || uhw->outlen)
return -EINVAL;
@@ -1128,7 +1124,7 @@ static int iwch_query_port(struct ib_device *ibdev,
struct net_device *netdev;
struct in_device *inetdev;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
dev = to_iwch_dev(ibdev);
netdev = dev->rdev.port_info.lldevs[port-1];
@@ -1171,7 +1167,7 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
}
@@ -1183,7 +1179,7 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
struct ethtool_drvinfo info;
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver);
}
@@ -1193,7 +1189,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
iwch_dev->rdev.rnic_info.pdev->device);
}
@@ -1278,7 +1274,7 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
if (port != 0 || !stats)
return -ENOSYS;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
dev = to_iwch_dev(ibdev);
ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m);
if (ret)
@@ -1348,7 +1344,7 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str,
struct ethtool_drvinfo info;
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
- PDBG("%s dev 0x%p\n", __func__, iwch_dev);
+ pr_debug("%s dev 0x%p\n", __func__, iwch_dev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
snprintf(str, str_len, "%s", info.fw_version);
}
@@ -1358,7 +1354,7 @@ int iwch_register_device(struct iwch_dev *dev)
int ret;
int i;
- PDBG("%s iwch_dev %p\n", __func__, dev);
+ pr_debug("%s iwch_dev %p\n", __func__, dev);
strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
@@ -1469,7 +1465,7 @@ void iwch_unregister_device(struct iwch_dev *dev)
{
int i;
- PDBG("%s iwch_dev %p\n", __func__, dev);
+ pr_debug("%s iwch_dev %p\n", __func__, dev);
for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
device_remove_file(&dev->ibdev.dev,
iwch_class_attributes[i]);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 252c464a09f6..9e216edec4c0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -217,8 +217,9 @@ static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext,
if (mm->key == key && mm->len == len) {
list_del_init(&mm->entry);
spin_unlock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- key, (unsigned long long) mm->addr, mm->len);
+ pr_debug("%s key 0x%x addr 0x%llx len %d\n",
+ __func__, key,
+ (unsigned long long)mm->addr, mm->len);
return mm;
}
}
@@ -230,8 +231,8 @@ static inline void insert_mmap(struct iwch_ucontext *ucontext,
struct iwch_mm_entry *mm)
{
spin_lock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- mm->key, (unsigned long long) mm->addr, mm->len);
+ pr_debug("%s key 0x%x addr 0x%llx len %d\n",
+ __func__, mm->key, (unsigned long long)mm->addr, mm->len);
list_add_tail(&mm->entry, &ucontext->mmaps);
spin_unlock(&ucontext->mmap_lock);
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index a9194db7f9b8..ba6d5d281b03 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -208,30 +208,30 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
if (!mhp) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EIO;
}
if (!mhp->attr.state) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EIO;
}
if (mhp->attr.zbva) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EIO;
}
if (sg_list[i].addr < mhp->attr.va_fbo) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EINVAL;
}
if (sg_list[i].addr + ((u64) sg_list[i].length) <
sg_list[i].addr) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EINVAL;
}
if (sg_list[i].addr + ((u64) sg_list[i].length) >
mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
return -EINVAL;
}
offset = sg_list[i].addr - mhp->attr.va_fbo;
@@ -427,8 +427,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt);
break;
default:
- PDBG("%s post of type=%d TBD!\n", __func__,
- wr->opcode);
+ pr_debug("%s post of type=%d TBD!\n", __func__,
+ wr->opcode);
err = -EINVAL;
}
if (err)
@@ -444,10 +444,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
0, t3_wr_flit_cnt,
(wr_cnt == 1) ? T3_SOPEOP : T3_SOP);
- PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
- __func__, (unsigned long long) wr->wr_id, idx,
- Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
- sqp->opcode);
+ pr_debug("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
+ __func__, (unsigned long long)wr->wr_id, idx,
+ Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
+ sqp->opcode);
wr = wr->next;
num_wrs--;
qhp->wq.wptr += wr_cnt;
@@ -508,9 +508,9 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
- PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x "
- "wqe %p \n", __func__, (unsigned long long) wr->wr_id,
- idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
+ pr_debug("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x wqe %p\n",
+ __func__, (unsigned long long)wr->wr_id,
+ idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
++(qhp->wq.rq_wptr);
++(qhp->wq.wptr);
wr = wr->next;
@@ -664,10 +664,10 @@ int iwch_post_zb_read(struct iwch_ep *ep)
struct sk_buff *skb;
u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
- PDBG("%s enter\n", __func__);
+ pr_debug("%s enter\n", __func__);
skb = alloc_skb(40, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR "%s cannot send zb_read!!\n", __func__);
+ pr_err("%s cannot send zb_read!!\n", __func__);
return -ENOMEM;
}
wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr));
@@ -696,10 +696,10 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
struct terminate_message *term;
struct sk_buff *skb;
- PDBG("%s %d\n", __func__, __LINE__);
+ pr_debug("%s %d\n", __func__, __LINE__);
skb = alloc_skb(40, GFP_ATOMIC);
if (!skb) {
- printk(KERN_ERR "%s cannot send TERMINATE!\n", __func__);
+ pr_err("%s cannot send TERMINATE!\n", __func__);
return -ENOMEM;
}
wqe = (union t3_wr *)skb_put(skb, 40);
@@ -729,7 +729,7 @@ static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
int flushed;
- PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
+ pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
/* take a ref on the qhp since we must release the lock */
atomic_inc(&qhp->refcnt);
spin_unlock(&qhp->lock);
@@ -807,7 +807,7 @@ u16 iwch_rqes_posted(struct iwch_qp *qhp)
count++;
wqe++;
}
- PDBG("%s qhp %p count %u\n", __func__, qhp, count);
+ pr_debug("%s qhp %p count %u\n", __func__, qhp, count);
return count;
}
@@ -854,12 +854,12 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
} else
init_attr.rtr_type = 0;
init_attr.irs = qhp->ep->rcv_seq;
- PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
- "flags 0x%x qpcaps 0x%x\n", __func__,
- init_attr.rq_addr, init_attr.rq_size,
- init_attr.flags, init_attr.qpcaps);
+ pr_debug("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d flags 0x%x qpcaps 0x%x\n",
+ __func__,
+ init_attr.rq_addr, init_attr.rq_size,
+ init_attr.flags, init_attr.qpcaps);
ret = cxio_rdma_init(&rhp->rdev, &init_attr);
- PDBG("%s ret %d\n", __func__, ret);
+ pr_debug("%s ret %d\n", __func__, ret);
return ret;
}
@@ -877,9 +877,9 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
int free = 0;
struct iwch_ep *ep = NULL;
- PDBG("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__,
- qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
- (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
+ pr_debug("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__,
+ qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
+ (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
spin_lock_irqsave(&qhp->lock, flag);
@@ -1034,16 +1034,15 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
goto err;
break;
default:
- printk(KERN_ERR "%s in a bad state %d\n",
- __func__, qhp->attr.state);
+ pr_err("%s in a bad state %d\n", __func__, qhp->attr.state);
ret = -EINVAL;
goto err;
break;
}
goto out;
err:
- PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
- qhp->wq.qpid);
+ pr_debug("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
+ qhp->wq.qpid);
/* disassociate the LLP connection */
qhp->attr.llp_stream_handle = NULL;
@@ -1077,6 +1076,6 @@ out:
if (free)
put_ep(&ep->com);
- PDBG("%s exit state %d\n", __func__, qhp->attr.state);
+ pr_debug("%s exit state %d\n", __func__, qhp->attr.state);
return ret;
}
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 03a1b0e64fc3..b6fe45924c6e 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -101,7 +101,7 @@ MODULE_PARM_DESC(enable_tcp_window_scaling,
int c4iw_debug;
module_param(c4iw_debug, int, 0644);
-MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)");
+MODULE_PARM_DESC(c4iw_debug, "obsolete");
static int peer2peer = 1;
module_param(peer2peer, int, 0644);
@@ -180,7 +180,7 @@ static void ref_qp(struct c4iw_ep *ep)
static void start_ep_timer(struct c4iw_ep *ep)
{
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
if (timer_pending(&ep->timer)) {
pr_err("%s timer already started! ep %p\n",
__func__, ep);
@@ -196,7 +196,7 @@ static void start_ep_timer(struct c4iw_ep *ep)
static int stop_ep_timer(struct c4iw_ep *ep)
{
- PDBG("%s ep %p stopping\n", __func__, ep);
+ pr_debug("%s ep %p stopping\n", __func__, ep);
del_timer_sync(&ep->timer);
if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
c4iw_put_ep(&ep->com);
@@ -212,7 +212,7 @@ static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
if (c4iw_fatal_error(rdev)) {
kfree_skb(skb);
- PDBG("%s - device in error state - dropping\n", __func__);
+ pr_debug("%s - device in error state - dropping\n", __func__);
return -EIO;
}
error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
@@ -229,7 +229,7 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
if (c4iw_fatal_error(rdev)) {
kfree_skb(skb);
- PDBG("%s - device in error state - dropping\n", __func__);
+ pr_debug("%s - device in error state - dropping\n", __func__);
return -EIO;
}
error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
@@ -263,10 +263,10 @@ static void set_emss(struct c4iw_ep *ep, u16 opt)
if (ep->emss < 128)
ep->emss = 128;
if (ep->emss & 7)
- PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n",
- TCPOPT_MSS_G(opt), ep->mss, ep->emss);
- PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, TCPOPT_MSS_G(opt),
- ep->mss, ep->emss);
+ pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n",
+ TCPOPT_MSS_G(opt), ep->mss, ep->emss);
+ pr_debug("%s mss_idx %u mss %u emss=%u\n", __func__, TCPOPT_MSS_G(opt),
+ ep->mss, ep->emss);
}
static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
@@ -287,7 +287,7 @@ static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
{
mutex_lock(&epc->mutex);
- PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
+ pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
__state_set(epc, new);
mutex_unlock(&epc->mutex);
return;
@@ -322,7 +322,7 @@ static void *alloc_ep(int size, gfp_t gfp)
mutex_init(&epc->mutex);
c4iw_init_wr_wait(&epc->wr_wait);
}
- PDBG("%s alloc ep %p\n", __func__, epc);
+ pr_debug("%s alloc ep %p\n", __func__, epc);
return epc;
}
@@ -384,7 +384,7 @@ void _c4iw_free_ep(struct kref *kref)
struct c4iw_ep *ep;
ep = container_of(kref, struct c4iw_ep, com.kref);
- PDBG("%s ep %p state %s\n", __func__, ep, states[ep->com.state]);
+ pr_debug("%s ep %p state %s\n", __func__, ep, states[ep->com.state]);
if (test_bit(QP_REFERENCED, &ep->com.flags))
deref_qp(ep);
if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
@@ -467,7 +467,7 @@ static struct net_device *get_real_dev(struct net_device *egress_dev)
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
- pr_err(MOD "ARP failure\n");
+ pr_err("ARP failure\n");
kfree_skb(skb);
}
@@ -528,7 +528,7 @@ static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
{
struct c4iw_ep *ep = handle;
- pr_err(MOD "ARP failure during accept - tid %u -dropping connection\n",
+ pr_err("ARP failure during accept - tid %u - dropping connection\n",
ep->hwtid);
__state_set(&ep->com, DEAD);
@@ -542,7 +542,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
{
struct c4iw_ep *ep = handle;
- printk(KERN_ERR MOD "ARP failure during connect\n");
+ pr_err("ARP failure during connect\n");
connect_reply_upcall(ep, -EHOSTUNREACH);
__state_set(&ep->com, DEAD);
if (ep->com.remote_addr.ss_family == AF_INET6) {
@@ -567,7 +567,7 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb)
struct c4iw_rdev *rdev = &ep->com.dev->rdev;
struct cpl_abort_req *req = cplhdr(skb);
- PDBG("%s rdev %p\n", __func__, rdev);
+ pr_debug("%s rdev %p\n", __func__, rdev);
req->cmd = CPL_ABORT_NO_RST;
ret = c4iw_ofld_send(rdev, skb);
if (ret) {
@@ -642,7 +642,7 @@ static int send_halfclose(struct c4iw_ep *ep)
struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (WARN_ON(!skb))
return -ENOMEM;
@@ -657,7 +657,7 @@ static int send_abort(struct c4iw_ep *ep)
u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (WARN_ON(!req_skb))
return -ENOMEM;
@@ -720,12 +720,11 @@ static int send_connect(struct c4iw_ep *ep)
roundup(sizev4, 16) :
roundup(sizev6, 16);
- PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid);
+ pr_debug("%s ep %p atid %u\n", __func__, ep, ep->atid);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
- __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
@@ -822,13 +821,13 @@ static int send_connect(struct c4iw_ep *ep)
t5req->params =
cpu_to_be64(FILTER_TUPLE_V(params));
t5req->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__, t5req->rsvd);
+ pr_debug("%s snd_isn %u\n", __func__, t5req->rsvd);
t5req->opt2 = cpu_to_be32(opt2);
} else {
t6req->params =
cpu_to_be64(FILTER_TUPLE_V(params));
t6req->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__, t6req->rsvd);
+ pr_debug("%s snd_isn %u\n", __func__, t6req->rsvd);
t6req->opt2 = cpu_to_be32(opt2);
}
}
@@ -877,13 +876,13 @@ static int send_connect(struct c4iw_ep *ep)
t5req6->params =
cpu_to_be64(FILTER_TUPLE_V(params));
t5req6->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__, t5req6->rsvd);
+ pr_debug("%s snd_isn %u\n", __func__, t5req6->rsvd);
t5req6->opt2 = cpu_to_be32(opt2);
} else {
t6req6->params =
cpu_to_be64(FILTER_TUPLE_V(params));
t6req6->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__, t6req6->rsvd);
+ pr_debug("%s snd_isn %u\n", __func__, t6req6->rsvd);
t6req6->opt2 = cpu_to_be32(opt2);
}
@@ -907,7 +906,8 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
struct mpa_message *mpa;
struct mpa_v2_conn_params mpa_v2_params;
- PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
+ pr_debug("%s ep %p tid %u pd_len %d\n",
+ __func__, ep, ep->hwtid, ep->plen);
BUG_ON(skb_cloned(skb));
@@ -961,8 +961,8 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
if (mpa_rev_to_use == 2) {
mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
sizeof (struct mpa_v2_conn_params));
- PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
- ep->ord);
+ pr_debug("%s initiator ird %u ord %u\n", __func__, ep->ird,
+ ep->ord);
mpa_v2_params.ird = htons((u16)ep->ird);
mpa_v2_params.ord = htons((u16)ep->ord);
@@ -1014,7 +1014,8 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
struct sk_buff *skb;
struct mpa_v2_conn_params mpa_v2_params;
- PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
+ pr_debug("%s ep %p tid %u pd_len %d\n",
+ __func__, ep, ep->hwtid, ep->plen);
mpalen = sizeof(*mpa) + plen;
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
@@ -1023,7 +1024,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ pr_err("%s - cannot alloc skb!\n", __func__);
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
@@ -1094,7 +1095,8 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
struct sk_buff *skb;
struct mpa_v2_conn_params mpa_v2_params;
- PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
+ pr_debug("%s ep %p tid %u pd_len %d\n",
+ __func__, ep, ep->hwtid, ep->plen);
mpalen = sizeof(*mpa) + plen;
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
@@ -1103,7 +1105,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ pr_err("%s - cannot alloc skb!\n", __func__);
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
@@ -1185,8 +1187,8 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
ep = lookup_atid(t, atid);
- PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
- be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
+ pr_debug("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
+ be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
mutex_lock(&ep->com.mutex);
dst_confirm(ep->dst);
@@ -1229,13 +1231,13 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
event.status = status;
if (ep->com.cm_id) {
- PDBG("close complete delivered ep %p cm_id %p tid %u\n",
- ep, ep->com.cm_id, ep->hwtid);
+ pr_debug("close complete delivered ep %p cm_id %p tid %u\n",
+ ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
deref_cm_id(&ep->com);
set_bit(CLOSE_UPCALL, &ep->com.history);
@@ -1246,12 +1248,12 @@ static void peer_close_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_DISCONNECT;
if (ep->com.cm_id) {
- PDBG("peer close delivered ep %p cm_id %p tid %u\n",
- ep, ep->com.cm_id, ep->hwtid);
+ pr_debug("peer close delivered ep %p cm_id %p tid %u\n",
+ ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
set_bit(DISCONN_UPCALL, &ep->com.history);
}
@@ -1261,13 +1263,13 @@ static void peer_abort_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
event.status = -ECONNRESET;
if (ep->com.cm_id) {
- PDBG("abort delivered ep %p cm_id %p tid %u\n", ep,
- ep->com.cm_id, ep->hwtid);
+ pr_debug("abort delivered ep %p cm_id %p tid %u\n", ep,
+ ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
deref_cm_id(&ep->com);
set_bit(ABORT_UPCALL, &ep->com.history);
@@ -1278,7 +1280,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status);
+ pr_debug("%s ep %p tid %u status %d\n",
+ __func__, ep, ep->hwtid, status);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REPLY;
event.status = status;
@@ -1307,8 +1310,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
}
}
- PDBG("%s ep %p tid %u status %d\n", __func__, ep,
- ep->hwtid, status);
+ pr_debug("%s ep %p tid %u status %d\n", __func__, ep,
+ ep->hwtid, status);
set_bit(CONN_RPL_UPCALL, &ep->com.history);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
@@ -1321,7 +1324,7 @@ static int connect_request_upcall(struct c4iw_ep *ep)
struct iw_cm_event event;
int ret;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REQUEST;
memcpy(&event.local_addr, &ep->com.local_addr,
@@ -1358,13 +1361,13 @@ static void established_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_ESTABLISHED;
event.ird = ep->ord;
event.ord = ep->ird;
if (ep->com.cm_id) {
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
set_bit(ESTAB_UPCALL, &ep->com.history);
}
@@ -1376,10 +1379,11 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
u32 credit_dack;
- PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
+ pr_debug("%s ep %p tid %u credits %u\n",
+ __func__, ep, ep->hwtid, credits);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
+ pr_err("update_rx_credits - cannot alloc skb!\n");
return 0;
}
@@ -1427,7 +1431,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
int err;
int disconnect = 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
/*
* If we get more than the supported amount of private data
@@ -1454,8 +1458,8 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
/* Validate MPA header. */
if (mpa->revision > mpa_rev) {
- printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
- " Received = %d\n", __func__, mpa_rev, mpa->revision);
+ pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
+ __func__, mpa_rev, mpa->revision);
err = -EPROTO;
goto err_stop_timer;
}
@@ -1525,8 +1529,9 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
MPA_V2_IRD_ORD_MASK;
resp_ord = ntohs(mpa_v2_params->ord) &
MPA_V2_IRD_ORD_MASK;
- PDBG("%s responder ird %u ord %u ep ird %u ord %u\n",
- __func__, resp_ird, resp_ord, ep->ird, ep->ord);
+ pr_debug("%s responder ird %u ord %u ep ird %u ord %u\n",
+ __func__,
+ resp_ird, resp_ord, ep->ird, ep->ord);
/*
* This is a double-check. Ideally, below checks are
@@ -1570,12 +1575,11 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
if (peer2peer)
ep->mpa_attr.p2p_type = p2p_type;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = "
- "%d\n", __func__, ep->mpa_attr.crc_enabled,
- ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
- ep->mpa_attr.p2p_type, p2p_type);
+ pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n",
+ __func__, ep->mpa_attr.crc_enabled,
+ ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+ ep->mpa_attr.p2p_type, p2p_type);
/*
* If responder's RTR does not match with that of initiator, assign
@@ -1610,7 +1614,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* supports, generate TERM message
*/
if (rtr_mismatch) {
- printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
+ pr_err("%s: RTR mismatch, sending TERM\n", __func__);
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_NOMATCH_RTR;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
@@ -1629,8 +1633,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* initiator ORD.
*/
if (insuff_ird) {
- printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
- __func__);
+ pr_err("%s: Insufficient IRD, sending TERM\n", __func__);
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_INSUFF_IRD;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
@@ -1669,7 +1672,7 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
struct mpa_v2_conn_params *mpa_v2_params;
u16 plen;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
/*
* If we get more than the supported amount of private data
@@ -1678,7 +1681,7 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt))
goto err_stop_timer;
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
/*
* Copy the new data into our accumulation buffer.
@@ -1694,15 +1697,15 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (ep->mpa_pkt_len < sizeof(*mpa))
return 0;
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
mpa = (struct mpa_message *) ep->mpa_pkt;
/*
* Validate MPA Header.
*/
if (mpa->revision > mpa_rev) {
- printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
- " Received = %d\n", __func__, mpa_rev, mpa->revision);
+ pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
+ __func__, mpa_rev, mpa->revision);
goto err_stop_timer;
}
@@ -1757,8 +1760,8 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
MPA_V2_IRD_ORD_MASK;
ep->ord = min_t(u32, ep->ord,
cur_max_read_depth(ep->com.dev));
- PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
- ep->ord);
+ pr_debug("%s initiator ird %u ord %u\n",
+ __func__, ep->ird, ep->ord);
if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
if (peer2peer) {
if (ntohs(mpa_v2_params->ord) &
@@ -1775,11 +1778,11 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (peer2peer)
ep->mpa_attr.p2p_type = p2p_type;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
- ep->mpa_attr.p2p_type);
+ pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n",
+ __func__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+ ep->mpa_attr.p2p_type);
__state_set(&ep->com, MPA_REQ_RCVD);
@@ -1815,7 +1818,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
- PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
+ pr_debug("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
skb_pull(skb, sizeof(*hdr));
skb_trim(skb, dlen);
mutex_lock(&ep->com.mutex);
@@ -1866,10 +1869,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
if (!ep) {
- printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
+ pr_warn("Abort rpl to freed endpoint\n");
return 0;
}
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
switch (ep->com.state) {
case ABORTING:
@@ -1878,8 +1881,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
release = 1;
break;
default:
- printk(KERN_ERR "%s ep %p state %d\n",
- __func__, ep, ep->com.state);
+ pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
break;
}
mutex_unlock(&ep->com.mutex);
@@ -1995,7 +1997,8 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
{
ep->snd_win = snd_win;
ep->rcv_win = rcv_win;
- PDBG("%s snd_win %d rcv_win %d\n", __func__, ep->snd_win, ep->rcv_win);
+ pr_debug("%s snd_win %d rcv_win %d\n",
+ __func__, ep->snd_win, ep->rcv_win);
}
#define ACT_OPEN_RETRY_COUNT 2
@@ -2100,7 +2103,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
int iptype;
__u8 *ra;
- PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
+ pr_debug("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
init_timer(&ep->timer);
c4iw_init_wr_wait(&ep->com.wr_wait);
@@ -2124,7 +2127,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
*/
ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
if (ep->atid == -1) {
- pr_err("%s - cannot alloc atid.\n", __func__);
+ pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -2151,7 +2154,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
ra = (__u8 *)&raddr6->sin6_addr;
}
if (!ep->dst) {
- pr_err("%s - cannot find route.\n", __func__);
+ pr_err("%s - cannot find route\n", __func__);
err = -EHOSTUNREACH;
goto fail3;
}
@@ -2159,13 +2162,13 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
ep->com.dev->rdev.lldi.adapter_type,
ep->com.cm_id->tos);
if (err) {
- pr_err("%s - cannot alloc l2e.\n", __func__);
+ pr_err("%s - cannot alloc l2e\n", __func__);
goto fail4;
}
- PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
- __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
- ep->l2t->idx);
+ pr_debug("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
+ __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
+ ep->l2t->idx);
state_set(&ep->com, CONNECTING);
ep->tos = ep->com.cm_id->tos;
@@ -2215,12 +2218,12 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
- PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
- status, status2errno(status));
+ pr_debug("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
+ status, status2errno(status));
if (cxgb_is_neg_adv(status)) {
- PDBG("%s Connection problems for atid %u status %u (%s)\n",
- __func__, atid, status, neg_adv_str(status));
+ pr_debug("%s Connection problems for atid %u status %u (%s)\n",
+ __func__, atid, status, neg_adv_str(status));
ep->stats.connect_neg_adv++;
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.neg_adv++;
@@ -2315,11 +2318,11 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
if (!ep) {
- PDBG("%s stid %d lookup failure!\n", __func__, stid);
+ pr_debug("%s stid %d lookup failure!\n", __func__, stid);
goto out;
}
- PDBG("%s ep %p status %d error %d\n", __func__, ep,
- rpl->status, status2errno(rpl->status));
+ pr_debug("%s ep %p status %d error %d\n", __func__, ep,
+ rpl->status, status2errno(rpl->status));
c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
c4iw_put_ep(&ep->com);
out:
@@ -2332,7 +2335,7 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
unsigned int stid = GET_TID(rpl);
struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
c4iw_put_ep(&ep->com);
return 0;
@@ -2350,7 +2353,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
int win;
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
BUG_ON(skb_cloned(skb));
skb_get(skb);
@@ -2421,7 +2424,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
if (peer2peer)
isn += 4;
rpl5->iss = cpu_to_be32(isn);
- PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
+ pr_debug("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
}
rpl->opt0 = cpu_to_be64(opt0);
@@ -2434,7 +2437,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
{
- PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid);
+ pr_debug("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid);
BUG_ON(skb_cloned(skb));
skb_trim(skb, sizeof(struct cpl_tid_release));
release_tid(&dev->rdev, hwtid, skb);
@@ -2460,12 +2463,13 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
if (!parent_ep) {
- PDBG("%s connect request on invalid stid %d\n", __func__, stid);
+ pr_debug("%s connect request on invalid stid %d\n",
+ __func__, stid);
goto reject;
}
if (state_read(&parent_ep->com) != LISTEN) {
- PDBG("%s - listening ep not in LISTEN\n", __func__);
+ pr_debug("%s - listening ep not in LISTEN\n", __func__);
goto reject;
}
@@ -2474,18 +2478,18 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
/* Find output route */
if (iptype == 4) {
- PDBG("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
- , __func__, parent_ep, hwtid,
- local_ip, peer_ip, ntohs(local_port),
- ntohs(peer_port), peer_mss);
+ pr_debug("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
+ , __func__, parent_ep, hwtid,
+ local_ip, peer_ip, ntohs(local_port),
+ ntohs(peer_port), peer_mss);
dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
*(__be32 *)local_ip, *(__be32 *)peer_ip,
local_port, peer_port, tos);
} else {
- PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
- , __func__, parent_ep, hwtid,
- local_ip, peer_ip, ntohs(local_port),
- ntohs(peer_port), peer_mss);
+ pr_debug("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
+ , __func__, parent_ep, hwtid,
+ local_ip, peer_ip, ntohs(local_port),
+ ntohs(peer_port), peer_mss);
dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
local_ip, peer_ip, local_port, peer_port,
PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
@@ -2493,15 +2497,13 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
&parent_ep->com.local_addr)->sin6_scope_id);
}
if (!dst) {
- printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
- __func__);
+ pr_err("%s - failed to find dst entry!\n", __func__);
goto reject;
}
child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
if (!child_ep) {
- printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
- __func__);
+ pr_err("%s - failed to allocate ep entry!\n", __func__);
dst_release(dst);
goto reject;
}
@@ -2509,8 +2511,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
parent_ep->com.dev->rdev.lldi.adapter_type, tos);
if (err) {
- printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
- __func__);
+ pr_err("%s - failed to allocate l2t entry!\n", __func__);
dst_release(dst);
kfree(child_ep);
goto reject;
@@ -2571,8 +2572,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
child_ep->dst = dst;
child_ep->hwtid = hwtid;
- PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__,
- child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
+ pr_debug("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__,
+ child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
init_timer(&child_ep->timer);
cxgb4_insert_tid(t, child_ep, hwtid);
@@ -2607,12 +2608,12 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
int ret;
ep = get_ep_from_tid(dev, tid);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
ep->snd_seq = be32_to_cpu(req->snd_isn);
ep->rcv_seq = be32_to_cpu(req->rcv_isn);
- PDBG("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid,
- ntohs(req->tcp_opt));
+ pr_debug("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid,
+ ntohs(req->tcp_opt));
set_emss(ep, ntohs(req->tcp_opt));
@@ -2644,7 +2645,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
if (!ep)
return 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
dst_confirm(ep->dst);
set_bit(PEER_CLOSE, &ep->com.history);
@@ -2666,12 +2667,12 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
* in rdma connection migration (see c4iw_accept_cr()).
*/
__state_set(&ep->com, CLOSING);
- PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
+ pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
break;
case MPA_REP_SENT:
__state_set(&ep->com, CLOSING);
- PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
+ pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
break;
case FPDU_MODE:
@@ -2735,17 +2736,17 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
if (cxgb_is_neg_adv(req->status)) {
- PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
- __func__, ep->hwtid, req->status,
- neg_adv_str(req->status));
+ pr_debug("%s Negative advice on abort- tid %u status %d (%s)\n",
+ __func__, ep->hwtid, req->status,
+ neg_adv_str(req->status));
ep->stats.abort_neg_adv++;
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.neg_adv++;
mutex_unlock(&dev->rdev.stats.lock);
goto deref_ep;
}
- PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
- ep->com.state);
+ pr_debug("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
+ ep->com.state);
set_bit(PEER_ABORT, &ep->com.history);
/*
@@ -2777,8 +2778,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
* do some housekeeping so as to re-initiate the
* connection
*/
- PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
- mpa_rev);
+ pr_debug("%s: mpa_rev=%d. Retrying with mpav1\n",
+ __func__, mpa_rev);
ep->retry_with_mpa_v1 = 1;
}
break;
@@ -2797,16 +2798,14 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
&attrs, 1);
if (ret)
- printk(KERN_ERR MOD
- "%s - qp <- error failed!\n",
- __func__);
+ pr_err("%s - qp <- error failed!\n", __func__);
}
peer_abort_upcall(ep);
break;
case ABORTING:
break;
case DEAD:
- PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
+ pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
mutex_unlock(&ep->com.mutex);
goto deref_ep;
default:
@@ -2870,7 +2869,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
if (!ep)
return 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
BUG_ON(!ep);
/* The cm_id may be null if we failed to connect */
@@ -2918,13 +2917,13 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
BUG_ON(!ep);
if (ep && ep->com.qp) {
- printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
- ep->com.qp->wq.sq.qid);
+ pr_warn("TERM received tid %u qpid %u\n",
+ tid, ep->com.qp->wq.sq.qid);
attrs.next_state = C4IW_QP_STATE_TERMINATE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
} else
- printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
+ pr_warn("TERM received tid %u no ep/qp\n", tid);
c4iw_put_ep(&ep->com);
return 0;
@@ -2946,18 +2945,19 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
- PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
+ pr_debug("%s ep %p tid %u credits %u\n",
+ __func__, ep, ep->hwtid, credits);
if (credits == 0) {
- PDBG("%s 0 credit ack ep %p tid %u state %u\n",
- __func__, ep, ep->hwtid, state_read(&ep->com));
+ pr_debug("%s 0 credit ack ep %p tid %u state %u\n",
+ __func__, ep, ep->hwtid, state_read(&ep->com));
goto out;
}
dst_confirm(ep->dst);
if (ep->mpa_skb) {
- PDBG("%s last streaming msg ack ep %p tid %u state %u "
- "initiator %u freeing skb\n", __func__, ep, ep->hwtid,
- state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
+ pr_debug("%s last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n",
+ __func__, ep, ep->hwtid,
+ state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
mutex_lock(&ep->com.mutex);
kfree_skb(ep->mpa_skb);
ep->mpa_skb = NULL;
@@ -2975,7 +2975,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
int abort;
struct c4iw_ep *ep = to_ep(cm_id);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
if (ep->com.state != MPA_REQ_RCVD) {
@@ -3006,7 +3006,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
int abort = 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
if (ep->com.state != MPA_REQ_RCVD) {
@@ -3059,7 +3059,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->ird = 1;
}
- PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
+ pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
ep->com.cm_id = cm_id;
ref_cm_id(&ep->com);
@@ -3188,7 +3188,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ pr_err("%s - cannot alloc ep\n", __func__);
err = -ENOMEM;
goto out;
}
@@ -3215,20 +3215,20 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->com.dev = dev;
ep->com.qp = get_qhp(dev, conn_param->qpn);
if (!ep->com.qp) {
- PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
+ pr_debug("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
err = -EINVAL;
goto fail2;
}
ref_qp(ep);
- PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
- ep->com.qp, cm_id);
+ pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
+ ep->com.qp, cm_id);
/*
* Allocate an active TID to initiate a TCP connection.
*/
ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
if (ep->atid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+ pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -3258,9 +3258,9 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
/* find a route */
- PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
- __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
- ra, ntohs(raddr->sin_port));
+ pr_debug("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
+ __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
+ ra, ntohs(raddr->sin_port));
ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
laddr->sin_addr.s_addr,
raddr->sin_addr.s_addr,
@@ -3280,10 +3280,10 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
/* find a route */
- PDBG("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
- __func__, laddr6->sin6_addr.s6_addr,
- ntohs(laddr6->sin6_port),
- raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
+ pr_debug("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
+ __func__, laddr6->sin6_addr.s6_addr,
+ ntohs(laddr6->sin6_port),
+ raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
laddr6->sin6_addr.s6_addr,
raddr6->sin6_addr.s6_addr,
@@ -3292,7 +3292,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
raddr6->sin6_scope_id);
}
if (!ep->dst) {
- printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
+ pr_err("%s - cannot find route\n", __func__);
err = -EHOSTUNREACH;
goto fail3;
}
@@ -3300,13 +3300,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
if (err) {
- printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
+ pr_err("%s - cannot alloc l2e\n", __func__);
goto fail4;
}
- PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
- __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
- ep->l2t->idx);
+ pr_debug("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
+ __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
+ ep->l2t->idx);
state_set(&ep->com, CONNECTING);
ep->tos = cm_id->tos;
@@ -3414,12 +3414,12 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ pr_err("%s - cannot alloc ep\n", __func__);
err = -ENOMEM;
goto fail1;
}
skb_queue_head_init(&ep->com.ep_skb_list);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
ep->com.cm_id = cm_id;
ref_cm_id(&ep->com);
ep->com.dev = dev;
@@ -3439,7 +3439,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->m_local_addr.ss_family, ep);
if (ep->stid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
+ pr_err("%s - cannot alloc stid\n", __func__);
err = -ENOMEM;
goto fail2;
}
@@ -3473,7 +3473,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
int err;
struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("%s ep %p\n", __func__, ep);
might_sleep();
state_set(&ep->com, DEAD);
@@ -3514,8 +3514,8 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
mutex_lock(&ep->com.mutex);
- PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
- states[ep->com.state], abrupt);
+ pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep,
+ states[ep->com.state], abrupt);
/*
* Ref the ep here in case we have fatal errors causing the
@@ -3568,8 +3568,8 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
case MORIBUND:
case ABORTING:
case DEAD:
- PDBG("%s ignoring disconnect ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("%s ignoring disconnect ep %p state %u\n",
+ __func__, ep, ep->com.state);
break;
default:
BUG();
@@ -3600,8 +3600,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
C4IW_QP_ATTR_NEXT_STATE,
&attrs, 1);
if (ret)
- pr_err(MOD
- "%s - qp <- error failed!\n",
+ pr_err("%s - qp <- error failed!\n",
__func__);
}
fatal = 1;
@@ -3674,7 +3673,7 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
rpl_skb = (struct sk_buff *)(unsigned long)req->cookie;
BUG_ON(!rpl_skb);
if (req->retval) {
- PDBG("%s passive open failure %d\n", __func__, req->retval);
+ pr_debug("%s passive open failure %d\n", __func__, req->retval);
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.pas_ofld_conn_fails++;
mutex_unlock(&dev->rdev.stats.lock);
@@ -3800,6 +3799,8 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
int ret;
req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
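+ /* drop this connect request if the work request skb cannot be allocated */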
+ if (!req_skb)
+ return;
req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
memset(req, 0, sizeof(*req));
req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
@@ -3890,7 +3891,8 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
if (!lep) {
- PDBG("%s connect request on invalid stid %d\n", __func__, stid);
+ pr_debug("%s connect request on invalid stid %d\n",
+ __func__, stid);
goto reject;
}
@@ -3927,9 +3929,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
skb_set_transport_header(skb, (void *)tcph - (void *)rss);
skb_get(skb);
- PDBG("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__,
- ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
- ntohs(tcph->source), iph->tos);
+ pr_debug("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__,
+ ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
+ ntohs(tcph->source), iph->tos);
dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
iph->daddr, iph->saddr, tcph->dest,
@@ -4026,8 +4028,8 @@ static void process_timeout(struct c4iw_ep *ep)
int abort = 1;
mutex_lock(&ep->com.mutex);
- PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
- ep->com.state);
+ pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
+ ep->com.state);
set_bit(TIMEDOUT, &ep->com.history);
switch (ep->com.state) {
case MPA_REQ_SENT:
@@ -4157,8 +4159,8 @@ static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
if (rpl->status != CPL_ERR_NONE) {
- printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
- "for tid %u\n", rpl->status, GET_TID(rpl));
+ pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
+ rpl->status, GET_TID(rpl));
}
kfree_skb(skb);
return 0;
@@ -4170,13 +4172,13 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
struct c4iw_wr_wait *wr_waitp;
int ret;
- PDBG("%s type %u\n", __func__, rpl->type);
+ pr_debug("%s type %u\n", __func__, rpl->type);
switch (rpl->type) {
case FW6_TYPE_WR_RPL:
ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
- PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
+ pr_debug("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
if (wr_waitp)
c4iw_wake_up(wr_waitp, ret ? -ret : 0);
kfree_skb(skb);
@@ -4186,8 +4188,8 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
sched(dev, skb);
break;
default:
- printk(KERN_ERR MOD "%s unexpected fw6 msg type %u\n", __func__,
- rpl->type);
+ pr_err("%s unexpected fw6 msg type %u\n",
+ __func__, rpl->type);
kfree_skb(skb);
break;
}
@@ -4203,19 +4205,18 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
/* This EP will be dereferenced in peer_abort() */
if (!ep) {
- printk(KERN_WARNING MOD
- "Abort on non-existent endpoint, tid %d\n", tid);
+ pr_warn("Abort on non-existent endpoint, tid %d\n", tid);
kfree_skb(skb);
return 0;
}
if (cxgb_is_neg_adv(req->status)) {
- PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
- __func__, ep->hwtid, req->status,
- neg_adv_str(req->status));
+ pr_debug("%s Negative advice on abort- tid %u status %d (%s)\n",
+ __func__, ep->hwtid, req->status,
+ neg_adv_str(req->status));
goto out;
}
- PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
- ep->com.state);
+ pr_debug("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
+ ep->com.state);
c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
out:
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index bec82a600d77..14de5bde1b63 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -146,7 +146,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
ret = c4iw_ofld_send(rdev, skb);
if (ret)
goto err4;
- PDBG("%s wait_event wr_wait %p\n", __func__, &wr_wait);
+ pr_debug("%s wait_event wr_wait %p\n", __func__, &wr_wait);
ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
if (ret)
goto err4;
@@ -159,7 +159,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
&cq->bar2_qid,
user ? &cq->bar2_pa : NULL);
if (user && !cq->bar2_pa) {
- pr_warn(MOD "%s: cqid %u not in BAR2 range.\n",
+ pr_warn("%s: cqid %u not in BAR2 range\n",
pci_name(rdev->lldi.pdev), cq->cqid);
ret = -EINVAL;
goto err4;
@@ -180,8 +180,8 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
{
struct t4_cqe cqe;
- PDBG("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
- wq, cq, cq->sw_cidx, cq->sw_pidx);
+ pr_debug("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
+ wq, cq, cq->sw_cidx, cq->sw_pidx);
memset(&cqe, 0, sizeof(cqe));
cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
CQE_OPCODE_V(FW_RI_SEND) |
@@ -199,8 +199,8 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
int in_use = wq->rq.in_use - count;
BUG_ON(in_use < 0);
- PDBG("%s wq %p cq %p rq.in_use %u skip count %u\n", __func__,
- wq, cq, wq->rq.in_use, count);
+ pr_debug("%s wq %p cq %p rq.in_use %u skip count %u\n", __func__,
+ wq, cq, wq->rq.in_use, count);
while (in_use--) {
insert_recv_cqe(wq, cq);
flushed++;
@@ -213,8 +213,8 @@ static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
{
struct t4_cqe cqe;
- PDBG("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
- wq, cq, cq->sw_cidx, cq->sw_pidx);
+ pr_debug("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
+ wq, cq, cq->sw_cidx, cq->sw_pidx);
memset(&cqe, 0, sizeof(cqe));
cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
CQE_OPCODE_V(swcqe->opcode) |
@@ -283,8 +283,8 @@ static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
/*
* Insert this completed cqe into the swcq.
*/
- PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n",
- __func__, cidx, cq->sw_pidx);
+ pr_debug("%s moving cqe into swcq sq idx %u cq idx %u\n",
+ __func__, cidx, cq->sw_pidx);
swsqe->cqe.header |= htonl(CQE_SWCQE_V(1));
cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
t4_swcq_produce(cq);
@@ -339,7 +339,7 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
struct t4_swsqe *swsqe;
int ret;
- PDBG("%s cqid 0x%x\n", __func__, chp->cq.cqid);
+ pr_debug("%s cqid 0x%x\n", __func__, chp->cq.cqid);
ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
/*
@@ -432,7 +432,7 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
u32 ptr;
*count = 0;
- PDBG("%s count zero %d\n", __func__, *count);
+ pr_debug("%s count zero %d\n", __func__, *count);
ptr = cq->sw_cidx;
while (ptr != cq->sw_pidx) {
cqe = &cq->sw_queue[ptr];
@@ -442,7 +442,7 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
if (++ptr == cq->size)
ptr = 0;
}
- PDBG("%s cq %p count %d\n", __func__, cq, *count);
+ pr_debug("%s cq %p count %d\n", __func__, cq, *count);
}
/*
@@ -473,12 +473,11 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
if (ret)
return ret;
- PDBG("%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x"
- " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- __func__, CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
- CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
- CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
- CQE_WRID_LOW(hw_cqe));
+ pr_debug("%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ __func__, CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
+ CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
+ CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
+ CQE_WRID_LOW(hw_cqe));
/*
* skip cqe's not affiliated with a QP.
@@ -606,8 +605,8 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
struct t4_swsqe *swsqe;
- PDBG("%s out of order completion going in sw_sq at idx %u\n",
- __func__, CQE_WRID_SQ_IDX(hw_cqe));
+ pr_debug("%s out of order completion going in sw_sq at idx %u\n",
+ __func__, CQE_WRID_SQ_IDX(hw_cqe));
swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
swsqe->cqe = *hw_cqe;
swsqe->complete = 1;
@@ -641,13 +640,13 @@ proc_cqe:
BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size);
wq->sq.cidx = (uint16_t)idx;
- PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx);
+ pr_debug("%s completing sq idx %u\n", __func__, wq->sq.cidx);
*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
if (c4iw_wr_log)
c4iw_log_wr_stats(wq, hw_cqe);
t4_sq_consume(wq);
} else {
- PDBG("%s completing rq idx %u\n", __func__, wq->rq.cidx);
+ pr_debug("%s completing rq idx %u\n", __func__, wq->rq.cidx);
*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
BUG_ON(t4_rq_empty(wq));
if (c4iw_wr_log)
@@ -664,12 +663,12 @@ flush_wq:
skip_cqe:
if (SW_CQE(hw_cqe)) {
- PDBG("%s cq %p cqid 0x%x skip sw cqe cidx %u\n",
- __func__, cq, cq->cqid, cq->sw_cidx);
+ pr_debug("%s cq %p cqid 0x%x skip sw cqe cidx %u\n",
+ __func__, cq, cq->cqid, cq->sw_cidx);
t4_swcq_consume(cq);
} else {
- PDBG("%s cq %p cqid 0x%x skip hw cqe cidx %u\n",
- __func__, cq, cq->cqid, cq->cidx);
+ pr_debug("%s cq %p cqid 0x%x skip hw cqe cidx %u\n",
+ __func__, cq, cq->cqid, cq->cidx);
t4_hwcq_consume(cq);
}
return ret;
@@ -715,10 +714,12 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
wc->vendor_err = CQE_STATUS(&cqe);
wc->wc_flags = 0;
- PDBG("%s qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x "
- "lo 0x%x cookie 0x%llx\n", __func__, CQE_QPID(&cqe),
- CQE_TYPE(&cqe), CQE_OPCODE(&cqe), CQE_STATUS(&cqe), CQE_LEN(&cqe),
- CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe), (unsigned long long)cookie);
+ pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
+ __func__, CQE_QPID(&cqe),
+ CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
+ CQE_STATUS(&cqe), CQE_LEN(&cqe),
+ CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
+ (unsigned long long)cookie);
if (CQE_TYPE(&cqe) == 0) {
if (!CQE_STATUS(&cqe))
@@ -766,8 +767,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
wc->opcode = IB_WC_SEND;
break;
default:
- printk(KERN_ERR MOD "Unexpected opcode %d "
- "in the CQE received for QPID=0x%0x\n",
+ pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
CQE_OPCODE(&cqe), CQE_QPID(&cqe));
ret = -EINVAL;
goto out;
@@ -822,8 +822,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
wc->status = IB_WC_WR_FLUSH_ERR;
break;
default:
- printk(KERN_ERR MOD
- "Unexpected cqe_status 0x%x for QPID=0x%0x\n",
+ pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
CQE_STATUS(&cqe), CQE_QPID(&cqe));
wc->status = IB_WC_FATAL_ERR;
}
@@ -860,7 +859,7 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq)
struct c4iw_cq *chp;
struct c4iw_ucontext *ucontext;
- PDBG("%s ib_cq %p\n", __func__, ib_cq);
+ pr_debug("%s ib_cq %p\n", __func__, ib_cq);
chp = to_c4iw_cq(ib_cq);
remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
@@ -892,7 +891,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
size_t memsize, hwentries;
struct c4iw_mm_entry *mm, *mm2;
- PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
+ pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
if (attr->flags)
return ERR_PTR(-EINVAL);
@@ -998,9 +997,9 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
mm2->len = PAGE_SIZE;
insert_mmap(ucontext, mm2);
}
- PDBG("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
- __func__, chp->cq.cqid, chp, chp->cq.size,
- chp->cq.memsize, (unsigned long long) chp->cq.dma_addr);
+ pr_debug("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
+ __func__, chp->cq.cqid, chp, chp->cq.size,
+ chp->cq.memsize, (unsigned long long)chp->cq.dma_addr);
return &chp->ibcq;
err6:
kfree(mm2);
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 4e4f1a732b01..329fb65e8fb0 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -334,7 +334,7 @@ static int qp_release(struct inode *inode, struct file *file)
{
struct c4iw_debugfs_data *qpd = file->private_data;
if (!qpd) {
- printk(KERN_INFO "%s null qpd?\n", __func__);
+ pr_info("%s null qpd?\n", __func__);
return 0;
}
vfree(qpd->buf);
@@ -422,7 +422,7 @@ static int stag_release(struct inode *inode, struct file *file)
{
struct c4iw_debugfs_data *stagd = file->private_data;
if (!stagd) {
- printk(KERN_INFO "%s null stagd?\n", __func__);
+ pr_info("%s null stagd?\n", __func__);
return 0;
}
vfree(stagd->buf);
@@ -796,15 +796,14 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
* cqid and qpid range must match for now.
*/
if (rdev->lldi.udb_density != rdev->lldi.ucq_density) {
- pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
+ pr_err("%s: unsupported udb/ucq densities %u/%u\n",
pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
rdev->lldi.ucq_density);
return -EINVAL;
}
if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
- pr_err(MOD "%s: unsupported qp and cq id ranges "
- "qp start %u size %u cq start %u size %u\n",
+ pr_err("%s: unsupported qp and cq id ranges qp start %u size %u cq start %u size %u\n",
pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start,
rdev->lldi.vr->cq.size);
@@ -813,23 +812,20 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->qpmask = rdev->lldi.udb_density - 1;
rdev->cqmask = rdev->lldi.ucq_density - 1;
- PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d "
- "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x "
- "qp qid start %u size %u cq qid start %u size %u\n",
- __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
- rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
- rdev->lldi.vr->pbl.start,
- rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
- rdev->lldi.vr->rq.size,
- rdev->lldi.vr->qp.start,
- rdev->lldi.vr->qp.size,
- rdev->lldi.vr->cq.start,
- rdev->lldi.vr->cq.size);
- PDBG("udb %pR db_reg %p gts_reg %p "
- "qpmask 0x%x cqmask 0x%x\n",
- &rdev->lldi.pdev->resource[2],
- rdev->lldi.db_reg, rdev->lldi.gts_reg,
- rdev->qpmask, rdev->cqmask);
+ pr_debug("%s dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n",
+ __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
+ rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
+ rdev->lldi.vr->pbl.start,
+ rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
+ rdev->lldi.vr->rq.size,
+ rdev->lldi.vr->qp.start,
+ rdev->lldi.vr->qp.size,
+ rdev->lldi.vr->cq.start,
+ rdev->lldi.vr->cq.size);
+ pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n",
+ &rdev->lldi.pdev->resource[2],
+ rdev->lldi.db_reg, rdev->lldi.gts_reg,
+ rdev->qpmask, rdev->cqmask);
if (c4iw_num_stags(rdev) == 0)
return -EINVAL;
@@ -843,22 +839,22 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
if (err) {
- printk(KERN_ERR MOD "error %d initializing resources\n", err);
+ pr_err("error %d initializing resources\n", err);
return err;
}
err = c4iw_pblpool_create(rdev);
if (err) {
- printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
+ pr_err("error %d initializing pbl pool\n", err);
goto destroy_resource;
}
err = c4iw_rqtpool_create(rdev);
if (err) {
- printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
+ pr_err("error %d initializing rqt pool\n", err);
goto destroy_pblpool;
}
err = c4iw_ocqp_pool_create(rdev);
if (err) {
- printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
+ pr_err("error %d initializing ocqp pool\n", err);
goto destroy_rqtpool;
}
rdev->status_page = (struct t4_dev_status_page *)
@@ -936,7 +932,7 @@ static void c4iw_dealloc(struct uld_ctx *ctx)
static void c4iw_remove(struct uld_ctx *ctx)
{
- PDBG("%s c4iw_dev %p\n", __func__, ctx->dev);
+ pr_debug("%s c4iw_dev %p\n", __func__, ctx->dev);
c4iw_unregister_device(ctx->dev);
c4iw_dealloc(ctx);
}
@@ -954,25 +950,25 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
int ret;
if (!rdma_supported(infop)) {
- printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
- pci_name(infop->pdev));
+ pr_info("%s: RDMA not supported on this device\n",
+ pci_name(infop->pdev));
return ERR_PTR(-ENOSYS);
}
if (!ocqp_supported(infop))
- pr_info("%s: On-Chip Queues not supported on this device.\n",
+ pr_info("%s: On-Chip Queues not supported on this device\n",
pci_name(infop->pdev));
devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
if (!devp) {
- printk(KERN_ERR MOD "Cannot allocate ib device\n");
+ pr_err("Cannot allocate ib device\n");
return ERR_PTR(-ENOMEM);
}
devp->rdev.lldi = *infop;
/* init various hw-queue params based on lld info */
- PDBG("%s: Ing. padding boundary is %d, egrsstatuspagesize = %d\n",
- __func__, devp->rdev.lldi.sge_ingpadboundary,
- devp->rdev.lldi.sge_egrstatuspagesize);
+ pr_debug("%s: Ing. padding boundary is %d, egrsstatuspagesize = %d\n",
+ __func__, devp->rdev.lldi.sge_ingpadboundary,
+ devp->rdev.lldi.sge_egrstatuspagesize);
devp->rdev.hw_queue.t4_eq_status_entries =
devp->rdev.lldi.sge_ingpadboundary > 64 ? 2 : 1;
@@ -1000,7 +996,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
pci_resource_len(devp->rdev.lldi.pdev, 2));
if (!devp->rdev.bar2_kva) {
- pr_err(MOD "Unable to ioremap BAR2\n");
+ pr_err("Unable to ioremap BAR2\n");
ib_dealloc_device(&devp->ibdev);
return ERR_PTR(-EINVAL);
}
@@ -1012,20 +1008,19 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
devp->rdev.lldi.vr->ocq.size);
if (!devp->rdev.oc_mw_kva) {
- pr_err(MOD "Unable to ioremap onchip mem\n");
+ pr_err("Unable to ioremap onchip mem\n");
ib_dealloc_device(&devp->ibdev);
return ERR_PTR(-EINVAL);
}
}
- PDBG(KERN_INFO MOD "ocq memory: "
- "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
- devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
- devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
+ pr_debug("ocq memory: hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
+ devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
+ devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
ret = c4iw_rdev_open(&devp->rdev);
if (ret) {
- printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
+ pr_err("Unable to open CXIO rdev err %d\n", ret);
ib_dealloc_device(&devp->ibdev);
return ERR_PTR(ret);
}
@@ -1071,17 +1066,17 @@ static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
}
ctx->lldi = *infop;
- PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
- __func__, pci_name(ctx->lldi.pdev),
- ctx->lldi.nchan, ctx->lldi.nrxq,
- ctx->lldi.ntxq, ctx->lldi.nports);
+ pr_debug("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
+ __func__, pci_name(ctx->lldi.pdev),
+ ctx->lldi.nchan, ctx->lldi.nrxq,
+ ctx->lldi.ntxq, ctx->lldi.nports);
mutex_lock(&dev_mutex);
list_add_tail(&ctx->entry, &uld_ctx_list);
mutex_unlock(&dev_mutex);
for (i = 0; i < ctx->lldi.nrxq; i++)
- PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
+ pr_debug("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
out:
return ctx;
}
@@ -1138,8 +1133,7 @@ static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl,
goto out;
if (c4iw_handlers[opcode] == NULL) {
- pr_info("%s no handler opcode 0x%x...\n", __func__,
- opcode);
+ pr_info("%s no handler opcode 0x%x...\n", __func__, opcode);
kfree_skb(skb);
goto out;
}
@@ -1176,13 +1170,11 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
if (recv_rx_pkt(dev, gl, rsp))
return 0;
- pr_info("%s: unexpected FL contents at %p, " \
- "RSS %#llx, FL %#llx, len %u\n",
- pci_name(ctx->lldi.pdev), gl->va,
- (unsigned long long)be64_to_cpu(*rsp),
- (unsigned long long)be64_to_cpu(
- *(__force __be64 *)gl->va),
- gl->tot_len);
+ pr_info("%s: unexpected FL contents at %p, RSS %#llx, FL %#llx, len %u\n",
+ pci_name(ctx->lldi.pdev), gl->va,
+ be64_to_cpu(*rsp),
+ be64_to_cpu(*(__force __be64 *)gl->va),
+ gl->tot_len);
return 0;
} else {
@@ -1195,8 +1187,7 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
if (c4iw_handlers[opcode]) {
c4iw_handlers[opcode](dev, skb);
} else {
- pr_info("%s no handler opcode 0x%x...\n", __func__,
- opcode);
+ pr_info("%s no handler opcode 0x%x...\n", __func__, opcode);
kfree_skb(skb);
}
@@ -1209,17 +1200,16 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
{
struct uld_ctx *ctx = handle;
- PDBG("%s new_state %u\n", __func__, new_state);
+ pr_debug("%s new_state %u\n", __func__, new_state);
switch (new_state) {
case CXGB4_STATE_UP:
- printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
+ pr_info("%s: Up\n", pci_name(ctx->lldi.pdev));
if (!ctx->dev) {
int ret;
ctx->dev = c4iw_alloc(&ctx->lldi);
if (IS_ERR(ctx->dev)) {
- printk(KERN_ERR MOD
- "%s: initialization failed: %ld\n",
+ pr_err("%s: initialization failed: %ld\n",
pci_name(ctx->lldi.pdev),
PTR_ERR(ctx->dev));
ctx->dev = NULL;
@@ -1227,22 +1217,19 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
}
ret = c4iw_register_device(ctx->dev);
if (ret) {
- printk(KERN_ERR MOD
- "%s: RDMA registration failed: %d\n",
+ pr_err("%s: RDMA registration failed: %d\n",
pci_name(ctx->lldi.pdev), ret);
c4iw_dealloc(ctx);
}
}
break;
case CXGB4_STATE_DOWN:
- printk(KERN_INFO MOD "%s: Down\n",
- pci_name(ctx->lldi.pdev));
+ pr_info("%s: Down\n", pci_name(ctx->lldi.pdev));
if (ctx->dev)
c4iw_remove(ctx);
break;
case CXGB4_STATE_START_RECOVERY:
- printk(KERN_INFO MOD "%s: Fatal Error\n",
- pci_name(ctx->lldi.pdev));
+ pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev));
if (ctx->dev) {
struct ib_event event;
@@ -1255,8 +1242,7 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
}
break;
case CXGB4_STATE_DETACH:
- printk(KERN_INFO MOD "%s: Detach\n",
- pci_name(ctx->lldi.pdev));
+ pr_info("%s: Detach\n", pci_name(ctx->lldi.pdev));
if (ctx->dev)
c4iw_remove(ctx);
break;
@@ -1406,9 +1392,7 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
t4_sq_host_wq_pidx(&qp->wq),
t4_sq_wq_size(&qp->wq));
if (ret) {
- pr_err(MOD "%s: Fatal error - "
- "DB overflow recovery failed - "
- "error syncing SQ qid %u\n",
+ pr_err("%s: Fatal error - DB overflow recovery failed - error syncing SQ qid %u\n",
pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
spin_unlock(&qp->lock);
spin_unlock_irq(&qp->rhp->lock);
@@ -1422,9 +1406,7 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
t4_rq_wq_size(&qp->wq));
if (ret) {
- pr_err(MOD "%s: Fatal error - "
- "DB overflow recovery failed - "
- "error syncing RQ qid %u\n",
+ pr_err("%s: Fatal error - DB overflow recovery failed - error syncing RQ qid %u\n",
pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
spin_unlock(&qp->lock);
spin_unlock_irq(&qp->rhp->lock);
@@ -1455,7 +1437,7 @@ static void recover_queues(struct uld_ctx *ctx)
/* flush the SGE contexts */
ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
if (ret) {
- printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
+ pr_err("%s: Fatal error - DB overflow recovery failed\n",
pci_name(ctx->lldi.pdev));
return;
}
@@ -1513,8 +1495,8 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
mutex_unlock(&ctx->dev->rdev.stats.lock);
break;
default:
- printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
- pci_name(ctx->lldi.pdev), control);
+ pr_warn("%s: unknown control cmd %u\n",
+ pci_name(ctx->lldi.pdev), control);
break;
}
return 0;
@@ -1543,8 +1525,7 @@ static int __init c4iw_init_module(void)
c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
if (!c4iw_debugfs_root)
- printk(KERN_WARNING MOD
- "could not create debugfs entry, continuing\n");
+ pr_warn("could not create debugfs entry, continuing\n");
cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
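[Editor's note] The opcode handling seen in c4iw_uld_rx_handler() above is a plain function-pointer table lookup with a "no handler" fallback. A minimal userspace sketch of the same pattern follows; the opcodes and handler names here are made up for illustration and are not part of the patch.

/* Illustrative sketch of an opcode -> handler dispatch table. */
#include <stdio.h>

typedef void (*handler_fn)(int arg);

static void handle_ping(int arg) { printf("ping %d\n", arg); }
static void handle_data(int arg) { printf("data %d\n", arg); }

/* sparse table, indexed by opcode */
static handler_fn handlers[256] = {
	[0x10] = handle_ping,
	[0x20] = handle_data,
};

static void dispatch(unsigned char opcode, int arg)
{
	if (handlers[opcode])
		handlers[opcode](arg);
	else
		printf("no handler opcode 0x%x\n", opcode);
}

int main(void)
{
	dispatch(0x10, 1);
	dispatch(0x33, 2);	/* takes the "no handler" path */
	return 0;
}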
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index bdfac2ccb704..8f963df0bffc 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -47,17 +47,16 @@ static void print_tpte(struct c4iw_dev *dev, u32 stag)
"%s cxgb4_read_tpte err %d\n", __func__, ret);
return;
}
- PDBG("stag idx 0x%x valid %d key 0x%x state %d pdid %d "
- "perm 0x%x ps %d len 0x%llx va 0x%llx\n",
- stag & 0xffffff00,
- FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)),
- FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)),
- ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo),
- ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo));
+ pr_debug("stag idx 0x%x valid %d key 0x%x state %d pdid %d perm 0x%x ps %d len 0x%llx va 0x%llx\n",
+ stag & 0xffffff00,
+ FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)),
+ FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)),
+ ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo),
+ ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo));
}
static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
@@ -71,9 +70,9 @@ static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len),
CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe));
- PDBG("%016llx %016llx %016llx %016llx\n",
- be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]),
- be64_to_cpu(p[3]));
+ pr_debug("%016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]),
+ be64_to_cpu(p[3]));
/*
* Ingress WRITE and READ_RESP errors provide
@@ -124,8 +123,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
spin_lock_irq(&dev->lock);
qhp = get_qhp(dev, CQE_QPID(err_cqe));
if (!qhp) {
- printk(KERN_ERR MOD "BAD AE qpid 0x%x opcode %d "
- "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ pr_err("BAD AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
CQE_QPID(err_cqe),
CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
@@ -140,8 +138,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
cqid = qhp->attr.rcq;
chp = get_chp(dev, cqid);
if (!chp) {
- printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d "
- "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
cqid, CQE_QPID(err_cqe),
CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
@@ -165,7 +162,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
/* Completion Events */
case T4_ERR_SUCCESS:
- printk(KERN_ERR MOD "AE with status 0!\n");
+ pr_err("AE with status 0!\n");
break;
case T4_ERR_STAG:
@@ -207,7 +204,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
break;
default:
- printk(KERN_ERR MOD "Unknown T4 status 0x%x QPID 0x%x\n",
+ pr_err("Unknown T4 status 0x%x QPID 0x%x\n",
CQE_STATUS(err_cqe), qhp->wq.sq.qid);
post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_FATAL);
break;
@@ -237,7 +234,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
if (atomic_dec_and_test(&chp->refcnt))
wake_up(&chp->wait);
} else {
- PDBG("%s unknown cqid 0x%x\n", __func__, qid);
+ pr_debug("%s unknown cqid 0x%x\n", __func__, qid);
spin_unlock_irqrestore(&dev->lock, flag);
}
return 0;
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 5846c47c8d55..819a30635d53 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -64,12 +64,11 @@
#define DRV_NAME "iw_cxgb4"
#define MOD DRV_NAME ":"
-extern int c4iw_debug;
-#define PDBG(fmt, args...) \
-do { \
- if (c4iw_debug) \
- printk(MOD fmt, ## args); \
-} while (0)
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include "t4.h"
@@ -231,15 +230,15 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
ret = wait_for_completion_timeout(&wr_waitp->completion, C4IW_WR_TO);
if (!ret) {
- PDBG("%s - Device %s not responding (disabling device) - tid %u qpid %u\n",
- func, pci_name(rdev->lldi.pdev), hwtid, qpid);
+ pr_debug("%s - Device %s not responding (disabling device) - tid %u qpid %u\n",
+ func, pci_name(rdev->lldi.pdev), hwtid, qpid);
rdev->flags |= T4_FATAL_ERROR;
wr_waitp->ret = -EIO;
}
out:
if (wr_waitp->ret)
- PDBG("%s: FW reply %d tid %u qpid %u\n",
- pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
+ pr_debug("%s: FW reply %d tid %u qpid %u\n",
+ pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
return wr_waitp->ret;
}
@@ -538,8 +537,9 @@ static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext,
if (mm->key == key && mm->len == len) {
list_del_init(&mm->entry);
spin_unlock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- key, (unsigned long long) mm->addr, mm->len);
+ pr_debug("%s key 0x%x addr 0x%llx len %d\n",
+ __func__, key,
+ (unsigned long long)mm->addr, mm->len);
return mm;
}
}
@@ -551,8 +551,8 @@ static inline void insert_mmap(struct c4iw_ucontext *ucontext,
struct c4iw_mm_entry *mm)
{
spin_lock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- mm->key, (unsigned long long) mm->addr, mm->len);
+ pr_debug("%s key 0x%x addr 0x%llx len %d\n",
+ __func__, mm->key, (unsigned long long)mm->addr, mm->len);
list_add_tail(&mm->entry, &ucontext->mmaps);
spin_unlock(&ucontext->mmap_lock);
}
@@ -670,17 +670,19 @@ enum c4iw_mmid_state {
#define MPA_V2_RDMA_READ_RTR 0x4000
#define MPA_V2_IRD_ORD_MASK 0x3FFF
-#define c4iw_put_ep(ep) { \
- PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
- ep, kref_read(&((ep)->kref))); \
- WARN_ON(kref_read(&((ep)->kref)) < 1); \
- kref_put(&((ep)->kref), _c4iw_free_ep); \
+#define c4iw_put_ep(ep) { \
+ pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n", \
+ __func__, __LINE__, \
+ ep, kref_read(&((ep)->kref))); \
+ WARN_ON(kref_read(&((ep)->kref)) < 1); \
+ kref_put(&((ep)->kref), _c4iw_free_ep); \
}
-#define c4iw_get_ep(ep) { \
- PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
- ep, kref_read(&((ep)->kref))); \
- kref_get(&((ep)->kref)); \
+#define c4iw_get_ep(ep) { \
+ pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n", \
+ __func__, __LINE__, \
+ ep, kref_read(&((ep)->kref))); \
+ kref_get(&((ep)->kref)); \
}
void _c4iw_free_ep(struct kref *kref);
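[Editor's note] The iw_cxgb4.h hunk above removes the driver-private PDBG()/MOD prefix machinery in favour of the generic pr_debug()/pr_*() helpers plus a pr_fmt() definition. For reference, a minimal out-of-tree module sketch of how that prefixing works is below; the module name "demo" is hypothetical, a normal kbuild environment is assumed, and none of this is part of the patch.

/* pr_fmt() must be defined before any include that pulls in printk.h */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>

static int __init demo_init(void)
{
	pr_info("loaded\n");		/* prints "demo: loaded" - no manual MOD prefix */
	pr_debug("debug path\n");	/* emitted only with DEBUG or dynamic debug enabled */
	return 0;
}

static void __exit demo_exit(void)
{
	pr_info("unloaded\n");
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");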
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 410408f886c1..3ee7f43e419a 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -38,9 +38,9 @@
#include "iw_cxgb4.h"
-int use_dsgl = 0;
+int use_dsgl = 1;
module_param(use_dsgl, int, 0644);
-MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)");
+MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1) (DEPRECATED)");
#define T4_ULPTX_MIN_IO 32
#define C4IW_MAX_INLINE_SIZE 96
@@ -125,7 +125,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
cmd |= cpu_to_be32(T5_ULP_MEMIO_IMM_F);
addr &= 0x7FFFFFF;
- PDBG("%s addr 0x%x len %u\n", __func__, addr, len);
+ pr_debug("%s addr 0x%x len %u\n", __func__, addr, len);
num_wqe = DIV_ROUND_UP(len, C4IW_MAX_INLINE_SIZE);
c4iw_init_wr_wait(&wr_wait);
for (i = 0; i < num_wqe; i++) {
@@ -231,13 +231,11 @@ out:
static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,
void *data, struct sk_buff *skb)
{
- if (is_t5(rdev->lldi.adapter_type) && use_dsgl) {
+ if (rdev->lldi.ulptx_memwrite_dsgl && use_dsgl) {
if (len > inline_threshold) {
if (_c4iw_write_mem_dma(rdev, addr, len, data, skb)) {
- printk_ratelimited(KERN_WARNING
- "%s: dma map"
- " failure (non fatal)\n",
- pci_name(rdev->lldi.pdev));
+ pr_warn_ratelimited("%s: dma map failure (non fatal)\n",
+ pci_name(rdev->lldi.pdev));
return _c4iw_write_mem_inline(rdev, addr, len,
data, skb);
} else {
@@ -289,8 +287,8 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
mutex_unlock(&rdev->stats.lock);
*stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff);
}
- PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
- __func__, stag_state, type, pdid, stag_idx);
+ pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
+ __func__, stag_state, type, pdid, stag_idx);
/* write TPT entry */
if (reset_tpt_entry)
@@ -331,9 +329,9 @@ static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl,
{
int err;
- PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
- __func__, pbl_addr, rdev->lldi.vr->pbl.start,
- pbl_size);
+ pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+ __func__, pbl_addr, rdev->lldi.vr->pbl.start,
+ pbl_size);
err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL);
return err;
@@ -376,7 +374,7 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+ pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
}
@@ -426,7 +424,7 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
int ret;
u32 stag = T4_STAG_UNSET;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
php = to_c4iw_pd(pd);
rhp = php->rhp;
@@ -483,7 +481,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct c4iw_pd *php;
struct c4iw_mr *mhp;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
if (length == ~0ULL)
return ERR_PTR(-EINVAL);
@@ -517,7 +515,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(err);
}
- shift = ffs(mhp->umem->page_size) - 1;
+ shift = mhp->umem->page_shift;
n = mhp->umem->nmap;
err = alloc_pbl(mhp, n);
@@ -536,7 +534,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(sg) +
- mhp->umem->page_size * k);
+ (k << shift));
if (i == PAGE_SIZE / sizeof *pages) {
err = write_pbl(&mhp->rhp->rdev,
pages,
@@ -620,7 +618,7 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
ret = -ENOMEM;
goto dealloc_win;
}
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmw);
dealloc_win:
@@ -645,7 +643,7 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb);
kfree_skb(mhp->dereg_skb);
kfree(mhp);
- PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
+ pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
return 0;
}
@@ -703,7 +701,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
goto err3;
}
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmr);
err3:
dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
@@ -748,7 +746,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
struct c4iw_mr *mhp;
u32 mmid;
- PDBG("%s ib_mr %p\n", __func__, ib_mr);
+ pr_debug("%s ib_mr %p\n", __func__, ib_mr);
mhp = to_c4iw_mr(ib_mr);
rhp = mhp->rhp;
@@ -766,7 +764,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
kfree((void *) (unsigned long) mhp->kva);
if (mhp->umem)
ib_umem_release(mhp->umem);
- PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
+ pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
kfree(mhp);
return 0;
}
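[Editor's note] In the c4iw_reg_user_mr() hunk above, the per-page offset changes from mhp->umem->page_size * k to k << shift with shift = mhp->umem->page_shift. The two are equivalent whenever the page size is a power of two, which it is for ib_umem pages. A tiny standalone check of that identity, using made-up values, is:

/* Userspace check, illustrative values only:
 * page_size * k == k << page_shift when page_size == 1 << page_shift. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t page_shift = 12;			/* e.g. 4 KiB pages */
	uint64_t page_size = UINT64_C(1) << page_shift;

	for (uint64_t k = 0; k < 16; k++)
		assert(page_size * k == (k << page_shift));

	printf("multiply and shift agree\n");
	return 0;
}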
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index df64417ab6f2..0771e9a4d061 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -59,7 +59,7 @@ module_param(fastreg_support, int, 0644);
MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
static struct ib_ah *c4iw_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
@@ -102,7 +102,7 @@ void _c4iw_free_ucontext(struct kref *kref)
ucontext = container_of(kref, struct c4iw_ucontext, kref);
rhp = to_c4iw_dev(ucontext->ibucontext.device);
- PDBG("%s ucontext %p\n", __func__, ucontext);
+ pr_debug("%s ucontext %p\n", __func__, ucontext);
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
@@ -113,7 +113,7 @@ static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
{
struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
- PDBG("%s context %p\n", __func__, context);
+ pr_debug("%s context %p\n", __func__, context);
c4iw_put_ucontext(ucontext);
return 0;
}
@@ -123,12 +123,11 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
{
struct c4iw_ucontext *context;
struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
- static int warned;
struct c4iw_alloc_ucontext_resp uresp;
int ret = 0;
struct c4iw_mm_entry *mm = NULL;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context) {
ret = -ENOMEM;
@@ -141,8 +140,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
kref_init(&context->kref);
if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
- if (!warned++)
- pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled.");
+ pr_err_once("Warning - downlevel libcxgb4 (non-fatal), device status page disabled\n");
rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED;
} else {
mm = kmalloc(sizeof(*mm), GFP_KERNEL);
@@ -187,8 +185,8 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
struct c4iw_ucontext *ucontext;
u64 addr;
- PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
- key, len);
+ pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
+ key, len);
if (vma->vm_start & (PAGE_SIZE-1))
return -EINVAL;
@@ -253,7 +251,7 @@ static int c4iw_deallocate_pd(struct ib_pd *pd)
php = to_c4iw_pd(pd);
rhp = php->rhp;
- PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
+ pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
c4iw_put_resource(&rhp->rdev.resource.pdid_table, php->pdid);
mutex_lock(&rhp->rdev.stats.lock);
rhp->rdev.stats.pd.cur--;
@@ -270,7 +268,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
u32 pdid;
struct c4iw_dev *rhp;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
rhp = (struct c4iw_dev *) ibdev;
pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table);
if (!pdid)
@@ -293,14 +291,14 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
if (rhp->rdev.stats.pd.cur > rhp->rdev.stats.pd.max)
rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
mutex_unlock(&rhp->rdev.stats.lock);
- PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
+ pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
return &php->ibpd;
}
static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey)
{
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
*pkey = 0;
return 0;
}
@@ -310,8 +308,8 @@ static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index,
{
struct c4iw_dev *dev;
- PDBG("%s ibdev %p, port %d, index %d, gid %p\n",
- __func__, ibdev, port, index, gid);
+ pr_debug("%s ibdev %p, port %d, index %d, gid %p\n",
+ __func__, ibdev, port, index, gid);
dev = to_c4iw_dev(ibdev);
BUG_ON(port == 0);
memset(&(gid->raw[0]), 0, sizeof(gid->raw));
@@ -325,7 +323,7 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
struct c4iw_dev *dev;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
if (uhw->inlen || uhw->outlen)
return -EINVAL;
@@ -366,7 +364,7 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
struct net_device *netdev;
struct in_device *inetdev;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("%s ibdev %p\n", __func__, ibdev);
dev = to_c4iw_dev(ibdev);
netdev = dev->rdev.lldi.ports[port-1];
@@ -408,7 +406,7 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%d\n",
CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
}
@@ -421,7 +419,7 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
struct ethtool_drvinfo info;
struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0];
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver);
}
@@ -431,7 +429,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
c4iw_dev->rdev.lldi.pdev->device);
}
@@ -524,7 +522,7 @@ static void get_dev_fw_str(struct ib_device *dev, char *str,
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("%s dev 0x%p\n", __func__, dev);
snprintf(str, str_len, "%u.%u.%u.%u",
FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers),
@@ -538,7 +536,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
int ret;
int i;
- PDBG("%s c4iw_dev %p\n", __func__, dev);
+ pr_debug("%s c4iw_dev %p\n", __func__, dev);
BUG_ON(!dev->rdev.lldi.ports[0]);
strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
@@ -648,7 +646,7 @@ void c4iw_unregister_device(struct c4iw_dev *dev)
{
int i;
- PDBG("%s c4iw_dev %p\n", __func__, dev);
+ pr_debug("%s c4iw_dev %p\n", __func__, dev);
for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i)
device_remove_file(&dev->ibdev.dev,
c4iw_class_attributes[i]);
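[Editor's note] The c4iw_alloc_ucontext() hunk above drops the open-coded "static int warned" counter in favour of pr_err_once(). A side-by-side sketch of the two forms (function names are illustrative, not from the patch):

#include <linux/printk.h>

/* before: open-coded once-only warning */
void report_downlevel_old(void)
{
	static int warned;

	if (!warned++)		/* prints only on the first call */
		pr_err("downlevel library detected (non-fatal)\n");
}

/* after: printk tracks the "already printed" state internally */
void report_downlevel_new(void)
{
	pr_err_once("downlevel library detected (non-fatal)\n");
}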
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index d4fd2f5c8326..8e4154b4253e 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -254,11 +254,11 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
ret = -ENOMEM;
goto free_sq;
}
- PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
- __func__, wq->sq.queue,
- (unsigned long long)virt_to_phys(wq->sq.queue),
- wq->rq.queue,
- (unsigned long long)virt_to_phys(wq->rq.queue));
+ pr_debug("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
+ __func__, wq->sq.queue,
+ (unsigned long long)virt_to_phys(wq->sq.queue),
+ wq->rq.queue,
+ (unsigned long long)virt_to_phys(wq->rq.queue));
memset(wq->rq.queue, 0, wq->rq.memsize);
dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
@@ -275,7 +275,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
* User mode must have bar2 access.
*/
if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
- pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n",
+ pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n",
pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
goto free_dma;
}
@@ -362,9 +362,9 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
if (ret)
goto free_dma;
- PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n",
- __func__, wq->sq.qid, wq->rq.qid, wq->db,
- wq->sq.bar2_va, wq->rq.bar2_va);
+ pr_debug("%s sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n",
+ __func__, wq->sq.qid, wq->rq.qid, wq->db,
+ wq->sq.bar2_va, wq->rq.bar2_va);
return 0;
free_dma:
@@ -725,7 +725,7 @@ static void free_qp_work(struct work_struct *work)
ucontext = qhp->ucontext;
rhp = qhp->rhp;
- PDBG("%s qhp %p ucontext %p\n", __func__, qhp, ucontext);
+ pr_debug("%s qhp %p ucontext %p\n", __func__, qhp, ucontext);
destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
@@ -739,19 +739,19 @@ static void queue_qp_free(struct kref *kref)
struct c4iw_qp *qhp;
qhp = container_of(kref, struct c4iw_qp, kref);
- PDBG("%s qhp %p\n", __func__, qhp);
+ pr_debug("%s qhp %p\n", __func__, qhp);
queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work);
}
void c4iw_qp_add_ref(struct ib_qp *qp)
{
- PDBG("%s ib_qp %p\n", __func__, qp);
+ pr_debug("%s ib_qp %p\n", __func__, qp);
kref_get(&to_c4iw_qp(qp)->kref);
}
void c4iw_qp_rem_ref(struct ib_qp *qp)
{
- PDBG("%s ib_qp %p\n", __func__, qp);
+ pr_debug("%s ib_qp %p\n", __func__, qp);
kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free);
}
@@ -959,8 +959,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey);
break;
default:
- PDBG("%s post of type=%d TBD!\n", __func__,
- wr->opcode);
+ pr_debug("%s post of type=%d TBD!\n", __func__,
+ wr->opcode);
err = -EINVAL;
}
if (err) {
@@ -981,9 +981,10 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
- PDBG("%s cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n",
- __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
- swsqe->opcode, swsqe->read_len);
+ pr_debug("%s cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n",
+ __func__,
+ (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
+ swsqe->opcode, swsqe->read_len);
wr = wr->next;
num_wrs--;
t4_sq_produce(&qhp->wq, len16);
@@ -1057,8 +1058,9 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
wqe->recv.r2[1] = 0;
wqe->recv.r2[2] = 0;
wqe->recv.len16 = len16;
- PDBG("%s cookie 0x%llx pidx %u\n", __func__,
- (unsigned long long) wr->wr_id, qhp->wq.rq.pidx);
+ pr_debug("%s cookie 0x%llx pidx %u\n",
+ __func__,
+ (unsigned long long)wr->wr_id, qhp->wq.rq.pidx);
t4_rq_produce(&qhp->wq, len16);
idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
wr = wr->next;
@@ -1217,8 +1219,8 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
struct sk_buff *skb;
struct terminate_message *term;
- PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
- qhp->ep->hwtid);
+ pr_debug("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
+ qhp->ep->hwtid);
skb = skb_dequeue(&qhp->ep->com.ep_skb_list);
if (WARN_ON(!skb))
@@ -1254,7 +1256,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
int rq_flushed, sq_flushed;
unsigned long flag;
- PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
+ pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
/* locking hierarchy: cq lock first, then qp lock. */
spin_lock_irqsave(&rchp->lock, flag);
@@ -1339,8 +1341,8 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
int ret;
struct sk_buff *skb;
- PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
- ep->hwtid);
+ pr_debug("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
+ ep->hwtid);
skb = skb_dequeue(&ep->com.ep_skb_list);
if (WARN_ON(!skb))
@@ -1366,13 +1368,13 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
ret = c4iw_wait_for_reply(&rhp->rdev, &ep->com.wr_wait, qhp->ep->hwtid,
qhp->wq.sq.qid, __func__);
out:
- PDBG("%s ret %d\n", __func__, ret);
+ pr_debug("%s ret %d\n", __func__, ret);
return ret;
}
static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
{
- PDBG("%s p2p_type = %d\n", __func__, p2p_type);
+ pr_debug("%s p2p_type = %d\n", __func__, p2p_type);
memset(&init->u, 0, sizeof init->u);
switch (p2p_type) {
case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
@@ -1401,8 +1403,8 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
int ret;
struct sk_buff *skb;
- PDBG("%s qhp %p qid 0x%x tid %u ird %u ord %u\n", __func__, qhp,
- qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
+ pr_debug("%s qhp %p qid 0x%x tid %u ird %u ord %u\n", __func__, qhp,
+ qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
if (!skb) {
@@ -1474,7 +1476,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
err1:
free_ird(rhp, qhp->attr.max_ird);
out:
- PDBG("%s ret %d\n", __func__, ret);
+ pr_debug("%s ret %d\n", __func__, ret);
return ret;
}
@@ -1491,9 +1493,10 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
int free = 0;
struct c4iw_ep *ep = NULL;
- PDBG("%s qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n", __func__,
- qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state,
- (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
+ pr_debug("%s qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n",
+ __func__,
+ qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state,
+ (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
mutex_lock(&qhp->mutex);
@@ -1671,16 +1674,15 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
goto err;
break;
default:
- printk(KERN_ERR "%s in a bad state %d\n",
- __func__, qhp->attr.state);
+ pr_err("%s in a bad state %d\n", __func__, qhp->attr.state);
ret = -EINVAL;
goto err;
break;
}
goto out;
err:
- PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
- qhp->wq.sq.qid);
+ pr_debug("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
+ qhp->wq.sq.qid);
/* disassociate the LLP connection */
qhp->attr.llp_stream_handle = NULL;
@@ -1716,7 +1718,7 @@ out:
*/
if (free)
c4iw_put_ep(&ep->com);
- PDBG("%s exit state %d\n", __func__, qhp->attr.state);
+ pr_debug("%s exit state %d\n", __func__, qhp->attr.state);
return ret;
}
@@ -1746,7 +1748,7 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
c4iw_qp_rem_ref(ib_qp);
- PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
+ pr_debug("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
return 0;
}
@@ -1765,7 +1767,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("%s ib_pd %p\n", __func__, pd);
if (attrs->qp_type != IB_QPT_RC)
return ERR_PTR(-EINVAL);
@@ -1936,11 +1938,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
qhp->ibqp.qp_num = qhp->wq.sq.qid;
init_timer(&(qhp->timer));
INIT_LIST_HEAD(&qhp->db_fc_entry);
- PDBG("%s sq id %u size %u memsize %zu num_entries %u "
- "rq id %u size %u memsize %zu num_entries %u\n", __func__,
- qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
- attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size,
- qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
+ pr_debug("%s sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n",
+ __func__,
+ qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
+ attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size,
+ qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
return &qhp->ibqp;
err8:
kfree(ma_sync_key_mm);
@@ -1970,7 +1972,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
enum c4iw_qp_attr_mask mask = 0;
struct c4iw_qp_attributes attrs;
- PDBG("%s ib_qp %p\n", __func__, ibqp);
+ pr_debug("%s ib_qp %p\n", __func__, ibqp);
/* iwarp does not support the RTR state */
if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
@@ -2016,7 +2018,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
{
- PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
+ pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
}
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index 67df71a7012e..8ff0cbe5cb16 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -90,7 +90,7 @@ u32 c4iw_get_resource(struct c4iw_id_table *id_table)
void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry)
{
- PDBG("%s entry 0x%x\n", __func__, entry);
+ pr_debug("%s entry 0x%x\n", __func__, entry);
c4iw_id_free(id_table, entry);
}
@@ -141,7 +141,7 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
}
out:
mutex_unlock(&uctx->lock);
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("%s qid 0x%x\n", __func__, qid);
mutex_lock(&rdev->stats.lock);
if (rdev->stats.qid.cur > rdev->stats.qid.max)
rdev->stats.qid.max = rdev->stats.qid.cur;
@@ -157,7 +157,7 @@ void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,
entry = kmalloc(sizeof *entry, GFP_KERNEL);
if (!entry)
return;
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("%s qid 0x%x\n", __func__, qid);
entry->qid = qid;
mutex_lock(&uctx->lock);
list_add_tail(&entry->entry, &uctx->cqids);
@@ -215,7 +215,7 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
}
out:
mutex_unlock(&uctx->lock);
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("%s qid 0x%x\n", __func__, qid);
mutex_lock(&rdev->stats.lock);
if (rdev->stats.qid.cur > rdev->stats.qid.max)
rdev->stats.qid.max = rdev->stats.qid.cur;
@@ -231,7 +231,7 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid,
entry = kmalloc(sizeof *entry, GFP_KERNEL);
if (!entry)
return;
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("%s qid 0x%x\n", __func__, qid);
entry->qid = qid;
mutex_lock(&uctx->lock);
list_add_tail(&entry->entry, &uctx->qpids);
@@ -254,7 +254,7 @@ void c4iw_destroy_resource(struct c4iw_resource *rscp)
u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size)
{
unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
mutex_lock(&rdev->stats.lock);
if (addr) {
rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT);
@@ -268,7 +268,7 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size)
void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size);
mutex_lock(&rdev->stats.lock);
rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT);
mutex_unlock(&rdev->stats.lock);
@@ -290,19 +290,17 @@ int c4iw_pblpool_create(struct c4iw_rdev *rdev)
while (pbl_start < pbl_top) {
pbl_chunk = min(pbl_top - pbl_start + 1, pbl_chunk);
if (gen_pool_add(rdev->pbl_pool, pbl_start, pbl_chunk, -1)) {
- PDBG("%s failed to add PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
+ pr_debug("%s failed to add PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
- printk(KERN_WARNING MOD
- "Failed to add all PBL chunks (%x/%x)\n",
- pbl_start,
- pbl_top - pbl_start);
+ pr_warn("Failed to add all PBL chunks (%x/%x)\n",
+ pbl_start, pbl_top - pbl_start);
return 0;
}
pbl_chunk >>= 1;
} else {
- PDBG("%s added PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
+ pr_debug("%s added PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
pbl_start += pbl_chunk;
}
}
@@ -324,9 +322,9 @@ void c4iw_pblpool_destroy(struct c4iw_rdev *rdev)
u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
{
unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
if (!addr)
- pr_warn_ratelimited(MOD "%s: Out of RQT memory\n",
+ pr_warn_ratelimited("%s: Out of RQT memory\n",
pci_name(rdev->lldi.pdev));
mutex_lock(&rdev->stats.lock);
if (addr) {
@@ -341,7 +339,7 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6);
mutex_lock(&rdev->stats.lock);
rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT);
mutex_unlock(&rdev->stats.lock);
@@ -363,18 +361,17 @@ int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
while (rqt_start < rqt_top) {
rqt_chunk = min(rqt_top - rqt_start + 1, rqt_chunk);
if (gen_pool_add(rdev->rqt_pool, rqt_start, rqt_chunk, -1)) {
- PDBG("%s failed to add RQT chunk (%x/%x)\n",
- __func__, rqt_start, rqt_chunk);
+ pr_debug("%s failed to add RQT chunk (%x/%x)\n",
+ __func__, rqt_start, rqt_chunk);
if (rqt_chunk <= 1024 << MIN_RQT_SHIFT) {
- printk(KERN_WARNING MOD
- "Failed to add all RQT chunks (%x/%x)\n",
- rqt_start, rqt_top - rqt_start);
+ pr_warn("Failed to add all RQT chunks (%x/%x)\n",
+ rqt_start, rqt_top - rqt_start);
return 0;
}
rqt_chunk >>= 1;
} else {
- PDBG("%s added RQT chunk (%x/%x)\n",
- __func__, rqt_start, rqt_chunk);
+ pr_debug("%s added RQT chunk (%x/%x)\n",
+ __func__, rqt_start, rqt_chunk);
rqt_start += rqt_chunk;
}
}
@@ -394,7 +391,7 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev)
u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size)
{
unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
if (addr) {
mutex_lock(&rdev->stats.lock);
rdev->stats.ocqp.cur += roundup(size, 1 << MIN_OCQP_SHIFT);
@@ -407,7 +404,7 @@ u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size)
void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+ pr_debug("%s addr 0x%x size %d\n", __func__, addr, size);
mutex_lock(&rdev->stats.lock);
rdev->stats.ocqp.cur -= roundup(size, 1 << MIN_OCQP_SHIFT);
mutex_unlock(&rdev->stats.lock);
@@ -429,18 +426,17 @@ int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev)
while (start < top) {
chunk = min(top - start + 1, chunk);
if (gen_pool_add(rdev->ocqp_pool, start, chunk, -1)) {
- PDBG("%s failed to add OCQP chunk (%x/%x)\n",
- __func__, start, chunk);
+ pr_debug("%s failed to add OCQP chunk (%x/%x)\n",
+ __func__, start, chunk);
if (chunk <= 1024 << MIN_OCQP_SHIFT) {
- printk(KERN_WARNING MOD
- "Failed to add all OCQP chunks (%x/%x)\n",
- start, top - start);
+ pr_warn("Failed to add all OCQP chunks (%x/%x)\n",
+ start, top - start);
return 0;
}
chunk >>= 1;
} else {
- PDBG("%s added OCQP chunk (%x/%x)\n",
- __func__, start, chunk);
+ pr_debug("%s added OCQP chunk (%x/%x)\n",
+ __func__, start, chunk);
start += chunk;
}
}
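[Editor's note] The pool-creation loops touched above (PBL, RQT and OCQP) all use the same strategy: attempt to add the largest chunk, halve the chunk size on failure, and give up only below a minimum size. A small userspace sketch of that strategy, with a stand-in for gen_pool_add() and made-up sizes, is:

#include <stdio.h>

/* stand-in for gen_pool_add(): pretend anything over 4096 bytes fails */
static int try_add(unsigned long start, unsigned long len)
{
	(void)start;
	return len > 4096 ? -1 : 0;
}

int main(void)
{
	unsigned long start = 0, top = 64 * 1024, chunk = 16 * 1024;

	while (start < top) {
		if (chunk > top - start)
			chunk = top - start;
		if (try_add(start, chunk)) {
			if (chunk <= 1024) {
				fprintf(stderr, "failed to add all chunks\n");
				return 1;
			}
			chunk >>= 1;	/* retry with a smaller chunk */
		} else {
			start += chunk;	/* chunk added, move on */
		}
	}
	printf("pool populated up to %lu\n", top);
	return 0;
}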
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 640d22148a3e..e765c00303cd 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -466,14 +466,14 @@ static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
wmb();
if (wq->sq.bar2_va) {
if (inc == 1 && wq->sq.bar2_qid == 0 && wqe) {
- PDBG("%s: WC wq->sq.pidx = %d\n",
- __func__, wq->sq.pidx);
+ pr_debug("%s: WC wq->sq.pidx = %d\n",
+ __func__, wq->sq.pidx);
pio_copy((u64 __iomem *)
(wq->sq.bar2_va + SGE_UDB_WCDOORBELL),
(u64 *)wqe);
} else {
- PDBG("%s: DB wq->sq.pidx = %d\n",
- __func__, wq->sq.pidx);
+ pr_debug("%s: DB wq->sq.pidx = %d\n",
+ __func__, wq->sq.pidx);
writel(PIDX_T5_V(inc) | QID_V(wq->sq.bar2_qid),
wq->sq.bar2_va + SGE_UDB_KDOORBELL);
}
@@ -493,14 +493,14 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc,
wmb();
if (wq->rq.bar2_va) {
if (inc == 1 && wq->rq.bar2_qid == 0 && wqe) {
- PDBG("%s: WC wq->rq.pidx = %d\n",
- __func__, wq->rq.pidx);
+ pr_debug("%s: WC wq->rq.pidx = %d\n",
+ __func__, wq->rq.pidx);
pio_copy((u64 __iomem *)
(wq->rq.bar2_va + SGE_UDB_WCDOORBELL),
(void *)wqe);
} else {
- PDBG("%s: DB wq->rq.pidx = %d\n",
- __func__, wq->rq.pidx);
+ pr_debug("%s: DB wq->rq.pidx = %d\n",
+ __func__, wq->rq.pidx);
writel(PIDX_T5_V(inc) | QID_V(wq->rq.bar2_qid),
wq->rq.bar2_va + SGE_UDB_KDOORBELL);
}
@@ -601,7 +601,8 @@ static inline void t4_swcq_produce(struct t4_cq *cq)
{
cq->sw_in_use++;
if (cq->sw_in_use == cq->size) {
- PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+ pr_debug("%s cxgb4 sw cq overflow cqid %u\n",
+ __func__, cq->cqid);
cq->error = 1;
BUG_ON(1);
}
@@ -656,7 +657,7 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
if (cq->queue[prev_cidx].bits_type_ts != cq->bits_type_ts) {
ret = -EOVERFLOW;
cq->error = 1;
- printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid);
+ pr_err("cq overflow cqid %u\n", cq->cqid);
BUG_ON(1);
} else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) {
@@ -672,7 +673,8 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq)
{
if (cq->sw_in_use == cq->size) {
- PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+ pr_debug("%s cxgb4 sw cq overflow cqid %u\n",
+ __func__, cq->cqid);
cq->error = 1;
BUG_ON(1);
return NULL;
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 0cf97a09b64b..88085f65432e 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
- verbs_txreq.o
+ verbs_txreq.o vnic_main.o vnic_sdma.o
hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3b49b5..794e6814a531 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -229,14 +229,17 @@ static inline void aspm_ctx_timer_function(unsigned long data)
spin_unlock_irqrestore(&rcd->aspm_lock, flags);
}
-/* Disable interrupt processing for verbs contexts when PSM contexts are open */
+/*
+ * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
+ * are open.
+ */
static inline void aspm_disable_all(struct hfi1_devdata *dd)
{
struct hfi1_ctxtdata *rcd;
unsigned long flags;
unsigned i;
- for (i = 0; i < dd->first_user_ctxt; i++) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
del_timer_sync(&rcd->aspm_timer);
spin_lock_irqsave(&rcd->aspm_lock, flags);
@@ -260,7 +263,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd)
if (aspm_mode != ASPM_MODE_DYNAMIC)
return;
- for (i = 0; i < dd->first_user_ctxt; i++) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
spin_lock_irqsave(&rcd->aspm_lock, flags);
rcd->aspm_intr_enable = true;
@@ -276,7 +279,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
(unsigned long)rcd);
rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
aspm_mode == ASPM_MODE_DYNAMIC &&
- rcd->ctxt < rcd->dd->first_user_ctxt;
+ rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
}
static inline void aspm_init(struct hfi1_devdata *dd)
@@ -286,7 +289,7 @@ static inline void aspm_init(struct hfi1_devdata *dd)
spin_lock_init(&dd->aspm_lock);
dd->aspm_supported = aspm_hw_l1_supported(dd);
- for (i = 0; i < dd->first_user_ctxt; i++)
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++)
aspm_ctx_init(dd->rcd[i]);
/* Start with ASPM disabled */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 121a4c920f1b..5d6b1eeaa9a0 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -64,6 +64,7 @@
#include "platform.h"
#include "aspm.h"
#include "affinity.h"
+#include "debugfs.h"
#define NUM_IB_PORTS 1
@@ -125,9 +126,16 @@ struct flag_table {
#define DEFAULT_KRCVQS 2
#define MIN_KERNEL_KCTXTS 2
#define FIRST_KERNEL_KCTXT 1
-/* sizes for both the QP and RSM map tables */
-#define NUM_MAP_ENTRIES 256
-#define NUM_MAP_REGS 32
+
+/*
+ * RSM instance allocation
+ * 0 - Verbs
+ * 1 - User Fecn Handling
+ * 2 - Vnic
+ */
+#define RSM_INS_VERBS 0
+#define RSM_INS_FECN 1
+#define RSM_INS_VNIC 2
/* Bit offset into the GUID which carries HFI id information */
#define GUID_HFI_INDEX_SHIFT 39
@@ -138,8 +146,7 @@ struct flag_table {
#define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
#define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)
-/* RSM fields */
-
+/* RSM fields for Verbs */
/* packet type */
#define IB_PACKET_TYPE 2ull
#define QW_SHIFT 6ull
@@ -169,6 +176,28 @@ struct flag_table {
/* QPN[m+n:1] QW 1, OFFSET 1 */
#define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull))
+/* RSM fields for Vnic */
+/* L2_TYPE: QW 0, OFFSET 61 - for match */
+#define L2_TYPE_QW 0ull
+#define L2_TYPE_BIT_OFFSET 61ull
+#define L2_TYPE_OFFSET(off) ((L2_TYPE_QW << QW_SHIFT) | (off))
+#define L2_TYPE_MATCH_OFFSET L2_TYPE_OFFSET(L2_TYPE_BIT_OFFSET)
+#define L2_TYPE_MASK 3ull
+#define L2_16B_VALUE 2ull
+
+/* L4_TYPE QW 1, OFFSET 0 - for match */
+#define L4_TYPE_QW 1ull
+#define L4_TYPE_BIT_OFFSET 0ull
+#define L4_TYPE_OFFSET(off) ((L4_TYPE_QW << QW_SHIFT) | (off))
+#define L4_TYPE_MATCH_OFFSET L4_TYPE_OFFSET(L4_TYPE_BIT_OFFSET)
+#define L4_16B_TYPE_MASK 0xFFull
+#define L4_16B_ETH_VALUE 0x78ull
+
+/* 16B VESWID - for select */
+#define L4_16B_HDR_VESWID_OFFSET ((2 << QW_SHIFT) | (16ull))
+/* 16B ENTROPY - for select */
+#define L2_16B_ENTROPY_OFFSET ((1 << QW_SHIFT) | (32ull))
+
/* defines to build power on SC2VL table */
#define SC2VL_VAL( \
num, \
@@ -1026,7 +1055,7 @@ static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
-static void set_partition_keys(struct hfi1_pportdata *);
+static void set_partition_keys(struct hfi1_pportdata *ppd);
static const char *link_state_name(u32 state);
static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
u32 state);
@@ -1039,12 +1068,14 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
-static void handle_temp_err(struct hfi1_devdata *);
-static void dc_shutdown(struct hfi1_devdata *);
-static void dc_start(struct hfi1_devdata *);
+static void handle_temp_err(struct hfi1_devdata *dd);
+static void dc_shutdown(struct hfi1_devdata *dd);
+static void dc_start(struct hfi1_devdata *dd);
static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
unsigned int *np);
static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
+static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms);
+static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index);
/*
* Error interrupt table entry. This is used as input to the interrupt
@@ -6379,18 +6410,17 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
*
* The expectation is that the caller of this routine would have taken
* care of properly transitioning the link into the correct state.
+ * NOTE: the caller needs to acquire the dd->dc8051_lock lock
+ * before calling this function.
*/
-static void dc_shutdown(struct hfi1_devdata *dd)
+static void _dc_shutdown(struct hfi1_devdata *dd)
{
- unsigned long flags;
+ lockdep_assert_held(&dd->dc8051_lock);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
- if (dd->dc_shutdown) {
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+ if (dd->dc_shutdown)
return;
- }
+
dd->dc_shutdown = 1;
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
/* Shutdown the LCB */
lcb_shutdown(dd, 1);
/*
@@ -6401,35 +6431,45 @@ static void dc_shutdown(struct hfi1_devdata *dd)
write_csr(dd, DC_DC8051_CFG_RST, 0x1);
}
+static void dc_shutdown(struct hfi1_devdata *dd)
+{
+ mutex_lock(&dd->dc8051_lock);
+ _dc_shutdown(dd);
+ mutex_unlock(&dd->dc8051_lock);
+}
+
/*
* Calling this after the DC has been brought out of reset should not
* do any damage.
+ * NOTE: the caller needs to acquire the dd->dc8051_lock lock
+ * before calling this function.
*/
-static void dc_start(struct hfi1_devdata *dd)
+static void _dc_start(struct hfi1_devdata *dd)
{
- unsigned long flags;
- int ret;
+ lockdep_assert_held(&dd->dc8051_lock);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
if (!dd->dc_shutdown)
- goto done;
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+ return;
+
/* Take the 8051 out of reset */
write_csr(dd, DC_DC8051_CFG_RST, 0ull);
/* Wait until 8051 is ready */
- ret = wait_fm_ready(dd, TIMEOUT_8051_START);
- if (ret) {
+ if (wait_fm_ready(dd, TIMEOUT_8051_START))
dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
__func__);
- }
+
/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
write_csr(dd, DCC_CFG_RESET, 0x10);
/* lcb_shutdown() with abort=1 does not restore these */
write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
dd->dc_shutdown = 0;
-done:
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+}
+
+static void dc_start(struct hfi1_devdata *dd)
+{
+ mutex_lock(&dd->dc8051_lock);
+ _dc_start(dd);
+ mutex_unlock(&dd->dc8051_lock);
}
/*
@@ -6701,7 +6741,13 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
int i;
/* enable all kernel contexts */
- for (i = 0; i < dd->n_krcv_queues; i++) {
+ for (i = 0; i < dd->num_rcv_contexts; i++) {
+ struct hfi1_ctxtdata *rcd = dd->rcd[i];
+
+ /* Ensure all non-user contexts(including vnic) are enabled */
+ if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER))
+ continue;
+
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
@@ -7077,7 +7123,7 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
- /* Sanity check - ppd->pkeys[2] should be 0, or already initalized */
+ /* Sanity check - ppd->pkeys[2] should be 0, or already initialized */
if (!((ppd->pkeys[2] == 0) || (ppd->pkeys[2] == FULL_MGMT_P_KEY)))
dd_dev_warn(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
__func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
@@ -7165,7 +7211,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
* set the max_rate field in handle_verify_cap until v0.19.
*/
if ((dd->icode == ICODE_RTL_SILICON) &&
- (dd->dc8051_ver < dc8051_ver(0, 19))) {
+ (dd->dc8051_ver < dc8051_ver(0, 19, 0))) {
/* max_rate: 0 = 12.5G, 1 = 25G */
switch (max_rate) {
case 0:
@@ -7277,15 +7323,6 @@ void handle_verify_cap(struct work_struct *work)
lcb_shutdown(dd, 0);
adjust_lcb_for_fpga_serdes(dd);
- /*
- * These are now valid:
- * remote VerifyCap fields in the general LNI config
- * CSR DC8051_STS_REMOTE_GUID
- * CSR DC8051_STS_REMOTE_NODE_TYPE
- * CSR DC8051_STS_REMOTE_FM_SECURITY
- * CSR DC8051_STS_REMOTE_PORT_NO
- */
-
read_vc_remote_phy(dd, &power_management, &continious);
read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf,
&partner_supported_crc);
@@ -7350,7 +7387,7 @@ void handle_verify_cap(struct work_struct *work)
}
ppd->link_speed_active = 0; /* invalid value */
- if (dd->dc8051_ver < dc8051_ver(0, 20)) {
+ if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* remote_tx_rate: 0 = 12.5G, 1 = 25G */
switch (remote_tx_rate) {
case 0:
@@ -7416,20 +7453,6 @@ void handle_verify_cap(struct work_struct *work)
write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
set_8051_lcb_access(dd);
- ppd->neighbor_guid =
- read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
- ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
- DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
- ppd->neighbor_type =
- read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
- DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
- ppd->neighbor_fm_security =
- read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
- DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
- dd_dev_info(dd,
- "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
- ppd->neighbor_guid, ppd->neighbor_type,
- ppd->mgmt_allowed, ppd->neighbor_fm_security);
if (ppd->mgmt_allowed)
add_full_mgmt_pkey(ppd);
@@ -7897,6 +7920,9 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
}
+ if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev)))
+ reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK;
+
/* report any remaining errors */
if (reg)
dd_dev_info_ratelimited(dd, "DCC Error: %s\n",
@@ -7995,7 +8021,9 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
if (likely(source < dd->num_rcv_contexts)) {
rcd = dd->rcd[source];
if (rcd) {
- if (source < dd->first_user_ctxt)
+ /* Check for non-user contexts, including vnic */
+ if ((source < dd->first_dyn_alloc_ctxt) ||
+ (rcd->sc && (rcd->sc->type == SC_KERNEL)))
rcd->do_interrupt(rcd, 0);
else
handle_user_interrupt(rcd);
@@ -8023,7 +8051,8 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
rcd = dd->rcd[source];
if (rcd) {
/* only pay attention to user urgent interrupts */
- if (source >= dd->first_user_ctxt)
+ if ((source >= dd->first_dyn_alloc_ctxt) &&
+ (!rcd->sc || (rcd->sc->type == SC_USER)))
handle_user_interrupt(rcd);
return; /* OK */
}
@@ -8156,10 +8185,10 @@ static irqreturn_t sdma_interrupt(int irq, void *data)
/* handle the interrupt(s) */
sdma_engine_interrupt(sde, status);
- } else
+ } else {
dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
sde->this_idx);
-
+ }
return IRQ_HANDLED;
}
@@ -8344,6 +8373,52 @@ static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
}
/*
+ * Provide a cache for some of the LCB registers in case the LCB is
+ * unavailable.
+ * (The LCB is unavailable in certain link states, for example.)
+ */
+struct lcb_datum {
+ u32 off;
+ u64 val;
+};
+
+static struct lcb_datum lcb_cache[] = {
+ { DC_LCB_ERR_INFO_RX_REPLAY_CNT, 0},
+ { DC_LCB_ERR_INFO_SEQ_CRC_CNT, 0 },
+ { DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT, 0 },
+};
+
+static void update_lcb_cache(struct hfi1_devdata *dd)
+{
+ int i;
+ int ret;
+ u64 val;
+
+ for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
+ ret = read_lcb_csr(dd, lcb_cache[i].off, &val);
+
+ /* Update if we get good data */
+ if (likely(ret != -EBUSY))
+ lcb_cache[i].val = val;
+ }
+}
+
+static int read_lcb_cache(u32 off, u64 *val)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
+ if (lcb_cache[i].off == off) {
+ *val = lcb_cache[i].val;
+ return 0;
+ }
+ }
+
+ pr_warn("%s bad offset 0x%x\n", __func__, off);
+ return -1;
+}
+
+/*
* Read an LCB CSR. Access may not be in host control, so check.
* Return 0 on success, -EBUSY on failure.
*/
@@ -8354,9 +8429,13 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
/* if up, go through the 8051 for the value */
if (ppd->host_link_state & HLS_UP)
return read_lcb_via_8051(dd, addr, data);
- /* if going up or down, no access */
- if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
- return -EBUSY;
+ /* if going up or down, check the cache, otherwise, no access */
+ if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) {
+ if (read_lcb_cache(addr, data))
+ return -EBUSY;
+ return 0;
+ }
+
/* otherwise, host has access */
*data = read_csr(dd, addr);
return 0;
@@ -8371,7 +8450,7 @@ static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
int ret;
if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR ||
- (dd->dc8051_ver < dc8051_ver(0, 20))) {
+ (dd->dc8051_ver < dc8051_ver(0, 20, 0))) {
if (acquire_lcb_access(dd, 0) == 0) {
write_csr(dd, addr, data);
release_lcb_access(dd, 0);
@@ -8420,16 +8499,11 @@ static int do_8051_command(
{
u64 reg, completed;
int return_code;
- unsigned long flags;
unsigned long timeout;
hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
- /*
- * Alternative to holding the lock for a long time:
- * - keep busy wait - have other users bounce off
- */
- spin_lock_irqsave(&dd->dc8051_lock, flags);
+ mutex_lock(&dd->dc8051_lock);
/* We can't send any commands to the 8051 if it's in reset */
if (dd->dc_shutdown) {
@@ -8455,10 +8529,8 @@ static int do_8051_command(
return_code = -ENXIO;
goto fail;
}
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
- dc_shutdown(dd);
- dc_start(dd);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
+ _dc_shutdown(dd);
+ _dc_start(dd);
}
/*
@@ -8539,8 +8611,7 @@ static int do_8051_command(
write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
fail:
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
-
+ mutex_unlock(&dd->dc8051_lock);
return return_code;
}
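[Editor's note] The do_8051_command() hunk above completes the dc8051_lock conversion from a spinlock to a mutex, with _dc_shutdown()/_dc_start() acting as "caller already holds the lock" variants of dc_shutdown()/dc_start(). A minimal kernel-style sketch of that locked/unlocked helper pattern (names are illustrative, not from the patch):

#include <linux/mutex.h>
#include <linux/lockdep.h>

static DEFINE_MUTEX(demo_lock);
static int demo_state;

/* caller must hold demo_lock; lockdep verifies that on debug kernels */
static void _demo_set(int v)
{
	lockdep_assert_held(&demo_lock);
	demo_state = v;
}

/* public wrapper takes and releases the lock itself */
static void demo_set(int v)
{
	mutex_lock(&demo_lock);
	_demo_set(v);
	mutex_unlock(&demo_lock);
}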
@@ -8677,13 +8748,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
& REMOTE_DEVICE_REV_MASK;
}
-void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
+void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
+ u8 *ver_patch)
{
u32 frame;
read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
- *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
- *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
+ *ver_major = (frame >> STS_FM_VERSION_MAJOR_SHIFT) &
+ STS_FM_VERSION_MAJOR_MASK;
+ *ver_minor = (frame >> STS_FM_VERSION_MINOR_SHIFT) &
+ STS_FM_VERSION_MINOR_MASK;
+
+ read_8051_config(dd, VERSION_PATCH, GENERAL_CONFIG, &frame);
+ *ver_patch = (frame >> STS_FM_VERSION_PATCH_SHIFT) &
+ STS_FM_VERSION_PATCH_MASK;
}
static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
@@ -8891,8 +8969,6 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message)
*/
static int do_quick_linkup(struct hfi1_devdata *dd)
{
- u64 reg;
- unsigned long timeout;
int ret;
lcb_shutdown(dd, 0);
@@ -8915,19 +8991,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
write_csr(dd, DC_LCB_CFG_RUN,
1ull << DC_LCB_CFG_RUN_EN_SHIFT);
- /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
- timeout = jiffies + msecs_to_jiffies(10);
- while (1) {
- reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
- if (reg)
- break;
- if (time_after(jiffies, timeout)) {
- dd_dev_err(dd,
- "timeout waiting for LINK_TRANSFER_ACTIVE\n");
- return -ETIMEDOUT;
- }
- udelay(2);
- }
+ ret = wait_link_transfer_active(dd, 10);
+ if (ret)
+ return ret;
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
@@ -9091,7 +9157,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (ret)
goto set_local_link_attributes_fail;
- if (dd->dc8051_ver < dc8051_ver(0, 20)) {
+ if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* set the tx rate to the fastest enabled */
if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
ppd->local_tx_rate = 1;
@@ -9274,7 +9340,7 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
- dd_dev_info(dd, "%s: QSFP cable on fire\n",
+ dd_dev_info(dd, "%s: QSFP cable temperature too high\n",
__func__);
if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
@@ -9494,8 +9560,11 @@ static int test_qsfp_read(struct hfi1_pportdata *ppd)
int ret;
u8 status;
- /* report success if not a QSFP */
- if (ppd->port_type != PORT_TYPE_QSFP)
+ /*
+ * Report success if the port is not a QSFP, or if it is a QSFP but
+ * the cable is not present
+ */
+ if (ppd->port_type != PORT_TYPE_QSFP || !qsfp_mod_present(ppd))
return 0;
/* read byte 2, the status byte */
@@ -10082,6 +10151,64 @@ static void check_lni_states(struct hfi1_pportdata *ppd)
decode_state_complete(ppd, last_remote_state, "received");
}
+/* wait up to wait_ms milliseconds for LINK_TRANSFER_ACTIVE to go to 1 */
+static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms)
+{
+ u64 reg;
+ unsigned long timeout;
+
+ /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
+ timeout = jiffies + msecs_to_jiffies(wait_ms);
+ while (1) {
+ reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
+ if (reg)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(dd,
+ "timeout waiting for LINK_TRANSFER_ACTIVE\n");
+ return -ETIMEDOUT;
+ }
+ udelay(2);
+ }
+ return 0;
+}
+
+/* called when the logical link state is not down as it should be */
+static void force_logical_link_state_down(struct hfi1_pportdata *ppd)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+
+ /*
+ * Bring link up in LCB loopback
+ */
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
+ write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
+ DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
+
+ write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
+ write_csr(dd, DC_LCB_CFG_REINIT_AS_SLAVE, 0);
+ write_csr(dd, DC_LCB_CFG_CNT_FOR_SKIP_STALL, 0x110);
+ write_csr(dd, DC_LCB_CFG_LOOPBACK, 0x2);
+
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
+ (void)read_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET);
+ udelay(3);
+ write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 1);
+ write_csr(dd, DC_LCB_CFG_RUN, 1ull << DC_LCB_CFG_RUN_EN_SHIFT);
+
+ wait_link_transfer_active(dd, 100);
+
+ /*
+ * Bring the link down again.
+ */
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
+ write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0);
+ write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0);
+
+ /* call again to adjust ppd->statusp, if needed */
+ get_logical_state(ppd);
+}
+
/*
* Helper for set_link_state(). Do not call except from that routine.
* Expects ppd->hls_mutex to be held.
@@ -10098,13 +10225,15 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
int do_transition;
int do_wait;
+ update_lcb_cache(dd);
+
previous_state = ppd->host_link_state;
ppd->host_link_state = HLS_GOING_OFFLINE;
pstate = read_physical_state(dd);
if (pstate == PLS_OFFLINE) {
do_transition = 0; /* in right state */
do_wait = 0; /* ...no need to wait */
- } else if ((pstate & 0xff) == PLS_OFFLINE) {
+ } else if ((pstate & 0xf0) == PLS_OFFLINE) {
do_transition = 0; /* in an offline transient state */
do_wait = 1; /* ...wait for it to settle */
} else {
@@ -10135,15 +10264,18 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
return ret;
}
- /* make sure the logical state is also down */
- wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
-
/*
* Now in charge of LCB - must be after the physical state is
* offline.quiet and before host_link_state is changed.
*/
set_host_lcb_access(dd);
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
+
+ /* make sure the logical state is also down */
+ ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
+ if (ret)
+ force_logical_link_state_down(ppd);
+
ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
if (ppd->port_type == PORT_TYPE_QSFP &&
@@ -10380,11 +10512,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
goto unexpected;
}
- ppd->host_link_state = HLS_UP_INIT;
ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
if (ret) {
- /* logical state didn't change, stay at going_up */
- ppd->host_link_state = HLS_GOING_UP;
dd_dev_err(dd,
"%s: logical state did not change to INIT\n",
__func__);
@@ -10398,6 +10527,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
handle_linkup_change(dd, 1);
+ ppd->host_link_state = HLS_UP_INIT;
}
break;
case HLS_UP_ARMED:
@@ -11853,6 +11983,10 @@ static void free_cntrs(struct hfi1_devdata *dd)
dd->scntrs = NULL;
kfree(dd->cntrnames);
dd->cntrnames = NULL;
+ if (dd->update_cntr_wq) {
+ destroy_workqueue(dd->update_cntr_wq);
+ dd->update_cntr_wq = NULL;
+ }
}
static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
@@ -12008,7 +12142,7 @@ u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
}
-static void update_synth_timer(unsigned long opaque)
+static void do_update_synth_timer(struct work_struct *work)
{
u64 cur_tx;
u64 cur_rx;
@@ -12017,8 +12151,8 @@ static void update_synth_timer(unsigned long opaque)
int i, j, vl;
struct hfi1_pportdata *ppd;
struct cntr_entry *entry;
-
- struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
+ struct hfi1_devdata *dd = container_of(work, struct hfi1_devdata,
+ update_cntr_work);
/*
* Rather than keep beating on the CSRs pick a minimal set that we can
@@ -12101,7 +12235,13 @@ static void update_synth_timer(unsigned long opaque)
} else {
hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
}
+}
+
+static void update_synth_timer(unsigned long opaque)
+{
+ struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
+ queue_work(dd->update_cntr_wq, &dd->update_cntr_work);
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
}
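
The counter-update rework splits the old timer callback in two: the timer only queues work on an ordered workqueue and rearms itself, while the expensive CSR reads run in process context. A reduced sketch of that split, using illustrative names:

#include <linux/timer.h>
#include <linux/workqueue.h>

struct my_dev {
	struct workqueue_struct *cntr_wq;	/* ordered workqueue */
	struct work_struct cntr_work;
	struct timer_list cntr_timer;
};

static void counters_work_fn(struct work_struct *work)
{
	struct my_dev *dev = container_of(work, struct my_dev, cntr_work);

	/* heavy register reads would go here; process context, may sleep */
	(void)dev;
}

static void counters_timer_fn(unsigned long opaque)
{
	struct my_dev *dev = (struct my_dev *)opaque;

	queue_work(dev->cntr_wq, &dev->cntr_work);	/* defer the work */
	mod_timer(&dev->cntr_timer, jiffies + HZ);	/* rearm the timer */
}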
@@ -12337,6 +12477,13 @@ static int init_cntrs(struct hfi1_devdata *dd)
if (init_cpu_counters(dd))
goto bail;
+ dd->update_cntr_wq = alloc_ordered_workqueue("hfi1_update_cntr_%d",
+ WQ_MEM_RECLAIM, dd->unit);
+ if (!dd->update_cntr_wq)
+ goto bail;
+
+ INIT_WORK(&dd->update_cntr_work, do_update_synth_timer);
+
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
return 0;
bail:
@@ -12515,7 +12662,7 @@ u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
#define SET_STATIC_RATE_CONTROL_SMASK(r) \
(r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
-int hfi1_init_ctxt(struct send_context *sc)
+void hfi1_init_ctxt(struct send_context *sc)
{
if (sc) {
struct hfi1_devdata *dd = sc->dd;
@@ -12532,7 +12679,6 @@ int hfi1_init_ctxt(struct send_context *sc)
write_kctxt_csr(dd, sc->hw_context,
SEND_CTXT_CHECK_ENABLE, reg);
}
- return 0;
}
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
@@ -12726,7 +12872,10 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
first_sdma = last_general;
last_sdma = first_sdma + dd->num_sdma;
first_rx = last_sdma;
- last_rx = first_rx + dd->n_krcv_queues;
+ last_rx = first_rx + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
+
+ /* VNIC MSIx interrupts get mapped when VNIC contexts are created */
+ dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
/*
* Sanity check - the code expects all SDMA chip source
@@ -12740,7 +12889,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
const char *err_info;
irq_handler_t handler;
irq_handler_t thread = NULL;
- void *arg;
+ void *arg = NULL;
int idx;
struct hfi1_ctxtdata *rcd = NULL;
struct sdma_engine *sde = NULL;
@@ -12767,24 +12916,25 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
} else if (first_rx <= i && i < last_rx) {
idx = i - first_rx;
rcd = dd->rcd[idx];
- /* no interrupt if no rcd */
- if (!rcd)
- continue;
- /*
- * Set the interrupt register and mask for this
- * context's interrupt.
- */
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
- rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START + idx) % 64);
- handler = receive_context_interrupt;
- thread = receive_context_thread;
- arg = rcd;
- snprintf(me->name, sizeof(me->name),
- DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
- err_info = "receive context";
- remap_intr(dd, IS_RCVAVAIL_START + idx, i);
- me->type = IRQ_RCVCTXT;
+ if (rcd) {
+ /*
+ * Set the interrupt register and mask for this
+ * context's interrupt.
+ */
+ rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
+ rcd->imask = ((u64)1) <<
+ ((IS_RCVAVAIL_START + idx) % 64);
+ handler = receive_context_interrupt;
+ thread = receive_context_thread;
+ arg = rcd;
+ snprintf(me->name, sizeof(me->name),
+ DRIVER_NAME "_%d kctxt%d",
+ dd->unit, idx);
+ err_info = "receive context";
+ remap_intr(dd, IS_RCVAVAIL_START + idx, i);
+ me->type = IRQ_RCVCTXT;
+ rcd->msix_intr = i;
+ }
} else {
/* not in our expected range - complain, then
* ignore it
@@ -12822,6 +12972,84 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
return ret;
}
+void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
+{
+ int i;
+
+ if (!dd->num_msix_entries) {
+ synchronize_irq(dd->pcidev->irq);
+ return;
+ }
+
+ for (i = 0; i < dd->vnic.num_ctxt; i++) {
+ struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
+ struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
+
+ synchronize_irq(me->msix.vector);
+ }
+}
+
+void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
+
+ if (!me->arg) /* => no irq, no affinity */
+ return;
+
+ hfi1_put_irq_affinity(dd, me);
+ free_irq(me->msix.vector, me->arg);
+
+ me->arg = NULL;
+}
+
+void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ struct hfi1_msix_entry *me;
+ int idx = rcd->ctxt;
+ void *arg = rcd;
+ int ret;
+
+ rcd->msix_intr = dd->vnic.msix_idx++;
+ me = &dd->msix_entries[rcd->msix_intr];
+
+ /*
+ * Set the interrupt register and mask for this
+ * context's interrupt.
+ */
+ rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
+ rcd->imask = ((u64)1) <<
+ ((IS_RCVAVAIL_START + idx) % 64);
+
+ snprintf(me->name, sizeof(me->name),
+ DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
+ me->name[sizeof(me->name) - 1] = 0;
+ me->type = IRQ_RCVCTXT;
+
+ remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
+
+ ret = request_threaded_irq(me->msix.vector, receive_context_interrupt,
+ receive_context_thread, 0, me->name, arg);
+ if (ret) {
+ dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n",
+ me->msix.vector, idx, ret);
+ return;
+ }
+ /*
+ * Assign arg only after the request_irq() call succeeds, so the
+ * teardown path knows whether there is an IRQ to free.
+ */
+ me->arg = arg;
+
+ ret = hfi1_get_irq_affinity(dd, me);
+ if (ret) {
+ dd_dev_err(dd,
+ "unable to pin IRQ %d\n", ret);
+ free_irq(me->msix.vector, me->arg);
+ }
+}
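
hfi1_set_vnic_msix_info()/hfi1_reset_vnic_msix_info() above attach and detach a per-context MSI-X vector at VNIC context create/destroy time, using the stored arg pointer to know whether an IRQ needs freeing. A stripped-down sketch of the same request/teardown pairing (names and error handling are illustrative):

#include <linux/interrupt.h>

struct ctx {
	unsigned int irq_vector;	/* 0 = no vector attached */
	char irq_name[32];
};

/* hardirq/thread_fn handlers are assumed to be supplied by the caller */
static int attach_ctx_irq(struct ctx *c, unsigned int vector,
			  irq_handler_t hardirq, irq_handler_t thread_fn)
{
	int ret;

	ret = request_threaded_irq(vector, hardirq, thread_fn, 0,
				   c->irq_name, c);
	if (ret)
		return ret;
	c->irq_vector = vector;		/* remember it for teardown */
	return 0;
}

static void detach_ctx_irq(struct ctx *c)
{
	if (!c->irq_vector)
		return;
	synchronize_irq(c->irq_vector);	/* wait out in-flight handlers */
	free_irq(c->irq_vector, c);
	c->irq_vector = 0;
}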
+
/*
* Set the general handler to accept all interrupts, remap all
* chip interrupts back to MSI-X 0.
@@ -12853,7 +13081,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
* N interrupts - one per used SDMA engine
* M interrupt - one per kernel receive context
*/
- total = 1 + dd->num_sdma + dd->n_krcv_queues;
+ total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
if (!entries) {
@@ -12918,7 +13146,8 @@ fail:
*
* num_rcv_contexts - number of contexts being used
* n_krcv_queues - number of kernel contexts
- * first_user_ctxt - first non-kernel context in array of contexts
+ * first_dyn_alloc_ctxt - first dynamically allocated context
+ * in array of contexts
* freectxts - number of free user contexts
* num_send_contexts - number of PIO send contexts being used
*/
@@ -12995,10 +13224,14 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
total_contexts = num_kernel_contexts + num_user_contexts;
}
- /* the first N are kernel contexts, the rest are user contexts */
+ /* Accommodate VNIC contexts */
+ if ((total_contexts + HFI1_NUM_VNIC_CTXT) <= dd->chip_rcv_contexts)
+ total_contexts += HFI1_NUM_VNIC_CTXT;
+
+ /* the first N are kernel contexts, the rest are user/vnic contexts */
dd->num_rcv_contexts = total_contexts;
dd->n_krcv_queues = num_kernel_contexts;
- dd->first_user_ctxt = num_kernel_contexts;
+ dd->first_dyn_alloc_ctxt = num_kernel_contexts;
dd->num_user_contexts = num_user_contexts;
dd->freectxts = num_user_contexts;
dd_dev_info(dd,
@@ -13454,11 +13687,8 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
- for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
- write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
- write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
- write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
- }
+ for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++)
+ clear_rsm_rule(dd, i);
for (i = 0; i < 32; i++)
write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
@@ -13610,14 +13840,14 @@ static void init_chip(struct hfi1_devdata *dd)
dd_dev_info(dd, "Resetting CSRs with FLR\n");
/* do the FLR, the DC reset will remain */
- hfi1_pcie_flr(dd);
+ pcie_flr(dd->pcidev);
/* restore command and BARs */
restore_pci_variables(dd);
if (is_ax(dd)) {
dd_dev_info(dd, "Resetting CSRs with FLR\n");
- hfi1_pcie_flr(dd);
+ pcie_flr(dd->pcidev);
restore_pci_variables(dd);
}
} else {
@@ -13817,6 +14047,16 @@ static void add_rsm_rule(struct hfi1_devdata *dd, u8 rule_index,
(u64)rrd->value2 << RCV_RSM_MATCH_VALUE2_SHIFT);
}
+/*
+ * Clear a receive side mapping rule.
+ */
+static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
+{
+ write_csr(dd, RCV_RSM_CFG + (8 * rule_index), 0);
+ write_csr(dd, RCV_RSM_SELECT + (8 * rule_index), 0);
+ write_csr(dd, RCV_RSM_MATCH + (8 * rule_index), 0);
+}
+
/* return the number of RSM map table entries that will be used for QOS */
static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
unsigned int *np)
@@ -13932,7 +14172,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
rrd.value2 = LRH_SC_VALUE;
/* add rule 0 */
- add_rsm_rule(dd, 0, &rrd);
+ add_rsm_rule(dd, RSM_INS_VERBS, &rrd);
/* mark RSM map entries as used */
rmt->used += rmt_entries;
@@ -13962,7 +14202,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
/*
* RSM will extract the destination context as an index into the
* map table. The destination contexts are a sequential block
- * in the range first_user_ctxt...num_rcv_contexts-1 (inclusive).
+ * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive).
* Map entries are accessed as offset + extracted value. Adjust
* the added offset so this sequence can be placed anywhere in
* the table - as long as the entries themselves do not wrap.
@@ -13970,9 +14210,9 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
* start with that to allow for a "negative" offset.
*/
offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used -
- (int)dd->first_user_ctxt);
+ (int)dd->first_dyn_alloc_ctxt);
- for (i = dd->first_user_ctxt, idx = rmt->used;
+ for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used;
i < dd->num_rcv_contexts; i++, idx++) {
/* replace with identity mapping */
regoff = (idx % 8) * 8;
@@ -14006,11 +14246,84 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
rrd.value2 = 1;
/* add rule 1 */
- add_rsm_rule(dd, 1, &rrd);
+ add_rsm_rule(dd, RSM_INS_FECN, &rrd);
rmt->used += dd->num_user_contexts;
}
+/* Initialize RSM for VNIC */
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
+{
+ u8 i, j;
+ u8 ctx_id = 0;
+ u64 reg;
+ u32 regoff;
+ struct rsm_rule_data rrd;
+
+ if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) {
+ dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n",
+ dd->vnic.rmt_start);
+ return;
+ }
+
+ dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n",
+ dd->vnic.rmt_start,
+ dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES);
+
+ /* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */
+ regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8;
+ reg = read_csr(dd, regoff);
+ for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) {
+ /* Update map register with vnic context */
+ j = (dd->vnic.rmt_start + i) % 8;
+ reg &= ~(0xffllu << (j * 8));
+ reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8);
+ /* Wrap up vnic ctx index */
+ ctx_id %= dd->vnic.num_ctxt;
+ /* Write back map register */
+ if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) {
+ dev_dbg(&(dd)->pcidev->dev,
+ "Vnic rsm map reg[%d] =0x%llx\n",
+ regoff - RCV_RSM_MAP_TABLE, reg);
+
+ write_csr(dd, regoff, reg);
+ regoff += 8;
+ if (i < (NUM_VNIC_MAP_ENTRIES - 1))
+ reg = read_csr(dd, regoff);
+ }
+ }
+
+ /* Add rule for vnic */
+ rrd.offset = dd->vnic.rmt_start;
+ rrd.pkt_type = 4;
+ /* Match 16B packets */
+ rrd.field1_off = L2_TYPE_MATCH_OFFSET;
+ rrd.mask1 = L2_TYPE_MASK;
+ rrd.value1 = L2_16B_VALUE;
+ /* Match ETH L4 packets */
+ rrd.field2_off = L4_TYPE_MATCH_OFFSET;
+ rrd.mask2 = L4_16B_TYPE_MASK;
+ rrd.value2 = L4_16B_ETH_VALUE;
+ /* Calc context from veswid and entropy */
+ rrd.index1_off = L4_16B_HDR_VESWID_OFFSET;
+ rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES);
+ rrd.index2_off = L2_16B_ENTROPY_OFFSET;
+ rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES);
+ add_rsm_rule(dd, RSM_INS_VNIC, &rrd);
+
+ /* Enable RSM if not already enabled */
+ add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+}
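
hfi1_init_vnic_rsm() packs one receive-context number per byte into the 64-bit RSM map registers, eight map entries per register, writing each register back once its last byte is filled. The byte-insertion step in isolation might look like the following sketch:

#include <linux/types.h>

/* place receive context number 'val' into byte slot 'slot' (0-7) */
static u64 map_set_byte(u64 word, unsigned int slot, u8 val)
{
	word &= ~(0xffull << (slot * 8));	/* clear the old byte */
	word |= (u64)val << (slot * 8);		/* insert the new one */
	return word;
}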
+
+void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd)
+{
+ clear_rsm_rule(dd, RSM_INS_VNIC);
+
+ /* Disable RSM if used only by vnic */
+ if (dd->vnic.rmt_start == 0)
+ clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+}
+
static void init_rxe(struct hfi1_devdata *dd)
{
struct rsm_map_table *rmt;
@@ -14023,6 +14336,8 @@ static void init_rxe(struct hfi1_devdata *dd)
init_qos(dd, rmt);
init_user_fecn_handling(dd, rmt);
complete_rsm_map_table(dd, rmt);
+ /* record number of used rsm map entries for vnic */
+ dd->vnic.rmt_start = rmt->used;
kfree(rmt);
/*
@@ -14212,30 +14527,24 @@ done:
return ret;
}
-int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
+int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt)
{
- struct hfi1_ctxtdata *rcd;
- unsigned sctxt;
- int ret = 0;
+ u8 hw_ctxt;
u64 reg;
- if (ctxt < dd->num_rcv_contexts) {
- rcd = dd->rcd[ctxt];
- } else {
- ret = -EINVAL;
- goto done;
- }
- if (!rcd || !rcd->sc) {
- ret = -EINVAL;
- goto done;
- }
- sctxt = rcd->sc->hw_context;
- reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
+ if (!ctxt || !ctxt->sc)
+ return -EINVAL;
+
+ if (ctxt->ctxt >= dd->num_rcv_contexts)
+ return -EINVAL;
+
+ hw_ctxt = ctxt->sc->hw_context;
+ reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
-done:
- return ret;
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
+
+ return 0;
}
/*
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 043fd21dc5f3..cbe455d9ab8b 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1,7 +1,7 @@
#ifndef _CHIP_H
#define _CHIP_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -394,7 +394,8 @@
#define LAST_REMOTE_STATE_COMPLETE 0x13
#define LINK_QUALITY_INFO 0x14
#define REMOTE_DEVICE_ID 0x15
-#define LINK_DOWN_REASON 0x16
+#define LINK_DOWN_REASON 0x16 /* first byte of offset 0x16 */
+#define VERSION_PATCH 0x16 /* last byte of offset 0x16 */
/* 8051 lane specific register field IDs */
#define TX_EQ_SETTINGS 0x00
@@ -524,10 +525,12 @@ enum {
#define SUPPORTED_CRCS (CAP_CRC_14B | CAP_CRC_48B)
/* misc status version fields */
-#define STS_FM_VERSION_A_SHIFT 16
-#define STS_FM_VERSION_A_MASK 0xff
-#define STS_FM_VERSION_B_SHIFT 24
-#define STS_FM_VERSION_B_MASK 0xff
+#define STS_FM_VERSION_MINOR_SHIFT 16
+#define STS_FM_VERSION_MINOR_MASK 0xff
+#define STS_FM_VERSION_MAJOR_SHIFT 24
+#define STS_FM_VERSION_MAJOR_MASK 0xff
+#define STS_FM_VERSION_PATCH_SHIFT 24
+#define STS_FM_VERSION_PATCH_MASK 0xff
/* LCB_CFG_CRC_MODE TX_VAL and RX_VAL CRC mode values */
#define LCB_CRC_16B 0x0 /* 16b CRC */
@@ -633,7 +636,8 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt,
write_csr(dd, offset0 + (0x1000 * ctxt), value);
}
-u64 create_pbc(struct hfi1_pportdata *ppd, u64, int, u32, u32);
+u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
+ u32 dw_len);
/* firmware.c */
#define SBUS_MASTER_BROADCAST 0xfd
@@ -698,7 +702,8 @@ void fabric_serdes_reset(struct hfi1_devdata *dd);
int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result);
/* chip.c */
-void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b);
+void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
+ u8 *ver_patch);
void read_guid(struct hfi1_devdata *dd);
int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
@@ -724,7 +729,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd);
void set_intr_state(struct hfi1_devdata *dd, u32 enable);
void apply_link_downgrade_policy(struct hfi1_pportdata *ppd,
int refresh_widths);
-void update_usrhead(struct hfi1_ctxtdata *, u32, u32, u32, u32, u32);
+void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
+ u32 intr_adjust, u32 npkts);
int stop_drain_data_vls(struct hfi1_devdata *dd);
int open_fill_data_vls(struct hfi1_devdata *dd);
u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns);
@@ -1343,7 +1349,7 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd);
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
struct ib_header *hfi1_get_msgheader(
struct hfi1_devdata *dd, __le32 *rhf_addr);
-int hfi1_init_ctxt(struct send_context *sc);
+void hfi1_init_ctxt(struct send_context *sc);
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
u32 type, unsigned long pa, u16 order);
void hfi1_quiet_serdes(struct hfi1_pportdata *ppd);
@@ -1356,8 +1362,10 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val);
int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey);
int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt);
int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey);
-int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt);
+int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt);
void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
+void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
/*
* Interrupt source table.
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index 1b783bbee4bb..995d62c7f9a7 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -331,12 +331,15 @@ struct diag_pkt {
#define FULL_MGMT_P_KEY 0xFFFF
#define DEFAULT_P_KEY LIM_MGMT_P_KEY
-#define HFI1_FECN_SHIFT 31
-#define HFI1_FECN_MASK 1
-#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT)
-#define HFI1_BECN_SHIFT 30
-#define HFI1_BECN_MASK 1
-#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT)
+
+/**
+ * 0xF8 - 4 bits of multicast range and 1 bit for collective range
+ * Example: For 24 bit LID space,
+ * Multicast range: 0xF00000 to 0xF7FFFF
+ * Collective range: 0xF80000 to 0xFFFFFE
+ */
+#define HFI1_MCAST_NR 0x4 /* Number of top bits set */
+#define HFI1_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */
#define HFI1_PSM_IOC_BASE_SEQ 0x0
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 7fe9dd885746..e9fa3c293e42 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -1,6 +1,6 @@
#ifdef CONFIG_DEBUG_FS
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -51,8 +51,12 @@
#include <linux/export.h>
#include <linux/module.h>
#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ratelimit.h>
+#include <linux/fault-inject.h>
#include "hfi.h"
+#include "trace.h"
#include "debugfs.h"
#include "device.h"
#include "qp.h"
@@ -170,7 +174,7 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v)
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
- for (j = 0; j < dd->first_user_ctxt; j++) {
+ for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
if (!dd->rcd[j])
continue;
n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
@@ -196,7 +200,7 @@ static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos)
if (!*pos)
return SEQ_START_TOKEN;
- if (*pos >= dd->first_user_ctxt)
+ if (*pos >= dd->first_dyn_alloc_ctxt)
return NULL;
return pos;
}
@@ -210,7 +214,7 @@ static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
return pos;
++*pos;
- if (*pos >= dd->first_user_ctxt)
+ if (*pos >= dd->first_dyn_alloc_ctxt)
return NULL;
return pos;
}
@@ -1063,6 +1067,222 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
DEBUGFS_FILE_OPS(sdma_cpu_list);
+#ifdef CONFIG_FAULT_INJECTION
+static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ ++*pos;
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void _fault_stats_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _fault_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos = v;
+ loff_t i = *spos, j;
+ u64 n_packets = 0, n_bytes = 0;
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+
+ for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
+ if (!dd->rcd[j])
+ continue;
+ n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
+ n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
+ }
+ if (!n_packets && !n_bytes)
+ return SEQ_SKIP;
+ if (!ibd->fault_opcode->n_rxfaults[i] &&
+ !ibd->fault_opcode->n_txfaults[i])
+ return SEQ_SKIP;
+ seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults tx:%llu)\n", i,
+ (unsigned long long)n_packets,
+ (unsigned long long)n_bytes,
+ (unsigned long long)ibd->fault_opcode->n_rxfaults[i],
+ (unsigned long long)ibd->fault_opcode->n_txfaults[i]);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(fault_stats);
+DEBUGFS_SEQ_FILE_OPEN(fault_stats);
+DEBUGFS_FILE_OPS(fault_stats);
+
+static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd)
+{
+ debugfs_remove_recursive(ibd->fault_opcode->dir);
+ kfree(ibd->fault_opcode);
+ ibd->fault_opcode = NULL;
+}
+
+static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd)
+{
+ struct dentry *parent = ibd->hfi1_ibdev_dbg;
+
+ ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL);
+ if (!ibd->fault_opcode)
+ return -ENOMEM;
+
+ ibd->fault_opcode->attr.interval = 1;
+ ibd->fault_opcode->attr.require_end = ULONG_MAX;
+ ibd->fault_opcode->attr.stacktrace_depth = 32;
+ ibd->fault_opcode->attr.dname = NULL;
+ ibd->fault_opcode->attr.verbose = 0;
+ ibd->fault_opcode->fault_by_opcode = false;
+ ibd->fault_opcode->opcode = 0;
+ ibd->fault_opcode->mask = 0xff;
+
+ ibd->fault_opcode->dir =
+ fault_create_debugfs_attr("fault_opcode",
+ parent,
+ &ibd->fault_opcode->attr);
+ if (IS_ERR(ibd->fault_opcode->dir)) {
+ kfree(ibd->fault_opcode);
+ return -ENOENT;
+ }
+
+ DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd);
+ if (!debugfs_create_bool("fault_by_opcode", 0600,
+ ibd->fault_opcode->dir,
+ &ibd->fault_opcode->fault_by_opcode))
+ goto fail;
+ if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir,
+ &ibd->fault_opcode->opcode))
+ goto fail;
+ if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir,
+ &ibd->fault_opcode->mask))
+ goto fail;
+
+ return 0;
+fail:
+ fault_exit_opcode_debugfs(ibd);
+ return -ENOMEM;
+}
+
+static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd)
+{
+ debugfs_remove_recursive(ibd->fault_packet->dir);
+ kfree(ibd->fault_packet);
+ ibd->fault_packet = NULL;
+}
+
+static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd)
+{
+ struct dentry *parent = ibd->hfi1_ibdev_dbg;
+
+ ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL);
+ if (!ibd->fault_packet)
+ return -ENOMEM;
+
+ ibd->fault_packet->attr.interval = 1;
+ ibd->fault_packet->attr.require_end = ULONG_MAX;
+ ibd->fault_packet->attr.stacktrace_depth = 32;
+ ibd->fault_packet->attr.dname = NULL;
+ ibd->fault_packet->attr.verbose = 0;
+ ibd->fault_packet->fault_by_packet = false;
+
+ ibd->fault_packet->dir =
+ fault_create_debugfs_attr("fault_packet",
+ parent,
+ &ibd->fault_packet->attr);
+ if (IS_ERR(ibd->fault_packet->dir)) {
+ kfree(ibd->fault_packet);
+ return -ENOENT;
+ }
+
+ if (!debugfs_create_bool("fault_by_packet", 0600,
+ ibd->fault_packet->dir,
+ &ibd->fault_packet->fault_by_packet))
+ goto fail;
+ if (!debugfs_create_u64("fault_stats", 0400,
+ ibd->fault_packet->dir,
+ &ibd->fault_packet->n_faults))
+ goto fail;
+
+ return 0;
+fail:
+ fault_exit_packet_debugfs(ibd);
+ return -ENOMEM;
+}
+
+static void fault_exit_debugfs(struct hfi1_ibdev *ibd)
+{
+ fault_exit_opcode_debugfs(ibd);
+ fault_exit_packet_debugfs(ibd);
+}
+
+static int fault_init_debugfs(struct hfi1_ibdev *ibd)
+{
+ int ret = 0;
+
+ ret = fault_init_opcode_debugfs(ibd);
+ if (ret)
+ return ret;
+
+ ret = fault_init_packet_debugfs(ibd);
+ if (ret)
+ fault_exit_opcode_debugfs(ibd);
+
+ return ret;
+}
+
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return ibd->fault_suppress_err;
+}
+
+bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
+{
+ bool ret = false;
+ struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
+
+ if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode)
+ return false;
+ if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask))
+ return false;
+ ret = should_fail(&ibd->fault_opcode->attr, 1);
+ if (ret) {
+ trace_hfi1_fault_opcode(qp, opcode);
+ if (rx)
+ ibd->fault_opcode->n_rxfaults[opcode]++;
+ else
+ ibd->fault_opcode->n_txfaults[opcode]++;
+ }
+ return ret;
+}
+
+bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
+{
+ struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi;
+ struct hfi1_ibdev *ibd = dev_from_rdi(rdi);
+ bool ret = false;
+
+ if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet)
+ return false;
+
+ ret = should_fail(&ibd->fault_packet->attr, 1);
+ if (ret) {
+ ++ibd->fault_packet->n_faults;
+ trace_hfi1_fault_packet(packet);
+ }
+ return ret;
+}
+#endif
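
The fault-injection hooks above gate opcode and packet processing on should_fail() from the standard fault-injection framework and count each injected fault. A minimal sketch of that gate, with illustrative structure names:

#include <linux/fault-inject.h>
#include <linux/types.h>

struct my_fault {
	struct fault_attr attr;	/* interval, probability, etc. */
	u64 n_faults;
};

static bool maybe_inject_fault(struct my_fault *f)
{
	if (!should_fail(&f->attr, 1))	/* 1 = size of this attempt */
		return false;
	f->n_faults++;			/* count injected faults */
	return true;			/* caller drops the packet */
}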
+
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
char name[sizeof("port0counters") + 1];
@@ -1112,12 +1332,22 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
!port_cntr_ops[i].ops.write ?
S_IRUGO : S_IRUGO | S_IWUSR);
}
+
+#ifdef CONFIG_FAULT_INJECTION
+ debugfs_create_bool("fault_suppress_err", 0600,
+ ibd->hfi1_ibdev_dbg,
+ &ibd->fault_suppress_err);
+ fault_init_debugfs(ibd);
+#endif
}
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
if (!hfi1_dbg_root)
goto out;
+#ifdef CONFIG_FAULT_INJECTION
+ fault_exit_debugfs(ibd);
+#endif
debugfs_remove(ibd->hfi1_ibdev_link);
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index b6fb6814f1b8..38c38a98156d 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
@@ -53,23 +53,79 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
void hfi1_dbg_init(void);
void hfi1_dbg_exit(void);
+
+#ifdef CONFIG_FAULT_INJECTION
+#include <linux/fault-inject.h>
+struct fault_opcode {
+ struct fault_attr attr;
+ struct dentry *dir;
+ bool fault_by_opcode;
+ u64 n_rxfaults[256];
+ u64 n_txfaults[256];
+ u8 opcode;
+ u8 mask;
+};
+
+struct fault_packet {
+ struct fault_attr attr;
+ struct dentry *dir;
+ bool fault_by_packet;
+ u64 n_faults;
+};
+
+bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
+bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
+#else
+static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
+ u32 opcode, bool rx)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
+#endif
+
#else
static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
}
-void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
+static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
+{
+}
+
+static inline void hfi1_dbg_init(void)
{
}
-void hfi1_dbg_init(void)
+static inline void hfi1_dbg_exit(void)
{
}
-void hfi1_dbg_exit(void)
+static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
+ return false;
}
+static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
+ u32 opcode, bool rx)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
#endif
#endif /* _HFI1_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c
index bf64b5a7bfd7..bbb6069dec2a 100644
--- a/drivers/infiniband/hw/hfi1/device.c
+++ b/drivers/infiniband/hw/hfi1/device.c
@@ -69,7 +69,7 @@ int hfi1_cdev_init(int minor, const char *name,
cdev_init(cdev, fops);
cdev->owner = THIS_MODULE;
- cdev->kobj.parent = parent;
+ cdev_set_parent(cdev, parent);
kobject_set_name(&cdev->kobj, name);
ret = cdev_add(cdev, dev, 1);
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 3881c951f6af..a50870e455a3 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -59,6 +59,8 @@
#include "trace.h"
#include "qp.h"
#include "sdma.h"
+#include "debugfs.h"
+#include "vnic.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -83,8 +85,8 @@ module_param_named(cu, hfi1_cu, uint, S_IRUGO);
MODULE_PARM_DESC(cu, "Credit return units");
unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT;
-static int hfi1_caps_set(const char *, const struct kernel_param *);
-static int hfi1_caps_get(char *, const struct kernel_param *);
+static int hfi1_caps_set(const char *val, const struct kernel_param *kp);
+static int hfi1_caps_get(char *buffer, const struct kernel_param *kp);
static const struct kernel_param_ops cap_ops = {
.set = hfi1_caps_set,
.get = hfi1_caps_get
@@ -209,42 +211,6 @@ int hfi1_count_active_units(void)
}
/*
- * Return count of all units, optionally return in arguments
- * the number of usable (present) units, and the number of
- * ports that are up.
- */
-int hfi1_count_units(int *npresentp, int *nupp)
-{
- int nunits = 0, npresent = 0, nup = 0;
- struct hfi1_devdata *dd;
- unsigned long flags;
- int pidx;
- struct hfi1_pportdata *ppd;
-
- spin_lock_irqsave(&hfi1_devs_lock, flags);
-
- list_for_each_entry(dd, &hfi1_dev_list, list) {
- nunits++;
- if ((dd->flags & HFI1_PRESENT) && dd->kregbase)
- npresent++;
- for (pidx = 0; pidx < dd->num_pports; ++pidx) {
- ppd = dd->pport + pidx;
- if (ppd->lid && ppd->linkup)
- nup++;
- }
- }
-
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
-
- if (npresentp)
- *npresentp = npresent;
- if (nupp)
- *nupp = nup;
-
- return nunits;
-}
-
-/*
* Get address of eager buffer from its index (allocated in chunks, not
* contiguous).
*/
@@ -283,7 +249,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
{
struct ib_header *rhdr = packet->hdr;
u32 rte = rhf_rcv_type_err(packet->rhf);
- int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
+ int lnh = ib_get_lnh(rhdr);
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct hfi1_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
@@ -295,7 +261,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
/* For TIDERR and RC QPs preemptively schedule a NAK */
struct ib_other_headers *ohdr = NULL;
u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
- u16 lid = be16_to_cpu(rhdr->lrh[1]);
+ u16 lid = ib_get_dlid(rhdr);
u32 qp_num;
u32 rcv_flags = 0;
@@ -396,7 +362,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
u16 rlid;
u8 svc_type, sl, sc5;
- sc5 = hdr2sc(rhdr, packet->rhf);
+ sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
@@ -414,7 +380,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
svc_type = IB_CC_SVCTYPE_UD;
break;
case IB_QPT_UC:
- rlid = be16_to_cpu(rhdr->lrh[3]);
+ rlid = ib_get_slid(rhdr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
@@ -460,7 +426,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
struct ib_other_headers *ohdr = pkt->ohdr;
struct ib_grh *grh = NULL;
u32 rqpn = 0, bth1;
- u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]);
+ u16 rlid, dlid = ib_get_dlid(hdr);
u8 sc, svc_type;
bool is_mcast = false;
@@ -471,19 +437,19 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_UD:
- rlid = be16_to_cpu(hdr->lrh[3]);
+ rlid = ib_get_slid(hdr);
rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
svc_type = IB_CC_SVCTYPE_UD;
is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(dlid != be16_to_cpu(IB_LID_PERMISSIVE));
break;
case IB_QPT_UC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
case IB_QPT_RC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_RC;
break;
@@ -491,16 +457,16 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
return;
}
- sc = hdr2sc(hdr, pkt->rhf);
+ sc = hfi1_9B_get_sc5(hdr, pkt->rhf);
bth1 = be32_to_cpu(ohdr->bth[1]);
- if (do_cnp && (bth1 & HFI1_FECN_SMASK)) {
+ if (do_cnp && (bth1 & IB_FECN_SMASK)) {
u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh);
}
- if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) {
+ if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 lqpn = bth1 & RVT_QPN_MASK;
u8 sl = ibp->sc_to_sl[sc];
@@ -621,8 +587,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
hdr = packet->hdr;
-
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
@@ -634,7 +599,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
}
bth1 = be32_to_cpu(packet->ohdr->bth[1]);
- is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
+ is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK));
if (!is_ecn)
goto next;
@@ -652,7 +617,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
rcu_read_unlock();
/* turn off BECN, FECN */
- bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK);
+ bth1 &= ~(IB_FECN_SMASK | IB_BECN_SMASK);
packet->ohdr->bth[1] = cpu_to_be32(bth1);
next:
update_ps_mdata(&mdata, rcd);
@@ -872,20 +837,42 @@ bail:
return last;
}
-static inline void set_all_nodma_rtail(struct hfi1_devdata *dd)
+static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt)
{
int i;
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+ /*
+ * For dynamically allocated kernel contexts (like vnic) switch
+ * interrupt handler only for that context. Otherwise, switch
+ * interrupt handler for all statically allocated kernel contexts.
+ */
+ if (ctxt >= dd->first_dyn_alloc_ctxt) {
+ dd->rcd[ctxt]->do_interrupt =
+ &handle_receive_interrupt_nodma_rtail;
+ return;
+ }
+
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++)
dd->rcd[i]->do_interrupt =
&handle_receive_interrupt_nodma_rtail;
}
-static inline void set_all_dma_rtail(struct hfi1_devdata *dd)
+static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt)
{
int i;
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+ /*
+ * For dynamically allocated kernel contexts (like vnic) switch
+ * interrupt handler only for that context. Otherwise, switch
+ * interrupt handler for all statically allocated kernel contexts.
+ */
+ if (ctxt >= dd->first_dyn_alloc_ctxt) {
+ dd->rcd[ctxt]->do_interrupt =
+ &handle_receive_interrupt_dma_rtail;
+ return;
+ }
+
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++)
dd->rcd[i]->do_interrupt =
&handle_receive_interrupt_dma_rtail;
}
@@ -895,8 +882,13 @@ void set_all_slowpath(struct hfi1_devdata *dd)
int i;
/* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
- dd->rcd[i]->do_interrupt = &handle_receive_interrupt;
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
+ struct hfi1_ctxtdata *rcd = dd->rcd[i];
+
+ if ((i < dd->first_dyn_alloc_ctxt) ||
+ (rcd && rcd->sc && (rcd->sc->type == SC_KERNEL)))
+ rcd->do_interrupt = &handle_receive_interrupt;
+ }
}
static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
@@ -908,7 +900,8 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
packet->rhf_addr);
u8 etype = rhf_rcv_type(packet->rhf);
- if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) {
+ if (etype == RHF_RCV_TYPE_IB &&
+ hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) {
int hwstate = read_logical_state(dd);
if (hwstate != LSTATE_ACTIVE) {
@@ -1006,7 +999,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
last = RCV_PKT_DONE;
if (needset) {
dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
- set_all_nodma_rtail(dd);
+ set_nodma_rtail(dd, rcd->ctxt);
needset = 0;
}
} else {
@@ -1028,7 +1021,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
if (needset) {
dd_dev_info(dd,
"Switching to DMA_RTAIL\n");
- set_all_dma_rtail(dd);
+ set_dma_rtail(dd, rcd->ctxt);
needset = 0;
}
}
@@ -1077,10 +1070,10 @@ void receive_interrupt_work(struct work_struct *work)
set_link_state(ppd, HLS_UP_ACTIVE);
/*
- * Interrupt all kernel contexts that could have had an
- * interrupt during auto activation.
+ * Interrupt all statically allocated kernel contexts that could
+ * have had an interrupt during auto activation.
*/
- for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++)
+ for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++)
force_recv_intr(dd->rcd[i]);
}
@@ -1294,8 +1287,9 @@ int hfi1_reset_device(int unit)
spin_lock_irqsave(&dd->uctxt_lock, flags);
if (dd->rcd)
- for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
- if (!dd->rcd[i] || !dd->rcd[i]->cnt)
+ for (i = dd->first_dyn_alloc_ctxt;
+ i < dd->num_rcv_contexts; i++) {
+ if (!dd->rcd[i])
continue;
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
ret = -EBUSY;
@@ -1354,6 +1348,9 @@ void handle_eflags(struct hfi1_packet *packet)
*/
int process_receive_ib(struct hfi1_packet *packet)
{
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
+
trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
packet->rcd->ctxt,
rhf_err_flags(packet->rhf),
@@ -1363,6 +1360,11 @@ int process_receive_ib(struct hfi1_packet *packet)
packet->updegr,
rhf_egr_index(packet->rhf));
+ if (unlikely(
+ (hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+ (packet->rhf & RHF_DC_ERR))))
+ return RHF_RCV_CONTINUE;
+
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
return RHF_RCV_CONTINUE;
@@ -1372,15 +1374,31 @@ int process_receive_ib(struct hfi1_packet *packet)
return RHF_RCV_CONTINUE;
}
+static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
+{
+ /* Packet received in VNIC context via RSM */
+ if (packet->rcd->is_vnic)
+ return true;
+
+ if ((HFI1_GET_L2_TYPE(packet->ebuf) == OPA_VNIC_L2_TYPE) &&
+ (HFI1_GET_L4_TYPE(packet->ebuf) == OPA_VNIC_L4_ETHR))
+ return true;
+
+ return false;
+}
+
int process_receive_bypass(struct hfi1_packet *packet)
{
struct hfi1_devdata *dd = packet->rcd->dd;
- if (unlikely(rhf_err_flags(packet->rhf)))
+ if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
+ } else if (hfi1_is_vnic_packet(packet)) {
+ hfi1_vnic_bypass_rcv(packet);
+ return RHF_RCV_CONTINUE;
+ }
- dd_dev_err(dd,
- "Bypass packets are not supported in normal operation. Dropping\n");
+ dd_dev_err(dd, "Unsupported bypass packet. Dropping\n");
incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
u64 *flits = packet->ebuf;
@@ -1398,6 +1416,12 @@ int process_receive_bypass(struct hfi1_packet *packet)
int process_receive_error(struct hfi1_packet *packet)
{
+ /* KHdrHCRCErr -- KDETH packet with a bad HCRC */
+ if (unlikely(
+ hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+ rhf_rcv_type_err(packet->rhf) == 3))
+ return RHF_RCV_CONTINUE;
+
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
@@ -1409,6 +1433,8 @@ int process_receive_error(struct hfi1_packet *packet)
int kdeth_process_expected(struct hfi1_packet *packet)
{
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
@@ -1421,6 +1447,8 @@ int kdeth_process_eager(struct hfi1_packet *packet)
{
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
dd_dev_err(packet->rcd->dd,
"Unhandled eager packet received. Dropping.\n");
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index f78c739b330a..3158128d57e8 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@
#include <linux/vmalloc.h>
#include <linux/io.h>
#include <linux/sched/mm.h>
+#include <linux/bitmap.h>
#include <rdma/ib.h>
@@ -70,30 +71,37 @@
/*
* File operation functions
*/
-static int hfi1_file_open(struct inode *, struct file *);
-static int hfi1_file_close(struct inode *, struct file *);
-static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *);
-static unsigned int hfi1_poll(struct file *, struct poll_table_struct *);
-static int hfi1_file_mmap(struct file *, struct vm_area_struct *);
-
-static u64 kvirt_to_phys(void *);
-static int assign_ctxt(struct file *, struct hfi1_user_info *);
-static int init_subctxts(struct hfi1_ctxtdata *, const struct hfi1_user_info *);
-static int user_init(struct file *);
-static int get_ctxt_info(struct file *, void __user *, __u32);
-static int get_base_info(struct file *, void __user *, __u32);
-static int setup_ctxt(struct file *);
-static int setup_subctxt(struct hfi1_ctxtdata *);
-static int get_user_context(struct file *, struct hfi1_user_info *, int);
-static int find_shared_ctxt(struct file *, const struct hfi1_user_info *);
-static int allocate_ctxt(struct file *, struct hfi1_devdata *,
- struct hfi1_user_info *);
-static unsigned int poll_urgent(struct file *, struct poll_table_struct *);
-static unsigned int poll_next(struct file *, struct poll_table_struct *);
-static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
-static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
-static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
-static int vma_fault(struct vm_fault *);
+static int hfi1_file_open(struct inode *inode, struct file *fp);
+static int hfi1_file_close(struct inode *inode, struct file *fp);
+static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from);
+static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt);
+static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma);
+
+static u64 kvirt_to_phys(void *addr);
+static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo);
+static int init_subctxts(struct hfi1_ctxtdata *uctxt,
+ const struct hfi1_user_info *uinfo);
+static int init_user_ctxt(struct hfi1_filedata *fd);
+static void user_init(struct hfi1_ctxtdata *uctxt);
+static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
+ __u32 len);
+static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
+ __u32 len);
+static int setup_base_ctxt(struct hfi1_filedata *fd);
+static int setup_subctxt(struct hfi1_ctxtdata *uctxt);
+
+static int find_sub_ctxt(struct hfi1_filedata *fd,
+ const struct hfi1_user_info *uinfo);
+static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
+ struct hfi1_user_info *uinfo);
+static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt);
+static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt);
+static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
+ unsigned long events);
+static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey);
+static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
+ int start_stop);
+static int vma_fault(struct vm_fault *vmf);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
unsigned long arg);
@@ -173,6 +181,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
struct hfi1_devdata,
user_cdev);
+ if (!((dd->flags & HFI1_PRESENT) && dd->kregbase))
+ return -EINVAL;
+
if (!atomic_inc_not_zero(&dd->user_refcount))
return -ENXIO;
@@ -187,6 +198,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
fd->rec_cpu_num = -1; /* no cpu affinity by default */
fd->mm = current->mm;
mmgrab(fd->mm);
+ fd->dd = dd;
fp->private_data = fd;
} else {
fp->private_data = NULL;
@@ -229,20 +241,14 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(uinfo)))
return -EFAULT;
- ret = assign_ctxt(fp, &uinfo);
- if (ret < 0)
- return ret;
- ret = setup_ctxt(fp);
- if (ret)
- return ret;
- ret = user_init(fp);
+ ret = assign_ctxt(fd, &uinfo);
break;
case HFI1_IOCTL_CTXT_INFO:
- ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg,
+ ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg,
sizeof(struct hfi1_ctxt_info));
break;
case HFI1_IOCTL_USER_INFO:
- ret = get_base_info(fp, (void __user *)(unsigned long)arg,
+ ret = get_base_info(fd, (void __user *)(unsigned long)arg,
sizeof(struct hfi1_base_info));
break;
case HFI1_IOCTL_CREDIT_UPD:
@@ -256,7 +262,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(tinfo)))
return -EFAULT;
- ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
+ ret = hfi1_user_exp_rcv_setup(fd, &tinfo);
if (!ret) {
/*
* Copy the number of tidlist entries we used
@@ -278,7 +284,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(tinfo)))
return -EFAULT;
- ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
+ ret = hfi1_user_exp_rcv_clear(fd, &tinfo);
if (ret)
break;
addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
@@ -293,7 +299,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(tinfo)))
return -EFAULT;
- ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
+ ret = hfi1_user_exp_rcv_invalid(fd, &tinfo);
if (ret)
break;
addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
@@ -430,7 +436,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
unsigned long count = 0;
ret = hfi1_user_sdma_process_request(
- kiocb->ki_filp, (struct iovec *)(from->iov + done),
+ fd, (struct iovec *)(from->iov + done),
dim, &count);
if (ret) {
reqs = ret;
@@ -586,8 +592,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* knows where its own bitmap is within the page.
*/
memaddr = (unsigned long)(dd->events +
- ((uctxt->ctxt - dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
+ ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
+ HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
memlen = PAGE_SIZE;
/*
* v3.7 removes VM_RESERVED but the effect is kept by
@@ -597,6 +603,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
vmf = 1;
break;
case STATUS:
+ if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) {
+ ret = -EPERM;
+ goto done;
+ }
memaddr = kvirt_to_phys((void *)dd->status);
memlen = PAGE_SIZE;
flags |= VM_IO | VM_DONTEXPAND;
@@ -752,16 +762,19 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
/* release the cpu */
hfi1_put_proc_affinity(fdata->rec_cpu_num);
+ /* clean up rcv side */
+ hfi1_user_exp_rcv_free(fdata);
+
/*
* Clear any left over, unhandled events so the next process that
* gets this context doesn't get confused.
*/
- ev = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
+ ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
*ev = 0;
- if (--uctxt->cnt) {
- uctxt->active_slaves &= ~(1 << fdata->subctxt);
+ __clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
+ if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
mutex_unlock(&hfi1_mutex);
goto done;
}
@@ -791,8 +804,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
dd->rcd[uctxt->ctxt] = NULL;
- hfi1_user_exp_rcv_free(fdata);
- hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
+ hfi1_user_exp_rcv_grp_free(uctxt);
+ hfi1_clear_ctxt_pkey(dd, uctxt);
uctxt->rcvwait_to = 0;
uctxt->piowait_to = 0;
@@ -832,121 +845,135 @@ static u64 kvirt_to_phys(void *addr)
return paddr;
}
-static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
+static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
{
- int i_minor, ret = 0;
+ int ret;
unsigned int swmajor, swminor;
swmajor = uinfo->userversion >> 16;
- if (swmajor != HFI1_USER_SWMAJOR) {
- ret = -ENODEV;
- goto done;
- }
+ if (swmajor != HFI1_USER_SWMAJOR)
+ return -ENODEV;
swminor = uinfo->userversion & 0xffff;
mutex_lock(&hfi1_mutex);
- /* First, lets check if we need to setup a shared context? */
+ /*
+ * Get a sub context if necessary.
+ * ret < 0 error, 0 no context, 1 sub-context found
+ */
+ ret = 0;
if (uinfo->subctxt_cnt) {
- struct hfi1_filedata *fd = fp->private_data;
-
- ret = find_shared_ctxt(fp, uinfo);
- if (ret < 0)
- goto done_unlock;
- if (ret) {
+ ret = find_sub_ctxt(fd, uinfo);
+ if (ret > 0)
fd->rec_cpu_num =
hfi1_get_proc_affinity(fd->uctxt->numa_id);
- }
}
/*
- * We execute the following block if we couldn't find a
- * shared context or if context sharing is not required.
+ * Allocate a base context if context sharing is not required or we
+ * couldn't find a sub context.
*/
- if (!ret) {
- i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
- ret = get_user_context(fp, uinfo, i_minor);
- }
-done_unlock:
+ if (!ret)
+ ret = allocate_ctxt(fd, fd->dd, uinfo);
+
mutex_unlock(&hfi1_mutex);
-done:
+
+ /* Depending on the context type, do the appropriate init */
+ if (ret > 0) {
+ /*
+ * sub-context info can only be set up after the base
+ * context has been completed.
+ */
+ ret = wait_event_interruptible(fd->uctxt->wait, !test_bit(
+ HFI1_CTXT_BASE_UNINIT,
+ &fd->uctxt->event_flags));
+ if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) {
+ clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
+ return -ENOMEM;
+ }
+ /* The only thing a sub context needs is the user_xxx stuff */
+ if (!ret)
+ ret = init_user_ctxt(fd);
+
+ if (ret)
+ clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
+ } else if (!ret) {
+ ret = setup_base_ctxt(fd);
+ if (fd->uctxt->subctxt_cnt) {
+ /* If there is an error, set the failed bit. */
+ if (ret)
+ set_bit(HFI1_CTXT_BASE_FAILED,
+ &fd->uctxt->event_flags);
+ /*
+ * Base context is done, notify anybody using a
+ * sub-context that is waiting for this completion
+ */
+ clear_bit(HFI1_CTXT_BASE_UNINIT,
+ &fd->uctxt->event_flags);
+ wake_up(&fd->uctxt->wait);
+ }
+ }
+
return ret;
}
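
assign_ctxt() now makes sub-context openers sleep until the base context has finished (or failed) its setup, signalled through two event-flag bits and the context wait queue. A reduced sketch of that handshake, assuming illustrative flag names analogous to HFI1_CTXT_BASE_UNINIT/HFI1_CTXT_BASE_FAILED:

#include <linux/wait.h>
#include <linux/bitops.h>

#define CTX_BASE_UNINIT	0	/* illustrative flag bits */
#define CTX_BASE_FAILED	1

struct shared_ctx {
	wait_queue_head_t wait;
	unsigned long flags;
};

/* sub-context opener: sleep until the base context finished its setup */
static int wait_for_base(struct shared_ctx *sc)
{
	int ret = wait_event_interruptible(sc->wait,
				!test_bit(CTX_BASE_UNINIT, &sc->flags));

	if (ret)
		return ret;	/* interrupted by a signal */
	return test_bit(CTX_BASE_FAILED, &sc->flags) ? -ENOMEM : 0;
}

/* base-context side: publish the outcome and wake any waiters */
static void base_init_done(struct shared_ctx *sc, int err)
{
	if (err)
		set_bit(CTX_BASE_FAILED, &sc->flags);
	clear_bit(CTX_BASE_UNINIT, &sc->flags);
	wake_up(&sc->wait);
}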
-static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo,
- int devno)
+/*
+ * The hfi1_mutex must be held when this function is called. It is
+ * necessary to ensure serialized access to the bitmask in_use_ctxts.
+ */
+static int find_sub_ctxt(struct hfi1_filedata *fd,
+ const struct hfi1_user_info *uinfo)
{
- struct hfi1_devdata *dd = NULL;
- int devmax, npresent, nup;
+ int i;
+ struct hfi1_devdata *dd = fd->dd;
+ u16 subctxt;
- devmax = hfi1_count_units(&npresent, &nup);
- if (!npresent)
- return -ENXIO;
+ for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) {
+ struct hfi1_ctxtdata *uctxt = dd->rcd[i];
- if (!nup)
- return -ENETDOWN;
+ /* Skip ctxts which are not yet open */
+ if (!uctxt ||
+ bitmap_empty(uctxt->in_use_ctxts,
+ HFI1_MAX_SHARED_CTXTS))
+ continue;
- dd = hfi1_lookup(devno);
- if (!dd)
- return -ENODEV;
- else if (!dd->freectxts)
- return -EBUSY;
+ /* Skip dynamically allocated kernel contexts */
+ if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
+ continue;
- return allocate_ctxt(fp, dd, uinfo);
-}
+ /* Skip ctxt if it doesn't match the requested one */
+ if (memcmp(uctxt->uuid, uinfo->uuid,
+ sizeof(uctxt->uuid)) ||
+ uctxt->jkey != generate_jkey(current_uid()) ||
+ uctxt->subctxt_id != uinfo->subctxt_id ||
+ uctxt->subctxt_cnt != uinfo->subctxt_cnt)
+ continue;
-static int find_shared_ctxt(struct file *fp,
- const struct hfi1_user_info *uinfo)
-{
- int devmax, ndev, i;
- int ret = 0;
- struct hfi1_filedata *fd = fp->private_data;
+ /* Verify the sharing process matches the master */
+ if (uctxt->userversion != uinfo->userversion)
+ return -EINVAL;
- devmax = hfi1_count_units(NULL, NULL);
+ /* Find an unused context */
+ subctxt = find_first_zero_bit(uctxt->in_use_ctxts,
+ HFI1_MAX_SHARED_CTXTS);
+ if (subctxt >= uctxt->subctxt_cnt)
+ return -EBUSY;
- for (ndev = 0; ndev < devmax; ndev++) {
- struct hfi1_devdata *dd = hfi1_lookup(ndev);
+ fd->uctxt = uctxt;
+ fd->subctxt = subctxt;
+ __set_bit(fd->subctxt, uctxt->in_use_ctxts);
- if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase))
- continue;
- for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
- struct hfi1_ctxtdata *uctxt = dd->rcd[i];
-
- /* Skip ctxts which are not yet open */
- if (!uctxt || !uctxt->cnt)
- continue;
- /* Skip ctxt if it doesn't match the requested one */
- if (memcmp(uctxt->uuid, uinfo->uuid,
- sizeof(uctxt->uuid)) ||
- uctxt->jkey != generate_jkey(current_uid()) ||
- uctxt->subctxt_id != uinfo->subctxt_id ||
- uctxt->subctxt_cnt != uinfo->subctxt_cnt)
- continue;
-
- /* Verify the sharing process matches the master */
- if (uctxt->userversion != uinfo->userversion ||
- uctxt->cnt >= uctxt->subctxt_cnt) {
- ret = -EINVAL;
- goto done;
- }
- fd->uctxt = uctxt;
- fd->subctxt = uctxt->cnt++;
- uctxt->active_slaves |= 1 << fd->subctxt;
- ret = 1;
- goto done;
- }
+ return 1;
}
-done:
- return ret;
+ return 0;
}
-static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
+static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
struct hfi1_user_info *uinfo)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt;
- unsigned ctxt;
+ unsigned int ctxt;
int ret, numa;
if (dd->flags & HFI1_FROZEN) {
@@ -960,7 +987,16 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
return -EIO;
}
- for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; ctxt++)
+ /*
+ * This check is sort of redundant to the next EBUSY error. It would
+ * also indicate an inconsistency in the driver if this value was
+ * zero, but there were still contexts available.
+ */
+ if (!dd->freectxts)
+ return -EBUSY;
+
+ for (ctxt = dd->first_dyn_alloc_ctxt;
+ ctxt < dd->num_rcv_contexts; ctxt++)
if (!dd->rcd[ctxt])
break;
@@ -1002,12 +1038,12 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
goto ctxdata_free;
/*
- * Setup shared context resources if the user-level has requested
- * shared contexts and this is the 'master' process.
+ * Setup sub context resources if the user-level has requested
+ * sub contexts.
* This has to be done here so the rest of the sub-contexts find the
* proper master.
*/
- if (uinfo->subctxt_cnt && !fd->subctxt) {
+ if (uinfo->subctxt_cnt) {
ret = init_subctxts(uctxt, uinfo);
/*
* On error, we don't need to disable and de-allocate the
@@ -1044,7 +1080,7 @@ ctxdata_free:
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
const struct hfi1_user_info *uinfo)
{
- unsigned num_subctxts;
+ u16 num_subctxts;
num_subctxts = uinfo->subctxt_cnt;
if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
@@ -1052,9 +1088,8 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt,
uctxt->subctxt_cnt = uinfo->subctxt_cnt;
uctxt->subctxt_id = uinfo->subctxt_id;
- uctxt->active_slaves = 1;
uctxt->redirect_seq_cnt = 1;
- set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
+ set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
return 0;
}
@@ -1062,13 +1097,12 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt,
static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
{
int ret = 0;
- unsigned num_subctxts = uctxt->subctxt_cnt;
+ u16 num_subctxts = uctxt->subctxt_cnt;
uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
- if (!uctxt->subctxt_uregbase) {
- ret = -ENOMEM;
- goto bail;
- }
+ if (!uctxt->subctxt_uregbase)
+ return -ENOMEM;
+
/* We can take the size of the RcvHdr Queue from the master */
uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
num_subctxts);
@@ -1083,25 +1117,22 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
ret = -ENOMEM;
goto bail_rhdr;
}
- goto bail;
+
+ return 0;
+
bail_rhdr:
vfree(uctxt->subctxt_rcvhdr_base);
+ uctxt->subctxt_rcvhdr_base = NULL;
bail_ureg:
vfree(uctxt->subctxt_uregbase);
uctxt->subctxt_uregbase = NULL;
-bail:
+
return ret;
}
-static int user_init(struct file *fp)
+static void user_init(struct hfi1_ctxtdata *uctxt)
{
unsigned int rcvctrl_ops = 0;
- struct hfi1_filedata *fd = fp->private_data;
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
-
- /* make sure that the context has already been setup */
- if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
- return -EFAULT;
/* initialize poll variables... */
uctxt->urgent = 0;
@@ -1149,20 +1180,12 @@ static int user_init(struct file *fp)
else
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
-
- /* Notify any waiting slaves */
- if (uctxt->subctxt_cnt) {
- clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
- wake_up(&uctxt->wait);
- }
-
- return 0;
}
-static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
+static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
+ __u32 len)
{
struct hfi1_ctxt_info cinfo;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
int ret = 0;
@@ -1200,75 +1223,71 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
return ret;
}
-static int setup_ctxt(struct file *fp)
+static int init_user_ctxt(struct hfi1_filedata *fd)
+{
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ int ret;
+
+ ret = hfi1_user_sdma_alloc_queues(uctxt, fd);
+ if (ret)
+ return ret;
+
+ ret = hfi1_user_exp_rcv_init(fd);
+
+ return ret;
+}
+
+static int setup_base_ctxt(struct hfi1_filedata *fd)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
- /*
- * Context should be set up only once, including allocation and
- * programming of eager buffers. This is done if context sharing
- * is not requested or by the master process.
- */
- if (!uctxt->subctxt_cnt || !fd->subctxt) {
- ret = hfi1_init_ctxt(uctxt->sc);
- if (ret)
- goto done;
+ hfi1_init_ctxt(uctxt->sc);
- /* Now allocate the RcvHdr queue and eager buffers. */
- ret = hfi1_create_rcvhdrq(dd, uctxt);
- if (ret)
- goto done;
- ret = hfi1_setup_eagerbufs(uctxt);
- if (ret)
- goto done;
- if (uctxt->subctxt_cnt && !fd->subctxt) {
- ret = setup_subctxt(uctxt);
- if (ret)
- goto done;
- }
- } else {
- ret = wait_event_interruptible(uctxt->wait, !test_bit(
- HFI1_CTXT_MASTER_UNINIT,
- &uctxt->event_flags));
- if (ret)
- goto done;
- }
+ /* Now allocate the RcvHdr queue and eager buffers. */
+ ret = hfi1_create_rcvhdrq(dd, uctxt);
+ if (ret)
+ return ret;
- ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
+ ret = hfi1_setup_eagerbufs(uctxt);
if (ret)
- goto done;
- /*
- * Expected receive has to be setup for all processes (including
- * shared contexts). However, it has to be done after the master
- * context has been fully configured as it depends on the
- * eager/expected split of the RcvArray entries.
- * Setting it up here ensures that the subcontexts will be waiting
- * (due to the above wait_event_interruptible() until the master
- * is setup.
- */
- ret = hfi1_user_exp_rcv_init(fp);
+ goto setup_failed;
+
+ /* If sub-contexts are enabled, do the appropriate setup */
+ if (uctxt->subctxt_cnt)
+ ret = setup_subctxt(uctxt);
if (ret)
- goto done;
+ goto setup_failed;
- set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
-done:
+ ret = hfi1_user_exp_rcv_grp_init(fd);
+ if (ret)
+ goto setup_failed;
+
+ ret = init_user_ctxt(fd);
+ if (ret)
+ goto setup_failed;
+
+ user_init(uctxt);
+
+ return 0;
+
+setup_failed:
+ hfi1_free_ctxtdata(dd, uctxt);
return ret;
}
-static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
+static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
+ __u32 len)
{
struct hfi1_base_info binfo;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
ssize_t sz;
unsigned offset;
int ret = 0;
- trace_hfi1_uctxtdata(uctxt->dd, uctxt);
+ trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt);
memset(&binfo, 0, sizeof(binfo));
binfo.hw_version = dd->revision;
@@ -1306,7 +1325,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
*/
binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
fd->subctxt, 0);
- offset = offset_in_page((((uctxt->ctxt - dd->first_user_ctxt) *
+ offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fd->subctxt) *
sizeof(*dd->events));
binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
@@ -1400,12 +1419,12 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
}
spin_lock_irqsave(&dd->uctxt_lock, flags);
- for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts;
+ for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts;
ctxt++) {
uctxt = dd->rcd[ctxt];
if (uctxt) {
unsigned long *evs = dd->events +
- (uctxt->ctxt - dd->first_user_ctxt) *
+ (uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS;
int i;
/*
@@ -1432,7 +1451,7 @@ done:
* overflow conditions. start_stop==1 re-enables, to be used to
* re-init the software copy of the head register
*/
-static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
+static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
int start_stop)
{
struct hfi1_devdata *dd = uctxt->dd;
@@ -1467,7 +1486,7 @@ bail:
* User process then performs actions appropriate to bit having been
* set, if desired, and checks again in future.
*/
-static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
+static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
unsigned long events)
{
int i;
@@ -1477,7 +1496,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
if (!dd->events)
return 0;
- evs = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
+ evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + subctxt;
for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
@@ -1488,8 +1507,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
return 0;
}
-static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
- u16 pkey)
+static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey)
{
int ret = -ENOENT, i, intable = 0;
struct hfi1_pportdata *ppd = uctxt->ppd;
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 0dd50cdb039a..4042c11b2742 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -1004,7 +1004,9 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
{
u64 reg;
int ret;
- u8 ver_a, ver_b;
+ u8 ver_major;
+ u8 ver_minor;
+ u8 ver_patch;
/*
* DC Reset sequence
@@ -1073,10 +1075,10 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
return -ETIMEDOUT;
}
- read_misc_status(dd, &ver_a, &ver_b);
- dd_dev_info(dd, "8051 firmware version %d.%d\n",
- (int)ver_b, (int)ver_a);
- dd->dc8051_ver = dc8051_ver(ver_b, ver_a);
+ read_misc_status(dd, &ver_major, &ver_minor, &ver_patch);
+ dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
+ (int)ver_major, (int)ver_minor, (int)ver_patch);
+ dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 0808e3c3ba39..da322e6668cc 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_KERNEL_H
#define _HFI1_KERNEL_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,6 +54,7 @@
#include <linux/list.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
+#include <linux/idr.h>
#include <linux/io.h>
#include <linux/fs.h>
#include <linux/completion.h>
@@ -66,6 +67,7 @@
#include <linux/i2c-algo-bit.h>
#include <rdma/ib_hdrs.h>
#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
#include <rdma/rdma_vt.h>
#include "chip_registers.h"
@@ -194,12 +196,6 @@ struct hfi1_ctxtdata {
void *rcvhdrq;
/* kernel virtual address where hdrqtail is updated */
volatile __le64 *rcvhdrtail_kvaddr;
- /*
- * Shared page for kernel to signal user processes that send buffers
- * need disarming. The process should call HFI1_CMD_DISARM_BUFS
- * or HFI1_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set.
- */
- unsigned long *user_event_mask;
/* when waiting for rcv or pioavail */
wait_queue_head_t wait;
/* rcvhdrq size (for freeing) */
@@ -222,13 +218,12 @@ struct hfi1_ctxtdata {
* (ignoring forks, dup, etc. for now)
*/
int cnt;
+ /* Device context index */
+ unsigned ctxt;
/*
- * how much space to leave at start of eager TID entries for
- * protocol use, on each TID
+ * non-zero if ctxt can be shared, and defines the maximum number of
+ * sub-contexts for this device context.
*/
- /* instead of calculating it */
- unsigned ctxt;
- /* non-zero if ctxt is being shared. */
u16 subctxt_cnt;
/* non-zero if ctxt is being shared. */
u16 subctxt_id;
@@ -278,16 +273,18 @@ struct hfi1_ctxtdata {
struct hfi1_devdata *dd;
/* so functions that need physical port can get it easily */
struct hfi1_pportdata *ppd;
+ /* associated msix interrupt */
+ u32 msix_intr;
/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
void *subctxt_uregbase;
/* An array of pages for the eager receive buffers * N */
void *subctxt_rcvegrbuf;
/* An array of pages for the eager header queue entries * N */
void *subctxt_rcvhdr_base;
+ /* Bitmask of in use context(s) */
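+ /* Bit 0 marks the base context; sub-contexts claim higher bits */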
+ DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS);
/* The version of the library which opened this ctxt */
u32 userversion;
- /* Bitmask of active slaves */
- u32 active_slaves;
/* Type of packets or conditions we want to poll for */
u16 poll_type;
/* receive packet sequence counter */
@@ -337,6 +334,12 @@ struct hfi1_ctxtdata {
* packets with the wrong interrupt handler.
*/
int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+
+ /* Indicates that this is a vnic context */
+ bool is_vnic;
+
+ /* vnic queue index this context is mapped to */
+ u8 vnic_q_idx;
};
/*
@@ -474,7 +477,7 @@ struct rvt_sge_state;
#define HFI1_PART_ENFORCE_OUT 0x2
/* how often we check for synthetic counter wrap around */
-#define SYNTH_CNT_TIME 2
+#define SYNTH_CNT_TIME 3
/* Counter flags */
#define CNTR_NORMAL 0x0 /* Normal counters, just read register */
@@ -808,6 +811,32 @@ struct hfi1_asic_data {
struct hfi1_i2c_bus *i2c_bus1;
};
+/* sizes for both the QP and RSM map tables */
+#define NUM_MAP_ENTRIES 256
+#define NUM_MAP_REGS 32
+
+/*
+ * Number of VNIC contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
+ */
+#define HFI1_NUM_VNIC_CTXT 8
+
+/* Number of VNIC RSM entries */
+#define NUM_VNIC_MAP_ENTRIES 8
+
+/* Virtual NIC information */
+struct hfi1_vnic_data {
+ struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
+ struct kmem_cache *txreq_cache;
+ u8 num_vports;
+ struct idr vesw_idr;
+ u8 rmt_start;
+ u8 num_ctxt;
+ u32 msix_idx;
+};
+
+struct hfi1_vnic_vport_info;
+
/* device data struct now contains only "general per-device" info.
* fields related to a physical IB port are in a hfi1_pportdata struct.
*/
@@ -926,8 +955,9 @@ struct hfi1_devdata {
spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
/* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */
spinlock_t uctxt_lock; /* rcd and user context changes */
- /* exclusive access to 8051 */
- spinlock_t dc8051_lock;
+ struct mutex dc8051_lock; /* exclusive access to 8051 */
+ struct workqueue_struct *update_cntr_wq;
+ struct work_struct update_cntr_work;
/* exclusive access to 8051 memory */
spinlock_t dc8051_memlock;
int dc8051_timed_out; /* remember if the 8051 timed out */
@@ -1020,7 +1050,7 @@ struct hfi1_devdata {
u8 qos_shift;
u16 irev; /* implementation revision */
- u16 dc8051_ver; /* 8051 firmware version */
+ u32 dc8051_ver; /* 8051 firmware version */
spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
struct platform_config platform_config;
@@ -1031,6 +1061,7 @@ struct hfi1_devdata {
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
u32 num_msix_entries;
+ u32 first_dyn_msix_idx;
/* INTx information */
u32 requested_intx_irq; /* did we request one? */
@@ -1115,6 +1146,9 @@ struct hfi1_devdata {
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+ int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen);
/* hfi1_pportdata, points to array of (physical) port-specific
* data structs, indexed by pidx (0..n-1)
*/
@@ -1126,8 +1160,8 @@ struct hfi1_devdata {
u16 flags;
/* Number of physical ports available */
u8 num_pports;
- /* Lowest context number which can be used by user processes */
- u8 first_user_ctxt;
+ /* Lowest context number which can be used by user processes or VNIC */
+ u8 first_dyn_alloc_ctxt;
/* adding a new field here would make it part of this cacheline */
/* seqlock for sc2vl */
@@ -1167,15 +1201,24 @@ struct hfi1_devdata {
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
bool aspm_enabled; /* ASPM state: enabled/disabled */
- struct rhashtable sdma_rht;
+ struct rhashtable *sdma_rht;
struct kobject kobj;
+
+ /* vnic data */
+ struct hfi1_vnic_data vnic;
};
+static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
+{
+ return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES;
+}
+
/* 8051 firmware version helper */
-#define dc8051_ver(a, b) ((a) << 8 | (b))
-#define dc8051_ver_maj(a) ((a & 0xff00) >> 8)
-#define dc8051_ver_min(a) (a & 0x00ff)
+#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
+#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16)
+#define dc8051_ver_min(a) (((a) & 0x00ff00) >> 8)
+#define dc8051_ver_patch(a) ((a) & 0x0000ff)
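+/*
+ * e.g. dc8051_ver(1, 27, 0) packs to 0x011b00; dc8051_ver_maj() then yields
+ * 1, dc8051_ver_min() 27 and dc8051_ver_patch() 0.
+ */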
/* f_put_tid types */
#define PT_EXPECTED 0
@@ -1188,10 +1231,11 @@ struct mmu_rb_handler;
/* Private data for file operations */
struct hfi1_filedata {
+ struct hfi1_devdata *dd;
struct hfi1_ctxtdata *uctxt;
- unsigned subctxt;
struct hfi1_user_sdma_comp_q *cq;
struct hfi1_user_sdma_pkt_q *pq;
+ u16 subctxt;
/* for cpu affinity; -1 if none */
int rec_cpu_num;
u32 tid_n_pinned;
@@ -1213,28 +1257,31 @@ struct hfi1_devdata *hfi1_lookup(int unit);
extern u32 hfi1_cpulist_count;
extern unsigned long *hfi1_cpulist;
-int hfi1_init(struct hfi1_devdata *, int);
-int hfi1_count_units(int *npresentp, int *nupp);
+int hfi1_init(struct hfi1_devdata *dd, int reinit);
int hfi1_count_active_units(void);
-int hfi1_diag_add(struct hfi1_devdata *);
-void hfi1_diag_remove(struct hfi1_devdata *);
+int hfi1_diag_add(struct hfi1_devdata *dd);
+void hfi1_diag_remove(struct hfi1_devdata *dd);
void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup);
void handle_user_interrupt(struct hfi1_ctxtdata *rcd);
-int hfi1_create_rcvhdrq(struct hfi1_devdata *, struct hfi1_ctxtdata *);
-int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *);
+int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
+int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd);
int hfi1_create_ctxts(struct hfi1_devdata *dd);
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32, int);
-void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *,
- struct hfi1_devdata *, u8, u8);
-void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *);
-
-int handle_receive_interrupt(struct hfi1_ctxtdata *, int);
-int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
-int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
+struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
+ int numa);
+void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
+ struct hfi1_devdata *dd, u8 hw_pidx, u8 port);
+void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
+
+int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
+int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
+int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
void set_all_slowpath(struct hfi1_devdata *dd);
+void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
+void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
+void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd);
extern const struct pci_device_id hfi1_pci_tbl[];
@@ -1254,16 +1301,24 @@ int hfi1_reset_device(int);
/* return the driver's idea of the logical OPA port state */
static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
{
- return ppd->lstate; /* use the cached value */
+ /*
+ * The driver does some processing from the time the logical
+ * link state is at INIT to the time the SM can be notified
+ * as such. Return IB_PORT_DOWN until the software state
+ * is ready.
+ */
+ if (ppd->lstate == IB_PORT_INIT && !(ppd->host_link_state & HLS_UP))
+ return IB_PORT_DOWN;
+ else
+ return ppd->lstate;
}
void receive_interrupt_work(struct work_struct *work);
/* extract service channel from header and rhf */
-static inline int hdr2sc(struct ib_header *hdr, u64 rhf)
+static inline int hfi1_9B_get_sc5(struct ib_header *hdr, u64 rhf)
{
- return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
- ((!!(rhf_dc_info(rhf))) << 4);
+ return ib_get_sc(hdr) | ((!!(rhf_dc_info(rhf))) << 4);
}
#define HFI1_JKEY_WIDTH 16
@@ -1519,7 +1574,7 @@ bad:
u32 lrh_max_header_bytes(struct hfi1_devdata *dd);
int mtu_to_enum(u32 mtu, int default_if_bad);
-u16 enum_to_mtu(int);
+u16 enum_to_mtu(int mtu);
static inline int valid_ib_mtu(unsigned int mtu)
{
return mtu == 256 || mtu == 512 ||
@@ -1533,15 +1588,15 @@ static inline int valid_opa_max_mtu(unsigned int mtu)
(valid_ib_mtu(mtu) || mtu == 8192 || mtu == 10240);
}
-int set_mtu(struct hfi1_pportdata *);
+int set_mtu(struct hfi1_pportdata *ppd);
-int hfi1_set_lid(struct hfi1_pportdata *, u32, u8);
-void hfi1_disable_after_error(struct hfi1_devdata *);
-int hfi1_set_uevent_bits(struct hfi1_pportdata *, const int);
-int hfi1_rcvbuf_validate(u32, u8, u16 *);
+int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc);
+void hfi1_disable_after_error(struct hfi1_devdata *dd);
+int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit);
+int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encode);
-int fm_get_table(struct hfi1_pportdata *, int, void *);
-int fm_set_table(struct hfi1_pportdata *, int, void *);
+int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t);
+int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t);
void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf);
void reset_link_credits(struct hfi1_devdata *dd);
@@ -1597,9 +1652,9 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
u32 bth1;
bth1 = be32_to_cpu(ohdr->bth[1]);
- if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) {
+ if (unlikely(bth1 & (IB_BECN_SMASK | IB_FECN_SMASK))) {
hfi1_process_ecn_slowpath(qp, pkt, do_cnp);
- return bth1 & HFI1_FECN_SMASK;
+ return !!(bth1 & IB_FECN_SMASK);
}
return false;
}
@@ -1663,19 +1718,19 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
#define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1)
/* ctxt_flag bit offsets */
- /* context has been setup */
-#define HFI1_CTXT_SETUP_DONE 1
+ /* base context has not finished initializing */
+#define HFI1_CTXT_BASE_UNINIT 1
+ /* base context initialization failed */
+#define HFI1_CTXT_BASE_FAILED 2
/* waiting for a packet to arrive */
-#define HFI1_CTXT_WAITING_RCV 2
- /* master has not finished initializing */
-#define HFI1_CTXT_MASTER_UNINIT 4
+#define HFI1_CTXT_WAITING_RCV 3
/* waiting for an urgent packet to arrive */
-#define HFI1_CTXT_WAITING_URG 5
+#define HFI1_CTXT_WAITING_URG 4
/* free up any allocated data at closes */
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *,
- const struct pci_device_id *);
-void hfi1_free_devdata(struct hfi1_devdata *);
+struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
+ const struct pci_device_id *ent);
+void hfi1_free_devdata(struct hfi1_devdata *dd);
struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
/* LED beaconing functions */
@@ -1750,24 +1805,24 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
extern const char ib_hfi1_version[];
-int hfi1_device_create(struct hfi1_devdata *);
-void hfi1_device_remove(struct hfi1_devdata *);
+int hfi1_device_create(struct hfi1_devdata *dd);
+void hfi1_device_remove(struct hfi1_devdata *dd);
int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
struct kobject *kobj);
-int hfi1_verbs_register_sysfs(struct hfi1_devdata *);
-void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *);
+int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd);
+void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
/* Hook for sysfs read of QSFP */
int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
-int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *);
-void hfi1_pcie_cleanup(struct pci_dev *);
-int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *);
+int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent);
+void hfi1_pcie_cleanup(struct pci_dev *pdev);
+int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
-void hfi1_pcie_flr(struct hfi1_devdata *);
-int pcie_speeds(struct hfi1_devdata *);
-void request_msix(struct hfi1_devdata *, u32 *, struct hfi1_msix_entry *);
-void hfi1_enable_intx(struct pci_dev *);
+int pcie_speeds(struct hfi1_devdata *dd);
+void request_msix(struct hfi1_devdata *dd, u32 *nent,
+ struct hfi1_msix_entry *entry);
+void hfi1_enable_intx(struct pci_dev *pdev);
void restore_pci_variables(struct hfi1_devdata *dd);
int do_pcie_gen3_transition(struct hfi1_devdata *dd);
int parse_platform_config(struct hfi1_devdata *dd);
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index f40864e9a3b2..4a11d4da4c92 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -53,6 +53,7 @@
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/hrtimer.h>
+#include <linux/bitmap.h>
#include <rdma/rdma_vt.h>
#include "hfi.h"
@@ -65,10 +66,12 @@
#include "verbs.h"
#include "aspm.h"
#include "affinity.h"
+#include "vnic.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
+#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
/*
* min buffers we want to have per context, after driver
*/
@@ -100,9 +103,9 @@ static unsigned hfi1_rcvarr_split = 25;
module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO);
MODULE_PARM_DESC(rcvarr_split, "Percent of context's RcvArray entries used for Eager buffers");
-static uint eager_buffer_size = (2 << 20); /* 2MB */
+static uint eager_buffer_size = (8 << 20); /* 8MB */
module_param(eager_buffer_size, uint, S_IRUGO);
-MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 2MB");
+MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 8MB");
static uint rcvhdrcnt = 2048; /* 2x the max eager buffer count */
module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO);
@@ -116,7 +119,7 @@ unsigned int user_credit_return_threshold = 33; /* default is 33% */
module_param(user_credit_return_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)");
-static inline u64 encode_rcv_header_entry_size(u16);
+static inline u64 encode_rcv_header_entry_size(u16 size);
static struct idr hfi1_unit_table;
u32 hfi1_cpulist_count;
@@ -139,7 +142,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
goto nomem;
/* create one or more kernel contexts */
- for (i = 0; i < dd->first_user_ctxt; ++i) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
struct hfi1_pportdata *ppd;
struct hfi1_ctxtdata *rcd;
@@ -174,13 +177,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
goto nomem;
}
- ret = hfi1_init_ctxt(rcd->sc);
- if (ret < 0) {
- dd_dev_err(dd,
- "Failed to setup kernel receive context, failing\n");
- ret = -EFAULT;
- goto bail;
- }
+ hfi1_init_ctxt(rcd->sc);
}
/*
@@ -192,7 +189,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
return 0;
nomem:
ret = -ENOMEM;
-bail:
+
if (dd->rcd) {
for (i = 0; i < dd->num_rcv_contexts; ++i)
hfi1_free_ctxtdata(dd, dd->rcd[i]);
@@ -214,9 +211,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
u32 base;
if (dd->rcv_entries.nctxt_extra >
- dd->num_rcv_contexts - dd->first_user_ctxt)
+ dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt)
kctxt_ngroups = (dd->rcv_entries.nctxt_extra -
- (dd->num_rcv_contexts - dd->first_user_ctxt));
+ (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt));
rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa);
if (rcd) {
u32 rcvtids, max_entries;
@@ -226,7 +223,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
INIT_LIST_HEAD(&rcd->qp_wait_list);
rcd->ppd = ppd;
rcd->dd = dd;
- rcd->cnt = 1;
+ __set_bit(0, rcd->in_use_ctxts);
rcd->ctxt = ctxt;
dd->rcd[ctxt] = rcd;
rcd->numa_id = numa;
@@ -238,27 +235,29 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
* Calculate the context's RcvArray entry starting point.
* We do this here because we have to take into account all
* the RcvArray entries that previous context would have
- * taken and we have to account for any extra groups
- * assigned to the kernel or user contexts.
+ * taken and we have to account for any extra groups assigned
+ * to the static (kernel) or dynamic (vnic/user) contexts.
*/
- if (ctxt < dd->first_user_ctxt) {
+ if (ctxt < dd->first_dyn_alloc_ctxt) {
if (ctxt < kctxt_ngroups) {
base = ctxt * (dd->rcv_entries.ngroups + 1);
rcd->rcv_array_groups++;
- } else
+ } else {
base = kctxt_ngroups +
(ctxt * dd->rcv_entries.ngroups);
+ }
} else {
- u16 ct = ctxt - dd->first_user_ctxt;
+ u16 ct = ctxt - dd->first_dyn_alloc_ctxt;
base = ((dd->n_krcv_queues * dd->rcv_entries.ngroups) +
kctxt_ngroups);
if (ct < dd->rcv_entries.nctxt_extra) {
base += ct * (dd->rcv_entries.ngroups + 1);
rcd->rcv_array_groups++;
- } else
+ } else {
base += dd->rcv_entries.nctxt_extra +
(ct * dd->rcv_entries.ngroups);
+ }
}
rcd->eager_base = base * dd->rcv_entries.group_size;
@@ -322,7 +321,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
}
rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE;
- if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */
+ /* Applicable only for statically created kernel contexts */
+ if (ctxt < dd->first_dyn_alloc_ctxt) {
rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
GFP_KERNEL, numa);
if (!rcd->opstats)
@@ -482,6 +482,9 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
default_pkey_idx = 1;
ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY;
+ ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
+ ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
+
if (loopback) {
hfi1_early_err(&pdev->dev,
"Faking data partition 0x8001 in idx %u\n",
@@ -585,7 +588,7 @@ static void enable_chip(struct hfi1_devdata *dd)
* Enable kernel ctxts' receive and receive interrupt.
* Other ctxts done as user opens and initializes them.
*/
- for (i = 0; i < dd->first_user_ctxt; ++i) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
@@ -616,7 +619,7 @@ static int create_workqueues(struct hfi1_devdata *dd)
alloc_workqueue(
"hfi%d_%d",
WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
- dd->num_sdma,
+ HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES,
dd->unit, pidx);
if (!ppd->hfi1_wq)
goto wq_error;
@@ -679,6 +682,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
dd->process_pio_send = hfi1_verbs_send_pio;
dd->process_dma_send = hfi1_verbs_send_dma;
dd->pio_inline_send = pio_copy;
+ dd->process_vnic_dma_send = hfi1_vnic_send_dma;
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
@@ -714,7 +718,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
}
/* dd->rcd can be NULL if early initialization failed */
- for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
+ for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
/*
* Set up the (kernel) rcvhdr queue and egr TIDs. If doing
* re-init, the simplest way to handle this is to free
@@ -960,7 +964,6 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
kfree(rcd->egrbufs.buffers);
sc_free(rcd->sc);
- vfree(rcd->user_event_mask);
vfree(rcd->subctxt_uregbase);
vfree(rcd->subctxt_rcvegrbuf);
vfree(rcd->subctxt_rcvhdr_base);
@@ -1078,11 +1081,11 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
spin_lock_init(&dd->uctxt_lock);
spin_lock_init(&dd->hfi1_diag_trans_lock);
spin_lock_init(&dd->sc_init_lock);
- spin_lock_init(&dd->dc8051_lock);
spin_lock_init(&dd->dc8051_memlock);
seqlock_init(&dd->sc2vl_lock);
spin_lock_init(&dd->sde_map_lock);
spin_lock_init(&dd->pio_map_lock);
+ mutex_init(&dd->dc8051_lock);
init_waitqueue_head(&dd->event_queue);
dd->int_counter = alloc_percpu(u64);
@@ -1425,6 +1428,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* First, lock the non-writable module parameters */
HFI1_CAP_LOCK();
+ /* Validate dev ids */
+ if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
+ ent->device == PCI_DEVICE_ID_INTEL1)) {
+ hfi1_early_err(&pdev->dev,
+ "Failing on unknown Intel deviceid 0x%x\n",
+ ent->device);
+ ret = -ENODEV;
+ goto bail;
+ }
+
/* Validate some global module parameters */
ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
if (ret)
@@ -1470,15 +1483,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret)
goto bail;
- if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
- ent->device == PCI_DEVICE_ID_INTEL1)) {
- hfi1_early_err(&pdev->dev,
- "Failing on unknown Intel deviceid 0x%x\n",
- ent->device);
- ret = -ENODEV;
- goto clean_bail;
- }
-
/*
* Do device-specific initialization, function table setup, dd
* allocation, etc.
@@ -1497,6 +1501,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* do the generic initialization */
initfail = hfi1_init(dd, 0);
+ /* setup vnic */
+ hfi1_vnic_setup(dd);
+
ret = hfi1_register_ib_device(dd);
/*
@@ -1530,6 +1537,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
hfi1_device_remove(dd);
if (!ret)
hfi1_unregister_ib_device(dd);
+ hfi1_vnic_cleanup(dd);
postinit_cleanup(dd);
if (initfail)
ret = initfail;
@@ -1574,6 +1582,9 @@ static void remove_one(struct pci_dev *pdev)
/* unregister from IB core */
hfi1_unregister_ib_device(dd);
+ /* cleanup vnic */
+ hfi1_vnic_cleanup(dd);
+
/*
* Disable the IB link, disable interrupts on the device,
* clear dma engines, etc.
@@ -1613,8 +1624,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize *
sizeof(u32));
- gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
- GFP_USER : GFP_KERNEL;
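+ /*
+ * vnic contexts come from the dynamic range but use SC_KERNEL send
+ * contexts, so treat them like kernel contexts for this allocation.
+ */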
+ if ((rcd->ctxt < dd->first_dyn_alloc_ctxt) ||
+ (rcd->sc && (rcd->sc->type == SC_KERNEL)))
+ gfp_flags = GFP_KERNEL;
+ else
+ gfp_flags = GFP_USER;
rcd->rcvhdrq = dma_zalloc_coherent(
&dd->pcidev->dev, amt, &rcd->rcvhdrq_dma,
gfp_flags | __GFP_COMP);
@@ -1668,8 +1682,6 @@ bail_free:
dd_dev_err(dd,
"attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
rcd->ctxt);
- vfree(rcd->user_event_mask);
- rcd->user_event_mask = NULL;
dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
@@ -1758,6 +1770,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
!HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) {
dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n",
rcd->ctxt);
+ ret = -ENOMEM;
goto bail_rcvegrbuf_phys;
}
@@ -1835,7 +1848,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
"ctxt%u: current Eager buffer size is invalid %u\n",
rcd->ctxt, rcd->egrbufs.rcvtid_size);
ret = -EINVAL;
- goto bail;
+ goto bail_rcvegrbuf_phys;
}
for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
@@ -1843,7 +1856,8 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->egrbufs.rcvtids[idx].dma, order);
cond_resched();
}
- goto bail;
+
+ return 0;
bail_rcvegrbuf_phys:
for (idx = 0; idx < rcd->egrbufs.alloced &&
@@ -1857,6 +1871,6 @@ bail_rcvegrbuf_phys:
rcd->egrbufs.buffers[idx].dma = 0;
rcd->egrbufs.buffers[idx].len = 0;
}
-bail:
+
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 65348d16ab2f..ba265d0ae93b 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -47,6 +47,7 @@
#include <linux/pci.h>
#include <linux/delay.h>
+#include <linux/bitmap.h>
#include "hfi.h"
#include "common.h"
@@ -131,19 +132,24 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
set_up_vl15(dd, dd->vau, dd->vl15_init);
assign_remote_cm_au_table(dd, dd->vcu);
- ppd->neighbor_guid =
- read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
- ppd->neighbor_type =
- read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
- DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
- ppd->neighbor_port_number =
- read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
- DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
- dd_dev_info(dd, "Neighbor GUID: %llx Neighbor type %d\n",
- ppd->neighbor_guid,
- ppd->neighbor_type);
}
+ ppd->neighbor_guid =
+ read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
+ ppd->neighbor_type =
+ read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
+ DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
+ ppd->neighbor_port_number =
+ read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
+ DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
+ ppd->neighbor_fm_security =
+ read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
+ DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
+ dd_dev_info(dd,
+ "Neighbor Guid %llx, Type %d, Port Num %d\n",
+ ppd->neighbor_guid, ppd->neighbor_type,
+ ppd->neighbor_port_number);
+
/* physical link went up */
ppd->linkup = 1;
ppd->offline_disabled_reason =
@@ -184,7 +190,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd)
unsigned long flags;
spin_lock_irqsave(&dd->uctxt_lock, flags);
- if (!rcd->cnt)
+ if (bitmap_empty(rcd->in_use_ctxts, HFI1_MAX_SHARED_CTXTS))
goto done;
if (test_and_clear_bit(HFI1_CTXT_WAITING_RCV, &rcd->event_flags)) {
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 09cda3c35e82..5977673a52d4 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -53,6 +53,7 @@
#include "mad.h"
#include "trace.h"
#include "qp.h"
+#include "vnic.h"
/* the reset value from the FM is supposed to be 0xffff, handle both */
#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
@@ -650,9 +651,11 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);
pi->port_packet_format.supported =
- cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
+ cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
+ OPA_PORT_PACKET_FORMAT_16B);
pi->port_packet_format.enabled =
- cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
+ cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
+ OPA_PORT_PACKET_FORMAT_16B);
/* flit_control.interleave is (OPA V1, version .76):
* bits use
@@ -701,7 +704,13 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
pi->buffer_units = cpu_to_be32(buffer_units);
- pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported);
+ pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported |
+ OPA_CAP_MASK3_IsEthOnFabricSupported);
+ /* Driver does not support mcast/collective configuration */
+ pi->opa_cap_mask &=
+ cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported);
+ pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7)
+ << 3 | (HFI1_MCAST_NR & 0x7));
/* HFI supports a replay buffer 128 LTPs in size */
pi->replay_depth.buffer = 0x80;
@@ -1146,16 +1155,6 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
ppd->linkinit_reason =
(pi->partenforce_filterraw &
OPA_PI_MASK_LINKINIT_REASON);
- /* enable/disable SW pkey checking as per FM control */
- if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_IN)
- ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
- else
- ppd->part_enforce &= ~HFI1_PART_ENFORCE_IN;
-
- if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_OUT)
- ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
- else
- ppd->part_enforce &= ~HFI1_PART_ENFORCE_OUT;
/* Must be a valid unicast LID address. */
if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
@@ -1167,9 +1166,9 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
spin_lock_irqsave(&ibp->rvp.lock, flags);
if (ibp->rvp.sm_ah) {
if (smlid != ibp->rvp.sm_lid)
- ibp->rvp.sm_ah->attr.dlid = smlid;
+ rdma_ah_set_dlid(&ibp->rvp.sm_ah->attr, smlid);
if (msl != ibp->rvp.sm_sl)
- ibp->rvp.sm_ah->attr.sl = msl;
+ rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl);
}
spin_unlock_irqrestore(&ibp->rvp.lock, flags);
if (smlid != ibp->rvp.sm_lid)
@@ -1465,25 +1464,15 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
}
-static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
-{
- u64 *val = data;
-
- *val++ = read_csr(dd, SEND_SC2VLT0);
- *val++ = read_csr(dd, SEND_SC2VLT1);
- *val++ = read_csr(dd, SEND_SC2VLT2);
- *val++ = read_csr(dd, SEND_SC2VLT3);
- return 0;
-}
-
#define ILLEGAL_VL 12
/*
* filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except
* for SC15, which must map to VL15). If we don't remap things this
* way it is possible for VL15 counters to increment when we try to
* send on a SC which is mapped to an invalid VL.
+ * When getting the table, convert ILLEGAL_VL back to VL15.
*/
-static void filter_sc2vlt(void *data)
+static void filter_sc2vlt(void *data, bool set)
{
int i;
u8 *pd = data;
@@ -1491,8 +1480,14 @@ static void filter_sc2vlt(void *data)
for (i = 0; i < OPA_MAX_SCS; i++) {
if (i == 15)
continue;
- if ((pd[i] & 0x1f) == 0xf)
- pd[i] = ILLEGAL_VL;
+
+ if (set) {
+ if ((pd[i] & 0x1f) == 0xf)
+ pd[i] = ILLEGAL_VL;
+ } else {
+ if ((pd[i] & 0x1f) == ILLEGAL_VL)
+ pd[i] = 0xf;
+ }
}
}
@@ -1500,7 +1495,7 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
{
u64 *val = data;
- filter_sc2vlt(data);
+ filter_sc2vlt(data, true);
write_csr(dd, SEND_SC2VLT0, *val++);
write_csr(dd, SEND_SC2VLT1, *val++);
@@ -1512,6 +1507,19 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
return 0;
}
+static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
+{
+ u64 *val = (u64 *)data;
+
+ *val++ = read_csr(dd, SEND_SC2VLT0);
+ *val++ = read_csr(dd, SEND_SC2VLT1);
+ *val++ = read_csr(dd, SEND_SC2VLT2);
+ *val++ = read_csr(dd, SEND_SC2VLT3);
+
+ filter_sc2vlt((u64 *)data, false);
+ return 0;
+}
+
static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
u32 *resp_len)
@@ -1986,31 +1994,6 @@ struct opa_pma_mad {
u8 data[2024];
} __packed;
-struct opa_class_port_info {
- u8 base_version;
- u8 class_version;
- __be16 cap_mask;
- __be32 cap_mask2_resp_time;
-
- u8 redirect_gid[16];
- __be32 redirect_tc_fl;
- __be32 redirect_lid;
- __be32 redirect_sl_qp;
- __be32 redirect_qkey;
-
- u8 trap_gid[16];
- __be32 trap_tc_fl;
- __be32 trap_lid;
- __be32 trap_hl_qp;
- __be32 trap_qkey;
-
- __be16 trap_pkey;
- __be16 redirect_pkey;
-
- u8 trap_sl_rsvd;
- u8 reserved[3];
-} __packed;
-
struct opa_port_status_req {
__u8 port_num;
__u8 reserved[3];
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 0829fce06172..93faf86d54b6 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -240,36 +240,6 @@ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
iounmap(dd->piobase);
}
-/*
- * Do a Function Level Reset (FLR) on the device.
- * Based on static function drivers/pci/pci.c:pcie_flr().
- */
-void hfi1_pcie_flr(struct hfi1_devdata *dd)
-{
- int i;
- u16 status;
-
- /* no need to check for the capability - we know the device has it */
-
- /* wait for Transaction Pending bit to clear, at most a few ms */
- for (i = 0; i < 4; i++) {
- if (i)
- msleep((1 << (i - 1)) * 100);
-
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVSTA, &status);
- if (!(status & PCI_EXP_DEVSTA_TRPND))
- goto clear;
- }
-
- dd_dev_err(dd, "Transaction Pending bit is not clearing, proceeding with reset anyway\n");
-
-clear:
- pcie_capability_set_word(dd->pcidev, PCI_EXP_DEVCTL,
- PCI_EXP_DEVCTL_BCR_FLR);
- /* PCIe spec requires the function to be back within 100ms */
- msleep(100);
-}
-
static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt,
struct hfi1_msix_entry *hfi1_msix_entry)
{
@@ -583,7 +553,7 @@ pci_mmio_enabled(struct pci_dev *pdev)
if (words == ~0ULL)
ret = PCI_ERS_RESULT_NEED_RESET;
dd_dev_info(dd,
- "HFI1 mmio_enabled function called, read wordscntr %Lx, returning %d\n",
+ "HFI1 mmio_enabled function called, read wordscntr %llx, returning %d\n",
words, ret);
}
return ret;
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 615be68e40b3..ed72b5aca139 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -703,6 +703,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
{
struct send_context_info *sci;
struct send_context *sc = NULL;
+ int req_type = type;
dma_addr_t dma;
unsigned long flags;
u64 reg;
@@ -729,6 +730,13 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
return NULL;
}
+ /*
+ * VNIC contexts are dynamically allocated.
+ * Hence, pick a user context for VNIC.
+ */
+ if (type == SC_VNIC)
+ type = SC_USER;
+
spin_lock_irqsave(&dd->sc_lock, flags);
ret = sc_hw_alloc(dd, type, &sw_index, &hw_context);
if (ret) {
@@ -738,6 +746,15 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
return NULL;
}
+ /*
+ * VNIC contexts are used by the kernel driver.
+ * Hence, mark them as kernel contexts.
+ */
+ if (req_type == SC_VNIC) {
+ dd->send_contexts[sw_index].type = SC_KERNEL;
+ type = SC_KERNEL;
+ }
+
sci = &dd->send_contexts[sw_index];
sci->sc = sc;
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 867e5ffc3595..99ca5edb0b43 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -1,7 +1,7 @@
#ifndef _PIO_H
#define _PIO_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,6 +54,12 @@
#define SC_USER 3 /* must be the last one: it may take all left */
#define SC_MAX 4 /* count of send context types */
+/*
+ * SC_VNIC types are allocated dynamically from the user context pool
+ * (SC_USER) and are used by the kernel driver as kernel contexts (SC_KERNEL).
+ */
+#define SC_VNIC SC_MAX
+
/* invalid send context index */
#define INVALID_SCI 0xff
@@ -195,7 +201,7 @@ struct sc_config_sizes {
* | mask | --/ |--------------------|
* |--------------------------| -/ | * |
* | actual_vls (max 8) | -/ |--------------------|
- * |--------------------------| --/ | ksc[n] -> sc n |
+ * |--------------------------| --/ | ksc[n-1] -> sc n |
* | vls (max 8) | -/ +--------------------+
* |--------------------------| --/
* | map[0] |-/
@@ -208,21 +214,21 @@ struct sc_config_sizes {
* |--------------------------| |--------------------|
* | map[vls - 1] |- | * |
* +--------------------------+ \- |--------------------|
- * \- | ksc[m] -> sc m+n |
+ * \- | ksc[m-1] -> sc m+n |
* \ +--------------------+
* \-
* \
- * \- +--------------------+
- * \- | mask |
- * \ |--------------------|
- * \- | ksc[0] -> sc 1+m+n |
- * \- |--------------------|
- * >| ksc[1] -> sc 2+m+n |
- * |--------------------|
- * | * |
- * |--------------------|
- * | ksc[o] -> sc o+m+n |
- * +--------------------+
+ * \- +----------------------+
+ * \- | mask |
+ * \ |----------------------|
+ * \- | ksc[0] -> sc 1+m+n |
+ * \- |----------------------|
+ * >| ksc[1] -> sc 2+m+n |
+ * |----------------------|
+ * | * |
+ * |----------------------|
+ * | ksc[o-1] -> sc o+m+n |
+ * +----------------------+
*
*/
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index c4ebd097b20f..650305cc0373 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -294,7 +294,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
ah = ibah_to_rvtah(wqe->ud_wr.ah);
if (wqe->length > (1 << ah->log_pmtu))
return -EINVAL;
- if (ibp->sl_to_sc[ah->attr.sl] == 0xf)
+ if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
return -EINVAL;
default:
break;
@@ -631,8 +631,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
qp->s_tail, qp->s_head, qp->s_size,
qp->s_avail,
qp->remote_qpn,
- qp->remote_ah_attr.dlid,
- qp->remote_ah_attr.sl,
+ rdma_ah_get_dlid(&qp->remote_ah_attr),
+ rdma_ah_get_sl(&qp->remote_ah_attr),
qp->pmtu,
qp->s_retry,
qp->s_retry_cnt,
@@ -731,9 +731,7 @@ void quiesce_qp(struct rvt_qp *qp)
void notify_qp_reset(struct rvt_qp *qp)
{
- struct hfi1_qp_priv *priv = qp->priv;
-
- priv->r_adefered = 0;
+ qp->r_adefered = 0;
clear_ahg(qp);
}
@@ -748,7 +746,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp)
qp->s_mig_state = IB_MIG_MIGRATED;
qp->remote_ah_attr = qp->alt_ah_attr;
- qp->port_num = qp->alt_ah_attr.port_num;
+ qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
qp->s_pkey_index = qp->s_alt_pkey_index;
qp->s_flags |= RVT_S_AHG_CLEAR;
priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
@@ -778,7 +776,7 @@ u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
u8 sc, vl;
ibp = &dd->pport[qp->port_num - 1].ibport_data;
- sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ sc = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
vl = sc_to_vlt(dd, sc);
mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
@@ -861,7 +859,7 @@ void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
if (qp->port_num == ppd->port &&
(qp->ibqp.qp_type == IB_QPT_UC ||
qp->ibqp.qp_type == IB_QPT_RC) &&
- qp->remote_ah_attr.sl == sl &&
+ rdma_ah_get_sl(&qp->remote_ah_attr) == sl &&
(ib_rvt_state_ops[qp->state] &
RVT_POST_SEND_OK)) {
spin_lock_irq(&qp->r_lock);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 7382be11afca..069bdaf061ab 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -274,7 +274,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail_no_tx;
ohdr = &ps->s_txreq->phdr.hdr.u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
/* Sending responses has higher priority over sending requests. */
@@ -727,10 +727,9 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
struct ib_header hdr;
struct ib_other_headers *ohdr;
unsigned long flags;
- struct hfi1_qp_priv *priv = qp->priv;
/* clear the defer count */
- priv->r_adefered = 0;
+ qp->r_adefered = 0;
/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
if (qp->s_flags & RVT_S_RESP_PENDING)
@@ -744,9 +743,10 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
/* Construct the header */
/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */
hwords = 6;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
hwords += hfi1_make_grh(ibp, &hdr.u.l.grh,
- &qp->remote_ah_attr.grh, hwords, 0);
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ hwords, 0);
ohdr = &hdr.u.l.oth;
lrh0 = HFI1_LRH_GRH;
} else {
@@ -763,17 +763,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
IB_AETH_CREDIT_SHIFT));
else
ohdr->u.aeth = rvt_compute_aeth(qp);
- sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
- lrh0 |= (sc5 & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
+ lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr)
+ & 0xf) << 4;
hdr.lrh[0] = cpu_to_be16(lrh0);
- hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ hdr.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
- hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
+ hdr.lrh[3] = cpu_to_be16(ppd->lid |
+ rdma_ah_get_path_bits(&qp->remote_ah_attr));
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
- ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << HFI1_BECN_SHIFT);
+ ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << IB_BECN_SHIFT);
ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn));
/* Don't try to send ACKs if the link isn't ACTIVE */
@@ -994,12 +996,12 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
return;
/* Find out where the BTH is */
- if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
+ if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ opcode = ib_bth_get_opcode(ohdr);
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
WARN_ON(!qp->s_rdma_ack_cnt);
@@ -1028,13 +1030,17 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
s_last = qp->s_last;
+ trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
rvt_put_swqe(wqe);
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before re-sending,
@@ -1076,12 +1082,16 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
rvt_put_swqe(wqe);
s_last = qp->s_last;
+ trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
} else {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -1092,10 +1102,11 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
*/
if (ppd->dd->flags & HFI1_HAS_SEND_DMA) {
struct sdma_engine *engine;
+ u8 sl = rdma_ah_get_sl(&qp->remote_ah_attr);
u8 sc5;
/* For now use sc to find engine */
- sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ sc5 = ibp->sl_to_sc[sl];
engine = qp_to_sdma_engine(qp, sc5);
sdma_engine_progress_schedule(engine);
}
@@ -1516,7 +1527,7 @@ read_middle:
if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
goto ack_done;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/*
* Check that the data size is >= 0 && <= pmtu.
* Remember to account for ICRC (4).
@@ -1540,7 +1551,7 @@ read_middle:
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
goto ack_op_err;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/*
* Check that the data size is >= 1 && <= pmtu.
* Remember to account for ICRC (4).
@@ -1592,9 +1603,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
static inline void rc_cancel_ack(struct rvt_qp *qp)
{
- struct hfi1_qp_priv *priv = qp->priv;
-
- priv->r_adefered = 0;
+ qp->r_adefered = 0;
if (list_empty(&qp->rspwait))
return;
list_del_init(&qp->rspwait);
@@ -1922,7 +1931,8 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
int diff;
struct ib_reth *reth;
unsigned long flags;
- int ret, is_fecn = 0;
+ int ret;
+ bool is_fecn = false;
bool copy_last = false;
u32 rkey;
@@ -1934,7 +1944,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
is_fecn = process_ecn(qp, packet, false);
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/*
* Process responses (ACKs) before anything else. Note that the
@@ -2065,7 +2075,7 @@ no_immediate_data:
wc.ex.imm_data = 0;
send_last:
/* Get the number of bytes the message was padded by. */
- pad = (bth0 >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -2089,7 +2099,7 @@ send_last:
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
/*
* It seems that IB mandates the presence of an SL in a
* work completion only for the UD transport (see section
@@ -2101,7 +2111,7 @@ send_last:
*
* See also OPA Vol. 1, section 9.7.6, and table 9-17.
*/
- wc.sl = qp->remote_ah_attr.sl;
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
@@ -2301,13 +2311,11 @@ send_last:
qp->r_nak_state = 0;
/* Send an ACK if requested or required. */
if (psn & IB_BTH_REQ_ACK) {
- struct hfi1_qp_priv *priv = qp->priv;
-
if (packet->numpkt == 0) {
rc_cancel_ack(qp);
goto send_ack;
}
- if (priv->r_adefered >= HFI1_PSN_CREDIT) {
+ if (qp->r_adefered >= HFI1_PSN_CREDIT) {
rc_cancel_ack(qp);
goto send_ack;
}
@@ -2315,7 +2323,7 @@ send_last:
rc_cancel_ack(qp);
goto send_ack;
}
- priv->r_adefered++;
+ qp->r_adefered++;
rc_defered_ack(rcd, qp);
}
return;
@@ -2378,7 +2386,7 @@ void hfi1_rc_hdrerr(
return;
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/* Only deal with RDMA Writes for now */
if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index aa15bcbfb079..3a17daba28a9 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -219,72 +219,84 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
{
__be64 guid;
unsigned long flags;
- u8 sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
if (!has_grh) {
- if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
+ IB_AH_GRH)
goto err;
} else {
- if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
+ const struct ib_global_route *grh;
+
+ if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
+ IB_AH_GRH))
goto err;
- guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
+ grh = rdma_ah_read_grh(&qp->alt_ah_attr);
+ guid = get_sguid(ibp, grh->sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
guid))
goto err;
if (!gid_ok(
&hdr->u.l.grh.sgid,
- qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
- qp->alt_ah_attr.grh.dgid.global.interface_id))
+ grh->dgid.global.subnet_prefix,
+ grh->dgid.global.interface_id))
goto err;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
- sc5, be16_to_cpu(hdr->lrh[3])))) {
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
+ ib_get_slid(hdr)))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
(u16)bth0,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ ib_get_sl(hdr),
0, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
+ ib_get_slid(hdr),
+ ib_get_dlid(hdr));
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
- ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
+ if (ib_get_slid(hdr) !=
+ rdma_ah_get_dlid(&qp->alt_ah_attr) ||
+ ppd_from_ibp(ibp)->port !=
+ rdma_ah_get_port_num(&qp->alt_ah_attr))
goto err;
spin_lock_irqsave(&qp->s_lock, flags);
hfi1_migrate_qp(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else {
if (!has_grh) {
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH)
goto err;
} else {
- if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
+ const struct ib_global_route *grh;
+
+ if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH))
goto err;
- guid = get_sguid(ibp,
- qp->remote_ah_attr.grh.sgid_index);
+ grh = rdma_ah_read_grh(&qp->remote_ah_attr);
+ guid = get_sguid(ibp, grh->sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
guid))
goto err;
if (!gid_ok(
&hdr->u.l.grh.sgid,
- qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
- qp->remote_ah_attr.grh.dgid.global.interface_id))
+ grh->dgid.global.subnet_prefix,
+ grh->dgid.global.interface_id))
goto err;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
- sc5, be16_to_cpu(hdr->lrh[3])))) {
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
+ ib_get_slid(hdr)))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
(u16)bth0,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ ib_get_sl(hdr),
0, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
+ ib_get_slid(hdr),
+ ib_get_dlid(hdr));
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
+ if (ib_get_slid(hdr) !=
+ rdma_ah_get_dlid(&qp->remote_ah_attr) ||
ppd_from_ibp(ibp)->port != qp->port_num)
goto err;
if (qp->s_mig_state == IB_MIG_REARM &&
@@ -542,8 +554,8 @@ do_write:
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
@@ -637,7 +649,7 @@ done:
* Return the size of the header in 32 bit words.
*/
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
- struct ib_global_route *grh, u32 hwords, u32 nwords)
+ const struct ib_global_route *grh, u32 hwords, u32 nwords)
{
hdr->version_tclass_flow =
cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
@@ -731,15 +743,17 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
extra_bytes = -ps->s_txreq->s_cur_size & 3;
nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2;
lrh0 = HFI1_LRH_BTH;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- qp->s_hdrwords += hfi1_make_grh(ibp,
- &ps->s_txreq->phdr.hdr.u.l.grh,
- &qp->remote_ah_attr.grh,
- qp->s_hdrwords, nwords);
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
+ qp->s_hdrwords +=
+ hfi1_make_grh(ibp,
+ &ps->s_txreq->phdr.hdr.u.l.grh,
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ qp->s_hdrwords, nwords);
lrh0 = HFI1_LRH_GRH;
middle = 0;
}
- lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
+ lrh0 |= (priv->s_sc & 0xf) << 12 |
+ (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4;
/*
* reset s_ahg/AHG fields
*
@@ -763,11 +777,13 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
else
qp->s_flags &= ~RVT_S_AHG_VALID;
ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
- ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ ps->s_txreq->phdr.hdr.lrh[1] =
+ cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
ps->s_txreq->phdr.hdr.lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
- qp->remote_ah_attr.src_path_bits);
+ ps->s_txreq->phdr.hdr.lrh[3] =
+ cpu_to_be16(ppd_from_ibp(ibp)->lid |
+ rdma_ah_get_path_bits(&qp->remote_ah_attr));
bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
bth0 |= extra_bytes << 20;
ohdr->bth[0] = cpu_to_be32(bth0);
@@ -775,7 +791,7 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
if (qp->s_flags & RVT_S_ECN) {
qp->s_flags &= ~RVT_S_ECN;
/* we recently received a FECN, so return a BECN */
- bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
+ bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT);
}
ohdr->bth[1] = cpu_to_be32(bth1);
ohdr->bth[2] = cpu_to_be32(bth2);
@@ -784,59 +800,102 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */
+/**
+ * schedule_send_yield - test for a yield required for QP send engine
+ * @qp: a pointer to the QP
+ * @ps: a pointer to a structure with commonly looked-up values for
+ *      the send engine progress, including the yield timeout
+ *
+ * This routine checks if the time slice for the QP has expired for
+ * RC QPs; if so, an additional work entry is queued. At this point,
+ * other QPs have an opportunity to be scheduled. It returns true if
+ * a yield is required, otherwise false.
+ */
+static bool schedule_send_yield(struct rvt_qp *qp,
+ struct hfi1_pkt_state *ps)
+{
+ if (unlikely(time_after(jiffies, ps->timeout))) {
+ if (!ps->in_thread ||
+ workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
+ spin_lock_irqsave(&qp->s_lock, ps->flags);
+ qp->s_flags &= ~RVT_S_BUSY;
+ hfi1_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, ps->flags);
+ this_cpu_inc(*ps->ppd->dd->send_schedule);
+ trace_hfi1_rc_expired_time_slice(qp, true);
+ return true;
+ }
+
+ cond_resched();
+ this_cpu_inc(*ps->ppd->dd->send_schedule);
+ ps->timeout = jiffies + ps->timeout_int;
+ }
+
+ trace_hfi1_rc_expired_time_slice(qp, false);
+ return false;
+}
+
+void hfi1_do_send_from_rvt(struct rvt_qp *qp)
+{
+ hfi1_do_send(qp, false);
+}
+
void _hfi1_do_send(struct work_struct *work)
{
struct iowait *wait = container_of(work, struct iowait, iowork);
struct rvt_qp *qp = iowait_to_qp(wait);
- hfi1_do_send(qp);
+ hfi1_do_send(qp, true);
}
/**
* hfi1_do_send - perform a send on a QP
 * @qp: a pointer to the QP
+ * @in_thread: true if in a workqueue thread
*
* Process entries in the send work queue until credit or queue is
* exhausted. Only allow one CPU to send a packet per QP.
* Otherwise, two threads could send packets out of order.
*/
-void hfi1_do_send(struct rvt_qp *qp)
+void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
{
struct hfi1_pkt_state ps;
struct hfi1_qp_priv *priv = qp->priv;
int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
- unsigned long timeout;
- unsigned long timeout_int;
- int cpu;
ps.dev = to_idev(qp->ibqp.device);
ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
ps.ppd = ppd_from_ibp(ps.ibp);
+ ps.in_thread = in_thread;
+
+ trace_hfi1_rc_do_send(qp, in_thread);
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
- if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
- ) - 1)) ==
- ps.ppd->lid)) {
+ if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
+ ~((1 << ps.ppd->lmc) - 1)) ==
+ ps.ppd->lid)) {
ruc_loopback(qp);
return;
}
make_req = hfi1_make_rc_req;
- timeout_int = (qp->timeout_jiffies);
+ ps.timeout_int = qp->timeout_jiffies;
break;
case IB_QPT_UC:
- if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
- ) - 1)) ==
- ps.ppd->lid)) {
+ if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
+ ~((1 << ps.ppd->lmc) - 1)) ==
+ ps.ppd->lid)) {
ruc_loopback(qp);
return;
}
make_req = hfi1_make_uc_req;
- timeout_int = SEND_RESCHED_TIMEOUT;
+ ps.timeout_int = SEND_RESCHED_TIMEOUT;
break;
default:
make_req = hfi1_make_ud_req;
- timeout_int = SEND_RESCHED_TIMEOUT;
+ ps.timeout_int = SEND_RESCHED_TIMEOUT;
}
spin_lock_irqsave(&qp->s_lock, ps.flags);
@@ -849,9 +908,11 @@ void hfi1_do_send(struct rvt_qp *qp)
qp->s_flags |= RVT_S_BUSY;
- timeout = jiffies + (timeout_int) / 8;
- cpu = priv->s_sde ? priv->s_sde->cpu :
+ ps.timeout_int = ps.timeout_int / 8;
+ ps.timeout = jiffies + ps.timeout_int;
+ ps.cpu = priv->s_sde ? priv->s_sde->cpu :
cpumask_first(cpumask_of_node(ps.ppd->dd->node));
+
/* ensure a pre-built packet is handled */
ps.s_txreq = get_waiting_verbs_txreq(qp);
do {
@@ -867,28 +928,9 @@ void hfi1_do_send(struct rvt_qp *qp)
/* Record that s_ahg is empty. */
qp->s_hdrwords = 0;
/* allow other tasks to run */
- if (unlikely(time_after(jiffies, timeout))) {
- if (workqueue_congested(cpu,
- ps.ppd->hfi1_wq)) {
- spin_lock_irqsave(
- &qp->s_lock,
- ps.flags);
- qp->s_flags &= ~RVT_S_BUSY;
- hfi1_schedule_send(qp);
- spin_unlock_irqrestore(
- &qp->s_lock,
- ps.flags);
- this_cpu_inc(
- *ps.ppd->dd->send_schedule);
- return;
- }
- if (!irqs_disabled()) {
- cond_resched();
- this_cpu_inc(
- *ps.ppd->dd->send_schedule);
- }
- timeout = jiffies + (timeout_int) / 8;
- }
+ if (schedule_send_yield(qp, &ps))
+ return;
+
spin_lock_irqsave(&qp->s_lock, ps.flags);
}
} while (make_req(qp, &ps));
@@ -909,8 +951,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
last = qp->s_last;
old_last = last;
+ trace_hfi1_qp_send_completion(qp, wqe, last);
if (++last >= qp->s_size)
last = 0;
+ trace_hfi1_qp_send_completion(qp, wqe, last);
qp->s_last = last;
/* See post_send() */
barrier();
@@ -920,7 +964,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
- rvt_qp_swqe_complete(qp, wqe, status);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ status);
if (qp->s_acked == old_last)
qp->s_acked = last;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 5cde1ecda0fe..bfd0d5187e9b 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -868,7 +868,7 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
cpu_id = smp_processor_id();
rcu_read_lock();
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
sdma_rht_params);
if (rht_node && rht_node->map[vl]) {
@@ -962,7 +962,12 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
continue;
}
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ if (vl >= ARRAY_SIZE(rht_node->map)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (!rht_node) {
rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
@@ -982,7 +987,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
rht_node->map[vl]->ctr = 1;
rht_node->map[vl]->sde[0] = sde;
- ret = rhashtable_insert_fast(&dd->sdma_rht,
+ ret = rhashtable_insert_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
if (ret) {
@@ -1025,7 +1030,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
if (cpumask_test_cpu(cpu, mask))
continue;
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (rht_node) {
bool empty = true;
@@ -1049,7 +1054,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
}
if (empty) {
- ret = rhashtable_remove_fast(&dd->sdma_rht,
+ ret = rhashtable_remove_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
WARN_ON(ret);
@@ -1108,7 +1113,7 @@ void sdma_seqfile_dump_cpu_list(struct seq_file *s,
struct sdma_rht_node *rht_node;
int i, j;
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid,
sdma_rht_params);
if (!rht_node)
return;
@@ -1322,6 +1327,12 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
synchronize_rcu();
kfree(dd->per_sdma);
dd->per_sdma = NULL;
+
+ if (dd->sdma_rht) {
+ rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
+ kfree(dd->sdma_rht);
+ dd->sdma_rht = NULL;
+ }
}
/**
@@ -1341,12 +1352,14 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
{
unsigned this_idx;
struct sdma_engine *sde;
+ struct rhashtable *tmp_sdma_rht;
u16 descq_cnt;
void *curr_head;
struct hfi1_pportdata *ppd = dd->pport + port;
u32 per_sdma_credits;
uint idle_cnt = sdma_idle_cnt;
size_t num_engines = dd->chip_sdma_engines;
+ int ret = -ENOMEM;
if (!HFI1_CAP_IS_KSET(SDMA)) {
HFI1_CAP_CLEAR(SDMA_AHG);
@@ -1378,7 +1391,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
/* alloc memory for array of send engines */
dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL);
if (!dd->per_sdma)
- return -ENOMEM;
+ return ret;
idle_cnt = ns_to_cclock(dd, idle_cnt);
if (!sdma_desct_intr)
@@ -1507,18 +1520,27 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
dd->flags |= HFI1_HAS_SEND_DMA;
dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0;
dd->num_sdma = num_engines;
- if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
+ ret = sdma_map_init(dd, port, ppd->vls_operational, NULL);
+ if (ret < 0)
goto bail;
- if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
+ tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL);
+ if (!tmp_sdma_rht) {
+ ret = -ENOMEM;
goto bail;
+ }
+
+ ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
+ if (ret < 0)
+ goto bail;
+ dd->sdma_rht = tmp_sdma_rht;
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
return 0;
bail:
sdma_clean(dd, num_engines);
- return -ENOMEM;
+ return ret;
}
/**
@@ -1604,7 +1626,6 @@ void sdma_exit(struct hfi1_devdata *dd)
sdma_finalput(&sde->state);
}
sdma_clean(dd, dd->num_sdma);
- rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
}
/*
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 21f1e2834f37..64f10b8b5db8 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -966,34 +966,34 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status);
* | mask | --/ |--------------------|
* |--------------------------| -/ | * |
* | actual_vls (max 8) | -/ |--------------------|
- * |--------------------------| --/ | sde[n] -> eng n |
+ * |--------------------------| --/ | sde[n-1] -> eng n |
* | vls (max 8) | -/ +--------------------+
* |--------------------------| --/
* | map[0] |-/
- * |--------------------------| +--------------------+
- * | map[1] |--- | mask |
- * |--------------------------| \---- |--------------------|
- * | * | \-- | sde[0] -> eng 1+n |
- * | * | \---- |--------------------|
- * | * | \->| sde[1] -> eng 2+n |
- * |--------------------------| |--------------------|
- * | map[vls - 1] |- | * |
- * +--------------------------+ \- |--------------------|
- * \- | sde[m] -> eng m+n |
- * \ +--------------------+
+ * |--------------------------| +---------------------+
+ * | map[1] |--- | mask |
+ * |--------------------------| \---- |---------------------|
+ * | * | \-- | sde[0] -> eng 1+n |
+ * | * | \---- |---------------------|
+ * | * | \->| sde[1] -> eng 2+n |
+ * |--------------------------| |---------------------|
+ * | map[vls - 1] |- | * |
+ * +--------------------------+ \- |---------------------|
+ * \- | sde[m-1] -> eng m+n |
+ * \ +---------------------+
* \-
* \
- * \- +--------------------+
- * \- | mask |
- * \ |--------------------|
- * \- | sde[0] -> eng 1+m+n|
- * \- |--------------------|
- * >| sde[1] -> eng 2+m+n|
- * |--------------------|
- * | * |
- * |--------------------|
- * | sde[o] -> eng o+m+n|
- * +--------------------+
+ * \- +----------------------+
+ * \- | mask |
+ * \ |----------------------|
+ * \- | sde[0] -> eng 1+m+n |
+ * \- |----------------------|
+ * >| sde[1] -> eng 2+m+n |
+ * |----------------------|
+ * | * |
+ * |----------------------|
+ * | sde[o-1] -> eng o+m+n|
+ * +----------------------+
*
*/
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 919a5474e651..50d140d25e38 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -542,7 +542,7 @@ static ssize_t show_nctxts(struct device *device,
* give a more accurate picture of total contexts available.
*/
return scnprintf(buf, PAGE_SIZE, "%u\n",
- min(dd->num_rcv_contexts - dd->first_user_ctxt,
+ min(dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt,
(u32)dd->sc_sizes[SC_USER].count));
}
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index e86798af6903..eafae487face 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -51,13 +51,12 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
u8 opcode;
- u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- if (lnh == HFI1_LRH_BTH)
+ if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ opcode = ib_bth_get_opcode(ohdr);
return hdr_len_by_opcode[opcode] == 0 ?
0 : hdr_len_by_opcode[opcode] - (12 + 8);
}
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index 26ae789e47cf..4eb4cc798035 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -57,12 +57,14 @@
#define UCTXT_FMT \
"cred:%u, credaddr:0x%llx, piobase:0x%p, rcvhdr_cnt:%u, " \
- "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx"
+ "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx, subctxt_cnt:%u"
TRACE_EVENT(hfi1_uctxtdata,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt),
- TP_ARGS(dd, uctxt),
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt,
+ unsigned int subctxt),
+ TP_ARGS(dd, uctxt, subctxt),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(unsigned int, ctxt)
+ __field(unsigned int, subctxt)
__field(u32, credits)
__field(u64, hw_free)
__field(void __iomem *, piobase)
@@ -70,9 +72,11 @@ TRACE_EVENT(hfi1_uctxtdata,
__field(u64, rcvhdrq_dma)
__field(u32, eager_cnt)
__field(u64, rcvegr_dma)
+ __field(unsigned int, subctxt_cnt)
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = uctxt->ctxt;
+ __entry->subctxt = subctxt;
__entry->credits = uctxt->sc->credits;
__entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
__entry->piobase = uctxt->sc->base_addr;
@@ -80,17 +84,20 @@ TRACE_EVENT(hfi1_uctxtdata,
__entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
__entry->eager_cnt = uctxt->egrbufs.alloced;
__entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
+ __entry->subctxt_cnt = uctxt->subctxt_cnt;
),
- TP_printk("[%s] ctxt %u " UCTXT_FMT,
+ TP_printk("[%s] ctxt %u:%u " UCTXT_FMT,
__get_str(dev),
__entry->ctxt,
+ __entry->subctxt,
__entry->credits,
__entry->hw_free,
__entry->piobase,
__entry->rcvhdrq_cnt,
__entry->rcvhdrq_dma,
__entry->eager_cnt,
- __entry->rcvegr_dma
+ __entry->rcvegr_dma,
+ __entry->subctxt_cnt
)
);
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index 382fcda3a5f6..090f6b506953 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -139,11 +139,11 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
__entry->pkey =
be32_to_cpu(ohdr->bth[0]) & 0xffff;
__entry->f =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
- HFI1_FECN_MASK;
+ (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) &
+ IB_FECN_MASK;
__entry->b =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
- HFI1_BECN_MASK;
+ (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) &
+ IB_BECN_MASK;
__entry->qpn =
be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
__entry->a =
diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h
index d308454af7fd..deac77ddaeab 100644
--- a/drivers/infiniband/hw/hfi1/trace_misc.h
+++ b/drivers/infiniband/hw/hfi1/trace_misc.h
@@ -72,6 +72,54 @@ TRACE_EVENT(hfi1_interrupt,
__entry->src)
);
+#ifdef CONFIG_FAULT_INJECTION
+TRACE_EVENT(hfi1_fault_opcode,
+ TP_PROTO(struct rvt_qp *qp, u8 opcode),
+ TP_ARGS(qp, opcode),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u8, opcode)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->opcode = opcode;
+ ),
+ TP_printk("[%s] qpn 0x%x opcode 0x%x",
+ __get_str(dev), __entry->qpn, __entry->opcode)
+);
+
+TRACE_EVENT(hfi1_fault_packet,
+ TP_PROTO(struct hfi1_packet *packet),
+ TP_ARGS(packet),
+ TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->ppd->dd)
+ __field(u64, eflags)
+ __field(u32, ctxt)
+ __field(u32, hlen)
+ __field(u32, tlen)
+ __field(u32, updegr)
+ __field(u32, etail)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->ppd->dd);
+ __entry->eflags = rhf_err_flags(packet->rhf);
+ __entry->ctxt = packet->rcd->ctxt;
+ __entry->hlen = packet->hlen;
+ __entry->tlen = packet->tlen;
+ __entry->updegr = packet->updegr;
+ __entry->etail = rhf_egr_index(packet->rhf);
+ ),
+ TP_printk(
+ "[%s] ctxt %d eflags 0x%llx hlen %d tlen %d updegr %d etail %d",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->eflags,
+ __entry->hlen,
+ __entry->tlen,
+ __entry->updegr,
+ __entry->etail
+ )
+);
+#endif
+
#endif /* __HFI1_TRACE_MISC_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h
index 5ea5005f9f41..8ce476570462 100644
--- a/drivers/infiniband/hw/hfi1/trace_rc.h
+++ b/drivers/infiniband/hw/hfi1/trace_rc.h
@@ -1,5 +1,5 @@
/*
-* Copyright(c) 2015, 2016 Intel Corporation.
+* Copyright(c) 2015, 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -104,11 +104,6 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_ack,
TP_ARGS(qp, psn)
);
-DEFINE_EVENT(hfi1_rc_template, hfi1_timeout,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn)
-);
-
DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index 415d6be42c5d..c59809a7f121 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -633,6 +633,83 @@ DEFINE_EVENT(hfi1_bct_template, bct_get,
TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
TP_ARGS(dd, bc));
+TRACE_EVENT(
+ hfi1_qp_send_completion,
+ TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx),
+ TP_ARGS(qp, wqe, idx),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(struct rvt_swqe *, wqe)
+ __field(u64, wr_id)
+ __field(u32, qpn)
+ __field(u32, qpt)
+ __field(u32, length)
+ __field(u32, idx)
+ __field(u32, ssn)
+ __field(enum ib_wr_opcode, opcode)
+ __field(int, send_flags)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->wqe = wqe;
+ __entry->wr_id = wqe->wr.wr_id;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->qpt = qp->ibqp.qp_type;
+ __entry->length = wqe->length;
+ __entry->idx = idx;
+ __entry->ssn = wqe->ssn;
+ __entry->opcode = wqe->wr.opcode;
+ __entry->send_flags = wqe->wr.send_flags;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->qpt,
+ __entry->wqe,
+ __entry->idx,
+ __entry->wr_id,
+ __entry->length,
+ __entry->ssn,
+ __entry->opcode,
+ __entry->send_flags
+ )
+);
+
+DECLARE_EVENT_CLASS(
+ hfi1_do_send_template,
+ TP_PROTO(struct rvt_qp *qp, bool flag),
+ TP_ARGS(qp, flag),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(bool, flag)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->flag = flag;
+ ),
+ TP_printk(
+ "[%s] qpn %x flag %d",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->flag
+ )
+);
+
+DEFINE_EVENT(
+ hfi1_do_send_template, hfi1_rc_do_send,
+ TP_PROTO(struct rvt_qp *qp, bool flag),
+ TP_ARGS(qp, flag)
+);
+
+DEFINE_EVENT(
+ hfi1_do_send_template, hfi1_rc_expired_time_slice,
+ TP_PROTO(struct rvt_qp *qp, bool flag),
+ TP_ARGS(qp, flag)
+);
+
#endif /* __HFI1_TRACE_TX_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 4b2a8400c823..5da1e4546543 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -94,7 +94,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
ohdr = &ps->s_txreq->phdr.hdr.u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
/* Get the next send request. */
@@ -320,7 +320,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
process_ecn(qp, packet, true);
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/* Compare the PSN verses the expected PSN. */
if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
@@ -433,7 +433,7 @@ no_immediate_data:
wc.wc_flags = 0;
send_last:
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -451,7 +451,7 @@ last_imm:
wc.status = IB_WC_SUCCESS;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
/*
* It seems that IB mandates the presence of an SL in a
* work completion only for the UD transport (see section
@@ -463,7 +463,7 @@ last_imm:
*
* See also OPA Vol. 1, section 9.7.6, and table 9-17.
*/
- wc.sl = qp->remote_ah_attr.sl;
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
@@ -528,7 +528,7 @@ rdma_last_imm:
wc.wc_flags = IB_WC_WITH_IMM;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -555,7 +555,7 @@ rdma_last_imm:
case OP(RDMA_WRITE_LAST):
rdma_last:
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 13ea4eb6ef3d..6a4e95cefae5 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -68,7 +68,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
struct hfi1_pportdata *ppd;
struct rvt_qp *qp;
- struct ib_ah_attr *ah_attr;
+ struct rdma_ah_attr *ah_attr;
unsigned long flags;
struct rvt_sge_state ssge;
struct rvt_sge *sge;
@@ -103,17 +103,17 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (qp->ibqp.qp_num > 1) {
u16 pkey;
u16 slid;
- u8 sc5 = ibp->sl_to_sc[ah_attr->sl];
+ u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
pkey = hfi1_get_pkey(ibp, sqp->s_pkey_index);
- slid = ppd->lid | (ah_attr->src_path_bits &
+ slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
if (unlikely(ingress_pkey_check(ppd, pkey, sc5,
qp->s_pkey_index, slid))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey,
- ah_attr->sl,
+ rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
- slid, ah_attr->dlid);
+ slid, rdma_ah_get_dlid(ah_attr));
goto drop;
}
}
@@ -131,13 +131,13 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (unlikely(qkey != qp->qkey)) {
u16 lid;
- lid = ppd->lid | (ah_attr->src_path_bits &
+ lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
- ah_attr->sl,
+ rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
lid,
- ah_attr->dlid);
+ rdma_ah_get_dlid(ah_attr));
goto drop;
}
}
@@ -183,11 +183,11 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
goto bail_unlock;
}
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
struct ib_grh grh;
- struct ib_global_route grd = ah_attr->grh;
+ const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr);
- hfi1_make_grh(ibp, &grh, &grd, 0, 0);
+ hfi1_make_grh(ibp, &grh, grd, 0, 0);
hfi1_copy_sge(&qp->r_sge, &grh,
sizeof(grh), true, false);
wc.wc_flags |= IB_WC_GRH;
@@ -243,12 +243,13 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
} else {
wc.pkey_index = 0;
}
- wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
+ wc.slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1));
/* Check for loopback when the port lid is not set */
if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI)
wc.slid = be16_to_cpu(IB_LID_PERMISSIVE);
- wc.sl = ah_attr->sl;
- wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
+ wc.sl = rdma_ah_get_sl(ah_attr);
+ wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
@@ -272,7 +273,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
struct ib_other_headers *ohdr;
- struct ib_ah_attr *ah_attr;
+ struct rdma_ah_attr *ah_attr;
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
struct rvt_swqe *wqe;
@@ -319,9 +320,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
- if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
- ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
- lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
+ if (rdma_ah_get_dlid(ah_attr) < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) {
+ lid = rdma_ah_get_dlid(ah_attr) & ~((1 << ppd->lmc) - 1);
if (unlikely(!loopback &&
(lid == ppd->lid ||
(lid == be16_to_cpu(IB_LID_PERMISSIVE) &&
@@ -356,7 +357,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_hdrwords = 7;
ps->s_txreq->s_cur_size = wqe->length;
ps->s_txreq->ss = &qp->s_sge;
- qp->s_srate = ah_attr->static_rate;
+ qp->s_srate = rdma_ah_get_static_rate(ah_attr);
qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
qp->s_wqe = wqe;
qp->s_sge.sge = wqe->sg_list[0];
@@ -364,11 +365,11 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_sge.total_len = wqe->length;
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
/* Header size in 32-bit words. */
qp->s_hdrwords += hfi1_make_grh(ibp,
&ps->s_txreq->phdr.hdr.u.l.grh,
- &ah_attr->grh,
+ rdma_ah_read_grh(ah_attr),
qp->s_hdrwords, nwords);
lrh0 = HFI1_LRH_GRH;
ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
@@ -388,8 +389,8 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
} else {
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
}
- sc5 = ibp->sl_to_sc[ah_attr->sl];
- lrh0 |= (ah_attr->sl & 0xf) << 4;
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
+ lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
if (qp->ibqp.qp_type == IB_QPT_SMI) {
lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
priv->s_sc = 0xf;
@@ -402,15 +403,17 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
ps->s_txreq->psc = priv->s_sendcontext;
ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
- ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);
+ ps->s_txreq->phdr.hdr.lrh[1] =
+ cpu_to_be16(rdma_ah_get_dlid(ah_attr));
ps->s_txreq->phdr.hdr.lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
+ if (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) {
ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
} else {
lid = ppd->lid;
if (lid) {
- lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
+ lid |= rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1);
ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid);
} else {
ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
@@ -537,7 +540,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
bth0 = pkey | (IB_OPCODE_CNP << 24);
ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << HFI1_BECN_SHIFT));
+ ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT));
ohdr->bth[2] = 0; /* PSN 0 */
hdr.lrh[0] = cpu_to_be16(lrh0);
@@ -680,7 +683,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
bool has_grh = rcv_flags & HFI1_HAS_GRH;
- u8 sc5 = hdr2sc(hdr, packet->rhf);
+ u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
u32 bth1;
u8 sl_from_sc, sl;
u16 slid;
@@ -688,18 +691,16 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
- dlid = be16_to_cpu(hdr->lrh[1]);
+ dlid = ib_get_dlid(hdr);
bth1 = be32_to_cpu(ohdr->bth[1]);
- slid = be16_to_cpu(hdr->lrh[3]);
- pkey = (u16)be32_to_cpu(ohdr->bth[0]);
- sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- extra_bytes = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ slid = ib_get_slid(hdr);
+ pkey = ib_bth_get_pkey(ohdr);
+ opcode = ib_bth_get_opcode(ohdr);
+ sl = ib_get_sl(hdr);
+ extra_bytes = ib_bth_get_pad(ohdr);
extra_bytes += (SIZE_OF_CRC << 2);
sl_from_sc = ibp->sc_to_sl[sc5];
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
- opcode &= 0xff;
-
process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
/*
* Get the number of bytes the message was padded by
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 4a8295399e71..a8f0aa4722f6 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -53,7 +53,7 @@
struct tid_group {
struct list_head list;
- unsigned base;
+ u32 base;
u8 size;
u8 used;
u8 map;
@@ -82,20 +82,25 @@ struct tid_pageset {
(unsigned long)(len) - 1) & PAGE_MASK) - \
((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
-static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *,
- struct hfi1_filedata *);
-static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
-static int set_rcvarray_entry(struct file *, unsigned long, u32,
- struct tid_group *, struct page **, unsigned);
-static int tid_rb_insert(void *, struct mmu_rb_node *);
+static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
+ struct exp_tid_set *set,
+ struct hfi1_filedata *fd);
+static u32 find_phys_blocks(struct page **pages, unsigned npages,
+ struct tid_pageset *list);
+static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr,
+ u32 rcventry, struct tid_group *grp,
+ struct page **pages, unsigned npages);
+static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
struct tid_rb_node *tnode);
-static void tid_rb_remove(void *, struct mmu_rb_node *);
-static int tid_rb_invalidate(void *, struct mmu_rb_node *);
-static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
- struct tid_pageset *, unsigned, u16, struct page **,
- u32 *, unsigned *, unsigned *);
-static int unprogram_rcvarray(struct file *, u32, struct tid_group **);
+static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
+static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr,
+ struct tid_group *grp, struct tid_pageset *sets,
+ unsigned start, u16 count, struct page **pages,
+ u32 *tidlist, unsigned *tididx, unsigned *pmapped);
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
+ struct tid_group **grp);
static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
static struct mmu_rb_ops tid_rb_ops = {
@@ -149,62 +154,72 @@ static inline void tid_group_move(struct tid_group *group,
tid_group_add_tail(group, s2);
}
+int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd)
+{
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+ struct hfi1_devdata *dd = fd->dd;
+ u32 tidbase;
+ u32 i;
+ struct tid_group *grp, *gptr;
+
+ exp_tid_group_init(&uctxt->tid_group_list);
+ exp_tid_group_init(&uctxt->tid_used_list);
+ exp_tid_group_init(&uctxt->tid_full_list);
+
+ tidbase = uctxt->expected_base;
+ for (i = 0; i < uctxt->expected_count /
+ dd->rcv_entries.group_size; i++) {
+ grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+ if (!grp)
+ goto grp_failed;
+
+ grp->size = dd->rcv_entries.group_size;
+ grp->base = tidbase;
+ tid_group_add_tail(grp, &uctxt->tid_group_list);
+ tidbase += dd->rcv_entries.group_size;
+ }
+
+ return 0;
+
+grp_failed:
+ list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
+ list) {
+ list_del_init(&grp->list);
+ kfree(grp);
+ }
+
+ return -ENOMEM;
+}
+
/*
* Initialize context and file private data needed for Expected
* receive caching. This needs to be done after the context has
* been configured with the eager/expected RcvEntry counts.
*/
-int hfi1_user_exp_rcv_init(struct file *fp)
+int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
- unsigned tidbase;
- int i, ret = 0;
+ int ret = 0;
spin_lock_init(&fd->tid_lock);
spin_lock_init(&fd->invalid_lock);
- if (!uctxt->subctxt_cnt || !fd->subctxt) {
- exp_tid_group_init(&uctxt->tid_group_list);
- exp_tid_group_init(&uctxt->tid_used_list);
- exp_tid_group_init(&uctxt->tid_full_list);
-
- tidbase = uctxt->expected_base;
- for (i = 0; i < uctxt->expected_count /
- dd->rcv_entries.group_size; i++) {
- struct tid_group *grp;
-
- grp = kzalloc(sizeof(*grp), GFP_KERNEL);
- if (!grp) {
- /*
- * If we fail here, the groups already
- * allocated will be freed by the close
- * call.
- */
- ret = -ENOMEM;
- goto done;
- }
- grp->size = dd->rcv_entries.group_size;
- grp->base = tidbase;
- tid_group_add_tail(grp, &uctxt->tid_group_list);
- tidbase += dd->rcv_entries.group_size;
- }
- }
-
fd->entry_to_rb = kcalloc(uctxt->expected_count,
- sizeof(struct rb_node *),
- GFP_KERNEL);
+ sizeof(struct rb_node *),
+ GFP_KERNEL);
if (!fd->entry_to_rb)
return -ENOMEM;
if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) {
fd->invalid_tid_idx = 0;
- fd->invalid_tids = kzalloc(uctxt->expected_count *
- sizeof(u32), GFP_KERNEL);
+ fd->invalid_tids = kcalloc(uctxt->expected_count,
+ sizeof(*fd->invalid_tids),
+ GFP_KERNEL);
if (!fd->invalid_tids) {
- ret = -ENOMEM;
- goto done;
+ kfree(fd->entry_to_rb);
+ fd->entry_to_rb = NULL;
+ return -ENOMEM;
}
/*
@@ -247,41 +262,44 @@ int hfi1_user_exp_rcv_init(struct file *fp)
fd->tid_limit = uctxt->expected_count;
}
spin_unlock(&fd->tid_lock);
-done:
+
return ret;
}
-int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
+void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt)
{
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct tid_group *grp, *gptr;
- if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
- return 0;
+ list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
+ list) {
+ list_del_init(&grp->list);
+ kfree(grp);
+ }
+ hfi1_clear_tids(uctxt);
+}
+
+void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
+{
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+
/*
 * The notifier would have been removed when the process's mm
* was freed.
*/
- if (fd->handler)
+ if (fd->handler) {
hfi1_mmu_rb_unregister(fd->handler);
-
- kfree(fd->invalid_tids);
-
- if (!uctxt->cnt) {
+ } else {
if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
- list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
- list) {
- list_del_init(&grp->list);
- kfree(grp);
- }
- hfi1_clear_tids(uctxt);
}
+ kfree(fd->invalid_tids);
+ fd->invalid_tids = NULL;
+
kfree(fd->entry_to_rb);
- return 0;
+ fd->entry_to_rb = NULL;
}
/*
@@ -350,10 +368,10 @@ static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
* can fit into the group. If the group becomes fully
* used, move it to tid_full_list.
*/
-int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
+int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo)
{
int ret = 0, need_group = 0, pinned;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets,
@@ -450,7 +468,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
struct tid_group *grp =
tid_group_pop(&uctxt->tid_group_list);
- ret = program_rcvarray(fp, vaddr, grp, pagesets,
+ ret = program_rcvarray(fd, vaddr, grp, pagesets,
pageidx, dd->rcv_entries.group_size,
pages, tidlist, &tididx, &mapped);
/*
@@ -496,7 +514,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
unsigned use = min_t(unsigned, pageset_count - pageidx,
grp->size - grp->used);
- ret = program_rcvarray(fp, vaddr, grp, pagesets,
+ ret = program_rcvarray(fd, vaddr, grp, pagesets,
pageidx, use, pages, tidlist,
&tididx, &mapped);
if (ret < 0) {
@@ -546,7 +564,7 @@ nomem:
* everything done so far so we don't leak resources.
*/
tinfo->tidlist = (unsigned long)&tidlist;
- hfi1_user_exp_rcv_clear(fp, tinfo);
+ hfi1_user_exp_rcv_clear(fd, tinfo);
tinfo->tidlist = 0;
ret = -EFAULT;
goto bail;
@@ -570,14 +588,17 @@ bail:
return ret > 0 ? 0 : ret;
}
-int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
+int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo)
{
int ret = 0;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
u32 *tidinfo;
unsigned tididx;
+ if (unlikely(tinfo->tidcnt > fd->tid_used))
+ return -EINVAL;
+
tidinfo = memdup_user((void __user *)(unsigned long)tinfo->tidlist,
sizeof(tidinfo[0]) * tinfo->tidcnt);
if (IS_ERR(tidinfo))
@@ -585,7 +606,7 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
mutex_lock(&uctxt->exp_lock);
for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
- ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL);
+ ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL);
if (ret) {
hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
ret);
@@ -602,12 +623,12 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
return ret;
}
-int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
+int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
unsigned long *ev = uctxt->dd->events +
- (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+ (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
u32 *array;
int ret = 0;
@@ -719,7 +740,7 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages,
/**
* program_rcvarray() - program an RcvArray group with receive buffers
- * @fp: file pointer
+ * @fd: filedata pointer
* @vaddr: starting user virtual address
* @grp: RcvArray group
* @sets: array of struct tid_pageset holding information on physically
@@ -744,13 +765,12 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages,
* -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
* number of RcvArray entries programmed.
*/
-static int program_rcvarray(struct file *fp, unsigned long vaddr,
+static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr,
struct tid_group *grp,
struct tid_pageset *sets,
unsigned start, u16 count, struct page **pages,
u32 *tidlist, unsigned *tididx, unsigned *pmapped)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
u16 idx;
@@ -791,7 +811,7 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr,
npages = sets[setidx].count;
pageidx = sets[setidx].idx;
- ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE),
+ ret = set_rcvarray_entry(fd, vaddr + (pageidx * PAGE_SIZE),
rcventry, grp, pages + pageidx,
npages);
if (ret)
@@ -813,12 +833,11 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr,
return idx;
}
-static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
+static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr,
u32 rcventry, struct tid_group *grp,
struct page **pages, unsigned npages)
{
int ret;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct tid_rb_node *node;
struct hfi1_devdata *dd = uctxt->dd;
@@ -872,10 +891,9 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
return 0;
}
-static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
struct tid_group **grp)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
struct tid_rb_node *node;
@@ -1011,8 +1029,8 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
* process in question.
*/
ev = uctxt->dd->events +
- (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
+ (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
+ HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
}
fdata->invalid_tid_idx++;
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 9bc8d9fba87e..5250c897298d 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_USER_EXP_RCV_H
#define _HFI1_USER_EXP_RCV_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -70,10 +70,15 @@
(tid) |= EXP_TID_SET(field, (value)); \
} while (0)
-int hfi1_user_exp_rcv_init(struct file *);
-int hfi1_user_exp_rcv_free(struct hfi1_filedata *);
-int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *);
-int hfi1_user_exp_rcv_clear(struct file *, struct hfi1_tid_info *);
-int hfi1_user_exp_rcv_invalid(struct file *, struct hfi1_tid_info *);
+void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt);
+int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd);
+int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd);
+void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd);
+int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo);
+int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo);
+int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo);
#endif /* _HFI1_USER_EXP_RCV_H */
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 68295a12b771..e341e6dcc388 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -73,7 +73,8 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
{
unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
size = (cache_size * (1UL << 20)); /* convert to bytes */
- unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt;
+ unsigned int usr_ctxts =
+ dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
bool can_lock = capable(CAP_IPC_LOCK);
/*
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index e6811c4edc73..d55339f5d73b 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -143,7 +143,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
/* KDETH OM multipliers and switch over point */
#define KDETH_OM_SMALL 4
+#define KDETH_OM_SMALL_SHIFT 2
#define KDETH_OM_LARGE 64
+#define KDETH_OM_LARGE_SHIFT 6
#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
/* Tx request flag bits */
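Note (editorial, not part of the patch): the new *_SHIFT constants let
multiplications by the KDETH OM factor be written as shifts, which is
presumably why the cached omfactor field is removed later in this hunk
series. Sketch of the implied equivalences:

	/* Sketch only: relations implied by the values above. */
	BUILD_BUG_ON(KDETH_OM_SMALL != (1 << KDETH_OM_SMALL_SHIFT));	/*  4 == 1 << 2 */
	BUILD_BUG_ON(KDETH_OM_LARGE != (1 << KDETH_OM_LARGE_SHIFT));	/* 64 == 1 << 6 */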
@@ -153,9 +155,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
/* SDMA request flag bits */
#define SDMA_REQ_FOR_THREAD 1
#define SDMA_REQ_SEND_DONE 2
-#define SDMA_REQ_HAVE_AHG 3
-#define SDMA_REQ_HAS_ERROR 4
-#define SDMA_REQ_DONE_ERROR 5
+#define SDMA_REQ_HAS_ERROR 3
+#define SDMA_REQ_DONE_ERROR 4
#define SDMA_PKT_Q_INACTIVE BIT(0)
#define SDMA_PKT_Q_ACTIVE BIT(1)
@@ -214,7 +215,7 @@ struct user_sdma_request {
* each request will need it's own engine pointer.
*/
struct sdma_engine *sde;
- u8 ahg_idx;
+ s8 ahg_idx;
u32 ahg[9];
/*
* KDETH.Offset (Eager) field
@@ -229,12 +230,6 @@ struct user_sdma_request {
*/
u32 tidoffset;
/*
- * KDETH.OM
- * Remember this because the header template always sets it
- * to 0.
- */
- u8 omfactor;
- /*
* We copy the iovs for this request (based on
* info.iovcnt). These are only the data vectors
*/
@@ -284,39 +279,43 @@ struct user_sdma_txreq {
hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \
(pq)->subctxt, ##__VA_ARGS__)
-static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
-static int num_user_pages(const struct iovec *);
-static void user_sdma_txreq_cb(struct sdma_txreq *, int);
-static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
-static void user_sdma_free_request(struct user_sdma_request *, bool);
-static int pin_vector_pages(struct user_sdma_request *,
- struct user_sdma_iovec *);
-static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned,
- unsigned);
-static int check_header_template(struct user_sdma_request *,
- struct hfi1_pkt_header *, u32, u32);
-static int set_txreq_header(struct user_sdma_request *,
- struct user_sdma_txreq *, u32);
-static int set_txreq_header_ahg(struct user_sdma_request *,
- struct user_sdma_txreq *, u32);
-static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *,
- struct hfi1_user_sdma_comp_q *,
- u16, enum hfi1_sdma_comp_state, int);
-static inline u32 set_pkt_bth_psn(__be32, u8, u32);
+static int user_sdma_send_pkts(struct user_sdma_request *req,
+ unsigned maxpkts);
+static int num_user_pages(const struct iovec *iov);
+static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
+static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
+static int pin_vector_pages(struct user_sdma_request *req,
+ struct user_sdma_iovec *iovec);
+static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
+ unsigned start, unsigned npages);
+static int check_header_template(struct user_sdma_request *req,
+ struct hfi1_pkt_header *hdr, u32 lrhlen,
+ u32 datalen);
+static int set_txreq_header(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx, u32 datalen);
+static int set_txreq_header_ahg(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx, u32 len);
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
+ struct hfi1_user_sdma_comp_q *cq,
+ u16 idx, enum hfi1_sdma_comp_state state,
+ int ret);
+static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags);
static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
static int defer_packet_queue(
- struct sdma_engine *,
- struct iowait *,
- struct sdma_txreq *,
- unsigned seq);
-static void activate_packet_queue(struct iowait *, int);
-static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
-static int sdma_rb_insert(void *, struct mmu_rb_node *);
+ struct sdma_engine *sde,
+ struct iowait *wait,
+ struct sdma_txreq *txreq,
+ unsigned int seq);
+static void activate_packet_queue(struct iowait *wait, int reason);
+static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+ unsigned long len);
+static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode);
static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
void *arg2, bool *stop);
-static void sdma_rb_remove(void *, struct mmu_rb_node *);
-static int sdma_rb_invalidate(void *, struct mmu_rb_node *);
+static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
+static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
static struct mmu_rb_ops sdma_rb_ops = {
.filter = sdma_rb_filter,
@@ -372,44 +371,27 @@ static void sdma_kmem_cache_ctor(void *obj)
memset(tx, 0, sizeof(*tx));
}
-int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
+int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
+ struct hfi1_filedata *fd)
{
- struct hfi1_filedata *fd;
- int ret = 0;
- unsigned memsize;
+ int ret = -ENOMEM;
char buf[64];
struct hfi1_devdata *dd;
struct hfi1_user_sdma_comp_q *cq;
struct hfi1_user_sdma_pkt_q *pq;
unsigned long flags;
- if (!uctxt || !fp) {
- ret = -EBADF;
- goto done;
- }
+ if (!uctxt || !fd)
+ return -EBADF;
- fd = fp->private_data;
-
- if (!hfi1_sdma_comp_ring_size) {
- ret = -EINVAL;
- goto done;
- }
+ if (!hfi1_sdma_comp_ring_size)
+ return -EINVAL;
dd = uctxt->dd;
pq = kzalloc(sizeof(*pq), GFP_KERNEL);
if (!pq)
- goto pq_nomem;
-
- memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size;
- pq->reqs = kzalloc(memsize, GFP_KERNEL);
- if (!pq->reqs)
- goto pq_reqs_nomem;
-
- memsize = BITS_TO_LONGS(hfi1_sdma_comp_ring_size) * sizeof(long);
- pq->req_in_use = kzalloc(memsize, GFP_KERNEL);
- if (!pq->req_in_use)
- goto pq_reqs_no_in_use;
+ return -ENOMEM;
INIT_LIST_HEAD(&pq->list);
pq->dd = dd;
@@ -425,10 +407,23 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
activate_packet_queue, NULL);
pq->reqidx = 0;
+
+ pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
+ sizeof(*pq->reqs),
+ GFP_KERNEL);
+ if (!pq->reqs)
+ goto pq_reqs_nomem;
+
+ pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
+ sizeof(*pq->req_in_use),
+ GFP_KERNEL);
+ if (!pq->req_in_use)
+ goto pq_reqs_no_in_use;
+
snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
fd->subctxt);
pq->txreq_cache = kmem_cache_create(buf,
- sizeof(struct user_sdma_txreq),
+ sizeof(struct user_sdma_txreq),
L1_CACHE_BYTES,
SLAB_HWCACHE_ALIGN,
sdma_kmem_cache_ctor);
@@ -437,31 +432,36 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
uctxt->ctxt);
goto pq_txreq_nomem;
}
- fd->pq = pq;
+
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
if (!cq)
goto cq_nomem;
- memsize = PAGE_ALIGN(sizeof(*cq->comps) * hfi1_sdma_comp_ring_size);
- cq->comps = vmalloc_user(memsize);
+ cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps)
+ * hfi1_sdma_comp_ring_size));
if (!cq->comps)
goto cq_comps_nomem;
cq->nentries = hfi1_sdma_comp_ring_size;
- fd->cq = cq;
ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
&pq->handler);
if (ret) {
dd_dev_err(dd, "Failed to register with MMU %d", ret);
- goto done;
+ goto pq_mmu_fail;
}
+ fd->pq = pq;
+ fd->cq = cq;
+
spin_lock_irqsave(&uctxt->sdma_qlock, flags);
list_add(&pq->list, &uctxt->sdma_queues);
spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
- goto done;
+ return 0;
+
+pq_mmu_fail:
+ vfree(cq->comps);
cq_comps_nomem:
kfree(cq);
cq_nomem:
@@ -472,10 +472,7 @@ pq_reqs_no_in_use:
kfree(pq->reqs);
pq_reqs_nomem:
kfree(pq);
- fd->pq = NULL;
-pq_nomem:
- ret = -ENOMEM;
-done:
+
return ret;
}
@@ -535,11 +532,11 @@ static u8 dlid_to_selector(u16 dlid)
return mapping[hash];
}
-int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
- unsigned long dim, unsigned long *count)
+int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
+ struct iovec *iovec, unsigned long dim,
+ unsigned long *count)
{
int ret = 0, i;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_user_sdma_pkt_q *pq = fd->pq;
struct hfi1_user_sdma_comp_q *cq = fd->cq;
@@ -615,6 +612,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
req->pq = pq;
req->cq = cq;
req->status = -1;
+ req->ahg_idx = -1;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
@@ -704,7 +702,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
/* Save all the IO vector structures */
for (i = 0; i < req->data_iovs; i++) {
INIT_LIST_HEAD(&req->iovs[i].list);
- memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
+ memcpy(&req->iovs[i].iov,
+ iovec + idx++,
+ sizeof(req->iovs[i].iov));
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
req->status = ret;
@@ -763,14 +763,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
}
/* We don't need an AHG entry if the request contains only one packet */
- if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG)) {
- int ahg = sdma_ahg_alloc(req->sde);
-
- if (likely(ahg >= 0)) {
- req->ahg_idx = (u8)ahg;
- set_bit(SDMA_REQ_HAVE_AHG, &req->flags);
- }
- }
+ if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG))
+ req->ahg_idx = sdma_ahg_alloc(req->sde);
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
atomic_inc(&pq->n_reqs);
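With SDMA_REQ_HAVE_AHG removed, a negative ahg_idx now doubles as the "no AHG entry" marker. Condensed from the hunks above and below (sketch only), the lifecycle becomes:

req->ahg_idx = -1;			/* default: no AHG entry */
if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG))
	req->ahg_idx = sdma_ahg_alloc(req->sde);	/* >= 0 on success, negative on failure */

/* ... later, when building headers and on completion ... */
if (req->ahg_idx >= 0)			/* replaces test_bit(SDMA_REQ_HAVE_AHG, &req->flags) */
	sdma_ahg_free(req->sde, req->ahg_idx);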
@@ -988,7 +982,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
}
}
- if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) {
+ if (req->ahg_idx >= 0) {
if (!req->seqnum) {
u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
u32 lrhlen = get_lrh_len(req->hdr,
@@ -1118,7 +1112,7 @@ dosend:
* happen due to the sequential manner in which
* descriptors are processed.
*/
- if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
+ if (req->ahg_idx >= 0)
sdma_ahg_free(req->sde, req->ahg_idx);
}
return ret;
@@ -1320,6 +1314,7 @@ static int set_txreq_header(struct user_sdma_request *req,
{
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct hfi1_pkt_header *hdr = &tx->hdr;
+ u8 omfactor; /* KDETH.OM */
u16 pbclen;
int ret;
u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
@@ -1397,8 +1392,9 @@ static int set_txreq_header(struct user_sdma_request *req,
}
tidval = req->tids[req->tididx];
}
- req->omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
- KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE : KDETH_OM_SMALL;
+ omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
+ KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
+ KDETH_OM_SMALL_SHIFT;
/* Set KDETH.TIDCtrl based on value for this TID. */
KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
EXP_TID_GET(tidval, CTRL));
@@ -1413,12 +1409,12 @@ static int set_txreq_header(struct user_sdma_request *req,
* transfer.
*/
SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
- req->tidoffset, req->tidoffset / req->omfactor,
- req->omfactor != KDETH_OM_SMALL);
+ req->tidoffset, req->tidoffset >> omfactor,
+ omfactor != KDETH_OM_SMALL_SHIFT);
KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
- req->tidoffset / req->omfactor);
+ req->tidoffset >> omfactor);
KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
- req->omfactor != KDETH_OM_SMALL);
+ omfactor != KDETH_OM_SMALL_SHIFT);
}
done:
trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
@@ -1430,6 +1426,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
struct user_sdma_txreq *tx, u32 len)
{
int diff = 0;
+ u8 omfactor; /* KDETH.OM */
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct hfi1_pkt_header *hdr = &req->hdr;
u16 pbclen = le16_to_cpu(hdr->pbc[0]);
@@ -1481,14 +1478,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
}
tidval = req->tids[req->tididx];
}
- req->omfactor = ((EXP_TID_GET(tidval, LEN) *
+ omfactor = ((EXP_TID_GET(tidval, LEN) *
PAGE_SIZE) >=
- KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE :
- KDETH_OM_SMALL;
+ KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
+ KDETH_OM_SMALL_SHIFT;
/* KDETH.OM and KDETH.OFFSET (TID) */
AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
- ((!!(req->omfactor - KDETH_OM_SMALL)) << 15 |
- ((req->tidoffset / req->omfactor) & 0x7fff)));
+ ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
+ ((req->tidoffset >> omfactor)
+ & 0x7fff)));
/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
(EXP_TID_GET(tidval, IDX) & 0x3ff));
@@ -1615,9 +1613,10 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
{
hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
- cq->comps[idx].status = state;
if (state == ERROR)
cq->comps[idx].errcode = -ret;
+ smp_wmb(); /* make sure errcode is visible first */
+ cq->comps[idx].status = state;
trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
idx, state, ret);
}
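Moving the status store after the errcode store, with the barrier in between, guarantees that a consumer which observes the new status also sees the matching errcode. A sketch of the assumed pairing (the reader side, typically user space polling the mmapped completion ring, is not part of this patch):

/* writer (set_comp_state) */
if (state == ERROR)
	cq->comps[idx].errcode = -ret;
smp_wmb();				/* publish errcode before status */
cq->comps[idx].status = state;

/* hypothetical reader */
enum hfi1_sdma_comp_state s = READ_ONCE(cq->comps[idx].status);
smp_rmb();				/* pairs with the smp_wmb() above */
if (s == ERROR)
	handle_error(cq->comps[idx].errcode);	/* handle_error() is illustrative */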
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 39001714f551..e5b10aefe212 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -1,5 +1,7 @@
+#ifndef _HFI1_USER_SDMA_H
+#define _HFI1_USER_SDMA_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -56,7 +58,7 @@ extern uint extended_psn;
struct hfi1_user_sdma_pkt_q {
struct list_head list;
unsigned ctxt;
- unsigned subctxt;
+ u16 subctxt;
u16 n_max_reqs;
atomic_t n_reqs;
u16 reqidx;
@@ -78,7 +80,11 @@ struct hfi1_user_sdma_comp_q {
struct hfi1_sdma_comp_entry *comps;
};
-int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *, struct file *);
-int hfi1_user_sdma_free_queues(struct hfi1_filedata *);
-int hfi1_user_sdma_process_request(struct file *, struct iovec *, unsigned long,
- unsigned long *);
+int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
+ struct hfi1_filedata *fd);
+int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd);
+int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
+ struct iovec *iovec, unsigned long dim,
+ unsigned long *count);
+
+#endif /* _HFI1_USER_SDMA_H */
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 222315fadab1..90e7b77d68e8 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -60,6 +60,8 @@
#include "trace.h"
#include "qp.h"
#include "verbs_txreq.h"
+#include "debugfs.h"
+#include "vnic.h"
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -297,6 +299,22 @@ static inline bool wss_exceeds_threshold(void)
}
/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
+ [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
+ [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
+ [IB_WR_REG_MR] = IB_WC_REG_MR
+};
+
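The table above provides a constant-time translation from a posted work-request opcode to the completion opcode reported in the CQE. An illustrative use when filling in a send completion (not part of this hunk; wqe is assumed to be the rvt_swqe being completed):

struct ib_wc wc = { };

wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];	/* e.g. IB_WR_RDMA_WRITE -> IB_WC_RDMA_WRITE */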
+/*
* Length of header by opcode, 0 --> not supported
*/
const u8 hdr_len_by_opcode[256] = {
@@ -501,6 +519,35 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
return NULL;
}
+static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
+{
+#ifdef CONFIG_FAULT_INJECTION
+ if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
+ /*
+ * In order to drop non-IB traffic we
+ * set PbcInsertHrc to NONE (0x2).
+ * The packet will still be delivered
+ * to the receiving node but a
+ * KHdrHCRCErr (KDETH packet with a bad
+ * HCRC) will be triggered and the
+ * packet will not be delivered to the
+ * correct context.
+ */
+ pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
+ else
+ /*
+ * In order to drop regular verbs
+ * traffic we set the PbcTestEbp
+ * flag. The packet will still be
+ * delivered to the receiving node but
+ * a 'late ebp error' will be
+ * triggered and will be dropped.
+ */
+ pbc |= PBC_TEST_EBP;
+#endif
+ return pbc;
+}
+
/**
* hfi1_ib_rcv - process an incoming packet
* @packet: data packet information
@@ -525,7 +572,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
u16 lid;
/* Check for GRH */
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
} else if (lnh == HFI1_LRH_GRH) {
@@ -544,12 +591,12 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
trace_input_ibhdr(rcd->dd, hdr);
- opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
+ opcode = ib_bth_get_opcode(packet->ohdr);
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
/* Get the destination QP number. */
qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
- lid = be16_to_cpu(hdr->lrh[1]);
+ lid = ib_get_dlid(hdr);
if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
struct rvt_mcast *mcast;
@@ -557,7 +604,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
if (lnh != HFI1_LRH_GRH)
goto drop;
- mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
+ mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
if (!mcast)
goto drop;
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
@@ -583,6 +630,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
rcu_read_unlock();
goto drop;
}
+ if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode,
+ true))) {
+ rcu_read_unlock();
+ goto drop;
+ }
spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(opcode, packet);
if (likely(packet_handler))
@@ -781,7 +833,6 @@ static int build_verbs_tx_desc(
if (ret)
goto bail_txadd;
}
-
/* add the ulp payload - if any. tx->ss can be NULL for acks */
if (tx->ss)
ret = build_verbs_ulp_payload(sde, length, tx);
@@ -800,7 +851,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_ibdev *dev = ps->dev;
struct hfi1_pportdata *ppd = ps->ppd;
struct verbs_txreq *tx;
- u64 pbc_flags = 0;
u8 sc5 = priv->s_sc;
int ret;
@@ -809,12 +859,16 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (!sdma_txreq_built(&tx->txreq)) {
if (likely(pbc == 0)) {
u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+ u8 opcode = get_opcode(&tx->phdr.hdr);
+
/* No vl15 here */
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+ pbc = hfi1_fault_tx(qp, opcode, pbc);
pbc = create_pbc(ppd,
- pbc_flags,
+ pbc,
qp->srate_mbps,
vl,
plen);
@@ -917,7 +971,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u32 plen = hdrwords + dwords + 2; /* includes pbc */
struct hfi1_pportdata *ppd = ps->ppd;
u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
- u64 pbc_flags = 0;
u8 sc5;
unsigned long flags = 0;
struct send_context *sc;
@@ -942,9 +995,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (likely(pbc == 0)) {
u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+ struct verbs_txreq *tx = ps->s_txreq;
+ u8 opcode = get_opcode(&tx->phdr.hdr);
+
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
- pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
+ pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+ pbc = hfi1_fault_tx(qp, opcode, pbc);
+ pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);
@@ -1173,7 +1231,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
hdr = &ps->s_txreq->phdr.hdr;
/* locate the pkey within the headers */
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_GRH)
ohdr = &hdr->u.l.oth;
else
@@ -1220,17 +1278,20 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
- u16 ver = dd->dc8051_ver;
+ u32 ver = dd->dc8051_ver;
memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
- rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) |
- (u64)dc8051_ver_min(ver);
+ rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) |
+ ((u64)(dc8051_ver_min(ver)) << 16) |
+ (u64)dc8051_ver_patch(ver);
+
rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
- IB_DEVICE_MEM_MGT_EXTENSIONS;
+ IB_DEVICE_MEM_MGT_EXTENSIONS |
+ IB_DEVICE_RDMA_NETDEV_OPA_VNIC;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;
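fw_ver now packs three fields instead of two; decoding the value back into major.minor.patch mirrors the shifts above (purely illustrative):

u64 fw_ver    = rdi->dparms.props.fw_ver;
u16 ver_maj   = fw_ver >> 32;			/* dc8051_ver_maj(ver) */
u16 ver_min   = (fw_ver >> 16) & 0xffff;	/* dc8051_ver_min(ver) */
u16 ver_patch = fw_ver & 0xffff;		/* dc8051_ver_patch(ver) */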
@@ -1398,14 +1459,14 @@ static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
/*
* convert ah port,sl to sc
*/
-u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah)
+u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah)
{
- struct hfi1_ibport *ibp = to_iport(ibdev, ah->port_num);
+ struct hfi1_ibport *ibp = to_iport(ibdev, rdma_ah_get_port_num(ah));
- return ibp->sl_to_sc[ah->sl];
+ return ibp->sl_to_sc[rdma_ah_get_sl(ah)];
}
-static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
+static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
{
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
@@ -1413,9 +1474,9 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
u8 sc5;
/* test the mapping for validity */
- ibp = to_iport(ibdev, ah_attr->port_num);
+ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
ppd = ppd_from_ibp(ibp);
- sc5 = ibp->sl_to_sc[ah_attr->sl];
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
dd = dd_from_ppd(ppd);
if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
return -EINVAL;
@@ -1423,7 +1484,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
}
static void hfi1_notify_new_ah(struct ib_device *ibdev,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct rvt_ah *ah)
{
struct hfi1_ibport *ibp;
@@ -1436,9 +1497,9 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev,
* done being setup. We can however modify things which we need to set.
*/
- ibp = to_iport(ibdev, ah_attr->port_num);
+ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
ppd = ppd_from_ibp(ibp);
- sc5 = ibp->sl_to_sc[ah->attr.sl];
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)];
dd = dd_from_ppd(ppd);
ah->vl = sc_to_vlt(dd, sc5);
if (ah->vl < num_vls || ah->vl == 15)
@@ -1447,17 +1508,21 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev,
struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
{
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
struct ib_ah *ah = ERR_PTR(-EINVAL);
struct rvt_qp *qp0;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ struct hfi1_devdata *dd = dd_from_ppd(ppd);
+ u8 port_num = ppd->port;
memset(&attr, 0, sizeof(attr));
- attr.dlid = dlid;
- attr.port_num = ppd_from_ibp(ibp)->port;
+ attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
+ rdma_ah_set_dlid(&attr, dlid);
+ rdma_ah_set_port_num(&attr, ppd_from_ibp(ibp)->port);
rcu_read_lock();
qp0 = rcu_dereference(ibp->rvp.qp[0]);
if (qp0)
- ah = ib_create_ah(qp0->ibqp.pd, &attr);
+ ah = rdma_create_ah(qp0->ibqp.pd, &attr);
rcu_read_unlock();
return ah;
}
@@ -1504,10 +1569,10 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
{
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
struct hfi1_ibdev *dev = dev_from_rdi(rdi);
- u16 ver = dd_from_dev(dev)->dc8051_ver;
+ u32 ver = dd_from_dev(dev)->dc8051_ver;
- snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver),
- dc8051_ver_min(ver));
+ snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver),
+ dc8051_ver_min(ver), dc8051_ver_patch(ver));
}
static const char * const driver_cntr_names[] = {
@@ -1524,6 +1589,7 @@ static const char * const driver_cntr_names[] = {
"DRIVER_EgrHdrFull"
};
+static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names buffers */
static const char **dev_cntr_names;
static const char **port_cntr_names;
static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
@@ -1578,6 +1644,7 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
{
int i, err;
+ mutex_lock(&cntr_names_lock);
if (!cntr_names_initialized) {
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -1586,8 +1653,10 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
num_driver_cntrs,
&num_dev_cntrs,
&dev_cntr_names);
- if (err)
+ if (err) {
+ mutex_unlock(&cntr_names_lock);
return NULL;
+ }
for (i = 0; i < num_driver_cntrs; i++)
dev_cntr_names[num_dev_cntrs + i] =
@@ -1601,10 +1670,12 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
if (err) {
kfree(dev_cntr_names);
dev_cntr_names = NULL;
+ mutex_unlock(&cntr_names_lock);
return NULL;
}
cntr_names_initialized = 1;
}
+ mutex_unlock(&cntr_names_lock);
if (!port_num)
return rdma_alloc_hw_stats_struct(
@@ -1707,6 +1778,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
ibdev->modify_device = modify_device;
ibdev->alloc_hw_stats = alloc_hw_stats;
ibdev->get_hw_stats = get_hw_stats;
+ ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn;
+ ibdev->free_rdma_netdev = hfi1_vnic_free_rn;
/* keep process mad in the driver */
ibdev->process_mad = hfi1_process_mad;
@@ -1751,7 +1824,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
- dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
+ dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send_from_rvt;
dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
@@ -1823,9 +1896,13 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
del_timer_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
+ mutex_lock(&cntr_names_lock);
kfree(dev_cntr_names);
kfree(port_cntr_names);
+ dev_cntr_names = NULL;
+ port_cntr_names = NULL;
cntr_names_initialized = 0;
+ mutex_unlock(&cntr_names_lock);
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)
@@ -1840,12 +1917,12 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
switch (packet->qp->ibqp.qp_type) {
case IB_QPT_UC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
case IB_QPT_RC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_RC;
break;
@@ -1859,7 +1936,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
return;
}
- sc5 = hdr2sc(hdr, packet->rhf);
+ sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = qp->ibqp.qp_num;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 3a0b589e41c2..cd635d0c1d3b 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -125,7 +125,6 @@ struct hfi1_qp_priv {
struct sdma_engine *s_sde; /* current sde */
struct send_context *s_sendcontext; /* current sendcontext */
u8 s_sc; /* SC[0..4] for next packet */
- u8 r_adefered; /* number of acks defered */
struct iowait s_iowait;
struct rvt_qp *owner;
};
@@ -140,6 +139,10 @@ struct hfi1_pkt_state {
struct hfi1_pportdata *ppd;
struct verbs_txreq *s_txreq;
unsigned long flags;
+ unsigned long timeout;
+ unsigned long timeout_int;
+ int cpu;
+ bool in_thread;
};
#define HFI1_PSN_CREDIT 16
@@ -195,6 +198,11 @@ struct hfi1_ibdev {
struct dentry *hfi1_ibdev_dbg;
/* per HFI symlinks to above */
struct dentry *hfi1_ibdev_link;
+#ifdef CONFIG_FAULT_INJECTION
+ struct fault_opcode *fault_opcode;
+ struct fault_packet *fault_packet;
+ bool fault_suppress_err;
+#endif
#endif
};
@@ -303,7 +311,7 @@ void hfi1_rc_hdrerr(
u32 rcv_flags,
struct rvt_qp *qp);
-u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid);
@@ -342,7 +350,7 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
- struct ib_global_route *grh, u32 hwords, u32 nwords);
+ const struct ib_global_route *grh, u32 hwords, u32 nwords);
void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
@@ -350,7 +358,9 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
void _hfi1_do_send(struct work_struct *work);
-void hfi1_do_send(struct rvt_qp *qp);
+void hfi1_do_send_from_rvt(struct rvt_qp *qp);
+
+void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status);
diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h
new file mode 100644
index 000000000000..e2c455299b53
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -0,0 +1,184 @@
+#ifndef _HFI1_VNIC_H
+#define _HFI1_VNIC_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/opa_vnic.h>
+#include "hfi.h"
+#include "sdma.h"
+
+#define HFI1_VNIC_MAX_TXQ 16
+#define HFI1_VNIC_MAX_PAD 12
+
+/* L2 header definitions */
+#define HFI1_L2_TYPE_OFFSET 0x7
+#define HFI1_L2_TYPE_SHFT 0x5
+#define HFI1_L2_TYPE_MASK 0x3
+
+#define HFI1_GET_L2_TYPE(hdr) \
+ ((*((u8 *)(hdr) + HFI1_L2_TYPE_OFFSET) >> HFI1_L2_TYPE_SHFT) & \
+ HFI1_L2_TYPE_MASK)
+
+/* L4 type definitions */
+#define HFI1_L4_TYPE_OFFSET 8
+
+#define HFI1_GET_L4_TYPE(data) \
+ (*((u8 *)(data) + HFI1_L4_TYPE_OFFSET))
+
+/* L4 header definitions */
+#define HFI1_VNIC_L4_HDR_OFFSET OPA_VNIC_L2_HDR_LEN
+
+#define HFI1_VNIC_GET_L4_HDR(data) \
+ (*((u16 *)((u8 *)(data) + HFI1_VNIC_L4_HDR_OFFSET)))
+
+#define HFI1_VNIC_GET_VESWID(data) \
+ (HFI1_VNIC_GET_L4_HDR(data) & 0xFFF)
+
+/* Service class */
+#define HFI1_VNIC_SC_OFFSET_LOW 6
+#define HFI1_VNIC_SC_OFFSET_HI 7
+#define HFI1_VNIC_SC_SHIFT 4
+
+#define HFI1_VNIC_MAX_QUEUE 16
+
+/**
+ * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
+ * @dd: device data pointer
+ * @sde: sdma engine
+ * @vinfo: vnic info pointer
+ * @wait: iowait structure
+ * @stx: sdma tx request
+ * @state: vnic Tx ring SDMA state
+ * @q_idx: vnic Tx queue index
+ */
+struct hfi1_vnic_sdma {
+ struct hfi1_devdata *dd;
+ struct sdma_engine *sde;
+ struct hfi1_vnic_vport_info *vinfo;
+ struct iowait wait;
+ struct sdma_txreq stx;
+ unsigned int state;
+ u8 q_idx;
+};
+
+/**
+ * struct hfi1_vnic_rx_queue - HFI1 VNIC receive queue
+ * @idx: queue index
+ * @vinfo: pointer to vport information
+ * @netdev: network device
+ * @napi: netdev napi structure
+ * @skbq: queue of received socket buffers
+ */
+struct hfi1_vnic_rx_queue {
+ u8 idx;
+ struct hfi1_vnic_vport_info *vinfo;
+ struct net_device *netdev;
+ struct napi_struct napi;
+ struct sk_buff_head skbq;
+};
+
+/**
+ * struct hfi1_vnic_vport_info - HFI1 VNIC virtual port information
+ * @dd: device data pointer
+ * @netdev: net device pointer
+ * @flags: state flags
+ * @lock: vport lock
+ * @num_tx_q: number of transmit queues
+ * @num_rx_q: number of receive queues
+ * @vesw_id: virtual switch id
+ * @rxq: Array of receive queues
+ * @stats: per queue stats
+ * @sdma: VNIC SDMA structure per TXQ
+ */
+struct hfi1_vnic_vport_info {
+ struct hfi1_devdata *dd;
+ struct net_device *netdev;
+ unsigned long flags;
+
+ /* Lock used around state updates */
+ struct mutex lock;
+
+ u8 num_tx_q;
+ u8 num_rx_q;
+ u16 vesw_id;
+ struct hfi1_vnic_rx_queue rxq[HFI1_NUM_VNIC_CTXT];
+
+ struct opa_vnic_stats stats[HFI1_VNIC_MAX_QUEUE];
+ struct hfi1_vnic_sdma sdma[HFI1_VNIC_MAX_TXQ];
+};
+
+#define v_dbg(format, arg...) \
+ netdev_dbg(vinfo->netdev, format, ## arg)
+#define v_err(format, arg...) \
+ netdev_err(vinfo->netdev, format, ## arg)
+#define v_info(format, arg...) \
+ netdev_info(vinfo->netdev, format, ## arg)
+
+/* vnic hfi1 internal functions */
+void hfi1_vnic_setup(struct hfi1_devdata *dd);
+void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
+
+void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet);
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo);
+bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx);
+
+/* vnic rdma netdev operations */
+struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *));
+void hfi1_vnic_free_rn(struct net_device *netdev);
+int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen);
+
+#endif /* _HFI1_VNIC_H */
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
new file mode 100644
index 000000000000..b601c2929f8f
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -0,0 +1,903 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for VNIC functionality
+ */
+
+#include <linux/io.h>
+#include <linux/if_vlan.h>
+
+#include "vnic.h"
+
+#define HFI_TX_TIMEOUT_MS 1000
+
+#define HFI1_VNIC_RCV_Q_SIZE 1024
+
+#define HFI1_VNIC_UP 0
+
+static DEFINE_SPINLOCK(vport_cntr_lock);
+
+static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
+{
+ unsigned int rcvctrl_ops = 0;
+ int ret;
+
+ hfi1_init_ctxt(uctxt->sc);
+
+ uctxt->do_interrupt = &handle_receive_interrupt;
+
+ /* Now allocate the RcvHdr queue and eager buffers. */
+ ret = hfi1_create_rcvhdrq(dd, uctxt);
+ if (ret)
+ goto done;
+
+ ret = hfi1_setup_eagerbufs(uctxt);
+ if (ret)
+ goto done;
+
+ if (uctxt->rcvhdrtail_kvaddr)
+ clear_rcvhdrtail(uctxt);
+
+ rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
+ rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;
+
+ if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
+ rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
+ rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+
+ hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
+
+ uctxt->is_vnic = true;
+done:
+ return ret;
+}
+
+static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata **vnic_ctxt)
+{
+ struct hfi1_ctxtdata *uctxt;
+ unsigned int ctxt;
+ int ret;
+
+ if (dd->flags & HFI1_FROZEN)
+ return -EIO;
+
+ for (ctxt = dd->first_dyn_alloc_ctxt;
+ ctxt < dd->num_rcv_contexts; ctxt++)
+ if (!dd->rcd[ctxt])
+ break;
+
+ if (ctxt == dd->num_rcv_contexts)
+ return -EBUSY;
+
+ uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node);
+ if (!uctxt) {
+ dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
+ return -ENOMEM;
+ }
+
+ uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
+ HFI1_CAP_KGET(NODROP_RHQ_FULL) |
+ HFI1_CAP_KGET(NODROP_EGR_FULL) |
+ HFI1_CAP_KGET(DMA_RTAIL);
+ uctxt->seq_cnt = 1;
+
+ /* Allocate and enable a PIO send context */
+ uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize,
+ uctxt->numa_id);
+
+ ret = uctxt->sc ? 0 : -ENOMEM;
+ if (ret)
+ goto bail;
+
+ dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n",
+ uctxt->sc->sw_index, uctxt->sc->hw_context);
+ ret = sc_enable(uctxt->sc);
+ if (ret)
+ goto bail;
+
+ if (dd->num_msix_entries)
+ hfi1_set_vnic_msix_info(uctxt);
+
+ hfi1_stats.sps_ctxts++;
+ dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
+ *vnic_ctxt = uctxt;
+
+ return ret;
+bail:
+ /*
+ * hfi1_free_ctxtdata() also releases send_context
+ * structure if uctxt->sc is not null
+ */
+ dd->rcd[uctxt->ctxt] = NULL;
+ hfi1_free_ctxtdata(dd, uctxt);
+ dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
+ return ret;
+}
+
+static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata *uctxt)
+{
+ unsigned long flags;
+
+ dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
+ flush_wc();
+
+ if (dd->num_msix_entries)
+ hfi1_reset_vnic_msix_info(uctxt);
+
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ /*
+ * Disable receive context and interrupt available, reset all
+ * RcvCtxtCtrl bits to default values.
+ */
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
+ HFI1_RCVCTRL_TIDFLOW_DIS |
+ HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
+ HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
+ /*
+ * VNIC contexts are allocated from user context pool.
+ * Release them back to user context pool.
+ *
+ * Reset context integrity checks to default.
+ * (writes to CSRs probably belong in chip.c)
+ */
+ write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
+ hfi1_pkt_default_send_ctxt_mask(dd, SC_USER));
+ sc_disable(uctxt->sc);
+
+ dd->send_contexts[uctxt->sc->sw_index].type = SC_USER;
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+
+ dd->rcd[uctxt->ctxt] = NULL;
+ uctxt->event_flags = 0;
+
+ hfi1_clear_tids(uctxt);
+ hfi1_clear_ctxt_pkey(dd, uctxt);
+
+ hfi1_stats.sps_ctxts--;
+ hfi1_free_ctxtdata(dd, uctxt);
+}
+
+void hfi1_vnic_setup(struct hfi1_devdata *dd)
+{
+ idr_init(&dd->vnic.vesw_idr);
+}
+
+void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
+{
+ idr_destroy(&dd->vnic.vesw_idr);
+}
+
+#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \
+ u64 *src64, *dst64; \
+ for (src64 = &qstats->x_grp.unicast, \
+ dst64 = &stats->x_grp.unicast; \
+ dst64 <= &stats->x_grp.s_1519_max;) { \
+ *dst64++ += *src64++; \
+ } \
+ } while (0)
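SUM_GRP_COUNTERS assumes the u64 counters of a group sit contiguously in struct opa_vnic_grp_stats, from unicast through s_1519_max. Written out for one group (illustration only), it is equivalent to:

/* accumulate the tx_grp counters of one queue into the vport totals */
u64 *src64 = &qstats->tx_grp.unicast;
u64 *dst64 = &stats->tx_grp.unicast;

while (dst64 <= &stats->tx_grp.s_1519_max)
	*dst64++ += *src64++;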
+
+/* hfi1_vnic_update_stats - update statistics */
+static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
+ struct opa_vnic_stats *stats)
+{
+ struct net_device *netdev = vinfo->netdev;
+ u8 i;
+
+ /* add tx counters on different queues */
+ for (i = 0; i < vinfo->num_tx_q; i++) {
+ struct opa_vnic_stats *qstats = &vinfo->stats[i];
+ struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
+
+ stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
+ stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
+ stats->tx_drop_state += qstats->tx_drop_state;
+ stats->tx_dlid_zero += qstats->tx_dlid_zero;
+
+ SUM_GRP_COUNTERS(stats, qstats, tx_grp);
+ stats->netstats.tx_packets += qnstats->tx_packets;
+ stats->netstats.tx_bytes += qnstats->tx_bytes;
+ }
+
+ /* add rx counters on different queues */
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct opa_vnic_stats *qstats = &vinfo->stats[i];
+ struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
+
+ stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
+ stats->netstats.rx_nohandler += qnstats->rx_nohandler;
+ stats->rx_drop_state += qstats->rx_drop_state;
+ stats->rx_oversize += qstats->rx_oversize;
+ stats->rx_runt += qstats->rx_runt;
+
+ SUM_GRP_COUNTERS(stats, qstats, rx_grp);
+ stats->netstats.rx_packets += qnstats->rx_packets;
+ stats->netstats.rx_bytes += qnstats->rx_bytes;
+ }
+
+ stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
+ stats->netstats.tx_carrier_errors +
+ stats->tx_drop_state + stats->tx_dlid_zero;
+ stats->netstats.tx_dropped = stats->netstats.tx_errors;
+
+ stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
+ stats->netstats.rx_nohandler +
+ stats->rx_drop_state + stats->rx_oversize +
+ stats->rx_runt;
+ stats->netstats.rx_dropped = stats->netstats.rx_errors;
+
+ netdev->stats.tx_packets = stats->netstats.tx_packets;
+ netdev->stats.tx_bytes = stats->netstats.tx_bytes;
+ netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
+ netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
+ netdev->stats.tx_errors = stats->netstats.tx_errors;
+ netdev->stats.tx_dropped = stats->netstats.tx_dropped;
+
+ netdev->stats.rx_packets = stats->netstats.rx_packets;
+ netdev->stats.rx_bytes = stats->netstats.rx_bytes;
+ netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
+ netdev->stats.multicast = stats->rx_grp.mcastbcast;
+ netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
+ netdev->stats.rx_errors = stats->netstats.rx_errors;
+ netdev->stats.rx_dropped = stats->netstats.rx_dropped;
+}
+
+/* update_len_counters - update pkt's len histogram counters */
+static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
+ int len)
+{
+ /* account for 4 byte FCS */
+ if (len >= 1515)
+ grp->s_1519_max++;
+ else if (len >= 1020)
+ grp->s_1024_1518++;
+ else if (len >= 508)
+ grp->s_512_1023++;
+ else if (len >= 252)
+ grp->s_256_511++;
+ else if (len >= 124)
+ grp->s_128_255++;
+ else if (len >= 61)
+ grp->s_65_127++;
+ else
+ grp->s_64++;
+}
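The thresholds are the usual Ethernet size buckets (64, 65-127, ..., 1024-1518, 1519+) lowered by the 4-byte FCS that is accounted for separately. For example, a 1500-byte skb (1504 bytes on the wire with FCS) satisfies len >= 1020 but not len >= 1515, so it is counted in s_1024_1518.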
+
+/* hfi1_vnic_update_tx_counters - update transmit counters */
+static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx, struct sk_buff *skb, int err)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
+ struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
+ u16 vlan_tci;
+
+ stats->netstats.tx_packets++;
+ stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;
+
+ update_len_counters(tx_grp, skb->len);
+
+ /* rest of the counts are for good packets only */
+ if (unlikely(err))
+ return;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ tx_grp->mcastbcast++;
+ else
+ tx_grp->unicast++;
+
+ if (!__vlan_get_tag(skb, &vlan_tci))
+ tx_grp->vlan++;
+ else
+ tx_grp->untagged++;
+}
+
+/* hfi1_vnic_update_rx_counters - update receive counters */
+static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx, struct sk_buff *skb, int err)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
+ struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
+ struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
+ u16 vlan_tci;
+
+ stats->netstats.rx_packets++;
+ stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;
+
+ update_len_counters(rx_grp, skb->len);
+
+ /* rest of the counts are for good packets only */
+ if (unlikely(err))
+ return;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ rx_grp->mcastbcast++;
+ else
+ rx_grp->unicast++;
+
+ if (!__vlan_get_tag(skb, &vlan_tci))
+ rx_grp->vlan++;
+ else
+ rx_grp->untagged++;
+}
+
+/* This function is overloaded for opa_vnic specific implementation */
+static void hfi1_vnic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ hfi1_vnic_update_stats(vinfo, vstats);
+}
+
+static u64 create_bypass_pbc(u32 vl, u32 dw_len)
+{
+ u64 pbc;
+
+ pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
+ | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
+ | PBC_PACKET_BYPASS
+ | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
+ | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;
+
+ return pbc;
+}
+
+/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
+static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx)
+{
+ netif_stop_subqueue(vinfo->netdev, q_idx);
+ if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
+ return;
+
+ netif_start_subqueue(vinfo->netdev, q_idx);
+}
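Stopping the subqueue before re-checking availability avoids leaving it stopped when descriptors were freed between the check and the stop. The matching wakeup on the SDMA completion side is not shown in this file; a hypothetical sketch of that path, using only interfaces declared above:

/* assumed SDMA-side wakeup once descriptors free up */
if (__netif_subqueue_stopped(vinfo->netdev, q_idx) &&
    hfi1_vnic_sdma_write_avail(vinfo, q_idx))
	netif_wake_subqueue(vinfo->netdev, q_idx);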
+
+static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ u8 pad_len, q_idx = skb->queue_mapping;
+ struct hfi1_devdata *dd = vinfo->dd;
+ struct opa_vnic_skb_mdata *mdata;
+ u32 pkt_len, total_len;
+ int err = -EINVAL;
+ u64 pbc;
+
+ v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
+ if (unlikely(!netif_oper_up(netdev))) {
+ vinfo->stats[q_idx].tx_drop_state++;
+ goto tx_finish;
+ }
+
+ /* take out meta data */
+ mdata = (struct opa_vnic_skb_mdata *)skb->data;
+ skb_pull(skb, sizeof(*mdata));
+ if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
+ vinfo->stats[q_idx].tx_dlid_zero++;
+ goto tx_finish;
+ }
+
+ /* add tail padding (for 8-byte size alignment) and icrc */
+ pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
+ pad_len += OPA_VNIC_ICRC_TAIL_LEN;
+
+ /*
+ * pkt_len is how much data we have to write, including the header and data.
+ * total_len is the length of the packet in dwords plus the PBC, and
+ * should not include the CRC.
+ */
+ pkt_len = (skb->len + pad_len) >> 2;
+ total_len = pkt_len + 2; /* PBC + packet */
+
+ pbc = create_bypass_pbc(mdata->vl, total_len);
+
+ skb_get(skb);
+ v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
+ err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
+ if (unlikely(err)) {
+ if (err == -ENOMEM)
+ vinfo->stats[q_idx].netstats.tx_fifo_errors++;
+ else if (err != -EBUSY)
+ vinfo->stats[q_idx].netstats.tx_carrier_errors++;
+ }
+ /* remove the header before updating tx counters */
+ skb_pull(skb, OPA_VNIC_HDR_LEN);
+
+ if (unlikely(err == -EBUSY)) {
+ hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_BUSY;
+ }
+
+tx_finish:
+ /* update tx counters */
+ hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+}
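A quick worked example of the padding math above, assuming OPA_VNIC_ICRC_TAIL_LEN is 5 (4-byte ICRC plus 1-byte tail; the exact value comes from opa_vnic.h and is an assumption here):

/* skb->len = 100:
 * pad_len   = -(100 + 5) & 0x7 = 7	-> 100 + 5 + 7 = 112, 8-byte aligned
 * pad_len  += 5               = 12	(consistent with HFI1_VNIC_MAX_PAD == 12)
 * pkt_len   = (100 + 12) >> 2 = 28 dwords
 * total_len = 28 + 2          = 30 dwords (2-dword PBC)
 */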
+
+static u16 hfi1_vnic_select_queue(struct net_device *netdev,
+ struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ struct opa_vnic_skb_mdata *mdata;
+ struct sdma_engine *sde;
+
+ mdata = (struct opa_vnic_skb_mdata *)skb->data;
+ sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
+ return sde->this_idx;
+}
+
+/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
+static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
+ struct sk_buff *skb)
+{
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
+ int rc = -EFAULT;
+
+ skb_pull(skb, OPA_VNIC_HDR_LEN);
+
+ /* Validate Packet length */
+ if (unlikely(skb->len > max_len))
+ vinfo->stats[rxq->idx].rx_oversize++;
+ else if (unlikely(skb->len < ETH_ZLEN))
+ vinfo->stats[rxq->idx].rx_runt++;
+ else
+ rc = 0;
+ return rc;
+}
+
+static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
+{
+ unsigned char *pad_info;
+ struct sk_buff *skb;
+
+ skb = skb_dequeue(&rxq->skbq);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* remove tail padding and icrc */
+ pad_info = skb->data + skb->len - 1;
+ skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
+ ((*pad_info) & 0x7)));
+
+ return skb;
+}
+
+/* hfi1_vnic_handle_rx - handle skb receive */
+static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
+ int *work_done, int work_to_do)
+{
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ struct sk_buff *skb;
+ int rc;
+
+ while (1) {
+ if (*work_done >= work_to_do)
+ break;
+
+ skb = hfi1_vnic_get_skb(rxq);
+ if (unlikely(!skb))
+ break;
+
+ rc = hfi1_vnic_decap_skb(rxq, skb);
+ /* update rx counters */
+ hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
+ if (unlikely(rc)) {
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+
+ skb_checksum_none_assert(skb);
+ skb->protocol = eth_type_trans(skb, rxq->netdev);
+
+ napi_gro_receive(&rxq->napi, skb);
+ (*work_done)++;
+ }
+}
+
+/* hfi1_vnic_napi - napi receive polling callback function */
+static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
+{
+ struct hfi1_vnic_rx_queue *rxq = container_of(napi,
+ struct hfi1_vnic_rx_queue, napi);
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ int work_done = 0;
+
+ v_dbg("napi %d budget %d\n", rxq->idx, budget);
+ hfi1_vnic_handle_rx(rxq, &work_done, budget);
+
+ v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
+ if (work_done < budget)
+ napi_complete(napi);
+
+ return work_done;
+}
+
+void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_devdata *dd = packet->rcd->dd;
+ struct hfi1_vnic_vport_info *vinfo = NULL;
+ struct hfi1_vnic_rx_queue *rxq;
+ struct sk_buff *skb;
+ int l4_type, vesw_id = -1;
+ u8 q_idx;
+
+ l4_type = HFI1_GET_L4_TYPE(packet->ebuf);
+ if (likely(l4_type == OPA_VNIC_L4_ETHR)) {
+ vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
+ vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);
+
+ /*
+ * In case of invalid vesw id, count the error on
+ * the first available vport.
+ */
+ if (unlikely(!vinfo)) {
+ struct hfi1_vnic_vport_info *vinfo_tmp;
+ int id_tmp = 0;
+
+ vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
+ if (vinfo_tmp) {
+ spin_lock(&vport_cntr_lock);
+ vinfo_tmp->stats[0].netstats.rx_nohandler++;
+ spin_unlock(&vport_cntr_lock);
+ }
+ }
+ }
+
+ if (unlikely(!vinfo)) {
+ dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
+ l4_type, vesw_id, packet->rcd->ctxt);
+ return;
+ }
+
+ q_idx = packet->rcd->vnic_q_idx;
+ rxq = &vinfo->rxq[q_idx];
+ if (unlikely(!netif_oper_up(vinfo->netdev))) {
+ vinfo->stats[q_idx].rx_drop_state++;
+ skb_queue_purge(&rxq->skbq);
+ return;
+ }
+
+ if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
+ vinfo->stats[q_idx].netstats.rx_fifo_errors++;
+ return;
+ }
+
+ skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
+ if (unlikely(!skb)) {
+ vinfo->stats[q_idx].netstats.rx_fifo_errors++;
+ return;
+ }
+
+ memcpy(skb->data, packet->ebuf, packet->tlen);
+ skb_put(skb, packet->tlen);
+ skb_queue_tail(&rxq->skbq, skb);
+
+ if (napi_schedule_prep(&rxq->napi)) {
+ v_dbg("napi %d scheduling\n", q_idx);
+ __napi_schedule(&rxq->napi);
+ }
+}
+
+static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ struct net_device *netdev = vinfo->netdev;
+ int i, rc;
+
+ /* ensure virtual eth switch id is valid */
+ if (!vinfo->vesw_id)
+ return -EINVAL;
+
+ rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
+ vinfo->vesw_id + 1, GFP_NOWAIT);
+ if (rc < 0)
+ return rc;
+
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ skb_queue_head_init(&rxq->skbq);
+ napi_enable(&rxq->napi);
+ }
+
+ netif_carrier_on(netdev);
+ netif_tx_start_all_queues(netdev);
+ set_bit(HFI1_VNIC_UP, &vinfo->flags);
+
+ return 0;
+}
+
+static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ u8 i;
+
+ clear_bit(HFI1_VNIC_UP, &vinfo->flags);
+ netif_carrier_off(vinfo->netdev);
+ netif_tx_disable(vinfo->netdev);
+ idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);
+
+ /* ensure irqs see the change */
+ hfi1_vnic_synchronize_irq(dd);
+
+ /* remove unread skbs */
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ napi_disable(&rxq->napi);
+ skb_queue_purge(&rxq->skbq);
+ }
+}
+
+static int hfi1_netdev_open(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ int rc;
+
+ mutex_lock(&vinfo->lock);
+ rc = hfi1_vnic_up(vinfo);
+ mutex_unlock(&vinfo->lock);
+ return rc;
+}
+
+static int hfi1_netdev_close(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ mutex_lock(&vinfo->lock);
+ if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
+ hfi1_vnic_down(vinfo);
+ mutex_unlock(&vinfo->lock);
+ return 0;
+}
+
+static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata **vnic_ctxt)
+{
+ int rc;
+
+ rc = allocate_vnic_ctxt(dd, vnic_ctxt);
+ if (rc) {
+ dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
+ return rc;
+ }
+
+ rc = setup_vnic_ctxt(dd, *vnic_ctxt);
+ if (rc) {
+ dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
+ deallocate_vnic_ctxt(dd, *vnic_ctxt);
+ *vnic_ctxt = NULL;
+ }
+
+ return rc;
+}
+
+static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ int i, rc = 0;
+
+ mutex_lock(&hfi1_mutex);
+ if (!dd->vnic.num_vports) {
+ rc = hfi1_vnic_txreq_init(dd);
+ if (rc)
+ goto txreq_fail;
+
+ dd->vnic.msix_idx = dd->first_dyn_msix_idx;
+ }
+
+ for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
+ rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
+ if (rc)
+ break;
+ dd->vnic.ctxt[i]->vnic_q_idx = i;
+ }
+
+ if (i < vinfo->num_rx_q) {
+ /*
+ * If the required number of contexts is not
+ * allocated successfully, then the remaining
+ * contexts are released.
+ */
+ while (i-- > dd->vnic.num_ctxt) {
+ deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+ dd->vnic.ctxt[i] = NULL;
+ }
+ goto alloc_fail;
+ }
+
+ if (dd->vnic.num_ctxt != i) {
+ dd->vnic.num_ctxt = i;
+ hfi1_init_vnic_rsm(dd);
+ }
+
+ dd->vnic.num_vports++;
+ hfi1_vnic_sdma_init(vinfo);
+alloc_fail:
+ if (!dd->vnic.num_vports)
+ hfi1_vnic_txreq_deinit(dd);
+txreq_fail:
+ mutex_unlock(&hfi1_mutex);
+ return rc;
+}
+
+static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ int i;
+
+ mutex_lock(&hfi1_mutex);
+ if (--dd->vnic.num_vports == 0) {
+ for (i = 0; i < dd->vnic.num_ctxt; i++) {
+ deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+ dd->vnic.ctxt[i] = NULL;
+ }
+ hfi1_deinit_vnic_rsm(dd);
+ dd->vnic.num_ctxt = 0;
+ hfi1_vnic_txreq_deinit(dd);
+ }
+ mutex_unlock(&hfi1_mutex);
+}
+
+static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ bool reopen = false;
+
+ /*
+ * If vesw_id is being changed, and if the vnic port is up,
+ * reset the vnic port to ensure new vesw_id gets picked up
+ */
+ if (id != vinfo->vesw_id) {
+ mutex_lock(&vinfo->lock);
+ if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
+ hfi1_vnic_down(vinfo);
+ reopen = true;
+ }
+
+ vinfo->vesw_id = id;
+ if (reopen)
+ hfi1_vnic_up(vinfo);
+
+ mutex_unlock(&vinfo->lock);
+ }
+}
+
+/* netdev ops */
+static const struct net_device_ops hfi1_netdev_ops = {
+ .ndo_open = hfi1_netdev_open,
+ .ndo_stop = hfi1_netdev_close,
+ .ndo_start_xmit = hfi1_netdev_start_xmit,
+ .ndo_select_queue = hfi1_vnic_select_queue,
+ .ndo_get_stats64 = hfi1_vnic_get_stats64,
+};
+
+struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *))
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(device);
+ struct hfi1_vnic_vport_info *vinfo;
+ struct net_device *netdev;
+ struct rdma_netdev *rn;
+ int i, size, rc;
+
+ if (!port_num || (port_num > dd->num_pports))
+ return ERR_PTR(-EINVAL);
+
+ if (type != RDMA_NETDEV_OPA_VNIC)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
+ netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
+ dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT);
+ if (!netdev)
+ return ERR_PTR(-ENOMEM);
+
+ rn = netdev_priv(netdev);
+ vinfo = opa_vnic_dev_priv(netdev);
+ vinfo->dd = dd;
+ vinfo->num_tx_q = dd->chip_sdma_engines;
+ vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT;
+ vinfo->netdev = netdev;
+ rn->set_id = hfi1_vnic_set_vesw_id;
+
+ netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
+ netdev->hw_features = netdev->features;
+ netdev->vlan_features = netdev->features;
+ netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
+ netdev->netdev_ops = &hfi1_netdev_ops;
+ mutex_init(&vinfo->lock);
+
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ rxq->idx = i;
+ rxq->vinfo = vinfo;
+ rxq->netdev = netdev;
+ netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
+ }
+
+ rc = hfi1_vnic_init(vinfo);
+ if (rc)
+ goto init_fail;
+
+ return netdev;
+init_fail:
+ mutex_destroy(&vinfo->lock);
+ free_netdev(netdev);
+ return ERR_PTR(rc);
+}
+
+void hfi1_vnic_free_rn(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ hfi1_vnic_deinit(vinfo);
+ mutex_destroy(&vinfo->lock);
+ free_netdev(netdev);
+}
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
new file mode 100644
index 000000000000..51a817d3aa14
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for VNIC SDMA functionality
+ */
+
+#include "sdma.h"
+#include "vnic.h"
+
+#define HFI1_VNIC_SDMA_Q_ACTIVE BIT(0)
+#define HFI1_VNIC_SDMA_Q_DEFERRED BIT(1)
+
+#define HFI1_VNIC_TXREQ_NAME_LEN 32
+#define HFI1_VNIC_SDMA_DESC_WTRMRK 64
+#define HFI1_VNIC_SDMA_RETRY_COUNT 1
+
+/*
+ * struct vnic_txreq - VNIC transmit descriptor
+ * @txreq: sdma transmit request
+ * @sdma: vnic sdma pointer
+ * @skb: skb to send
+ * @pad: pad buffer
+ * @plen: pad length
+ * @pbc_val: pbc value
+ * @retry_count: tx retry count
+ */
+struct vnic_txreq {
+ struct sdma_txreq txreq;
+ struct hfi1_vnic_sdma *sdma;
+
+ struct sk_buff *skb;
+ unsigned char pad[HFI1_VNIC_MAX_PAD];
+ u16 plen;
+ __le64 pbc_val;
+
+ u32 retry_count;
+};
+
+static void vnic_sdma_complete(struct sdma_txreq *txreq,
+ int status)
+{
+ struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
+ struct hfi1_vnic_sdma *vnic_sdma = tx->sdma;
+
+ sdma_txclean(vnic_sdma->dd, txreq);
+ dev_kfree_skb_any(tx->skb);
+ kmem_cache_free(vnic_sdma->dd->vnic.txreq_cache, tx);
+}
+
+static noinline int build_vnic_ulp_payload(struct sdma_engine *sde,
+ struct vnic_txreq *tx)
+{
+ int i, ret = 0;
+
+ ret = sdma_txadd_kvaddr(
+ sde->dd,
+ &tx->txreq,
+ tx->skb->data,
+ skb_headlen(tx->skb));
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ for (i = 0; i < skb_shinfo(tx->skb)->nr_frags; i++) {
+ struct skb_frag_struct *frag = &skb_shinfo(tx->skb)->frags[i];
+
+ /* combine physically contiguous fragments later? */
+ ret = sdma_txadd_page(sde->dd,
+ &tx->txreq,
+ skb_frag_page(frag),
+ frag->page_offset,
+ skb_frag_size(frag));
+ if (unlikely(ret))
+ goto bail_txadd;
+ }
+
+ if (tx->plen)
+ ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq,
+ tx->pad + HFI1_VNIC_MAX_PAD - tx->plen,
+ tx->plen);
+
+bail_txadd:
+ return ret;
+}
+
+static int build_vnic_tx_desc(struct sdma_engine *sde,
+ struct vnic_txreq *tx,
+ u64 pbc)
+{
+ int ret = 0;
+ u16 hdrbytes = 2 << 2; /* PBC */
+
+ ret = sdma_txinit_ahg(
+ &tx->txreq,
+ 0,
+ hdrbytes + tx->skb->len + tx->plen,
+ 0,
+ 0,
+ NULL,
+ 0,
+ vnic_sdma_complete);
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ /* add pbc */
+ tx->pbc_val = cpu_to_le64(pbc);
+ ret = sdma_txadd_kvaddr(
+ sde->dd,
+ &tx->txreq,
+ &tx->pbc_val,
+ hdrbytes);
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ /* add the ulp payload */
+ ret = build_vnic_ulp_payload(sde, tx);
+bail_txadd:
+ return ret;
+}
+
+/* setup the last plen bytes of pad */
+static inline void hfi1_vnic_update_pad(unsigned char *pad, u8 plen)
+{
+ pad[HFI1_VNIC_MAX_PAD - 1] = plen - OPA_VNIC_ICRC_TAIL_LEN;
+}
+
+int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen)
+{
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx];
+ struct sdma_engine *sde = vnic_sdma->sde;
+ struct vnic_txreq *tx;
+ int ret = -ECOMM;
+
+ if (unlikely(READ_ONCE(vnic_sdma->state) != HFI1_VNIC_SDMA_Q_ACTIVE))
+ goto tx_err;
+
+ if (unlikely(!sde || !sdma_running(sde)))
+ goto tx_err;
+
+ tx = kmem_cache_alloc(dd->vnic.txreq_cache, GFP_ATOMIC);
+ if (unlikely(!tx)) {
+ ret = -ENOMEM;
+ goto tx_err;
+ }
+
+ tx->sdma = vnic_sdma;
+ tx->skb = skb;
+ hfi1_vnic_update_pad(tx->pad, plen);
+ tx->plen = plen;
+ ret = build_vnic_tx_desc(sde, tx, pbc);
+ if (unlikely(ret))
+ goto free_desc;
+ tx->retry_count = 0;
+
+ ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq);
+ /* When -ECOMM, sdma callback will be called with ABORT status */
+ if (unlikely(ret && unlikely(ret != -ECOMM)))
+ goto free_desc;
+
+ return ret;
+
+free_desc:
+ sdma_txclean(dd, &tx->txreq);
+ kmem_cache_free(dd->vnic.txreq_cache, tx);
+tx_err:
+ if (ret != -EBUSY)
+ dev_kfree_skb_any(skb);
+ return ret;
+}
+
+/*
+ * hfi1_vnic_sdma_sleep - vnic sdma sleep function
+ *
+ * This function gets called from sdma_send_txreq() when there are not enough
+ * sdma descriptors available to send the packet. It adds the Tx queue's wait
+ * structure to the sdma engine's dmawait list so that it is woken up when
+ * descriptors become available.
+ */
+static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
+ struct iowait *wait,
+ struct sdma_txreq *txreq,
+ unsigned int seq)
+{
+ struct hfi1_vnic_sdma *vnic_sdma =
+ container_of(wait, struct hfi1_vnic_sdma, wait);
+ struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
+ struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
+
+ if (sdma_progress(sde, seq, txreq))
+ if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT)
+ return -EAGAIN;
+
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
+ write_seqlock(&dev->iowait_lock);
+ if (list_empty(&vnic_sdma->wait.list))
+ list_add_tail(&vnic_sdma->wait.list, &sde->dmawait);
+ write_sequnlock(&dev->iowait_lock);
+ return -EBUSY;
+}
+
+/*
+ * hfi1_vnic_sdma_wakeup - vnic sdma wakeup function
+ *
+ * This function gets called when SDMA descriptors become available and the Tx
+ * queue's wait structure was previously added to the sdma engine's dmawait
+ * list. It notifies the upper driver about the Tx queue wakeup.
+ */
+static void hfi1_vnic_sdma_wakeup(struct iowait *wait, int reason)
+{
+ struct hfi1_vnic_sdma *vnic_sdma =
+ container_of(wait, struct hfi1_vnic_sdma, wait);
+ struct hfi1_vnic_vport_info *vinfo = vnic_sdma->vinfo;
+
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE;
+ if (__netif_subqueue_stopped(vinfo->netdev, vnic_sdma->q_idx))
+ netif_wake_subqueue(vinfo->netdev, vnic_sdma->q_idx);
+}
+
+inline bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx)
+{
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx];
+
+ return (READ_ONCE(vnic_sdma->state) == HFI1_VNIC_SDMA_Q_ACTIVE);
+}
+
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
+{
+ int i;
+
+ for (i = 0; i < vinfo->num_tx_q; i++) {
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
+
+ iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep,
+ hfi1_vnic_sdma_wakeup, NULL);
+ vnic_sdma->sde = &vinfo->dd->per_sdma[i];
+ vnic_sdma->dd = vinfo->dd;
+ vnic_sdma->vinfo = vinfo;
+ vnic_sdma->q_idx = i;
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE;
+
+ /* Add a free descriptor watermark for wakeups */
+ if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
+ INIT_LIST_HEAD(&vnic_sdma->stx.list);
+ vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
+ list_add_tail(&vnic_sdma->stx.list,
+ &vnic_sdma->wait.tx_head);
+ }
+ }
+}
+
+static void hfi1_vnic_txreq_kmem_cache_ctor(void *obj)
+{
+ struct vnic_txreq *tx = (struct vnic_txreq *)obj;
+
+ memset(tx, 0, sizeof(*tx));
+}
+
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd)
+{
+ char buf[HFI1_VNIC_TXREQ_NAME_LEN];
+
+ snprintf(buf, sizeof(buf), "hfi1_%u_vnic_txreq_cache", dd->unit);
+ dd->vnic.txreq_cache = kmem_cache_create(buf,
+ sizeof(struct vnic_txreq),
+ 0, SLAB_HWCACHE_ALIGN,
+ hfi1_vnic_txreq_kmem_cache_ctor);
+ if (!dd->vnic.txreq_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd)
+{
+ kmem_cache_destroy(dd->vnic.txreq_cache);
+ dd->vnic.txreq_cache = NULL;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 0ac294db3b29..f78a733a63ec 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -39,7 +39,8 @@
#define HNS_ROCE_VLAN_SL_BIT_MASK 7
#define HNS_ROCE_VLAN_SL_SHIFT 13
-struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr,
+struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device);
@@ -48,6 +49,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr,
struct hns_roce_ah *ah;
u16 vlan_tag = 0xffff;
struct in6_addr in6;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
union ib_gid sgid;
int ret;
@@ -56,15 +58,20 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr,
return ERR_PTR(-ENOMEM);
/* Get mac address */
- memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(ah_attr->grh.dgid.raw));
- if (rdma_is_multicast_addr(&in6))
+ memcpy(&in6, grh->dgid.raw, sizeof(grh->dgid.raw));
+ if (rdma_is_multicast_addr(&in6)) {
rdma_get_mcast_mac(&in6, ah->av.mac);
- else
- memcpy(ah->av.mac, ah_attr->dmac, sizeof(ah_attr->dmac));
+ } else {
+ u8 *dmac = rdma_ah_retrieve_dmac(ah_attr);
+
+ if (!dmac)
+ return ERR_PTR(-EINVAL);
+ memcpy(ah->av.mac, dmac, ETH_ALEN);
+ }
/* Get source gid */
- ret = ib_get_cached_gid(ibpd->device, ah_attr->port_num,
- ah_attr->grh.sgid_index, &sgid, &gid_attr);
+ ret = ib_get_cached_gid(ibpd->device, rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index, &sgid, &gid_attr);
if (ret) {
dev_err(dev, "get sgid failed! ret = %d\n", ret);
kfree(ah);
@@ -78,45 +85,46 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr,
}
if (vlan_tag < 0x1000)
- vlan_tag |= (ah_attr->sl & HNS_ROCE_VLAN_SL_BIT_MASK) <<
+ vlan_tag |= (rdma_ah_get_sl(ah_attr) &
+ HNS_ROCE_VLAN_SL_BIT_MASK) <<
HNS_ROCE_VLAN_SL_SHIFT;
- ah->av.port_pd = cpu_to_be32(to_hr_pd(ibpd)->pdn | (ah_attr->port_num <<
+ ah->av.port_pd = cpu_to_be32(to_hr_pd(ibpd)->pdn |
+ (rdma_ah_get_port_num(ah_attr) <<
HNS_ROCE_PORT_NUM_SHIFT));
- ah->av.gid_index = ah_attr->grh.sgid_index;
+ ah->av.gid_index = grh->sgid_index;
ah->av.vlan = cpu_to_le16(vlan_tag);
dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index,
ah->av.vlan);
- if (ah_attr->static_rate)
+ if (rdma_ah_get_static_rate(ah_attr))
ah->av.stat_rate = IB_RATE_10_GBPS;
- memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, HNS_ROCE_GID_SIZE);
- ah->av.sl_tclass_flowlabel = cpu_to_le32(ah_attr->sl <<
+ memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
+ ah->av.sl_tclass_flowlabel = cpu_to_le32(rdma_ah_get_sl(ah_attr) <<
HNS_ROCE_SL_SHIFT);
return &ah->ibah;
}
-int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct hns_roce_ah *ah = to_hr_ah(ibah);
memset(ah_attr, 0, sizeof(*ah_attr));
- ah_attr->sl = le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
- HNS_ROCE_SL_SHIFT;
- ah_attr->port_num = le32_to_cpu(ah->av.port_pd) >>
- HNS_ROCE_PORT_NUM_SHIFT;
- ah_attr->static_rate = ah->av.stat_rate;
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.traffic_class = le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
- HNS_ROCE_TCLASS_SHIFT;
- ah_attr->grh.flow_label = le32_to_cpu(ah->av.sl_tclass_flowlabel) &
- HNS_ROCE_FLOW_LABLE_MASK;
- ah_attr->grh.hop_limit = ah->av.hop_limit;
- ah_attr->grh.sgid_index = ah->av.gid_index;
- memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, HNS_ROCE_GID_SIZE);
+ rdma_ah_set_sl(ah_attr, (le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
+ HNS_ROCE_SL_SHIFT));
+ rdma_ah_set_port_num(ah_attr, (le32_to_cpu(ah->av.port_pd) >>
+ HNS_ROCE_PORT_NUM_SHIFT));
+ rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate);
+ rdma_ah_set_grh(ah_attr, NULL,
+ (le32_to_cpu(ah->av.sl_tclass_flowlabel) &
+ HNS_ROCE_FLOW_LABLE_MASK), ah->av.gid_index,
+ ah->av.hop_limit,
+ (le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
+ HNS_ROCE_TCLASS_SHIFT));
+ rdma_ah_set_dgid_raw(ah_attr, ah->av.dgid);
return 0;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 8c1f7a6f84d2..b94dcd823ad1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -299,9 +299,9 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev)
struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd;
int i;
- hr_cmd->context = kmalloc(hr_cmd->max_cmds *
- sizeof(struct hns_roce_cmd_context),
- GFP_KERNEL);
+ hr_cmd->context = kmalloc_array(hr_cmd->max_cmds,
+ sizeof(*hr_cmd->context),
+ GFP_KERNEL);
if (!hr_cmd->context)
return -ENOMEM;
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 589496c8fb9e..b89fd711019e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -219,8 +219,7 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
return PTR_ERR(*umem);
ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem),
- ilog2((unsigned int)(*umem)->page_size),
- &buf->hr_mtt);
+ (*umem)->page_shift, &buf->hr_mtt);
if (ret)
goto err_buf;
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 1a6cb5d7a0dd..e493a61e14e1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -687,9 +687,10 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
unsigned long obj, int cnt,
int rr);
-struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+struct ib_ah *hns_roce_create_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata);
-int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
int hns_roce_destroy_ah(struct ib_ah *ah);
struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index b8111b0c8877..37d5d29597a4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -33,6 +33,7 @@
#include <linux/platform_device.h>
#include <linux/acpi.h>
#include <linux/etherdevice.h>
+#include <linux/of.h>
#include <rdma/ib_umem.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
@@ -657,6 +658,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
struct hns_roce_qp *hr_qp;
struct ib_cq *cq;
struct ib_pd *pd;
+ union ib_gid dgid;
u64 subnet_prefix;
int attr_mask = 0;
int i;
@@ -707,12 +709,8 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
attr.rnr_retry = 7;
attr.timeout = 0x12;
attr.path_mtu = IB_MTU_256;
- attr.ah_attr.ah_flags = 1;
- attr.ah_attr.static_rate = 3;
- attr.ah_attr.grh.sgid_index = 0;
- attr.ah_attr.grh.hop_limit = 1;
- attr.ah_attr.grh.flow_label = 0;
- attr.ah_attr.grh.traffic_class = 0;
+ rdma_ah_set_grh(&attr.ah_attr, NULL, 0, 0, 1, 0);
+ rdma_ah_set_static_rate(&attr.ah_attr, 3);
subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
@@ -741,24 +739,22 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
hr_qp->ibqp.recv_cq = cq;
hr_qp->ibqp.send_cq = cq;
- attr.ah_attr.port_num = phy_port + 1;
- attr.ah_attr.sl = sl;
+ rdma_ah_set_port_num(&attr.ah_attr, phy_port + 1);
+ rdma_ah_set_sl(&attr.ah_attr, phy_port + 1);
attr.port_num = phy_port + 1;
attr.dest_qp_num = hr_qp->qpn;
- memcpy(attr.ah_attr.dmac, hr_dev->dev_addr[phy_port],
+ memcpy(rdma_ah_retrieve_dmac(&attr.ah_attr),
+ hr_dev->dev_addr[phy_port],
MAC_ADDR_OCTET_NUM);
- memcpy(attr.ah_attr.grh.dgid.raw,
- &subnet_prefix, sizeof(u64));
- memcpy(&attr.ah_attr.grh.dgid.raw[8],
- hr_dev->dev_addr[phy_port], 3);
- memcpy(&attr.ah_attr.grh.dgid.raw[13],
- hr_dev->dev_addr[phy_port] + 3, 3);
- attr.ah_attr.grh.dgid.raw[11] = 0xff;
- attr.ah_attr.grh.dgid.raw[12] = 0xfe;
- attr.ah_attr.grh.dgid.raw[8] ^= 2;
-
+ memcpy(&dgid.raw, &subnet_prefix, sizeof(u64));
+ memcpy(&dgid.raw[8], hr_dev->dev_addr[phy_port], 3);
+ memcpy(&dgid.raw[13], hr_dev->dev_addr[phy_port] + 3, 3);
+ dgid.raw[11] = 0xff;
+ dgid.raw[12] = 0xfe;
+ dgid.raw[8] ^= 2;
+ rdma_ah_set_dgid_raw(&attr.ah_attr, dgid.raw);
attr_mask |= IB_QP_PORT;
ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
@@ -1725,7 +1721,7 @@ int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
roce_set_field(mpt_entry->mpt_byte_64, MPT_BYTE_64_L_KEY_IDX_H_M,
MPT_BYTE_64_L_KEY_IDX_H_S, mtpt_idx >> MTPT_IDX_SHIFT);
- /* DMA momery regsiter */
+ /* DMA memory register */
if (mr->type == MR_TYPE_DMA)
return 0;
@@ -1851,6 +1847,7 @@ void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
u32 doorbell[2];
doorbell[0] = cons_index & ((hr_cq->cq_depth << 1) - 1);
+ doorbell[1] = 0;
roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3);
@@ -2565,6 +2562,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct device *dev = &hr_dev->pdev->dev;
struct hns_roce_qp_context *context;
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
dma_addr_t dma_handle_2 = 0;
dma_addr_t dma_handle = 0;
uint32_t doorbell[2] = {0};
@@ -2573,6 +2571,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
int ret = -EINVAL;
u64 *mtts = NULL;
int port;
+ u8 port_num;
u8 *dmac;
u8 *smac;
@@ -2739,7 +2738,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
goto out;
}
- dmac = (u8 *)attr->ah_attr.dmac;
+ dmac = (u8 *)attr->ah_attr.roce.dmac;
context->sq_rq_bt_l = (u32)(dma_handle);
roce_set_field(context->qpc_bytes_24,
@@ -2780,7 +2779,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
roce_set_bit(context->qpc_bytes_32,
QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S,
- attr->ah_attr.ah_flags);
+ rdma_ah_get_ah_flags(&attr->ah_attr));
roce_set_field(context->qpc_bytes_32,
QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M,
QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S,
@@ -2792,12 +2791,13 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
attr->dest_qp_num);
/* Configure GID index */
+ port_num = rdma_ah_get_port_num(&attr->ah_attr);
roce_set_field(context->qpc_bytes_36,
QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M,
QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S,
- hns_get_gid_index(hr_dev,
- attr->ah_attr.port_num - 1,
- attr->ah_attr.grh.sgid_index));
+ hns_get_gid_index(hr_dev,
+ port_num - 1,
+ grh->sgid_index));
memcpy(&(context->dmac_l), dmac, 4);
@@ -2808,26 +2808,26 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
roce_set_field(context->qpc_bytes_44,
QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M,
QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S,
- attr->ah_attr.static_rate);
+ rdma_ah_get_static_rate(&attr->ah_attr));
roce_set_field(context->qpc_bytes_44,
QP_CONTEXT_QPC_BYTES_44_HOPLMT_M,
QP_CONTEXT_QPC_BYTES_44_HOPLMT_S,
- attr->ah_attr.grh.hop_limit);
+ grh->hop_limit);
roce_set_field(context->qpc_bytes_48,
QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M,
QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S,
- attr->ah_attr.grh.flow_label);
+ grh->flow_label);
roce_set_field(context->qpc_bytes_48,
QP_CONTEXT_QPC_BYTES_48_TCLASS_M,
QP_CONTEXT_QPC_BYTES_48_TCLASS_S,
- attr->ah_attr.grh.traffic_class);
+ grh->traffic_class);
roce_set_field(context->qpc_bytes_48,
QP_CONTEXT_QPC_BYTES_48_MTU_M,
QP_CONTEXT_QPC_BYTES_48_MTU_S, attr->path_mtu);
- memcpy(context->dgid, attr->ah_attr.grh.dgid.raw,
- sizeof(attr->ah_attr.grh.dgid.raw));
+ memcpy(context->dgid, grh->dgid.raw,
+ sizeof(grh->dgid.raw));
dev_dbg(dev, "dmac:%x :%lx\n", context->dmac_l,
roce_get_field(context->qpc_bytes_44,
@@ -2907,8 +2907,9 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
hr_qp->phy_port);
roce_set_field(context->qpc_bytes_156,
QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S, attr->ah_attr.sl);
- hr_qp->sl = attr->ah_attr.sl;
+ QP_CONTEXT_QPC_BYTES_156_SL_S,
+ rdma_ah_get_sl(&attr->ah_attr));
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
} else if (cur_state == IB_QPS_RTR &&
new_state == IB_QPS_RTS) {
/* If exist optional param, return error */
@@ -3019,8 +3020,9 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
hr_qp->phy_port);
roce_set_field(context->qpc_bytes_156,
QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S, attr->ah_attr.sl);
- hr_qp->sl = attr->ah_attr.sl;
+ QP_CONTEXT_QPC_BYTES_156_SL_S,
+ rdma_ah_get_sl(&attr->ah_attr));
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
roce_set_field(context->qpc_bytes_156,
QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S,
@@ -3355,28 +3357,33 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
if (hr_qp->ibqp.qp_type == IB_QPT_RC ||
hr_qp->ibqp.qp_type == IB_QPT_UC) {
- qp_attr->ah_attr.sl = roce_get_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S);
- qp_attr->ah_attr.grh.flow_label = roce_get_field(
- context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S);
- qp_attr->ah_attr.grh.sgid_index = roce_get_field(
- context->qpc_bytes_36,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S);
- qp_attr->ah_attr.grh.hop_limit = roce_get_field(
- context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_M,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_S);
- qp_attr->ah_attr.grh.traffic_class = roce_get_field(
- context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_M,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_S);
-
- memcpy(qp_attr->ah_attr.grh.dgid.raw, context->dgid,
- sizeof(qp_attr->ah_attr.grh.dgid.raw));
+ struct ib_global_route *grh =
+ rdma_ah_retrieve_grh(&qp_attr->ah_attr);
+
+ rdma_ah_set_sl(&qp_attr->ah_attr,
+ roce_get_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_SL_M,
+ QP_CONTEXT_QPC_BYTES_156_SL_S));
+ rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH);
+ grh->flow_label =
+ roce_get_field(context->qpc_bytes_48,
+ QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M,
+ QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S);
+ grh->sgid_index =
+ roce_get_field(context->qpc_bytes_36,
+ QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M,
+ QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S);
+ grh->hop_limit =
+ roce_get_field(context->qpc_bytes_44,
+ QP_CONTEXT_QPC_BYTES_44_HOPLMT_M,
+ QP_CONTEXT_QPC_BYTES_44_HOPLMT_S);
+ grh->traffic_class =
+ roce_get_field(context->qpc_bytes_48,
+ QP_CONTEXT_QPC_BYTES_48_TCLASS_M,
+ QP_CONTEXT_QPC_BYTES_48_TCLASS_S);
+
+ memcpy(grh->dgid.raw, context->dgid,
+ sizeof(grh->dgid.raw));
}
qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12,
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 4139abee3b54..80fc01ffd8bd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -127,11 +127,12 @@ static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
buddy->max_order = max_order;
spin_lock_init(&buddy->lock);
-
- buddy->bits = kzalloc((buddy->max_order + 1) * sizeof(long *),
- GFP_KERNEL);
- buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof(int *),
- GFP_KERNEL);
+ buddy->bits = kcalloc(buddy->max_order + 1,
+ sizeof(*buddy->bits),
+ GFP_KERNEL);
+ buddy->num_free = kcalloc(buddy->max_order + 1,
+ sizeof(*buddy->num_free),
+ GFP_KERNEL);
if (!buddy->bits || !buddy->num_free)
goto err_out;
@@ -204,7 +205,7 @@ int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
return 0;
}
- /* Note: if page_shift is zero, FAST memory regsiter */
+ /* Note: if page_shift is zero, FAST memory register */
mtt->page_shift = page_shift;
/* Compute MTT entry necessary */
@@ -503,7 +504,8 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> mtt->page_shift;
for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(sg) + umem->page_size * k;
+ pages[i++] = sg_dma_address(sg) +
+ (k << umem->page_shift);
if (i == PAGE_SIZE / sizeof(u64)) {
ret = hns_roce_write_mtt(hr_dev, mtt, n, i,
pages);
@@ -563,9 +565,9 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
n = ib_umem_page_count(mr->umem);
- if (mr->umem->page_size != HNS_ROCE_HEM_PAGE_SIZE) {
- dev_err(dev, "Just support 4K page size but is 0x%x now!\n",
- mr->umem->page_size);
+ if (mr->umem->page_shift != HNS_ROCE_HEM_PAGE_SHIFT) {
+ dev_err(dev, "Just support 4K page size but is 0x%lx now!\n",
+ BIT(mr->umem->page_shift));
ret = -EINVAL;
goto err_umem;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 3f44f2f91f03..054c52699090 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -437,8 +437,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
}
ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(hr_qp->umem),
- ilog2((unsigned int)hr_qp->umem->page_size),
- &hr_qp->mtt);
+ hr_qp->umem->page_shift, &hr_qp->mtt);
if (ret) {
dev_err(dev, "hns_roce_mtt_init error for create qp\n");
goto err_buf;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 95a0586a4da8..f3bc01bce483 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -3184,9 +3184,8 @@ void i40iw_setup_cm_core(struct i40iw_device *iwdev)
INIT_LIST_HEAD(&cm_core->connected_nodes);
INIT_LIST_HEAD(&cm_core->listen_nodes);
- init_timer(&cm_core->tcp_timer);
- cm_core->tcp_timer.function = i40iw_cm_timer_tick;
- cm_core->tcp_timer.data = (unsigned long)cm_core;
+ setup_timer(&cm_core->tcp_timer, i40iw_cm_timer_tick,
+ (unsigned long)cm_core);
spin_lock_init(&cm_core->ht_lock);
spin_lock_init(&cm_core->listen_list_lock);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 70c3e9e79508..409a3781e735 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -844,10 +844,9 @@ void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp)
iwqp = (struct i40iw_qp *)qp->back_qp;
i40iw_add_ref(&iwqp->ibqp);
- init_timer(&iwqp->terminate_timer);
- iwqp->terminate_timer.function = i40iw_terminate_timeout;
+ setup_timer(&iwqp->terminate_timer, i40iw_terminate_timeout,
+ (unsigned long)iwqp);
iwqp->terminate_timer.expires = jiffies + HZ;
- iwqp->terminate_timer.data = (unsigned long)iwqp;
add_timer(&iwqp->terminate_timer);
}
@@ -1436,9 +1435,8 @@ void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi)
{
struct i40iw_vsi_pestat *devstat = vsi->pestat;
- init_timer(&devstat->stats_timer);
- devstat->stats_timer.function = i40iw_hw_stats_timeout;
- devstat->stats_timer.data = (unsigned long)vsi;
+ setup_timer(&devstat->stats_timer, i40iw_hw_stats_timeout,
+ (unsigned long)vsi);
mod_timer(&devstat->stats_timer,
jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 9b2849979756..4dbe61ec7a77 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -1345,7 +1345,7 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
{
struct ib_umem *region = iwmr->region;
struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- int chunk_pages, entry, pg_shift, i;
+ int chunk_pages, entry, i;
struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
struct i40iw_pble_info *pinfo;
struct scatterlist *sg;
@@ -1354,14 +1354,14 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf;
- pg_shift = ffs(region->page_size) - 1;
for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
- chunk_pages = sg_dma_len(sg) >> pg_shift;
+ chunk_pages = sg_dma_len(sg) >> region->page_shift;
if ((iwmr->type == IW_MEMREG_TYPE_QP) &&
!iwpbl->qp_mr.sq_page)
iwpbl->qp_mr.sq_page = sg_page(sg);
for (i = 0; i < chunk_pages; i++) {
- pg_addr = sg_dma_address(sg) + region->page_size * i;
+ pg_addr = sg_dma_address(sg) +
+ (i << region->page_shift);
if ((entry + i) == 0)
*pbl = cpu_to_le64(pg_addr & iwmr->page_msk);
@@ -1847,7 +1847,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
iwmr->ibmr.device = pd->device;
ucontext = to_ucontext(pd->uobject->context);
- iwmr->page_size = region->page_size;
+ iwmr->page_size = PAGE_SIZE;
iwmr->page_msk = PAGE_MASK;
if (region->hugetlb && (req.reg_type == IW_MEMREG_TYPE_MEM))
@@ -2696,7 +2696,7 @@ static int i40iw_query_pkey(struct ib_device *ibdev,
* @ah_attr: address handle attributes
*/
static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd,
- struct ib_ah_attr *attr,
+ struct rdma_ah_attr *attr,
struct ib_udata *udata)
{
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 077c33d2dc75..538c46a73248 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -40,36 +40,45 @@
#include "mlx4_ib.h"
-static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+static struct ib_ah *create_ib_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
struct mlx4_ib_ah *ah)
{
struct mlx4_dev *dev = to_mdev(pd->device)->dev;
- ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
- ah->av.ib.g_slid = ah_attr->src_path_bits;
- ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn |
+ (rdma_ah_get_port_num(ah_attr) << 24));
+ ah->av.ib.g_slid = rdma_ah_get_path_bits(ah_attr);
+ ah->av.ib.sl_tclass_flowlabel =
+ cpu_to_be32(rdma_ah_get_sl(ah_attr) << 28);
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+
ah->av.ib.g_slid |= 0x80;
- ah->av.ib.gid_index = ah_attr->grh.sgid_index;
- ah->av.ib.hop_limit = ah_attr->grh.hop_limit;
+ ah->av.ib.gid_index = grh->sgid_index;
+ ah->av.ib.hop_limit = grh->hop_limit;
ah->av.ib.sl_tclass_flowlabel |=
- cpu_to_be32((ah_attr->grh.traffic_class << 20) |
- ah_attr->grh.flow_label);
- memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
+ cpu_to_be32((grh->traffic_class << 20) |
+ grh->flow_label);
+ memcpy(ah->av.ib.dgid, grh->dgid.raw, 16);
}
- ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid);
- if (ah_attr->static_rate) {
- ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
- while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
- !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
- --ah->av.ib.stat_rate;
+ ah->av.ib.dlid = cpu_to_be16(rdma_ah_get_dlid(ah_attr));
+ if (rdma_ah_get_static_rate(ah_attr)) {
+ u8 static_rate = rdma_ah_get_static_rate(ah_attr) +
+ MLX4_STAT_RATE_OFFSET;
+
+ while (static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
+ !(1 << static_rate & dev->caps.stat_rate_support))
+ --static_rate;
+ ah->av.ib.stat_rate = static_rate;
}
return &ah->ibah;
}
-static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
struct mlx4_ib_ah *ah)
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
@@ -79,17 +88,18 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
u16 vlan_tag = 0xffff;
union ib_gid sgid;
struct ib_gid_attr gid_attr;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
int ret;
- memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
+ memcpy(&in6, grh->dgid.raw, sizeof(in6));
if (rdma_is_multicast_addr(&in6)) {
is_mcast = 1;
rdma_get_mcast_mac(&in6, ah->av.eth.mac);
} else {
- memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN);
+ memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN);
}
- ret = ib_get_cached_gid(pd->device, ah_attr->port_num,
- ah_attr->grh.sgid_index, &sgid, &gid_attr);
+ ret = ib_get_cached_gid(pd->device, rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index, &sgid, &gid_attr);
if (ret)
return ERR_PTR(ret);
eth_zero_addr(ah->av.eth.s_mac);
@@ -100,36 +110,40 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
dev_put(gid_attr.ndev);
}
if (vlan_tag < 0x1000)
- vlan_tag |= (ah_attr->sl & 7) << 13;
- ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
- ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
+ vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13;
+ ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn |
+ (rdma_ah_get_port_num(ah_attr) << 24));
+ ret = mlx4_ib_gid_index_to_real_index(ibdev,
+ rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index);
if (ret < 0)
return ERR_PTR(ret);
ah->av.eth.gid_index = ret;
ah->av.eth.vlan = cpu_to_be16(vlan_tag);
- ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
- if (ah_attr->static_rate) {
- ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+ ah->av.eth.hop_limit = grh->hop_limit;
+ if (rdma_ah_get_static_rate(ah_attr)) {
+ ah->av.eth.stat_rate = rdma_ah_get_static_rate(ah_attr) +
+ MLX4_STAT_RATE_OFFSET;
while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
!(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support))
--ah->av.eth.stat_rate;
}
ah->av.eth.sl_tclass_flowlabel |=
- cpu_to_be32((ah_attr->grh.traffic_class << 20) |
- ah_attr->grh.flow_label);
+ cpu_to_be32((grh->traffic_class << 20) |
+ grh->flow_label);
/*
* HW requires multicast LID so we just choose one.
*/
if (is_mcast)
ah->av.ib.dlid = cpu_to_be16(0xc000);
- memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
- ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(ah_attr->sl << 29);
-
+ memcpy(ah->av.eth.dgid, grh->dgid.raw, 16);
+ ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(rdma_ah_get_sl(ah_attr)
+ << 29);
return &ah->ibah;
}
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
@@ -140,8 +154,8 @@ struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
if (!ah)
return ERR_PTR(-ENOMEM);
- if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
- if (!(ah_attr->ah_flags & IB_AH_GRH)) {
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) {
ret = ERR_PTR(-EINVAL);
} else {
/*
@@ -163,34 +177,40 @@ struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
return create_ib_ah(pd, ah_attr, ah); /* never fails */
}
-int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct mlx4_ib_ah *ah = to_mah(ibah);
- enum rdma_link_layer ll;
+ int port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
- ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
- if (ll == IB_LINK_LAYER_ETHERNET)
- ah_attr->sl = be32_to_cpu(ah->av.eth.sl_tclass_flowlabel) >> 29;
- else
- ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
-
- ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0;
- if (ah->av.ib.stat_rate)
- ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;
- ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F;
+ ah_attr->type = ibah->type;
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ rdma_ah_set_dlid(ah_attr, 0);
+ rdma_ah_set_sl(ah_attr,
+ be32_to_cpu(ah->av.eth.sl_tclass_flowlabel)
+ >> 29);
+ } else {
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(ah->av.ib.dlid));
+ rdma_ah_set_sl(ah_attr,
+ be32_to_cpu(ah->av.ib.sl_tclass_flowlabel)
+ >> 28);
+ }
+
+ rdma_ah_set_port_num(ah_attr, port_num);
+ if (ah->av.ib.stat_rate)
+ rdma_ah_set_static_rate(ah_attr,
+ ah->av.ib.stat_rate -
+ MLX4_STAT_RATE_OFFSET);
+ rdma_ah_set_path_bits(ah_attr, ah->av.ib.g_slid & 0x7F);
if (mlx4_ib_ah_grh_present(ah)) {
- ah_attr->ah_flags = IB_AH_GRH;
-
- ah_attr->grh.traffic_class =
- be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20;
- ah_attr->grh.flow_label =
- be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff;
- ah_attr->grh.hop_limit = ah->av.ib.hop_limit;
- ah_attr->grh.sgid_index = ah->av.ib.gid_index;
- memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16);
+ u32 tc_fl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel);
+
+ rdma_ah_set_grh(ah_attr, NULL,
+ tc_fl & 0xfffff, ah->av.ib.gid_index,
+ ah->av.ib.hop_limit,
+ tc_fl >> 20);
+ rdma_ah_set_dgid_raw(ah_attr, ah->av.ib.dgid);
}
return 0;
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index d64845335e87..1e6c526450d9 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -279,14 +279,14 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
}
static struct id_map_entry *
-id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id)
+id_map_get(struct ib_device *ibdev, int *pv_cm_id, int slave_id, int sl_cm_id)
{
struct id_map_entry *ent;
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
spin_lock(&sriov->id_map_lock);
if (*pv_cm_id == -1) {
- ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id);
+ ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id);
if (ent)
*pv_cm_id = (int) ent->pv_cm_id;
} else
@@ -414,7 +414,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
struct rb_root *sl_id_map = &sriov->sl_id_map;
struct list_head lh;
struct rb_node *nd;
- int need_flush = 1;
+ int need_flush = 0;
struct id_map_entry *map, *tmp_map;
/* cancel all delayed work queue entries */
INIT_LIST_HEAD(&lh);
@@ -422,13 +422,13 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
if (slave < 0 || slave == map->slave_id) {
if (map->scheduled_delete)
- need_flush &= !!cancel_delayed_work(&map->timeout);
+ need_flush |= !cancel_delayed_work(&map->timeout);
}
}
spin_unlock(&sriov->id_map_lock);
- if (!need_flush)
+ if (need_flush)
flush_scheduled_work(); /* make sure all timers were flushed */
/* now, remove all leftover entries from databases*/
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 6a0fec357dae..4f5a143fc0a7 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -147,7 +147,7 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
return PTR_ERR(*umem);
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem),
- ilog2((*umem)->page_size), &buf->mtt);
+ (*umem)->page_shift, &buf->mtt);
if (err)
goto err_buf;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index db564ccc0f92..b4694717f6f3 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -189,25 +189,26 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
{
struct ib_ah *new_ah;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
if (!dev->send_agent[port_num - 1][0])
return;
memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = lid;
- ah_attr.sl = sl;
- ah_attr.port_num = port_num;
+ ah_attr.type = rdma_ah_find_type(&dev->ib_dev, port_num);
+ rdma_ah_set_dlid(&ah_attr, lid);
+ rdma_ah_set_sl(&ah_attr, sl);
+ rdma_ah_set_port_num(&ah_attr, port_num);
- new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
- &ah_attr);
+ new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
+ &ah_attr);
if (IS_ERR(new_ah))
return;
spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
- ib_destroy_ah(dev->sm_ah[port_num - 1]);
+ rdma_destroy_ah(dev->sm_ah[port_num - 1]);
dev->sm_ah[port_num - 1] = new_ah;
spin_unlock_irqrestore(&dev->sm_lock, flags);
}
@@ -509,7 +510,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
struct mlx4_ib_demux_pv_ctx *tun_ctx;
struct mlx4_ib_demux_pv_qp *tun_qp;
struct mlx4_rcv_tunnel_mad *tun_mad;
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
struct ib_ah *ah;
struct ib_qp *src_qp = NULL;
unsigned tun_tx_ix = 0;
@@ -555,15 +556,18 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
/* create ah. Just need an empty one with the port num for the post send.
* The driver will set the force loopback bit in post_send */
memset(&attr, 0, sizeof attr);
- attr.port_num = port;
+ attr.type = rdma_ah_find_type(&dev->ib_dev, port);
+
+ rdma_ah_set_port_num(&attr, port);
if (is_eth) {
union ib_gid sgid;
+ union ib_gid dgid;
- if (get_gids_from_l3_hdr(grh, &sgid, &attr.grh.dgid))
+ if (get_gids_from_l3_hdr(grh, &sgid, &dgid))
return -EINVAL;
- attr.ah_flags = IB_AH_GRH;
+ rdma_ah_set_grh(&attr, &dgid, 0, 0, 0, 0);
}
- ah = ib_create_ah(tun_ctx->pd, &attr);
+ ah = rdma_create_ah(tun_ctx->pd, &attr);
if (IS_ERR(ah))
return -ENOMEM;
@@ -580,7 +584,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
if (tun_qp->tx_ring[tun_tx_ix].ah)
- ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
+ rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
tun_qp->tx_ring[tun_tx_ix].ah = ah;
ib_dma_sync_single_for_cpu(&dev->ib_dev,
tun_qp->tx_ring[tun_tx_ix].buf.map,
@@ -653,7 +657,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
spin_unlock(&tun_qp->tx_lock);
tun_qp->tx_ring[tun_tx_ix].ah = NULL;
end:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
return ret;
}
@@ -1018,7 +1022,7 @@ static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0])
- ib_destroy_ah(mad_send_wc->send_buf->context[0]);
+ rdma_destroy_ah(mad_send_wc->send_buf->context[0]);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -1073,7 +1077,7 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
}
if (dev->sm_ah[p])
- ib_destroy_ah(dev->sm_ah[p]);
+ rdma_destroy_ah(dev->sm_ah[p]);
}
}
@@ -1352,7 +1356,7 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index,
- u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
+ u32 remote_qpn, u32 qkey, struct rdma_ah_attr *attr,
u8 *s_mac, u16 vlan_id, struct ib_mad *mad)
{
struct ib_sge list;
@@ -1363,6 +1367,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
struct mlx4_mad_snd_buf *sqp_mad;
struct ib_ah *ah;
struct ib_qp *send_qp = NULL;
+ struct ib_global_route *grh;
unsigned wire_tx_ix = 0;
int ret = 0;
u16 wire_pkey_ix;
@@ -1389,12 +1394,13 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
send_qp = sqp->qp;
/* create ah */
- sgid_index = attr->grh.sgid_index;
- attr->grh.sgid_index = 0;
- ah = ib_create_ah(sqp_ctx->pd, attr);
+ grh = rdma_ah_retrieve_grh(attr);
+ sgid_index = grh->sgid_index;
+ grh->sgid_index = 0;
+ ah = rdma_create_ah(sqp_ctx->pd, attr);
if (IS_ERR(ah))
return -ENOMEM;
- attr->grh.sgid_index = sgid_index;
+ grh->sgid_index = sgid_index;
to_mah(ah)->av.ib.gid_index = sgid_index;
/* get rid of force-loopback bit */
to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
@@ -1410,7 +1416,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
if (sqp->tx_ring[wire_tx_ix].ah)
- ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
+ rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
sqp->tx_ring[wire_tx_ix].ah = ah;
ib_dma_sync_single_for_cpu(&dev->ib_dev,
sqp->tx_ring[wire_tx_ix].buf.map,
@@ -1442,7 +1448,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
if (s_mac)
memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
if (vlan_id < 0x1000)
- vlan_id |= (attr->sl & 7) << 13;
+ vlan_id |= (rdma_ah_get_sl(attr) & 7) << 13;
to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id);
@@ -1455,7 +1461,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
spin_unlock(&sqp->tx_lock);
sqp->tx_ring[wire_tx_ix].ah = NULL;
out:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
return ret;
}
@@ -1467,12 +1473,13 @@ static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
}
static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
+ struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
- ah_attr->grh.sgid_index = slave;
+ grh->sgid_index = slave;
else
- ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port);
+ grh->sgid_index += get_slave_base_gid_ix(dev, slave, port);
}
static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
@@ -1482,11 +1489,13 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
struct mlx4_ib_ah ah;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
u8 *slave_id;
int slave;
int port;
u16 vlan_id;
+ u8 qos;
+ u8 *dmac;
/* Get slave that sent this packet */
if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
@@ -1571,14 +1580,16 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
ah.av.ib.port_pd = cpu_to_be32(port << 24 | (be32_to_cpu(ah.av.ib.port_pd) & 0xffffff));
mlx4_ib_query_ah(&ah.ibah, &ah_attr);
- if (ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&ah_attr) & IB_AH_GRH)
fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
-
- memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
+ dmac = rdma_ah_retrieve_dmac(&ah_attr);
+ if (dmac)
+ memcpy(dmac, tunnel->hdr.mac, ETH_ALEN);
vlan_id = be16_to_cpu(tunnel->hdr.vlan);
/* if slave have default vlan use it */
- mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
- &vlan_id, &ah_attr.sl);
+ if (mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
+ &vlan_id, &qos))
+ rdma_ah_set_sl(&ah_attr, qos);
mlx4_ib_send_to_wire(dev, slave, ctx->port,
is_proxy_qp0(dev, wc->src_qp, slave) ?
@@ -1714,7 +1725,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
if (tun_qp->tx_ring[i].ah)
- ib_destroy_ah(tun_qp->tx_ring[i].ah);
+ rdma_destroy_ah(tun_qp->tx_ring[i].ah);
}
kfree(tun_qp->tx_ring);
kfree(tun_qp->ring);
@@ -1746,7 +1757,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
pr_debug("received tunnel send completion:"
"wrid=0x%llx, status=0x%x\n",
wc.wr_id, wc.status);
- ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+ rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
@@ -1763,7 +1774,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
" status = %d, wrid = 0x%llx\n",
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
- ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+ rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
@@ -1900,7 +1911,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
if (wc.status == IB_WC_SUCCESS) {
switch (wc.opcode) {
case IB_WC_SEND:
- ib_destroy_ah(sqp->tx_ring[wc.wr_id &
+ rdma_destroy_ah(sqp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
@@ -1930,7 +1941,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
" status = %d, wrid = 0x%llx\n",
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
- ib_destroy_ah(sqp->tx_ring[wc.wr_id &
+ rdma_destroy_ah(sqp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index fba94df28cf1..521d0def2d9e 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1173,7 +1173,7 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
/* need to protect from a race on closing the vma as part of
* mlx4_ib_vma_close().
*/
- down_read(&owning_mm->mmap_sem);
+ down_write(&owning_mm->mmap_sem);
for (i = 0; i < HW_BAR_COUNT; i++) {
vma = context->hw_bar_info[i].vma;
if (!vma)
@@ -1187,11 +1187,13 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
BUG_ON(1);
}
+ context->hw_bar_info[i].vma->vm_flags &=
+ ~(VM_SHARED | VM_MAYSHARE);
/* context going to be destroyed, should not access ops any more */
context->hw_bar_info[i].vma->vm_ops = NULL;
}
- up_read(&owning_mm->mmap_sem);
+ up_write(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);
}
@@ -2867,23 +2869,19 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
- dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
- if (!iboe->nb.notifier_call) {
- iboe->nb.notifier_call = mlx4_ib_netdev_event;
- err = register_netdevice_notifier(&iboe->nb);
- if (err) {
- iboe->nb.notifier_call = NULL;
- goto err_notif;
- }
- }
- if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
- err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
- if (err) {
- goto err_notif;
- }
+ if (!iboe->nb.notifier_call) {
+ iboe->nb.notifier_call = mlx4_ib_netdev_event;
+ err = register_netdevice_notifier(&iboe->nb);
+ if (err) {
+ iboe->nb.notifier_call = NULL;
+ goto err_notif;
}
}
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
+ if (err)
+ goto err_notif;
+ }
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
if (device_create_file(&ibdev->ib_dev.dev,
@@ -2941,6 +2939,7 @@ err_counter:
mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
err_map:
+ mlx4_ib_free_eqs(dev, ibdev);
iounmap(ibdev->uar_map);
err_uar:
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index e010fe459e67..3405e947dc1e 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -209,7 +209,7 @@ static struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx,
static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
{
struct mlx4_ib_dev *dev = ctx->dev;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
spin_lock_irqsave(&dev->sm_lock, flags);
@@ -231,20 +231,20 @@ static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
struct mlx4_ib_dev *dev = ctx->dev;
struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1];
struct ib_wc wc;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
/* Our agent might not yet be registered when mads start to arrive */
if (!agent)
return -EAGAIN;
- ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
+ rdma_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
if (ib_find_cached_pkey(&dev->ib_dev, ctx->port, IB_DEFAULT_PKEY_FULL, &wc.pkey_index))
return -EINVAL;
wc.sl = 0;
wc.dlid_path_bits = 0;
wc.port_num = ctx->port;
- wc.slid = ah_attr.dlid; /* opensm lid */
+ wc.slid = rdma_ah_get_dlid(&ah_attr); /* opensm lid */
wc.src_qp = 1;
return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad);
}
@@ -1102,7 +1102,8 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy
while ((p = rb_first(&ctx->mcg_table)) != NULL) {
group = rb_entry(p, struct mcast_group, node);
if (atomic_read(&group->refcount))
- mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group);
+ mcg_debug_group(group, "group refcount %d!!! (pointer %p)\n",
+ atomic_read(&group->refcount), group);
force_clean_group(group);
}
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 64fed44b43a6..c2b9cbf4da05 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -742,9 +742,9 @@ int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
struct ib_udata *udata);
-int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
int mlx4_ib_destroy_ah(struct ib_ah *ah);
struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
@@ -833,7 +833,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
- u32 qkey, struct ib_ah_attr *attr, u8 *s_mac,
+ u32 qkey, struct rdma_ah_attr *attr, u8 *s_mac,
u16 vlan_id, struct ib_mad *mad);
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 433bcdbdd680..e6f77f63da75 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -107,7 +107,7 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
len = sg_dma_len(sg) >> mtt->page_shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(sg) +
- umem->page_size * k;
+ (k << umem->page_shift);
/*
* Be friendly to mlx4_write_mtt() and
* pass it chunks of appropriate size.
@@ -155,7 +155,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
n = ib_umem_page_count(mr->umem);
- shift = ilog2(mr->umem->page_size);
+ shift = mr->umem->page_shift;
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
convert_access(access_flags), n, shift, &mr->mmr);
@@ -239,7 +239,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
goto release_mpt_entry;
}
n = ib_umem_page_count(mmr->umem);
- shift = ilog2(mmr->umem->page_size);
+ shift = mmr->umem->page_shift;
err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
virt_addr, length, n, shift,
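The mr.c hunks above swap umem->page_size for the page_shift the umem now carries, so the driver shifts instead of multiplying by the page size or re-deriving it with ilog2(). A minimal standalone sketch of that arithmetic (illustrative values only, not taken from the patch):

/*
 * Illustrative only: page_size is a power of two, so "base + page_size * k"
 * and "base + (k << page_shift)" address the same page, and
 * ilog2(page_size) == page_shift.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	const unsigned int page_shift = 12;            /* assume 4 KiB pages */
	const uint64_t page_size = 1ULL << page_shift;
	const uint64_t base = 0x100000;

	for (uint64_t k = 0; k < 8; k++)
		assert(base + page_size * k == base + (k << page_shift));
	return 0;
}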
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index c34eebc7db65..996e9058e515 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -745,7 +745,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
}
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
- ilog2(qp->umem->page_size), &qp->mtt);
+ qp->umem->page_shift, &qp->mtt);
if (err)
goto err_buf;
@@ -1383,31 +1383,31 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}
-static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
+static int _mlx4_set_path(struct mlx4_ib_dev *dev,
+ const struct rdma_ah_attr *ah,
u64 smac, u16 vlan_tag, struct mlx4_qp_path *path,
struct mlx4_roce_smac_vlan_info *smac_info, u8 port)
{
- int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
- IB_LINK_LAYER_ETHERNET;
int vidx;
int smac_index;
int err;
-
- path->grh_mylmc = ah->src_path_bits & 0x7f;
- path->rlid = cpu_to_be16(ah->dlid);
- if (ah->static_rate) {
- path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
+ path->grh_mylmc = rdma_ah_get_path_bits(ah) & 0x7f;
+ path->rlid = cpu_to_be16(rdma_ah_get_dlid(ah));
+ if (rdma_ah_get_static_rate(ah)) {
+ path->static_rate = rdma_ah_get_static_rate(ah) +
+ MLX4_STAT_RATE_OFFSET;
while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
!(1 << path->static_rate & dev->dev->caps.stat_rate_support))
--path->static_rate;
} else
path->static_rate = 0;
- if (ah->ah_flags & IB_AH_GRH) {
- int real_sgid_index = mlx4_ib_gid_index_to_real_index(dev,
- port,
- ah->grh.sgid_index);
+ if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah);
+ int real_sgid_index =
+ mlx4_ib_gid_index_to_real_index(dev, port,
+ grh->sgid_index);
if (real_sgid_index >= dev->dev->caps.gid_table_len[port]) {
pr_err("sgid_index (%u) too large. max is %d\n",
@@ -1417,19 +1417,19 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
path->grh_mylmc |= 1 << 7;
path->mgid_index = real_sgid_index;
- path->hop_limit = ah->grh.hop_limit;
+ path->hop_limit = grh->hop_limit;
path->tclass_flowlabel =
- cpu_to_be32((ah->grh.traffic_class << 20) |
- (ah->grh.flow_label));
- memcpy(path->rgid, ah->grh.dgid.raw, 16);
+ cpu_to_be32((grh->traffic_class << 20) |
+ (grh->flow_label));
+ memcpy(path->rgid, grh->dgid.raw, 16);
}
- if (is_eth) {
- if (!(ah->ah_flags & IB_AH_GRH))
+ if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ if (!(rdma_ah_get_ah_flags(ah) & IB_AH_GRH))
return -1;
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
- ((port - 1) << 6) | ((ah->sl & 7) << 3);
+ ((port - 1) << 6) | ((rdma_ah_get_sl(ah) & 7) << 3);
path->feup |= MLX4_FEUP_FORCE_ETH_UP;
if (vlan_tag < 0x1000) {
@@ -1488,14 +1488,13 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
} else {
smac_index = smac_info->smac_index;
}
-
- memcpy(path->dmac, ah->dmac, 6);
+ memcpy(path->dmac, ah->roce.dmac, 6);
path->ackto = MLX4_IB_LINK_TYPE_ETH;
/* put MAC table smac index for IBoE */
path->grh_mylmc = (u8) (smac_index) | 0x80;
} else {
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
- ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
+ ((port - 1) << 6) | ((rdma_ah_get_sl(ah) & 0xf) << 2);
}
return 0;
@@ -1767,11 +1766,13 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
u16 vlan = 0xffff;
u8 smac[ETH_ALEN];
int status = 0;
- int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
- attr->ah_attr.ah_flags & IB_AH_GRH;
+ int is_eth =
+ rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
+ rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
- if (is_eth && attr->ah_attr.ah_flags & IB_AH_GRH) {
- int index = attr->ah_attr.grh.sgid_index;
+ if (is_eth) {
+ int index =
+ rdma_ah_read_grh(&attr->ah_attr)->sgid_index;
status = ib_get_cached_gid(ibqp->device, port_num,
index, &gid, &gid_attr);
@@ -3394,39 +3395,37 @@ static int to_ib_qp_access_flags(int mlx4_flags)
return ib_flags;
}
-static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
- struct mlx4_qp_path *path)
+static void to_rdma_ah_attr(struct mlx4_ib_dev *ibdev,
+ struct rdma_ah_attr *ah_attr,
+ struct mlx4_qp_path *path)
{
struct mlx4_dev *dev = ibdev->dev;
- int is_eth;
-
- memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
- ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1;
+ u8 port_num = path->sched_queue & 0x40 ? 2 : 1;
- if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num);
+ if (port_num == 0 || port_num > dev->caps.num_ports)
return;
- is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) ==
- IB_LINK_LAYER_ETHERNET;
- if (is_eth)
- ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) |
- ((path->sched_queue & 4) << 1);
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE)
+ rdma_ah_set_sl(ah_attr, ((path->sched_queue >> 3) & 0x7) |
+ ((path->sched_queue & 4) << 1));
else
- ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
+ rdma_ah_set_sl(ah_attr, (path->sched_queue >> 2) & 0xf);
+ rdma_ah_set_port_num(ah_attr, port_num);
- ib_ah_attr->dlid = be16_to_cpu(path->rlid);
- ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
- ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
- ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
- if (ib_ah_attr->ah_flags) {
- ib_ah_attr->grh.sgid_index = path->mgid_index;
- ib_ah_attr->grh.hop_limit = path->hop_limit;
- ib_ah_attr->grh.traffic_class =
- (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
- ib_ah_attr->grh.flow_label =
- be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
- memcpy(ib_ah_attr->grh.dgid.raw,
- path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(path->rlid));
+ rdma_ah_set_path_bits(ah_attr, path->grh_mylmc & 0x7f);
+ rdma_ah_set_static_rate(ah_attr,
+ path->static_rate ? path->static_rate - 5 : 0);
+ if (path->grh_mylmc & (1 << 7)) {
+ rdma_ah_set_grh(ah_attr, NULL,
+ be32_to_cpu(path->tclass_flowlabel) & 0xfffff,
+ path->mgid_index,
+ path->hop_limit,
+ (be32_to_cpu(path->tclass_flowlabel)
+ >> 20) & 0xff);
+ rdma_ah_set_dgid_raw(ah_attr, path->rgid);
}
}
@@ -3467,10 +3466,11 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
to_ib_qp_access_flags(be32_to_cpu(context.params2));
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
- to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
- to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
+ to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
+ to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
- qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
+ qp_attr->alt_port_num =
+ rdma_ah_get_port_num(&qp_attr->alt_ah_attr);
}
qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
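A small self-contained sketch of the service-level decode that the new to_rdma_ah_attr() performs on the hardware sched_queue byte above; the bit layout is read off the hunk, the sample values are hypothetical:

#include <assert.h>
#include <stdint.h>

/* RoCE: SL bits 2:0 come from sched_queue[5:3]; sched_queue bit 2
 * contributes bit 3 of the SL. */
static uint8_t roce_sl(uint8_t sched_queue)
{
	return ((sched_queue >> 3) & 0x7) | ((sched_queue & 4) << 1);
}

/* IB: SL is simply sched_queue[5:2]. */
static uint8_t ib_sl(uint8_t sched_queue)
{
	return (sched_queue >> 2) & 0xf;
}

int main(void)
{
	uint8_t sq_roce = 0x40 | (5 << 3);	/* port 2, SL 5, RoCE encoding */
	uint8_t sq_ib = 0x40 | (5 << 2);	/* port 2, SL 5, IB encoding */

	assert(roce_sl(sq_roce) == 5);
	assert(ib_sl(sq_ib) == 5);
	return 0;
}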
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 7dd3f267f06b..e32dd58937a8 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -122,7 +122,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
}
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
- ilog2(srq->umem->page_size), &srq->mtt);
+ srq->umem->page_shift, &srq->mtt);
if (err)
goto err_buf;
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index d090e96f6f01..3363e29157f6 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -34,50 +34,51 @@
static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
struct mlx5_ib_ah *ah,
- struct ib_ah_attr *ah_attr,
- enum rdma_link_layer ll)
+ struct rdma_ah_attr *ah_attr)
{
- if (ah_attr->ah_flags & IB_AH_GRH) {
- memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
- ah->av.grh_gid_fl = cpu_to_be32(ah_attr->grh.flow_label |
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+
+ memcpy(ah->av.rgid, &grh->dgid, 16);
+ ah->av.grh_gid_fl = cpu_to_be32(grh->flow_label |
(1 << 30) |
- ah_attr->grh.sgid_index << 20);
- ah->av.hop_limit = ah_attr->grh.hop_limit;
- ah->av.tclass = ah_attr->grh.traffic_class;
+ grh->sgid_index << 20);
+ ah->av.hop_limit = grh->hop_limit;
+ ah->av.tclass = grh->traffic_class;
}
- ah->av.stat_rate_sl = (ah_attr->static_rate << 4);
+ ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
- if (ll == IB_LINK_LAYER_ETHERNET) {
- memcpy(ah->av.rmac, ah_attr->dmac, sizeof(ah_attr->dmac));
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ memcpy(ah->av.rmac, ah_attr->roce.dmac,
+ sizeof(ah_attr->roce.dmac));
ah->av.udp_sport =
- mlx5_get_roce_udp_sport(dev,
- ah_attr->port_num,
- ah_attr->grh.sgid_index);
- ah->av.stat_rate_sl |= (ah_attr->sl & 0x7) << 1;
+ mlx5_get_roce_udp_sport(dev,
+ rdma_ah_get_port_num(ah_attr),
+ rdma_ah_read_grh(ah_attr)->sgid_index);
+ ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1;
} else {
- ah->av.rlid = cpu_to_be16(ah_attr->dlid);
- ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
- ah->av.stat_rate_sl |= (ah_attr->sl & 0xf);
+ ah->av.rlid = cpu_to_be16(rdma_ah_get_dlid(ah_attr));
+ ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f;
+ ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf);
}
return &ah->ibah;
}
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
struct mlx5_ib_ah *ah;
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- enum rdma_link_layer ll;
-
- ll = pd->device->get_link_layer(pd->device, ah_attr->port_num);
+ enum rdma_ah_attr_type ah_type = ah_attr->type;
- if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
+ if ((ah_type == RDMA_AH_ATTR_TYPE_ROCE) &&
+ !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
return ERR_PTR(-EINVAL);
- if (ll == IB_LINK_LAYER_ETHERNET && udata) {
+ if (ah_type == RDMA_AH_ATTR_TYPE_ROCE && udata) {
int err;
struct mlx5_ib_create_ah_resp resp = {};
u32 min_resp_len = offsetof(typeof(resp), dmac) +
@@ -92,7 +93,7 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
if (err)
return ERR_PTR(err);
- memcpy(resp.dmac, ah_attr->dmac, ETH_ALEN);
+ memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
return ERR_PTR(err);
@@ -102,28 +103,29 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
if (!ah)
return ERR_PTR(-ENOMEM);
- return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */
+ return create_ib_ah(dev, ah, ah_attr); /* never fails */
}
-int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct mlx5_ib_ah *ah = to_mah(ibah);
u32 tmp;
memset(ah_attr, 0, sizeof(*ah_attr));
+ ah_attr->type = ibah->type;
tmp = be32_to_cpu(ah->av.grh_gid_fl);
if (tmp & (1 << 30)) {
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.sgid_index = (tmp >> 20) & 0xff;
- ah_attr->grh.flow_label = tmp & 0xfffff;
- memcpy(&ah_attr->grh.dgid, ah->av.rgid, 16);
- ah_attr->grh.hop_limit = ah->av.hop_limit;
- ah_attr->grh.traffic_class = ah->av.tclass;
+ rdma_ah_set_grh(ah_attr, NULL,
+ tmp & 0xfffff,
+ (tmp >> 20) & 0xff,
+ ah->av.hop_limit,
+ ah->av.tclass);
+ rdma_ah_set_dgid_raw(ah_attr, ah->av.rgid);
}
- ah_attr->dlid = be16_to_cpu(ah->av.rlid);
- ah_attr->static_rate = ah->av.stat_rate_sl >> 4;
- ah_attr->sl = ah->av.stat_rate_sl & 0xf;
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(ah->av.rlid));
+ rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate_sl >> 4);
+ rdma_ah_set_sl(ah_attr, ah->av.stat_rate_sl & 0xf);
return 0;
}
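The mlx5 create/query pair above packs the static rate and the SL into the single av.stat_rate_sl byte. A hedged sketch of that packing and of what mlx5_ib_query_ah() reads back (field positions taken from the hunks, everything else illustrative):

#include <assert.h>
#include <stdint.h>

/* Rate lives in the high nibble; IB keeps a 4-bit SL in the low nibble,
 * RoCE keeps a 3-bit SL at bits 3:1. */
static uint8_t pack_ib(uint8_t rate, uint8_t sl)
{
	return (uint8_t)((rate << 4) | (sl & 0xf));
}

static uint8_t pack_roce(uint8_t rate, uint8_t sl)
{
	return (uint8_t)((rate << 4) | ((sl & 0x7) << 1));
}

int main(void)
{
	uint8_t v = pack_ib(3, 9);

	assert((v >> 4) == 3);		/* what the query path reads back */
	assert((v & 0xf) == 9);
	assert((pack_roce(3, 5) >> 4) == 3);
	return 0;
}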
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index cdc2d3017da7..18d5e1db93ed 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -46,3 +46,14 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
null_mkey);
return err;
}
+
+int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+ bool reset, void *out, int out_size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
+
+ MLX5_SET(query_cong_statistics_in, in, opcode,
+ MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+ MLX5_SET(query_cong_statistics_in, in, clear, reset);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 7ca8a7b6434d..fa09228193a6 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -37,4 +37,6 @@
#include <linux/mlx5/driver.h>
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
+int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+ bool reset, void *out, int out_size);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 31803b367104..94c049b62c2f 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -172,6 +172,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
struct mlx5_ib_srq *srq;
struct mlx5_ib_wq *wq;
u16 wqe_ctr;
+ u8 roce_packet_type;
+ bool vlan_present;
u8 g;
if (qp->ibqp.srq || qp->ibqp.xrcd) {
@@ -223,7 +225,6 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
break;
}
wc->slid = be16_to_cpu(cqe->slid);
- wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
wc->dlid_path_bits = cqe->ml_path;
g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
@@ -237,10 +238,22 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->pkey_index = 0;
}
- if (ll != IB_LINK_LAYER_ETHERNET)
+ if (ll != IB_LINK_LAYER_ETHERNET) {
+ wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
return;
+ }
+
+ vlan_present = cqe->l4_l3_hdr_type & 0x1;
+ roce_packet_type = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3;
+ if (vlan_present) {
+ wc->vlan_id = (be16_to_cpu(cqe->vlan_info)) & 0xfff;
+ wc->sl = (be16_to_cpu(cqe->vlan_info) >> 13) & 0x7;
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+ } else {
+ wc->sl = 0;
+ }
- switch (wc->sl & 0x3) {
+ switch (roce_packet_type) {
case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
wc->network_hdr_type = RDMA_NETWORK_IB;
break;
@@ -818,7 +831,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
return 0;
err_cqb:
- kfree(cqb);
+ kfree(*cqb);
err_db:
mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
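With the cq.c change above, handle_responder() derives the SL and VLAN ID from the CQE's vlan_info on Ethernet ports instead of reusing the IB SL bits. A minimal sketch of that extraction (field widths from the hunk; the sample value is made up):

#include <arpa/inet.h>
#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* vlan_info as it would arrive from hardware: big endian,
	 * PCP (SL) in bits 15:13, VLAN ID in bits 11:0. */
	uint16_t vlan_info_be = htons((3 << 13) | 0x123);
	uint16_t host = ntohs(vlan_info_be);

	uint16_t vlan_id = host & 0xfff;
	uint8_t sl = (host >> 13) & 0x7;

	assert(vlan_id == 0x123);
	assert(sl == 3);
	return 0;
}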
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 8dacb49eabd9..f1b56de64871 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -187,6 +187,8 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
port_xmit_discards);
MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_constraint_errors,
port_xmit_constraint_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_wait,
+ port_xmit_wait);
MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_constraint_errors,
port_rcv_constraint_errors);
MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_overrun_errors,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 4dc0a8785fe0..d45772da0963 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -57,6 +57,7 @@
#include <linux/mlx5/fs.h>
#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
+#include "cmd.h"
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "2.2-1"
@@ -165,15 +166,82 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
return ndev;
}
-static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
- struct ib_port_attr *props)
+static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+ u8 *active_width)
+{
+ switch (eth_proto_oper) {
+ case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
+ case MLX5E_PROT_MASK(MLX5E_1000BASE_KX):
+ case MLX5E_PROT_MASK(MLX5E_100BASE_TX):
+ case MLX5E_PROT_MASK(MLX5E_1000BASE_T):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_SDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_T):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_CX4):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_KX4):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_KR):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_CR):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_SR):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_ER):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_QDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_25GBASE_CR):
+ case MLX5E_PROT_MASK(MLX5E_25GBASE_KR):
+ case MLX5E_PROT_MASK(MLX5E_25GBASE_SR):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_EDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_CR4):
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_KR4):
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_SR4):
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_LR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_QDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_50GBASE_CR2):
+ case MLX5E_PROT_MASK(MLX5E_50GBASE_KR2):
+ case MLX5E_PROT_MASK(MLX5E_50GBASE_SR2):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_HDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_56GBASE_R4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_FDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_CR4):
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_SR4):
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_KR4):
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_LR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_EDR;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+ struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_core_dev *mdev = dev->mdev;
struct net_device *ndev, *upper;
enum ib_mtu ndev_ib_mtu;
u16 qkey_viol_cntr;
+ u32 eth_prot_oper;
- /* props being zeroed by the caller, avoid zeroing it here */
+ /* Possible bad flows are checked before filling out props so in case
+ * of an error it will still be zeroed out.
+ */
+ if (mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper, port_num))
+ return;
+
+ translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
+ &props->active_width);
props->port_cap_flags |= IB_PORT_CM_SUP;
props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
@@ -191,7 +259,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
ndev = mlx5_ib_get_netdev(device, port_num);
if (!ndev)
- return 0;
+ return;
if (mlx5_lag_is_active(dev->mdev)) {
rcu_read_lock();
@@ -214,11 +282,6 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
dev_put(ndev);
props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
-
- props->active_width = IB_WIDTH_4X; /* TODO */
- props->active_speed = IB_SPEED_QDR; /* TODO */
-
- return 0;
}
static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
@@ -924,7 +987,8 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
return mlx5_query_hca_port(ibdev, port, props);
case MLX5_VPORT_ACCESS_METHOD_NIC:
- return mlx5_query_port_roce(ibdev, port, props);
+ mlx5_query_port_roce(ibdev, port, props);
+ return 0;
default:
return -EINVAL;
@@ -1478,7 +1542,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
/* need to protect from a race on closing the vma as part of
* mlx5_ib_vma_close.
*/
- down_read(&owning_mm->mmap_sem);
+ down_write(&owning_mm->mmap_sem);
list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
list) {
vma = vma_private->vma;
@@ -1488,11 +1552,12 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
/* context going to be destroyed, should
* not access ops any more.
*/
+ vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
vma->vm_ops = NULL;
list_del(&vma_private->list);
kfree(vma_private);
}
- up_read(&owning_mm->mmap_sem);
+ up_write(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);
}
@@ -1726,6 +1791,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
#define LAST_TCP_UDP_FIELD src_port
#define LAST_TUNNEL_FIELD tunnel_id
#define LAST_FLOW_TAG_FIELD tag_id
+#define LAST_DROP_FIELD size
/* Field is the last supported field */
#define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -1735,8 +1801,11 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
offsetof(typeof(filter), field) -\
sizeof(filter.field))
-static int parse_flow_attr(u32 *match_c, u32 *match_v,
- const union ib_flow_spec *ib_spec, u32 *tag_id)
+#define IPV4_VERSION 4
+#define IPV6_VERSION 6
+static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
+ u32 *match_v, const union ib_flow_spec *ib_spec,
+ u32 *tag_id, bool *is_drop)
{
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
@@ -1744,17 +1813,22 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
misc_parameters);
void *headers_c;
void *headers_v;
+ int match_ipv;
if (ib_spec->type & IB_FLOW_SPEC_INNER) {
headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
inner_headers);
headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
inner_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version);
} else {
headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
outer_headers);
headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
outer_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
}
switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
@@ -1810,10 +1884,17 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
return -EOPNOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IP);
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, IPV4_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IP);
+ }
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv4_layout.ipv4),
@@ -1842,10 +1923,17 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
return -EOPNOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IPV6);
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, IPV6_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IPV6);
+ }
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
@@ -1937,6 +2025,12 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
*tag_id = ib_spec->flow_tag.tag_id;
break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
+ LAST_DROP_FIELD))
+ return -EOPNOTSUPP;
+ *is_drop = true;
+ break;
default:
return -EINVAL;
}
@@ -1967,26 +2061,60 @@ static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
is_multicast_ether_addr(eth_spec->val.dst_mac);
}
-static bool is_valid_attr(const struct ib_flow_attr *flow_attr)
+static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr,
+ bool check_inner)
{
union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
- bool has_ipv4_spec = false;
- bool eth_type_ipv4 = true;
+ int match_ipv = check_inner ?
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
+ bool ipv4_spec_valid, ipv6_spec_valid;
+ unsigned int ip_spec_type = 0;
+ bool has_ethertype = false;
unsigned int spec_index;
+ bool mask_valid = true;
+ u16 eth_type = 0;
+ bool type_valid;
/* Validate that ethertype is correct */
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- if (ib_spec->type == IB_FLOW_SPEC_ETH &&
+ if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
ib_spec->eth.mask.ether_type) {
- if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) &&
- ib_spec->eth.val.ether_type == htons(ETH_P_IP)))
- eth_type_ipv4 = false;
- } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) {
- has_ipv4_spec = true;
+ mask_valid = (ib_spec->eth.mask.ether_type ==
+ htons(0xffff));
+ has_ethertype = true;
+ eth_type = ntohs(ib_spec->eth.val.ether_type);
+ } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
+ (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
+ ip_spec_type = ib_spec->type;
}
ib_spec = (void *)ib_spec + ib_spec->size;
}
- return !has_ipv4_spec || eth_type_ipv4;
+
+ type_valid = (!has_ethertype) || (!ip_spec_type);
+ if (!type_valid && mask_valid) {
+ ipv4_spec_valid = (eth_type == ETH_P_IP) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
+ ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
+
+ type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
+ (((eth_type == ETH_P_MPLS_UC) ||
+ (eth_type == ETH_P_MPLS_MC)) && match_ipv);
+ }
+
+ return type_valid;
+}
+
+static bool is_valid_attr(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr)
+{
+ return is_valid_ethertype(mdev, flow_attr, false) &&
+ is_valid_ethertype(mdev, flow_attr, true);
}
static void put_flow_table(struct mlx5_ib_dev *dev,
@@ -2038,8 +2166,8 @@ enum flow_table_type {
MLX5_IB_FT_TX
};
-#define MLX5_FS_MAX_TYPES 10
-#define MLX5_FS_MAX_ENTRIES 32000UL
+#define MLX5_FS_MAX_TYPES 6
+#define MLX5_FS_MAX_ENTRIES BIT(16)
static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
struct ib_flow_attr *flow_attr,
enum flow_table_type ft_type)
@@ -2048,11 +2176,14 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio;
struct mlx5_flow_table *ft;
+ int max_table_size;
int num_entries;
int num_groups;
int priority;
int err = 0;
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ log_max_ft_size));
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
if (flow_is_multicast_only(flow_attr) &&
!dont_trap)
@@ -2091,6 +2222,9 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
if (!ns)
return ERR_PTR(-ENOTSUPP);
+ if (num_entries > max_table_size)
+ return ERR_PTR(-ENOMEM);
+
ft = prio->flow_table;
if (!ft) {
ft = mlx5_create_auto_grouped_flow_table(ns, priority,
@@ -2118,12 +2252,15 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_handler *handler;
struct mlx5_flow_act flow_act = {0};
struct mlx5_flow_spec *spec;
+ struct mlx5_flow_destination *rule_dst = dst;
const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
unsigned int spec_index;
u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ bool is_drop = false;
int err = 0;
+ int dest_num = 1;
- if (!is_valid_attr(flow_attr))
+ if (!is_valid_attr(dev->mdev, flow_attr))
return ERR_PTR(-EINVAL);
spec = mlx5_vzalloc(sizeof(*spec));
@@ -2136,8 +2273,9 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
INIT_LIST_HEAD(&handler->list);
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- err = parse_flow_attr(spec->match_criteria,
- spec->match_value, ib_flow, &flow_tag);
+ err = parse_flow_attr(dev->mdev, spec->match_criteria,
+ spec->match_value,
+ ib_flow, &flow_tag, &is_drop);
if (err < 0)
goto free;
@@ -2145,8 +2283,14 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
}
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
- flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
- MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ if (is_drop) {
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ rule_dst = NULL;
+ dest_num = 0;
+ } else {
+ flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ }
if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
(flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
@@ -2159,7 +2303,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
flow_act.flow_tag = flow_tag;
handler->rule = mlx5_add_flow_rules(ft, spec,
&flow_act,
- dst, 1);
+ rule_dst, dest_num);
if (IS_ERR(handler->rule)) {
err = PTR_ERR(handler->rule);
@@ -2315,7 +2459,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
int err;
if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
- return ERR_PTR(-ENOSPC);
+ return ERR_PTR(-ENOMEM);
if (domain != IB_FLOW_DOMAIN_USER ||
flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
@@ -3133,7 +3277,7 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-struct mlx5_ib_q_counter {
+struct mlx5_ib_counter {
const char *name;
size_t offset;
};
@@ -3141,18 +3285,18 @@ struct mlx5_ib_q_counter {
#define INIT_Q_COUNTER(_name) \
{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
-static const struct mlx5_ib_q_counter basic_q_cnts[] = {
+static const struct mlx5_ib_counter basic_q_cnts[] = {
INIT_Q_COUNTER(rx_write_requests),
INIT_Q_COUNTER(rx_read_requests),
INIT_Q_COUNTER(rx_atomic_requests),
INIT_Q_COUNTER(out_of_buffer),
};
-static const struct mlx5_ib_q_counter out_of_seq_q_cnts[] = {
+static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
INIT_Q_COUNTER(out_of_sequence),
};
-static const struct mlx5_ib_q_counter retrans_q_cnts[] = {
+static const struct mlx5_ib_counter retrans_q_cnts[] = {
INIT_Q_COUNTER(duplicate_request),
INIT_Q_COUNTER(rnr_nak_retry_err),
INIT_Q_COUNTER(packet_seq_err),
@@ -3160,22 +3304,31 @@ static const struct mlx5_ib_q_counter retrans_q_cnts[] = {
INIT_Q_COUNTER(local_ack_timeout_err),
};
-static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
+#define INIT_CONG_COUNTER(_name) \
+ { .name = #_name, .offset = \
+ MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
+
+static const struct mlx5_ib_counter cong_cnts[] = {
+ INIT_CONG_COUNTER(rp_cnp_ignored),
+ INIT_CONG_COUNTER(rp_cnp_handled),
+ INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
+ INIT_CONG_COUNTER(np_cnp_sent),
+};
+
+static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
unsigned int i;
for (i = 0; i < dev->num_ports; i++) {
mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].q_cnts.set_id);
- kfree(dev->port[i].q_cnts.names);
- kfree(dev->port[i].q_cnts.offsets);
+ dev->port[i].cnts.set_id);
+ kfree(dev->port[i].cnts.names);
+ kfree(dev->port[i].cnts.offsets);
}
}
-static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev,
- const char ***names,
- size_t **offsets,
- u32 *num)
+static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_counters *cnts)
{
u32 num_counters;
@@ -3186,27 +3339,32 @@ static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev,
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
num_counters += ARRAY_SIZE(retrans_q_cnts);
+ cnts->num_q_counters = num_counters;
- *names = kcalloc(num_counters, sizeof(**names), GFP_KERNEL);
- if (!*names)
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
+ num_counters += ARRAY_SIZE(cong_cnts);
+ }
+
+ cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
+ if (!cnts->names)
return -ENOMEM;
- *offsets = kcalloc(num_counters, sizeof(**offsets), GFP_KERNEL);
- if (!*offsets)
+ cnts->offsets = kcalloc(num_counters,
+ sizeof(cnts->offsets), GFP_KERNEL);
+ if (!cnts->offsets)
goto err_names;
- *num = num_counters;
-
return 0;
err_names:
- kfree(*names);
+ kfree(cnts->names);
return -ENOMEM;
}
-static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
- const char **names,
- size_t *offsets)
+static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
+ const char **names,
+ size_t *offsets)
{
int i;
int j = 0;
@@ -3229,9 +3387,16 @@ static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
offsets[j] = retrans_q_cnts[i].offset;
}
}
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
+ names[j] = cong_cnts[i].name;
+ offsets[j] = cong_cnts[i].offset;
+ }
+ }
}
-static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
+static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
int i;
int ret;
@@ -3240,7 +3405,7 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
struct mlx5_ib_port *port = &dev->port[i];
ret = mlx5_core_alloc_q_counter(dev->mdev,
- &port->q_cnts.set_id);
+ &port->cnts.set_id);
if (ret) {
mlx5_ib_warn(dev,
"couldn't allocate queue counter for port %d, err %d\n",
@@ -3248,15 +3413,12 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
goto dealloc_counters;
}
- ret = __mlx5_ib_alloc_q_counters(dev,
- &port->q_cnts.names,
- &port->q_cnts.offsets,
- &port->q_cnts.num_counters);
+ ret = __mlx5_ib_alloc_counters(dev, &port->cnts);
if (ret)
goto dealloc_counters;
- mlx5_ib_fill_q_counters(dev, port->q_cnts.names,
- port->q_cnts.offsets);
+ mlx5_ib_fill_counters(dev, port->cnts.names,
+ port->cnts.offsets);
}
return 0;
@@ -3264,7 +3426,7 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
dealloc_counters:
while (--i >= 0)
mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].q_cnts.set_id);
+ dev->port[i].cnts.set_id);
return ret;
}
@@ -3279,44 +3441,113 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
if (port_num == 0)
return NULL;
- return rdma_alloc_hw_stats_struct(port->q_cnts.names,
- port->q_cnts.num_counters,
+ return rdma_alloc_hw_stats_struct(port->cnts.names,
+ port->cnts.num_q_counters +
+ port->cnts.num_cong_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
-static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
- struct rdma_hw_stats *stats,
- u8 port_num, int index)
+static int mlx5_ib_query_q_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_port *port,
+ struct rdma_hw_stats *stats)
{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_port *port = &dev->port[port_num - 1];
int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
void *out;
__be32 val;
- int ret;
- int i;
-
- if (!stats)
- return -ENOSYS;
+ int ret, i;
out = mlx5_vzalloc(outlen);
if (!out)
return -ENOMEM;
ret = mlx5_core_query_q_counter(dev->mdev,
- port->q_cnts.set_id, 0,
+ port->cnts.set_id, 0,
out, outlen);
if (ret)
goto free;
- for (i = 0; i < port->q_cnts.num_counters; i++) {
- val = *(__be32 *)(out + port->q_cnts.offsets[i]);
+ for (i = 0; i < port->cnts.num_q_counters; i++) {
+ val = *(__be32 *)(out + port->cnts.offsets[i]);
stats->value[i] = (u64)be32_to_cpu(val);
}
free:
kvfree(out);
- return port->q_cnts.num_counters;
+ return ret;
+}
+
+static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_port *port,
+ struct rdma_hw_stats *stats)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
+ void *out;
+ int ret, i;
+ int offset = port->cnts.num_q_counters;
+
+ out = mlx5_vzalloc(outlen);
+ if (!out)
+ return -ENOMEM;
+
+ ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen);
+ if (ret)
+ goto free;
+
+ for (i = 0; i < port->cnts.num_cong_counters; i++) {
+ stats->value[i + offset] =
+ be64_to_cpup((__be64 *)(out +
+ port->cnts.offsets[i + offset]));
+ }
+
+free:
+ kvfree(out);
+ return ret;
+}
+
+static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port_num, int index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_port *port = &dev->port[port_num - 1];
+ int ret, num_counters;
+
+ if (!stats)
+ return -EINVAL;
+
+ ret = mlx5_ib_query_q_counters(dev, port, stats);
+ if (ret)
+ return ret;
+ num_counters = port->cnts.num_q_counters;
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ ret = mlx5_ib_query_cong_counters(dev, port, stats);
+ if (ret)
+ return ret;
+ num_counters += port->cnts.num_cong_counters;
+ }
+
+ return num_counters;
+}
+
+static struct net_device*
+mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *))
+{
+ if (type != RDMA_NETDEV_IPOIB)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ return mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
+ name, setup);
+}
+
+static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
+{
+ return mlx5_rdma_netdev_free(netdev);
}
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
@@ -3449,6 +3680,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
+ dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev;
+ dev->ib_dev.free_rdma_netdev = mlx5_ib_free_rdma_netdev;
if (mlx5_core_is_pf(mdev)) {
dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
@@ -3523,14 +3756,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
goto err_rsrc;
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
- err = mlx5_ib_alloc_q_counters(dev);
+ err = mlx5_ib_alloc_counters(dev);
if (err)
goto err_odp;
}
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
if (!dev->mdev->priv.uar)
- goto err_q_cnt;
+ goto err_cnt;
err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
if (err)
@@ -3574,9 +3807,9 @@ err_bfreg:
err_uar_page:
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
-err_q_cnt:
+err_cnt:
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
- mlx5_ib_dealloc_q_counters(dev);
+ mlx5_ib_dealloc_counters(dev);
err_odp:
mlx5_ib_odp_remove_one(dev);
@@ -3610,7 +3843,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
- mlx5_ib_dealloc_q_counters(dev);
+ mlx5_ib_dealloc_counters(dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
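One detail of the congestion counters added in main.c: INIT_CONG_COUNTER() records the offset of the *_high word, and mlx5_ib_query_cong_counters() then does a big-endian 64-bit read from that offset, so the high and low 32-bit words are consumed as one value. A standalone sketch of such a read (buffer layout is purely illustrative):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Read a big-endian u64 that starts at 'offset' inside 'buf'. */
static uint64_t read_be64(const uint8_t *buf, size_t offset)
{
	uint64_t v = 0;

	for (int i = 0; i < 8; i++)
		v = (v << 8) | buf[offset + i];
	return v;
}

int main(void)
{
	uint8_t out[32] = { 0 };
	/* counter value 0x0000000100000002 stored big endian at offset 8,
	 * i.e. the high word at byte 8 and the low word at byte 12 */
	const uint8_t be[8] = { 0, 0, 0, 1, 0, 0, 0, 2 };

	memcpy(&out[8], be, sizeof(be));
	assert(read_be64(out, 8) == 0x0000000100000002ULL);
	return 0;
}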
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 778d8a18925f..914f212e7ef6 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -59,15 +59,14 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
u64 pfn;
struct scatterlist *sg;
int entry;
- unsigned long page_shift = ilog2(umem->page_size);
+ unsigned long page_shift = umem->page_shift;
- /* With ODP we must always match OS page size. */
if (umem->odp_data) {
- *count = ib_umem_page_count(umem);
- *shift = PAGE_SHIFT;
- *ncont = *count;
+ *ncont = ib_umem_page_count(umem);
+ *count = *ncont << (page_shift - PAGE_SHIFT);
+ *shift = page_shift;
if (order)
- *order = ilog2(roundup_pow_of_two(*count));
+ *order = ilog2(roundup_pow_of_two(*ncont));
return;
}
@@ -156,7 +155,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int page_shift, size_t offset, size_t num_pages,
__be64 *pas, int access_flags)
{
- unsigned long umem_page_shift = ilog2(umem->page_size);
+ unsigned long umem_page_shift = umem->page_shift;
int shift = page_shift - umem_page_shift;
int mask = (1 << shift) - 1;
int i, k, idx;
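In the mem.c hunk above the ODP branch now reports sizes in the umem's own page size: *ncont counts hardware pages and *count the equivalent CPU pages, related by the difference of the two shifts. A sketch of that relation, assuming 4 KiB CPU pages and 2 MiB hardware pages (the sizes are an assumption for illustration):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	const unsigned int cpu_shift = 12;	/* 4 KiB CPU pages */
	const unsigned int hw_shift = 21;	/* 2 MiB hardware pages */
	uint64_t ncont = 3;			/* hardware pages */
	uint64_t count = ncont << (hw_shift - cpu_shift);

	assert(count == 3 * 512);		/* 512 CPU pages per 2 MiB page */
	return 0;
}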
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index ce8ba617d46e..38c877bc45e5 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -513,6 +513,7 @@ struct mlx5_ib_mr {
struct mlx5_ib_mw {
struct ib_mw ibmw;
struct mlx5_core_mkey mmkey;
+ int ndescs;
};
struct mlx5_ib_umr_context {
@@ -595,15 +596,16 @@ struct mlx5_ib_resources {
struct mutex mutex;
};
-struct mlx5_ib_q_counters {
+struct mlx5_ib_counters {
const char **names;
size_t *offsets;
- u32 num_counters;
+ u32 num_q_counters;
+ u32 num_cong_counters;
u16 set_id;
};
struct mlx5_ib_port {
- struct mlx5_ib_q_counters q_cnts;
+ struct mlx5_ib_counters cnts;
};
struct mlx5_roce {
@@ -738,9 +740,9 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
struct ib_udata *udata);
-int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
int mlx5_ib_destroy_ah(struct ib_ah *ah);
struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index b8f9382a8b7d..366433f71b58 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1009,7 +1009,7 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
}
if (!xlt) {
- uctx = to_mucontext(mr->ibmr.uobject->context);
+ uctx = to_mucontext(mr->ibmr.pd->uobject->context);
mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
size = PAGE_SIZE;
xlt = (void *)uctx->upd_xlt_page;
@@ -1045,8 +1045,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
for (pages_mapped = 0;
pages_mapped < pages_to_map && !err;
pages_mapped += pages_iter, idx += pages_iter) {
+ npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
- npages = populate_xlt(mr, idx, pages_iter, xlt,
+ npages = populate_xlt(mr, idx, npages, xlt,
page_shift, size, flags);
dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
@@ -1687,6 +1688,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
mw->mmkey.type = MLX5_MKEY_MW;
mw->ibmw.rkey = mw->mmkey.key;
+ mw->ndescs = ndescs;
resp.response_length = min(offsetof(typeof(resp), response_length) +
sizeof(resp.response_length), udata->outlen);
@@ -1782,7 +1784,7 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
klms[i].key = cpu_to_be32(lkey);
- mr->ibmr.length += sg_dma_len(sg);
+ mr->ibmr.length += sg_dma_len(sg) - sg_offset;
sg_offset = 0;
}
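The last mr.c hunk corrects the reported MR length when the first scatter entry starts at a non-zero sg_offset: only the bytes mapped after the offset are counted, and the offset applies to the first entry only. A tiny sketch of that accounting with made-up lengths:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t sg_len[2] = { 4096, 4096 };
	uint32_t sg_offset = 512;	/* offset into the first entry only */
	uint64_t length = 0;

	for (int i = 0; i < 2; i++) {
		length += sg_len[i] - sg_offset;
		sg_offset = 0;		/* subsequent entries start at 0 */
	}

	assert(length == 4096 + 4096 - 512);
	return 0;
}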
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index d7b12f0750e2..ae0746754008 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -57,7 +57,7 @@ static int check_parent(struct ib_umem_odp *odp,
{
struct mlx5_ib_mr *mr = odp->private;
- return mr && mr->parent == parent;
+ return mr && mr->parent == parent && !odp->dying;
}
static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
@@ -158,13 +158,6 @@ static void mr_leaf_free_action(struct work_struct *work)
mr->parent = NULL;
synchronize_srcu(&mr->dev->mr_srcu);
- if (!READ_ONCE(odp->dying)) {
- mr->parent = imr;
- if (atomic_dec_and_test(&imr->num_leaf_free))
- wake_up(&imr->q_leaf_free);
- return;
- }
-
ib_umem_release(odp->umem);
if (imr->live)
mlx5_ib_update_xlt(imr, idx, 1, 0,
@@ -206,8 +199,8 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
* but they will write 0s as well, so no difference in the end result.
*/
- for (addr = start; addr < end; addr += (u64)umem->page_size) {
- idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+ for (addr = start; addr < end; addr += BIT(umem->page_shift)) {
+ idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
/*
* Strive to write the MTTs in chunks, but avoid overwriting
* non-existing MTTs. The heuristic here can be improved to
@@ -225,8 +218,7 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
if (in_block && umr_offset == 0) {
mlx5_ib_update_xlt(mr, blk_start_idx,
- idx - blk_start_idx,
- PAGE_SHIFT,
+ idx - blk_start_idx, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
@@ -235,8 +227,7 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
}
if (in_block)
mlx5_ib_update_xlt(mr, blk_start_idx,
- idx - blk_start_idx + 1,
- PAGE_SHIFT,
+ idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
/*
@@ -297,24 +288,6 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
return;
}
-static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
- u32 key)
-{
- u32 base_key = mlx5_base_mkey(key);
- struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key);
- struct mlx5_ib_mr *mr;
-
- if (!mmkey || mmkey->key != key || mmkey->type != MLX5_MKEY_MR)
- return NULL;
-
- mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
-
- if (!mr->live)
- return NULL;
-
- return container_of(mmkey, struct mlx5_ib_mr, mmkey);
-}
-
static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
struct mlx5_pagefault *pfault,
int error)
@@ -436,8 +409,6 @@ next_mr:
nentries++;
}
- odp->dying = 0;
-
/* Return first odp if region not covered by single one */
if (likely(!result))
result = odp;
@@ -522,77 +493,38 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
}
-/*
- * Handle a single data segment in a page-fault WQE or RDMA region.
- *
- * Returns number of pages retrieved on success. The caller may continue to
- * the next data segment.
- * Can return the following error codes:
- * -EAGAIN to designate a temporary error. The caller will abort handling the
- * page fault and resolve it.
- * -EFAULT when there's an error mapping the requested pages. The caller will
- * abort the page fault handling.
- */
-static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
- u32 key, u64 io_virt, size_t bcnt,
- u32 *bytes_committed,
- u32 *bytes_mapped)
+static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+ u64 io_virt, size_t bcnt, u32 *bytes_mapped)
{
- int srcu_key;
- unsigned int current_seq = 0;
- u64 start_idx;
- int npages = 0, ret = 0;
- struct mlx5_ib_mr *mr;
u64 access_mask = ODP_READ_ALLOWED_BIT;
+ int npages = 0, page_shift, np;
+ u64 start_idx, page_mask;
struct ib_umem_odp *odp;
- int implicit = 0;
+ int current_seq;
size_t size;
-
- srcu_key = srcu_read_lock(&dev->mr_srcu);
- mr = mlx5_ib_odp_find_mr_lkey(dev, key);
- /*
- * If we didn't find the MR, it means the MR was closed while we were
- * handling the ODP event. In this case we return -EFAULT so that the
- * QP will be closed.
- */
- if (!mr || !mr->ibmr.pd) {
- mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
- key);
- ret = -EFAULT;
- goto srcu_unlock;
- }
- if (!mr->umem->odp_data) {
- mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
- key);
- if (bytes_mapped)
- *bytes_mapped +=
- (bcnt - *bytes_committed);
- goto srcu_unlock;
- }
-
- /*
- * Avoid branches - this code will perform correctly
- * in all iterations (in iteration 2 and above,
- * bytes_committed == 0).
- */
- io_virt += *bytes_committed;
- bcnt -= *bytes_committed;
+ int ret;
if (!mr->umem->odp_data->page_list) {
odp = implicit_mr_get_data(mr, io_virt, bcnt);
- if (IS_ERR(odp)) {
- ret = PTR_ERR(odp);
- goto srcu_unlock;
- }
+ if (IS_ERR(odp))
+ return PTR_ERR(odp);
mr = odp->private;
- implicit = 1;
} else {
odp = mr->umem->odp_data;
}
next_mr:
+ size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
+
+ page_shift = mr->umem->page_shift;
+ page_mask = ~(BIT(page_shift) - 1);
+ start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+
+ if (mr->umem->writable)
+ access_mask |= ODP_WRITE_ALLOWED_BIT;
+
current_seq = READ_ONCE(odp->notifiers_seq);
/*
* Ensure the sequence number is valid for some time before we call
@@ -600,51 +532,43 @@ next_mr:
*/
smp_rmb();
- size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
- start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
-
- if (mr->umem->writable)
- access_mask |= ODP_WRITE_ALLOWED_BIT;
-
ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
access_mask, current_seq);
if (ret < 0)
- goto srcu_unlock;
+ goto out;
- if (ret > 0) {
- int np = ret;
-
- mutex_lock(&odp->umem_mutex);
- if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
- /*
- * No need to check whether the MTTs really belong to
- * this MR, since ib_umem_odp_map_dma_pages already
- * checks this.
- */
- ret = mlx5_ib_update_xlt(mr, start_idx, np,
- PAGE_SHIFT,
- MLX5_IB_UPD_XLT_ATOMIC);
- } else {
- ret = -EAGAIN;
- }
- mutex_unlock(&odp->umem_mutex);
- if (ret < 0) {
- if (ret != -EAGAIN)
- mlx5_ib_err(dev, "Failed to update mkey page tables\n");
- goto srcu_unlock;
- }
+ np = ret;
- if (bytes_mapped) {
- u32 new_mappings = np * PAGE_SIZE -
- (io_virt - round_down(io_virt, PAGE_SIZE));
- *bytes_mapped += min_t(u32, new_mappings, size);
- }
+ mutex_lock(&odp->umem_mutex);
+ if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+ /*
+ * No need to check whether the MTTs really belong to
+ * this MR, since ib_umem_odp_map_dma_pages already
+ * checks this.
+ */
+ ret = mlx5_ib_update_xlt(mr, start_idx, np,
+ page_shift, MLX5_IB_UPD_XLT_ATOMIC);
+ } else {
+ ret = -EAGAIN;
+ }
+ mutex_unlock(&odp->umem_mutex);
- npages += np;
+ if (ret < 0) {
+ if (ret != -EAGAIN)
+ mlx5_ib_err(dev, "Failed to update mkey page tables\n");
+ goto out;
+ }
+
+ if (bytes_mapped) {
+ u32 new_mappings = (np << page_shift) -
+ (io_virt - round_down(io_virt, 1 << page_shift));
+ *bytes_mapped += min_t(u32, new_mappings, size);
}
+ npages += np << (page_shift - PAGE_SHIFT);
bcnt -= size;
+
if (unlikely(bcnt)) {
struct ib_umem_odp *next;
@@ -653,17 +577,18 @@ next_mr:
if (unlikely(!next || next->umem->address != io_virt)) {
mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
io_virt, next);
- ret = -EAGAIN;
- goto srcu_unlock_no_wait;
+ return -EAGAIN;
}
odp = next;
mr = odp->private;
goto next_mr;
}
-srcu_unlock:
+ return npages;
+
+out:
if (ret == -EAGAIN) {
- if (implicit || !odp->dying) {
+ if (mr->parent || !odp->dying) {
unsigned long timeout =
msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
@@ -679,7 +604,155 @@ srcu_unlock:
}
}
-srcu_unlock_no_wait:
+ return ret;
+}
+
+struct pf_frame {
+ struct pf_frame *next;
+ u32 key;
+ u64 io_virt;
+ size_t bcnt;
+ int depth;
+};
+
+/*
+ * Handle a single data segment in a page-fault WQE or RDMA region.
+ *
+ * Returns number of OS pages retrieved on success. The caller may continue to
+ * the next data segment.
+ * Can return the following error codes:
+ * -EAGAIN to designate a temporary error. The caller will abort handling the
+ * page fault and resolve it.
+ * -EFAULT when there's an error mapping the requested pages. The caller will
+ * abort the page fault handling.
+ */
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
+ u32 key, u64 io_virt, size_t bcnt,
+ u32 *bytes_committed,
+ u32 *bytes_mapped)
+{
+ int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0;
+ struct pf_frame *head = NULL, *frame;
+ struct mlx5_core_mkey *mmkey;
+ struct mlx5_ib_mw *mw;
+ struct mlx5_ib_mr *mr;
+ struct mlx5_klm *pklm;
+ u32 *out = NULL;
+ size_t offset;
+
+ srcu_key = srcu_read_lock(&dev->mr_srcu);
+
+ io_virt += *bytes_committed;
+ bcnt -= *bytes_committed;
+
+next_mr:
+ mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key));
+ if (!mmkey || mmkey->key != key) {
+ mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ switch (mmkey->type) {
+ case MLX5_MKEY_MR:
+ mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ if (!mr->live || !mr->ibmr.pd) {
+ mlx5_ib_dbg(dev, "got dead MR\n");
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped);
+ if (ret < 0)
+ goto srcu_unlock;
+
+ npages += ret;
+ ret = 0;
+ break;
+
+ case MLX5_MKEY_MW:
+ mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
+
+ if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
+ mlx5_ib_dbg(dev, "indirection level exceeded\n");
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
+ sizeof(*pklm) * (mw->ndescs - 2);
+
+ if (outlen > cur_outlen) {
+ kfree(out);
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out) {
+ ret = -ENOMEM;
+ goto srcu_unlock;
+ }
+ cur_outlen = outlen;
+ }
+
+ pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out,
+ bsf0_klm0_pas_mtt0_1);
+
+ ret = mlx5_core_query_mkey(dev->mdev, &mw->mmkey, out, outlen);
+ if (ret)
+ goto srcu_unlock;
+
+ offset = io_virt - MLX5_GET64(query_mkey_out, out,
+ memory_key_mkey_entry.start_addr);
+
+ for (i = 0; bcnt && i < mw->ndescs; i++, pklm++) {
+ if (offset >= be32_to_cpu(pklm->bcount)) {
+ offset -= be32_to_cpu(pklm->bcount);
+ continue;
+ }
+
+ frame = kzalloc(sizeof(*frame), GFP_KERNEL);
+ if (!frame) {
+ ret = -ENOMEM;
+ goto srcu_unlock;
+ }
+
+ frame->key = be32_to_cpu(pklm->key);
+ frame->io_virt = be64_to_cpu(pklm->va) + offset;
+ frame->bcnt = min_t(size_t, bcnt,
+ be32_to_cpu(pklm->bcount) - offset);
+ frame->depth = depth + 1;
+ frame->next = head;
+ head = frame;
+
+ bcnt -= frame->bcnt;
+ }
+ break;
+
+ default:
+ mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type);
+ ret = -EFAULT;
+ goto srcu_unlock;
+ }
+
+ if (head) {
+ frame = head;
+ head = frame->next;
+
+ key = frame->key;
+ io_virt = frame->io_virt;
+ bcnt = frame->bcnt;
+ depth = frame->depth;
+ kfree(frame);
+
+ goto next_mr;
+ }
+
+srcu_unlock:
+ while (head) {
+ frame = head;
+ head = frame->next;
+ kfree(frame);
+ }
+ kfree(out);
+
srcu_read_unlock(&dev->mr_srcu, srcu_key);
*bytes_committed = 0;
return ret ? ret : npages;
@@ -997,9 +1070,6 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
resume_with_error = 0;
goto resolve_page_fault;
} else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
- if (ret != -ENOENT)
- mlx5_ib_err(dev, "PAGE FAULT error: %d. QP 0x%x. type: 0x%x\n",
- ret, pfault->wqe.wq_num, pfault->type);
goto resolve_page_fault;
}
@@ -1059,8 +1129,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
} else if (ret < 0 || pages_in_range(address, length) > ret) {
mlx5_ib_page_fault_resume(dev, pfault, 1);
if (ret != -ENOENT)
- mlx5_ib_warn(dev, "PAGE FAULT error %d. QP 0x%x, type: 0x%x\n",
- ret, pfault->token, pfault->type);
+ mlx5_ib_dbg(dev, "PAGE FAULT error %d. QP 0x%x, type: 0x%x\n",
+ ret, pfault->token, pfault->type);
return;
}
@@ -1081,8 +1151,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
prefetch_len,
&bytes_committed, NULL);
if (ret < 0 && ret != -EAGAIN) {
- mlx5_ib_warn(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
- ret, pfault->token, address, prefetch_len);
+ mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
+ ret, pfault->token, address, prefetch_len);
}
}
}
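The pf_frame machinery added to odp.c turns the page-fault handler into an iterative walk over indirect mkeys: each KLM that still needs pages is pushed as a frame, and the loop at next_mr pops frames until the stack is empty, so deep indirection needs no recursion. A generic sketch of that push/pop pattern in plain C (the work item is made up; this is not the driver code):

#include <assert.h>
#include <stdlib.h>

struct frame {
	struct frame *next;
	int depth;
};

int main(void)
{
	struct frame *head = NULL, *f;
	int visited = 0;

	/* "push" three child work items, as the MW branch does for each KLM */
	for (int i = 0; i < 3; i++) {
		f = calloc(1, sizeof(*f));
		if (!f)
			return 1;
		f->depth = i + 1;
		f->next = head;
		head = f;
	}

	/* "pop" until the stack is empty, like the goto next_mr loop */
	while (head) {
		f = head;
		head = f->next;
		visited++;
		free(f);
	}

	assert(visited == 3);
	return 0;
}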
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index ed6320186f89..93959e1e43a3 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2206,63 +2206,65 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
}
static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- const struct ib_ah_attr *ah,
+ const struct rdma_ah_attr *ah,
struct mlx5_qp_path *path, u8 port, int attr_mask,
u32 path_flags, const struct ib_qp_attr *attr,
bool alt)
{
- enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah);
int err;
enum ib_gid_type gid_type;
+ u8 ah_flags = rdma_ah_get_ah_flags(ah);
+ u8 sl = rdma_ah_get_sl(ah);
if (attr_mask & IB_QP_PKEY_INDEX)
path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index :
attr->pkey_index);
- if (ah->ah_flags & IB_AH_GRH) {
- if (ah->grh.sgid_index >=
+ if (ah_flags & IB_AH_GRH) {
+ if (grh->sgid_index >=
dev->mdev->port_caps[port - 1].gid_table_len) {
pr_err("sgid_index (%u) too large. max is %d\n",
- ah->grh.sgid_index,
+ grh->sgid_index,
dev->mdev->port_caps[port - 1].gid_table_len);
return -EINVAL;
}
}
- if (ll == IB_LINK_LAYER_ETHERNET) {
- if (!(ah->ah_flags & IB_AH_GRH))
+ if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ if (!(ah_flags & IB_AH_GRH))
return -EINVAL;
- err = mlx5_get_roce_gid_type(dev, port, ah->grh.sgid_index,
+ err = mlx5_get_roce_gid_type(dev, port, grh->sgid_index,
&gid_type);
if (err)
return err;
- memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
+ memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac));
path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
- ah->grh.sgid_index);
- path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+ grh->sgid_index);
+ path->dci_cfi_prio_sl = (sl & 0x7) << 4;
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
- path->ecn_dscp = (ah->grh.traffic_class >> 2) & 0x3f;
+ path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f;
} else {
path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
path->fl_free_ar |=
(path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0;
- path->rlid = cpu_to_be16(ah->dlid);
- path->grh_mlid = ah->src_path_bits & 0x7f;
- if (ah->ah_flags & IB_AH_GRH)
+ path->rlid = cpu_to_be16(rdma_ah_get_dlid(ah));
+ path->grh_mlid = rdma_ah_get_path_bits(ah) & 0x7f;
+ if (ah_flags & IB_AH_GRH)
path->grh_mlid |= 1 << 7;
- path->dci_cfi_prio_sl = ah->sl & 0xf;
+ path->dci_cfi_prio_sl = sl & 0xf;
}
- if (ah->ah_flags & IB_AH_GRH) {
- path->mgid_index = ah->grh.sgid_index;
- path->hop_limit = ah->grh.hop_limit;
+ if (ah_flags & IB_AH_GRH) {
+ path->mgid_index = grh->sgid_index;
+ path->hop_limit = grh->hop_limit;
path->tclass_flowlabel =
- cpu_to_be32((ah->grh.traffic_class << 20) |
- (ah->grh.flow_label));
- memcpy(path->rgid, ah->grh.dgid.raw, 16);
+ cpu_to_be32((grh->traffic_class << 20) |
+ (grh->flow_label));
+ memcpy(path->rgid, grh->dgid.raw, 16);
}
- err = ib_rate_to_mlx5(dev, ah->static_rate);
+ err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah));
if (err < 0)
return err;
path->static_rate = err;
@@ -2274,7 +2276,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
return modify_raw_packet_eth_prio(dev->mdev,
&qp->raw_packet_qp.sq,
- ah->sl & 0xf);
+ sl & 0xf);
return 0;
}
@@ -2799,7 +2801,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->port) - 1;
mibport = &dev->port[port_num];
context->qp_counter_set_usr_page |=
- cpu_to_be32((u32)(mibport->q_cnts.set_id) << 24);
+ cpu_to_be32((u32)(mibport->cnts.set_id) << 24);
}
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
@@ -2827,7 +2829,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
raw_qp_param.operation = op;
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- raw_qp_param.rq_q_ctr_id = mibport->q_cnts.set_id;
+ raw_qp_param.rq_q_ctr_id = mibport->cnts.set_id;
raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
}
@@ -4249,33 +4251,36 @@ static int to_ib_qp_access_flags(int mlx5_flags)
return ib_flags;
}
-static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
- struct mlx5_qp_path *path)
+static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev,
+ struct rdma_ah_attr *ah_attr,
+ struct mlx5_qp_path *path)
{
struct mlx5_core_dev *dev = ibdev->mdev;
- memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
- ib_ah_attr->port_num = path->port;
+ memset(ah_attr, 0, sizeof(*ah_attr));
- if (ib_ah_attr->port_num == 0 ||
- ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
+ ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, path->port);
+ rdma_ah_set_port_num(ah_attr, path->port);
+ if (rdma_ah_get_port_num(ah_attr) == 0 ||
+ rdma_ah_get_port_num(ah_attr) > MLX5_CAP_GEN(dev, num_ports))
return;
- ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
+ rdma_ah_set_port_num(ah_attr, path->port);
+ rdma_ah_set_sl(ah_attr, path->dci_cfi_prio_sl & 0xf);
- ib_ah_attr->dlid = be16_to_cpu(path->rlid);
- ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
- ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
- ib_ah_attr->ah_flags = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0;
- if (ib_ah_attr->ah_flags) {
- ib_ah_attr->grh.sgid_index = path->mgid_index;
- ib_ah_attr->grh.hop_limit = path->hop_limit;
- ib_ah_attr->grh.traffic_class =
- (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
- ib_ah_attr->grh.flow_label =
- be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
- memcpy(ib_ah_attr->grh.dgid.raw,
- path->rgid, sizeof(ib_ah_attr->grh.dgid.raw));
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(path->rlid));
+ rdma_ah_set_path_bits(ah_attr, path->grh_mlid & 0x7f);
+ rdma_ah_set_static_rate(ah_attr,
+ path->static_rate ? path->static_rate - 5 : 0);
+ if (path->grh_mlid & (1 << 7)) {
+ u32 tc_fl = be32_to_cpu(path->tclass_flowlabel);
+
+ rdma_ah_set_grh(ah_attr, NULL,
+ tc_fl & 0xfffff,
+ path->mgid_index,
+ path->hop_limit,
+ (tc_fl >> 20) & 0xff);
+ rdma_ah_set_dgid_raw(ah_attr, path->rgid);
}
}
@@ -4440,11 +4445,12 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
to_ib_qp_access_flags(be32_to_cpu(context->params2));
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
- to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
- to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+ to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+ to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
qp_attr->alt_pkey_index =
be16_to_cpu(context->alt_path.pkey_index);
- qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
+ qp_attr->alt_port_num =
+ rdma_ah_get_port_num(&qp_attr->alt_ah_attr);
}
qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index);
@@ -4965,7 +4971,8 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
MLX5_SET64(modify_rq_in, in, modify_bitmask,
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
- MLX5_SET(rqc, rqc, counter_set_id, dev->port->q_cnts.set_id);
+ MLX5_SET(rqc, rqc, counter_set_id,
+ dev->port->cnts.set_id);
} else
pr_info_once("%s: Receive WQ counters are not supported on current FW\n",
dev->ib_dev.name);
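
A note on the conversion pattern above: the mlx5_set_path() and to_rdma_ah_attr() hunks replace direct ib_ah_attr field access with the rdma_ah_* accessors and read the GRH through rdma_ah_read_grh(). A minimal sketch of that accessor pattern, using only helpers already visible in these hunks (the function name and pr_debug output are illustrative, not part of the patch):

	/* Sketch of the rdma_ah_attr accessor pattern used throughout this series. */
	#include <linux/printk.h>
	#include <rdma/ib_verbs.h>

	static void dump_ah_attr_sketch(const struct rdma_ah_attr *ah)
	{
		/* replaces ah->dlid, ah->sl, ah->static_rate, ah->src_path_bits */
		pr_debug("dlid %u sl %u rate %d path_bits 0x%x\n",
			 rdma_ah_get_dlid(ah), rdma_ah_get_sl(ah),
			 rdma_ah_get_static_rate(ah), rdma_ah_get_path_bits(ah));

		if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) {
			/* replaces direct access to ah->grh.* */
			const struct ib_global_route *grh = rdma_ah_read_grh(ah);

			pr_debug("sgid_index %u hop_limit %u tclass 0x%x\n",
				 grh->sgid_index, grh->hop_limit,
				 grh->traffic_class);
		}
	}
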
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index c9f0f364f484..2aec9908c40a 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -152,7 +152,7 @@ u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port)
int mthca_create_ah(struct mthca_dev *dev,
struct mthca_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct mthca_ah *ah)
{
u32 index = -1;
@@ -196,21 +196,26 @@ on_hca_fail:
ah->key = pd->ntmr.ibmr.lkey;
- av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24));
- av->g_slid = ah_attr->src_path_bits;
- av->dlid = cpu_to_be16(ah_attr->dlid);
+ av->port_pd = cpu_to_be32(pd->pd_num |
+ (rdma_ah_get_port_num(ah_attr) << 24));
+ av->g_slid = rdma_ah_get_path_bits(ah_attr);
+ av->dlid = cpu_to_be16(rdma_ah_get_dlid(ah_attr));
av->msg_sr = (3 << 4) | /* 2K message */
- mthca_get_rate(dev, ah_attr->static_rate, ah_attr->port_num);
- av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ mthca_get_rate(dev, rdma_ah_get_static_rate(ah_attr),
+ rdma_ah_get_port_num(ah_attr));
+ av->sl_tclass_flowlabel = cpu_to_be32(rdma_ah_get_sl(ah_attr) << 28);
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+
av->g_slid |= 0x80;
- av->gid_index = (ah_attr->port_num - 1) * dev->limits.gid_table_len +
- ah_attr->grh.sgid_index;
- av->hop_limit = ah_attr->grh.hop_limit;
+ av->gid_index = (rdma_ah_get_port_num(ah_attr) - 1) *
+ dev->limits.gid_table_len +
+ grh->sgid_index;
+ av->hop_limit = grh->hop_limit;
av->sl_tclass_flowlabel |=
- cpu_to_be32((ah_attr->grh.traffic_class << 20) |
- ah_attr->grh.flow_label);
- memcpy(av->dgid, ah_attr->grh.dgid.raw, 16);
+ cpu_to_be32((grh->traffic_class << 20) |
+ grh->flow_label);
+ memcpy(av->dgid, grh->dgid.raw, 16);
} else {
/* Arbel workaround -- low byte of GID must be 2 */
av->dgid[3] = cpu_to_be32(2);
@@ -287,33 +292,35 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
return 0;
}
-int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr)
+int mthca_ah_query(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
struct mthca_ah *ah = to_mah(ibah);
struct mthca_dev *dev = to_mdev(ibah->device);
+ u8 port_num = be32_to_cpu(ah->av->port_pd) >> 24;
/* Only implement for MAD and memfree ah for now. */
if (ah->type == MTHCA_AH_ON_HCA)
return -ENOSYS;
memset(attr, 0, sizeof *attr);
- attr->dlid = be16_to_cpu(ah->av->dlid);
- attr->sl = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
- attr->port_num = be32_to_cpu(ah->av->port_pd) >> 24;
- attr->static_rate = mthca_rate_to_ib(dev, ah->av->msg_sr & 0x7,
- attr->port_num);
- attr->src_path_bits = ah->av->g_slid & 0x7F;
- attr->ah_flags = mthca_ah_grh_present(ah) ? IB_AH_GRH : 0;
-
- if (attr->ah_flags) {
- attr->grh.traffic_class =
- be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20;
- attr->grh.flow_label =
- be32_to_cpu(ah->av->sl_tclass_flowlabel) & 0xfffff;
- attr->grh.hop_limit = ah->av->hop_limit;
- attr->grh.sgid_index = ah->av->gid_index &
- (dev->limits.gid_table_len - 1);
- memcpy(attr->grh.dgid.raw, ah->av->dgid, 16);
+ attr->type = ibah->type;
+ rdma_ah_set_dlid(attr, be16_to_cpu(ah->av->dlid));
+ rdma_ah_set_sl(attr, be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28);
+ rdma_ah_set_port_num(attr, port_num);
+ rdma_ah_set_static_rate(attr,
+ mthca_rate_to_ib(dev, ah->av->msg_sr & 0x7,
+ port_num));
+ rdma_ah_set_path_bits(attr, ah->av->g_slid & 0x7F);
+ if (mthca_ah_grh_present(ah)) {
+ u32 tc_fl = be32_to_cpu(ah->av->sl_tclass_flowlabel);
+
+ rdma_ah_set_grh(attr, NULL,
+ tc_fl & 0xfffff,
+ ah->av->gid_index &
+ (dev->limits.gid_table_len - 1),
+ ah->av->hop_limit,
+ (tc_fl >> 20) & 0xff);
+ rdma_ah_set_dgid_raw(attr, ah->av->dgid);
}
return 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index c7f49bbb0c72..9d83a53c0c67 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -367,12 +367,16 @@ static int mthca_cmd_poll(struct mthca_dev *dev,
goto out;
}
- if (out_is_imm)
+ if (out_is_imm && out_param) {
*out_param =
(u64) be32_to_cpu((__force __be32)
__raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
(u64) be32_to_cpu((__force __be32)
__raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4));
+ } else if (out_is_imm) {
+ err = -EINVAL;
+ goto out;
+ }
status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24;
if (status) {
@@ -450,8 +454,12 @@ static int mthca_cmd_wait(struct mthca_dev *dev,
err = mthca_status_to_errno(context->status);
}
- if (out_is_imm)
+ if (out_is_imm && out_param) {
*out_param = context->out_param;
+ } else if (out_is_imm) {
+ err = -EINVAL;
+ goto out;
+ }
out:
spin_lock(&dev->cmd.context_lock);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 4393a022867b..ec7da9a474cd 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -560,12 +560,12 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
int mthca_create_ah(struct mthca_dev *dev,
struct mthca_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct mthca_ah *ah);
int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah);
int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
struct ib_ud_header *header);
-int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr);
+int mthca_ah_query(struct ib_ah *ibah, struct rdma_ah_attr *attr);
int mthca_ah_grh_present(struct mthca_ah *ah);
u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port);
enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port);
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 9139405c4810..7df3db71777a 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -75,25 +75,26 @@ static void update_sm_ah(struct mthca_dev *dev,
u8 port_num, u16 lid, u8 sl)
{
struct ib_ah *new_ah;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
if (!dev->send_agent[port_num - 1][0])
return;
memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = lid;
- ah_attr.sl = sl;
- ah_attr.port_num = port_num;
+ ah_attr.type = rdma_ah_find_type(&dev->ib_dev, port_num);
+ rdma_ah_set_dlid(&ah_attr, lid);
+ rdma_ah_set_sl(&ah_attr, sl);
+ rdma_ah_set_port_num(&ah_attr, port_num);
- new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
- &ah_attr);
+ new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
+ &ah_attr);
if (IS_ERR(new_ah))
return;
spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
- ib_destroy_ah(dev->sm_ah[port_num - 1]);
+ rdma_destroy_ah(dev->sm_ah[port_num - 1]);
dev->sm_ah[port_num - 1] = new_ah;
spin_unlock_irqrestore(&dev->sm_lock, flags);
}
@@ -345,6 +346,6 @@ void mthca_free_agents(struct mthca_dev *dev)
}
if (dev->sm_ah[p])
- ib_destroy_ah(dev->sm_ah[p]);
+ rdma_destroy_ah(dev->sm_ah[p]);
}
}
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 22d0e6ee5af6..c197cd9b193f 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -410,7 +410,7 @@ static int mthca_dealloc_pd(struct ib_pd *pd)
}
static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
@@ -937,7 +937,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err;
}
- shift = ffs(mr->umem->page_size) - 1;
+ shift = mr->umem->page_shift;
n = mr->umem->nmap;
mr->mtt = mthca_alloc_mtt(dev, n);
@@ -959,8 +959,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(sg) +
- mr->umem->page_size * k;
+ pages[i++] = sg_dma_address(sg) + (k << shift);
/*
* Be friendly to write_mtt and pass it chunks
* of appropriate size.
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 96e5fb91fb48..d21960cd9a49 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -393,31 +393,36 @@ static int to_ib_qp_access_flags(int mthca_flags)
return ib_flags;
}
-static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
- struct mthca_qp_path *path)
+static void to_rdma_ah_attr(struct mthca_dev *dev,
+ struct rdma_ah_attr *ah_attr,
+ struct mthca_qp_path *path)
{
- memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
- ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
+ u8 port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
- if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports)
- return;
+ memset(ah_attr, 0, sizeof(*ah_attr));
- ib_ah_attr->dlid = be16_to_cpu(path->rlid);
- ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
- ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
- ib_ah_attr->static_rate = mthca_rate_to_ib(dev,
- path->static_rate & 0xf,
- ib_ah_attr->port_num);
- ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
- if (ib_ah_attr->ah_flags) {
- ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1);
- ib_ah_attr->grh.hop_limit = path->hop_limit;
- ib_ah_attr->grh.traffic_class =
- (be32_to_cpu(path->sl_tclass_flowlabel) >> 20) & 0xff;
- ib_ah_attr->grh.flow_label =
- be32_to_cpu(path->sl_tclass_flowlabel) & 0xfffff;
- memcpy(ib_ah_attr->grh.dgid.raw,
- path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
+ if (port_num == 0 || port_num > dev->limits.num_ports)
+ return;
+ ah_attr->type = rdma_ah_find_type(&dev->ib_dev, port_num);
+ rdma_ah_set_port_num(ah_attr, port_num);
+
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(path->rlid));
+ rdma_ah_set_sl(ah_attr, be32_to_cpu(path->sl_tclass_flowlabel) >> 28);
+ rdma_ah_set_path_bits(ah_attr, path->g_mylmc & 0x7f);
+ rdma_ah_set_static_rate(ah_attr,
+ mthca_rate_to_ib(dev,
+ path->static_rate & 0xf,
+ port_num));
+ if (path->g_mylmc & (1 << 7)) {
+ u32 tc_fl = be32_to_cpu(path->sl_tclass_flowlabel);
+
+ rdma_ah_set_grh(ah_attr, NULL,
+ tc_fl & 0xfffff,
+ path->mgid_index &
+ (dev->limits.gid_table_len - 1),
+ path->hop_limit,
+ (tc_fl >> 20) & 0xff);
+ rdma_ah_set_dgid_raw(ah_attr, path->rgid);
}
}
@@ -468,11 +473,12 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
to_ib_qp_access_flags(be32_to_cpu(context->params2));
if (qp->transport == RC || qp->transport == UC) {
- to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
- to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+ to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+ to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
qp_attr->alt_pkey_index =
be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
- qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
+ qp_attr->alt_port_num =
+ rdma_ah_get_port_num(&qp_attr->alt_ah_attr);
}
qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
@@ -512,30 +518,36 @@ out:
return err;
}
-static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah,
+static int mthca_path_set(struct mthca_dev *dev, const struct rdma_ah_attr *ah,
struct mthca_qp_path *path, u8 port)
{
- path->g_mylmc = ah->src_path_bits & 0x7f;
- path->rlid = cpu_to_be16(ah->dlid);
- path->static_rate = mthca_get_rate(dev, ah->static_rate, port);
+ path->g_mylmc = rdma_ah_get_path_bits(ah) & 0x7f;
+ path->rlid = cpu_to_be16(rdma_ah_get_dlid(ah));
+ path->static_rate = mthca_get_rate(dev, rdma_ah_get_static_rate(ah),
+ port);
+
+ if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah);
- if (ah->ah_flags & IB_AH_GRH) {
- if (ah->grh.sgid_index >= dev->limits.gid_table_len) {
+ if (grh->sgid_index >= dev->limits.gid_table_len) {
mthca_dbg(dev, "sgid_index (%u) too large. max is %d\n",
- ah->grh.sgid_index, dev->limits.gid_table_len-1);
+ grh->sgid_index,
+ dev->limits.gid_table_len - 1);
return -1;
}
path->g_mylmc |= 1 << 7;
- path->mgid_index = ah->grh.sgid_index;
- path->hop_limit = ah->grh.hop_limit;
+ path->mgid_index = grh->sgid_index;
+ path->hop_limit = grh->hop_limit;
path->sl_tclass_flowlabel =
- cpu_to_be32((ah->sl << 28) |
- (ah->grh.traffic_class << 20) |
- (ah->grh.flow_label));
- memcpy(path->rgid, ah->grh.dgid.raw, 16);
- } else
- path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28);
+ cpu_to_be32((rdma_ah_get_sl(ah) << 28) |
+ (grh->traffic_class << 20) |
+ (grh->flow_label));
+ memcpy(path->rgid, grh->dgid.raw, 16);
+ } else {
+ path->sl_tclass_flowlabel = cpu_to_be32(rdma_ah_get_sl(ah) <<
+ 28);
+ }
return 0;
}
@@ -680,7 +692,7 @@ static int __mthca_modify_qp(struct ib_qp *ibqp,
}
if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path,
- attr->alt_ah_attr.port_num))
+ rdma_ah_get_port_num(&attr->alt_ah_attr)))
goto out_mailbox;
qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 19acd13c6cb1..8f9d8b4ad583 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1849,9 +1849,8 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
wqe_count -= counter;
nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter << 24) | nesvnic->nic.qp_id);
} while (wqe_count);
- init_timer(&nesvnic->rq_wqes_timer);
- nesvnic->rq_wqes_timer.function = nes_rq_wqes_timeout;
- nesvnic->rq_wqes_timer.data = (unsigned long)nesvnic;
+ setup_timer(&nesvnic->rq_wqes_timer, nes_rq_wqes_timeout,
+ (unsigned long)nesvnic);
nes_debug(NES_DBG_INIT, "NAPI support Enabled\n");
if (nesdev->nesadapter->et_use_adaptive_rx_coalesce)
{
@@ -3055,7 +3054,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
barrier();
- opcode = cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX];
+ opcode = le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]);
if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT)
ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX;
else
diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c
index 33624f17c347..77226cf4ea02 100644
--- a/drivers/infiniband/hw/nes/nes_mgt.c
+++ b/drivers/infiniband/hw/nes/nes_mgt.c
@@ -1040,9 +1040,8 @@ int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct
mgtvnic->mgt.rx_skb[counter] = skb;
}
- init_timer(&mgtvnic->rq_wqes_timer);
- mgtvnic->rq_wqes_timer.function = nes_mgt_rq_wqes_timeout;
- mgtvnic->rq_wqes_timer.data = (unsigned long)mgtvnic;
+ setup_timer(&mgtvnic->rq_wqes_timer, nes_mgt_rq_wqes_timeout,
+ (unsigned long)mgtvnic);
wqe_count = NES_MGT_WQ_COUNT - 1;
mgtvnic->mgt.rq_head = wqe_count;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index ccf0a4cffe9c..25dcd7573df9 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -761,7 +761,8 @@ static int nes_dealloc_pd(struct ib_pd *ibpd)
/**
* nes_create_ah
*/
-static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+static struct ib_ah *nes_create_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
return ERR_PTR(-ENOSYS);
@@ -1308,9 +1309,8 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
init_completion(&nesqp->rq_drained);
nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
- init_timer(&nesqp->terminate_timer);
- nesqp->terminate_timer.function = nes_terminate_timeout;
- nesqp->terminate_timer.data = (unsigned long)nesqp;
+ setup_timer(&nesqp->terminate_timer, nes_terminate_timeout,
+ (unsigned long)nesqp);
/* update the QP table */
nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
@@ -2165,9 +2165,9 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u,"
- " offset = %u, page size = %u.\n",
+ " offset = %u, page size = %lu.\n",
(unsigned long int)start, (unsigned long int)virt, (u32)length,
- ib_umem_offset(region), region->page_size);
+ ib_umem_offset(region), BIT(region->page_shift));
skip_pages = ((u32)ib_umem_offset(region)) >> 12;
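
The nes timer hunks above (and the qib hunks later in this series) fold open-coded init_timer() plus .function/.data assignments into a single setup_timer() call. A sketch of the equivalence, with a hypothetical callback and wrapper name:

	#include <linux/timer.h>
	#include <linux/jiffies.h>
	#include <linux/printk.h>

	static void example_timeout(unsigned long data)	/* hypothetical callback */
	{
		pr_debug("timer fired, data=0x%lx\n", data);
	}

	static void arm_example_timer(struct timer_list *t, unsigned long priv)
	{
		/* Equivalent to: init_timer(t); t->function = example_timeout;
		 * t->data = priv;
		 */
		setup_timer(t, example_timeout, priv);
		mod_timer(t, jiffies + HZ);
	}
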
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 45bdfa0e3b2b..7baedc74e39d 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -527,17 +527,17 @@ static inline int is_cqe_wr_imm(struct ocrdma_cqe *cqe)
}
static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev,
- struct ib_ah_attr *ah_attr, u8 *mac_addr)
+ struct rdma_ah_attr *ah_attr, u8 *mac_addr)
{
struct in6_addr in6;
- memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
+ memcpy(&in6, rdma_ah_read_grh(ah_attr)->dgid.raw, sizeof(in6));
if (rdma_is_multicast_addr(&in6))
rdma_get_mcast_mac(&in6, mac_addr);
else if (rdma_link_local_addr(&in6))
rdma_get_ll_mac(&in6, mac_addr);
else
- memcpy(mac_addr, ah_attr->dmac, ETH_ALEN);
+ memcpy(mac_addr, ah_attr->roce.dmac, ETH_ALEN);
return 0;
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index cd66e1e45dd7..d0249e463338 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -71,7 +71,7 @@ static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type)
}
static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
- struct ib_ah_attr *attr, union ib_gid *sgid,
+ struct rdma_ah_attr *attr, union ib_gid *sgid,
int pdid, bool *isvlan, u16 vlan_tag)
{
int status;
@@ -81,6 +81,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
u16 proto_num = 0;
u8 nxthdr = 0x11;
struct iphdr ipv4;
+ const struct ib_global_route *ib_grh;
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
@@ -120,32 +121,33 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
status = ocrdma_resolve_dmac(dev, attr, &eth.dmac[0]);
if (status)
return status;
- ah->sgid_index = attr->grh.sgid_index;
+ ib_grh = rdma_ah_read_grh(attr);
+ ah->sgid_index = ib_grh->sgid_index;
/* Eth HDR */
memcpy(&ah->av->eth_hdr, &eth, eth_sz);
if (ah->hdr_type == RDMA_NETWORK_IPV4) {
*((__be16 *)&ipv4) = htons((4 << 12) | (5 << 8) |
- attr->grh.traffic_class);
+ ib_grh->traffic_class);
ipv4.id = cpu_to_be16(pdid);
ipv4.frag_off = htons(IP_DF);
ipv4.tot_len = htons(0);
- ipv4.ttl = attr->grh.hop_limit;
+ ipv4.ttl = ib_grh->hop_limit;
ipv4.protocol = nxthdr;
rdma_gid2ip(&sgid_addr._sockaddr, sgid);
ipv4.saddr = sgid_addr._sockaddr_in.sin_addr.s_addr;
- rdma_gid2ip(&dgid_addr._sockaddr, &attr->grh.dgid);
+ rdma_gid2ip(&dgid_addr._sockaddr, &ib_grh->dgid);
ipv4.daddr = dgid_addr._sockaddr_in.sin_addr.s_addr;
memcpy((u8 *)ah->av + eth_sz, &ipv4, sizeof(struct iphdr));
} else {
memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid));
grh.tclass_flow = cpu_to_be32((6 << 28) |
- (attr->grh.traffic_class << 24) |
- attr->grh.flow_label);
- memcpy(&grh.dgid[0], attr->grh.dgid.raw,
- sizeof(attr->grh.dgid.raw));
+ (ib_grh->traffic_class << 24) |
+ ib_grh->flow_label);
+ memcpy(&grh.dgid[0], ib_grh->dgid.raw,
+ sizeof(ib_grh->dgid.raw));
grh.pdid_hoplimit = cpu_to_be32((pdid << 16) |
(nxthdr << 8) |
- attr->grh.hop_limit);
+ ib_grh->hop_limit);
memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
}
if (*isvlan)
@@ -154,7 +156,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
return status;
}
-struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
struct ib_udata *udata)
{
u32 *ahid_addr;
@@ -165,11 +167,14 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
struct ib_gid_attr sgid_attr;
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+ const struct ib_global_route *grh;
union ib_gid sgid;
- if (!(attr->ah_flags & IB_AH_GRH))
+ if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
+ !(rdma_ah_get_ah_flags(attr) & IB_AH_GRH))
return ERR_PTR(-EINVAL);
+ grh = rdma_ah_read_grh(attr);
if (atomic_cmpxchg(&dev->update_sl, 1, 0))
ocrdma_init_service_level(dev);
@@ -181,7 +186,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
if (status)
goto av_err;
- status = ib_get_cached_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid,
+ status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, &sgid,
&sgid_attr);
if (status) {
pr_err("%s(): Failed to query sgid, status = %d\n",
@@ -197,10 +202,11 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
if ((pd->uctx) &&
- (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
- (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
- status = rdma_addr_find_l2_eth_by_grh(&sgid, &attr->grh.dgid,
- attr->dmac, &vlan_tag,
+ (!rdma_is_multicast_addr((struct in6_addr *)grh->dgid.raw)) &&
+ (!rdma_link_local_addr((struct in6_addr *)grh->dgid.raw))) {
+ status = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ attr->roce.dmac,
+ &vlan_tag,
&sgid_attr.ndev->ifindex,
NULL);
if (status) {
@@ -216,7 +222,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
/* if pd is for the user process, pass the ah_id to user space */
if ((pd->uctx) && (pd->uctx->ah_tbl.va)) {
- ahid_addr = pd->uctx->ah_tbl.va + attr->dlid;
+ ahid_addr = pd->uctx->ah_tbl.va + rdma_ah_get_dlid(attr);
*ahid_addr = 0;
*ahid_addr |= ah->id & OCRDMA_AH_ID_MASK;
if (ocrdma_is_udp_encap_supported(dev)) {
@@ -248,30 +254,32 @@ int ocrdma_destroy_ah(struct ib_ah *ibah)
return 0;
}
-int ocrdma_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
struct ocrdma_av *av = ah->av;
struct ocrdma_grh *grh;
- attr->ah_flags |= IB_AH_GRH;
+
+ attr->type = ibah->type;
if (ah->av->valid & OCRDMA_AV_VALID) {
grh = (struct ocrdma_grh *)((u8 *)ah->av +
sizeof(struct ocrdma_eth_vlan));
- attr->sl = be16_to_cpu(av->eth_hdr.vlan_tag) >> 13;
+ rdma_ah_set_sl(attr, be16_to_cpu(av->eth_hdr.vlan_tag) >> 13);
} else {
grh = (struct ocrdma_grh *)((u8 *)ah->av +
sizeof(struct ocrdma_eth_basic));
- attr->sl = 0;
+ rdma_ah_set_sl(attr, 0);
}
- memcpy(&attr->grh.dgid.raw[0], &grh->dgid[0], sizeof(grh->dgid));
- attr->grh.sgid_index = ah->sgid_index;
- attr->grh.hop_limit = be32_to_cpu(grh->pdid_hoplimit) & 0xff;
- attr->grh.traffic_class = be32_to_cpu(grh->tclass_flow) >> 24;
- attr->grh.flow_label = be32_to_cpu(grh->tclass_flow) & 0x00ffffffff;
+ rdma_ah_set_grh(attr, NULL,
+ be32_to_cpu(grh->tclass_flow) & 0xffffffff,
+ ah->sgid_index,
+ be32_to_cpu(grh->pdid_hoplimit) & 0xff,
+ be32_to_cpu(grh->tclass_flow) >> 24);
+ rdma_ah_set_dgid_raw(attr, &grh->dgid[0]);
return 0;
}
-int ocrdma_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+int ocrdma_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
/* modify_ah is unsupported */
return -ENOSYS;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index 0704a24b17c8..1a65c47945aa 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -51,11 +51,11 @@ enum {
OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */
};
-struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *,
- struct ib_udata *);
-int ocrdma_destroy_ah(struct ib_ah *);
-int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *);
-int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *);
+struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
+ struct ib_udata *udata);
+int ocrdma_destroy_ah(struct ib_ah *ah);
+int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+int ocrdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int ocrdma_process_mad(struct ib_device *,
int process_mad_flags,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index aa6967197620..dcb5942f9fb5 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -2499,7 +2499,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
int attr_mask)
{
int status;
- struct ib_ah_attr *ah_attr = &attrs->ah_attr;
+ struct rdma_ah_attr *ah_attr = &attrs->ah_attr;
union ib_gid sgid, zgid;
struct ib_gid_attr sgid_attr;
u32 vlan_id = 0xFFFF;
@@ -2510,25 +2510,28 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
+ const struct ib_global_route *grh;
- if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
+ if ((rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) == 0)
return -EINVAL;
+ grh = rdma_ah_read_grh(ah_attr);
if (atomic_cmpxchg(&dev->update_sl, 1, 0))
ocrdma_init_service_level(dev);
cmd->params.tclass_sq_psn |=
- (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
+ (grh->traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
cmd->params.rnt_rc_sl_fl |=
- (ah_attr->grh.flow_label & OCRDMA_QP_PARAMS_FLOW_LABEL_MASK);
- cmd->params.rnt_rc_sl_fl |= (ah_attr->sl << OCRDMA_QP_PARAMS_SL_SHIFT);
+ (grh->flow_label & OCRDMA_QP_PARAMS_FLOW_LABEL_MASK);
+ cmd->params.rnt_rc_sl_fl |= (rdma_ah_get_sl(ah_attr) <<
+ OCRDMA_QP_PARAMS_SL_SHIFT);
cmd->params.hop_lmt_rq_psn |=
- (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT);
+ (grh->hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT);
cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
/* GIDs */
- memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
+ memcpy(&cmd->params.dgid[0], &grh->dgid.raw[0],
sizeof(cmd->params.dgid));
- status = ib_get_cached_gid(&dev->ibdev, 1, ah_attr->grh.sgid_index,
+ status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index,
&sgid, &sgid_attr);
if (!status && sgid_attr.ndev) {
vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
@@ -2540,7 +2543,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
if (!memcmp(&sgid, &zgid, sizeof(zgid)))
return -EINVAL;
- qp->sgid_idx = ah_attr->grh.sgid_index;
+ qp->sgid_idx = grh->sgid_index;
memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
status = ocrdma_resolve_dmac(dev, ah_attr, &mac_addr[0]);
if (status)
@@ -2551,7 +2554,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
if (hdr_type == RDMA_NETWORK_IPV4) {
rdma_gid2ip(&sgid_addr._sockaddr, &sgid);
- rdma_gid2ip(&dgid_addr._sockaddr, &ah_attr->grh.dgid);
+ rdma_gid2ip(&dgid_addr._sockaddr, &grh->dgid);
memcpy(&cmd->params.dgid[0],
&dgid_addr._sockaddr_in.sin_addr.s_addr, 4);
memcpy(&cmd->params.sgid[0],
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index f8e4b0a6486f..66056f9a9700 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -643,7 +643,7 @@ static ssize_t ocrdma_dbgfs_ops_write(struct file *filp,
struct ocrdma_stats *pstats = filp->private_data;
struct ocrdma_dev *dev = pstats->dev;
- if (count > 32)
+ if (*ppos != 0 || count == 0 || count > sizeof(tmp_str))
goto err;
if (copy_from_user(tmp_str, buffer, count))
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index c52edeafd616..2f30bda8457a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -914,21 +914,18 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
pbe = (struct ocrdma_pbe *)pbl_tbl->va;
pbe_cnt = 0;
- shift = ilog2(umem->page_size);
+ shift = umem->page_shift;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
pages = sg_dma_len(sg) >> shift;
for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
/* store the page address in pbe */
pbe->pa_lo =
- cpu_to_le32(sg_dma_address
- (sg) +
- (umem->page_size * pg_cnt));
+ cpu_to_le32(sg_dma_address(sg) +
+ (pg_cnt << shift));
pbe->pa_hi =
- cpu_to_le32(upper_32_bits
- ((sg_dma_address
- (sg) +
- umem->page_size * pg_cnt)));
+ cpu_to_le32(upper_32_bits(sg_dma_address(sg) +
+ (pg_cnt << shift)));
pbe_cnt += 1;
total_num_pbes += 1;
pbe++;
@@ -978,7 +975,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
if (status)
goto umem_err;
- mr->hwmr.pbe_size = mr->umem->page_size;
+ mr->hwmr.pbe_size = BIT(mr->umem->page_shift);
mr->hwmr.fbo = ib_umem_offset(mr->umem);
mr->hwmr.va = usr_addr;
mr->hwmr.len = len;
@@ -1601,23 +1598,24 @@ int ocrdma_query_qp(struct ib_qp *ibqp,
qp_attr->cap.max_recv_sge = qp->rq.max_sges;
qp_attr->cap.max_inline_data = qp->max_inline_data;
qp_init_attr->cap = qp_attr->cap;
- memcpy(&qp_attr->ah_attr.grh.dgid, &params.dgid[0],
- sizeof(params.dgid));
- qp_attr->ah_attr.grh.flow_label = params.rnt_rc_sl_fl &
- OCRDMA_QP_PARAMS_FLOW_LABEL_MASK;
- qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
- qp_attr->ah_attr.grh.hop_limit = (params.hop_lmt_rq_psn &
- OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
- OCRDMA_QP_PARAMS_HOP_LMT_SHIFT;
- qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn &
- OCRDMA_QP_PARAMS_TCLASS_MASK) >>
- OCRDMA_QP_PARAMS_TCLASS_SHIFT;
-
- qp_attr->ah_attr.ah_flags = IB_AH_GRH;
- qp_attr->ah_attr.port_num = 1;
- qp_attr->ah_attr.sl = (params.rnt_rc_sl_fl &
- OCRDMA_QP_PARAMS_SL_MASK) >>
- OCRDMA_QP_PARAMS_SL_SHIFT;
+ qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+
+ rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
+ params.rnt_rc_sl_fl &
+ OCRDMA_QP_PARAMS_FLOW_LABEL_MASK,
+ qp->sgid_idx,
+ (params.hop_lmt_rq_psn &
+ OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
+ OCRDMA_QP_PARAMS_HOP_LMT_SHIFT,
+ (params.tclass_sq_psn &
+ OCRDMA_QP_PARAMS_TCLASS_MASK) >>
+ OCRDMA_QP_PARAMS_TCLASS_SHIFT);
+ rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid[0]);
+
+ rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
+ rdma_ah_set_sl(&qp_attr->ah_attr, (params.rnt_rc_sl_fl &
+ OCRDMA_QP_PARAMS_SL_MASK) >>
+ OCRDMA_QP_PARAMS_SL_SHIFT);
qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
@@ -1630,8 +1628,8 @@ int ocrdma_query_qp(struct ib_qp *ibqp,
qp_attr->min_rnr_timer = 0;
qp_attr->pkey_index = 0;
qp_attr->port_num = 1;
- qp_attr->ah_attr.src_path_bits = 0;
- qp_attr->ah_attr.static_rate = 0;
+ rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
+ rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
qp_attr->alt_pkey_index = 0;
qp_attr->alt_port_num = 0;
qp_attr->alt_timeout = 0;
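
The mthca_reg_user_mr() and build_user_pbes() hunks above (and the similar qedr hunks below) replace umem->page_size with the umem page_shift; since page_size == 1 << page_shift, the per-page DMA address arithmetic becomes a shift. A small equivalence sketch (these function names are hypothetical, for illustration only):

	#include <linux/types.h>

	/* Old form: dma + page_size * k; new form: dma + (k << page_shift).
	 * The two are identical when page_size == 1UL << page_shift.
	 */
	static u64 pbe_addr_by_size(u64 dma, unsigned long page_size, unsigned int k)
	{
		return dma + page_size * k;
	}

	static u64 pbe_addr_by_shift(u64 dma, unsigned int page_shift, unsigned int k)
	{
		return dma + ((u64)k << page_shift);
	}
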
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index ced0461d6e9f..6a72095d6c7a 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -35,6 +35,7 @@
#include <rdma/ib_user_verbs.h>
#include <linux/netdevice.h>
#include <linux/iommu.h>
+#include <linux/pci.h>
#include <net/addrconf.h>
#include <linux/qed/qede_roce.h>
#include <linux/qed/qed_chain.h>
@@ -340,43 +341,58 @@ static void qedr_remove_sysfiles(struct qedr_dev *dev)
static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev)
{
struct pci_dev *bridge;
- u32 val;
-
- dev->atomic_cap = IB_ATOMIC_NONE;
+ u32 ctl2, cap2;
+ u16 flags;
+ int rc;
bridge = pdev->bus->self;
if (!bridge)
- return;
-
- /* Check whether we are connected directly or via a switch */
- while (bridge && bridge->bus->parent) {
- DP_DEBUG(dev, QEDR_MSG_INIT,
- "Device is not connected directly to root. bridge->bus->number=%d primary=%d\n",
- bridge->bus->number, bridge->bus->primary);
- /* Need to check Atomic Op Routing Supported all the way to
- * root complex.
- */
- pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
- if (!(val & PCI_EXP_DEVCAP2_ATOMIC_ROUTE)) {
- pcie_capability_clear_word(pdev,
- PCI_EXP_DEVCTL2,
- PCI_EXP_DEVCTL2_ATOMIC_REQ);
- return;
- }
+ goto disable;
+
+ /* Check atomic routing support all the way to root complex */
+ while (bridge->bus->parent) {
+ rc = pcie_capability_read_word(bridge, PCI_EXP_FLAGS, &flags);
+ if (rc || ((flags & PCI_EXP_FLAGS_VERS) < 2))
+ goto disable;
+
+ rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap2);
+ if (rc)
+ goto disable;
+
+ rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2, &ctl2);
+ if (rc)
+ goto disable;
+
+ if (!(cap2 & PCI_EXP_DEVCAP2_ATOMIC_ROUTE) ||
+ (ctl2 & PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK))
+ goto disable;
bridge = bridge->bus->parent->self;
}
- bridge = pdev->bus->self;
- /* according to bridge capability */
- pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
- if (val & PCI_EXP_DEVCAP2_ATOMIC_COMP64) {
- pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2,
- PCI_EXP_DEVCTL2_ATOMIC_REQ);
- dev->atomic_cap = IB_ATOMIC_GLOB;
- } else {
- pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2,
- PCI_EXP_DEVCTL2_ATOMIC_REQ);
- }
+ rc = pcie_capability_read_word(bridge, PCI_EXP_FLAGS, &flags);
+ if (rc || ((flags & PCI_EXP_FLAGS_VERS) < 2))
+ goto disable;
+
+ rc = pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap2);
+ if (rc || !(cap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP64))
+ goto disable;
+
+ /* Set atomic operations */
+ pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2,
+ PCI_EXP_DEVCTL2_ATOMIC_REQ);
+ dev->atomic_cap = IB_ATOMIC_GLOB;
+
+ DP_DEBUG(dev, QEDR_MSG_INIT, "Atomic capability enabled\n");
+
+ return;
+
+disable:
+ pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2,
+ PCI_EXP_DEVCTL2_ATOMIC_REQ);
+ dev->atomic_cap = IB_ATOMIC_NONE;
+
+ DP_DEBUG(dev, QEDR_MSG_INIT, "Atomic capability disabled\n");
+
}
static const struct qed_rdma_ops *qed_ops;
@@ -423,14 +439,21 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle)
cq->arm_flags = 0;
- if (cq->ibcq.comp_handler)
+ if (!cq->destroyed && cq->ibcq.comp_handler)
(*cq->ibcq.comp_handler)
(&cq->ibcq, cq->ibcq.cq_context);
+ /* The CQ's CNQ notification counter is checked before
+ * destroying the CQ in a busy-wait loop that waits for all of
+ * the CQ's CNQ interrupts to be processed. It is increased
+ * here, only after the completion handler, to ensure that the
+ * handler is not running when the CQ is destroyed.
+ */
+ cq->cnq_notif++;
+
sw_comp_cons = qed_chain_get_cons_idx(&cnq->pbl);
cnq->n_comp++;
-
}
qed_ops->rdma_cnq_prod_update(cnq->dev->rdma_ctx, cnq->index,
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 5cb9195513bd..aa08c76a4245 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -272,6 +272,8 @@ struct qedr_cq {
u32 cq_cons;
struct qedr_userq q;
+ u8 destroyed;
+ u16 cnq_notif;
};
struct qedr_pd {
@@ -390,7 +392,7 @@ struct qedr_qp {
struct qedr_ah {
struct ib_ah ibah;
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
};
enum qedr_mr_type {
@@ -429,7 +431,8 @@ struct qedr_mr {
RDMA_CQE_RESPONDER_IMM_FLG_SHIFT)
#define QEDR_RESP_RDMA (RDMA_CQE_RESPONDER_RDMA_FLG_MASK << \
RDMA_CQE_RESPONDER_RDMA_FLG_SHIFT)
-#define QEDR_RESP_RDMA_IMM (QEDR_RESP_IMM | QEDR_RESP_RDMA)
+#define QEDR_RESP_INV (RDMA_CQE_RESPONDER_INV_FLG_MASK << \
+ RDMA_CQE_RESPONDER_INV_FLG_SHIFT)
static inline void qedr_inc_sw_cons(struct qedr_qp_hwq_info *info)
{
@@ -443,19 +446,24 @@ static inline void qedr_inc_sw_prod(struct qedr_qp_hwq_info *info)
}
static inline int qedr_get_dmac(struct qedr_dev *dev,
- struct ib_ah_attr *ah_attr, u8 *mac_addr)
+ struct rdma_ah_attr *ah_attr, u8 *mac_addr)
{
union ib_gid zero_sgid = { { 0 } };
struct in6_addr in6;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+ u8 *dmac;
- if (!memcmp(&ah_attr->grh.dgid, &zero_sgid, sizeof(union ib_gid))) {
+ if (!memcmp(&grh->dgid, &zero_sgid, sizeof(union ib_gid))) {
DP_ERR(dev, "Local port GID not supported\n");
eth_zero_addr(mac_addr);
return -EINVAL;
}
- memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
- ether_addr_copy(mac_addr, ah_attr->dmac);
+ memcpy(&in6, grh->dgid.raw, sizeof(in6));
+ dmac = rdma_ah_retrieve_dmac(ah_attr);
+ if (!dmac)
+ return -EINVAL;
+ ether_addr_copy(mac_addr, dmac);
return 0;
}
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c
index a6280ce3e2a5..3d7705cec770 100644
--- a/drivers/infiniband/hw/qedr/qedr_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_cm.c
@@ -243,8 +243,8 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
int *roce_mode)
{
bool has_vlan = false, has_grh_ipv6 = true;
- struct ib_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr;
- struct ib_global_route *grh = &ah_attr->grh;
+ struct rdma_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
union ib_gid sgid;
int send_size = 0;
u16 vlan_id = 0;
@@ -260,12 +260,13 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
for (i = 0; i < swr->num_sge; ++i)
send_size += swr->sg_list[i].length;
- rc = ib_get_cached_gid(qp->ibqp.device, ah_attr->port_num,
+ rc = ib_get_cached_gid(qp->ibqp.device, rdma_ah_get_port_num(ah_attr),
grh->sgid_index, &sgid, &sgid_attr);
if (rc) {
DP_ERR(dev,
"gsi post send: failed to get cached GID (port=%d, ix=%d)\n",
- ah_attr->port_num, grh->sgid_index);
+ rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index);
return rc;
}
@@ -277,7 +278,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
if (!memcmp(&sgid, &zgid, sizeof(sgid))) {
DP_ERR(dev, "gsi post send: GID not found GID index %d\n",
- ah_attr->grh.sgid_index);
+ grh->sgid_index);
return -ENOENT;
}
@@ -307,7 +308,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
}
/* ENET + VLAN headers */
- ether_addr_copy(udh->eth.dmac_h, ah_attr->dmac);
+ ether_addr_copy(udh->eth.dmac_h, ah_attr->roce.dmac);
ether_addr_copy(udh->eth.smac_h, dev->ndev->dev_addr);
if (has_vlan) {
udh->eth.type = htons(ETH_P_8021Q);
@@ -341,13 +342,13 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
u32 ipv4_addr;
udh->ip4.protocol = IPPROTO_UDP;
- udh->ip4.tos = htonl(ah_attr->grh.flow_label);
+ udh->ip4.tos = htonl(grh->flow_label);
udh->ip4.frag_off = htons(IP_DF);
- udh->ip4.ttl = ah_attr->grh.hop_limit;
+ udh->ip4.ttl = grh->hop_limit;
ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw);
udh->ip4.saddr = ipv4_addr;
- ipv4_addr = qedr_get_ipv4_from_gid(ah_attr->grh.dgid.raw);
+ ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw);
udh->ip4.daddr = ipv4_addr;
/* note: checksum is calculated by the device */
}
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.h b/drivers/infiniband/hw/qedr/qedr_cm.h
index 78efb1b056d1..a55916323ea9 100644
--- a/drivers/infiniband/hw/qedr/qedr_cm.h
+++ b/drivers/infiniband/hw/qedr/qedr_cm.h
@@ -39,7 +39,7 @@
#define QEDR_ROCE_V2_UDP_SPORT (0000)
-static inline u32 qedr_get_ipv4_from_gid(u8 *gid)
+static inline u32 qedr_get_ipv4_from_gid(const u8 *gid)
{
return *(u32 *)(void *)&gid[12];
}
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 2091902848e6..17685cfea6a2 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -681,16 +681,16 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
pbe_cnt = 0;
- shift = ilog2(umem->page_size);
+ shift = umem->page_shift;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
pages = sg_dma_len(sg) >> shift;
for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
/* store the page address in pbe */
pbe->lo = cpu_to_le32(sg_dma_address(sg) +
- umem->page_size * pg_cnt);
+ (pg_cnt << shift));
addr = upper_32_bits(sg_dma_address(sg) +
- umem->page_size * pg_cnt);
+ (pg_cnt << shift));
pbe->hi = cpu_to_le32(addr);
pbe_cnt++;
total_num_pbes++;
@@ -822,6 +822,17 @@ int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct qedr_cq *cq = get_qedr_cq(ibcq);
unsigned long sflags;
+ struct qedr_dev *dev;
+
+ dev = get_qedr_dev(ibcq->device);
+
+ if (cq->destroyed) {
+ DP_ERR(dev,
+ "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
+ cq, cq->icid);
+ return -EINVAL;
+ }
+
if (cq->cq_type == QEDR_CQ_TYPE_GSI)
return 0;
@@ -987,35 +998,82 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
return 0;
}
+#define QEDR_DESTROY_CQ_MAX_ITERATIONS (10)
+#define QEDR_DESTROY_CQ_ITER_DURATION (10)
+
int qedr_destroy_cq(struct ib_cq *ibcq)
{
struct qedr_dev *dev = get_qedr_dev(ibcq->device);
struct qed_rdma_destroy_cq_out_params oparams;
struct qed_rdma_destroy_cq_in_params iparams;
struct qedr_cq *cq = get_qedr_cq(ibcq);
+ int iter;
+ int rc;
- DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq: cq_id %d", cq->icid);
+ DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
+
+ cq->destroyed = 1;
/* GSIs CQs are handled by driver, so they don't exist in the FW */
- if (cq->cq_type != QEDR_CQ_TYPE_GSI) {
- int rc;
+ if (cq->cq_type == QEDR_CQ_TYPE_GSI)
+ goto done;
- iparams.icid = cq->icid;
- rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams,
- &oparams);
- if (rc)
- return rc;
- dev->ops->common->chain_free(dev->cdev, &cq->pbl);
- }
+ iparams.icid = cq->icid;
+ rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
+ if (rc)
+ return rc;
+
+ dev->ops->common->chain_free(dev->cdev, &cq->pbl);
if (ibcq->uobject && ibcq->uobject->context) {
qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
ib_umem_release(cq->q.umem);
}
+ /* We don't want the IRQ handler to handle a non-existing CQ so we
+ * wait until all CNQ interrupts, if any, are received. This will always
+ * happen and will always happen very fast. If not, then a serious error
+ * has occurred. That is why we can use a long delay.
+ * We spin for a short time so we don't lose time on context switching
+ * in case all the completions are handled in that span. Otherwise
+ * we sleep for a while and check again. Since the CNQ may be
+ * associated with (only) the current CPU we use msleep to allow the
+ * current CPU to be freed.
+ * The CNQ notification is increased in qedr_irq_handler().
+ */
+ iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
+ while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
+ udelay(QEDR_DESTROY_CQ_ITER_DURATION);
+ iter--;
+ }
+
+ iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
+ while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
+ msleep(QEDR_DESTROY_CQ_ITER_DURATION);
+ iter--;
+ }
+
+ if (oparams.num_cq_notif != cq->cnq_notif)
+ goto err;
+
+ /* Note that we don't need to have explicit code to wait for the
+ * completion of the event handler because it is invoked from the EQ.
+ * Since the destroy CQ ramrod has also been received on the EQ we can
+ * be certain that there's no event handler in progress.
+ */
+done:
+ cq->sig = ~cq->sig;
+
kfree(cq);
return 0;
+
+err:
+ DP_ERR(dev,
+ "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
+ cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
+
+ return -EINVAL;
}
static inline int get_gid_info_from_table(struct ib_qp *ibqp,
@@ -1026,13 +1084,15 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
{
enum rdma_network_type nw_type;
struct ib_gid_attr gid_attr;
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
union ib_gid gid;
u32 ipv4_addr;
int rc = 0;
int i;
- rc = ib_get_cached_gid(ibqp->device, attr->ah_attr.port_num,
- attr->ah_attr.grh.sgid_index, &gid, &gid_attr);
+ rc = ib_get_cached_gid(ibqp->device,
+ rdma_ah_get_port_num(&attr->ah_attr),
+ grh->sgid_index, &gid, &gid_attr);
if (rc)
return rc;
@@ -1049,7 +1109,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
sizeof(qp_params->sgid));
memcpy(&qp_params->dgid.bytes[0],
- &attr->ah_attr.grh.dgid,
+ &grh->dgid,
sizeof(qp_params->dgid));
qp_params->roce_mode = ROCE_V2_IPV6;
SET_FIELD(qp_params->modify_flags,
@@ -1059,7 +1119,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
sizeof(qp_params->sgid));
memcpy(&qp_params->dgid.bytes[0],
- &attr->ah_attr.grh.dgid,
+ &grh->dgid,
sizeof(qp_params->dgid));
qp_params->roce_mode = ROCE_V1;
break;
@@ -1069,7 +1129,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
qp_params->sgid.ipv4_addr = ipv4_addr;
ipv4_addr =
- qedr_get_ipv4_from_gid(attr->ah_attr.grh.dgid.raw);
+ qedr_get_ipv4_from_gid(grh->dgid.raw);
qp_params->dgid.ipv4_addr = ipv4_addr;
SET_FIELD(qp_params->modify_flags,
QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
@@ -1691,6 +1751,7 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct qedr_qp *qp = get_qedr_qp(ibqp);
struct qed_rdma_modify_qp_in_params qp_params = { 0 };
struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
enum ib_qp_state old_qp_state, new_qp_state;
int rc = 0;
@@ -1773,17 +1834,17 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
SET_FIELD(qp_params.modify_flags,
QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
- qp_params.traffic_class_tos = attr->ah_attr.grh.traffic_class;
- qp_params.flow_label = attr->ah_attr.grh.flow_label;
- qp_params.hop_limit_ttl = attr->ah_attr.grh.hop_limit;
+ qp_params.traffic_class_tos = grh->traffic_class;
+ qp_params.flow_label = grh->flow_label;
+ qp_params.hop_limit_ttl = grh->hop_limit;
- qp->sgid_idx = attr->ah_attr.grh.sgid_index;
+ qp->sgid_idx = grh->sgid_index;
rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
if (rc) {
DP_ERR(dev,
"modify qp: problems with GID index %d (rc=%d)\n",
- attr->ah_attr.grh.sgid_index, rc);
+ grh->sgid_index, rc);
return rc;
}
@@ -1968,25 +2029,21 @@ int qedr_query_qp(struct ib_qp *ibqp,
qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
qp_init_attr->cap = qp_attr->cap;
- memcpy(&qp_attr->ah_attr.grh.dgid.raw[0], &params.dgid.bytes[0],
- sizeof(qp_attr->ah_attr.grh.dgid.raw));
-
- qp_attr->ah_attr.grh.flow_label = params.flow_label;
- qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
- qp_attr->ah_attr.grh.hop_limit = params.hop_limit_ttl;
- qp_attr->ah_attr.grh.traffic_class = params.traffic_class_tos;
-
- qp_attr->ah_attr.ah_flags = IB_AH_GRH;
- qp_attr->ah_attr.port_num = 1;
- qp_attr->ah_attr.sl = 0;
+ qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
+ params.flow_label, qp->sgid_idx,
+ params.hop_limit_ttl, params.traffic_class_tos);
+ rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
+ rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
+ rdma_ah_set_sl(&qp_attr->ah_attr, 0);
qp_attr->timeout = params.timeout;
qp_attr->rnr_retry = params.rnr_retry;
qp_attr->retry_cnt = params.retry_cnt;
qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
qp_attr->pkey_index = params.pkey_index;
qp_attr->port_num = 1;
- qp_attr->ah_attr.src_path_bits = 0;
- qp_attr->ah_attr.static_rate = 0;
+ rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
+ rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
qp_attr->alt_pkey_index = 0;
qp_attr->alt_port_num = 0;
qp_attr->alt_timeout = 0;
@@ -2054,7 +2111,7 @@ int qedr_destroy_qp(struct ib_qp *ibqp)
return rc;
}
-struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
struct ib_udata *udata)
{
struct qedr_ah *ah;
@@ -2190,7 +2247,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
- mr->hw_mr.page_size_log = ilog2(mr->umem->page_size);
+ mr->hw_mr.page_size_log = mr->umem->page_shift;
mr->hw_mr.fbo = ib_umem_offset(mr->umem);
mr->hw_mr.length = len;
mr->hw_mr.vaddr = usr_addr;
@@ -2625,6 +2682,8 @@ static int qedr_prepare_reg(struct qedr_qp *qp,
fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
fwqe1->l_key = wr->key;
+ fwqe2->access_ctrl = 0;
+
SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
!!(wr->access & IB_ACCESS_REMOTE_READ));
SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
@@ -3271,57 +3330,81 @@ static int qedr_poll_cq_req(struct qedr_dev *dev,
return cnt;
}
-static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
- struct qedr_cq *cq, struct ib_wc *wc,
- struct rdma_cqe_responder *resp, u64 wr_id)
+static inline int qedr_cqe_resp_status_to_ib(u8 status)
{
- enum ib_wc_status wc_status = IB_WC_SUCCESS;
- u8 flags;
-
- wc->opcode = IB_WC_RECV;
- wc->wc_flags = 0;
-
- switch (resp->status) {
+ switch (status) {
case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
- wc_status = IB_WC_LOC_ACCESS_ERR;
- break;
+ return IB_WC_LOC_ACCESS_ERR;
case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
- wc_status = IB_WC_LOC_LEN_ERR;
- break;
+ return IB_WC_LOC_LEN_ERR;
case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
- wc_status = IB_WC_LOC_QP_OP_ERR;
- break;
+ return IB_WC_LOC_QP_OP_ERR;
case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
- wc_status = IB_WC_LOC_PROT_ERR;
- break;
+ return IB_WC_LOC_PROT_ERR;
case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
- wc_status = IB_WC_MW_BIND_ERR;
- break;
+ return IB_WC_MW_BIND_ERR;
case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
- wc_status = IB_WC_REM_INV_RD_REQ_ERR;
- break;
+ return IB_WC_REM_INV_RD_REQ_ERR;
case RDMA_CQE_RESP_STS_OK:
- wc_status = IB_WC_SUCCESS;
- wc->byte_len = le32_to_cpu(resp->length);
+ return IB_WC_SUCCESS;
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+}
- flags = resp->flags & QEDR_RESP_RDMA_IMM;
+static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
+ struct ib_wc *wc)
+{
+ wc->status = IB_WC_SUCCESS;
+ wc->byte_len = le32_to_cpu(resp->length);
- if (flags == QEDR_RESP_RDMA_IMM)
+ if (resp->flags & QEDR_RESP_IMM) {
+ wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
+ wc->wc_flags |= IB_WC_WITH_IMM;
+
+ if (resp->flags & QEDR_RESP_RDMA)
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
- if (flags == QEDR_RESP_RDMA_IMM || flags == QEDR_RESP_IMM) {
- wc->ex.imm_data =
- le32_to_cpu(resp->imm_data_or_inv_r_Key);
- wc->wc_flags |= IB_WC_WITH_IMM;
- }
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- DP_ERR(dev, "Invalid CQE status detected\n");
+ if (resp->flags & QEDR_RESP_INV)
+ return -EINVAL;
+
+ } else if (resp->flags & QEDR_RESP_INV) {
+ wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+
+ if (resp->flags & QEDR_RESP_RDMA)
+ return -EINVAL;
+
+ } else if (resp->flags & QEDR_RESP_RDMA) {
+ return -EINVAL;
}
- /* fill WC */
- wc->status = wc_status;
+ return 0;
+}
+
+static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
+ struct qedr_cq *cq, struct ib_wc *wc,
+ struct rdma_cqe_responder *resp, u64 wr_id)
+{
+ /* Must fill fields before qedr_set_ok_cqe_resp_wc() */
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = 0;
+
+ if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
+ if (qedr_set_ok_cqe_resp_wc(resp, wc))
+ DP_ERR(dev,
+ "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
+ cq, cq->icid, resp->flags);
+
+ } else {
+ wc->status = qedr_cqe_resp_status_to_ib(resp->status);
+ if (wc->status == IB_WC_GENERAL_ERR)
+ DP_ERR(dev,
+ "CQ %p (icid=%d) contains an invalid CQE status %d\n",
+ cq, cq->icid, resp->status);
+ }
+
+ /* Fill the rest of the WC */
wc->vendor_err = 0;
wc->src_qp = qp->id;
wc->qp = &qp->ibqp;
@@ -3416,6 +3499,13 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
int update = 0;
int done = 0;
+ if (cq->destroyed) {
+ DP_ERR(dev,
+ "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
+ cq, cq->icid);
+ return 0;
+ }
+
if (cq->cq_type == QEDR_CQ_TYPE_GSI)
return qedr_gsi_poll_cq(ibcq, num_entries, wc);
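A minimal stand-alone sketch (not part of this patch) of the responder-flag decoding that qedr_set_ok_cqe_resp_wc() introduces above: IMM and INV are mutually exclusive, and RDMA is only meaningful together with IMM. The RESP_* bit values below are placeholders, not the driver's real QEDR_RESP_* definitions.

/* build: cc -std=c99 -o resp_flags resp_flags.c */
#include <stdio.h>

#define RESP_IMM  0x1 /* placeholder for QEDR_RESP_IMM */
#define RESP_INV  0x2 /* placeholder for QEDR_RESP_INV */
#define RESP_RDMA 0x4 /* placeholder for QEDR_RESP_RDMA */

static const char *decode(unsigned int flags)
{
    if (flags & RESP_IMM) {
        if (flags & RESP_INV)
            return "invalid (IMM together with INV)";
        return (flags & RESP_RDMA) ? "IB_WC_RECV_RDMA_WITH_IMM"
                                   : "IB_WC_RECV with IB_WC_WITH_IMM";
    }
    if (flags & RESP_INV)
        return (flags & RESP_RDMA) ? "invalid (INV together with RDMA)"
                                   : "IB_WC_RECV with IB_WC_WITH_INVALIDATE";
    return (flags & RESP_RDMA) ? "invalid (RDMA alone)" : "plain IB_WC_RECV";
}

int main(void)
{
    for (unsigned int f = 0; f < 8; f++)
        printf("flags=0x%x -> %s\n", f, decode(f));
    return 0;
}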
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 070677ca4d19..0f8ab49d5a1a 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -70,7 +70,7 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *);
int qedr_destroy_qp(struct ib_qp *ibqp);
-struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
struct ib_udata *udata);
int qedr_destroy_ah(struct ib_ah *ibah);
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index f1e66efea98a..1d940a2885c9 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -512,7 +512,7 @@ static int qibfs_fill_super(struct super_block *sb, void *data, int silent)
unsigned long flags;
int ret;
- static struct tree_descr files[] = {
+ static const struct tree_descr files[] = {
[2] = {"driver_stats", &driver_ops[0], S_IRUGO},
[3] = {"driver_stats_names", &driver_ops[1], S_IRUGO},
{""},
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 06de1cbcf67d..e423b71e6ea0 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -3295,13 +3295,11 @@ static int init_6120_variables(struct qib_devdata *dd)
dd->rhdrhead_intr_off = 1ULL << 32;
/* setup the stats timer; the add_timer is done at end of init */
- init_timer(&dd->stats_timer);
- dd->stats_timer.function = qib_get_6120_faststats;
- dd->stats_timer.data = (unsigned long) dd;
+ setup_timer(&dd->stats_timer, qib_get_6120_faststats,
+ (unsigned long)dd);
- init_timer(&dd->cspec->pma_timer);
- dd->cspec->pma_timer.function = pma_6120_timer;
- dd->cspec->pma_timer.data = (unsigned long) ppd;
+ setup_timer(&dd->cspec->pma_timer, pma_6120_timer,
+ (unsigned long)ppd);
dd->ureg_align = qib_read_kreg32(dd, kr_palign);
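The init_timer() -> setup_timer() conversions in this and the following qib hunks are purely mechanical. A reduced stand-alone mock (not the real kernel timer API) showing what the helper folds together:

#include <stdio.h>

/* reduced mock of the kernel's timer_list, just enough for the demo */
struct timer_list {
    void (*function)(unsigned long);
    unsigned long data;
};

static void init_timer(struct timer_list *t)
{
    t->function = NULL;
    t->data = 0;
}

/* setup_timer(t, fn, d) == init_timer(t); t->function = fn; t->data = d; */
static void setup_timer(struct timer_list *t,
                        void (*fn)(unsigned long), unsigned long data)
{
    init_timer(t);
    t->function = fn;
    t->data = data;
}

static void fake_faststats(unsigned long data)
{
    printf("timer fired, data=%#lx\n", data);
}

int main(void)
{
    struct timer_list stats_timer;

    setup_timer(&stats_timer, fake_faststats, 0x1234UL);
    stats_timer.function(stats_timer.data); /* simulate expiry */
    return 0;
}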
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 55a18384c22d..c3679c48e61c 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -4074,9 +4074,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
if (!qib_mini_init)
qib_write_kreg(dd, kr_rcvbthqp, QIB_KD_QP);
- init_timer(&ppd->cpspec->chase_timer);
- ppd->cpspec->chase_timer.function = reenable_7220_chase;
- ppd->cpspec->chase_timer.data = (unsigned long)ppd;
+ setup_timer(&ppd->cpspec->chase_timer, reenable_7220_chase,
+ (unsigned long)ppd);
qib_num_cfg_vls = 1; /* if any 7220's, only one VL */
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index af9f596bb68b..bb2439fff8fa 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -6611,9 +6611,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
if (!qib_mini_init)
write_7322_init_portregs(ppd);
- init_timer(&cp->chase_timer);
- cp->chase_timer.function = reenable_chase;
- cp->chase_timer.data = (unsigned long)ppd;
+ setup_timer(&cp->chase_timer, reenable_chase,
+ (unsigned long)ppd);
ppd++;
}
@@ -6639,9 +6638,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
(u64) rcv_int_count << IBA7322_HDRHEAD_PKTINT_SHIFT;
/* setup the stats timer; the add_timer is done at end of init */
- init_timer(&dd->stats_timer);
- dd->stats_timer.function = qib_get_7322_faststats;
- dd->stats_timer.data = (unsigned long) dd;
+ setup_timer(&dd->stats_timer, qib_get_7322_faststats,
+ (unsigned long)dd);
dd->ureg_align = 0x10000; /* 64KB alignment */
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index b50240b1d5a4..6c16ba1107ba 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -233,9 +233,8 @@ int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
spin_lock_init(&ppd->cc_shadow_lock);
init_waitqueue_head(&ppd->state_wait);
- init_timer(&ppd->symerr_clear_timer);
- ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup;
- ppd->symerr_clear_timer.data = (unsigned long)ppd;
+ setup_timer(&ppd->symerr_clear_timer, qib_clear_symerror_on_linkup,
+ (unsigned long)ppd);
ppd->qib_wq = NULL;
ppd->ibport_data.pmastats =
@@ -429,9 +428,8 @@ static int loadtime_init(struct qib_devdata *dd)
qib_get_eeprom_info(dd);
/* setup time (don't start yet) to verify we got interrupt */
- init_timer(&dd->intrchk_timer);
- dd->intrchk_timer.function = verify_interrupt;
- dd->intrchk_timer.data = (unsigned long) dd;
+ setup_timer(&dd->intrchk_timer, verify_interrupt,
+ (unsigned long)dd);
done:
return ret;
}
@@ -755,9 +753,8 @@ done:
continue;
if (dd->flags & QIB_HAS_SEND_DMA)
ret = qib_setup_sdma(ppd);
- init_timer(&ppd->hol_timer);
- ppd->hol_timer.function = qib_hol_event;
- ppd->hol_timer.data = (unsigned long)ppd;
+ setup_timer(&ppd->hol_timer, qib_hol_event,
+ (unsigned long)ppd);
ppd->hol_state = QIB_HOL_UP;
}
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index d2ac29861af5..da295e0392ed 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -717,9 +717,10 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
spin_lock_irqsave(&ibp->rvp.lock, flags);
if (ibp->rvp.sm_ah) {
if (smlid != ibp->rvp.sm_lid)
- ibp->rvp.sm_ah->attr.dlid = smlid;
+ rdma_ah_set_dlid(&ibp->rvp.sm_ah->attr,
+ smlid);
if (msl != ibp->rvp.sm_sl)
- ibp->rvp.sm_ah->attr.sl = msl;
+ rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl);
}
spin_unlock_irqrestore(&ibp->rvp.lock, flags);
if (smlid != ibp->rvp.sm_lid)
@@ -2500,5 +2501,5 @@ void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx)
del_timer_sync(&dd->pport[port_idx].cong_stats.timer);
if (dd->pport[port_idx].ibport_data.smi_ah)
- ib_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah);
+ rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah);
}
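The qib hunks that follow replace direct ib_ah_attr field access with the rdma_ah_* accessors. A simplified stand-alone sketch of that pattern; struct mock_ah_attr and the helpers below are stand-ins, not the real rdma/ib_verbs.h definitions:

#include <stdint.h>
#include <stdio.h>

struct mock_ah_attr {
    uint16_t dlid;
    uint8_t  sl;
};

static uint16_t rdma_ah_get_dlid(const struct mock_ah_attr *a) { return a->dlid; }
static void rdma_ah_set_dlid(struct mock_ah_attr *a, uint16_t dlid) { a->dlid = dlid; }
static uint8_t rdma_ah_get_sl(const struct mock_ah_attr *a) { return a->sl; }
static void rdma_ah_set_sl(struct mock_ah_attr *a, uint8_t sl) { a->sl = sl; }

int main(void)
{
    struct mock_ah_attr sm_ah = { 0, 0 };

    /* old style: sm_ah.dlid = smlid; sm_ah.sl = msl; */
    rdma_ah_set_dlid(&sm_ah, 0xc000);
    rdma_ah_set_sl(&sm_ah, 5);
    printf("dlid=%#x sl=%u\n", rdma_ah_get_dlid(&sm_ah), rdma_ah_get_sl(&sm_ah));
    return 0;
}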
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 2ac0c0f79e74..5984981e7dd4 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -489,7 +489,7 @@ void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
qp->s_last, qp->s_acked, qp->s_cur,
qp->s_tail, qp->s_head, qp->s_size,
qp->remote_qpn,
- qp->remote_ah_attr.dlid);
+ rdma_ah_get_dlid(&qp->remote_ah_attr));
}
#endif
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 12658e3fe154..fc8b88514da5 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -234,7 +234,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
int delta;
ohdr = &priv->s_hdr->u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
ohdr = &priv->s_hdr->u.l.oth;
/* Sending responses has higher priority over sending requests. */
@@ -637,9 +637,11 @@ void qib_send_rc_ack(struct rvt_qp *qp)
lrh0 = QIB_LRH_BTH;
/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
hwords = 6;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH)) {
hwords += qib_make_grh(ibp, &hdr.u.l.grh,
- &qp->remote_ah_attr.grh, hwords, 0);
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ hwords, 0);
ohdr = &hdr.u.l.oth;
lrh0 = QIB_LRH_GRH;
}
@@ -653,12 +655,13 @@ void qib_send_rc_ack(struct rvt_qp *qp)
IB_AETH_CREDIT_SHIFT));
else
ohdr->u.aeth = rvt_compute_aeth(qp);
- lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
- qp->remote_ah_attr.sl << 4;
+ lrh0 |= ibp->sl_to_vl[rdma_ah_get_sl(&qp->remote_ah_attr)] << 12 |
+ rdma_ah_get_sl(&qp->remote_ah_attr) << 4;
hdr.lrh[0] = cpu_to_be16(lrh0);
- hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ hdr.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
- hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
+ hdr.lrh[3] = cpu_to_be16(ppd->lid |
+ rdma_ah_get_path_bits(&qp->remote_ah_attr));
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & QIB_PSN_MASK);
@@ -938,7 +941,10 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
/* see post_send() */
barrier();
rvt_put_swqe(wqe);
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_qib_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before resending,
@@ -983,7 +989,10 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
qp->s_last = s_last;
/* see post_send() */
barrier();
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_qib_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
} else
this_cpu_inc(*ibp->rvp.rc_delayed_comp);
@@ -1898,8 +1907,8 @@ send_last:
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index 17655cc3e6fe..bd09de7c6e56 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -195,7 +195,7 @@ void qib_migrate_qp(struct rvt_qp *qp)
qp->s_mig_state = IB_MIG_MIGRATED;
qp->remote_ah_attr = qp->alt_ah_attr;
- qp->port_num = qp->alt_ah_attr.port_num;
+ qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
qp->s_pkey_index = qp->s_alt_pkey_index;
ev.device = qp->ibqp.device;
@@ -235,18 +235,23 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
if (!has_grh) {
- if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
+ IB_AH_GRH)
goto err;
} else {
- if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
+ const struct ib_global_route *grh;
+
+ if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
+ IB_AH_GRH))
goto err;
- guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
+ grh = rdma_ah_read_grh(&qp->alt_ah_attr);
+ guid = get_sguid(ibp, grh->sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid,
ibp->rvp.gid_prefix, guid))
goto err;
if (!gid_ok(&hdr->u.l.grh.sgid,
- qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
- qp->alt_ah_attr.grh.dgid.global.interface_id))
+ grh->dgid.global.subnet_prefix,
+ grh->dgid.global.interface_id))
goto err;
}
if (!qib_pkey_ok((u16)bth0,
@@ -259,27 +264,33 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
- ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
+ if ((be16_to_cpu(hdr->lrh[3]) !=
+ rdma_ah_get_dlid(&qp->alt_ah_attr)) ||
+ ppd_from_ibp(ibp)->port !=
+ rdma_ah_get_port_num(&qp->alt_ah_attr))
goto err;
spin_lock_irqsave(&qp->s_lock, flags);
qib_migrate_qp(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else {
if (!has_grh) {
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH)
goto err;
} else {
- if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
+ const struct ib_global_route *grh;
+
+ if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH))
goto err;
- guid = get_sguid(ibp,
- qp->remote_ah_attr.grh.sgid_index);
+ grh = rdma_ah_read_grh(&qp->remote_ah_attr);
+ guid = get_sguid(ibp, grh->sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid,
ibp->rvp.gid_prefix, guid))
goto err;
if (!gid_ok(&hdr->u.l.grh.sgid,
- qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
- qp->remote_ah_attr.grh.dgid.global.interface_id))
+ grh->dgid.global.subnet_prefix,
+ grh->dgid.global.interface_id))
goto err;
}
if (!qib_pkey_ok((u16)bth0,
@@ -292,7 +303,8 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
+ if (be16_to_cpu(hdr->lrh[3]) !=
+ rdma_ah_get_dlid(&qp->remote_ah_attr) ||
ppd_from_ibp(ibp)->port != qp->port_num)
goto err;
if (qp->s_mig_state == IB_MIG_REARM &&
@@ -528,8 +540,8 @@ again:
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
@@ -619,7 +631,7 @@ done:
* Return the size of the header in 32 bit words.
*/
u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
- struct ib_global_route *grh, u32 hwords, u32 nwords)
+ const struct ib_global_route *grh, u32 hwords, u32 nwords)
{
hdr->version_tclass_flow =
cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
@@ -652,20 +664,23 @@ void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
extra_bytes = -qp->s_cur_size & 3;
nwords = (qp->s_cur_size + extra_bytes) >> 2;
lrh0 = QIB_LRH_BTH;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- qp->s_hdrwords += qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
- &qp->remote_ah_attr.grh,
- qp->s_hdrwords, nwords);
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
+ qp->s_hdrwords +=
+ qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ qp->s_hdrwords, nwords);
lrh0 = QIB_LRH_GRH;
}
- lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
- qp->remote_ah_attr.sl << 4;
+ lrh0 |= ibp->sl_to_vl[rdma_ah_get_sl(&qp->remote_ah_attr)] << 12 |
+ rdma_ah_get_sl(&qp->remote_ah_attr) << 4;
priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
- priv->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ priv->s_hdr->lrh[1] =
+ cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
priv->s_hdr->lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- priv->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
- qp->remote_ah_attr.src_path_bits);
+ priv->s_hdr->lrh[3] =
+ cpu_to_be16(ppd_from_ibp(ibp)->lid |
+ rdma_ah_get_path_bits(&qp->remote_ah_attr));
bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
bth0 |= extra_bytes << 20;
if (qp->s_mig_state == IB_MIG_MIGRATED)
@@ -703,7 +718,8 @@ void qib_do_send(struct rvt_qp *qp)
if ((qp->ibqp.qp_type == IB_QPT_RC ||
qp->ibqp.qp_type == IB_QPT_UC) &&
- (qp->remote_ah_attr.dlid & ~((1 << ppd->lmc) - 1)) == ppd->lid) {
+ (rdma_ah_get_dlid(&qp->remote_ah_attr) &
+ ~((1 << ppd->lmc) - 1)) == ppd->lid) {
qib_ruc_loopback(qp);
return;
}
@@ -769,7 +785,10 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
- rvt_qp_swqe_complete(qp, wqe, status);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_qib_wc_opcode[wqe->wr.opcode],
+ status);
if (qp->s_acked == old_last)
qp->s_acked = last;
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index b337b60fc40d..498e2202e72c 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -74,7 +74,7 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags)
}
ohdr = &priv->s_hdr->u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
ohdr = &priv->s_hdr->u.l.oth;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -394,8 +394,8 @@ last_imm:
wc.status = IB_WC_SUCCESS;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index ddd4e7458750..341a123ee95c 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -54,7 +54,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
struct qib_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
struct rvt_qp *qp;
- struct ib_ah_attr *ah_attr;
+ struct rdma_ah_attr *ah_attr;
unsigned long flags;
struct rvt_sge_state ssge;
struct rvt_sge *sge;
@@ -92,13 +92,13 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
pkey1 = qib_get_pkey(ibp, sqp->s_pkey_index);
pkey2 = qib_get_pkey(ibp, qp->s_pkey_index);
if (unlikely(!qib_pkey_ok(pkey1, pkey2))) {
- lid = ppd->lid | (ah_attr->src_path_bits &
+ lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, pkey1,
- ah_attr->sl,
+ rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
cpu_to_be16(lid),
- cpu_to_be16(ah_attr->dlid));
+ cpu_to_be16(rdma_ah_get_dlid(ah_attr)));
goto drop;
}
}
@@ -116,13 +116,13 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (unlikely(qkey != qp->qkey)) {
u16 lid;
- lid = ppd->lid | (ah_attr->src_path_bits &
+ lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey,
- ah_attr->sl,
+ rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
cpu_to_be16(lid),
- cpu_to_be16(ah_attr->dlid));
+ cpu_to_be16(rdma_ah_get_dlid(ah_attr)));
goto drop;
}
}
@@ -168,11 +168,11 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
goto bail_unlock;
}
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
struct ib_grh grh;
- struct ib_global_route grd = ah_attr->grh;
+ const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr);
- qib_make_grh(ibp, &grh, &grd, 0, 0);
+ qib_make_grh(ibp, &grh, grd, 0, 0);
qib_copy_sge(&qp->r_sge, &grh,
sizeof(grh), 1);
wc.wc_flags |= IB_WC_GRH;
@@ -220,9 +220,10 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
wc.src_qp = sqp->ibqp.qp_num;
wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
swqe->ud_wr.pkey_index : 0;
- wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
- wc.sl = ah_attr->sl;
- wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
+ wc.slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1));
+ wc.sl = rdma_ah_get_sl(ah_attr);
+ wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
@@ -246,7 +247,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
struct ib_other_headers *ohdr;
- struct ib_ah_attr *ah_attr;
+ struct rdma_ah_attr *ah_attr;
struct qib_pportdata *ppd;
struct qib_ibport *ibp;
struct rvt_swqe *wqe;
@@ -289,14 +290,15 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
- if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
- if (ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE))
+ if (rdma_ah_get_dlid(ah_attr) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+ if (rdma_ah_get_dlid(ah_attr) !=
+ be16_to_cpu(IB_LID_PERMISSIVE))
this_cpu_inc(ibp->pmastats->n_multicast_xmit);
else
this_cpu_inc(ibp->pmastats->n_unicast_xmit);
} else {
this_cpu_inc(ibp->pmastats->n_unicast_xmit);
- lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
+ lid = rdma_ah_get_dlid(ah_attr) & ~((1 << ppd->lmc) - 1);
if (unlikely(lid == ppd->lid)) {
unsigned long tflags = *flags;
/*
@@ -328,17 +330,17 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
qp->s_hdrwords = 7;
qp->s_cur_size = wqe->length;
qp->s_cur_sge = &qp->s_sge;
- qp->s_srate = ah_attr->static_rate;
+ qp->s_srate = rdma_ah_get_static_rate(ah_attr);
qp->s_wqe = wqe;
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_sge.total_len = wqe->length;
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
/* Header size in 32-bit words. */
qp->s_hdrwords += qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
- &ah_attr->grh,
+ rdma_ah_read_grh(ah_attr),
qp->s_hdrwords, nwords);
lrh0 = QIB_LRH_GRH;
ohdr = &priv->s_hdr->u.l.oth;
@@ -357,18 +359,20 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
} else
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
- lrh0 |= ah_attr->sl << 4;
+ lrh0 |= rdma_ah_get_sl(ah_attr) << 4;
if (qp->ibqp.qp_type == IB_QPT_SMI)
lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
else
- lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12;
+ lrh0 |= ibp->sl_to_vl[rdma_ah_get_sl(ah_attr)] << 12;
priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
- priv->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
+ priv->s_hdr->lrh[1] =
+ cpu_to_be16(rdma_ah_get_dlid(ah_attr)); /* DEST LID */
priv->s_hdr->lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
lid = ppd->lid;
if (lid) {
- lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
+ lid |= rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1);
priv->s_hdr->lrh[3] = cpu_to_be16(lid);
} else
priv->s_hdr->lrh[3] = IB_LID_PERMISSIVE;
@@ -382,8 +386,9 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
/*
* Use the multicast QP if the destination LID is a multicast LID.
*/
- ohdr->bth[1] = ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
- ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ?
+ ohdr->bth[1] = rdma_ah_get_dlid(ah_attr) >=
+ be16_to_cpu(IB_MULTICAST_LID_BASE) &&
+ rdma_ah_get_dlid(ah_attr) != be16_to_cpu(IB_LID_PERMISSIVE) ?
cpu_to_be32(QIB_MULTICAST_QPN) :
cpu_to_be32(wqe->ud_wr.remote_qpn);
ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK);
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 83f8b5f24381..ac42dce7e281 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -114,6 +114,19 @@ module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_qib_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
+};
+
+/*
* System image GUID.
*/
__be64 ib_qib_sys_image_guid;
@@ -343,7 +356,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
if (lnh != QIB_LRH_GRH)
goto drop;
- mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
+ mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
if (mcast == NULL)
goto drop;
this_cpu_inc(ibp->pmastats->n_multicast_rcv);
@@ -1323,16 +1336,16 @@ static int qib_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
return 0;
}
-int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
+int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
{
- if (ah_attr->sl > 15)
+ if (rdma_ah_get_sl(ah_attr) > 15)
return -EINVAL;
return 0;
}
static void qib_notify_new_ah(struct ib_device *ibdev,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct rvt_ah *ah)
{
struct qib_ibport *ibp;
@@ -1343,25 +1356,29 @@ static void qib_notify_new_ah(struct ib_device *ibdev,
* done being setup. We can however modify things which we need to set.
*/
- ibp = to_iport(ibdev, ah_attr->port_num);
+ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
ppd = ppd_from_ibp(ibp);
- ah->vl = ibp->sl_to_vl[ah->attr.sl];
+ ah->vl = ibp->sl_to_vl[rdma_ah_get_sl(&ah->attr)];
ah->log_pmtu = ilog2(ppd->ibmtu);
}
struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
{
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
struct ib_ah *ah = ERR_PTR(-EINVAL);
struct rvt_qp *qp0;
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_devdata *dd = dd_from_ppd(ppd);
+ u8 port_num = ppd->port;
memset(&attr, 0, sizeof(attr));
- attr.dlid = dlid;
- attr.port_num = ppd_from_ibp(ibp)->port;
+ attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
+ rdma_ah_set_dlid(&attr, dlid);
+ rdma_ah_set_port_num(&attr, port_num);
rcu_read_lock();
qp0 = rcu_dereference(ibp->rvp.qp[0]);
if (qp0)
- ah = ib_create_ah(qp0->ibqp.pd, &attr);
+ ah = rdma_create_ah(qp0->ibqp.pd, &attr);
rcu_read_unlock();
return ah;
}
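The new ib_qib_wc_opcode[] table above feeds rvt_qp_swqe_complete(), which now takes the completion opcode as an explicit argument. A stand-alone sketch of the lookup; the enum values here are placeholders for the IB_WR_*/IB_WC_* constants:

#include <stdio.h>

enum wr_op { WR_RDMA_WRITE, WR_SEND, WR_RDMA_READ, WR_ATOMIC_CMP_AND_SWP, WR_OP_MAX };
enum wc_op { WC_RDMA_WRITE, WC_SEND, WC_RDMA_READ, WC_COMP_SWAP };

/* WR-opcode-indexed table, mirroring ib_qib_wc_opcode[] */
static const enum wc_op wc_opcode[WR_OP_MAX] = {
    [WR_RDMA_WRITE]         = WC_RDMA_WRITE,
    [WR_SEND]               = WC_SEND,
    [WR_RDMA_READ]          = WC_RDMA_READ,
    [WR_ATOMIC_CMP_AND_SWP] = WC_COMP_SWAP,
};

int main(void)
{
    enum wr_op posted = WR_RDMA_READ;

    /* rvt_qp_swqe_complete(qp, wqe, wc_opcode[wqe->wr.opcode], status) */
    printf("WR opcode %d completes as WC opcode %d\n", posted, wc_opcode[posted]);
    return 0;
}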
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 212e8ce71be8..da0db5485ddc 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -310,7 +310,7 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
-int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
@@ -335,7 +335,7 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0);
u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
- struct ib_global_route *grh, u32 hwords, u32 nwords);
+ const struct ib_global_route *grh, u32 hwords, u32 nwords);
void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2);
diff --git a/drivers/infiniband/hw/usnic/usnic_common_util.h b/drivers/infiniband/hw/usnic/usnic_common_util.h
index b54986de5f0c..ddd81294fa46 100644
--- a/drivers/infiniband/hw/usnic/usnic_common_util.h
+++ b/drivers/infiniband/hw/usnic/usnic_common_util.h
@@ -34,21 +34,7 @@
#ifndef USNIC_CMN_UTIL_H
#define USNIC_CMN_UTIL_H
-static inline void
-usnic_mac_to_gid(const char *const mac, char *raw_gid)
-{
- raw_gid[0] = 0xfe;
- raw_gid[1] = 0x80;
- memset(&raw_gid[2], 0, 6);
- raw_gid[8] = mac[0]^2;
- raw_gid[9] = mac[1];
- raw_gid[10] = mac[2];
- raw_gid[11] = 0xff;
- raw_gid[12] = 0xfe;
- raw_gid[13] = mac[3];
- raw_gid[14] = mac[4];
- raw_gid[15] = mac[5];
-}
+#include <net/addrconf.h>
static inline void
usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid)
@@ -57,27 +43,7 @@ usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid)
raw_gid[1] = 0x80;
memset(&raw_gid[2], 0, 2);
memcpy(&raw_gid[4], &inaddr, 4);
- raw_gid[8] = mac[0]^2;
- raw_gid[9] = mac[1];
- raw_gid[10] = mac[2];
- raw_gid[11] = 0xff;
- raw_gid[12] = 0xfe;
- raw_gid[13] = mac[3];
- raw_gid[14] = mac[4];
- raw_gid[15] = mac[5];
-}
-
-static inline void
-usnic_write_gid_if_id_from_mac(char *mac, char *raw_gid)
-{
- raw_gid[8] = mac[0]^2;
- raw_gid[9] = mac[1];
- raw_gid[10] = mac[2];
- raw_gid[11] = 0xff;
- raw_gid[12] = 0xfe;
- raw_gid[13] = mac[3];
- raw_gid[14] = mac[4];
- raw_gid[15] = mac[5];
+ addrconf_addr_eui48(&raw_gid[8], mac);
}
#endif /* USNIC_COMMON_UTIL_H */
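addrconf_addr_eui48() performs exactly the EUI-48 -> modified-EUI-64 expansion that the removed open-coded bytes spelled out. A stand-alone sketch of that mapping for GID bytes 8..15 (the helper name eui48_to_eui64 is illustrative):

#include <stdint.h>
#include <stdio.h>

static void eui48_to_eui64(uint8_t eui[8], const uint8_t mac[6])
{
    eui[0] = mac[0] ^ 2; /* flip the universal/local bit */
    eui[1] = mac[1];
    eui[2] = mac[2];
    eui[3] = 0xff;       /* splice in the ff:fe marker */
    eui[4] = 0xfe;
    eui[5] = mac[3];
    eui[6] = mac[4];
    eui[7] = mac[5];
}

int main(void)
{
    const uint8_t mac[6] = { 0x00, 0x25, 0xb5, 0x01, 0x02, 0x03 };
    uint8_t eui[8];

    eui48_to_eui64(eui, mac);
    for (int i = 0; i < 8; i++)
        printf("%02x%s", eui[i], i == 7 ? "\n" : ":");
    return 0;
}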
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index 04443242e258..32956f9f5715 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -44,6 +44,7 @@
#include "usnic_vnic.h"
#include "usnic_ib_verbs.h"
#include "usnic_log.h"
+#include "usnic_ib_sysfs.h"
static ssize_t usnic_ib_show_board(struct device *device,
struct device_attribute *attr,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 3284730d3c09..4996984885c2 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -46,6 +46,7 @@
#include "usnic_log.h"
#include "usnic_uiom.h"
#include "usnic_transport.h"
+#include "usnic_ib_verbs.h"
#define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM
@@ -151,7 +152,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
struct usnic_vnic *vnic;
struct usnic_ib_qp_grp *qp_grp;
struct device *dev, **dev_list;
- int i, found = 0;
+ int i;
BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
@@ -173,8 +174,13 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
us_ibdev->ib_dev.name,
pci_name(usnic_vnic_get_pdev(
vnic)));
- found = 1;
- break;
+ qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev,
+ vf, pd,
+ res_spec,
+ trans_spec);
+
+ spin_unlock(&vf->lock);
+ goto qp_grp_check;
}
spin_unlock(&vf->lock);
@@ -182,34 +188,30 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
usnic_uiom_free_dev_list(dev_list);
}
- if (!found) {
- /* Try to find resources on an unused vf */
- list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
- spin_lock(&vf->lock);
- vnic = vf->vnic;
- if (vf->qp_grp_ref_cnt == 0 &&
- usnic_vnic_check_room(vnic, res_spec) == 0) {
- found = 1;
- break;
- }
+ /* Try to find resources on an unused vf */
+ list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
+ spin_lock(&vf->lock);
+ vnic = vf->vnic;
+ if (vf->qp_grp_ref_cnt == 0 &&
+ usnic_vnic_check_room(vnic, res_spec) == 0) {
+ qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf,
+ pd, res_spec,
+ trans_spec);
+
spin_unlock(&vf->lock);
+ goto qp_grp_check;
}
+ spin_unlock(&vf->lock);
}
- if (!found) {
- usnic_info("No free qp grp found on %s\n",
- us_ibdev->ib_dev.name);
- return ERR_PTR(-ENOMEM);
- }
+ usnic_info("No free qp grp found on %s\n", us_ibdev->ib_dev.name);
+ return ERR_PTR(-ENOMEM);
- qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf, pd, res_spec,
- trans_spec);
- spin_unlock(&vf->lock);
+qp_grp_check:
if (IS_ERR_OR_NULL(qp_grp)) {
usnic_err("Failed to allocate qp_grp\n");
return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM);
}
-
return qp_grp;
}
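The hunk above reworks find_free_vf_and_create_qp_grp() so the qp_grp is created while the per-VF lock is still held, and both search loops converge on one common check instead of a 'found' flag. A much-reduced stand-alone sketch of that control flow (locking elided to comments):

#include <stdio.h>

static int create_grp(int vf) { return vf; /* stand-in for usnic_ib_qp_grp_create() */ }

static int find_and_create(const int *has_room, int n)
{
    int grp;

    for (int i = 0; i < n; i++) {
        /* spin_lock(&vf->lock) */
        if (has_room[i]) {
            grp = create_grp(i); /* created under the lock now */
            /* spin_unlock(&vf->lock) */
            goto grp_check;
        }
        /* spin_unlock(&vf->lock) */
    }
    return -1; /* no free qp grp found */

grp_check:
    return grp < 0 ? -1 : grp; /* single IS_ERR_OR_NULL-style check */
}

int main(void)
{
    const int vfs[] = { 0, 0, 1 };

    printf("qp_grp created on vf %d\n", find_and_create(vfs, 3));
    return 0;
}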
@@ -738,7 +740,7 @@ int usnic_ib_mmap(struct ib_ucontext *context,
/* In ib callbacks section - Start of stub funcs */
struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 0ed8e072329e..172e43b6fa95 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -75,7 +75,7 @@ int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
int usnic_ib_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma);
struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct ib_udata *udata);
int usnic_ib_destroy_ah(struct ib_ah *ah);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 9fbe22d3467b..8e2f0a11690f 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -440,10 +440,10 @@ void pvrdma_global_route_to_ib(struct ib_global_route *dst,
const struct pvrdma_global_route *src);
void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst,
const struct ib_global_route *src);
-void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst,
- const struct pvrdma_ah_attr *src);
-void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
- const struct ib_ah_attr *src);
+void pvrdma_ah_attr_to_rdma(struct rdma_ah_attr *dst,
+ const struct pvrdma_ah_attr *src);
+void rdma_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
+ const struct rdma_ah_attr *src);
int pvrdma_uar_table_init(struct pvrdma_dev *dev);
void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
index 948b5ccd2a70..ec6a4ca1eeb7 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
@@ -194,7 +194,7 @@ int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir,
len = sg_dma_len(sg) >> PAGE_SHIFT;
for (j = 0; j < len; j++) {
dma_addr_t addr = sg_dma_address(sg) +
- umem->page_size * j;
+ (j << umem->page_shift);
ret = pvrdma_page_dir_insert_dma(pdir, i, addr);
if (ret)
@@ -277,28 +277,29 @@ void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst,
dst->traffic_class = src->traffic_class;
}
-void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst,
- const struct pvrdma_ah_attr *src)
+void pvrdma_ah_attr_to_rdma(struct rdma_ah_attr *dst,
+ const struct pvrdma_ah_attr *src)
{
- pvrdma_global_route_to_ib(&dst->grh, &src->grh);
- dst->dlid = src->dlid;
- dst->sl = src->sl;
- dst->src_path_bits = src->src_path_bits;
- dst->static_rate = src->static_rate;
- dst->ah_flags = src->ah_flags;
- dst->port_num = src->port_num;
- memcpy(&dst->dmac, &src->dmac, sizeof(dst->dmac));
+ dst->type = RDMA_AH_ATTR_TYPE_ROCE;
+ pvrdma_global_route_to_ib(rdma_ah_retrieve_grh(dst), &src->grh);
+ rdma_ah_set_dlid(dst, src->dlid);
+ rdma_ah_set_sl(dst, src->sl);
+ rdma_ah_set_path_bits(dst, src->src_path_bits);
+ rdma_ah_set_static_rate(dst, src->static_rate);
+ rdma_ah_set_ah_flags(dst, src->ah_flags);
+ rdma_ah_set_port_num(dst, src->port_num);
+ memcpy(dst->roce.dmac, &src->dmac, ETH_ALEN);
}
-void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
- const struct ib_ah_attr *src)
+void rdma_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
+ const struct rdma_ah_attr *src)
{
- ib_global_route_to_pvrdma(&dst->grh, &src->grh);
- dst->dlid = src->dlid;
- dst->sl = src->sl;
- dst->src_path_bits = src->src_path_bits;
- dst->static_rate = src->static_rate;
- dst->ah_flags = src->ah_flags;
- dst->port_num = src->port_num;
- memcpy(&dst->dmac, &src->dmac, sizeof(dst->dmac));
+ ib_global_route_to_pvrdma(&dst->grh, rdma_ah_read_grh(src));
+ dst->dlid = rdma_ah_get_dlid(src);
+ dst->sl = rdma_ah_get_sl(src);
+ dst->src_path_bits = rdma_ah_get_path_bits(src);
+ dst->static_rate = rdma_ah_get_static_rate(src);
+ dst->ah_flags = rdma_ah_get_ah_flags(src);
+ dst->port_num = rdma_ah_get_port_num(src);
+ memcpy(&dst->dmac, src->roce.dmac, sizeof(dst->dmac));
}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 30062aad3af1..ed34d5a581fa 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -533,8 +533,8 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
cmd->attrs.alt_port_num = attr->alt_port_num;
cmd->attrs.alt_timeout = attr->alt_timeout;
ib_qp_cap_to_pvrdma(&cmd->attrs.cap, &attr->cap);
- ib_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
- ib_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);
+ rdma_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
+ rdma_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);
ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_MODIFY_QP_RESP);
if (ret < 0) {
@@ -938,8 +938,8 @@ int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->alt_port_num = resp->attrs.alt_port_num;
attr->alt_timeout = resp->attrs.alt_timeout;
pvrdma_qp_cap_to_ib(&attr->cap, &resp->attrs.cap);
- pvrdma_ah_attr_to_ib(&attr->ah_attr, &resp->attrs.ah_attr);
- pvrdma_ah_attr_to_ib(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr);
+ pvrdma_ah_attr_to_rdma(&attr->ah_attr, &resp->attrs.ah_attr);
+ pvrdma_ah_attr_to_rdma(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr);
qp->state = attr->qp_state;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index fec17c49103b..28517042011d 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -520,20 +520,20 @@ int pvrdma_dealloc_pd(struct ib_pd *pd)
*
* @return: the ib_ah pointer on success, otherwise errno.
*/
-struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
struct pvrdma_dev *dev = to_vdev(pd->device);
struct pvrdma_ah *ah;
- enum rdma_link_layer ll;
+ const struct ib_global_route *grh;
+ u8 port_num = rdma_ah_get_port_num(ah_attr);
- if (!(ah_attr->ah_flags & IB_AH_GRH))
+ if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
return ERR_PTR(-EINVAL);
- ll = rdma_port_get_link_layer(pd->device, ah_attr->port_num);
-
- if (ll != IB_LINK_LAYER_ETHERNET ||
- rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw))
+ grh = rdma_ah_read_grh(ah_attr);
+ if ((ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
+ rdma_is_multicast_addr((struct in6_addr *)grh->dgid.raw))
return ERR_PTR(-EINVAL);
if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah))
@@ -545,15 +545,15 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
return ERR_PTR(-ENOMEM);
}
- ah->av.port_pd = to_vpd(pd)->pd_handle | (ah_attr->port_num << 24);
- ah->av.src_path_bits = ah_attr->src_path_bits;
+ ah->av.port_pd = to_vpd(pd)->pd_handle | (port_num << 24);
+ ah->av.src_path_bits = rdma_ah_get_path_bits(ah_attr);
ah->av.src_path_bits |= 0x80;
- ah->av.gid_index = ah_attr->grh.sgid_index;
- ah->av.hop_limit = ah_attr->grh.hop_limit;
- ah->av.sl_tclass_flowlabel = (ah_attr->grh.traffic_class << 20) |
- ah_attr->grh.flow_label;
- memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
- memcpy(ah->av.dmac, ah_attr->dmac, 6);
+ ah->av.gid_index = grh->sgid_index;
+ ah->av.hop_limit = grh->hop_limit;
+ ah->av.sl_tclass_flowlabel = (grh->traffic_class << 20) |
+ grh->flow_label;
+ memcpy(ah->av.dgid, grh->dgid.raw, 16);
+ memcpy(ah->av.dmac, ah_attr->roce.dmac, ETH_ALEN);
ah->ibah.device = pd->device;
ah->ibah.pd = pd;
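A stand-alone sketch (not part of this patch) of the admission checks pvrdma_create_ah() keeps after the conversion: the AH must carry a GRH, must be of the RoCE type, and the destination GID must not be multicast (GID byte 0 == 0xff). The struct and constants below are simplified stand-ins:

#include <stdint.h>
#include <stdio.h>

#define AH_GRH       0x1 /* stand-in for IB_AH_GRH */
#define AH_TYPE_ROCE 1   /* stand-in for RDMA_AH_ATTR_TYPE_ROCE */

struct mock_ah_attr {
    int type;
    unsigned int ah_flags;
    uint8_t dgid[16];
};

static int create_ah_ok(const struct mock_ah_attr *a)
{
    if (!(a->ah_flags & AH_GRH))
        return 0; /* RoCE AHs always need a GRH */
    if (a->type != AH_TYPE_ROCE)
        return 0;
    if (a->dgid[0] == 0xff)
        return 0; /* multicast destination GID */
    return 1;
}

int main(void)
{
    struct mock_ah_attr uc = { AH_TYPE_ROCE, AH_GRH, { 0xfe, 0x80 } };
    struct mock_ah_attr mc = { AH_TYPE_ROCE, AH_GRH, { 0xff } };

    printf("unicast ok=%d, multicast ok=%d\n", create_ah_ok(&uc), create_ah_ok(&mc));
    return 0;
}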
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index bfbe96b56255..002a9b066e70 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -417,7 +417,7 @@ int pvrdma_resize_cq(struct ib_cq *ibcq, int entries,
int pvrdma_destroy_cq(struct ib_cq *cq);
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
-struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
struct ib_udata *udata);
int pvrdma_destroy_ah(struct ib_ah *ah);
struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index 16c446142c2a..a96d4aa80ae8 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -60,32 +60,35 @@
* Return: 0 on success
*/
int rvt_check_ah(struct ib_device *ibdev,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
int err;
+ int port_num = rdma_ah_get_port_num(ah_attr);
struct ib_port_attr port_attr;
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
- enum rdma_link_layer link = rdma_port_get_link_layer(ibdev,
- ah_attr->port_num);
+ enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num);
+ u32 dlid = rdma_ah_get_dlid(ah_attr);
+ u8 ah_flags = rdma_ah_get_ah_flags(ah_attr);
+ u8 static_rate = rdma_ah_get_static_rate(ah_attr);
- err = ib_query_port(ibdev, ah_attr->port_num, &port_attr);
+ err = ib_query_port(ibdev, port_num, &port_attr);
if (err)
return -EINVAL;
- if (ah_attr->port_num < 1 ||
- ah_attr->port_num > ibdev->phys_port_cnt)
+ if (port_num < 1 ||
+ port_num > ibdev->phys_port_cnt)
return -EINVAL;
- if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
- ib_rate_to_mbps(ah_attr->static_rate) < 0)
+ if (static_rate != IB_RATE_PORT_CURRENT &&
+ ib_rate_to_mbps(static_rate) < 0)
return -EINVAL;
- if ((ah_attr->ah_flags & IB_AH_GRH) &&
- ah_attr->grh.sgid_index >= port_attr.gid_tbl_len)
+ if ((ah_flags & IB_AH_GRH) &&
+ rdma_ah_read_grh(ah_attr)->sgid_index >= port_attr.gid_tbl_len)
return -EINVAL;
if (link != IB_LINK_LAYER_ETHERNET) {
- if (ah_attr->dlid == 0)
+ if (dlid == 0)
return -EINVAL;
- if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
- ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) &&
- !(ah_attr->ah_flags & IB_AH_GRH))
+ if (dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
+ dlid != be16_to_cpu(IB_LID_PERMISSIVE) &&
+ !(ah_flags & IB_AH_GRH))
return -EINVAL;
}
if (rdi->driver_f.check_ah)
@@ -104,7 +107,7 @@ EXPORT_SYMBOL(rvt_check_ah);
* Return: newly allocated ah
*/
struct ib_ah *rvt_create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
struct rvt_ah *ah;
struct rvt_dev_info *dev = ib_to_rvt(pd->device);
@@ -119,7 +122,7 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd,
spin_lock_irqsave(&dev->n_ahs_lock, flags);
if (dev->n_ahs_allocated == dev->dparms.props.max_ah) {
- spin_unlock(&dev->n_ahs_lock);
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
kfree(ah);
return ERR_PTR(-ENOMEM);
}
@@ -167,7 +170,7 @@ int rvt_destroy_ah(struct ib_ah *ibah)
*
* Return: 0 on success
*/
-int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct rvt_ah *ah = ibah_to_rvtah(ibah);
@@ -186,7 +189,7 @@ int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
*
* Return: always 0
*/
-int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct rvt_ah *ah = ibah_to_rvtah(ibah);
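The ah.c hunk above also fixes an unbalanced unlock: a lock taken with spin_lock_irqsave() must be dropped with spin_unlock_irqrestore() on the error path as well, otherwise the saved interrupt state is lost. A stand-alone mock (not the real locking API) of why restoring the saved flags matters:

#include <stdio.h>

static unsigned long irq_state = 1; /* 1 == "interrupts enabled" */

static void spin_lock_irqsave(int *lock, unsigned long *flags)
{
    *flags = irq_state; /* remember the previous state */
    irq_state = 0;      /* "disable" interrupts */
    *lock = 1;
}

static void spin_unlock_irqrestore(int *lock, unsigned long flags)
{
    *lock = 0;
    irq_state = flags;  /* restore, never blindly re-enable */
}

int main(void)
{
    int lock = 0;
    unsigned long flags;

    spin_lock_irqsave(&lock, &flags);
    /* error path: max_ah reached -> unlock must restore the flags */
    spin_unlock_irqrestore(&lock, flags);
    printf("interrupts %s after the error path\n", irq_state ? "restored" : "still off");
    return 0;
}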
diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h
index e9c36be87d79..16105af99189 100644
--- a/drivers/infiniband/sw/rdmavt/ah.h
+++ b/drivers/infiniband/sw/rdmavt/ah.h
@@ -51,9 +51,9 @@
#include <rdma/rdma_vt.h>
struct ib_ah *rvt_create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr);
+ struct rdma_ah_attr *ah_attr);
int rvt_destroy_ah(struct ib_ah *ibah);
-int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
-int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
+int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
#endif /* DEF_RVTAH_H */
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 7aa7a4e312f1..0ae2ff8cf81e 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -50,6 +50,7 @@
#include <linux/kthread.h>
#include "cq.h"
#include "vt.h"
+#include "trace.h"
/**
* rvt_cq_enter - add a new entry to the completion queue
@@ -93,6 +94,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
}
return;
}
+ trace_rvt_cq_enter(cq, entry, head);
if (cq->ip) {
wc->uqueue[head].wr_id = entry->wr_id;
wc->uqueue[head].status = entry->status;
@@ -482,6 +484,7 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
if (tail == wc->head)
break;
/* The kernel doesn't need a RMB since it has the lock. */
+ trace_rvt_cq_poll(cq, &wc->kqueue[tail], npolled);
*entry = wc->kqueue[tail];
if (tail >= cq->ibcq.cqe)
tail = 0;
diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c
index bba241faca61..d6981dc04adb 100644
--- a/drivers/infiniband/sw/rdmavt/mad.c
+++ b/drivers/infiniband/sw/rdmavt/mad.c
@@ -160,7 +160,7 @@ void rvt_free_mad_agents(struct rvt_dev_info *rdi)
ib_unregister_mad_agent(agent);
}
if (rvp->sm_ah) {
- ib_destroy_ah(&rvp->sm_ah->ibah);
+ rdma_destroy_ah(&rvp->sm_ah->ibah);
rvp->sm_ah = NULL;
}
diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index 05c8c2afb0e3..1f12b69a0d07 100644
--- a/drivers/infiniband/sw/rdmavt/mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -100,10 +100,11 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp)
/**
* mcast_alloc - allocate the multicast GID structure
* @mgid: the multicast GID
+ * @lid: the multicast LID (host order)
*
* A list of QPs will be attached to this structure.
*/
-static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid)
+static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid, u16 lid)
{
struct rvt_mcast *mcast;
@@ -111,7 +112,9 @@ static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid)
if (!mcast)
goto bail;
- mcast->mgid = *mgid;
+ mcast->mcast_addr.mgid = *mgid;
+ mcast->mcast_addr.lid = lid;
+
INIT_LIST_HEAD(&mcast->qp_list);
init_waitqueue_head(&mcast->wait);
atomic_set(&mcast->refcount, 0);
@@ -131,15 +134,19 @@ static void rvt_mcast_free(struct rvt_mcast *mcast)
}
/**
- * rvt_mcast_find - search the global table for the given multicast GID
+ * rvt_mcast_find - search the global table for the given multicast GID/LID
+ * NOTE: It is valid to have 1 MLID with multiple MGIDs. It is not valid
+ * to have 1 MGID with multiple MLIDs.
* @ibp: the IB port structure
* @mgid: the multicast GID to search for
+ * @lid: the multicast LID portion of the multicast address (host order)
*
* The caller is responsible for decrementing the reference count if found.
*
* Return: NULL if not found.
*/
-struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid)
+struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
+ u16 lid)
{
struct rb_node *n;
unsigned long flags;
@@ -153,15 +160,18 @@ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid)
mcast = rb_entry(n, struct rvt_mcast, rb_node);
- ret = memcmp(mgid->raw, mcast->mgid.raw,
- sizeof(union ib_gid));
+ ret = memcmp(mgid->raw, mcast->mcast_addr.mgid.raw,
+ sizeof(*mgid));
if (ret < 0) {
n = n->rb_left;
} else if (ret > 0) {
n = n->rb_right;
} else {
- atomic_inc(&mcast->refcount);
- found = mcast;
+ /* MGID/MLID must match */
+ if (mcast->mcast_addr.lid == lid) {
+ atomic_inc(&mcast->refcount);
+ found = mcast;
+ }
break;
}
}
@@ -177,7 +187,8 @@ EXPORT_SYMBOL(rvt_mcast_find);
*
* Return: zero if both were added. Return EEXIST if the GID was already in
* the table but the QP was added. Return ESRCH if the QP was already
- * attached and neither structure was added.
+ * attached and neither structure was added. Return EINVAL if the MGID was
+ * found, but the MLID did NOT match.
*/
static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp,
struct rvt_mcast *mcast, struct rvt_mcast_qp *mqp)
@@ -195,8 +206,9 @@ static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp,
pn = *n;
tmcast = rb_entry(pn, struct rvt_mcast, rb_node);
- ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
- sizeof(union ib_gid));
+ ret = memcmp(mcast->mcast_addr.mgid.raw,
+ tmcast->mcast_addr.mgid.raw,
+ sizeof(mcast->mcast_addr.mgid));
if (ret < 0) {
n = &pn->rb_left;
continue;
@@ -206,6 +218,11 @@ static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp,
continue;
}
+ if (tmcast->mcast_addr.lid != mcast->mcast_addr.lid) {
+ ret = EINVAL;
+ goto bail;
+ }
+
/* Search the QP list to see if this is already there. */
list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
if (p->qp == mqp->qp) {
@@ -276,7 +293,7 @@ int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
* Allocate data structures since its better to do this outside of
* spin locks and it will most likely be needed.
*/
- mcast = rvt_mcast_alloc(gid);
+ mcast = rvt_mcast_alloc(gid, lid);
if (!mcast)
return -ENOMEM;
@@ -296,6 +313,10 @@ int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
/* Exceeded the maximum number of mcast groups. */
ret = -ENOMEM;
goto bail_mqp;
+ case EINVAL:
+ /* Invalid MGID/MLID pair */
+ ret = -EINVAL;
+ goto bail_mqp;
default:
break;
}
@@ -344,14 +365,20 @@ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
mcast = rb_entry(n, struct rvt_mcast, rb_node);
- ret = memcmp(gid->raw, mcast->mgid.raw,
- sizeof(union ib_gid));
- if (ret < 0)
+ ret = memcmp(gid->raw, mcast->mcast_addr.mgid.raw,
+ sizeof(*gid));
+ if (ret < 0) {
n = n->rb_left;
- else if (ret > 0)
+ } else if (ret > 0) {
n = n->rb_right;
- else
+ } else {
+ /* MGID/MLID must match */
+ if (mcast->mcast_addr.lid != lid) {
+ spin_unlock_irq(&ibp->lock);
+ return -EINVAL;
+ }
break;
+ }
}
/* Search the QP list. */
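The mcast.c changes above make the multicast address a composite MGID/MLID key: the MGID still drives the rb-tree search, but a hit only counts when the stored MLID matches too. A stand-alone sketch of the matching rule, using a flat array instead of the rb-tree:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct mcast_entry {
    uint8_t  mgid[16];
    uint16_t lid;
};

static const struct mcast_entry *
mcast_find(const struct mcast_entry *tbl, int n, const uint8_t mgid[16], uint16_t lid)
{
    for (int i = 0; i < n; i++) {
        if (memcmp(tbl[i].mgid, mgid, 16))
            continue;                              /* different MGID */
        return tbl[i].lid == lid ? &tbl[i] : NULL; /* MLID must match too */
    }
    return NULL;
}

int main(void)
{
    struct mcast_entry tbl[1] = { { { 0xff, 0x12 }, 0xc001 } };

    printf("same mlid: %s\n", mcast_find(tbl, 1, tbl[0].mgid, 0xc001) ? "found" : "miss");
    printf("other mlid: %s\n", mcast_find(tbl, 1, tbl[0].mgid, 0xc002) ? "found" : "miss");
    return 0;
}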
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index ae30b6838d79..aa5f9ea318e4 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -191,8 +191,9 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
tmr = rcu_access_pointer(dev->dma_mr);
if (!tmr) {
- rcu_assign_pointer(dev->dma_mr, mr);
mr->lkey_published = 1;
+ /* Insure published written first */
+ rcu_assign_pointer(dev->dma_mr, mr);
rvt_get_mr(mr);
}
goto success;
@@ -224,8 +225,9 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
mr->lkey |= 1 << 8;
rkt->gen++;
}
- rcu_assign_pointer(rkt->table[r], mr);
mr->lkey_published = 1;
+ /* Insure published written first */
+ rcu_assign_pointer(rkt->table[r], mr);
success:
spin_unlock_irqrestore(&rkt->lock, flags);
out:
@@ -253,23 +255,24 @@ static void rvt_free_lkey(struct rvt_mregion *mr)
spin_lock_irqsave(&rkt->lock, flags);
if (!lkey) {
if (mr->lkey_published) {
- RCU_INIT_POINTER(dev->dma_mr, NULL);
+ mr->lkey_published = 0;
+ /* insure published is written before pointer */
+ rcu_assign_pointer(dev->dma_mr, NULL);
rvt_put_mr(mr);
}
} else {
if (!mr->lkey_published)
goto out;
r = lkey >> (32 - dev->dparms.lkey_table_size);
- RCU_INIT_POINTER(rkt->table[r], NULL);
+ mr->lkey_published = 0;
+ /* insure published is written before pointer */
+ rcu_assign_pointer(rkt->table[r], NULL);
}
- mr->lkey_published = 0;
freed++;
out:
spin_unlock_irqrestore(&rkt->lock, flags);
- if (freed) {
- synchronize_rcu();
+ if (freed)
percpu_ref_kill(&mr->refcount);
- }
}
static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
@@ -405,8 +408,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->mr.access_flags = mr_access_flags;
mr->umem = umem;
- if (is_power_of_2(umem->page_size))
- mr->mr.page_shift = ilog2(umem->page_size);
+ mr->mr.page_shift = umem->page_shift;
m = 0;
n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
@@ -418,8 +420,9 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto bail_inval;
}
mr->mr.map[m]->segs[n].vaddr = vaddr;
- mr->mr.map[m]->segs[n].length = umem->page_size;
- trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size);
+ mr->mr.map[m]->segs[n].length = BIT(umem->page_shift);
+ trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr,
+ BIT(umem->page_shift));
n++;
if (n == RVT_SEGSZ) {
m++;
@@ -822,16 +825,21 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
goto ok;
}
mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
- if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
- mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
+ if (!mr)
goto bail;
+ rvt_get_mr(mr);
+ if (!READ_ONCE(mr->lkey_published))
+ goto bail_unref;
+
+ if (unlikely(atomic_read(&mr->lkey_invalid) ||
+ mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
+ goto bail_unref;
off = sge->addr - mr->user_base;
if (unlikely(sge->addr < mr->user_base ||
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc))
- goto bail;
- rvt_get_mr(mr);
+ goto bail_unref;
rcu_read_unlock();
off += mr->offset;
@@ -867,6 +875,8 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
isge->n = n;
ok:
return 1;
+bail_unref:
+ rvt_put_mr(mr);
bail:
rcu_read_unlock();
return 0;
@@ -922,15 +932,20 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
}
mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
- if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
- mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+ if (!mr)
goto bail;
+ rvt_get_mr(mr);
+ /* insure mr read is before test */
+ if (!READ_ONCE(mr->lkey_published))
+ goto bail_unref;
+ if (unlikely(atomic_read(&mr->lkey_invalid) ||
+ mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+ goto bail_unref;
off = vaddr - mr->iova;
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0))
- goto bail;
- rvt_get_mr(mr);
+ goto bail_unref;
rcu_read_unlock();
off += mr->offset;
@@ -966,6 +981,8 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
sge->n = n;
ok:
return 1;
+bail_unref:
+ rvt_put_mr(mr);
bail:
rcu_read_unlock();
return 0;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index f5ad8d4bfb39..727e81cc2c8f 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -117,23 +117,6 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
};
EXPORT_SYMBOL(ib_rvt_state_ops);
-/*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_rvt_wc_opcode[] = {
- [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
- [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
- [IB_WR_SEND] = IB_WC_SEND,
- [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
- [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
- [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
- [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
- [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
- [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
- [IB_WR_REG_MR] = IB_WC_REG_MR
-};
-EXPORT_SYMBOL(ib_rvt_wc_opcode);
-
static void get_map_page(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map,
gfp_t gfp)
@@ -1121,14 +1104,15 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto inval;
if (attr_mask & IB_QP_AV) {
- if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
+ if (rdma_ah_get_dlid(&attr->ah_attr) >=
+ be16_to_cpu(IB_MULTICAST_LID_BASE))
goto inval;
if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr))
goto inval;
}
if (attr_mask & IB_QP_ALT_PATH) {
- if (attr->alt_ah_attr.dlid >=
+ if (rdma_ah_get_dlid(&attr->alt_ah_attr) >=
be16_to_cpu(IB_MULTICAST_LID_BASE))
goto inval;
if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
@@ -1257,7 +1241,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_AV) {
qp->remote_ah_attr = attr->ah_attr;
- qp->s_srate = attr->ah_attr.static_rate;
+ qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr);
qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
}
@@ -1270,7 +1254,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp->s_mig_state = attr->path_mig_state;
if (mig) {
qp->remote_ah_attr = qp->alt_ah_attr;
- qp->port_num = qp->alt_ah_attr.port_num;
+ qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
qp->s_pkey_index = qp->s_alt_pkey_index;
}
}
@@ -1441,7 +1425,8 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->timeout = qp->timeout;
attr->retry_cnt = qp->s_retry_cnt;
attr->rnr_retry = qp->s_rnr_retry_cnt;
- attr->alt_port_num = qp->alt_ah_attr.port_num;
+ attr->alt_port_num =
+ rdma_ah_get_port_num(&qp->alt_ah_attr);
attr->alt_timeout = qp->alt_timeout;
init_attr->event_handler = qp->ibqp.event_handler;
@@ -1789,11 +1774,14 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
0);
qp->s_next_psn = wqe->lpsn + 1;
}
- trace_rvt_post_one_wr(qp, wqe);
- if (unlikely(reserved_op))
+ if (unlikely(reserved_op)) {
+ wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
rvt_qp_wqe_reserve(qp, wqe);
- else
+ } else {
+ wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
qp->s_avail--;
+ }
+ trace_rvt_post_one_wr(qp, wqe);
smp_wmb(); /* see request builders */
qp->s_head = next;
@@ -2069,8 +2057,12 @@ static void rvt_rc_timeout(unsigned long arg)
spin_lock_irqsave(&qp->r_lock, flags);
spin_lock(&qp->s_lock);
if (qp->s_flags & RVT_S_TIMER) {
+ struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
+
qp->s_flags &= ~RVT_S_TIMER;
+ rvp->n_rc_timeouts++;
del_timer(&qp->s_timer);
+ trace_rvt_rc_timeout(qp, qp->s_last_psn + 1);
if (rdi->driver_f.notify_restart_rc)
rdi->driver_f.notify_restart_rc(qp,
qp->s_last_psn + 1,
diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h
index e2d23acb6a7d..bb4b1e710f22 100644
--- a/drivers/infiniband/sw/rdmavt/trace.h
+++ b/drivers/infiniband/sw/rdmavt/trace.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -52,3 +52,5 @@
#include "trace_qp.h"
#include "trace_tx.h"
#include "trace_mr.h"
+#include "trace_cq.h"
+#include "trace_rc.h"
diff --git a/drivers/infiniband/sw/rdmavt/trace_cq.h b/drivers/infiniband/sw/rdmavt/trace_cq.h
new file mode 100644
index 000000000000..a315850aa9bb
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_cq.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_CQ_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_CQ_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdmavt_cq.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_cq
+
+#define wc_opcode_name(opcode) { IB_WC_##opcode, #opcode }
+#define show_wc_opcode(opcode) \
+__print_symbolic(opcode, \
+ wc_opcode_name(SEND), \
+ wc_opcode_name(RDMA_WRITE), \
+ wc_opcode_name(RDMA_READ), \
+ wc_opcode_name(COMP_SWAP), \
+ wc_opcode_name(FETCH_ADD), \
+ wc_opcode_name(LSO), \
+ wc_opcode_name(LOCAL_INV), \
+ wc_opcode_name(REG_MR), \
+ wc_opcode_name(MASKED_COMP_SWAP), \
+ wc_opcode_name(RECV), \
+ wc_opcode_name(RECV_RDMA_WITH_IMM))
+
+#define CQ_PRN \
+"[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x"
+
+DECLARE_EVENT_CLASS(
+ rvt_cq_entry_template,
+ TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
+ TP_ARGS(cq, wc, idx),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(cq->rdi)
+ __field(u64, wr_id)
+ __field(u32, status)
+ __field(u32, opcode)
+ __field(u32, qpn)
+ __field(u32, length)
+ __field(u32, idx)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(cq->rdi)
+ __entry->wr_id = wc->wr_id;
+ __entry->status = wc->status;
+ __entry->opcode = wc->opcode;
+ __entry->length = wc->byte_len;
+ __entry->qpn = wc->qp->qp_num;
+ __entry->idx = idx;
+ ),
+ TP_printk(
+ CQ_PRN,
+ __get_str(dev),
+ __entry->idx,
+ __entry->wr_id,
+ __entry->status,
+ __entry->opcode, show_wc_opcode(__entry->opcode),
+ __entry->length,
+ __entry->qpn
+ )
+);
+
+DEFINE_EVENT(
+ rvt_cq_entry_template, rvt_cq_enter,
+ TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
+ TP_ARGS(cq, wc, idx));
+
+DEFINE_EVENT(
+ rvt_cq_entry_template, rvt_cq_poll,
+ TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
+ TP_ARGS(cq, wc, idx));
+
+#endif /* __RVT_TRACE_CQ_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_cq
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/sw/rdmavt/trace_rc.h b/drivers/infiniband/sw/rdmavt/trace_rc.h
new file mode 100644
index 000000000000..995276933a55
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_rc.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_RC_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_rc
+
+DECLARE_EVENT_CLASS(rvt_rc_template,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, s_flags)
+ __field(u32, psn)
+ __field(u32, s_psn)
+ __field(u32, s_next_psn)
+ __field(u32, s_sending_psn)
+ __field(u32, s_sending_hpsn)
+ __field(u32, r_psn)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->s_flags = qp->s_flags;
+ __entry->psn = psn;
+ __entry->s_psn = qp->s_psn;
+ __entry->s_next_psn = qp->s_next_psn;
+ __entry->s_sending_psn = qp->s_sending_psn;
+ __entry->s_sending_hpsn = qp->s_sending_hpsn;
+ __entry->r_psn = qp->r_psn;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->s_flags,
+ __entry->psn,
+ __entry->s_psn,
+ __entry->s_next_psn,
+ __entry->s_sending_psn,
+ __entry->s_sending_hpsn,
+ __entry->r_psn
+ )
+);
+
+DEFINE_EVENT(rvt_rc_template, rvt_rc_timeout,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+#endif /* __RVT_TRACE_RC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_rc
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h
index 0e03173662d8..a613a2223751 100644
--- a/drivers/infiniband/sw/rdmavt/trace_tx.h
+++ b/drivers/infiniband/sw/rdmavt/trace_tx.h
@@ -71,10 +71,20 @@ __print_symbolic(opcode, \
wr_opcode_name(RDMA_READ_WITH_INV), \
wr_opcode_name(LOCAL_INV), \
wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \
- wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
+ wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD), \
+ wr_opcode_name(RESERVED1), \
+ wr_opcode_name(RESERVED2), \
+ wr_opcode_name(RESERVED3), \
+ wr_opcode_name(RESERVED4), \
+ wr_opcode_name(RESERVED5), \
+ wr_opcode_name(RESERVED6), \
+ wr_opcode_name(RESERVED7), \
+ wr_opcode_name(RESERVED8), \
+ wr_opcode_name(RESERVED9), \
+ wr_opcode_name(RESERVED10))
#define POS_PRN \
-"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
+"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u"
TRACE_EVENT(
rvt_post_one_wr,
@@ -83,7 +93,9 @@ TRACE_EVENT(
TP_STRUCT__entry(
RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
__field(u64, wr_id)
+ __field(struct rvt_swqe *, wqe)
__field(u32, qpn)
+ __field(u32, qpt)
__field(u32, psn)
__field(u32, lpsn)
__field(u32, length)
@@ -92,11 +104,17 @@ TRACE_EVENT(
__field(u32, avail)
__field(u32, head)
__field(u32, last)
+ __field(u32, ssn)
+ __field(int, send_flags)
+ __field(pid_t, pid)
+ __field(int, num_sge)
),
TP_fast_assign(
RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->wqe = wqe;
__entry->wr_id = wqe->wr.wr_id;
__entry->qpn = qp->ibqp.qp_num;
+ __entry->qpt = qp->ibqp.qp_type;
__entry->psn = wqe->psn;
__entry->lpsn = wqe->lpsn;
__entry->length = wqe->length;
@@ -105,20 +123,30 @@ TRACE_EVENT(
__entry->avail = qp->s_avail;
__entry->head = qp->s_head;
__entry->last = qp->s_last;
+ __entry->pid = qp->pid;
+ __entry->ssn = wqe->ssn;
+ __entry->send_flags = wqe->wr.send_flags;
+ __entry->num_sge = wqe->wr.num_sge;
),
TP_printk(
POS_PRN,
__get_str(dev),
+ __entry->wqe,
__entry->wr_id,
+ __entry->send_flags,
__entry->qpn,
+ __entry->qpt,
__entry->psn,
__entry->lpsn,
+ __entry->ssn,
__entry->length,
__entry->opcode, show_wr_opcode(__entry->opcode),
__entry->size,
__entry->avail,
__entry->head,
- __entry->last
+ __entry->last,
+ __entry->pid,
+ __entry->num_sge
)
);
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index 6332dedc11e8..320bffc980d8 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -2,6 +2,7 @@ config RDMA_RXE
tristate "Software RDMA over Ethernet (RoCE) driver"
depends on INET && PCI && INFINIBAND
depends on NET_UDP_TUNNEL
+ depends on CRYPTO_CRC32
select DMA_VIRT_OPS
---help---
This driver implements the InfiniBand RDMA transport over
diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile
index ec35ff022a42..3f12beb7076f 100644
--- a/drivers/infiniband/sw/rxe/Makefile
+++ b/drivers/infiniband/sw/rxe/Makefile
@@ -20,4 +20,5 @@ rdma_rxe-y := \
rxe_mcast.o \
rxe_task.o \
rxe_net.o \
- rxe_sysfs.o
+ rxe_sysfs.o \
+ rxe_hw_counters.o
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index b12dd9b5a89d..c21c913f911a 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <net/addrconf.h>
#include "rxe.h"
#include "rxe_loc.h"
@@ -64,6 +65,8 @@ static void rxe_cleanup(struct rxe_dev *rxe)
rxe_pool_cleanup(&rxe->mc_elem_pool);
rxe_cleanup_ports(rxe);
+
+ crypto_free_shash(rxe->tfm);
}
/* called when all references have been dropped */
@@ -178,7 +181,8 @@ static int rxe_init_ports(struct rxe_dev *rxe)
return -ENOMEM;
port->pkey_tbl[0] = 0xffff;
- port->port_guid = rxe_port_guid(rxe);
+ addrconf_addr_eui48((unsigned char *)&port->port_guid,
+ rxe->ndev->dev_addr);
spin_lock_init(&port->port_lock);
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index a696af81e4a5..ecdba2fce083 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -50,6 +50,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
+#include <crypto/hash.h>
#include "rxe_net.h"
#include "rxe_opcode.h"
@@ -64,6 +65,25 @@
#define RXE_ROCE_V2_SPORT (0xc000)
+static inline u32 rxe_crc32(struct rxe_dev *rxe,
+ u32 crc, void *next, size_t len)
+{
+ int err;
+
+ SHASH_DESC_ON_STACK(shash, rxe->tfm);
+
+ shash->tfm = rxe->tfm;
+ shash->flags = 0;
+ *(u32 *)shash_desc_ctx(shash) = crc;
+ err = crypto_shash_update(shash, next, len);
+ if (unlikely(err)) {
+ pr_warn_ratelimited("failed crc calculation, err: %d\n", err);
+ return crc32_le(crc, next, len);
+ }
+
+ return *(u32 *)shash_desc_ctx(shash);
+}
+
int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
int rxe_add(struct rxe_dev *rxe, unsigned int mtu);
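The rxe_crc32() helper above is meant as a drop-in replacement for the crc32_le() calls it supersedes: it feeds the "crc32" shash (the same little-endian CRC-32) and falls back to crc32_le() if the update fails. A minimal sketch of a caller, mirroring the rxe_icrc.c and rxe_rcv() hunks later in this patch (helper names are the existing rxe ones; this function itself is illustrative only):

static u32 example_icrc(struct rxe_pkt_info *pkt, struct sk_buff *skb)
{
	/* CRC over the pseudo header and masked BTH, as before */
	u32 crc = rxe_icrc_hdr(pkt, skb);

	/* payload bytes now go through the shash-backed helper */
	crc = rxe_crc32(pkt->rxe, crc, payload_addr(pkt), payload_size(pkt));

	/* final ICRC is the byte-swapped complement, as in rxe_rcv() */
	return (__force u32)cpu_to_be32(~crc);
}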
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 604f6fee96bd..5bddf469361b 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -34,21 +34,22 @@
#include "rxe.h"
#include "rxe_loc.h"
-int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr)
+int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
{
struct rxe_port *port;
- if (attr->port_num != 1) {
- pr_info("invalid port_num = %d\n", attr->port_num);
+ if (rdma_ah_get_port_num(attr) != 1) {
+ pr_info("invalid port_num = %d\n", rdma_ah_get_port_num(attr));
return -EINVAL;
}
port = &rxe->port;
- if (attr->ah_flags & IB_AH_GRH) {
- if (attr->grh.sgid_index > port->attr.gid_tbl_len) {
- pr_info("invalid sgid index = %d\n",
- attr->grh.sgid_index);
+ if (rdma_ah_get_ah_flags(attr) & IB_AH_GRH) {
+ u8 sgid_index = rdma_ah_read_grh(attr)->sgid_index;
+
+ if (sgid_index > port->attr.gid_tbl_len) {
+ pr_info("invalid sgid index = %d\n", sgid_index);
return -EINVAL;
}
}
@@ -57,30 +58,33 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr)
}
int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,
- struct rxe_av *av, struct ib_ah_attr *attr)
+ struct rxe_av *av, struct rdma_ah_attr *attr)
{
memset(av, 0, sizeof(*av));
- memcpy(&av->grh, &attr->grh, sizeof(attr->grh));
+ memcpy(&av->grh, rdma_ah_read_grh(attr),
+ sizeof(*rdma_ah_read_grh(attr)));
av->port_num = port_num;
return 0;
}
int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,
- struct ib_ah_attr *attr)
+ struct rdma_ah_attr *attr)
{
- memcpy(&attr->grh, &av->grh, sizeof(av->grh));
- attr->port_num = av->port_num;
+ attr->type = RDMA_AH_ATTR_TYPE_ROCE;
+ memcpy(rdma_ah_retrieve_grh(attr), &av->grh, sizeof(av->grh));
+ rdma_ah_set_ah_flags(attr, IB_AH_GRH);
+ rdma_ah_set_port_num(attr, av->port_num);
return 0;
}
int rxe_av_fill_ip_info(struct rxe_dev *rxe,
struct rxe_av *av,
- struct ib_ah_attr *attr,
+ struct rdma_ah_attr *attr,
struct ib_gid_attr *sgid_attr,
union ib_gid *sgid)
{
rdma_gid2ip(&av->sgid_addr._sockaddr, sgid);
- rdma_gid2ip(&av->dgid_addr._sockaddr, &attr->grh.dgid);
+ rdma_gid2ip(&av->dgid_addr._sockaddr, &rdma_ah_read_grh(attr)->dgid);
av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid);
return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 4cd55d5617f7..9eb12c2e3c74 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -154,6 +154,8 @@ void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
skb_queue_tail(&qp->resp_pkts, skb);
must_sched = skb_queue_len(&qp->resp_pkts) > 1;
+ if (must_sched != 0)
+ rxe_counter_inc(rxe, RXE_CNT_COMPLETER_SCHED);
rxe_run_task(&qp->comp.task, must_sched);
}
@@ -236,6 +238,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
{
unsigned int mask = pkt->mask;
u8 syn;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
/* Check the sequence only */
switch (qp->comp.opcode) {
@@ -298,6 +301,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
return COMPST_WRITE_SEND;
case AETH_RNR_NAK:
+ rxe_counter_inc(rxe, RXE_CNT_RCV_RNR);
return COMPST_RNR_RETRY;
case AETH_NAK:
@@ -307,6 +311,8 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
* before
*/
if (psn_compare(pkt->psn, qp->comp.psn) > 0) {
+ rxe_counter_inc(rxe,
+ RXE_CNT_RCV_SEQ_ERR);
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
@@ -534,6 +540,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
int rxe_completer(void *arg)
{
struct rxe_qp *qp = (struct rxe_qp *)arg;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_send_wqe *wqe = wqe;
struct sk_buff *skb = NULL;
struct rxe_pkt_info *pkt = NULL;
@@ -683,8 +690,10 @@ int rxe_completer(void *arg)
if (psn_compare(qp->req.psn,
qp->comp.psn) > 0) {
/* tell the requester to retry the
- * send send queue next time around
+ * send queue next time around
*/
+ rxe_counter_inc(rxe,
+ RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
rxe_run_task(&qp->req.task, 1);
}
@@ -699,6 +708,7 @@ int rxe_completer(void *arg)
goto exit;
} else {
+ rxe_counter_inc(rxe, RXE_CNT_RETRY_EXCEEDED);
wqe->status = IB_WC_RETRY_EXC_ERR;
state = COMPST_ERROR;
}
@@ -720,6 +730,8 @@ int rxe_completer(void *arg)
skb = NULL;
goto exit;
} else {
+ rxe_counter_inc(rxe,
+ RXE_CNT_RNR_RETRY_EXCEEDED);
wqe->status = IB_WC_RNR_RETRY_EXC_ERR;
state = COMPST_ERROR;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
new file mode 100644
index 000000000000..7ef90aad7dfd
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rxe.h"
+#include "rxe_hw_counters.h"
+
+const char * const rxe_counter_name[] = {
+ [RXE_CNT_SENT_PKTS] = "sent_pkts",
+ [RXE_CNT_RCVD_PKTS] = "rcvd_pkts",
+ [RXE_CNT_DUP_REQ] = "duplicate_request",
+ [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_sequence",
+ [RXE_CNT_RCV_RNR] = "rcvd_rnr_err",
+ [RXE_CNT_SND_RNR] = "send_rnr_err",
+ [RXE_CNT_RCV_SEQ_ERR] = "rcvd_seq_err",
+ [RXE_CNT_COMPLETER_SCHED] = "ack_deferred",
+ [RXE_CNT_RETRY_EXCEEDED] = "retry_exceeded_err",
+ [RXE_CNT_RNR_RETRY_EXCEEDED] = "retry_rnr_exceeded_err",
+ [RXE_CNT_COMP_RETRY] = "completer_retry_err",
+ [RXE_CNT_SEND_ERR] = "send_err",
+};
+
+int rxe_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port, int index)
+{
+ struct rxe_dev *dev = to_rdev(ibdev);
+ unsigned int cnt;
+
+ if (!port || !stats)
+ return -EINVAL;
+
+ for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_name); cnt++)
+ stats->value[cnt] = dev->stats_counters[cnt];
+
+ return ARRAY_SIZE(rxe_counter_name);
+}
+
+struct rdma_hw_stats *rxe_ib_alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_name) != RXE_NUM_OF_COUNTERS);
+ /* We support only per-port stats */
+ if (!port_num)
+ return NULL;
+
+ return rdma_alloc_hw_stats_struct(rxe_counter_name,
+ ARRAY_SIZE(rxe_counter_name),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
new file mode 100644
index 000000000000..f44df1b76742
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RXE_HW_COUNTERS_H
+#define RXE_HW_COUNTERS_H
+
+/*
+ * When adding counters to this enum, also add
+ * them to the rxe_counter_name[] array.
+ */
+enum rxe_counters {
+ RXE_CNT_SENT_PKTS,
+ RXE_CNT_RCVD_PKTS,
+ RXE_CNT_DUP_REQ,
+ RXE_CNT_OUT_OF_SEQ_REQ,
+ RXE_CNT_RCV_RNR,
+ RXE_CNT_SND_RNR,
+ RXE_CNT_RCV_SEQ_ERR,
+ RXE_CNT_COMPLETER_SCHED,
+ RXE_CNT_RETRY_EXCEEDED,
+ RXE_CNT_RNR_RETRY_EXCEEDED,
+ RXE_CNT_COMP_RETRY,
+ RXE_CNT_SEND_ERR,
+ RXE_NUM_OF_COUNTERS
+};
+
+struct rdma_hw_stats *rxe_ib_alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num);
+int rxe_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port, int index);
+#endif /* RXE_HW_COUNTERS_H */
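As the header comment notes, each counter id needs a matching entry in rxe_counter_name[]; the two tables are kept in lockstep and that invariant is enforced by the BUILD_BUG_ON() in rxe_ib_alloc_hw_stats(). A hypothetical new counter would be wired up like this (RXE_CNT_EXAMPLE_EVT and its name are illustrative only, not part of this patch):

	/* rxe_hw_counters.h: add the id just before RXE_NUM_OF_COUNTERS */
	RXE_CNT_EXAMPLE_EVT,

	/* rxe_hw_counters.c: give it a stats name at the same index */
	[RXE_CNT_EXAMPLE_EVT] = "example_events",

	/* at the point of interest, bump it via the per-device array */
	rxe_counter_inc(rxe, RXE_CNT_EXAMPLE_EVT);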
diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c
index 413b56b23a06..39e0be31aab1 100644
--- a/drivers/infiniband/sw/rxe/rxe_icrc.c
+++ b/drivers/infiniband/sw/rxe/rxe_icrc.c
@@ -87,10 +87,10 @@ u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb)
bth->qpn |= cpu_to_be32(~BTH_QPN_MASK);
length = hdr_size + RXE_BTH_BYTES;
- crc = crc32_le(crc, pshdr, length);
+ crc = rxe_crc32(pkt->rxe, crc, pshdr, length);
/* And finish to compute the CRC on the remainder of the headers. */
- crc = crc32_le(crc, pkt->hdr + RXE_BTH_BYTES,
- rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES);
+ crc = rxe_crc32(pkt->rxe, crc, pkt->hdr + RXE_BTH_BYTES,
+ rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES);
return crc;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 183a9d379b41..d6299edf9a5b 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -36,17 +36,17 @@
/* rxe_av.c */
-int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr);
+int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr);
int rxe_av_from_attr(struct rxe_dev *rxe, u8 port_num,
- struct rxe_av *av, struct ib_ah_attr *attr);
+ struct rxe_av *av, struct rdma_ah_attr *attr);
int rxe_av_to_attr(struct rxe_dev *rxe, struct rxe_av *av,
- struct ib_ah_attr *attr);
+ struct rdma_ah_attr *attr);
int rxe_av_fill_ip_info(struct rxe_dev *rxe,
struct rxe_av *av,
- struct ib_ah_attr *attr,
+ struct rdma_ah_attr *attr,
struct ib_gid_attr *sgid_attr,
union ib_gid *sgid);
@@ -145,7 +145,6 @@ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
int rxe_loopback(struct sk_buff *skb);
int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb);
-__be64 rxe_port_guid(struct rxe_dev *rxe);
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
int paylen, struct rxe_pkt_info *pkt);
int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
@@ -153,7 +152,6 @@ int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num);
const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num);
struct device *rxe_dma_device(struct rxe_dev *rxe);
-__be64 rxe_node_guid(struct rxe_dev *rxe);
int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid);
int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid);
@@ -278,6 +276,7 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
if (err) {
rxe->xmit_errors++;
+ rxe_counter_inc(rxe, RXE_CNT_SEND_ERR);
return err;
}
@@ -287,6 +286,7 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
rxe_run_task(&qp->comp.task, 1);
}
+ rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
goto done;
drop:
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 37eea7441ca4..e37cc89987e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -191,10 +191,8 @@ int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
goto err1;
}
- WARN_ON_ONCE(!is_power_of_2(umem->page_size));
-
- mem->page_shift = ilog2(umem->page_size);
- mem->page_mask = umem->page_size - 1;
+ mem->page_shift = umem->page_shift;
+ mem->page_mask = BIT(umem->page_shift) - 1;
num_buf = 0;
map = mem->map;
@@ -210,7 +208,7 @@ int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
}
buf->addr = (uintptr_t)vaddr;
- buf->size = umem->page_size;
+ buf->size = BIT(umem->page_shift);
num_buf++;
buf++;
@@ -370,7 +368,8 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
((void *)(uintptr_t)iova) : addr;
if (crcp)
- *crcp = crc32_le(*crcp, src, length);
+ *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device),
+ *crcp, src, length);
memcpy(dest, src, length);
@@ -403,7 +402,8 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
bytes = length;
if (crcp)
- crc = crc32_le(crc, src, bytes);
+ crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
+ crc, src, bytes);
memcpy(dest, src, bytes);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index d8610960630a..c3a140ed4df2 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -84,34 +84,6 @@ struct rxe_dev *get_rxe_by_name(const char *name)
struct rxe_recv_sockets recv_sockets;
-static __be64 rxe_mac_to_eui64(struct net_device *ndev)
-{
- unsigned char *mac_addr = ndev->dev_addr;
- __be64 eui64;
- unsigned char *dst = (unsigned char *)&eui64;
-
- dst[0] = mac_addr[0] ^ 2;
- dst[1] = mac_addr[1];
- dst[2] = mac_addr[2];
- dst[3] = 0xff;
- dst[4] = 0xfe;
- dst[5] = mac_addr[3];
- dst[6] = mac_addr[4];
- dst[7] = mac_addr[5];
-
- return eui64;
-}
-
-__be64 rxe_node_guid(struct rxe_dev *rxe)
-{
- return rxe_mac_to_eui64(rxe->ndev);
-}
-
-__be64 rxe_port_guid(struct rxe_dev *rxe)
-{
- return rxe_mac_to_eui64(rxe->ndev);
-}
-
struct device *rxe_dma_device(struct rxe_dev *rxe)
{
struct net_device *ndev;
@@ -210,6 +182,39 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
#endif
+static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
+ struct rxe_qp *qp,
+ struct rxe_av *av)
+{
+ struct dst_entry *dst = NULL;
+
+ if (qp_type(qp) == IB_QPT_RC)
+ dst = sk_dst_get(qp->sk->sk);
+
+ if (!dst || !(dst->obsolete && dst->ops->check(dst, 0))) {
+ if (dst)
+ dst_release(dst);
+
+ if (av->network_type == RDMA_NETWORK_IPV4) {
+ struct in_addr *saddr;
+ struct in_addr *daddr;
+
+ saddr = &av->sgid_addr._sockaddr_in.sin_addr;
+ daddr = &av->dgid_addr._sockaddr_in.sin_addr;
+ dst = rxe_find_route4(rxe->ndev, saddr, daddr);
+ } else if (av->network_type == RDMA_NETWORK_IPV6) {
+ struct in6_addr *saddr6;
+ struct in6_addr *daddr6;
+
+ saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr;
+ daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr;
+ dst = rxe_find_route6(rxe->ndev, saddr6, daddr6);
+ }
+ }
+
+ return dst;
+}
+
static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct udphdr *udph;
@@ -301,7 +306,7 @@ static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb,
skb_scrub_packet(skb, xnet);
skb_clear_hash(skb);
- skb_dst_set(skb, dst);
+ skb_dst_set(skb, dst_clone(dst));
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
skb_push(skb, sizeof(struct iphdr));
@@ -349,13 +354,14 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb, struct rxe_av *av)
{
+ struct rxe_qp *qp = pkt->qp;
struct dst_entry *dst;
bool xnet = false;
__be16 df = htons(IP_DF);
struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
- dst = rxe_find_route4(rxe->ndev, saddr, daddr);
+ dst = rxe_find_route(rxe, qp, av);
if (!dst) {
pr_err("Host not reachable\n");
return -EHOSTUNREACH;
@@ -369,17 +375,24 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
av->grh.traffic_class, av->grh.hop_limit, df, xnet);
+
+ if (qp_type(qp) == IB_QPT_RC)
+ sk_dst_set(qp->sk->sk, dst);
+ else
+ dst_release(dst);
+
return 0;
}
static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb, struct rxe_av *av)
{
- struct dst_entry *dst;
+ struct rxe_qp *qp = pkt->qp;
+ struct dst_entry *dst = NULL;
struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
- dst = rxe_find_route6(rxe->ndev, saddr, daddr);
+ dst = rxe_find_route(rxe, qp, av);
if (!dst) {
pr_err("Host not reachable\n");
return -EHOSTUNREACH;
@@ -394,6 +407,12 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP,
av->grh.traffic_class,
av->grh.hop_limit);
+
+ if (qp_type(qp) == IB_QPT_RC)
+ sk_dst_set(qp->sk->sk, dst);
+ else
+ dst_release(dst);
+
return 0;
}
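The net effect of the prepare4()/prepare6() changes is a per-QP route cache for RC traffic: rxe_find_route() first tries the dst cached on the QP's kernel socket, re-resolves only when that entry is missing or stale, the skb takes its own reference through dst_clone(), and the freshly resolved dst is either parked back on the socket (RC) or released (other QP types). A condensed sketch of that reference flow, under the same assumptions as the code above (IPv4 shown):

	struct dst_entry *dst = sk_dst_get(qp->sk->sk);	/* +1 ref if cached */

	if (!dst || !(dst->obsolete && dst->ops->check(dst, 0))) {
		if (dst)
			dst_release(dst);			/* drop stale entry */
		dst = rxe_find_route4(rxe->ndev, saddr, daddr);	/* fresh lookup */
	}

	skb_dst_set(skb, dst_clone(dst));			/* skb holds its own ref */

	if (qp_type(qp) == IB_QPT_RC)
		sk_dst_set(qp->sk->sk, dst);			/* cache; consumes our ref */
	else
		dst_release(dst);

The matching teardown is the sk_dst_get()/dst_release() pair added to rxe_qp_cleanup() for RC QPs.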
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 13ed2cc6eaa2..1b596fbbe251 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -114,7 +114,6 @@ enum rxe_device_param {
RXE_MAX_UCONTEXT = 512,
RXE_NUM_PORT = 1,
- RXE_NUM_COMP_VECTORS = 1,
RXE_MIN_QP_INDEX = 16,
RXE_MAX_QP_INDEX = 0x00020000,
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index f98a19e61a3d..80ccc7c7c341 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -273,10 +273,11 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
rxe_init_task(rxe, &qp->comp.task, qp,
rxe_completer, "comp");
- setup_timer(&qp->rnr_nak_timer, rnr_nak_timer, (unsigned long)qp);
- setup_timer(&qp->retrans_timer, retransmit_timer, (unsigned long)qp);
qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
-
+ if (init->qp_type == IB_QPT_RC) {
+ setup_timer(&qp->rnr_nak_timer, rnr_nak_timer, (unsigned long)qp);
+ setup_timer(&qp->retrans_timer, retransmit_timer, (unsigned long)qp);
+ }
return 0;
}
@@ -630,8 +631,8 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
if (mask & IB_QP_AV) {
ib_get_cached_gid(&rxe->ib_dev, 1,
- attr->ah_attr.grh.sgid_index, &sgid,
- &sgid_attr);
+ rdma_ah_read_grh(&attr->ah_attr)->sgid_index,
+ &sgid, &sgid_attr);
rxe_av_from_attr(rxe, attr->port_num, &qp->pri_av,
&attr->ah_attr);
rxe_av_fill_ip_info(rxe, &qp->pri_av, &attr->ah_attr,
@@ -641,9 +642,11 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
}
if (mask & IB_QP_ALT_PATH) {
- ib_get_cached_gid(&rxe->ib_dev, 1,
- attr->alt_ah_attr.grh.sgid_index, &sgid,
- &sgid_attr);
+ u8 sgid_index =
+ rdma_ah_read_grh(&attr->alt_ah_attr)->sgid_index;
+
+ ib_get_cached_gid(&rxe->ib_dev, 1, sgid_index,
+ &sgid, &sgid_attr);
rxe_av_from_attr(rxe, attr->alt_port_num, &qp->alt_av,
&attr->alt_ah_attr);
@@ -804,8 +807,10 @@ void rxe_qp_destroy(struct rxe_qp *qp)
qp->qp_timeout_jiffies = 0;
rxe_cleanup_task(&qp->resp.task);
- del_timer_sync(&qp->retrans_timer);
- del_timer_sync(&qp->rnr_nak_timer);
+ if (qp_type(qp) == IB_QPT_RC) {
+ del_timer_sync(&qp->retrans_timer);
+ del_timer_sync(&qp->rnr_nak_timer);
+ }
rxe_cleanup_task(&qp->req.task);
rxe_cleanup_task(&qp->comp.task);
@@ -846,6 +851,14 @@ void rxe_qp_cleanup(struct rxe_pool_entry *arg)
qp->resp.mr = NULL;
}
+ if (qp_type(qp) == IB_QPT_RC) {
+ struct dst_entry *dst = NULL;
+
+ dst = sk_dst_get(qp->sk->sk);
+ if (dst)
+ dst_release(dst);
+ }
+
free_rd_atomic_resources(qp);
kernel_sock_shutdown(qp->sk, SHUT_RDWR);
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 50886031096f..fb8c83e055e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -387,8 +387,8 @@ int rxe_rcv(struct sk_buff *skb)
pack_icrc = be32_to_cpu(*icrcp);
calc_icrc = rxe_icrc_hdr(pkt, skb);
- calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt),
- payload_size(pkt));
+ calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt),
+ payload_size(pkt));
calc_icrc = (__force u32)cpu_to_be32(~calc_icrc);
if (unlikely(calc_icrc != pack_icrc)) {
if (skb->protocol == htons(ETH_P_IPV6))
@@ -403,6 +403,8 @@ int rxe_rcv(struct sk_buff *skb)
goto drop;
}
+ rxe_counter_inc(rxe, RXE_CNT_RCVD_PKTS);
+
if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN))
rxe_rcv_mcast_pkt(rxe, skb);
else
@@ -417,4 +419,3 @@ drop:
kfree_skb(skb);
return 0;
}
-EXPORT_SYMBOL(rxe_rcv);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 9f95f50b2909..7ee465d1a1e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -32,6 +32,7 @@
*/
#include <linux/skbuff.h>
+#include <crypto/hash.h>
#include "rxe.h"
#include "rxe_loc.h"
@@ -483,8 +484,7 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (wqe->wr.send_flags & IB_SEND_INLINE) {
u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];
- crc = crc32_le(crc, tmp, paylen);
-
+ crc = rxe_crc32(rxe, crc, tmp, paylen);
memcpy(payload_addr(pkt), tmp, paylen);
wqe->dma.resid -= paylen;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c9dd385ce62e..23039768f541 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -149,6 +149,7 @@ static enum resp_states check_psn(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
int diff = psn_compare(pkt->psn, qp->resp.psn);
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
switch (qp_type(qp)) {
case IB_QPT_RC:
@@ -157,9 +158,11 @@ static enum resp_states check_psn(struct rxe_qp *qp,
return RESPST_CLEANUP;
qp->resp.sent_psn_nak = 1;
+ rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
return RESPST_ERR_PSN_OUT_OF_SEQ;
} else if (diff < 0) {
+ rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
return RESPST_DUPLICATE_REQUEST;
}
@@ -478,8 +481,6 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
state = RESPST_ERR_LENGTH;
goto err;
}
-
- qp->resp.resid = mtu;
} else {
if (pktlen != resid) {
state = RESPST_ERR_LENGTH;
@@ -1223,6 +1224,7 @@ void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
int rxe_responder(void *arg)
{
struct rxe_qp *qp = (struct rxe_qp *)arg;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
enum resp_states state;
struct rxe_pkt_info *pkt = NULL;
int ret = 0;
@@ -1311,6 +1313,7 @@ int rxe_responder(void *arg)
break;
case RESPST_ERR_RNR:
if (qp_type(qp) == IB_QPT_RC) {
+ rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
/* RC - class B */
send_ack(qp, pkt, AETH_RNR_NAK |
(~AETH_TYPE_MASK &
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 5113e502f6f9..83d709e74dfb 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -32,9 +32,11 @@
*/
#include <linux/dma-mapping.h>
+#include <net/addrconf.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
+#include "rxe_hw_counters.h"
static int rxe_query_device(struct ib_device *dev,
struct ib_device_attr *attr,
@@ -295,22 +297,22 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd)
return 0;
}
-static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr,
+static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
struct rxe_av *av)
{
int err;
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
- err = ib_get_cached_gid(&rxe->ib_dev, attr->port_num,
- attr->grh.sgid_index, &sgid,
+ err = ib_get_cached_gid(&rxe->ib_dev, rdma_ah_get_port_num(attr),
+ rdma_ah_read_grh(attr)->sgid_index, &sgid,
&sgid_attr);
if (err) {
pr_err("Failed to query sgid. err = %d\n", err);
return err;
}
- err = rxe_av_from_attr(rxe, attr->port_num, av, attr);
+ err = rxe_av_from_attr(rxe, rdma_ah_get_port_num(attr), av, attr);
if (!err)
err = rxe_av_fill_ip_info(rxe, av, attr, &sgid_attr, &sgid);
@@ -319,7 +321,8 @@ static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr,
return err;
}
-static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
+ struct rdma_ah_attr *attr,
struct ib_udata *udata)
{
@@ -354,7 +357,7 @@ err1:
return ERR_PTR(err);
}
-static int rxe_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
int err;
struct rxe_dev *rxe = to_rdev(ibah->device);
@@ -371,11 +374,13 @@ static int rxe_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
return 0;
}
-static int rxe_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
struct rxe_dev *rxe = to_rdev(ibah->device);
struct rxe_ah *ah = to_rah(ibah);
+ memset(attr, 0, sizeof(*attr));
+ attr->type = ibah->type;
rxe_av_to_attr(rxe, &ah->av, attr);
return 0;
}
@@ -1234,10 +1239,11 @@ int rxe_register_device(struct rxe_dev *rxe)
dev->owner = THIS_MODULE;
dev->node_type = RDMA_NODE_IB_CA;
dev->phys_port_cnt = 1;
- dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
+ dev->num_comp_vectors = num_possible_cpus();
dev->dev.parent = rxe_dma_device(rxe);
dev->local_dma_lkey = 0;
- dev->node_guid = rxe_node_guid(rxe);
+ addrconf_addr_eui48((unsigned char *)&dev->node_guid,
+ rxe->ndev->dev_addr);
dev->dev.dma_ops = &dma_virt_ops;
dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
@@ -1318,6 +1324,15 @@ int rxe_register_device(struct rxe_dev *rxe)
dev->map_mr_sg = rxe_map_mr_sg;
dev->attach_mcast = rxe_attach_mcast;
dev->detach_mcast = rxe_detach_mcast;
+ dev->get_hw_stats = rxe_ib_get_hw_stats;
+ dev->alloc_hw_stats = rxe_ib_alloc_hw_stats;
+
+ rxe->tfm = crypto_alloc_shash("crc32", 0, 0);
+ if (IS_ERR(rxe->tfm)) {
+ pr_err("failed to allocate crc algorithm err:%ld\n",
+ PTR_ERR(rxe->tfm));
+ return PTR_ERR(rxe->tfm);
+ }
err = ib_register_device(dev, NULL);
if (err) {
@@ -1339,6 +1354,8 @@ int rxe_register_device(struct rxe_dev *rxe)
err2:
ib_unregister_device(dev);
err1:
+ crypto_free_shash(rxe->tfm);
+
return err;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index e100c500ae85..5a180fbe40d9 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -38,6 +38,7 @@
#include <rdma/rdma_user_rxe.h>
#include "rxe_pool.h"
#include "rxe_task.h"
+#include "rxe_hw_counters.h"
static inline int pkey_match(u16 key1, u16 key2)
{
@@ -401,10 +402,18 @@ struct rxe_dev {
spinlock_t mmap_offset_lock; /* guard mmap_offset */
int mmap_offset;
+ u64 stats_counters[RXE_NUM_OF_COUNTERS];
+
struct rxe_port port;
struct list_head list;
+ struct crypto_shash *tfm;
};
+static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters cnt)
+{
+ rxe->stats_counters[cnt]++;
+}
+
static inline struct rxe_dev *to_rdev(struct ib_device *dev)
{
return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL;
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
index f3c7dcf03098..c28af1823a2d 100644
--- a/drivers/infiniband/ulp/Makefile
+++ b/drivers/infiniband/ulp/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_INFINIBAND_SRP) += srp/
obj-$(CONFIG_INFINIBAND_SRPT) += srpt/
obj-$(CONFIG_INFINIBAND_ISER) += iser/
obj-$(CONFIG_INFINIBAND_ISERT) += isert/
+obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic/
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index bed233bf45c3..ff50a7bd66d8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -52,7 +52,6 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_sa.h>
#include <linux/sched.h>
-
/* constants */
enum ipoib_flush_level {
@@ -153,6 +152,13 @@ static inline void skb_add_pseudo_hdr(struct sk_buff *skb)
skb_pull(skb, IPOIB_HARD_LEN);
}
+static inline struct ipoib_dev_priv *ipoib_priv(const struct net_device *dev)
+{
+ struct rdma_netdev *rn = netdev_priv(dev);
+
+ return rn->clnt_priv;
+}
+
/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
struct ipoib_mcast {
struct ib_sa_mcmember_rec mcmember;
@@ -404,6 +410,7 @@ struct ipoib_dev_priv {
struct timer_list poll_timer;
unsigned max_send_sge;
bool sm_fullmember_sendonly_support;
+ const struct net_device_ops *rn_ops;
};
struct ipoib_ah {
@@ -416,7 +423,7 @@ struct ipoib_ah {
struct ipoib_path {
struct net_device *dev;
- struct ib_sa_path_rec pathrec;
+ struct sa_path_rec pathrec;
struct ipoib_ah *ah;
struct sk_buff_head queue;
@@ -472,7 +479,7 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
- struct ib_pd *pd, struct ib_ah_attr *attr);
+ struct ib_pd *pd, struct rdma_ah_attr *attr);
void ipoib_free_ah(struct kref *kref);
static inline void ipoib_put_ah(struct ipoib_ah *ah)
{
@@ -482,27 +489,28 @@ int ipoib_open(struct net_device *dev);
int ipoib_add_pkey_attr(struct net_device *dev);
int ipoib_add_umcast_attr(struct net_device *dev);
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
- struct ipoib_ah *address, u32 qpn);
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn);
void ipoib_reap_ah(struct work_struct *work);
struct ipoib_path *__path_find(struct net_device *dev, void *gid);
void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev);
-int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
-struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
-
-int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
+struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+ const char *format);
+void ipoib_ib_tx_timer_func(unsigned long ctx);
void ipoib_ib_dev_flush_light(struct work_struct *work);
void ipoib_ib_dev_flush_normal(struct work_struct *work);
void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
+int ipoib_ib_dev_open_default(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
void ipoib_ib_dev_up(struct net_device *dev);
void ipoib_ib_dev_down(struct net_device *dev);
-void ipoib_ib_dev_stop(struct net_device *dev);
+int ipoib_ib_dev_stop_default(struct net_device *dev);
void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -562,8 +570,10 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
struct ipoib_path *path);
#endif
-int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
- union ib_gid *mgid, int set_qkey);
+int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey);
+int ipoib_mcast_detach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid);
void ipoib_mcast_remove_list(struct list_head *remove_list);
void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
struct list_head *remove_list);
@@ -587,7 +597,7 @@ void __exit ipoib_netlink_fini(void);
void ipoib_set_umcast(struct net_device *ndev, int umcast_val);
int ipoib_set_mode(struct net_device *dev, const char *buf);
-void ipoib_setup(struct net_device *dev);
+void ipoib_setup_common(struct net_device *dev);
void ipoib_pkey_open(struct ipoib_dev_priv *priv);
void ipoib_drain_cq(struct net_device *dev);
@@ -607,14 +617,14 @@ extern int ipoib_max_conn_qp;
static inline int ipoib_cm_admin_enabled(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return IPOIB_CM_SUPPORTED(dev->dev_addr) &&
test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}
static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return IPOIB_CM_SUPPORTED(hwaddr) &&
test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
}
@@ -637,13 +647,13 @@ static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *t
static inline int ipoib_cm_has_srq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return !!priv->cm.srq;
}
static inline unsigned int ipoib_cm_max_mtu(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return priv->cm.max_cm_mtu;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 0cdf2b7f272f..7cbcfdac6529 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -92,7 +92,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int i, ret;
@@ -118,7 +118,7 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
struct ib_recv_wr *wr,
struct ib_sge *sge, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int i, ret;
@@ -145,7 +145,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
u64 mapping[IPOIB_CM_RX_SG],
gfp_t gfp)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
int i;
@@ -196,7 +196,7 @@ partial_error:
static void ipoib_cm_free_rx_ring(struct net_device *dev,
struct ipoib_cm_rx_buf *rx_ring)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < ipoib_recvq_size; ++i)
@@ -235,7 +235,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
{
struct ipoib_cm_rx *p = ctx;
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
unsigned long flags;
if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
@@ -251,7 +251,7 @@ static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
struct ipoib_cm_rx *p)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr attr = {
.event_handler = ipoib_cm_rx_event_handler,
.send_cq = priv->recv_cq, /* For drain WR */
@@ -276,7 +276,7 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
struct ib_cm_id *cm_id, struct ib_qp *qp,
unsigned psn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
@@ -331,7 +331,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
struct ib_recv_wr *wr,
struct ib_sge *sge)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < priv->cm.num_frags; ++i)
@@ -349,7 +349,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
struct ipoib_cm_rx *rx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct {
struct ib_recv_wr wr;
struct ib_sge sge[IPOIB_CM_RX_SG];
@@ -422,7 +422,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
struct ib_qp *qp, struct ib_cm_req_event_param *req,
unsigned psn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_data data = {};
struct ib_cm_rep_param rep = {};
@@ -442,7 +442,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
struct net_device *dev = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *p;
unsigned psn;
int ret;
@@ -515,7 +515,7 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
/* Fall through */
case IB_CM_REJ_RECEIVED:
p = cm_id->context;
- priv = netdev_priv(p->dev);
+ priv = ipoib_priv(p->dev);
if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
ipoib_warn(priv, "unable to move qp to error state\n");
/* Fall through */
@@ -559,7 +559,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx_buf *rx_ring;
unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
struct sk_buff *skb, *newskb;
@@ -708,7 +708,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_tx_buf *tx_req;
int rc;
unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
@@ -786,7 +786,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_tx *tx = wc->qp->qp_context;
unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
struct ipoib_tx_buf *tx_req;
@@ -855,7 +855,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
int ipoib_cm_dev_open(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
@@ -887,7 +887,7 @@ err_cm:
static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *rx, *n;
LIST_HEAD(list);
@@ -910,7 +910,7 @@ static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
void ipoib_cm_dev_stop(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *p;
unsigned long begin;
int ret;
@@ -969,7 +969,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
struct ipoib_cm_tx *p = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
struct ipoib_cm_data *data = event->private_data;
struct sk_buff_head skqueue;
struct ib_qp_attr qp_attr;
@@ -1037,7 +1037,7 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr attr = {
.send_cq = priv->recv_cq,
.recv_cq = priv->recv_cq,
@@ -1068,9 +1068,9 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
static int ipoib_cm_send_req(struct net_device *dev,
struct ib_cm_id *id, struct ib_qp *qp,
u32 qpn,
- struct ib_sa_path_rec *pathrec)
+ struct sa_path_rec *pathrec)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_data data = {};
struct ib_cm_req_param req = {};
@@ -1105,7 +1105,7 @@ static int ipoib_cm_send_req(struct net_device *dev,
static int ipoib_cm_modify_tx_init(struct net_device *dev,
struct ib_cm_id *cm_id, struct ib_qp *qp)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
@@ -1128,9 +1128,9 @@ static int ipoib_cm_modify_tx_init(struct net_device *dev,
}
static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
- struct ib_sa_path_rec *pathrec)
+ struct sa_path_rec *pathrec)
{
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
int ret;
p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
@@ -1186,7 +1186,7 @@ err_tx:
static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
{
- struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
struct ipoib_tx_buf *tx_req;
unsigned long begin;
@@ -1236,7 +1236,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ib_cm_event *event)
{
struct ipoib_cm_tx *tx = cm_id->context;
- struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
struct net_device *dev = priv->dev;
struct ipoib_neigh *neigh;
unsigned long flags;
@@ -1287,7 +1287,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
struct ipoib_neigh *neigh)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_tx *tx;
tx = kzalloc(sizeof *tx, GFP_ATOMIC);
@@ -1306,7 +1306,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
{
- struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
unsigned long flags;
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
spin_lock_irqsave(&priv->lock, flags);
@@ -1332,7 +1332,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
struct ipoib_path *path;
int ret;
- struct ib_sa_path_rec pathrec;
+ struct sa_path_rec pathrec;
u32 qpn;
netif_tx_lock_bh(dev);
@@ -1441,7 +1441,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
unsigned int mtu)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int e = skb_queue_empty(&priv->cm.skb_queue);
if (skb_dst(skb))
@@ -1490,7 +1490,8 @@ static void ipoib_cm_stale_task(struct work_struct *work)
static ssize_t show_mode(struct device *d, struct device_attribute *attr,
char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d));
+ struct net_device *dev = to_net_dev(d);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
return sprintf(buf, "connected\n");
@@ -1503,7 +1504,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
{
struct net_device *dev = to_net_dev(d);
int ret;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags))
return -EPERM;
@@ -1532,7 +1533,7 @@ int ipoib_cm_add_mode_attr(struct net_device *dev)
static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_srq_init_attr srq_init_attr = {
.srq_type = IB_SRQT_BASIC,
.attr = {
@@ -1561,7 +1562,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
int ipoib_cm_dev_init(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int max_srq_sge, i;
INIT_LIST_HEAD(&priv->cm.passive_ids);
@@ -1622,7 +1623,7 @@ int ipoib_cm_dev_init(struct net_device *dev)
void ipoib_cm_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
if (!priv->cm.srq)
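The bulk of the ipoib_cm.c hunks above swap netdev_priv() for ipoib_priv(). After this series the net_device private area holds a struct rdma_netdev rather than the IPoIB private data itself, so the driver state has to be reached through the rdma_netdev's client pointer. A minimal sketch of what such an accessor looks like (the real helper lives in ipoib.h and is not part of this diff):

static inline struct ipoib_dev_priv *ipoib_priv(const struct net_device *dev)
{
	/* sketch: the netdev private area is now the rdma_netdev wrapper,
	 * and the IPoIB state hangs off its client-private pointer */
	struct rdma_netdev *rn = netdev_priv(dev);

	return rn->clnt_priv;
}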
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index bac455a1942d..874b24366e4d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -60,7 +60,7 @@ static const struct ipoib_stats ipoib_gstrings_stats[] = {
static void ipoib_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
- struct ipoib_dev_priv *priv = netdev_priv(netdev);
+ struct ipoib_dev_priv *priv = ipoib_priv(netdev);
ib_get_device_fw_str(priv->ca, drvinfo->fw_version,
sizeof(drvinfo->fw_version));
@@ -77,7 +77,7 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
static int ipoib_get_coalesce(struct net_device *dev,
struct ethtool_coalesce *coal)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs;
coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
@@ -88,7 +88,7 @@ static int ipoib_get_coalesce(struct net_device *dev,
static int ipoib_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *coal)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
/*
@@ -155,7 +155,66 @@ static int ipoib_get_sset_count(struct net_device __always_unused *dev,
return -EOPNOTSUPP;
}
+/* Return the lane speed in units of 1e6 bit/sec */
+static inline int ib_speed_enum_to_int(int speed)
+{
+ switch (speed) {
+ case IB_SPEED_SDR:
+ return SPEED_2500;
+ case IB_SPEED_DDR:
+ return SPEED_5000;
+ case IB_SPEED_QDR:
+ case IB_SPEED_FDR10:
+ return SPEED_10000;
+ case IB_SPEED_FDR:
+ return SPEED_14000;
+ case IB_SPEED_EDR:
+ return SPEED_25000;
+ }
+
+ return SPEED_UNKNOWN;
+}
+
+static int ipoib_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(netdev);
+ struct ib_port_attr attr;
+ int ret, speed, width;
+
+ if (!netif_carrier_ok(netdev)) {
+ cmd->base.speed = SPEED_UNKNOWN;
+ cmd->base.duplex = DUPLEX_UNKNOWN;
+ return 0;
+ }
+
+ ret = ib_query_port(priv->ca, priv->port, &attr);
+ if (ret < 0)
+ return -EINVAL;
+
+ speed = ib_speed_enum_to_int(attr.active_speed);
+ width = ib_width_enum_to_int(attr.active_width);
+
+ if (speed < 0 || width < 0)
+ return -EINVAL;
+
+ /* Apart from the fields set below, the other members of
+ * struct ethtool_link_settings are zero-initialized by
+ * __ethtool_get_link_ksettings.
+ */
+ cmd->base.speed = speed * width;
+ cmd->base.duplex = DUPLEX_FULL;
+
+ cmd->base.phy_address = 0xFF;
+
+ cmd->base.autoneg = AUTONEG_ENABLE;
+ cmd->base.port = PORT_OTHER;
+
+ return 0;
+}
+
static const struct ethtool_ops ipoib_ethtool_ops = {
+ .get_link_ksettings = ipoib_get_link_ksettings,
.get_drvinfo = ipoib_get_drvinfo,
.get_coalesce = ipoib_get_coalesce,
.set_coalesce = ipoib_set_coalesce,
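The new get_link_ksettings callback reports the port speed as the per-lane signalling rate times the link width. As a worked example (illustrative only; it assumes ib_width_enum_to_int() returns the lane count, e.g. 4 for IB_WIDTH_4X):

/* Hypothetical values for a 4x EDR port, shown only to make the
 * arithmetic in ipoib_get_link_ksettings() concrete. */
int speed = ib_speed_enum_to_int(IB_SPEED_EDR);  /* 25000 (25 Gb/s per lane) */
int width = ib_width_enum_to_int(IB_WIDTH_4X);   /* 4 lanes */
int link  = speed * width;                       /* 100000 -> reported as 100000 Mb/s */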
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 6bd5740e2691..11f74cbe6660 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -210,16 +210,16 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
seq_printf(file,
"GID: %s\n"
" complete: %6s\n",
- gid_buf, path.pathrec.dlid ? "yes" : "no");
+ gid_buf, sa_path_get_dlid(&path.pathrec) ? "yes" : "no");
- if (path.pathrec.dlid) {
+ if (sa_path_get_dlid(&path.pathrec)) {
rate = ib_rate_to_mbps(path.pathrec.rate);
seq_printf(file,
" DLID: 0x%04x\n"
" SL: %12d\n"
" rate: %8d.%d Gb/sec\n",
- be16_to_cpu(path.pathrec.dlid),
+ be32_to_cpu(sa_path_get_dlid(&path.pathrec)),
path.pathrec.sl,
rate / 1000, rate % 1000);
}
@@ -261,7 +261,7 @@ static const struct file_operations ipoib_path_fops = {
void ipoib_create_debug_files(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
char name[IFNAMSIZ + sizeof "_path"];
snprintf(name, sizeof name, "%s_mcg", dev->name);
@@ -279,10 +279,13 @@ void ipoib_create_debug_files(struct net_device *dev)
void ipoib_delete_debug_files(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n");
+ WARN_ONCE(!priv->path_dentry, "null path debug file\n");
debugfs_remove(priv->mcg_dentry);
debugfs_remove(priv->path_dentry);
+ priv->mcg_dentry = priv->path_dentry = NULL;
}
int ipoib_register_debugfs(void)
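The debugfs path dump now reads the DLID through sa_path_get_dlid() and treats it as a 32-bit big-endian value, since OPA path records carry DLIDs wider than the 16-bit IB field. A sketch of how such an accessor could dispatch on the record type (field and type names here are assumptions; the real helper is defined in the SA headers, not in this diff):

/* Sketch only: IB records keep a 16-bit DLID that is widened on return,
 * while OPA records can carry a full 32-bit DLID. */
static inline __be32 example_path_get_dlid(const struct sa_path_rec *rec)
{
	if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
		return rec->opa.dlid;
	return cpu_to_be32(be16_to_cpu(rec->ib.dlid));
}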
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 12c4f84a6639..0060b2f9f659 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -52,7 +52,7 @@ MODULE_PARM_DESC(data_debug_level,
#endif
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
- struct ib_pd *pd, struct ib_ah_attr *attr)
+ struct ib_pd *pd, struct rdma_ah_attr *attr)
{
struct ipoib_ah *ah;
struct ib_ah *vah;
@@ -65,13 +65,13 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
ah->last_send = 0;
kref_init(&ah->ref);
- vah = ib_create_ah(pd, attr);
+ vah = rdma_create_ah(pd, attr);
if (IS_ERR(vah)) {
kfree(ah);
ah = (struct ipoib_ah *)vah;
} else {
ah->ah = vah;
- ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah);
+ ipoib_dbg(ipoib_priv(dev), "Created ah %p\n", ah->ah);
}
return ah;
@@ -80,7 +80,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
void ipoib_free_ah(struct kref *kref)
{
struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
- struct ipoib_dev_priv *priv = netdev_priv(ah->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ah->dev);
unsigned long flags;
@@ -99,7 +99,7 @@ static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
static int ipoib_ib_post_receive(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_recv_wr *bad_wr;
int ret;
@@ -121,7 +121,7 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id)
static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
int buf_size;
u64 *mapping;
@@ -153,7 +153,7 @@ error:
static int ipoib_ib_post_receives(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i;
for (i = 0; i < ipoib_recvq_size; ++i) {
@@ -172,7 +172,7 @@ static int ipoib_ib_post_receives(struct net_device *dev)
static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
struct sk_buff *skb;
u64 mapping[IPOIB_UD_RX_SG];
@@ -381,7 +381,7 @@ free_res:
static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned int wr_id = wc->wr_id;
struct ipoib_tx_buf *tx_req;
@@ -485,14 +485,14 @@ poll_more:
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
{
struct net_device *dev = dev_ptr;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
napi_schedule(&priv->napi);
}
static void drain_tx_cq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
netif_tx_lock(dev);
while (poll_tx(priv))
@@ -506,14 +506,14 @@ static void drain_tx_cq(struct net_device *dev)
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev_ptr);
mod_timer(&priv->poll_timer, jiffies);
}
static inline int post_send(struct ipoib_dev_priv *priv,
unsigned int wr_id,
- struct ib_ah *address, u32 qpn,
+ struct ib_ah *address, u32 dqpn,
struct ipoib_tx_buf *tx_req,
void *head, int hlen)
{
@@ -523,7 +523,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
ipoib_build_sge(priv, tx_req);
priv->tx_wr.wr.wr_id = wr_id;
- priv->tx_wr.remote_qpn = qpn;
+ priv->tx_wr.remote_qpn = dqpn;
priv->tx_wr.ah = address;
if (head) {
@@ -537,10 +537,10 @@ static inline int post_send(struct ipoib_dev_priv *priv,
return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr);
}
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
- struct ipoib_ah *address, u32 qpn)
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_tx_buf *tx_req;
int hlen, rc;
void *phead;
@@ -554,7 +554,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
} else {
if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
@@ -563,7 +563,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
- return;
+ return -1;
}
phead = NULL;
hlen = 0;
@@ -574,7 +574,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
/* Does skb_linearize return ok without reducing nr_frags? */
if (skb_shinfo(skb)->nr_frags > usable_sge) {
@@ -582,12 +582,13 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
}
- ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
- skb->len, address, qpn);
+ ipoib_dbg_data(priv,
+ "sending packet, length=%d address=%p dqpn=0x%06x\n",
+ skb->len, address, dqpn);
/*
* We put the skb into the tx_ring _before_ we call post_send()
@@ -601,7 +602,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return -1;
}
if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -620,7 +621,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
skb_dst_drop(skb);
rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
- address->ah, qpn, tx_req, phead, hlen);
+ address, dqpn, tx_req, phead, hlen);
if (unlikely(rc)) {
ipoib_warn(priv, "post_send failed, error %d\n", rc);
++dev->stats.tx_errors;
@@ -629,21 +630,24 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
dev_kfree_skb_any(skb);
if (netif_queue_stopped(dev))
netif_wake_queue(dev);
+ rc = 0;
} else {
netif_trans_update(dev);
- address->last_send = priv->tx_head;
+ rc = priv->tx_head;
++priv->tx_head;
}
if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
while (poll_tx(priv))
; /* nothing */
+
+ return rc;
}
static void __ipoib_reap_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_ah *ah, *tah;
LIST_HEAD(remove_list);
unsigned long flags;
@@ -654,7 +658,7 @@ static void __ipoib_reap_ah(struct net_device *dev)
list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
list_del(&ah->list);
- ib_destroy_ah(ah->ah);
+ rdma_destroy_ah(ah->ah);
kfree(ah);
}
@@ -677,7 +681,7 @@ void ipoib_reap_ah(struct work_struct *work)
static void ipoib_flush_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
cancel_delayed_work(&priv->ah_reap_task);
flush_workqueue(priv->wq);
@@ -686,30 +690,124 @@ static void ipoib_flush_ah(struct net_device *dev)
static void ipoib_stop_ah(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
set_bit(IPOIB_STOP_REAPER, &priv->flags);
ipoib_flush_ah(dev);
}
-static void ipoib_ib_tx_timer_func(unsigned long ctx)
+static int recvs_pending(struct net_device *dev)
{
- drain_tx_cq((struct net_device *)ctx);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int pending = 0;
+ int i;
+
+ for (i = 0; i < ipoib_recvq_size; ++i)
+ if (priv->rx_ring[i].skb)
+ ++pending;
+
+ return pending;
}
-int ipoib_ib_dev_open(struct net_device *dev)
+int ipoib_ib_dev_stop_default(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- int ret;
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct ib_qp_attr qp_attr;
+ unsigned long begin;
+ struct ipoib_tx_buf *tx_req;
+ int i;
- ipoib_pkey_dev_check_presence(dev);
+ if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ napi_disable(&priv->napi);
- if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
- ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
- (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
- return -1;
+ ipoib_cm_dev_stop(dev);
+
+ /*
+ * Move our QP to the error state and then reinitialize when
+ * all work requests have completed or have been flushed.
+ */
+ qp_attr.qp_state = IB_QPS_ERR;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
+
+ /* Wait for all sends and receives to complete */
+ begin = jiffies;
+
+ while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
+ if (time_after(jiffies, begin + 5 * HZ)) {
+ ipoib_warn(priv,
+ "timing out; %d sends %d receives not completed\n",
+ priv->tx_head - priv->tx_tail,
+ recvs_pending(dev));
+
+ /*
+ * assume the HW is wedged and just free up
+ * all our pending work requests.
+ */
+ while ((int)priv->tx_tail - (int)priv->tx_head < 0) {
+ tx_req = &priv->tx_ring[priv->tx_tail &
+ (ipoib_sendq_size - 1)];
+ ipoib_dma_unmap_tx(priv, tx_req);
+ dev_kfree_skb_any(tx_req->skb);
+ ++priv->tx_tail;
+ --priv->tx_outstanding;
+ }
+
+ for (i = 0; i < ipoib_recvq_size; ++i) {
+ struct ipoib_rx_buf *rx_req;
+
+ rx_req = &priv->rx_ring[i];
+ if (!rx_req->skb)
+ continue;
+ ipoib_ud_dma_unmap_rx(priv,
+ priv->rx_ring[i].mapping);
+ dev_kfree_skb_any(rx_req->skb);
+ rx_req->skb = NULL;
+ }
+
+ goto timeout;
+ }
+
+ ipoib_drain_cq(dev);
+
+ msleep(1);
}
+ ipoib_dbg(priv, "All sends and receives done.\n");
+
+timeout:
+ del_timer_sync(&priv->poll_timer);
+ qp_attr.qp_state = IB_QPS_RESET;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ ipoib_warn(priv, "Failed to modify QP to RESET state\n");
+
+ ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
+
+ return 0;
+}
+
+int ipoib_ib_dev_stop(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ priv->rn_ops->ndo_stop(dev);
+
+ clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+ ipoib_flush_ah(dev);
+
+ return 0;
+}
+
+void ipoib_ib_tx_timer_func(unsigned long ctx)
+{
+ drain_tx_cq((struct net_device *)ctx);
+}
+
+int ipoib_ib_dev_open_default(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret;
+
ret = ipoib_init_qp(dev);
if (ret) {
ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@@ -719,33 +817,60 @@ int ipoib_ib_dev_open(struct net_device *dev)
ret = ipoib_ib_post_receives(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
- goto dev_stop;
+ goto out;
}
ret = ipoib_cm_dev_open(dev);
if (ret) {
ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
- goto dev_stop;
+ goto out;
+ }
+
+ if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ napi_enable(&priv->napi);
+
+ return 0;
+out:
+ return -1;
+}
+
+int ipoib_ib_dev_open(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ ipoib_pkey_dev_check_presence(dev);
+
+ if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
+ ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
+ (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
+ return -1;
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
- if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_enable(&priv->napi);
+ if (priv->rn_ops->ndo_open(dev)) {
+ pr_warn("%s: Failed to open dev\n", dev->name);
+ goto dev_stop;
+ }
+
+ set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
return 0;
+
dev_stop:
- if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_enable(&priv->napi);
+ set_bit(IPOIB_STOP_REAPER, &priv->flags);
+ cancel_delayed_work(&priv->ah_reap_task);
+ set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+ napi_enable(&priv->napi);
ipoib_ib_dev_stop(dev);
return -1;
}
void ipoib_pkey_dev_check_presence(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (!(priv->pkey & 0x7fff) ||
ib_find_pkey(priv->ca, priv->port, priv->pkey,
@@ -757,7 +882,7 @@ void ipoib_pkey_dev_check_presence(struct net_device *dev)
void ipoib_ib_dev_up(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_pkey_dev_check_presence(dev);
@@ -773,7 +898,7 @@ void ipoib_ib_dev_up(struct net_device *dev)
void ipoib_ib_dev_down(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "downing ib_dev\n");
@@ -786,22 +911,9 @@ void ipoib_ib_dev_down(struct net_device *dev)
ipoib_flush_paths(dev);
}
-static int recvs_pending(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- int pending = 0;
- int i;
-
- for (i = 0; i < ipoib_recvq_size; ++i)
- if (priv->rx_ring[i].skb)
- ++pending;
-
- return pending;
-}
-
void ipoib_drain_cq(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int i, n;
/*
@@ -838,107 +950,6 @@ void ipoib_drain_cq(struct net_device *dev)
local_bh_enable();
}
-void ipoib_ib_dev_stop(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ib_qp_attr qp_attr;
- unsigned long begin;
- struct ipoib_tx_buf *tx_req;
- int i;
-
- if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- napi_disable(&priv->napi);
-
- ipoib_cm_dev_stop(dev);
-
- /*
- * Move our QP to the error state and then reinitialize in
- * when all work requests have completed or have been flushed.
- */
- qp_attr.qp_state = IB_QPS_ERR;
- if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
- ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
-
- /* Wait for all sends and receives to complete */
- begin = jiffies;
-
- while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
- if (time_after(jiffies, begin + 5 * HZ)) {
- ipoib_warn(priv, "timing out; %d sends %d receives not completed\n",
- priv->tx_head - priv->tx_tail, recvs_pending(dev));
-
- /*
- * assume the HW is wedged and just free up
- * all our pending work requests.
- */
- while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
- tx_req = &priv->tx_ring[priv->tx_tail &
- (ipoib_sendq_size - 1)];
- ipoib_dma_unmap_tx(priv, tx_req);
- dev_kfree_skb_any(tx_req->skb);
- ++priv->tx_tail;
- --priv->tx_outstanding;
- }
-
- for (i = 0; i < ipoib_recvq_size; ++i) {
- struct ipoib_rx_buf *rx_req;
-
- rx_req = &priv->rx_ring[i];
- if (!rx_req->skb)
- continue;
- ipoib_ud_dma_unmap_rx(priv,
- priv->rx_ring[i].mapping);
- dev_kfree_skb_any(rx_req->skb);
- rx_req->skb = NULL;
- }
-
- goto timeout;
- }
-
- ipoib_drain_cq(dev);
-
- msleep(1);
- }
-
- ipoib_dbg(priv, "All sends and receives done.\n");
-
-timeout:
- del_timer_sync(&priv->poll_timer);
- qp_attr.qp_state = IB_QPS_RESET;
- if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
- ipoib_warn(priv, "Failed to modify QP to RESET state\n");
-
- ipoib_flush_ah(dev);
-
- ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
-}
-
-int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
-
- priv->ca = ca;
- priv->port = port;
- priv->qp = NULL;
-
- if (ipoib_transport_dev_init(dev, ca)) {
- printk(KERN_WARNING "%s: ipoib_transport_dev_init failed\n", ca->name);
- return -ENODEV;
- }
-
- setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
- (unsigned long) dev);
-
- if (dev->flags & IFF_UP) {
- if (ipoib_ib_dev_open(dev)) {
- ipoib_transport_dev_cleanup(dev);
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
/*
* Takes whatever value which is in pkey index 0 and updates priv->pkey
* returns 0 if the pkey value was changed.
@@ -967,6 +978,19 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
*/
priv->dev->broadcast[8] = priv->pkey >> 8;
priv->dev->broadcast[9] = priv->pkey & 0xff;
+
+ /*
+ * Update the broadcast address in the priv->broadcast object,
+ * in case it already exists; otherwise nothing else will update it.
+ */
+ if (priv->broadcast) {
+ spin_lock_irq(&priv->lock);
+ memcpy(priv->broadcast->mcmember.mgid.raw,
+ priv->dev->broadcast + 4,
+ sizeof(union ib_gid));
+ spin_unlock_irq(&priv->lock);
+ }
+
return 0;
}
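For context on the bytes patched in update_parent_pkey(): in dev->broadcast the first four bytes carry the flags/QPN and the remaining sixteen are the broadcast MGID, so broadcast[8] and broadcast[9] are GID bytes 4 and 5, which is exactly where the IPoIB broadcast group encodes the P_Key. A worked example with the default full-membership P_Key (values for illustration only):

/* With priv->pkey == 0xffff, the patched bytes become 0xff and 0xff, and
 * dev->broadcast + 4 holds the well-known broadcast MGID
 * ff12:401b:ffff:0000:0000:0000:ffff:ffff, which the memcpy above copies
 * into priv->broadcast->mcmember.mgid when that object already exists. */
priv->dev->broadcast[8] = 0xff;   /* priv->pkey >> 8   */
priv->dev->broadcast[9] = 0xff;   /* priv->pkey & 0xff */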
@@ -1216,7 +1240,7 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work)
void ipoib_ib_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "cleaning up ib_dev\n");
/*
@@ -1236,7 +1260,13 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
*/
ipoib_stop_ah(dev);
- ipoib_transport_dev_cleanup(dev);
-}
+ clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ priv->rn_ops->ndo_uninit(dev);
+
+ if (priv->pd) {
+ ib_dealloc_pd(priv->pd);
+ priv->pd = NULL;
+ }
+}
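One detail kept through the open-path refactor above is the P_Key sanity check in ipoib_ib_dev_open(), which masks off the membership bit before testing for zero. A small worked example of that test (values for illustration only):

/* Bit 15 of a P_Key is the membership bit; the low 15 bits are the key
 * itself, and a key whose low 15 bits are all zero is invalid. */
u16 full_default    = 0xffff;  /* 0xffff & 0x7fff = 0x7fff -> valid     */
u16 limited_default = 0x7fff;  /* 0x7fff & 0x7fff = 0x7fff -> valid     */
u16 invalid_key     = 0x8000;  /* 0x8000 & 0x7fff = 0x0000 -> "Invalid" */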
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index d1d3fb7a6127..2869d1adb1de 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,9 +108,36 @@ static struct ib_client ipoib_client = {
.get_net_dev_by_params = ipoib_get_net_dev_by_params,
};
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static int ipoib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct netdev_notifier_info *ni = ptr;
+ struct net_device *dev = ni->dev;
+
+ if (dev->netdev_ops->ndo_open != ipoib_open)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ ipoib_create_debug_files(dev);
+ break;
+ case NETDEV_CHANGENAME:
+ ipoib_delete_debug_files(dev);
+ ipoib_create_debug_files(dev);
+ break;
+ case NETDEV_UNREGISTER:
+ ipoib_delete_debug_files(dev);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+#endif
+
int ipoib_open(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "bringing up interface\n");
@@ -157,7 +184,7 @@ err_disable:
static int ipoib_stop(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "stopping interface\n");
@@ -195,7 +222,7 @@ static void ipoib_uninit(struct net_device *dev)
static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
@@ -205,7 +232,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu
static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
/* dev->mtu > 2K ==> connected mode */
if (ipoib_cm_admin_enabled(dev)) {
@@ -468,7 +495,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
int ipoib_set_mode(struct net_device *dev, const char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if ((test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) &&
!strcmp(buf, "connected\n")) ||
@@ -505,7 +532,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
struct ipoib_path *__path_find(struct net_device *dev, void *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node *n = priv->path_tree.rb_node;
struct ipoib_path *path;
int ret;
@@ -529,7 +556,7 @@ struct ipoib_path *__path_find(struct net_device *dev, void *gid)
static int __path_add(struct net_device *dev, struct ipoib_path *path)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node **n = &priv->path_tree.rb_node;
struct rb_node *pn = NULL;
struct ipoib_path *tpath;
@@ -564,7 +591,7 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
while ((skb = __skb_dequeue(&path->queue)))
dev_kfree_skb_irq(skb);
- ipoib_dbg(netdev_priv(dev), "path_free\n");
+ ipoib_dbg(ipoib_priv(dev), "path_free\n");
/* remove all neigh connected to this path */
ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
@@ -598,7 +625,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
int ipoib_path_iter_next(struct ipoib_path_iter *iter)
{
- struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
struct rb_node *n;
struct ipoib_path *path;
int ret = 1;
@@ -635,92 +662,21 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
void ipoib_mark_paths_invalid(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path, *tp;
spin_lock_irq(&priv->lock);
list_for_each_entry_safe(path, tp, &priv->path_list, list) {
- ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 invalid\n",
- be16_to_cpu(path->pathrec.dlid),
- path->pathrec.dgid.raw);
+ ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n",
+ be32_to_cpu(sa_path_get_dlid(&path->pathrec)),
+ path->pathrec.dgid.raw);
path->valid = 0;
}
spin_unlock_irq(&priv->lock);
}
-struct classport_info_context {
- struct ipoib_dev_priv *priv;
- struct completion done;
- struct ib_sa_query *sa_query;
-};
-
-static void classport_info_query_cb(int status, struct ib_class_port_info *rec,
- void *context)
-{
- struct classport_info_context *cb_ctx = context;
- struct ipoib_dev_priv *priv;
-
- WARN_ON(!context);
-
- priv = cb_ctx->priv;
-
- if (status || !rec) {
- pr_debug("device: %s failed query classport_info status: %d\n",
- priv->dev->name, status);
- /* keeps the default, will try next mcast_restart */
- priv->sm_fullmember_sendonly_support = false;
- goto out;
- }
-
- if (ib_get_cpi_capmask2(rec) &
- IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) {
- pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n",
- priv->dev->name);
- priv->sm_fullmember_sendonly_support = true;
- } else {
- pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n",
- priv->dev->name);
- priv->sm_fullmember_sendonly_support = false;
- }
-
-out:
- complete(&cb_ctx->done);
-}
-
-int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv)
-{
- struct classport_info_context *callback_context;
- int ret;
-
- callback_context = kmalloc(sizeof(*callback_context), GFP_KERNEL);
- if (!callback_context)
- return -ENOMEM;
-
- callback_context->priv = priv;
- init_completion(&callback_context->done);
-
- ret = ib_sa_classport_info_rec_query(&ipoib_sa_client,
- priv->ca, priv->port, 3000,
- GFP_KERNEL,
- classport_info_query_cb,
- callback_context,
- &callback_context->sa_query);
- if (ret < 0) {
- pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n",
- priv->dev->name, ret);
- kfree(callback_context);
- return ret;
- }
-
- /* waiting for the callback to finish before returnning */
- wait_for_completion(&callback_context->done);
- kfree(callback_context);
-
- return ret;
-}
-
static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
{
struct ipoib_pseudo_header *phdr;
@@ -731,7 +687,7 @@ static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
void ipoib_flush_paths(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path, *tp;
LIST_HEAD(remove_list);
unsigned long flags;
@@ -760,12 +716,12 @@ void ipoib_flush_paths(struct net_device *dev)
}
static void path_rec_completion(int status,
- struct ib_sa_path_rec *pathrec,
+ struct sa_path_rec *pathrec,
void *path_ptr)
{
struct ipoib_path *path = path_ptr;
struct net_device *dev = path->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_ah *ah = NULL;
struct ipoib_ah *old_ah = NULL;
struct ipoib_neigh *neigh, *tn;
@@ -775,7 +731,8 @@ static void path_rec_completion(int status,
if (!status)
ipoib_dbg(priv, "PathRec LID 0x%04x for GID %pI6\n",
- be16_to_cpu(pathrec->dlid), pathrec->dgid.raw);
+ be32_to_cpu(sa_path_get_dlid(pathrec)),
+ pathrec->dgid.raw);
else
ipoib_dbg(priv, "PathRec status %d for GID %pI6\n",
status, path->pathrec.dgid.raw);
@@ -783,7 +740,7 @@ static void path_rec_completion(int status,
skb_queue_head_init(&skqueue);
if (!status) {
- struct ib_ah_attr av;
+ struct rdma_ah_attr av;
if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
ah = ipoib_create_ah(dev, priv->pd, &av);
@@ -798,7 +755,8 @@ static void path_rec_completion(int status,
path->ah = ah;
ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
- ah, be16_to_cpu(pathrec->dlid), pathrec->sl);
+ ah, be32_to_cpu(sa_path_get_dlid(pathrec)),
+ pathrec->sl);
while ((skb = __skb_dequeue(&path->queue)))
__skb_queue_tail(&skqueue, skb);
@@ -858,7 +816,7 @@ static void path_rec_completion(int status,
static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_path *path;
if (!priv->broadcast)
@@ -874,6 +832,10 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
INIT_LIST_HEAD(&path->neigh_list);
+ if (rdma_cap_opa_ah(priv->ca, priv->port))
+ path->pathrec.rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ path->pathrec.rec_type = SA_PATH_REC_TYPE_IB;
memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
path->pathrec.sgid = priv->local_gid;
path->pathrec.pkey = cpu_to_be16(priv->pkey);
@@ -886,7 +848,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
static int path_rec_start(struct net_device *dev,
struct ipoib_path *path)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg(priv, "Start path record lookup for %pI6\n",
path->pathrec.dgid.raw);
@@ -917,7 +879,8 @@ static int path_rec_start(struct net_device *dev,
static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_path *path;
struct ipoib_neigh *neigh;
unsigned long flags;
@@ -964,7 +927,8 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
}
} else {
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr));
+ path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+ IPOIB_QPN(daddr));
ipoib_neigh_put(neigh);
return;
}
@@ -998,7 +962,8 @@ err_drop:
static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
struct ipoib_pseudo_header *phdr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_path *path;
unsigned long flags;
@@ -1038,11 +1003,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
}
if (path->ah) {
- ipoib_dbg(priv, "Send unicast ARP to %04x\n",
- be16_to_cpu(path->pathrec.dlid));
+ ipoib_dbg(priv, "Send unicast ARP to %08x\n",
+ be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+ path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+ IPOIB_QPN(phdr->hwaddr));
return;
} else if ((path->query || !path_rec_start(dev, path)) &&
skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
@@ -1058,7 +1024,8 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_neigh *neigh;
struct ipoib_pseudo_header *phdr;
struct ipoib_header *header;
@@ -1122,7 +1089,8 @@ send_using_neigh:
goto unref;
}
} else if (neigh->ah) {
- ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(phdr->hwaddr));
+ neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
+ IPOIB_QPN(phdr->hwaddr));
goto unref;
}
@@ -1144,7 +1112,7 @@ unref:
static void ipoib_timeout(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
jiffies_to_msecs(jiffies - dev_trans_start(dev)));
@@ -1178,7 +1146,7 @@ static int ipoib_hard_header(struct sk_buff *skb,
static void ipoib_set_mcast_list(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set");
@@ -1190,7 +1158,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
static int ipoib_get_iflink(const struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
/* parent interface */
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
@@ -1218,7 +1186,7 @@ static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh *neigh = NULL;
@@ -1347,7 +1315,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh *neigh;
@@ -1404,7 +1372,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
{
/* neigh reference count was dropped to zero */
struct net_device *dev = neigh->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sk_buff *skb;
if (neigh->ah)
ipoib_put_ah(neigh->ah);
@@ -1414,7 +1382,7 @@ void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
}
if (ipoib_cm_get(neigh))
ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
- ipoib_dbg(netdev_priv(dev),
+ ipoib_dbg(ipoib_priv(dev),
"neigh free for %06x %pI6\n",
IPOIB_QPN(neigh->daddr),
neigh->daddr + 4);
@@ -1436,7 +1404,7 @@ static void ipoib_neigh_reclaim(struct rcu_head *rp)
void ipoib_neigh_free(struct ipoib_neigh *neigh)
{
struct net_device *dev = neigh->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
struct ipoib_neigh __rcu **np;
@@ -1519,7 +1487,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_neigh_table *ntbl = &priv->ntbl;
struct ipoib_neigh_hash *htbl;
unsigned long flags;
@@ -1605,7 +1573,7 @@ out_unlock:
static void ipoib_neigh_hash_uninit(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int stopped;
ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
@@ -1622,10 +1590,26 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
wait_for_completion(&priv->ntbl.deleted);
}
+void ipoib_dev_uninit_default(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
-int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+ ipoib_transport_dev_cleanup(dev);
+
+ ipoib_cm_dev_cleanup(dev);
+
+ kfree(priv->rx_ring);
+ vfree(priv->tx_ring);
+
+ priv->rx_ring = NULL;
+ priv->tx_ring = NULL;
+}
+
+static int ipoib_dev_init_default(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
/* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
@@ -1636,46 +1620,111 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
if (!priv->tx_ring) {
printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
- ca->name, ipoib_sendq_size);
+ priv->ca->name, ipoib_sendq_size);
goto out_rx_ring_cleanup;
}
/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
- if (ipoib_ib_dev_init(dev, ca, port))
+ if (ipoib_transport_dev_init(dev, priv->ca)) {
+ pr_warn("%s: ipoib_transport_dev_init failed\n",
+ priv->ca->name);
goto out_tx_ring_cleanup;
+ }
+
+ /* after the QP is created, set the device address */
+ priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
+ priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
+ priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff;
+
+ setup_timer(&priv->poll_timer, ipoib_ib_tx_timer_func,
+ (unsigned long)dev);
+
+ return 0;
+
+out_tx_ring_cleanup:
+ vfree(priv->tx_ring);
+
+out_rx_ring_cleanup:
+ kfree(priv->rx_ring);
+
+out:
+ return -ENOMEM;
+}
+
+int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret = -ENOMEM;
+
+ priv->ca = ca;
+ priv->port = port;
+ priv->qp = NULL;
/*
- * Must be after ipoib_ib_dev_init so we can allocate a per
- * device wq there and use it here
+ * the various IPoIB tasks assume they will never race against
+ * themselves, so always use a single thread workqueue
*/
- if (ipoib_neigh_hash_init(priv) < 0)
+ priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM);
+ if (!priv->wq) {
+ pr_warn("%s: failed to allocate device WQ\n", dev->name);
+ goto out;
+ }
+
+ /* create the PD, which is used for both control and datapath */
+ priv->pd = ib_alloc_pd(priv->ca, 0);
+ if (IS_ERR(priv->pd)) {
+ pr_warn("%s: failed to allocate PD\n", ca->name);
+ goto clean_wq;
+ }
+
+ ret = priv->rn_ops->ndo_init(dev);
+ if (ret) {
+ pr_warn("%s failed to init HW resource\n", dev->name);
+ goto out_free_pd;
+ }
+
+ if (ipoib_neigh_hash_init(priv) < 0) {
+ pr_warn("%s failed to init neigh hash\n", dev->name);
goto out_dev_uninit;
+ }
+
+ if (dev->flags & IFF_UP) {
+ if (ipoib_ib_dev_open(dev)) {
+ pr_warn("%s failed to open device\n", dev->name);
+ ret = -ENODEV;
+ goto out_dev_uninit;
+ }
+ }
return 0;
out_dev_uninit:
ipoib_ib_dev_cleanup(dev);
-out_tx_ring_cleanup:
- vfree(priv->tx_ring);
+out_free_pd:
+ if (priv->pd) {
+ ib_dealloc_pd(priv->pd);
+ priv->pd = NULL;
+ }
-out_rx_ring_cleanup:
- kfree(priv->rx_ring);
+clean_wq:
+ if (priv->wq) {
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+ }
out:
- return -ENOMEM;
+ return ret;
}
void ipoib_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
+ struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv;
LIST_HEAD(head);
ASSERT_RTNL();
- ipoib_delete_debug_files(dev);
-
/* Delete any child interfaces first */
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
/* Stop GC on child */
@@ -1685,24 +1734,21 @@ void ipoib_dev_cleanup(struct net_device *dev)
}
unregister_netdevice_many(&head);
- /*
- * Must be before ipoib_ib_dev_cleanup or we delete an in use
- * work queue
- */
ipoib_neigh_hash_uninit(dev);
ipoib_ib_dev_cleanup(dev);
- kfree(priv->rx_ring);
- vfree(priv->tx_ring);
-
- priv->rx_ring = NULL;
- priv->tx_ring = NULL;
+ /* no more work items on priv->wq */
+ if (priv->wq) {
+ flush_workqueue(priv->wq);
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+ }
}
static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return ib_set_vf_link_state(priv->ca, vf, priv->port, link_state);
}
@@ -1710,7 +1756,7 @@ static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_stat
static int ipoib_get_vf_config(struct net_device *dev, int vf,
struct ifla_vf_info *ivf)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int err;
err = ib_get_vf_config(priv->ca, vf, priv->port, ivf);
@@ -1724,7 +1770,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf,
static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (type != IFLA_VF_IB_NODE_GUID && type != IFLA_VF_IB_PORT_GUID)
return -EINVAL;
@@ -1735,7 +1781,7 @@ static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
static int ipoib_get_vf_stats(struct net_device *dev, int vf,
struct ifla_vf_stats *vf_stats)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return ib_get_vf_stats(priv->ca, vf, priv->port, vf_stats);
}
@@ -1773,21 +1819,12 @@ static const struct net_device_ops ipoib_netdev_ops_vf = {
.ndo_get_iflink = ipoib_get_iflink,
};
-void ipoib_setup(struct net_device *dev)
+void ipoib_setup_common(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
-
- if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
- dev->netdev_ops = &ipoib_netdev_ops_vf;
- else
- dev->netdev_ops = &ipoib_netdev_ops_pf;
-
dev->header_ops = &ipoib_header_ops;
ipoib_set_ethtool_ops(dev);
- netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
-
dev->watchdog_timeo = HZ;
dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
@@ -1801,11 +1838,14 @@ void ipoib_setup(struct net_device *dev)
netif_keep_dst(dev);
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
+}
- priv->dev = dev;
+static void ipoib_build_priv(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ priv->dev = dev;
spin_lock_init(&priv->lock);
-
init_rwsem(&priv->vlan_rwsem);
INIT_LIST_HEAD(&priv->path_list);
@@ -1823,22 +1863,99 @@ void ipoib_setup(struct net_device *dev)
INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
}
-struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
+static const struct net_device_ops ipoib_netdev_default_pf = {
+ .ndo_init = ipoib_dev_init_default,
+ .ndo_uninit = ipoib_dev_uninit_default,
+ .ndo_open = ipoib_ib_dev_open_default,
+ .ndo_stop = ipoib_ib_dev_stop_default,
+};
+
+static struct net_device
+*ipoib_create_netdev_default(struct ib_device *hca,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *))
{
struct net_device *dev;
+ struct rdma_netdev *rn;
- dev = alloc_netdev((int)sizeof(struct ipoib_dev_priv), name,
- NET_NAME_UNKNOWN, ipoib_setup);
+ dev = alloc_netdev((int)sizeof(struct rdma_netdev),
+ name,
+ name_assign_type, setup);
if (!dev)
return NULL;
- return netdev_priv(dev);
+ rn = netdev_priv(dev);
+
+ rn->send = ipoib_send;
+ rn->attach_mcast = ipoib_mcast_attach;
+ rn->detach_mcast = ipoib_mcast_detach;
+ rn->hca = hca;
+
+ dev->netdev_ops = &ipoib_netdev_default_pf;
+
+ return dev;
+}
+
+static struct net_device *ipoib_get_netdev(struct ib_device *hca, u8 port,
+ const char *name)
+{
+ struct net_device *dev;
+
+ if (hca->alloc_rdma_netdev) {
+ dev = hca->alloc_rdma_netdev(hca, port,
+ RDMA_NETDEV_IPOIB, name,
+ NET_NAME_UNKNOWN,
+ ipoib_setup_common);
+ if (IS_ERR_OR_NULL(dev) && PTR_ERR(dev) != -EOPNOTSUPP)
+ return NULL;
+ }
+
+ if (!hca->alloc_rdma_netdev || PTR_ERR(dev) == -EOPNOTSUPP)
+ dev = ipoib_create_netdev_default(hca, name, NET_NAME_UNKNOWN,
+ ipoib_setup_common);
+
+ return dev;
+}
+
+struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
+ const char *name)
+{
+ struct net_device *dev;
+ struct ipoib_dev_priv *priv;
+ struct rdma_netdev *rn;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return NULL;
+
+ dev = ipoib_get_netdev(hca, port, name);
+ if (!dev)
+ goto free_priv;
+
+ priv->rn_ops = dev->netdev_ops;
+
+ /* FIXME: should be after the query_cap */
+ if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
+ dev->netdev_ops = &ipoib_netdev_ops_vf;
+ else
+ dev->netdev_ops = &ipoib_netdev_ops_pf;
+
+ rn = netdev_priv(dev);
+ rn->clnt_priv = priv;
+ ipoib_build_priv(dev);
+
+ return priv;
+free_priv:
+ kfree(priv);
+ return NULL;
}
static ssize_t show_pkey(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+ struct net_device *ndev = to_net_dev(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
return sprintf(buf, "0x%04x\n", priv->pkey);
}
@@ -1847,14 +1964,15 @@ static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
static ssize_t show_umcast(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+ struct net_device *ndev = to_net_dev(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
}
void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
{
- struct ipoib_dev_priv *priv = netdev_priv(ndev);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
if (umcast_val > 0) {
set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
@@ -1927,7 +2045,7 @@ static int ipoib_check_lladdr(struct net_device *dev,
static int ipoib_set_mac(struct net_device *dev, void *addr)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct sockaddr_storage *ss = addr;
int ret;
@@ -2000,7 +2118,7 @@ void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
priv->hca_caps = hca->attrs.device_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
- priv->dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
+ priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
priv->dev->hw_features |= NETIF_F_TSO;
@@ -2016,7 +2134,7 @@ static struct net_device *ipoib_add_port(const char *format,
struct ib_port_attr attr;
int result = -ENOMEM;
- priv = ipoib_intf_alloc(format);
+ priv = ipoib_intf_alloc(hca, port, format);
if (!priv)
goto alloc_mem_failed;
@@ -2090,8 +2208,6 @@ static struct net_device *ipoib_add_port(const char *format,
goto register_failed;
}
- ipoib_create_debug_files(priv->dev);
-
if (ipoib_cm_add_mode_attr(priv->dev))
goto sysfs_failed;
if (ipoib_add_pkey_attr(priv->dev))
@@ -2106,7 +2222,6 @@ static struct net_device *ipoib_add_port(const char *format,
return priv->dev;
sysfs_failed:
- ipoib_delete_debug_files(priv->dev);
unregister_netdev(priv->dev);
register_failed:
@@ -2146,7 +2261,7 @@ static void ipoib_add_one(struct ib_device *device)
continue;
dev = ipoib_add_port("ib%d", device, p);
if (!IS_ERR(dev)) {
- priv = netdev_priv(dev);
+ priv = ipoib_priv(dev);
list_add_tail(&priv->list, dev_list);
count++;
}
@@ -2186,11 +2301,18 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
unregister_netdev(priv->dev);
free_netdev(priv->dev);
+ kfree(priv);
}
kfree(dev_list);
}
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static struct notifier_block ipoib_netdev_notifier = {
+ .notifier_call = ipoib_netdev_event,
+};
+#endif
+
static int __init ipoib_init_module(void)
{
int ret;
@@ -2243,6 +2365,9 @@ static int __init ipoib_init_module(void)
if (ret)
goto err_client;
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+ register_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
return 0;
err_client:
@@ -2260,6 +2385,9 @@ err_fs:
static void __exit ipoib_cleanup_module(void)
{
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+ unregister_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
ipoib_netlink_fini();
ib_unregister_client(&ipoib_client);
ib_sa_unregister_client(&ipoib_sa_client);
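ipoib_get_netdev() above first asks the HCA for an accelerated rdma_netdev and only falls back to the software datapath when the driver does not implement the hook or returns -EOPNOTSUPP. A hypothetical provider-side sketch of that hook, assuming the usual rdma/ib_verbs.h declarations (all names in the body are invented for illustration; no driver in this diff implements it this way):

static struct net_device *
example_alloc_rdma_netdev(struct ib_device *hca, u8 port,
			  enum rdma_netdev_t type, const char *name,
			  unsigned char name_assign_type,
			  void (*setup)(struct net_device *))
{
	if (type != RDMA_NETDEV_IPOIB)
		return ERR_PTR(-EOPNOTSUPP);

	/* A real implementation would allocate a net_device large enough
	 * for struct rdma_netdev plus its own state, point rn->send,
	 * rn->attach_mcast and rn->detach_mcast at offloaded versions,
	 * install its net_device_ops, and return the device. Returning
	 * -EOPNOTSUPP here keeps this sketch on the software fallback. */
	return ERR_PTR(-EOPNOTSUPP);
}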
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 69e146cdc306..057f58e6afca 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -114,7 +114,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
struct net_device *dev = mcast->dev;
int tx_dropped = 0;
- ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
+ ipoib_dbg_mcast(ipoib_priv(dev), "deleting multicast group %pI6\n",
mcast->mcmember.mgid.raw);
/* remove all neigh connected to this mcast */
@@ -158,7 +158,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node *n = priv->multicast_tree.rb_node;
while (n) {
@@ -182,7 +182,7 @@ static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid
static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
while (*n) {
@@ -212,8 +212,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
struct ib_sa_mcmember_rec *mcmember)
{
struct net_device *dev = mcast->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_ah *ah;
+ struct rdma_ah_attr av;
int ret;
int set_qkey = 0;
@@ -260,8 +262,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
return 0;
}
- ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
- &mcast->mcmember.mgid, set_qkey);
+ ret = rn->attach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
+ be16_to_cpu(mcast->mcmember.mlid),
+ set_qkey, priv->qkey);
if (ret < 0) {
ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n",
mcast->mcmember.mgid.raw);
@@ -271,40 +274,34 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
}
}
- {
- struct ib_ah_attr av = {
- .dlid = be16_to_cpu(mcast->mcmember.mlid),
- .port_num = priv->port,
- .sl = mcast->mcmember.sl,
- .ah_flags = IB_AH_GRH,
- .static_rate = mcast->mcmember.rate,
- .grh = {
- .flow_label = be32_to_cpu(mcast->mcmember.flow_label),
- .hop_limit = mcast->mcmember.hop_limit,
- .sgid_index = 0,
- .traffic_class = mcast->mcmember.traffic_class
- }
- };
- av.grh.dgid = mcast->mcmember.mgid;
-
- ah = ipoib_create_ah(dev, priv->pd, &av);
- if (IS_ERR(ah)) {
- ipoib_warn(priv, "ib_address_create failed %ld\n",
- -PTR_ERR(ah));
- /* use original error */
- return PTR_ERR(ah);
- } else {
- spin_lock_irq(&priv->lock);
- mcast->ah = ah;
- spin_unlock_irq(&priv->lock);
-
- ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
- mcast->mcmember.mgid.raw,
- mcast->ah->ah,
- be16_to_cpu(mcast->mcmember.mlid),
- mcast->mcmember.sl);
- }
+ memset(&av, 0, sizeof(av));
+ av.type = rdma_ah_find_type(priv->ca, priv->port);
+ rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid));
+ rdma_ah_set_port_num(&av, priv->port);
+ rdma_ah_set_sl(&av, mcast->mcmember.sl);
+ rdma_ah_set_static_rate(&av, mcast->mcmember.rate);
+
+ rdma_ah_set_grh(&av, &mcast->mcmember.mgid,
+ be32_to_cpu(mcast->mcmember.flow_label),
+ 0, mcast->mcmember.hop_limit,
+ mcast->mcmember.traffic_class);
+
+ ah = ipoib_create_ah(dev, priv->pd, &av);
+ if (IS_ERR(ah)) {
+ ipoib_warn(priv, "ib_address_create failed %ld\n",
+ -PTR_ERR(ah));
+ /* use original error */
+ return PTR_ERR(ah);
}
+ spin_lock_irq(&priv->lock);
+ mcast->ah = ah;
+ spin_unlock_irq(&priv->lock);
+
+ ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
+ mcast->mcmember.mgid.raw,
+ mcast->ah->ah,
+ be16_to_cpu(mcast->mcmember.mlid),
+ mcast->mcmember.sl);
/* actually send any queued packets */
netif_tx_lock_bh(dev);
@@ -331,7 +328,6 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
carrier_on_task);
struct ib_port_attr attr;
- int ret;
if (ib_query_port(priv->ca, priv->port, &attr) ||
attr.state != IB_PORT_ACTIVE) {
@@ -344,11 +340,9 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
* because the broadcast group must always be joined first and is always
* re-joined if the SM changes substantially.
*/
- ret = ipoib_check_sm_sendonly_fullmember_support(priv);
- if (ret < 0)
- pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n",
- priv->dev->name, ret);
-
+ priv->sm_fullmember_sendonly_support =
+ ib_sa_sendonly_fullmem_support(&ipoib_sa_client,
+ priv->ca, priv->port);
/*
* Take rtnl_lock to avoid racing with ipoib_stop() and
* turning the carrier back on while a device is being
@@ -375,7 +369,7 @@ static int ipoib_mcast_join_complete(int status,
{
struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
@@ -477,7 +471,7 @@ out_locked:
*/
static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_sa_multicast *multicast;
struct ib_sa_mcmember_rec rec = {
.join_state = 1
@@ -489,6 +483,9 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
return -EINVAL;
+ init_completion(&mcast->done);
+ set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+
ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);
rec.mgid = mcast->mcmember.mgid;
@@ -647,8 +644,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
if (mcast->backoff == 1 ||
time_after_eq(jiffies, mcast->delay_until)) {
/* Found the next unjoined group */
- init_completion(&mcast->done);
- set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
if (ipoib_mcast_join(dev, mcast)) {
spin_unlock_irq(&priv->lock);
return;
@@ -668,17 +663,15 @@ out:
queue_delayed_work(priv->wq, &priv->mcast_task,
delay_until - jiffies);
}
- if (mcast) {
- init_completion(&mcast->done);
- set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ if (mcast)
ipoib_mcast_join(dev, mcast);
- }
+
spin_unlock_irq(&priv->lock);
}
void ipoib_mcast_start_thread(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned long flags;
ipoib_dbg_mcast(priv, "starting multicast thread\n");
@@ -690,7 +683,7 @@ void ipoib_mcast_start_thread(struct net_device *dev)
int ipoib_mcast_stop_thread(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
unsigned long flags;
ipoib_dbg_mcast(priv, "stopping multicast thread\n");
@@ -706,7 +699,8 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
int ret = 0;
if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
@@ -720,8 +714,8 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
mcast->mcmember.mgid.raw);
/* Remove ourselves from the multicast group */
- ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
- be16_to_cpu(mcast->mcmember.mlid));
+ ret = rn->detach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
+ be16_to_cpu(mcast->mcmember.mlid));
if (ret)
ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
} else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
@@ -762,7 +756,8 @@ void ipoib_mcast_remove_list(struct list_head *remove_list)
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
struct ipoib_mcast *mcast;
unsigned long flags;
void *mgid = daddr + 4;
@@ -825,7 +820,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
}
}
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+ mcast->ah->last_send = rn->send(dev, skb, mcast->ah->ah,
+ IB_MULTICAST_QPN);
if (neigh)
ipoib_neigh_put(neigh);
return;
@@ -837,7 +833,7 @@ unlock:
void ipoib_mcast_dev_flush(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
LIST_HEAD(remove_list);
struct ipoib_mcast *mcast, *tmcast;
unsigned long flags;
@@ -1029,7 +1025,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
{
- struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(iter->dev);
struct rb_node *n;
struct ipoib_mcast *mcast;
int ret = 1;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index cdc7df4fdb8a..28884781311b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -44,7 +44,7 @@ static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
u16 val;
if (nla_put_u16(skb, IFLA_IPOIB_PKEY, priv->pkey))
@@ -107,7 +107,7 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
if (!pdev || pdev->type != ARPHRD_INFINIBAND)
return -ENODEV;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) {
ipoib_warn(ppriv, "child creation disallowed for child devices\n");
@@ -129,7 +129,8 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
*/
child_pkey |= 0x8000;
- err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD);
+ err = __ipoib_vlan_add(ppriv, ipoib_priv(dev),
+ child_pkey, IPOIB_RTNL_CHILD);
if (!err && data)
err = ipoib_changelink(dev, tb, data);
@@ -140,8 +141,8 @@ static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head
{
struct ipoib_dev_priv *priv, *ppriv;
- priv = netdev_priv(dev);
- ppriv = netdev_priv(priv->parent);
+ priv = ipoib_priv(dev);
+ ppriv = ipoib_priv(priv->parent);
down_write(&ppriv->vlan_rwsem);
unregister_netdevice_queue(dev, head);
@@ -161,7 +162,7 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
.maxtype = IFLA_IPOIB_MAX,
.policy = ipoib_policy,
.priv_size = sizeof(struct ipoib_dev_priv),
- .setup = ipoib_setup,
+ .setup = ipoib_setup_common,
.newlink = ipoib_new_child_link,
.changelink = ipoib_changelink,
.dellink = ipoib_unregister_child_dev,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 189dcd1709d2..bb64baf25309 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -35,9 +35,10 @@
#include "ipoib.h"
-int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey)
+int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid, int set_qkey, u32 qkey)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr *qp_attr = NULL;
int ret;
u16 pkey_index;
@@ -56,7 +57,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int
goto out;
/* set correct QKey for QP */
- qp_attr->qkey = priv->qkey;
+ qp_attr->qkey = qkey;
ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
if (ret) {
ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
@@ -74,9 +75,20 @@ out:
return ret;
}
+int ipoib_mcast_detach(struct net_device *dev, struct ib_device *hca,
+ union ib_gid *mgid, u16 mlid)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ int ret;
+
+ ret = ib_detach_mcast(priv->qp, mgid, mlid);
+
+ return ret;
+}
+
int ipoib_init_qp(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret;
struct ib_qp_attr qp_attr;
int attr_mask;
@@ -130,7 +142,7 @@ out_fail:
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_init_attr init_attr = {
.cap = {
.max_send_wr = ipoib_sendq_size,
@@ -147,22 +159,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
int ret, size;
int i;
- priv->pd = ib_alloc_pd(priv->ca, 0);
- if (IS_ERR(priv->pd)) {
- printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
- return -ENODEV;
- }
-
- /*
- * the various IPoIB tasks assume they will never race against
- * themselves, so always use a single thread workqueue
- */
- priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM);
- if (!priv->wq) {
- printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
- goto out_free_pd;
- }
-
size = ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev);
if (!ret) {
@@ -173,7 +169,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
size += ipoib_recvq_size * ipoib_max_conn_qp;
} else
if (ret != -ENOSYS)
- goto out_free_wq;
+ return -ENODEV;
cq_attr.cqe = size;
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL,
@@ -212,10 +208,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
goto out_free_send_cq;
}
- priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
- priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
- priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff;
-
for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
priv->tx_sge[i].lkey = priv->pd->local_dma_lkey;
@@ -247,26 +239,18 @@ out_free_recv_cq:
out_cm_dev_cleanup:
ipoib_cm_dev_cleanup(dev);
-out_free_wq:
- destroy_workqueue(priv->wq);
- priv->wq = NULL;
-
-out_free_pd:
- ib_dealloc_pd(priv->pd);
-
return -ENODEV;
}
void ipoib_transport_dev_cleanup(struct net_device *dev)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (priv->qp) {
if (ib_destroy_qp(priv->qp))
ipoib_warn(priv, "ib_qp_destroy failed\n");
priv->qp = NULL;
- clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
}
if (ib_destroy_cq(priv->send_cq))
@@ -274,16 +258,6 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
if (ib_destroy_cq(priv->recv_cq))
ipoib_warn(priv, "ib_cq_destroy (recv) failed\n");
-
- ipoib_cm_dev_cleanup(dev);
-
- if (priv->wq) {
- flush_workqueue(priv->wq);
- destroy_workqueue(priv->wq);
- priv->wq = NULL;
- }
-
- ib_dealloc_pd(priv->pd);
}
void ipoib_event(struct ib_event_handler *handler,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 3e10e3dac2e7..36dc4fcaa3cd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -44,7 +44,7 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
char *buf)
{
struct net_device *dev = to_net_dev(d);
- struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
return sprintf(buf, "%s\n", priv->parent->name);
}
@@ -86,8 +86,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
goto register_failed;
}
- ipoib_create_debug_files(priv->dev);
-
	/* RTNL children don't need proprietary sysfs entries */
if (type == IPOIB_LEGACY_CHILD) {
if (ipoib_cm_add_mode_attr(priv->dev))
@@ -108,7 +106,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
sysfs_failed:
result = -ENOMEM;
- ipoib_delete_debug_files(priv->dev);
unregister_netdevice(priv->dev);
register_failed:
@@ -128,14 +125,15 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
return -EPERM;
snprintf(intf_name, sizeof intf_name, "%s.%04x",
ppriv->dev->name, pkey);
- priv = ipoib_intf_alloc(intf_name);
+
+ priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
if (!priv)
return -ENOMEM;
@@ -183,7 +181,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- ppriv = netdev_priv(pdev);
+ ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
return -EPERM;
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 81ae2e30dd12..12ed62ce9ff7 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -612,7 +612,7 @@ iser_check_remote_inv(struct iser_conn *iser_conn,
iser_conn, rkey);
if (unlikely(!iser_conn->snd_w_inv)) {
- iser_err("conn %p: unexepected remote invalidation, "
+ iser_err("conn %p: unexpected remote invalidation, "
"terminating connection\n", iser_conn);
return -EPROTO;
}
diff --git a/drivers/infiniband/ulp/opa_vnic/Kconfig b/drivers/infiniband/ulp/opa_vnic/Kconfig
new file mode 100644
index 000000000000..48132ab5e6b9
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Kconfig
@@ -0,0 +1,8 @@
+config INFINIBAND_OPA_VNIC
+ tristate "Intel OPA VNIC support"
+ depends on X86_64 && INFINIBAND
+ ---help---
+	This is the Omni-Path (OPA) Virtual Network Interface Controller (VNIC)
+	driver for the Ethernet over Omni-Path feature. It implements the
+	HW-independent VNIC functionality and interfaces with the Linux stack
+	for the data path and with IB MADs for the control path.
diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile b/drivers/infiniband/ulp/opa_vnic/Makefile
new file mode 100644
index 000000000000..8061b287cfe4
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/Makefile
@@ -0,0 +1,7 @@
+# Makefile - Intel Omni-Path Virtual Network Controller driver
+# Copyright(c) 2017, Intel Corporation.
+#
+obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o
+
+opa_vnic-y := opa_vnic_netdev.o opa_vnic_encap.o opa_vnic_ethtool.o \
+ opa_vnic_vema.o opa_vnic_vema_iface.o
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
new file mode 100644
index 000000000000..2e8fee982436
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains the OPA VNIC encapsulation/decapsulation functions.
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+
+#include "opa_vnic_internal.h"
+
+/* OPA 16B Header fields */
+#define OPA_16B_LID_MASK 0xFFFFFull
+#define OPA_16B_SLID_HIGH_SHFT 8
+#define OPA_16B_SLID_MASK 0xF00ull
+#define OPA_16B_DLID_MASK 0xF000ull
+#define OPA_16B_DLID_HIGH_SHFT 12
+#define OPA_16B_LEN_SHFT 20
+#define OPA_16B_SC_SHFT 20
+#define OPA_16B_RC_SHFT 25
+#define OPA_16B_PKEY_SHFT 16
+
+#define OPA_VNIC_L4_HDR_SHFT 16
+
+/* L2+L4 hdr len is 20 bytes (5 quad words) */
+#define OPA_VNIC_HDR_QW_LEN 5
+
+static inline void opa_vnic_make_header(u8 *hdr, u32 slid, u32 dlid, u16 len,
+ u16 pkey, u16 entropy, u8 sc, u8 rc,
+ u8 l4_type, u16 l4_hdr)
+{
+ /* h[1]: LT=1, 16B L2=10 */
+ u32 h[OPA_VNIC_HDR_QW_LEN] = {0, 0xc0000000, 0, 0, 0};
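+	/*
+	 * Field placement (as set below): h[0] carries SLID[19:0] and the
+	 * length, h[1] carries DLID[19:0], RC and SC, h[2] carries the pkey,
+	 * L4 type and the upper LID bits, h[3] carries the entropy and h[4]
+	 * carries the L4 header.
+	 */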
+
+ h[2] = l4_type;
+ h[3] = entropy;
+ h[4] = l4_hdr << OPA_VNIC_L4_HDR_SHFT;
+
+ /* Extract and set 4 upper bits and 20 lower bits of the lids */
+ h[0] |= (slid & OPA_16B_LID_MASK);
+ h[2] |= ((slid >> (20 - OPA_16B_SLID_HIGH_SHFT)) & OPA_16B_SLID_MASK);
+
+ h[1] |= (dlid & OPA_16B_LID_MASK);
+ h[2] |= ((dlid >> (20 - OPA_16B_DLID_HIGH_SHFT)) & OPA_16B_DLID_MASK);
+
+ h[0] |= (len << OPA_16B_LEN_SHFT);
+ h[1] |= (rc << OPA_16B_RC_SHFT);
+ h[1] |= (sc << OPA_16B_SC_SHFT);
+ h[2] |= ((u32)pkey << OPA_16B_PKEY_SHFT);
+
+ memcpy(hdr, h, OPA_VNIC_HDR_LEN);
+}
+
+/*
+ * The mac table is implemented as a simple hash table, keyed on the last
+ * octet of the mac address.
+ */
+static void opa_vnic_free_mac_tbl(struct hlist_head *mactbl)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_node *tmp;
+ int bkt;
+
+ if (!mactbl)
+ return;
+
+ vnic_hash_for_each_safe(mactbl, bkt, tmp, node, hlist) {
+ hash_del(&node->hlist);
+ kfree(node);
+ }
+ kfree(mactbl);
+}
+
+static struct hlist_head *opa_vnic_alloc_mac_tbl(void)
+{
+ u32 size = sizeof(struct hlist_head) * OPA_VNIC_MAC_TBL_SIZE;
+ struct hlist_head *mactbl;
+
+ mactbl = kzalloc(size, GFP_KERNEL);
+ if (!mactbl)
+ return ERR_PTR(-ENOMEM);
+
+ vnic_hash_init(mactbl);
+ return mactbl;
+}
+
+/* opa_vnic_release_mac_tbl - empty and free the mac table */
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter)
+{
+ struct hlist_head *mactbl;
+
+ mutex_lock(&adapter->mactbl_lock);
+ mactbl = rcu_access_pointer(adapter->mactbl);
+ rcu_assign_pointer(adapter->mactbl, NULL);
+ synchronize_rcu();
+ opa_vnic_free_mac_tbl(mactbl);
+ mutex_unlock(&adapter->mactbl_lock);
+}
+
+/*
+ * opa_vnic_query_mac_tbl - query the mac table for a section
+ *
+ * This function implements querying a specific section of the mac table.
+ * The function also expects the requested range to be valid.
+ */
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_head *mactbl;
+ int bkt;
+ u16 loffset, lnum_entries;
+
+ rcu_read_lock();
+ mactbl = rcu_dereference(adapter->mactbl);
+ if (!mactbl)
+ goto get_mac_done;
+
+ loffset = be16_to_cpu(tbl->offset);
+ lnum_entries = be16_to_cpu(tbl->num_entries);
+
+ vnic_hash_for_each(mactbl, bkt, node, hlist) {
+ struct __opa_vnic_mactable_entry *nentry = &node->entry;
+ struct opa_veswport_mactable_entry *entry;
+
+ if ((node->index < loffset) ||
+ (node->index >= (loffset + lnum_entries)))
+ continue;
+
+ /* populate entry in the tbl corresponding to the index */
+ entry = &tbl->tbl_entries[node->index - loffset];
+ memcpy(entry->mac_addr, nentry->mac_addr,
+ ARRAY_SIZE(entry->mac_addr));
+ memcpy(entry->mac_addr_mask, nentry->mac_addr_mask,
+ ARRAY_SIZE(entry->mac_addr_mask));
+ entry->dlid_sd = cpu_to_be32(nentry->dlid_sd);
+ }
+ tbl->mac_tbl_digest = cpu_to_be32(adapter->info.vport.mac_tbl_digest);
+get_mac_done:
+ rcu_read_unlock();
+}
+
+/*
+ * opa_vnic_update_mac_tbl - update mac table section
+ *
+ * This function updates the specified section of the mac table.
+ * The procedure includes following steps.
+ * - Allocate a new mac (hash) table.
+ * - Add the specified entries to the new table.
+ * (except the ones that are requested to be deleted).
+ * - Add all the other entries from the old mac table.
+ * - If there is a failure, free the new table and return.
+ * - Switch to the new table.
+ * - Free the old table and return.
+ *
+ * The function also expects the requested range to be valid.
+ */
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl)
+{
+ struct opa_vnic_mac_tbl_node *node, *new_node;
+ struct hlist_head *new_mactbl, *old_mactbl;
+ int i, bkt, rc = 0;
+ u8 key;
+ u16 loffset, lnum_entries;
+
+ mutex_lock(&adapter->mactbl_lock);
+ /* allocate new mac table */
+ new_mactbl = opa_vnic_alloc_mac_tbl();
+ if (IS_ERR(new_mactbl)) {
+ mutex_unlock(&adapter->mactbl_lock);
+ return PTR_ERR(new_mactbl);
+ }
+
+ loffset = be16_to_cpu(tbl->offset);
+ lnum_entries = be16_to_cpu(tbl->num_entries);
+
+ /* add updated entries to the new mac table */
+ for (i = 0; i < lnum_entries; i++) {
+ struct __opa_vnic_mactable_entry *nentry;
+ struct opa_veswport_mactable_entry *entry =
+ &tbl->tbl_entries[i];
+ u8 *mac_addr = entry->mac_addr;
+ u8 empty_mac[ETH_ALEN] = { 0 };
+
+ v_dbg("new mac entry %4d: %02x:%02x:%02x:%02x:%02x:%02x %x\n",
+ loffset + i, mac_addr[0], mac_addr[1], mac_addr[2],
+ mac_addr[3], mac_addr[4], mac_addr[5],
+ entry->dlid_sd);
+
+ /* if the entry is being removed, do not add it */
+ if (!memcmp(mac_addr, empty_mac, ARRAY_SIZE(empty_mac)))
+ continue;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node) {
+ rc = -ENOMEM;
+ goto updt_done;
+ }
+
+ node->index = loffset + i;
+ nentry = &node->entry;
+ memcpy(nentry->mac_addr, entry->mac_addr,
+ ARRAY_SIZE(nentry->mac_addr));
+ memcpy(nentry->mac_addr_mask, entry->mac_addr_mask,
+ ARRAY_SIZE(nentry->mac_addr_mask));
+ nentry->dlid_sd = be32_to_cpu(entry->dlid_sd);
+ key = node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_add(new_mactbl, &node->hlist, key);
+ }
+
+ /* add other entries from current mac table to new mac table */
+ old_mactbl = rcu_access_pointer(adapter->mactbl);
+ if (!old_mactbl)
+ goto switch_tbl;
+
+ vnic_hash_for_each(old_mactbl, bkt, node, hlist) {
+ if ((node->index >= loffset) &&
+ (node->index < (loffset + lnum_entries)))
+ continue;
+
+ new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+ if (!new_node) {
+ rc = -ENOMEM;
+ goto updt_done;
+ }
+
+ new_node->index = node->index;
+ memcpy(&new_node->entry, &node->entry, sizeof(node->entry));
+ key = new_node->entry.mac_addr[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_add(new_mactbl, &new_node->hlist, key);
+ }
+
+switch_tbl:
+ /* switch to new table */
+ rcu_assign_pointer(adapter->mactbl, new_mactbl);
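+	/* ensure no readers still reference the old table before it is freed */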
+ synchronize_rcu();
+
+ adapter->info.vport.mac_tbl_digest = be32_to_cpu(tbl->mac_tbl_digest);
+updt_done:
+ /* upon failure, free the new table; otherwise, free the old table */
+ if (rc)
+ opa_vnic_free_mac_tbl(new_mactbl);
+ else
+ opa_vnic_free_mac_tbl(old_mactbl);
+
+ mutex_unlock(&adapter->mactbl_lock);
+ return rc;
+}
+
+/* opa_vnic_chk_mac_tbl - check mac table for dlid */
+static uint32_t opa_vnic_chk_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct ethhdr *mac_hdr)
+{
+ struct opa_vnic_mac_tbl_node *node;
+ struct hlist_head *mactbl;
+ u32 dlid = 0;
+ u8 key;
+
+ rcu_read_lock();
+ mactbl = rcu_dereference(adapter->mactbl);
+ if (unlikely(!mactbl))
+ goto chk_done;
+
+ key = mac_hdr->h_dest[OPA_VNIC_MAC_HASH_IDX];
+ vnic_hash_for_each_possible(mactbl, node, hlist, key) {
+ struct __opa_vnic_mactable_entry *entry = &node->entry;
+
+ /* if related to source mac, skip */
+ if (unlikely(OPA_VNIC_DLID_SD_IS_SRC_MAC(entry->dlid_sd)))
+ continue;
+
+ if (!memcmp(node->entry.mac_addr, mac_hdr->h_dest,
+ ARRAY_SIZE(node->entry.mac_addr))) {
+ /* mac address found */
+ dlid = OPA_VNIC_DLID_SD_GET_DLID(node->entry.dlid_sd);
+ break;
+ }
+ }
+
+chk_done:
+ rcu_read_unlock();
+ return dlid;
+}
+
+/* opa_vnic_get_dlid - find and return the DLID */
+static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter,
+ struct sk_buff *skb, u8 def_port)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ u32 dlid;
+
+ dlid = opa_vnic_chk_mac_tbl(adapter, mac_hdr);
+ if (dlid)
+ return dlid;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest)) {
+ dlid = info->vesw.u_mcast_dlid;
+ } else {
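+		/*
+		 * Locally administered unicast MACs encode the DLID in the
+		 * last three octets (byte 5 holds the most significant bits).
+		 */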
+ if (is_local_ether_addr(mac_hdr->h_dest)) {
+ dlid = ((uint32_t)mac_hdr->h_dest[5] << 16) |
+ ((uint32_t)mac_hdr->h_dest[4] << 8) |
+ mac_hdr->h_dest[3];
+ if (unlikely(!dlid))
+ v_warn("Null dlid in MAC address\n");
+ } else if (def_port != OPA_VNIC_INVALID_PORT) {
+ dlid = info->vesw.u_ucast_dlid[def_port];
+ }
+ }
+
+ return dlid;
+}
+
+/* opa_vnic_get_sc - return the service class */
+static u8 opa_vnic_get_sc(struct __opa_veswport_info *info,
+ struct sk_buff *skb)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ u16 vlan_tci;
+ u8 sc;
+
+ if (!__vlan_get_tag(skb, &vlan_tci)) {
+ u8 pcp = OPA_VNIC_VLAN_PCP(vlan_tci);
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ sc = info->vport.pcp_to_sc_mc[pcp];
+ else
+ sc = info->vport.pcp_to_sc_uc[pcp];
+ } else {
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ sc = info->vport.non_vlan_sc_mc;
+ else
+ sc = info->vport.non_vlan_sc_uc;
+ }
+
+ return sc;
+}
+
+u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ struct __opa_veswport_info *info = &adapter->info;
+ u8 vl;
+
+ if (skb_vlan_tag_present(skb)) {
+ u8 pcp = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ vl = info->vport.pcp_to_vl_mc[pcp];
+ else
+ vl = info->vport.pcp_to_vl_uc[pcp];
+ } else {
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ vl = info->vport.non_vlan_vl_mc;
+ else
+ vl = info->vport.non_vlan_vl_uc;
+ }
+
+ return vl;
+}
+
+/* opa_vnic_calc_entropy - calculate the packet entropy */
+u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ u16 hash16;
+
+ /*
+ * Get flow based 16-bit hash and then XOR the upper and lower bytes
+ * to get the entropy.
+ * __skb_tx_hash limits qcount to 16 bits. Hence, get 15-bit hash.
+ */
+ hash16 = __skb_tx_hash(adapter->netdev, skb, BIT(15));
+ return (u8)((hash16 >> 8) ^ (hash16 & 0xff));
+}
+
+/* opa_vnic_get_def_port - get default port based on entropy */
+static inline u8 opa_vnic_get_def_port(struct opa_vnic_adapter *adapter,
+ u8 entropy)
+{
+ u8 flow_id;
+
+ /* Add the upper and lower 4-bits of entropy to get the flow id */
+ flow_id = ((entropy & 0xf) + (entropy >> 4));
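+	/* the flow id picks an entry from the flow table, masked to its size */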
+ return adapter->flow_tbl[flow_id & (OPA_VNIC_FLOW_TBL_SIZE - 1)];
+}
+
+/* Calculate packet length including OPA header, crc and padding */
+static inline int opa_vnic_wire_length(struct sk_buff *skb)
+{
+ u32 pad_len;
+
+ /* padding for 8 bytes size alignment */
+ pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
+ pad_len += OPA_VNIC_ICRC_TAIL_LEN;
+
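+	/* the shift returns the wire length in 8-byte units */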
+ return (skb->len + pad_len) >> 3;
+}
+
+/* opa_vnic_encap_skb - encapsulate skb packet with OPA header and meta data */
+void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct opa_vnic_skb_mdata *mdata;
+ u8 def_port, sc, entropy, *hdr;
+ u16 len, l4_hdr;
+ u32 dlid;
+
+ hdr = skb_push(skb, OPA_VNIC_HDR_LEN);
+
+ entropy = opa_vnic_calc_entropy(adapter, skb);
+ def_port = opa_vnic_get_def_port(adapter, entropy);
+ len = opa_vnic_wire_length(skb);
+ dlid = opa_vnic_get_dlid(adapter, skb, def_port);
+ sc = opa_vnic_get_sc(info, skb);
+ l4_hdr = info->vesw.vesw_id;
+
+ mdata = (struct opa_vnic_skb_mdata *)skb_push(skb, sizeof(*mdata));
+ mdata->vl = opa_vnic_get_vl(adapter, skb);
+ mdata->entropy = entropy;
+ mdata->flags = 0;
+ if (unlikely(!dlid)) {
+ mdata->flags = OPA_VNIC_SKB_MDATA_ENCAP_ERR;
+ return;
+ }
+
+ opa_vnic_make_header(hdr, info->vport.encap_slid, dlid, len,
+ info->vesw.pkey, entropy, sc, 0,
+ OPA_VNIC_L4_ETHR, l4_hdr);
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
new file mode 100644
index 000000000000..4c434b9dd84c
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -0,0 +1,489 @@
+#ifndef _OPA_VNIC_ENCAP_H
+#define _OPA_VNIC_ENCAP_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains all OPA VNIC declarations required for encapsulation
+ * and decapsulation of Ethernet packets.
+ */
+
+#include <linux/types.h>
+#include <rdma/ib_mad.h>
+
+/* EMA class version */
+#define OPA_EMA_CLASS_VERSION 0x80
+
+/*
+ * Define the Intel vendor management class for OPA
+ * ETHERNET MANAGEMENT
+ */
+#define OPA_MGMT_CLASS_INTEL_EMA 0x34
+
+/* EM attribute IDs */
+#define OPA_EM_ATTR_CLASS_PORT_INFO 0x0001
+#define OPA_EM_ATTR_VESWPORT_INFO 0x0011
+#define OPA_EM_ATTR_VESWPORT_MAC_ENTRIES 0x0012
+#define OPA_EM_ATTR_IFACE_UCAST_MACS 0x0013
+#define OPA_EM_ATTR_IFACE_MCAST_MACS 0x0014
+#define OPA_EM_ATTR_DELETE_VESW 0x0015
+#define OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS 0x0020
+#define OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS 0x0022
+
+/* VNIC configured and operational state values */
+#define OPA_VNIC_STATE_DROP_ALL 0x1
+#define OPA_VNIC_STATE_FORWARDING 0x3
+
+#define OPA_VESW_MAX_NUM_DEF_PORT 16
+#define OPA_VNIC_MAX_NUM_PCP 8
+
+#define OPA_VNIC_EMA_DATA (OPA_MGMT_MAD_SIZE - IB_MGMT_VENDOR_HDR)
+
+/* Defines for vendor specific notice(trap) attributes */
+#define OPA_INTEL_EMA_NOTICE_TYPE_INFO 0x04
+
+/* INTEL OUI */
+#define INTEL_OUI_1 0x00
+#define INTEL_OUI_2 0x06
+#define INTEL_OUI_3 0x6a
+
+/* Trap opcodes sent from VNIC */
+#define OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE 0x1
+#define OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE 0x2
+#define OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE 0x3
+
+#define OPA_VNIC_DLID_SD_IS_SRC_MAC(dlid_sd) (!!((dlid_sd) & 0x20))
+#define OPA_VNIC_DLID_SD_GET_DLID(dlid_sd) ((dlid_sd) >> 8)
+
+/* VNIC Ethernet link status */
+#define OPA_VNIC_ETH_LINK_UP 1
+#define OPA_VNIC_ETH_LINK_DOWN 2
+
+/**
+ * struct opa_vesw_info - OPA vnic switch information
+ * @fabric_id: 10-bit fabric id
+ * @vesw_id: 12-bit virtual ethernet switch id
+ * @def_port_mask: bitmask of default ports
+ * @pkey: partition key
+ * @u_mcast_dlid: unknown multicast dlid
+ * @u_ucast_dlid: array of unknown unicast dlids
+ * @eth_mtu: MTUs for each vlan PCP
+ * @eth_mtu_non_vlan: MTU for non vlan packets
+ */
+struct opa_vesw_info {
+ __be16 fabric_id;
+ __be16 vesw_id;
+
+ u8 rsvd0[6];
+ __be16 def_port_mask;
+
+ u8 rsvd1[2];
+ __be16 pkey;
+
+ u8 rsvd2[4];
+ __be32 u_mcast_dlid;
+ __be32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+ u8 rsvd3[44];
+ __be16 eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+ __be16 eth_mtu_non_vlan;
+ u8 rsvd4[2];
+} __packed;
+
+/**
+ * struct opa_per_veswport_info - OPA vnic per port information
+ * @port_num: port number
+ * @eth_link_status: current ethernet link state
+ * @base_mac_addr: base mac address
+ * @config_state: configured port state
+ * @oper_state: operational port state
+ * @max_mac_tbl_ent: max number of mac table entries
+ * @max_smac_ent: max smac entries in mac table
+ * @mac_tbl_digest: mac table digest
+ * @encap_slid: base slid for the port
+ * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets
+ * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets
+ * @pcp_to_sc_mc: sc by pcp index for multicast ethernet packets
+ * @pcp_to_vl_mc: vl by pcp index for multicast ethernet packets
+ * @non_vlan_sc_uc: sc for non-vlan unicast ethernet packets
+ * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets
+ * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets
+ * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets
+ * @uc_macs_gen_count: generation count for unicast macs list
+ * @mc_macs_gen_count: generation count for multicast macs list
+ */
+struct opa_per_veswport_info {
+ __be32 port_num;
+
+ u8 eth_link_status;
+ u8 rsvd0[3];
+
+ u8 base_mac_addr[ETH_ALEN];
+ u8 config_state;
+ u8 oper_state;
+
+ __be16 max_mac_tbl_ent;
+ __be16 max_smac_ent;
+ __be32 mac_tbl_digest;
+ u8 rsvd1[4];
+
+ __be32 encap_slid;
+
+ u8 pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP];
+
+ u8 non_vlan_sc_uc;
+ u8 non_vlan_vl_uc;
+ u8 non_vlan_sc_mc;
+ u8 non_vlan_vl_mc;
+
+ u8 rsvd2[48];
+
+ __be16 uc_macs_gen_count;
+ __be16 mc_macs_gen_count;
+
+ u8 rsvd3[8];
+} __packed;
+
+/**
+ * struct opa_veswport_info - OPA vnic port information
+ * @vesw: OPA vnic switch information
+ * @vport: OPA vnic per port information
+ *
+ * On the host, each of the virtual ethernet ports belongs
+ * to a different virtual ethernet switch.
+ */
+struct opa_veswport_info {
+ struct opa_vesw_info vesw;
+ struct opa_per_veswport_info vport;
+};
+
+/**
+ * struct opa_veswport_mactable_entry - single entry in the forwarding table
+ * @mac_addr: MAC address
+ * @mac_addr_mask: MAC address bit mask
+ * @dlid_sd: Matching DLID and side data
+ *
+ * On the host, each virtual ethernet port has a
+ * forwarding table. These tables are used to map a
+ * MAC to a LID and other data. For more details see
+ * struct opa_veswport_mactable.
+ * This is the structure of a single mactable entry.
+ */
+struct opa_veswport_mactable_entry {
+ u8 mac_addr[ETH_ALEN];
+ u8 mac_addr_mask[ETH_ALEN];
+ __be32 dlid_sd;
+} __packed;
+
+/**
+ * struct opa_veswport_mactable - Forwarding table array
+ * @offset: mac table starting offset
+ * @num_entries: Number of entries to get or set
+ * @mac_tbl_digest: mac table digest
+ * @tbl_entries[]: Array of table entries
+ *
+ * The EM sends down this structure in a MAD indicating
+ * the starting offset in the forwarding table that these
+ * entries are to be loaded into and the number of entries
+ * that this MAD instance contains.
+ * The mac_tbl_digest has been added to this MAD structure. It is set by
+ * the EM and is used by the EM to detect any discrepancy between this
+ * value and the value it maintains, for example when a VNIC port is
+ * deleted or unloaded.
+ * A new instantiation of a VNIC will always have a digest value of zero.
+ * This value is stored as part of the vnic adapter structure and will be
+ * accessed by the GET and SET routines for both the mactable entries and the
+ * veswport info.
+ */
+struct opa_veswport_mactable {
+ __be16 offset;
+ __be16 num_entries;
+ __be32 mac_tbl_digest;
+ struct opa_veswport_mactable_entry tbl_entries[0];
+} __packed;
+
+/**
+ * struct opa_veswport_summary_counters - summary counters
+ * @vp_instance: vport instance on the OPA port
+ * @vesw_id: virtual ethernet switch id
+ * @veswport_num: virtual ethernet switch port number
+ * @tx_errors: transmit errors
+ * @rx_errors: receive errors
+ * @tx_packets: transmit packets
+ * @rx_packets: receive packets
+ * @tx_bytes: transmit bytes
+ * @rx_bytes: receive bytes
+ * @tx_unicast: unicast packets transmitted
+ * @tx_mcastbcast: multicast/broadcast packets transmitted
+ * @tx_untagged: non-vlan packets transmitted
+ * @tx_vlan: vlan packets transmitted
+ * @tx_64_size: transmit packet length is 64 bytes
+ * @tx_65_127: transmit packet length is >=65 and <= 127 bytes
+ * @tx_128_255: transmit packet length is >=128 and <= 255 bytes
+ * @tx_256_511: transmit packet length is >=256 and <= 511 bytes
+ * @tx_512_1023: transmit packet length is >=512 and <= 1023 bytes
+ * @tx_1024_1518: transmit packet length is >=1024 and <= 1518 bytes
+ * @tx_1519_max: transmit packet length >= 1519 bytes
+ * @rx_unicast: unicast packets received
+ * @rx_mcastbcast: multicast/broadcast packets received
+ * @rx_untagged: non-vlan packets received
+ * @rx_vlan: vlan packets received
+ * @rx_64_size: received packet length is 64 bytes
+ * @rx_65_127: received packet length is >=65 and <= 127 bytes
+ * @rx_128_255: received packet length is >=128 and <= 255 bytes
+ * @rx_256_511: received packet length is >=256 and <= 511 bytes
+ * @rx_512_1023: received packet length is >=512 and <= 1023 bytes
+ * @rx_1024_1518: received packet length is >=1024 and <= 1518 bytes
+ * @rx_1519_max: received packet length >= 1519 bytes
+ *
+ * All the above are counters of corresponding conditions.
+ */
+struct opa_veswport_summary_counters {
+ __be16 vp_instance;
+ __be16 vesw_id;
+ __be32 veswport_num;
+
+ __be64 tx_errors;
+ __be64 rx_errors;
+ __be64 tx_packets;
+ __be64 rx_packets;
+ __be64 tx_bytes;
+ __be64 rx_bytes;
+
+ __be64 tx_unicast;
+ __be64 tx_mcastbcast;
+
+ __be64 tx_untagged;
+ __be64 tx_vlan;
+
+ __be64 tx_64_size;
+ __be64 tx_65_127;
+ __be64 tx_128_255;
+ __be64 tx_256_511;
+ __be64 tx_512_1023;
+ __be64 tx_1024_1518;
+ __be64 tx_1519_max;
+
+ __be64 rx_unicast;
+ __be64 rx_mcastbcast;
+
+ __be64 rx_untagged;
+ __be64 rx_vlan;
+
+ __be64 rx_64_size;
+ __be64 rx_65_127;
+ __be64 rx_128_255;
+ __be64 rx_256_511;
+ __be64 rx_512_1023;
+ __be64 rx_1024_1518;
+ __be64 rx_1519_max;
+
+ __be64 reserved[16];
+} __packed;
+
+/**
+ * struct opa_veswport_error_counters - error counters
+ * @vp_instance: vport instance on the OPA port
+ * @vesw_id: virtual ethernet switch id
+ * @veswport_num: virtual ethernet switch port number
+ * @tx_errors: transmit errors
+ * @rx_errors: receive errors
+ * @tx_smac_filt: smac filter errors
+ * @tx_dlid_zero: transmit packets with invalid dlid
+ * @tx_logic: other transmit errors
+ * @tx_drop_state: packet transmission in non-forwarding port state
+ * @rx_bad_veswid: received packet with invalid vesw id
+ * @rx_runt: received ethernet packet with length < 64 bytes
+ * @rx_oversize: received ethernet packet with length > MTU size
+ * @rx_eth_down: received packets when interface is down
+ * @rx_drop_state: received packets in non-forwarding port state
+ * @rx_logic: other receive errors
+ *
+ * All the above are counters of the corresponding error conditions.
+ */
+struct opa_veswport_error_counters {
+ __be16 vp_instance;
+ __be16 vesw_id;
+ __be32 veswport_num;
+
+ __be64 tx_errors;
+ __be64 rx_errors;
+
+ __be64 rsvd0;
+ __be64 tx_smac_filt;
+ __be64 rsvd1;
+ __be64 rsvd2;
+ __be64 rsvd3;
+ __be64 tx_dlid_zero;
+ __be64 rsvd4;
+ __be64 tx_logic;
+ __be64 rsvd5;
+ __be64 tx_drop_state;
+
+ __be64 rx_bad_veswid;
+ __be64 rsvd6;
+ __be64 rx_runt;
+ __be64 rx_oversize;
+ __be64 rsvd7;
+ __be64 rx_eth_down;
+ __be64 rx_drop_state;
+ __be64 rx_logic;
+ __be64 rsvd8;
+
+ __be64 rsvd9[16];
+} __packed;
+
+/**
+ * struct opa_veswport_trap - Trap message sent to EM by VNIC
+ * @fabric_id: 10 bit fabric id
+ * @veswid: 12 bit virtual ethernet switch id
+ * @veswportnum: logical port number on the Virtual switch
+ * @opaportnum: physical port num (redundant on host)
+ * @veswportindex: switch port index on opa port 0 based
+ * @opcode: operation
+ * @reserved: 32 bit for alignment
+ *
+ * The VNIC will send trap messages to the Ethernet manager to
+ * inform it about changes to the VNIC config, behaviour etc.
+ * This is the format of the trap payload.
+ */
+struct opa_veswport_trap {
+ __be16 fabric_id;
+ __be16 veswid;
+ __be32 veswportnum;
+ __be16 opaportnum;
+ u8 veswportindex;
+ u8 opcode;
+ __be32 reserved;
+} __packed;
+
+/**
+ * struct opa_vnic_iface_mac_entry - single entry in the mac list
+ * @mac_addr: MAC address
+ */
+struct opa_vnic_iface_mac_entry {
+ u8 mac_addr[ETH_ALEN];
+};
+
+/**
+ * struct opa_veswport_iface_macs - Msg to set globally administered MAC
+ * @start_idx: position of first entry (0 based)
+ * @num_macs_in_msg: number of MACs in this message
+ * @tot_macs_in_lst: The total number of MACs the agent has
+ * @gen_count: gen_count to indicate change
+ * @entry: The mac list entry
+ *
+ * The same attribute IDs and attribute modifiers as for locally administered
+ * addresses are used to set globally administered addresses.
+ */
+struct opa_veswport_iface_macs {
+ __be16 start_idx;
+ __be16 num_macs_in_msg;
+ __be16 tot_macs_in_lst;
+ __be16 gen_count;
+ struct opa_vnic_iface_mac_entry entry[0];
+} __packed;
+
+/**
+ * struct opa_vnic_vema_mad - Generic VEMA MAD
+ * @mad_hdr: Generic MAD header
+ * @rmpp_hdr: RMPP header for vendor specific MADs
+ * @oui: Unique org identifier
+ * @data: MAD data
+ */
+struct opa_vnic_vema_mad {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+ u8 reserved;
+ u8 oui[3];
+ u8 data[OPA_VNIC_EMA_DATA];
+};
+
+/**
+ * struct opa_vnic_notice_attr - Generic Notice MAD
+ * @gen_type: Generic/Specific bit and type of notice
+ * @oui_1: Vendor ID byte 1
+ * @oui_2: Vendor ID byte 2
+ * @oui_3: Vendor ID byte 3
+ * @trap_num: Trap number
+ * @toggle_count: Notice toggle bit and count value
+ * @issuer_lid: Trap issuer's lid
+ * @issuer_gid: Issuer GID (only if Report method)
+ * @raw_data: Trap message body
+ */
+struct opa_vnic_notice_attr {
+ u8 gen_type;
+ u8 oui_1;
+ u8 oui_2;
+ u8 oui_3;
+ __be16 trap_num;
+ __be16 toggle_count;
+ __be32 issuer_lid;
+ __be32 reserved;
+ u8 issuer_gid[16];
+ u8 raw_data[64];
+} __packed;
+
+/**
+ * struct opa_vnic_vema_mad_trap - Generic VEMA MAD Trap
+ * @mad_hdr: Generic MAD header
+ * @rmpp_hdr: RMPP header for vendor specific MADs
+ * @oui: Unique org identifier
+ * @notice: Notice structure
+ */
+struct opa_vnic_vema_mad_trap {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+ u8 reserved;
+ u8 oui[3];
+ struct opa_vnic_notice_attr notice;
+};
+
+#endif /* _OPA_VNIC_ENCAP_H */
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
new file mode 100644
index 000000000000..d66540e24885
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC ethtool functions
+ */
+
+#include <linux/ethtool.h>
+
+#include "opa_vnic_internal.h"
+
+enum {NETDEV_STATS, VNIC_STATS};
+
+struct vnic_stats {
+ char stat_string[ETH_GSTRING_LEN];
+ struct {
+ int sizeof_stat;
+ int stat_offset;
+ };
+};
+
+#define VNIC_STAT(m) { FIELD_SIZEOF(struct opa_vnic_stats, m), \
+ offsetof(struct opa_vnic_stats, m) }
+
+static struct vnic_stats vnic_gstrings_stats[] = {
+ /* NETDEV stats */
+ {"rx_packets", VNIC_STAT(netstats.rx_packets)},
+ {"tx_packets", VNIC_STAT(netstats.tx_packets)},
+ {"rx_bytes", VNIC_STAT(netstats.rx_bytes)},
+ {"tx_bytes", VNIC_STAT(netstats.tx_bytes)},
+ {"rx_errors", VNIC_STAT(netstats.rx_errors)},
+ {"tx_errors", VNIC_STAT(netstats.tx_errors)},
+ {"rx_dropped", VNIC_STAT(netstats.rx_dropped)},
+ {"tx_dropped", VNIC_STAT(netstats.tx_dropped)},
+
+ /* SUMMARY counters */
+ {"tx_unicast", VNIC_STAT(tx_grp.unicast)},
+ {"tx_mcastbcast", VNIC_STAT(tx_grp.mcastbcast)},
+ {"tx_untagged", VNIC_STAT(tx_grp.untagged)},
+ {"tx_vlan", VNIC_STAT(tx_grp.vlan)},
+
+ {"tx_64_size", VNIC_STAT(tx_grp.s_64)},
+ {"tx_65_127", VNIC_STAT(tx_grp.s_65_127)},
+ {"tx_128_255", VNIC_STAT(tx_grp.s_128_255)},
+ {"tx_256_511", VNIC_STAT(tx_grp.s_256_511)},
+ {"tx_512_1023", VNIC_STAT(tx_grp.s_512_1023)},
+ {"tx_1024_1518", VNIC_STAT(tx_grp.s_1024_1518)},
+ {"tx_1519_max", VNIC_STAT(tx_grp.s_1519_max)},
+
+ {"rx_unicast", VNIC_STAT(rx_grp.unicast)},
+ {"rx_mcastbcast", VNIC_STAT(rx_grp.mcastbcast)},
+ {"rx_untagged", VNIC_STAT(rx_grp.untagged)},
+ {"rx_vlan", VNIC_STAT(rx_grp.vlan)},
+
+ {"rx_64_size", VNIC_STAT(rx_grp.s_64)},
+ {"rx_65_127", VNIC_STAT(rx_grp.s_65_127)},
+ {"rx_128_255", VNIC_STAT(rx_grp.s_128_255)},
+ {"rx_256_511", VNIC_STAT(rx_grp.s_256_511)},
+ {"rx_512_1023", VNIC_STAT(rx_grp.s_512_1023)},
+ {"rx_1024_1518", VNIC_STAT(rx_grp.s_1024_1518)},
+ {"rx_1519_max", VNIC_STAT(rx_grp.s_1519_max)},
+
+ /* ERROR counters */
+ {"rx_fifo_errors", VNIC_STAT(netstats.rx_fifo_errors)},
+ {"rx_length_errors", VNIC_STAT(netstats.rx_length_errors)},
+
+ {"tx_fifo_errors", VNIC_STAT(netstats.tx_fifo_errors)},
+ {"tx_carrier_errors", VNIC_STAT(netstats.tx_carrier_errors)},
+
+ {"tx_dlid_zero", VNIC_STAT(tx_dlid_zero)},
+ {"tx_drop_state", VNIC_STAT(tx_drop_state)},
+ {"rx_drop_state", VNIC_STAT(rx_drop_state)},
+ {"rx_oversize", VNIC_STAT(rx_oversize)},
+ {"rx_runt", VNIC_STAT(rx_runt)},
+};
+
+#define VNIC_STATS_LEN ARRAY_SIZE(vnic_gstrings_stats)
+
+/* vnic_get_drvinfo - get driver info */
+static void vnic_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, opa_vnic_driver_version,
+ sizeof(drvinfo->version));
+ strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent),
+ sizeof(drvinfo->bus_info));
+}
+
+/* vnic_get_sset_count - get string set count */
+static int vnic_get_sset_count(struct net_device *netdev, int sset)
+{
+ return (sset == ETH_SS_STATS) ? VNIC_STATS_LEN : -EOPNOTSUPP;
+}
+
+/* vnic_get_ethtool_stats - get statistics */
+static void vnic_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_stats vstats;
+ int i;
+
+ memset(&vstats, 0, sizeof(vstats));
+ mutex_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats);
+ for (i = 0; i < VNIC_STATS_LEN; i++) {
+ char *p = (char *)&vstats + vnic_gstrings_stats[i].stat_offset;
+
+ data[i] = (vnic_gstrings_stats[i].sizeof_stat ==
+ sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+ }
+ mutex_unlock(&adapter->stats_lock);
+}
+
+/* vnic_get_strings - get strings */
+static void vnic_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ int i;
+
+ if (stringset != ETH_SS_STATS)
+ return;
+
+ for (i = 0; i < VNIC_STATS_LEN; i++)
+ memcpy(data + i * ETH_GSTRING_LEN,
+ vnic_gstrings_stats[i].stat_string,
+ ETH_GSTRING_LEN);
+}
+
+/* ethtool ops */
+static const struct ethtool_ops opa_vnic_ethtool_ops = {
+ .get_drvinfo = vnic_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = vnic_get_strings,
+ .get_sset_count = vnic_get_sset_count,
+ .get_ethtool_stats = vnic_get_ethtool_stats,
+};
+
+/* opa_vnic_set_ethtool_ops - set ethtool ops */
+void opa_vnic_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &opa_vnic_ethtool_ops;
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
new file mode 100644
index 000000000000..6bba886bec1f
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h
@@ -0,0 +1,329 @@
+#ifndef _OPA_VNIC_INTERNAL_H
+#define _OPA_VNIC_INTERNAL_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC driver internal declarations
+ */
+
+#include <linux/bitops.h>
+#include <linux/etherdevice.h>
+#include <linux/hashtable.h>
+#include <linux/sizes.h>
+#include <rdma/opa_vnic.h>
+
+#include "opa_vnic_encap.h"
+
+#define OPA_VNIC_VLAN_PCP(vlan_tci) \
+ (((vlan_tci) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT)
+
+/* Flow to default port redirection table size */
+#define OPA_VNIC_FLOW_TBL_SIZE 32
+
+/* Invalid port number */
+#define OPA_VNIC_INVALID_PORT 0xff
+
+struct opa_vnic_adapter;
+
+/**
+ * struct __opa_vesw_info - OPA vnic virtual switch info
+ *
+ * Same as opa_vesw_info without bitwise attribute.
+ */
+struct __opa_vesw_info {
+ u16 fabric_id;
+ u16 vesw_id;
+
+ u8 rsvd0[6];
+ u16 def_port_mask;
+
+ u8 rsvd1[2];
+ u16 pkey;
+
+ u8 rsvd2[4];
+ u32 u_mcast_dlid;
+ u32 u_ucast_dlid[OPA_VESW_MAX_NUM_DEF_PORT];
+
+ u8 rsvd3[44];
+ u16 eth_mtu[OPA_VNIC_MAX_NUM_PCP];
+ u16 eth_mtu_non_vlan;
+ u8 rsvd4[2];
+} __packed;
+
+/**
+ * struct __opa_per_veswport_info - OPA vnic per port info
+ *
+ * Same as opa_per_veswport_info without bitwise attribute.
+ */
+struct __opa_per_veswport_info {
+ u32 port_num;
+
+ u8 eth_link_status;
+ u8 rsvd0[3];
+
+ u8 base_mac_addr[ETH_ALEN];
+ u8 config_state;
+ u8 oper_state;
+
+ u16 max_mac_tbl_ent;
+ u16 max_smac_ent;
+ u32 mac_tbl_digest;
+ u8 rsvd1[4];
+
+ u32 encap_slid;
+
+ u8 pcp_to_sc_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_uc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_sc_mc[OPA_VNIC_MAX_NUM_PCP];
+ u8 pcp_to_vl_mc[OPA_VNIC_MAX_NUM_PCP];
+
+ u8 non_vlan_sc_uc;
+ u8 non_vlan_vl_uc;
+ u8 non_vlan_sc_mc;
+ u8 non_vlan_vl_mc;
+
+ u8 rsvd2[48];
+
+ u16 uc_macs_gen_count;
+ u16 mc_macs_gen_count;
+
+ u8 rsvd3[8];
+} __packed;
+
+/**
+ * struct __opa_veswport_info - OPA vnic port info
+ *
+ * Same as opa_veswport_info without bitwise attribute.
+ */
+struct __opa_veswport_info {
+ struct __opa_vesw_info vesw;
+ struct __opa_per_veswport_info vport;
+};
+
+/**
+ * struct __opa_veswport_trap - OPA vnic trap info
+ *
+ * Same as opa_veswport_trap without bitwise attribute.
+ */
+struct __opa_veswport_trap {
+ u16 fabric_id;
+ u16 veswid;
+ u32 veswportnum;
+ u16 opaportnum;
+ u8 veswportindex;
+ u8 opcode;
+ u32 reserved;
+} __packed;
+
+/**
+ * struct opa_vnic_ctrl_port - OPA virtual NIC control port
+ * @ibdev: pointer to ib device
+ * @ops: opa vnic control operations
+ * @num_ports: number of opa ports
+ */
+struct opa_vnic_ctrl_port {
+ struct ib_device *ibdev;
+ struct opa_vnic_ctrl_ops *ops;
+ u8 num_ports;
+};
+
+/**
+ * struct opa_vnic_adapter - OPA VNIC netdev private data structure
+ * @netdev: pointer to associated netdev
+ * @ibdev: ib device
+ * @cport: pointer to opa vnic control port
+ * @rn_ops: rdma netdev's net_device_ops
+ * @port_num: OPA port number
+ * @vport_num: vesw port number
+ * @lock: adapter lock
+ * @info: virtual ethernet switch port information
+ * @vema_mac_addr: mac address configured by vema
+ * @umac_hash: unicast maclist hash
+ * @mmac_hash: multicast maclist hash
+ * @mactbl: hash table of MAC entries
+ * @mactbl_lock: mac table lock
+ * @stats_lock: statistics lock
+ * @flow_tbl: flow to default port redirection table
+ * @trap_timeout: trap timeout
+ * @trap_count: no. of traps allowed within timeout period
+ */
+struct opa_vnic_adapter {
+ struct net_device *netdev;
+ struct ib_device *ibdev;
+ struct opa_vnic_ctrl_port *cport;
+ const struct net_device_ops *rn_ops;
+
+ u8 port_num;
+ u8 vport_num;
+
+ /* Lock used around concurrent updates to netdev */
+ struct mutex lock;
+
+ struct __opa_veswport_info info;
+ u8 vema_mac_addr[ETH_ALEN];
+ u32 umac_hash;
+ u32 mmac_hash;
+ struct hlist_head __rcu *mactbl;
+
+ /* Lock used to protect updates to mac table */
+ struct mutex mactbl_lock;
+
+ /* Lock used to protect access to vnic counters */
+ struct mutex stats_lock;
+
+ u8 flow_tbl[OPA_VNIC_FLOW_TBL_SIZE];
+
+ unsigned long trap_timeout;
+ u8 trap_count;
+};
+
+/* Same as opa_veswport_mactable_entry, but without bitwise attribute */
+struct __opa_vnic_mactable_entry {
+ u8 mac_addr[ETH_ALEN];
+ u8 mac_addr_mask[ETH_ALEN];
+ u32 dlid_sd;
+} __packed;
+
+/**
+ * struct opa_vnic_mac_tbl_node - OPA VNIC mac table node
+ * @hlist: hash list handle
+ * @index: index of entry in the mac table
+ * @entry: entry in the table
+ */
+struct opa_vnic_mac_tbl_node {
+ struct hlist_node hlist;
+ u16 index;
+ struct __opa_vnic_mactable_entry entry;
+};
+
+#define v_dbg(format, arg...) \
+ netdev_dbg(adapter->netdev, format, ## arg)
+#define v_err(format, arg...) \
+ netdev_err(adapter->netdev, format, ## arg)
+#define v_info(format, arg...) \
+ netdev_info(adapter->netdev, format, ## arg)
+#define v_warn(format, arg...) \
+ netdev_warn(adapter->netdev, format, ## arg)
+
+#define c_err(format, arg...) \
+ dev_err(&cport->ibdev->dev, format, ## arg)
+#define c_info(format, arg...) \
+ dev_info(&cport->ibdev->dev, format, ## arg)
+#define c_dbg(format, arg...) \
+ dev_dbg(&cport->ibdev->dev, format, ## arg)
+
+/* The maximum allowed entries in the mac table */
+#define OPA_VNIC_MAC_TBL_MAX_ENTRIES 2048
+/* Limit of smac entries in mac table */
+#define OPA_VNIC_MAX_SMAC_LIMIT 256
+
+/* The last octet of the MAC address is used as the key to the hash table */
+#define OPA_VNIC_MAC_HASH_IDX 5
+
+/* The VNIC MAC hash table is of size 2^8 */
+#define OPA_VNIC_MAC_TBL_HASH_BITS 8
+#define OPA_VNIC_MAC_TBL_SIZE BIT(OPA_VNIC_MAC_TBL_HASH_BITS)
+
+/* VNIC HASH MACROS */
+#define vnic_hash_init(hashtable) __hash_init(hashtable, OPA_VNIC_MAC_TBL_SIZE)
+
+#define vnic_hash_add(hashtable, node, key) \
+ hlist_add_head(node, \
+ &hashtable[hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE))])
+
+#define vnic_hash_for_each_safe(name, bkt, tmp, obj, member) \
+ for ((bkt) = 0, obj = NULL; \
+ !obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++) \
+ hlist_for_each_entry_safe(obj, tmp, &name[bkt], member)
+
+#define vnic_hash_for_each_possible(name, obj, member, key) \
+ hlist_for_each_entry(obj, \
+ &name[hash_min(key, ilog2(OPA_VNIC_MAC_TBL_SIZE))], member)
+
+#define vnic_hash_for_each(name, bkt, obj, member) \
+ for ((bkt) = 0, obj = NULL; \
+ !obj && (bkt) < OPA_VNIC_MAC_TBL_SIZE; (bkt)++) \
+ hlist_for_each_entry(obj, &name[bkt], member)
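+
+/*
+ * Illustrative lookup sketch (not part of the driver): a MAC table entry
+ * is found by hashing on octet OPA_VNIC_MAC_HASH_IDX of the address and
+ * walking that bucket, e.g.:
+ *
+ *	struct opa_vnic_mac_tbl_node *node;
+ *	u8 key = mac_addr[OPA_VNIC_MAC_HASH_IDX];
+ *
+ *	vnic_hash_for_each_possible(mactbl, node, hlist, key)
+ *		if (ether_addr_equal(node->entry.mac_addr, mac_addr))
+ *			return node;
+ */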
+
+extern char opa_vnic_driver_name[];
+extern const char opa_vnic_driver_version[];
+
+struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
+ u8 port_num, u8 vport_num);
+void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter);
+void opa_vnic_encap_skb(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+u8 opa_vnic_get_vl(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+u8 opa_vnic_calc_entropy(struct opa_vnic_adapter *adapter, struct sk_buff *skb);
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter);
+void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter);
+void opa_vnic_query_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl);
+int opa_vnic_update_mac_tbl(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_mactable *tbl);
+void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs);
+void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs);
+void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_summary_counters *cntrs);
+void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_error_counters *cntrs);
+void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info);
+void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info);
+void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info);
+void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info);
+void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event);
+void opa_vnic_set_ethtool_ops(struct net_device *netdev);
+void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
+ struct __opa_veswport_trap *data, u32 lid);
+
+#endif /* _OPA_VNIC_INTERNAL_H */
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
new file mode 100644
index 000000000000..905f39dda5aa
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA Virtual Network Interface Controller (VNIC) driver
+ * netdev functionality.
+ */
+
+#include <linux/module.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+
+#include "opa_vnic_internal.h"
+
+#define OPA_TX_TIMEOUT_MS 1000
+
+#define OPA_VNIC_SKB_HEADROOM \
+ ALIGN((OPA_VNIC_HDR_LEN + OPA_VNIC_SKB_MDATA_LEN), 8)
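+/*
+ * Headroom reserved for the OPA VNIC encapsulation header and the skb
+ * metadata prepended in opa_vnic_select_queue(), rounded up to an
+ * 8-byte multiple.
+ */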
+
+/* opa_vnic specific implementation overriding the rdma netdev stats op */
+static void opa_vnic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_stats vstats;
+
+ memset(&vstats, 0, sizeof(vstats));
+ mutex_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(netdev, &vstats.netstats);
+ mutex_unlock(&adapter->stats_lock);
+ memcpy(stats, &vstats.netstats, sizeof(*stats));
+}
+
+/* opa_netdev_start_xmit - transmit function */
+static netdev_tx_t opa_netdev_start_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+
+ v_dbg("xmit: queue %d skb len %d\n", skb->queue_mapping, skb->len);
+	/* pad to ensure minimum Ethernet packet length */
+ if (unlikely(skb->len < ETH_ZLEN)) {
+ if (skb_padto(skb, ETH_ZLEN))
+ return NETDEV_TX_OK;
+
+ skb_put(skb, ETH_ZLEN - skb->len);
+ }
+
+ opa_vnic_encap_skb(adapter, skb);
+ return adapter->rn_ops->ndo_start_xmit(skb, netdev);
+}
+
+static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct opa_vnic_skb_mdata *mdata;
+ int rc;
+
+ /* pass entropy and vl as metadata in skb */
+ mdata = (struct opa_vnic_skb_mdata *)skb_push(skb, sizeof(*mdata));
+ mdata->entropy = opa_vnic_calc_entropy(adapter, skb);
+ mdata->vl = opa_vnic_get_vl(adapter, skb);
+ rc = adapter->rn_ops->ndo_select_queue(netdev, skb,
+ accel_priv, fallback);
+ skb_pull(skb, sizeof(*mdata));
+ return rc;
+}
+
+/* opa_vnic_process_vema_config - process vema configuration updates */
+void opa_vnic_process_vema_config(struct opa_vnic_adapter *adapter)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct rdma_netdev *rn = netdev_priv(adapter->netdev);
+ u8 port_num[OPA_VESW_MAX_NUM_DEF_PORT] = { 0 };
+ struct net_device *netdev = adapter->netdev;
+ u8 i, port_count = 0;
+ u16 port_mask;
+
+ /* If the base_mac_addr is changed, update the interface mac address */
+ if (memcmp(info->vport.base_mac_addr, adapter->vema_mac_addr,
+ ARRAY_SIZE(info->vport.base_mac_addr))) {
+ struct sockaddr saddr;
+
+ memcpy(saddr.sa_data, info->vport.base_mac_addr,
+ ARRAY_SIZE(info->vport.base_mac_addr));
+ mutex_lock(&adapter->lock);
+ eth_mac_addr(netdev, &saddr);
+ memcpy(adapter->vema_mac_addr,
+ info->vport.base_mac_addr, ETH_ALEN);
+ mutex_unlock(&adapter->lock);
+ }
+
+ rn->set_id(netdev, info->vesw.vesw_id);
+
+ /* Handle MTU limit change */
+ rtnl_lock();
+ netdev->max_mtu = max_t(unsigned int, info->vesw.eth_mtu_non_vlan,
+ netdev->min_mtu);
+ if (netdev->mtu > netdev->max_mtu)
+ dev_set_mtu(netdev, netdev->max_mtu);
+ rtnl_unlock();
+
+ /* Update flow to default port redirection table */
+ port_mask = info->vesw.def_port_mask;
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++) {
+ if (port_mask & 1)
+ port_num[port_count++] = i;
+ port_mask >>= 1;
+ }
+
+ /*
+	 * Build the flow table. The flow table is required when the
+	 * destination LID is not available; up to OPA_VNIC_FLOW_TBL_SIZE
+	 * flows are supported. Each flow needs a default port number to get
+	 * its dlid from the u_ucast_dlid array.
+ */
+ for (i = 0; i < OPA_VNIC_FLOW_TBL_SIZE; i++)
+ adapter->flow_tbl[i] = port_count ? port_num[i % port_count] :
+ OPA_VNIC_INVALID_PORT;
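+	/*
+	 * Example (illustrative): a def_port_mask of 0x5 enables default
+	 * ports 0 and 2, so the flow table alternates between them:
+	 * flow_tbl[] = { 0, 2, 0, 2, ... }.
+	 */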
+
+ /* Operational state can only be DROP_ALL or FORWARDING */
+ if (info->vport.config_state == OPA_VNIC_STATE_FORWARDING) {
+ info->vport.oper_state = OPA_VNIC_STATE_FORWARDING;
+ netif_dormant_off(netdev);
+ } else {
+ info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL;
+ netif_dormant_on(netdev);
+ }
+}
+
+/*
+ * Set the power on default values in adapter's vema interface structure.
+ */
+static inline void opa_vnic_set_pod_values(struct opa_vnic_adapter *adapter)
+{
+ adapter->info.vport.max_mac_tbl_ent = OPA_VNIC_MAC_TBL_MAX_ENTRIES;
+ adapter->info.vport.max_smac_ent = OPA_VNIC_MAX_SMAC_LIMIT;
+ adapter->info.vport.config_state = OPA_VNIC_STATE_DROP_ALL;
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN;
+}
+
+/* opa_vnic_set_mac_addr - change mac address */
+static int opa_vnic_set_mac_addr(struct net_device *netdev, void *addr)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct sockaddr *sa = addr;
+ int rc;
+
+ if (!memcmp(netdev->dev_addr, sa->sa_data, ETH_ALEN))
+ return 0;
+
+ mutex_lock(&adapter->lock);
+ rc = eth_mac_addr(netdev, addr);
+ mutex_unlock(&adapter->lock);
+ if (rc)
+ return rc;
+
+ adapter->info.vport.uc_macs_gen_count++;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE);
+ return 0;
+}
+
+/*
+ * opa_vnic_mac_send_event - post event on possible mac list exchange
+ *  Send a trap when the digest of the uc/mc mac list differs from the
+ *  previous run. The digest is computed in a manner similar to a checksum.
+ */
+static void opa_vnic_mac_send_event(struct net_device *netdev, u8 event)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ struct netdev_hw_addr *ha;
+ struct netdev_hw_addr_list *hw_list;
+ u32 *ref_crc;
+ u32 l, crc = 0;
+
+ switch (event) {
+ case OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE:
+ hw_list = &netdev->uc;
+ adapter->info.vport.uc_macs_gen_count++;
+ ref_crc = &adapter->umac_hash;
+ break;
+ case OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE:
+ hw_list = &netdev->mc;
+ adapter->info.vport.mc_macs_gen_count++;
+ ref_crc = &adapter->mmac_hash;
+ break;
+ default:
+ return;
+ }
+ netdev_hw_addr_list_for_each(ha, hw_list) {
+ crc = crc32_le(crc, ha->addr, ETH_ALEN);
+ }
+ l = netdev_hw_addr_list_count(hw_list) * ETH_ALEN;
+ crc = ~crc32_le(crc, (void *)&l, sizeof(l));
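+	/*
+	 * The digest is a CRC over every address in the list (in list
+	 * order) followed by the total list length in bytes, so any change
+	 * to the address set is very likely to change the digest.
+	 */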
+
+ if (crc != *ref_crc) {
+ *ref_crc = crc;
+ opa_vnic_vema_report_event(adapter, event);
+ }
+}
+
+/* opa_vnic_set_rx_mode - handle uc/mc mac list change */
+static void opa_vnic_set_rx_mode(struct net_device *netdev)
+{
+ opa_vnic_mac_send_event(netdev,
+ OPA_VESWPORT_TRAP_IFACE_UCAST_MAC_CHANGE);
+
+ opa_vnic_mac_send_event(netdev,
+ OPA_VESWPORT_TRAP_IFACE_MCAST_MAC_CHANGE);
+}
+
+/* opa_netdev_open - activate network interface */
+static int opa_netdev_open(struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ int rc;
+
+ rc = adapter->rn_ops->ndo_open(adapter->netdev);
+ if (rc) {
+ v_dbg("open failed %d\n", rc);
+ return rc;
+ }
+
+ /* Update eth link status and send trap */
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_UP;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE);
+ return 0;
+}
+
+/* opa_netdev_close - disable network interface */
+static int opa_netdev_close(struct net_device *netdev)
+{
+ struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
+ int rc;
+
+ rc = adapter->rn_ops->ndo_stop(adapter->netdev);
+ if (rc) {
+ v_dbg("close failed %d\n", rc);
+ return rc;
+ }
+
+ /* Update eth link status and send trap */
+ adapter->info.vport.eth_link_status = OPA_VNIC_ETH_LINK_DOWN;
+ opa_vnic_vema_report_event(adapter,
+ OPA_VESWPORT_TRAP_ETH_LINK_STATUS_CHANGE);
+ return 0;
+}
+
+/* netdev ops */
+static const struct net_device_ops opa_netdev_ops = {
+ .ndo_open = opa_netdev_open,
+ .ndo_stop = opa_netdev_close,
+ .ndo_start_xmit = opa_netdev_start_xmit,
+ .ndo_get_stats64 = opa_vnic_get_stats64,
+ .ndo_set_rx_mode = opa_vnic_set_rx_mode,
+ .ndo_select_queue = opa_vnic_select_queue,
+ .ndo_set_mac_address = opa_vnic_set_mac_addr,
+};
+
+/* opa_vnic_add_netdev - create vnic netdev interface */
+struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
+ u8 port_num, u8 vport_num)
+{
+ struct opa_vnic_adapter *adapter;
+ struct net_device *netdev;
+ struct rdma_netdev *rn;
+ int rc;
+
+ netdev = ibdev->alloc_rdma_netdev(ibdev, port_num,
+ RDMA_NETDEV_OPA_VNIC,
+ "veth%d", NET_NAME_UNKNOWN,
+ ether_setup);
+ if (!netdev)
+ return ERR_PTR(-ENOMEM);
+ else if (IS_ERR(netdev))
+ return ERR_CAST(netdev);
+
+ adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+ if (!adapter) {
+ rc = -ENOMEM;
+ goto adapter_err;
+ }
+
+ rn = netdev_priv(netdev);
+ rn->clnt_priv = adapter;
+ rn->hca = ibdev;
+ rn->port_num = port_num;
+ adapter->netdev = netdev;
+ adapter->ibdev = ibdev;
+ adapter->port_num = port_num;
+ adapter->vport_num = vport_num;
+ adapter->rn_ops = netdev->netdev_ops;
+
+ netdev->netdev_ops = &opa_netdev_ops;
+ netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netdev->hard_header_len += OPA_VNIC_SKB_HEADROOM;
+ mutex_init(&adapter->lock);
+ mutex_init(&adapter->mactbl_lock);
+ mutex_init(&adapter->stats_lock);
+
+ SET_NETDEV_DEV(netdev, ibdev->dev.parent);
+
+ opa_vnic_set_ethtool_ops(netdev);
+
+ opa_vnic_set_pod_values(adapter);
+
+ rc = register_netdev(netdev);
+ if (rc)
+ goto netdev_err;
+
+ netif_carrier_off(netdev);
+ netif_dormant_on(netdev);
+ v_info("initialized\n");
+
+ return adapter;
+netdev_err:
+ mutex_destroy(&adapter->lock);
+ mutex_destroy(&adapter->mactbl_lock);
+ mutex_destroy(&adapter->stats_lock);
+ kfree(adapter);
+adapter_err:
+ ibdev->free_rdma_netdev(netdev);
+
+ return ERR_PTR(rc);
+}
+
+/* opa_vnic_rem_netdev - remove vnic netdev interface */
+void opa_vnic_rem_netdev(struct opa_vnic_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct ib_device *ibdev = adapter->ibdev;
+
+ v_info("removing\n");
+ unregister_netdev(netdev);
+ opa_vnic_release_mac_tbl(adapter);
+ mutex_destroy(&adapter->lock);
+ mutex_destroy(&adapter->mactbl_lock);
+ mutex_destroy(&adapter->stats_lock);
+ kfree(adapter);
+ ibdev->free_rdma_netdev(netdev);
+}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
new file mode 100644
index 000000000000..875694f9a7f9
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -0,0 +1,1056 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA Virtual Network Interface Controller (VNIC)
+ * Ethernet Management Agent (EMA) driver
+ */
+
+#include <linux/module.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+
+#include "opa_vnic_internal.h"
+
+#define DRV_VERSION "1.0"
+char opa_vnic_driver_name[] = "opa_vnic";
+const char opa_vnic_driver_version[] = DRV_VERSION;
+
+/*
+ * The trap service level is kept in bits 3 to 7 in the trap_sl_rsvd
+ * field in the class port info MAD.
+ */
+#define GET_TRAP_SL_FROM_CLASS_PORT_INFO(x) (((x) >> 3) & 0x1f)
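+/* For example, a trap_sl_rsvd value of 0x28 yields a trap SL of 5. */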
+
+/* Cap trap bursts to a reasonable limit good for normal cases */
+#define OPA_VNIC_TRAP_BURST_LIMIT 4
+
+/*
+ * VNIC trap limit timeout.
+ * Derived from the cap_mask2 response timeout (4.096 usec * 2^18 =
+ * 1.0737 secs), i.e. roughly 0.9 trap burst windows per second.
+ * See IB spec 13.4.6.2.1 PortInfoSubnetTimeout and 13.4.9 Traps.
+ */
+#define OPA_VNIC_TRAP_TIMEOUT ((4096 * (1UL << 18)) / 1000)
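+/*
+ * For reference: (4096 * (1 << 18)) / 1000 = 1,073,741, i.e. ~1.07 secs
+ * when the value is passed to usecs_to_jiffies() below.
+ */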
+
+#define OPA_VNIC_UNSUP_ATTR \
+ cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB)
+
+#define OPA_VNIC_INVAL_ATTR \
+ cpu_to_be16(IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE)
+
+#define OPA_VNIC_CLASS_CAP_TRAP 0x1
+
+/* Maximum number of VNIC ports supported */
+#define OPA_VNIC_MAX_NUM_VPORT 255
+
+/**
+ * struct opa_vnic_vema_port -- VNIC VEMA port details
+ * @cport: pointer to port
+ * @mad_agent: pointer to mad agent for port
+ * @class_port_info: class port info set by the EM
+ * @tid: Transaction id
+ * @port_num: OPA port number
+ * @vport_idr: vnic ports idr
+ * @event_handler: ib event handler
+ * @lock: adapter interface lock
+ */
+struct opa_vnic_vema_port {
+ struct opa_vnic_ctrl_port *cport;
+ struct ib_mad_agent *mad_agent;
+ struct opa_class_port_info class_port_info;
+ u64 tid;
+ u8 port_num;
+ struct idr vport_idr;
+ struct ib_event_handler event_handler;
+
+ /* Lock to query/update network adapter */
+ struct mutex lock;
+};
+
+static void opa_vnic_vema_add_one(struct ib_device *device);
+static void opa_vnic_vema_rem_one(struct ib_device *device,
+ void *client_data);
+
+static struct ib_client opa_vnic_client = {
+ .name = opa_vnic_driver_name,
+ .add = opa_vnic_vema_add_one,
+ .remove = opa_vnic_vema_rem_one,
+};
+
+/**
+ * vema_get_vport_num -- Get the vnic port number from the mad
+ * @recvd_mad: Received mad
+ *
+ * Return: the vnic port number
+ */
+static inline u8 vema_get_vport_num(struct opa_vnic_vema_mad *recvd_mad)
+{
+ return be32_to_cpu(recvd_mad->mad_hdr.attr_mod) & 0xff;
+}
+
+/**
+ * vema_get_vport_adapter -- Get vnic port adapter from recvd mad
+ * @recvd_mad: received mad
+ * @port: ptr to port struct on which MAD was recvd
+ *
+ * Return: vnic adapter
+ */
+static inline struct opa_vnic_adapter *
+vema_get_vport_adapter(struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_port *port)
+{
+ u8 vport_num = vema_get_vport_num(recvd_mad);
+
+ return idr_find(&port->vport_idr, vport_num);
+}
+
+/**
+ * vema_mac_tbl_req_ok -- Check if mac request has correct values
+ * @mac_tbl: mac table
+ *
+ * This function checks for the validity of the offset and number of
+ * entries required.
+ *
+ * Return: true if offset and num_entries are valid
+ */
+static inline bool vema_mac_tbl_req_ok(struct opa_veswport_mactable *mac_tbl)
+{
+ u16 offset, num_entries;
+ u16 req_entries = ((OPA_VNIC_EMA_DATA - sizeof(*mac_tbl)) /
+ sizeof(mac_tbl->tbl_entries[0]));
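+	/* req_entries: number of table entries that fit in one EMA MAD payload */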
+
+ offset = be16_to_cpu(mac_tbl->offset);
+ num_entries = be16_to_cpu(mac_tbl->num_entries);
+
+ return ((num_entries <= req_entries) &&
+ (offset + num_entries <= OPA_VNIC_MAC_TBL_MAX_ENTRIES));
+}
+
+/*
+ * Return the power on default values in the port info structure
+ * in big endian format as required by MAD.
+ */
+static inline void vema_get_pod_values(struct opa_veswport_info *port_info)
+{
+ memset(port_info, 0, sizeof(*port_info));
+ port_info->vport.max_mac_tbl_ent =
+ cpu_to_be16(OPA_VNIC_MAC_TBL_MAX_ENTRIES);
+ port_info->vport.max_smac_ent =
+ cpu_to_be16(OPA_VNIC_MAX_SMAC_LIMIT);
+ port_info->vport.oper_state = OPA_VNIC_STATE_DROP_ALL;
+ port_info->vport.config_state = OPA_VNIC_STATE_DROP_ALL;
+}
+
+/**
+ * vema_add_vport -- Add a new vnic port
+ * @port: ptr to opa_vnic_vema_port struct
+ * @vport_num: vnic port number (to be added)
+ *
+ * Return: pointer to the vnic adapter on success, error pointer otherwise
+ */
+static struct opa_vnic_adapter *vema_add_vport(struct opa_vnic_vema_port *port,
+ u8 vport_num)
+{
+ struct opa_vnic_ctrl_port *cport = port->cport;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = opa_vnic_add_netdev(cport->ibdev, port->port_num, vport_num);
+ if (!IS_ERR(adapter)) {
+ int rc;
+
+ adapter->cport = cport;
+ rc = idr_alloc(&port->vport_idr, adapter, vport_num,
+ vport_num + 1, GFP_NOWAIT);
+ if (rc < 0) {
+ opa_vnic_rem_netdev(adapter);
+ adapter = ERR_PTR(rc);
+ }
+ }
+
+ return adapter;
+}
+
+/**
+ * vema_get_class_port_info -- Get class port info for port
+ * @port: Port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function copies the latest class port info value set for the
+ * port and stores it for generating traps
+ */
+static void vema_get_class_port_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_class_port_info *port_info;
+
+ port_info = (struct opa_class_port_info *)rsp_mad->data;
+ memcpy(port_info, &port->class_port_info, sizeof(*port_info));
+	port_info->base_version = OPA_MGMT_BASE_VERSION;
+ port_info->class_version = OPA_EMA_CLASS_VERSION;
+
+ /*
+ * Set capability mask bit indicating agent generates traps,
+ * and set the maximum number of VNIC ports supported.
+ */
+ port_info->cap_mask = cpu_to_be16((OPA_VNIC_CLASS_CAP_TRAP |
+ (OPA_VNIC_MAX_NUM_VPORT << 8)));
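+	/* i.e. 0xff01: bit 0 = trap generation, high byte = max vports (255) */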
+
+ /*
+	 * Since a get routine is always sent by the EM first, we
+ * set the expected response time to
+ * 4.096 usec * 2^18 == 1.0737 sec here.
+ */
+ port_info->cap_mask2_resp_time = cpu_to_be32(18);
+}
+
+/**
+ * vema_set_class_port_info -- Set class port info for port
+ * @port: Port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function updates the port class info for the specific vnic
+ * and sets up the response mad data
+ */
+static void vema_set_class_port_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ memcpy(&port->class_port_info, recvd_mad->data,
+ sizeof(port->class_port_info));
+
+ vema_get_class_port_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_veswport_info -- Get veswport info
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ */
+static void vema_get_veswport_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_info *port_info =
+ (struct opa_veswport_info *)rsp_mad->data;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ memset(port_info, 0, sizeof(*port_info));
+ opa_vnic_get_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_get_per_veswport_info(adapter,
+ &port_info->vport);
+ } else {
+ vema_get_pod_values(port_info);
+ }
+}
+
+/**
+ * vema_set_veswport_info -- Set veswport info
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function sets the veswport info for the vnic, adding the vnic
+ * port if it does not already exist.
+ */
+static void vema_set_veswport_info(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_vnic_ctrl_port *cport = port->cport;
+ struct opa_veswport_info *port_info;
+ struct opa_vnic_adapter *adapter;
+ u8 vport_num;
+
+ vport_num = vema_get_vport_num(recvd_mad);
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ adapter = vema_add_vport(port, vport_num);
+ if (IS_ERR(adapter)) {
+ c_err("failed to add vport %d: %ld\n",
+ vport_num, PTR_ERR(adapter));
+ goto err_exit;
+ }
+ }
+
+ port_info = (struct opa_veswport_info *)recvd_mad->data;
+ opa_vnic_set_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_set_per_veswport_info(adapter, &port_info->vport);
+
+ /* Process the new config settings */
+ opa_vnic_process_vema_config(adapter);
+
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+ return;
+
+err_exit:
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+}
+
+/**
+ * vema_get_mac_entries -- Get MAC entries in VNIC MAC table
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function gets the MAC entries that are programmed into
+ * the VNIC MAC forwarding table. It checks for the validity of
+ * the index into the MAC table and the number of entries that
+ * are to be retrieved.
+ */
+static void vema_get_mac_entries(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_mactable *mac_tbl_in, *mac_tbl_out;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ mac_tbl_in = (struct opa_veswport_mactable *)recvd_mad->data;
+ mac_tbl_out = (struct opa_veswport_mactable *)rsp_mad->data;
+
+ if (vema_mac_tbl_req_ok(mac_tbl_in)) {
+ mac_tbl_out->offset = mac_tbl_in->offset;
+ mac_tbl_out->num_entries = mac_tbl_in->num_entries;
+ opa_vnic_query_mac_tbl(adapter, mac_tbl_out);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_set_mac_entries -- Set MAC entries in VNIC MAC table
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function sets the MAC entries in the VNIC forwarding table
+ * It checks for the validity of the index and the number of forwarding
+ * table entries to be programmed.
+ */
+static void vema_set_mac_entries(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_mactable *mac_tbl;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ mac_tbl = (struct opa_veswport_mactable *)recvd_mad->data;
+ if (vema_mac_tbl_req_ok(mac_tbl)) {
+ if (opa_vnic_update_mac_tbl(adapter, mac_tbl))
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ }
+ vema_get_mac_entries(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_set_delete_vesw -- Reset VESW info to POD values
+ * @port: source port on which MAD was received
+ * @recvd_mad: pointer to the received mad
+ * @rsp_mad: pointer to response mad
+ *
+ * This function clears all the fields of veswport info for the requested vesw
+ * and sets them back to the power-on default values. It does not delete the
+ * vesw.
+ */
+static void vema_set_delete_vesw(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_info *port_info =
+ (struct opa_veswport_info *)rsp_mad->data;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ vema_get_pod_values(port_info);
+ opa_vnic_set_vesw_info(adapter, &port_info->vesw);
+ opa_vnic_set_per_veswport_info(adapter, &port_info->vport);
+
+ /* Process the new config settings */
+ opa_vnic_process_vema_config(adapter);
+
+ opa_vnic_release_mac_tbl(adapter);
+
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+}
+
+/**
+ * vema_get_mac_list -- Get the unicast/multicast macs.
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ * @attr_id: Attribute ID indicating multicast or unicast mac list
+ */
+static void vema_get_mac_list(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad,
+ u16 attr_id)
+{
+ struct opa_veswport_iface_macs *macs_in, *macs_out;
+ int max_entries = (OPA_VNIC_EMA_DATA - sizeof(*macs_out)) / ETH_ALEN;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (!adapter) {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ return;
+ }
+
+ macs_in = (struct opa_veswport_iface_macs *)recvd_mad->data;
+ macs_out = (struct opa_veswport_iface_macs *)rsp_mad->data;
+
+ macs_out->start_idx = macs_in->start_idx;
+ if (macs_in->num_macs_in_msg)
+ macs_out->num_macs_in_msg = macs_in->num_macs_in_msg;
+ else
+ macs_out->num_macs_in_msg = cpu_to_be16(max_entries);
+
+ if (attr_id == OPA_EM_ATTR_IFACE_MCAST_MACS)
+ opa_vnic_query_mcast_macs(adapter, macs_out);
+ else
+ opa_vnic_query_ucast_macs(adapter, macs_out);
+}
+
+/**
+ * vema_get_summary_counters -- Gets summary counters.
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get_summary_counters(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_summary_counters *cntrs;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ cntrs = (struct opa_veswport_summary_counters *)rsp_mad->data;
+ opa_vnic_get_summary_counters(adapter, cntrs);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_get_error_counters -- Gets error counters.
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get_error_counters(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ struct opa_veswport_error_counters *cntrs;
+ struct opa_vnic_adapter *adapter;
+
+ adapter = vema_get_vport_adapter(recvd_mad, port);
+ if (adapter) {
+ cntrs = (struct opa_veswport_error_counters *)rsp_mad->data;
+ opa_vnic_get_error_counters(adapter, cntrs);
+ } else {
+ rsp_mad->mad_hdr.status = OPA_VNIC_INVAL_ATTR;
+ }
+}
+
+/**
+ * vema_get -- Process received get MAD
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_get(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
+
+ switch (attr_id) {
+ case OPA_EM_ATTR_CLASS_PORT_INFO:
+ vema_get_class_port_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_INFO:
+ vema_get_veswport_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES:
+ vema_get_mac_entries(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_IFACE_UCAST_MACS:
+ /* fall through */
+ case OPA_EM_ATTR_IFACE_MCAST_MACS:
+ vema_get_mac_list(port, recvd_mad, rsp_mad, attr_id);
+ break;
+ case OPA_EM_ATTR_VESWPORT_SUMMARY_COUNTERS:
+ vema_get_summary_counters(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_ERROR_COUNTERS:
+ vema_get_error_counters(port, recvd_mad, rsp_mad);
+ break;
+ default:
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+}
+
+/**
+ * vema_set -- Process received set MAD
+ * @port: source port on which MAD was received
+ * @recvd_mad: Received mad contains fields to set vnic parameters
+ * @rsp_mad: Response mad to be built
+ */
+static void vema_set(struct opa_vnic_vema_port *port,
+ struct opa_vnic_vema_mad *recvd_mad,
+ struct opa_vnic_vema_mad *rsp_mad)
+{
+ u16 attr_id = be16_to_cpu(recvd_mad->mad_hdr.attr_id);
+
+ switch (attr_id) {
+ case OPA_EM_ATTR_CLASS_PORT_INFO:
+ vema_set_class_port_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_INFO:
+ vema_set_veswport_info(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_VESWPORT_MAC_ENTRIES:
+ vema_set_mac_entries(port, recvd_mad, rsp_mad);
+ break;
+ case OPA_EM_ATTR_DELETE_VESW:
+ vema_set_delete_vesw(port, recvd_mad, rsp_mad);
+ break;
+ default:
+ rsp_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+}
+
+/**
+ * vema_send -- Send handler for VEMA MAD agent
+ * @mad_agent: pointer to the mad agent
+ * @mad_wc: pointer to mad send work completion information
+ *
+ * Free all the data structures associated with the sent MAD
+ */
+static void vema_send(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_wc *mad_wc)
+{
+ rdma_destroy_ah(mad_wc->send_buf->ah);
+ ib_free_send_mad(mad_wc->send_buf);
+}
+
+/**
+ * vema_recv -- Recv handler for VEMA MAD agent
+ * @mad_agent: pointer to the mad agent
+ * @send_buf: Send buffer if found, else NULL
+ * @mad_wc: pointer to mad send work completion information
+ *
+ * Handle only set and get methods and respond to other methods
+ * as unsupported. Allocate response buffer and address handle
+ * for the response MAD.
+ */
+static void vema_recv(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
+ struct ib_mad_recv_wc *mad_wc)
+{
+ struct opa_vnic_vema_port *port;
+ struct ib_ah *ah;
+ struct ib_mad_send_buf *rsp;
+ struct opa_vnic_vema_mad *vema_mad;
+
+ if (!mad_wc || !mad_wc->recv_buf.mad)
+ return;
+
+ port = mad_agent->context;
+ ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
+ mad_wc->recv_buf.grh, mad_agent->port_num);
+ if (IS_ERR(ah))
+ goto free_recv_mad;
+
+ rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
+ mad_wc->wc->pkey_index, 0,
+ IB_MGMT_VENDOR_HDR, OPA_VNIC_EMA_DATA,
+ GFP_KERNEL, OPA_MGMT_BASE_VERSION);
+ if (IS_ERR(rsp))
+ goto err_rsp;
+
+ rsp->ah = ah;
+ vema_mad = rsp->mad;
+ memcpy(vema_mad, mad_wc->recv_buf.mad, IB_MGMT_VENDOR_HDR);
+ vema_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+ vema_mad->mad_hdr.status = 0;
+
+ /* Lock ensures network adapter is not removed */
+ mutex_lock(&port->lock);
+
+ switch (mad_wc->recv_buf.mad->mad_hdr.method) {
+ case IB_MGMT_METHOD_GET:
+ vema_get(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad,
+ vema_mad);
+ break;
+ case IB_MGMT_METHOD_SET:
+ vema_set(port, (struct opa_vnic_vema_mad *)mad_wc->recv_buf.mad,
+ vema_mad);
+ break;
+ default:
+ vema_mad->mad_hdr.status = OPA_VNIC_UNSUP_ATTR;
+ break;
+ }
+ mutex_unlock(&port->lock);
+
+ if (!ib_post_send_mad(rsp, NULL)) {
+ /*
+		 * With a successful post send, the ah and the send mad
+		 * will be destroyed in the send handler.
+ */
+ goto free_recv_mad;
+ }
+
+ ib_free_send_mad(rsp);
+
+err_rsp:
+ rdma_destroy_ah(ah);
+free_recv_mad:
+ ib_free_recv_mad(mad_wc);
+}
+
+/**
+ * vema_get_port -- Gets the opa_vnic_vema_port
+ * @cport: pointer to control dev
+ * @port_num: Port number
+ *
+ * This function returns the opa_vnic_vema_port structure associated
+ * with the given OPA port number.
+ *
+ * Return: ptr to the requested opa_vnic_vema_port structure
+ * on success, NULL otherwise
+ */
+static struct opa_vnic_vema_port *
+vema_get_port(struct opa_vnic_ctrl_port *cport, u8 port_num)
+{
+ struct opa_vnic_vema_port *port = (void *)cport + sizeof(*cport);
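+	/*
+	 * The per-port structures are allocated contiguously after the
+	 * control port structure (see opa_vnic_vema_add_one()), so port N
+	 * (1-based) lives at offset sizeof(*cport) + (N - 1) * sizeof(*port).
+	 */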
+
+ if (port_num > cport->num_ports)
+ return NULL;
+
+ return port + (port_num - 1);
+}
+
+/**
+ * opa_vnic_vema_send_trap -- This function sends a trap to the EM
+ * @adapter: pointer to vnic adapter
+ * @data: pointer to trap data filled by calling function
+ * @lid: issuer's lid (encap_slid from vesw_port_info)
+ *
+ * This function is called from the VNIC driver to send a trap if there
+ * is something the EM should be notified about. These events currently
+ * are
+ * 1) UNICAST INTERFACE MACADDRESS changes
+ * 2) MULTICAST INTERFACE MACADDRESS changes
+ * 3) ETHERNET LINK STATUS changes
+ * While allocating the send mad, the remote side QPN used is 1,
+ * as this is the well-known QP.
+ *
+ */
+void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
+ struct __opa_veswport_trap *data, u32 lid)
+{
+ struct opa_vnic_ctrl_port *cport = adapter->cport;
+ struct ib_mad_send_buf *send_buf;
+ struct opa_vnic_vema_port *port;
+ struct ib_device *ibp;
+ struct opa_vnic_vema_mad_trap *trap_mad;
+ struct opa_class_port_info *class;
+ struct rdma_ah_attr ah_attr;
+ struct ib_ah *ah;
+ struct opa_veswport_trap *trap;
+ u32 trap_lid;
+ u16 pkey_idx;
+
+ if (!cport)
+ goto err_exit;
+ ibp = cport->ibdev;
+ port = vema_get_port(cport, data->opaportnum);
+ if (!port || !port->mad_agent)
+ goto err_exit;
+
+ if (time_before(jiffies, adapter->trap_timeout)) {
+ if (adapter->trap_count == OPA_VNIC_TRAP_BURST_LIMIT) {
+ v_warn("Trap rate exceeded\n");
+ goto err_exit;
+ } else {
+ adapter->trap_count++;
+ }
+ } else {
+ adapter->trap_count = 0;
+ }
+
+ class = &port->class_port_info;
+ /* Set up address handle */
+ memset(&ah_attr, 0, sizeof(ah_attr));
+ ah_attr.type = rdma_ah_find_type(ibp, port->port_num);
+ rdma_ah_set_sl(&ah_attr,
+ GET_TRAP_SL_FROM_CLASS_PORT_INFO(class->trap_sl_rsvd));
+ rdma_ah_set_port_num(&ah_attr, port->port_num);
+ trap_lid = be32_to_cpu(class->trap_lid);
+ /*
+	 * Check that the trap lid is valid (must not be zero). The trap sink
+	 * could change after we build the MAD, but since trap delivery is not
+	 * guaranteed we do not take a lock; the change would take effect
+	 * either way.
+ */
+ if (!trap_lid) {
+ c_err("%s: Invalid dlid\n", __func__);
+ goto err_exit;
+ }
+
+ rdma_ah_set_dlid(&ah_attr, trap_lid);
+ ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr);
+ if (IS_ERR(ah)) {
+ c_err("%s:Couldn't create new AH = %p\n", __func__, ah);
+ c_err("%s:dlid = %d, sl = %d, port = %d\n", __func__,
+ rdma_ah_get_dlid(&ah_attr), rdma_ah_get_sl(&ah_attr),
+ rdma_ah_get_port_num(&ah_attr));
+ goto err_exit;
+ }
+
+ if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_FULL,
+ &pkey_idx) < 0) {
+ c_err("%s:full key not found, defaulting to partial\n",
+ __func__);
+ if (ib_find_pkey(ibp, data->opaportnum, IB_DEFAULT_PKEY_PARTIAL,
+ &pkey_idx) < 0)
+ pkey_idx = 1;
+ }
+
+ send_buf = ib_create_send_mad(port->mad_agent, 1, pkey_idx, 0,
+ IB_MGMT_VENDOR_HDR, IB_MGMT_MAD_DATA,
+ GFP_KERNEL, OPA_MGMT_BASE_VERSION);
+ if (IS_ERR(send_buf)) {
+ c_err("%s:Couldn't allocate send buf\n", __func__);
+ goto err_sndbuf;
+ }
+
+ send_buf->ah = ah;
+
+ /* Set up common MAD hdr */
+ trap_mad = send_buf->mad;
+ trap_mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
+ trap_mad->mad_hdr.mgmt_class = OPA_MGMT_CLASS_INTEL_EMA;
+ trap_mad->mad_hdr.class_version = OPA_EMA_CLASS_VERSION;
+ trap_mad->mad_hdr.method = IB_MGMT_METHOD_TRAP;
+ port->tid++;
+ trap_mad->mad_hdr.tid = cpu_to_be64(port->tid);
+ trap_mad->mad_hdr.attr_id = IB_SMP_ATTR_NOTICE;
+
+ /* Set up vendor OUI */
+ trap_mad->oui[0] = INTEL_OUI_1;
+ trap_mad->oui[1] = INTEL_OUI_2;
+ trap_mad->oui[2] = INTEL_OUI_3;
+
+ /* Setup notice attribute portion */
+ trap_mad->notice.gen_type = OPA_INTEL_EMA_NOTICE_TYPE_INFO << 1;
+ trap_mad->notice.oui_1 = INTEL_OUI_1;
+ trap_mad->notice.oui_2 = INTEL_OUI_2;
+ trap_mad->notice.oui_3 = INTEL_OUI_3;
+ trap_mad->notice.issuer_lid = cpu_to_be32(lid);
+
+ /* copy the actual trap data */
+ trap = (struct opa_veswport_trap *)trap_mad->notice.raw_data;
+ trap->fabric_id = cpu_to_be16(data->fabric_id);
+ trap->veswid = cpu_to_be16(data->veswid);
+ trap->veswportnum = cpu_to_be32(data->veswportnum);
+ trap->opaportnum = cpu_to_be16(data->opaportnum);
+ trap->veswportindex = data->veswportindex;
+ trap->opcode = data->opcode;
+
+	/* If the send is successful, set up the rate limit timeout; else bail */
+ if (ib_post_send_mad(send_buf, NULL)) {
+ ib_free_send_mad(send_buf);
+ } else {
+ if (adapter->trap_count)
+ return;
+ adapter->trap_timeout = jiffies +
+ usecs_to_jiffies(OPA_VNIC_TRAP_TIMEOUT);
+ return;
+ }
+
+err_sndbuf:
+ rdma_destroy_ah(ah);
+err_exit:
+ v_err("Aborting trap\n");
+}
+
+static int vema_rem_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ opa_vnic_rem_netdev(adapter);
+ return 0;
+}
+
+static int vema_enable_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ netif_carrier_on(adapter->netdev);
+ return 0;
+}
+
+static int vema_disable_vport(int id, void *p, void *data)
+{
+ struct opa_vnic_adapter *adapter = p;
+
+ netif_carrier_off(adapter->netdev);
+ return 0;
+}
+
+static void opa_vnic_event(struct ib_event_handler *handler,
+ struct ib_event *record)
+{
+ struct opa_vnic_vema_port *port =
+ container_of(handler, struct opa_vnic_vema_port, event_handler);
+ struct opa_vnic_ctrl_port *cport = port->cport;
+
+ if (record->element.port_num != port->port_num)
+ return;
+
+ c_dbg("OPA_VNIC received event %d on device %s port %d\n",
+ record->event, record->device->name, record->element.port_num);
+
+ if (record->event == IB_EVENT_PORT_ERR)
+ idr_for_each(&port->vport_idr, vema_disable_vport, NULL);
+ if (record->event == IB_EVENT_PORT_ACTIVE)
+ idr_for_each(&port->vport_idr, vema_enable_vport, NULL);
+}
+
+/**
+ * vema_unregister -- Unregisters agent
+ * @cport: pointer to control port
+ *
+ * This deletes the registration by VEMA for MADs
+ */
+static void vema_unregister(struct opa_vnic_ctrl_port *cport)
+{
+ int i;
+
+ for (i = 1; i <= cport->num_ports; i++) {
+ struct opa_vnic_vema_port *port = vema_get_port(cport, i);
+
+ if (!port->mad_agent)
+ continue;
+
+ /* Lock ensures no MAD is being processed */
+ mutex_lock(&port->lock);
+ idr_for_each(&port->vport_idr, vema_rem_vport, NULL);
+ mutex_unlock(&port->lock);
+
+ ib_unregister_mad_agent(port->mad_agent);
+ port->mad_agent = NULL;
+ mutex_destroy(&port->lock);
+ idr_destroy(&port->vport_idr);
+ ib_unregister_event_handler(&port->event_handler);
+ }
+}
+
+/**
+ * vema_register -- Registers agent
+ * @cport: pointer to control port
+ *
+ * This function registers the handlers for the VEMA MADs
+ *
+ * Return: 0 on success, non-zero otherwise
+ */
+static int vema_register(struct opa_vnic_ctrl_port *cport)
+{
+ struct ib_mad_reg_req reg_req = {
+ .mgmt_class = OPA_MGMT_CLASS_INTEL_EMA,
+ .mgmt_class_version = OPA_MGMT_BASE_VERSION,
+ .oui = { INTEL_OUI_1, INTEL_OUI_2, INTEL_OUI_3 }
+ };
+ int i;
+
+ set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
+ set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
+
+ /* register ib event handler and mad agent for each port on dev */
+ for (i = 1; i <= cport->num_ports; i++) {
+ struct opa_vnic_vema_port *port = vema_get_port(cport, i);
+ int ret;
+
+ port->cport = cport;
+ port->port_num = i;
+
+ INIT_IB_EVENT_HANDLER(&port->event_handler,
+ cport->ibdev, opa_vnic_event);
+ ret = ib_register_event_handler(&port->event_handler);
+ if (ret) {
+ c_err("port %d: event handler register failed\n", i);
+ vema_unregister(cport);
+ return ret;
+ }
+
+ idr_init(&port->vport_idr);
+ mutex_init(&port->lock);
+ port->mad_agent = ib_register_mad_agent(cport->ibdev, i,
+ IB_QPT_GSI, &reg_req,
+ IB_MGMT_RMPP_VERSION,
+ vema_send, vema_recv,
+ port, 0);
+ if (IS_ERR(port->mad_agent)) {
+ ret = PTR_ERR(port->mad_agent);
+ port->mad_agent = NULL;
+ mutex_destroy(&port->lock);
+ idr_destroy(&port->vport_idr);
+ vema_unregister(cport);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * opa_vnic_vema_add_one -- Handle new ib device
+ * @device: ib device pointer
+ *
+ * Allocate the vnic control port and initialize it.
+ */
+static void opa_vnic_vema_add_one(struct ib_device *device)
+{
+ struct opa_vnic_ctrl_port *cport;
+ int rc, size = sizeof(*cport);
+
+ if (!rdma_cap_opa_vnic(device))
+ return;
+
+ size += device->phys_port_cnt * sizeof(struct opa_vnic_vema_port);
+ cport = kzalloc(size, GFP_KERNEL);
+ if (!cport)
+ return;
+
+ cport->num_ports = device->phys_port_cnt;
+ cport->ibdev = device;
+
+ /* Initialize opa vnic management agent (vema) */
+ rc = vema_register(cport);
+ if (!rc)
+ c_info("VNIC client initialized\n");
+
+ ib_set_client_data(device, &opa_vnic_client, cport);
+}
+
+/**
+ * opa_vnic_vema_rem_one -- Handle ib device removal
+ * @device: ib device pointer
+ * @client_data: ib client data
+ *
+ * Uninitialize and free the vnic control port.
+ */
+static void opa_vnic_vema_rem_one(struct ib_device *device,
+ void *client_data)
+{
+ struct opa_vnic_ctrl_port *cport = client_data;
+
+ if (!cport)
+ return;
+
+ c_info("removing VNIC client\n");
+ vema_unregister(cport);
+ kfree(cport);
+}
+
+static int __init opa_vnic_init(void)
+{
+ int rc;
+
+ pr_info("OPA Virtual Network Driver - v%s\n",
+ opa_vnic_driver_version);
+
+ rc = ib_register_client(&opa_vnic_client);
+ if (rc)
+ pr_err("VNIC driver register failed %d\n", rc);
+
+ return rc;
+}
+module_init(opa_vnic_init);
+
+static void opa_vnic_deinit(void)
+{
+ ib_unregister_client(&opa_vnic_client);
+}
+module_exit(opa_vnic_deinit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel OPA Virtual Network driver");
+MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
new file mode 100644
index 000000000000..a51bf977f4d6
--- /dev/null
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains OPA VNIC EMA Interface functions.
+ */
+
+#include "opa_vnic_internal.h"
+
+/**
+ * opa_vnic_vema_report_event - send trap to report the specified event
+ * @adapter: vnic port adapter
+ * @event: event to be reported
+ *
+ * This function calls the vema api to send a trap for the given event.
+ */
+void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event)
+{
+ struct __opa_veswport_info *info = &adapter->info;
+ struct __opa_veswport_trap trap_data;
+
+ trap_data.fabric_id = info->vesw.fabric_id;
+ trap_data.veswid = info->vesw.vesw_id;
+ trap_data.veswportnum = info->vport.port_num;
+ trap_data.opaportnum = adapter->port_num;
+ trap_data.veswportindex = adapter->vport_num;
+ trap_data.opcode = event;
+
+ opa_vnic_vema_send_trap(adapter, &trap_data, info->vport.encap_slid);
+}
+
+/**
+ * opa_vnic_get_summary_counters - get summary counters
+ * @adapter: vnic port adapter
+ * @cntrs: pointer to destination summary counters structure
+ *
+ * This function populates the summary counters that are maintained by the
+ * given adapter into the destination structure provided.
+ */
+void opa_vnic_get_summary_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_summary_counters *cntrs)
+{
+ struct opa_vnic_stats vstats;
+ __be64 *dst;
+ u64 *src;
+
+ memset(&vstats, 0, sizeof(vstats));
+ mutex_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats);
+ mutex_unlock(&adapter->stats_lock);
+
+ cntrs->vp_instance = cpu_to_be16(adapter->vport_num);
+ cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id);
+ cntrs->veswport_num = cpu_to_be32(adapter->port_num);
+
+ cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors);
+ cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors);
+ cntrs->tx_packets = cpu_to_be64(vstats.netstats.tx_packets);
+ cntrs->rx_packets = cpu_to_be64(vstats.netstats.rx_packets);
+ cntrs->tx_bytes = cpu_to_be64(vstats.netstats.tx_bytes);
+ cntrs->rx_bytes = cpu_to_be64(vstats.netstats.rx_bytes);
+
+ /*
+	 * This loop depends on the layout of the
+	 * opa_veswport_summary_counters and opa_vnic_stats structures.
+ */
+ for (dst = &cntrs->tx_unicast, src = &vstats.tx_grp.unicast;
+ dst < &cntrs->reserved[0]; dst++, src++) {
+ *dst = cpu_to_be64(*src);
+ }
+}
+
+/**
+ * opa_vnic_get_error_counters - get error counters
+ * @adapter: vnic port adapter
+ * @cntrs: pointer to destination error counters structure
+ *
+ * This function populates the error counters that are maintained by the
+ * given adapter into the destination structure provided.
+ */
+void opa_vnic_get_error_counters(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_error_counters *cntrs)
+{
+ struct opa_vnic_stats vstats;
+
+ memset(&vstats, 0, sizeof(vstats));
+ mutex_lock(&adapter->stats_lock);
+ adapter->rn_ops->ndo_get_stats64(adapter->netdev, &vstats.netstats);
+ mutex_unlock(&adapter->stats_lock);
+
+ cntrs->vp_instance = cpu_to_be16(adapter->vport_num);
+ cntrs->vesw_id = cpu_to_be16(adapter->info.vesw.vesw_id);
+ cntrs->veswport_num = cpu_to_be32(adapter->port_num);
+
+ cntrs->tx_errors = cpu_to_be64(vstats.netstats.tx_errors);
+ cntrs->rx_errors = cpu_to_be64(vstats.netstats.rx_errors);
+ cntrs->tx_dlid_zero = cpu_to_be64(vstats.tx_dlid_zero);
+ cntrs->tx_drop_state = cpu_to_be64(vstats.tx_drop_state);
+ cntrs->tx_logic = cpu_to_be64(vstats.netstats.tx_fifo_errors +
+ vstats.netstats.tx_carrier_errors);
+
+ cntrs->rx_bad_veswid = cpu_to_be64(vstats.netstats.rx_nohandler);
+ cntrs->rx_runt = cpu_to_be64(vstats.rx_runt);
+ cntrs->rx_oversize = cpu_to_be64(vstats.rx_oversize);
+ cntrs->rx_drop_state = cpu_to_be64(vstats.rx_drop_state);
+ cntrs->rx_logic = cpu_to_be64(vstats.netstats.rx_fifo_errors);
+}
+
+/**
+ * opa_vnic_get_vesw_info -- Get the vesw information
+ * @adapter: vnic port adapter
+ * @info: pointer to destination vesw info structure
+ *
+ * This function copies the vesw info that is maintained by the
+ * given adapter to destination address provided.
+ */
+void opa_vnic_get_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info)
+{
+ struct __opa_vesw_info *src = &adapter->info.vesw;
+ int i;
+
+ info->fabric_id = cpu_to_be16(src->fabric_id);
+ info->vesw_id = cpu_to_be16(src->vesw_id);
+ memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0));
+ info->def_port_mask = cpu_to_be16(src->def_port_mask);
+ memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1));
+ info->pkey = cpu_to_be16(src->pkey);
+
+ memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2));
+ info->u_mcast_dlid = cpu_to_be32(src->u_mcast_dlid);
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++)
+ info->u_ucast_dlid[i] = cpu_to_be32(src->u_ucast_dlid[i]);
+
+ memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3));
+ for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++)
+ info->eth_mtu[i] = cpu_to_be16(src->eth_mtu[i]);
+
+ info->eth_mtu_non_vlan = cpu_to_be16(src->eth_mtu_non_vlan);
+ memcpy(info->rsvd4, src->rsvd4, ARRAY_SIZE(src->rsvd4));
+}
+
+/**
+ * opa_vnic_set_vesw_info -- Set the vesw information
+ * @adapter: vnic port adapter
+ * @info: pointer to vesw info structure
+ *
+ * This function updates the vesw info that is maintained by the
+ * given adapter with vesw info provided. Reserved fields are stored
+ * and returned back to EM as is.
+ */
+void opa_vnic_set_vesw_info(struct opa_vnic_adapter *adapter,
+ struct opa_vesw_info *info)
+{
+ struct __opa_vesw_info *dst = &adapter->info.vesw;
+ int i;
+
+ dst->fabric_id = be16_to_cpu(info->fabric_id);
+ dst->vesw_id = be16_to_cpu(info->vesw_id);
+ memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0));
+ dst->def_port_mask = be16_to_cpu(info->def_port_mask);
+ memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1));
+ dst->pkey = be16_to_cpu(info->pkey);
+
+ memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2));
+ dst->u_mcast_dlid = be32_to_cpu(info->u_mcast_dlid);
+ for (i = 0; i < OPA_VESW_MAX_NUM_DEF_PORT; i++)
+ dst->u_ucast_dlid[i] = be32_to_cpu(info->u_ucast_dlid[i]);
+
+ memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3));
+ for (i = 0; i < OPA_VNIC_MAX_NUM_PCP; i++)
+ dst->eth_mtu[i] = be16_to_cpu(info->eth_mtu[i]);
+
+ dst->eth_mtu_non_vlan = be16_to_cpu(info->eth_mtu_non_vlan);
+ memcpy(dst->rsvd4, info->rsvd4, ARRAY_SIZE(info->rsvd4));
+}
+
+/**
+ * opa_vnic_get_per_veswport_info -- Get the vesw per port information
+ * @adapter: vnic port adapter
+ * @info: pointer to destination vport info structure
+ *
+ * This function copies the vesw per port info that is maintained by the
+ * given adapter to destination address provided.
+ * Note that the read only fields are not copied.
+ */
+void opa_vnic_get_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info)
+{
+ struct __opa_per_veswport_info *src = &adapter->info.vport;
+
+ info->port_num = cpu_to_be32(src->port_num);
+ info->eth_link_status = src->eth_link_status;
+ memcpy(info->rsvd0, src->rsvd0, ARRAY_SIZE(src->rsvd0));
+
+ memcpy(info->base_mac_addr, src->base_mac_addr,
+ ARRAY_SIZE(info->base_mac_addr));
+ info->config_state = src->config_state;
+ info->oper_state = src->oper_state;
+ info->max_mac_tbl_ent = cpu_to_be16(src->max_mac_tbl_ent);
+ info->max_smac_ent = cpu_to_be16(src->max_smac_ent);
+ info->mac_tbl_digest = cpu_to_be32(src->mac_tbl_digest);
+ memcpy(info->rsvd1, src->rsvd1, ARRAY_SIZE(src->rsvd1));
+
+ info->encap_slid = cpu_to_be32(src->encap_slid);
+ memcpy(info->pcp_to_sc_uc, src->pcp_to_sc_uc,
+ ARRAY_SIZE(info->pcp_to_sc_uc));
+ memcpy(info->pcp_to_vl_uc, src->pcp_to_vl_uc,
+ ARRAY_SIZE(info->pcp_to_vl_uc));
+ memcpy(info->pcp_to_sc_mc, src->pcp_to_sc_mc,
+ ARRAY_SIZE(info->pcp_to_sc_mc));
+ memcpy(info->pcp_to_vl_mc, src->pcp_to_vl_mc,
+ ARRAY_SIZE(info->pcp_to_vl_mc));
+ info->non_vlan_sc_uc = src->non_vlan_sc_uc;
+ info->non_vlan_vl_uc = src->non_vlan_vl_uc;
+ info->non_vlan_sc_mc = src->non_vlan_sc_mc;
+ info->non_vlan_vl_mc = src->non_vlan_vl_mc;
+ memcpy(info->rsvd2, src->rsvd2, ARRAY_SIZE(src->rsvd2));
+
+ info->uc_macs_gen_count = cpu_to_be16(src->uc_macs_gen_count);
+ info->mc_macs_gen_count = cpu_to_be16(src->mc_macs_gen_count);
+ memcpy(info->rsvd3, src->rsvd3, ARRAY_SIZE(src->rsvd3));
+}
+
+/**
+ * opa_vnic_set_per_veswport_info -- Set vesw per port information
+ * @adapter: vnic port adapter
+ * @info: pointer to vport info structure
+ *
+ * This function updates the vesw per port info that is maintained by the
+ * given adapter with the vesw per port info provided. Reserved fields
+ * are stored and returned to the EM as is. Note that the read-only
+ * fields are not updated.
+ */
+void opa_vnic_set_per_veswport_info(struct opa_vnic_adapter *adapter,
+ struct opa_per_veswport_info *info)
+{
+ struct __opa_per_veswport_info *dst = &adapter->info.vport;
+
+ dst->port_num = be32_to_cpu(info->port_num);
+ memcpy(dst->rsvd0, info->rsvd0, ARRAY_SIZE(info->rsvd0));
+
+ memcpy(dst->base_mac_addr, info->base_mac_addr,
+ ARRAY_SIZE(dst->base_mac_addr));
+ dst->config_state = info->config_state;
+ memcpy(dst->rsvd1, info->rsvd1, ARRAY_SIZE(info->rsvd1));
+
+ dst->encap_slid = be32_to_cpu(info->encap_slid);
+ memcpy(dst->pcp_to_sc_uc, info->pcp_to_sc_uc,
+ ARRAY_SIZE(dst->pcp_to_sc_uc));
+ memcpy(dst->pcp_to_vl_uc, info->pcp_to_vl_uc,
+ ARRAY_SIZE(dst->pcp_to_vl_uc));
+ memcpy(dst->pcp_to_sc_mc, info->pcp_to_sc_mc,
+ ARRAY_SIZE(dst->pcp_to_sc_mc));
+ memcpy(dst->pcp_to_vl_mc, info->pcp_to_vl_mc,
+ ARRAY_SIZE(dst->pcp_to_vl_mc));
+ dst->non_vlan_sc_uc = info->non_vlan_sc_uc;
+ dst->non_vlan_vl_uc = info->non_vlan_vl_uc;
+ dst->non_vlan_sc_mc = info->non_vlan_sc_mc;
+ dst->non_vlan_vl_mc = info->non_vlan_vl_mc;
+ memcpy(dst->rsvd2, info->rsvd2, ARRAY_SIZE(info->rsvd2));
+ memcpy(dst->rsvd3, info->rsvd3, ARRAY_SIZE(info->rsvd3));
+}
+
+/**
+ * opa_vnic_query_mcast_macs - query multicast mac list
+ * @adapter: vnic port adapter
+ * @macs: pointer to the mac list
+ *
+ * This function populates the provided mac list with the configured
+ * multicast addresses in the adapter.
+ */
+void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs)
+{
+ u16 start_idx, num_macs, idx = 0, count = 0;
+ struct netdev_hw_addr *ha;
+
+ start_idx = be16_to_cpu(macs->start_idx);
+ num_macs = be16_to_cpu(macs->num_macs_in_msg);
+ netdev_for_each_mc_addr(ha, adapter->netdev) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ macs->tot_macs_in_lst = cpu_to_be16(netdev_mc_count(adapter->netdev));
+ macs->num_macs_in_msg = cpu_to_be16(count);
+ macs->gen_count = cpu_to_be16(adapter->info.vport.mc_macs_gen_count);
+}
+
+/**
+ * opa_vnic_query_ucast_macs - query unicast mac list
+ * @adapter: vnic port adapter
+ * @macs: pointer to the mac list
+ *
+ * This function populates the provided mac list with the configured
+ * unicast addresses in the adapter.
+ */
+void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
+ struct opa_veswport_iface_macs *macs)
+{
+ u16 start_idx, tot_macs, num_macs, idx = 0, count = 0;
+ struct netdev_hw_addr *ha;
+
+ start_idx = be16_to_cpu(macs->start_idx);
+ num_macs = be16_to_cpu(macs->num_macs_in_msg);
+ /* loop through dev_addrs list first */
+ for_each_dev_addr(adapter->netdev, ha) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ /* Do not include EM specified MAC address */
+ if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr,
+ ARRAY_SIZE(adapter->info.vport.base_mac_addr)))
+ continue;
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ /* loop through uc list */
+ netdev_for_each_uc_addr(ha, adapter->netdev) {
+ struct opa_vnic_iface_mac_entry *entry = &macs->entry[count];
+
+ if (start_idx > idx++)
+ continue;
+ else if (num_macs == count)
+ break;
+ memcpy(entry, ha->addr, sizeof(*entry));
+ count++;
+ }
+
+ tot_macs = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) +
+ netdev_uc_count(adapter->netdev);
+ macs->tot_macs_in_lst = cpu_to_be16(tot_macs);
+ macs->num_macs_in_msg = cpu_to_be16(count);
+ macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count);
+}
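For reference, a caller on the EM side of this interface is expected to fill in
start_idx and num_macs_in_msg (big endian) before invoking the query helpers above;
the helpers then return the requested entries plus the total and generation counts.
A minimal sketch of such a caller follows; the handler name, buffer-size check and
error code are assumptions, only opa_vnic_query_ucast_macs() and the
opa_veswport_iface_macs fields it reads and writes come from this patch:

	/* hypothetical handler for an EM "query unicast MACs" request */
	static int vema_query_ucast_macs(struct opa_vnic_adapter *adapter,
					 u16 start_idx, u16 num_macs,
					 struct opa_veswport_iface_macs *rsp,
					 size_t rsp_len)
	{
		/* illustrative check that the response buffer can hold num_macs entries */
		if (rsp_len < sizeof(*rsp) + num_macs * sizeof(rsp->entry[0]))
			return -EINVAL;

		rsp->start_idx = cpu_to_be16(start_idx);
		rsp->num_macs_in_msg = cpu_to_be16(num_macs);
		opa_vnic_query_ucast_macs(adapter, rsp);

		/* rsp now carries up to num_macs entries, plus tot_macs_in_lst,
		 * the actual num_macs_in_msg and uc_macs_gen_count (big endian).
		 */
		return 0;
	}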
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index cee46266f434..def723a5df29 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -312,10 +312,15 @@ static int srp_new_cm_id(struct srp_rdma_ch *ch)
if (ch->cm_id)
ib_destroy_cm_id(ch->cm_id);
ch->cm_id = new_cm_id;
+ if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
+ target->srp_host->port))
+ ch->path.rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ ch->path.rec_type = SA_PATH_REC_TYPE_IB;
ch->path.sgid = target->sgid;
ch->path.dgid = target->orig_dgid;
ch->path.pkey = target->pkey;
- ch->path.service_id = target->service_id;
+ sa_path_set_service_id(&ch->path, target->service_id);
return 0;
}
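The sa_path_rec introduced by this series carries both IB and OPA variants of the
path record, selected by rec_type, which is why srp_new_cm_id() now picks
SA_PATH_REC_TYPE_OPA or SA_PATH_REC_TYPE_IB before filling the record. The
sa_path_set_service_id() accessor used above is then expected to store the value
into whichever variant is active, roughly along these lines (a sketch under that
assumption, not a quote of the helper's actual body):

	static inline void sa_path_set_service_id(struct sa_path_rec *rec,
						  __be64 service_id)
	{
		if (rec->rec_type == SA_PATH_REC_TYPE_IB)
			rec->ib.service_id = service_id;
		else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
			rec->opa.service_id = service_id;
	}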
@@ -643,7 +648,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
}
static void srp_path_rec_completion(int status,
- struct ib_sa_path_rec *pathrec,
+ struct sa_path_rec *pathrec,
void *ch_ptr)
{
struct srp_rdma_ch *ch = ch_ptr;
@@ -2399,12 +2404,12 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
switch (event->param.rej_rcvd.reason) {
case IB_CM_REJ_PORT_CM_REDIRECT:
cpi = event->param.rej_rcvd.ari;
- ch->path.dlid = cpi->redirect_lid;
+ sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid)));
ch->path.pkey = cpi->redirect_pkey;
cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
- ch->status = ch->path.dlid ?
+ ch->status = sa_path_get_dlid(&ch->path) ?
SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
break;
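As used in this hunk, the sa_path_set_dlid()/sa_path_get_dlid() accessors take and
return a 32-bit big-endian LID so the same code can also carry OPA extended LIDs,
while the redirect LID in the CM reject ARI is still a 16-bit big-endian value;
hence the htonl(ntohs(...)) widening before the store. In isolation (types as in
the patch, local variable names only illustrative):

	__be16 lid16 = cpi->redirect_lid;     /* 16-bit LID from the reject ARI */
	__be32 lid32 = htonl(ntohs(lid16));   /* widen to 32 bits, still big endian */

	sa_path_set_dlid(&ch->path, lid32);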
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 32ed40db3ca2..ab9077b81d5a 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -152,7 +152,7 @@ struct srp_rdma_ch {
struct completion done;
int status;
- struct ib_sa_path_rec path;
+ struct sa_path_rec path;
struct ib_sa_query *path_query;
int path_query_id;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 7e314c2f2071..1ced0731c140 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -417,7 +417,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_wc)
{
- ib_destroy_ah(mad_wc->send_buf->ah);
+ rdma_destroy_ah(mad_wc->send_buf->ah);
ib_free_send_mad(mad_wc->send_buf);
}
@@ -481,7 +481,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
ib_free_send_mad(rsp);
err_rsp:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah);
err:
ib_free_recv_mad(mad_wc);
}
@@ -2302,12 +2302,8 @@ static void srpt_queue_response(struct se_cmd *cmd)
}
spin_unlock_irqrestore(&ioctx->spinlock, flags);
- if (unlikely(transport_check_aborted_status(&ioctx->cmd, false)
- || WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) {
- atomic_inc(&ch->req_lim_delta);
- srpt_abort_cmd(ioctx);
+ if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT)))
return;
- }
/* For read commands, transfer the data to the initiator. */
if (ioctx->cmd.data_direction == DMA_FROM_DEVICE &&
@@ -2689,7 +2685,8 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
struct srpt_rdma_ch *ch = ioctx->ch;
unsigned long flags;
- WARN_ON(ioctx->state != SRPT_STATE_DONE);
+ WARN_ON_ONCE(ioctx->state != SRPT_STATE_DONE &&
+ !(ioctx->cmd.transport_state & CMD_T_ABORTED));
if (ioctx->n_rw_ctx) {
srpt_free_rw_ctxs(ch, ioctx);