aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c4
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c27
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c193
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c42
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h24
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c127
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c31
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c40
-rw-r--r--drivers/infiniband/hw/hfi1/Kconfig4
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile2
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c567
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h38
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c296
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h5
-rw-r--r--drivers/infiniband/hw/hfi1/chip_registers.h4
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c52
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c93
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c125
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h123
-rw-r--r--drivers/infiniband/hw/hfi1/init.c51
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c60
-rw-r--r--drivers/infiniband/hw/hfi1/mad.h7
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.c254
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.h37
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c68
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c21
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c20
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c68
-rw-r--r--drivers/infiniband/hw/hfi1/qp.h4
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.c409
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.h3
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c90
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c57
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c25
-rw-r--r--drivers/infiniband/hw/hfi1/trace.h1333
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ctxts.h141
-rw-r--r--drivers/infiniband/hw/hfi1/trace_dbg.h155
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ibhdrs.h209
-rw-r--r--drivers/infiniband/hw/hfi1/trace_misc.h81
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rc.h123
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rx.h322
-rw-r--r--drivers/infiniband/hw/hfi1/trace_tx.h642
-rw-r--r--drivers/infiniband/hw/hfi1/twsi.c489
-rw-r--r--drivers/infiniband/hw/hfi1/twsi.h65
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c61
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c86
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c124
-rw-r--r--drivers/infiniband/hw/hfi1/user_pages.c19
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c301
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.h8
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c89
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h8
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.h2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c4
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_d.h3
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.c4
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_type.h2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_uk.c29
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_user.h2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c76
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c4
-rw-r--r--drivers/infiniband/hw/mlx4/main.c222
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h9
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c87
-rw-r--r--drivers/infiniband/hw/mlx5/gsi.c19
-rw-r--r--drivers/infiniband/hw/mlx5/main.c429
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h74
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c4
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c691
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c112
-rw-r--r--drivers/infiniband/hw/mlx5/user.h88
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c24
-rw-r--r--drivers/infiniband/hw/mthca/mthca_reset.c42
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c33
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c19
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c43
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c16
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.c17
82 files changed, 5966 insertions, 3330 deletions
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 3e8431b5cad7..04bbf172abde 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1396,10 +1396,10 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
state_set(&child_ep->com, CONNECTING);
child_ep->com.tdev = tdev;
child_ep->com.cm_id = NULL;
- child_ep->com.local_addr.sin_family = PF_INET;
+ child_ep->com.local_addr.sin_family = AF_INET;
child_ep->com.local_addr.sin_port = req->local_port;
child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
- child_ep->com.remote_addr.sin_family = PF_INET;
+ child_ep->com.remote_addr.sin_family = AF_INET;
child_ep->com.remote_addr.sin_port = req->peer_port;
child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
get_ep(&parent_ep->com);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index bb1a839d4d6d..3edb80644b53 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1183,18 +1183,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
}
-static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
- ibdev.dev);
- struct ethtool_drvinfo info;
- struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
-
- PDBG("%s dev 0x%p\n", __func__, dev);
- lldev->ethtool_ops->get_drvinfo(lldev, &info);
- return sprintf(buf, "%s\n", info.fw_version);
-}
-
static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -1334,13 +1322,11 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *iwch_class_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id,
};
@@ -1362,6 +1348,18 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str,
+ size_t str_len)
+{
+ struct iwch_dev *iwch_dev = to_iwch_dev(ibdev);
+ struct ethtool_drvinfo info;
+ struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
+
+ PDBG("%s dev 0x%p\n", __func__, iwch_dev);
+ lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ snprintf(str, str_len, "%s", info.fw_version);
+}
+
int iwch_register_device(struct iwch_dev *dev)
{
int ret;
@@ -1437,6 +1435,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.get_hw_stats = iwch_get_mib;
dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
dev->ibdev.get_port_immutable = iwch_port_immutable;
+ dev->ibdev.get_dev_fw_str = get_dev_fw_ver_str;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm)
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index a3a67216bce6..3aca7f6171b4 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -294,6 +294,25 @@ static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
return;
}
+static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size)
+{
+ struct sk_buff *skb;
+ unsigned int i;
+ size_t len;
+
+ len = roundup(sizeof(union cpl_wr_size), 16);
+ for (i = 0; i < size; i++) {
+ skb = alloc_skb(len, GFP_KERNEL);
+ if (!skb)
+ goto fail;
+ skb_queue_tail(ep_skb_list, skb);
+ }
+ return 0;
+fail:
+ skb_queue_purge(ep_skb_list);
+ return -ENOMEM;
+}
+
static void *alloc_ep(int size, gfp_t gfp)
{
struct c4iw_ep_common *epc;
@@ -384,6 +403,8 @@ void _c4iw_free_ep(struct kref *kref)
if (ep->mpa_skb)
kfree_skb(ep->mpa_skb);
}
+ if (!skb_queue_empty(&ep->com.ep_skb_list))
+ skb_queue_purge(&ep->com.ep_skb_list);
kfree(ep);
}
@@ -620,25 +641,27 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb)
}
}
-static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
+static int send_flowc(struct c4iw_ep *ep)
{
- unsigned int flowclen = 80;
struct fw_flowc_wr *flowc;
+ struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
int i;
u16 vlan = ep->l2t->vlan;
int nparams;
+ if (WARN_ON(!skb))
+ return -ENOMEM;
+
if (vlan == CPL_L2T_VLAN_NONE)
nparams = 8;
else
nparams = 9;
- skb = get_skb(skb, flowclen, GFP_KERNEL);
- flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen);
+ flowc = (struct fw_flowc_wr *)__skb_put(skb, FLOWC_LEN);
flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
FW_FLOWC_WR_NPARAMS_V(nparams));
- flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen,
+ flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(FLOWC_LEN,
16)) | FW_WR_FLOWID_V(ep->hwtid));
flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
@@ -679,18 +702,16 @@ static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
return c4iw_ofld_send(&ep->com.dev->rdev, skb);
}
-static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
+static int send_halfclose(struct c4iw_ep *ep)
{
struct cpl_close_con_req *req;
- struct sk_buff *skb;
+ struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
int wrlen = roundup(sizeof *req, 16);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- skb = get_skb(NULL, wrlen, gfp);
- if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
+ if (WARN_ON(!skb))
return -ENOMEM;
- }
+
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
@@ -701,26 +722,24 @@ static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
-static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
+static int send_abort(struct c4iw_ep *ep)
{
struct cpl_abort_req *req;
int wrlen = roundup(sizeof *req, 16);
+ struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- skb = get_skb(skb, wrlen, gfp);
- if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
- __func__);
+ if (WARN_ON(!req_skb))
return -ENOMEM;
- }
- set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- t4_set_arp_err_handler(skb, ep, abort_arp_failure);
- req = (struct cpl_abort_req *) skb_put(skb, wrlen);
+
+ set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx);
+ t4_set_arp_err_handler(req_skb, ep, abort_arp_failure);
+ req = (struct cpl_abort_req *)skb_put(req_skb, wrlen);
memset(req, 0, wrlen);
INIT_TP_WR(req, ep->hwtid);
OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
req->cmd = CPL_ABORT_SEND_RST;
- return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+ return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
}
static void best_mtu(const unsigned short *mtus, unsigned short mtu,
@@ -992,9 +1011,19 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
mpa = (struct mpa_message *)(req + 1);
memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
- mpa->flags = (crc_enabled ? MPA_CRC : 0) |
- (markers_enabled ? MPA_MARKERS : 0) |
- (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
+
+ mpa->flags = 0;
+ if (crc_enabled)
+ mpa->flags |= MPA_CRC;
+ if (markers_enabled) {
+ mpa->flags |= MPA_MARKERS;
+ ep->mpa_attr.recv_marker_enabled = 1;
+ } else {
+ ep->mpa_attr.recv_marker_enabled = 0;
+ }
+ if (mpa_rev_to_use == 2)
+ mpa->flags |= MPA_ENHANCED_RDMA_CONN;
+
mpa->private_data_size = htons(ep->plen);
mpa->revision = mpa_rev_to_use;
if (mpa_rev_to_use == 1) {
@@ -1169,8 +1198,11 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
mpa = (struct mpa_message *)(req + 1);
memset(mpa, 0, sizeof(*mpa));
memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
- mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
- (markers_enabled ? MPA_MARKERS : 0);
+ mpa->flags = 0;
+ if (ep->mpa_attr.crc_enabled)
+ mpa->flags |= MPA_CRC;
+ if (ep->mpa_attr.recv_marker_enabled)
+ mpa->flags |= MPA_MARKERS;
mpa->revision = ep->mpa_attr.version;
mpa->private_data_size = htons(plen);
@@ -1248,7 +1280,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
set_bit(ACT_ESTAB, &ep->com.history);
/* start MPA negotiation */
- ret = send_flowc(ep, NULL);
+ ret = send_flowc(ep);
if (ret)
goto err;
if (ep->retry_with_mpa_v1)
@@ -1555,7 +1587,6 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
*/
__state_set(&ep->com, FPDU_MODE);
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
- ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
ep->mpa_attr.version = mpa->revision;
ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
@@ -2004,12 +2035,17 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
}
/*
- * Return whether a failed active open has allocated a TID
+ * Some of the error codes above implicitly indicate that there is no TID
+ * allocated with the result of an ACT_OPEN. We use this predicate to make
+ * that explicit.
*/
static inline int act_open_has_tid(int status)
{
- return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
- status != CPL_ERR_ARP_MISS;
+ return (status != CPL_ERR_TCAM_PARITY &&
+ status != CPL_ERR_TCAM_MISS &&
+ status != CPL_ERR_TCAM_FULL &&
+ status != CPL_ERR_CONN_EXIST_SYNRECV &&
+ status != CPL_ERR_CONN_EXIST);
}
/* Returns whether a CPL status conveys negative advice.
@@ -2130,6 +2166,7 @@ out:
static int c4iw_reconnect(struct c4iw_ep *ep)
{
int err = 0;
+ int size = 0;
struct sockaddr_in *laddr = (struct sockaddr_in *)
&ep->com.cm_id->m_local_addr;
struct sockaddr_in *raddr = (struct sockaddr_in *)
@@ -2145,6 +2182,21 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
init_timer(&ep->timer);
c4iw_init_wr_wait(&ep->com.wr_wait);
+ /* When MPA revision is different on nodes, the node with MPA_rev=2
+ * tries to reconnect with MPA_rev 1 for the same EP through
+ * c4iw_reconnect(), where the same EP is assigned with new tid for
+ * further connection establishment. As we are using the same EP pointer
+ * for reconnect, few skbs are used during the previous c4iw_connect(),
+ * which leaves the EP with inadequate skbs for further
+ * c4iw_reconnect(), Further causing an assert BUG_ON() due to empty
+ * skb_list() during peer_abort(). Allocate skbs which is already used.
+ */
+ size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list));
+ if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) {
+ err = -ENOMEM;
+ goto fail1;
+ }
+
/*
* Allocate an active TID to initiate a TCP connection.
*/
@@ -2210,6 +2262,7 @@ fail2:
* response of 1st connect request.
*/
connect_reply_upcall(ep, -ECONNRESET);
+fail1:
c4iw_put_ep(&ep->com);
out:
return err;
@@ -2576,6 +2629,10 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
child_ep->mtu = peer_mss + hdrs;
+ skb_queue_head_init(&child_ep->com.ep_skb_list);
+ if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF))
+ goto fail;
+
state_set(&child_ep->com, CONNECTING);
child_ep->com.dev = dev;
child_ep->com.cm_id = NULL;
@@ -2640,6 +2697,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
(const u32 *)&sin6->sin6_addr.s6_addr, 1);
}
goto out;
+fail:
+ c4iw_put_ep(&child_ep->com);
reject:
reject_cr(dev, hwtid, skb);
if (parent_ep)
@@ -2670,7 +2729,7 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
ep->com.state = MPA_REQ_WAIT;
start_ep_timer(ep);
set_bit(PASS_ESTAB, &ep->com.history);
- ret = send_flowc(ep, skb);
+ ret = send_flowc(ep);
mutex_unlock(&ep->com.mutex);
if (ret)
c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
@@ -2871,10 +2930,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
}
mutex_unlock(&ep->com.mutex);
- rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
- if (!rpl_skb) {
- printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
- __func__);
+ rpl_skb = skb_dequeue(&ep->com.ep_skb_list);
+ if (WARN_ON(!rpl_skb)) {
release = 1;
goto out;
}
@@ -3011,9 +3068,9 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
PDBG("%s last streaming msg ack ep %p tid %u state %u "
"initiator %u freeing skb\n", __func__, ep, ep->hwtid,
state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
+ mutex_lock(&ep->com.mutex);
kfree_skb(ep->mpa_skb);
ep->mpa_skb = NULL;
- mutex_lock(&ep->com.mutex);
if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
stop_ep_timer(ep);
mutex_unlock(&ep->com.mutex);
@@ -3025,9 +3082,9 @@ out:
int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
- int err = 0;
- int disconnect = 0;
+ int abort;
struct c4iw_ep *ep = to_ep(cm_id);
+
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
@@ -3038,16 +3095,13 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
}
set_bit(ULP_REJECT, &ep->com.history);
if (mpa_rev == 0)
- disconnect = 2;
- else {
- err = send_mpa_reject(ep, pdata, pdata_len);
- disconnect = 1;
- }
+ abort = 1;
+ else
+ abort = send_mpa_reject(ep, pdata, pdata_len);
mutex_unlock(&ep->com.mutex);
- if (disconnect) {
- stop_ep_timer(ep);
- err = c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
- }
+
+ stop_ep_timer(ep);
+ c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
c4iw_put_ep(&ep->com);
return 0;
}
@@ -3248,6 +3302,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
err = -ENOMEM;
goto out;
}
+
+ skb_queue_head_init(&ep->com.ep_skb_list);
+ if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) {
+ err = -ENOMEM;
+ goto fail1;
+ }
+
init_timer(&ep->timer);
ep->plen = conn_param->private_data_len;
if (ep->plen)
@@ -3266,7 +3327,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (!ep->com.qp) {
PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
err = -EINVAL;
- goto fail1;
+ goto fail2;
}
ref_qp(ep);
PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
@@ -3279,7 +3340,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (ep->atid == -1) {
printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
err = -ENOMEM;
- goto fail1;
+ goto fail2;
}
insert_handle(dev, &dev->atid_idr, ep, ep->atid);
@@ -3303,7 +3364,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
err = pick_local_ipaddrs(dev, cm_id);
if (err)
- goto fail1;
+ goto fail2;
}
/* find a route */
@@ -3323,7 +3384,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
err = pick_local_ip6addrs(dev, cm_id);
if (err)
- goto fail1;
+ goto fail2;
}
/* find a route */
@@ -3339,14 +3400,14 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (!ep->dst) {
printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
err = -EHOSTUNREACH;
- goto fail2;
+ goto fail3;
}
err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
if (err) {
printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
- goto fail3;
+ goto fail4;
}
PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
@@ -3362,13 +3423,15 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
goto out;
cxgb4_l2t_release(ep->l2t);
-fail3:
+fail4:
dst_release(ep->dst);
-fail2:
+fail3:
remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
-fail1:
+fail2:
+ skb_queue_purge(&ep->com.ep_skb_list);
deref_cm_id(&ep->com);
+fail1:
c4iw_put_ep(&ep->com);
out:
return err;
@@ -3461,6 +3524,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
err = -ENOMEM;
goto fail1;
}
+ skb_queue_head_init(&ep->com.ep_skb_list);
PDBG("%s ep %p\n", __func__, ep);
ep->com.cm_id = cm_id;
ref_cm_id(&ep->com);
@@ -3577,11 +3641,22 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
case MPA_REQ_RCVD:
case MPA_REP_SENT:
case FPDU_MODE:
+ case CONNECTING:
close = 1;
if (abrupt)
ep->com.state = ABORTING;
else {
ep->com.state = CLOSING;
+
+ /*
+ * if we close before we see the fw4_ack() then we fix
+ * up the timer state since we're reusing it.
+ */
+ if (ep->mpa_skb &&
+ test_bit(STOP_MPA_TIMER, &ep->com.flags)) {
+ clear_bit(STOP_MPA_TIMER, &ep->com.flags);
+ stop_ep_timer(ep);
+ }
start_ep_timer(ep);
}
set_bit(CLOSE_SENT, &ep->com.flags);
@@ -3611,10 +3686,10 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
if (abrupt) {
set_bit(EP_DISC_ABORT, &ep->com.history);
close_complete_upcall(ep, -ECONNRESET);
- ret = send_abort(ep, NULL, gfp);
+ ret = send_abort(ep);
} else {
set_bit(EP_DISC_CLOSE, &ep->com.history);
- ret = send_halfclose(ep, gfp);
+ ret = send_halfclose(ep);
}
if (ret) {
set_bit(EP_DISC_FAIL, &ep->com.history);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index b0b955724458..812ab7278b8e 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -33,19 +33,15 @@
#include "iw_cxgb4.h"
static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
- struct c4iw_dev_ucontext *uctx)
+ struct c4iw_dev_ucontext *uctx, struct sk_buff *skb)
{
struct fw_ri_res_wr *res_wr;
struct fw_ri_res *res;
int wr_len;
struct c4iw_wr_wait wr_wait;
- struct sk_buff *skb;
int ret;
wr_len = sizeof *res_wr + sizeof *res;
- skb = alloc_skb(wr_len, GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
@@ -863,7 +859,9 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq)
ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
: NULL;
destroy_cq(&chp->rhp->rdev, &chp->cq,
- ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx);
+ ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
+ chp->destroy_skb);
+ chp->destroy_skb = NULL;
kfree(chp);
return 0;
}
@@ -879,7 +877,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
struct c4iw_cq *chp;
struct c4iw_create_cq_resp uresp;
struct c4iw_ucontext *ucontext = NULL;
- int ret;
+ int ret, wr_len;
size_t memsize, hwentries;
struct c4iw_mm_entry *mm, *mm2;
@@ -896,6 +894,13 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
if (!chp)
return ERR_PTR(-ENOMEM);
+ wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
+ chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
+ if (!chp->destroy_skb) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
if (ib_context)
ucontext = to_c4iw_ucontext(ib_context);
@@ -936,7 +941,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
ret = create_cq(&rhp->rdev, &chp->cq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
if (ret)
- goto err1;
+ goto err2;
chp->rhp = rhp;
chp->cq.size--; /* status page */
@@ -947,15 +952,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
init_waitqueue_head(&chp->wait);
ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
if (ret)
- goto err2;
+ goto err3;
if (ucontext) {
mm = kmalloc(sizeof *mm, GFP_KERNEL);
if (!mm)
- goto err3;
+ goto err4;
mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
if (!mm2)
- goto err4;
+ goto err5;
uresp.qid_mask = rhp->rdev.cqmask;
uresp.cqid = chp->cq.cqid;
@@ -970,7 +975,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
ret = ib_copy_to_udata(udata, &uresp,
sizeof(uresp) - sizeof(uresp.reserved));
if (ret)
- goto err5;
+ goto err6;
mm->key = uresp.key;
mm->addr = virt_to_phys(chp->cq.queue);
@@ -986,15 +991,18 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
__func__, chp->cq.cqid, chp, chp->cq.size,
chp->cq.memsize, (unsigned long long) chp->cq.dma_addr);
return &chp->ibcq;
-err5:
+err6:
kfree(mm2);
-err4:
+err5:
kfree(mm);
-err3:
+err4:
remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
-err2:
+err3:
destroy_cq(&chp->rhp->rdev, &chp->cq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
+ chp->destroy_skb);
+err2:
+ kfree_skb(chp->destroy_skb);
err1:
kfree(chp);
return ERR_PTR(ret);
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index ae2e8b23d2dd..071d7332ec06 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -317,7 +317,7 @@ static int qp_open(struct inode *inode, struct file *file)
idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
spin_unlock_irq(&qpd->devp->lock);
- qpd->bufsize = count * 128;
+ qpd->bufsize = count * 180;
qpd->buf = vmalloc(qpd->bufsize);
if (!qpd->buf) {
kfree(qpd);
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index f6f34a75af27..aa47e0ae80bc 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -384,6 +384,7 @@ struct c4iw_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
struct c4iw_dev *rhp;
+ struct sk_buff *dereg_skb;
u64 kva;
struct tpt_attributes attr;
u64 *mpl;
@@ -400,6 +401,7 @@ static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr)
struct c4iw_mw {
struct ib_mw ibmw;
struct c4iw_dev *rhp;
+ struct sk_buff *dereg_skb;
u64 kva;
struct tpt_attributes attr;
};
@@ -412,6 +414,7 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw)
struct c4iw_cq {
struct ib_cq ibcq;
struct c4iw_dev *rhp;
+ struct sk_buff *destroy_skb;
struct t4_cq cq;
spinlock_t lock;
spinlock_t comp_handler_lock;
@@ -472,7 +475,7 @@ struct c4iw_qp {
struct t4_wq wq;
spinlock_t lock;
struct mutex mutex;
- atomic_t refcnt;
+ struct kref kref;
wait_queue_head_t wait;
struct timer_list timer;
int sq_sig_all;
@@ -789,10 +792,29 @@ enum c4iw_ep_history {
CM_ID_DEREFED = 28,
};
+enum conn_pre_alloc_buffers {
+ CN_ABORT_REQ_BUF,
+ CN_ABORT_RPL_BUF,
+ CN_CLOSE_CON_REQ_BUF,
+ CN_DESTROY_BUF,
+ CN_FLOWC_BUF,
+ CN_MAX_CON_BUF
+};
+
+#define FLOWC_LEN 80
+union cpl_wr_size {
+ struct cpl_abort_req abrt_req;
+ struct cpl_abort_rpl abrt_rpl;
+ struct fw_ri_wr ri_req;
+ struct cpl_close_con_req close_req;
+ char flowc_buf[FLOWC_LEN];
+};
+
struct c4iw_ep_common {
struct iw_cm_id *cm_id;
struct c4iw_qp *qp;
struct c4iw_dev *dev;
+ struct sk_buff_head ep_skb_list;
enum c4iw_ep_state state;
struct kref kref;
struct mutex mutex;
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 55d0651ee4de..0b91b0f4df71 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -59,9 +59,9 @@ static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length)
}
static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
- u32 len, dma_addr_t data, int wait)
+ u32 len, dma_addr_t data,
+ int wait, struct sk_buff *skb)
{
- struct sk_buff *skb;
struct ulp_mem_io *req;
struct ulptx_sgl *sgl;
u8 wr_len;
@@ -74,9 +74,11 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
c4iw_init_wr_wait(&wr_wait);
wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16);
- skb = alloc_skb(wr_len, GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
+ if (!skb) {
+ skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
+ if (!skb)
+ return -ENOMEM;
+ }
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
@@ -108,9 +110,8 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
}
static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
- void *data)
+ void *data, struct sk_buff *skb)
{
- struct sk_buff *skb;
struct ulp_mem_io *req;
struct ulptx_idata *sc;
u8 wr_len, *to_dp, *from_dp;
@@ -134,9 +135,11 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
wr_len = roundup(sizeof *req + sizeof *sc +
roundup(copy_len, T4_ULPTX_MIN_IO), 16);
- skb = alloc_skb(wr_len, GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
+ if (!skb) {
+ skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
+ if (!skb)
+ return -ENOMEM;
+ }
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
@@ -173,6 +176,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO -
(copy_len % T4_ULPTX_MIN_IO));
ret = c4iw_ofld_send(rdev, skb);
+ skb = NULL;
if (ret)
return ret;
len -= C4IW_MAX_INLINE_SIZE;
@@ -182,7 +186,8 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
return ret;
}
-static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data)
+static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len,
+ void *data, struct sk_buff *skb)
{
u32 remain = len;
u32 dmalen;
@@ -205,7 +210,7 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *
dmalen = T4_ULPTX_MAX_DMA;
remain -= dmalen;
ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr,
- !remain);
+ !remain, skb);
if (ret)
goto out;
addr += dmalen >> 5;
@@ -213,7 +218,7 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *
daddr += dmalen;
}
if (remain)
- ret = _c4iw_write_mem_inline(rdev, addr, remain, data);
+ ret = _c4iw_write_mem_inline(rdev, addr, remain, data, skb);
out:
dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE);
return ret;
@@ -224,23 +229,25 @@ out:
* If data is NULL, clear len byte of memory to zero.
*/
static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,
- void *data)
+ void *data, struct sk_buff *skb)
{
if (is_t5(rdev->lldi.adapter_type) && use_dsgl) {
if (len > inline_threshold) {
- if (_c4iw_write_mem_dma(rdev, addr, len, data)) {
+ if (_c4iw_write_mem_dma(rdev, addr, len, data, skb)) {
printk_ratelimited(KERN_WARNING
"%s: dma map"
" failure (non fatal)\n",
pci_name(rdev->lldi.pdev));
return _c4iw_write_mem_inline(rdev, addr, len,
- data);
- } else
+ data, skb);
+ } else {
return 0;
+ }
} else
- return _c4iw_write_mem_inline(rdev, addr, len, data);
+ return _c4iw_write_mem_inline(rdev, addr,
+ len, data, skb);
} else
- return _c4iw_write_mem_inline(rdev, addr, len, data);
+ return _c4iw_write_mem_inline(rdev, addr, len, data, skb);
}
/*
@@ -253,7 +260,8 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
u32 *stag, u8 stag_state, u32 pdid,
enum fw_ri_stag_type type, enum fw_ri_mem_perms perm,
int bind_enabled, u32 zbva, u64 to,
- u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr)
+ u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr,
+ struct sk_buff *skb)
{
int err;
struct fw_ri_tpte tpt;
@@ -307,7 +315,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
}
err = write_adapter_mem(rdev, stag_idx +
(rdev->lldi.vr->stag.start >> 5),
- sizeof(tpt), &tpt);
+ sizeof(tpt), &tpt, skb);
if (reset_tpt_entry) {
c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
@@ -327,28 +335,29 @@ static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl,
__func__, pbl_addr, rdev->lldi.vr->pbl.start,
pbl_size);
- err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl);
+ err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL);
return err;
}
static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size,
- u32 pbl_addr)
+ u32 pbl_addr, struct sk_buff *skb)
{
return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0,
- pbl_size, pbl_addr);
+ pbl_size, pbl_addr, skb);
}
static int allocate_window(struct c4iw_rdev *rdev, u32 * stag, u32 pdid)
{
*stag = T4_STAG_UNSET;
return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0,
- 0UL, 0, 0, 0, 0);
+ 0UL, 0, 0, 0, 0, NULL);
}
-static int deallocate_window(struct c4iw_rdev *rdev, u32 stag)
+static int deallocate_window(struct c4iw_rdev *rdev, u32 stag,
+ struct sk_buff *skb)
{
return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0,
- 0);
+ 0, skb);
}
static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid,
@@ -356,7 +365,7 @@ static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid,
{
*stag = T4_STAG_UNSET;
return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0,
- 0UL, 0, 0, pbl_size, pbl_addr);
+ 0UL, 0, 0, pbl_size, pbl_addr, NULL);
}
static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
@@ -383,14 +392,16 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
mhp->attr.mw_bind_enable, mhp->attr.zbva,
mhp->attr.va_fbo, mhp->attr.len ?
mhp->attr.len : -1, shift - 12,
- mhp->attr.pbl_size, mhp->attr.pbl_addr);
+ mhp->attr.pbl_size, mhp->attr.pbl_addr, NULL);
if (ret)
return ret;
ret = finish_mem_reg(mhp, stag);
- if (ret)
+ if (ret) {
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
+ mhp->attr.pbl_addr, mhp->dereg_skb);
+ mhp->dereg_skb = NULL;
+ }
return ret;
}
@@ -423,6 +434,12 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
if (!mhp)
return ERR_PTR(-ENOMEM);
+ mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
+ if (!mhp->dereg_skb) {
+ ret = -ENOMEM;
+ goto err0;
+ }
+
mhp->rhp = rhp;
mhp->attr.pdid = php->pdid;
mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
@@ -435,7 +452,8 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid,
FW_RI_STAG_NSMR, mhp->attr.perms,
- mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0);
+ mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0,
+ NULL);
if (ret)
goto err1;
@@ -445,8 +463,10 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
return &mhp->ibmr;
err2:
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
+ mhp->attr.pbl_addr, mhp->dereg_skb);
err1:
+ kfree_skb(mhp->dereg_skb);
+err0:
kfree(mhp);
return ERR_PTR(ret);
}
@@ -481,11 +501,18 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mhp)
return ERR_PTR(-ENOMEM);
+ mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
+ if (!mhp->dereg_skb) {
+ kfree(mhp);
+ return ERR_PTR(-ENOMEM);
+ }
+
mhp->rhp = rhp;
mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
+ kfree_skb(mhp->dereg_skb);
kfree(mhp);
return ERR_PTR(err);
}
@@ -550,6 +577,7 @@ err_pbl:
err:
ib_umem_release(mhp->umem);
+ kfree_skb(mhp->dereg_skb);
kfree(mhp);
return ERR_PTR(err);
}
@@ -572,11 +600,16 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
if (!mhp)
return ERR_PTR(-ENOMEM);
- ret = allocate_window(&rhp->rdev, &stag, php->pdid);
- if (ret) {
- kfree(mhp);
- return ERR_PTR(ret);
+
+ mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
+ if (!mhp->dereg_skb) {
+ ret = -ENOMEM;
+ goto free_mhp;
}
+
+ ret = allocate_window(&rhp->rdev, &stag, php->pdid);
+ if (ret)
+ goto free_skb;
mhp->rhp = rhp;
mhp->attr.pdid = php->pdid;
mhp->attr.type = FW_RI_STAG_MW;
@@ -584,12 +617,19 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
mmid = (stag) >> 8;
mhp->ibmw.rkey = stag;
if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
- deallocate_window(&rhp->rdev, mhp->attr.stag);
- kfree(mhp);
- return ERR_PTR(-ENOMEM);
+ ret = -ENOMEM;
+ goto dealloc_win;
}
PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmw);
+
+dealloc_win:
+ deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb);
+free_skb:
+ kfree_skb(mhp->dereg_skb);
+free_mhp:
+ kfree(mhp);
+ return ERR_PTR(ret);
}
int c4iw_dealloc_mw(struct ib_mw *mw)
@@ -602,7 +642,8 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
rhp = mhp->rhp;
mmid = (mw->rkey) >> 8;
remove_handle(rhp, &rhp->mmidr, mmid);
- deallocate_window(&rhp->rdev, mhp->attr.stag);
+ deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb);
+ kfree_skb(mhp->dereg_skb);
kfree(mhp);
PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
return 0;
@@ -666,7 +707,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
return &(mhp->ibmr);
err3:
dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
+ mhp->attr.pbl_addr, mhp->dereg_skb);
err2:
c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
mhp->attr.pbl_size << 3);
@@ -717,7 +758,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
+ mhp->attr.pbl_addr, mhp->dereg_skb);
if (mhp->attr.pbl_size)
c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
mhp->attr.pbl_size << 3);
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index dd8a86b726d2..df127ce6b6ec 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -409,20 +409,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
}
-static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
- ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
-
- return sprintf(buf, "%u.%u.%u.%u\n",
- FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers),
- FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers),
- FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers),
- FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers));
-}
-
static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -502,13 +488,11 @@ static int c4iw_get_mib(struct ib_device *ibdev,
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *c4iw_class_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id,
};
@@ -530,6 +514,20 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+static void get_dev_fw_str(struct ib_device *dev, char *str,
+ size_t str_len)
+{
+ struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
+ ibdev);
+ PDBG("%s dev 0x%p\n", __func__, dev);
+
+ snprintf(str, str_len, "%u.%u.%u.%u",
+ FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers),
+ FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers),
+ FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers),
+ FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers));
+}
+
int c4iw_register_device(struct c4iw_dev *dev)
{
int ret;
@@ -605,6 +603,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.get_hw_stats = c4iw_get_mib;
dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
dev->ibdev.get_port_immutable = c4iw_port_immutable;
+ dev->ibdev.get_dev_fw_str = get_dev_fw_str;
dev->ibdev.drain_sq = c4iw_drain_sq;
dev->ibdev.drain_rq = c4iw_drain_rq;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index e8993e49b8b3..edb1172b6f54 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -683,17 +683,25 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr,
return 0;
}
+void _free_qp(struct kref *kref)
+{
+ struct c4iw_qp *qhp;
+
+ qhp = container_of(kref, struct c4iw_qp, kref);
+ PDBG("%s qhp %p\n", __func__, qhp);
+ kfree(qhp);
+}
+
void c4iw_qp_add_ref(struct ib_qp *qp)
{
PDBG("%s ib_qp %p\n", __func__, qp);
- atomic_inc(&(to_c4iw_qp(qp)->refcnt));
+ kref_get(&to_c4iw_qp(qp)->kref);
}
void c4iw_qp_rem_ref(struct ib_qp *qp)
{
PDBG("%s ib_qp %p\n", __func__, qp);
- if (atomic_dec_and_test(&(to_c4iw_qp(qp)->refcnt)))
- wake_up(&(to_c4iw_qp(qp)->wait));
+ kref_put(&to_c4iw_qp(qp)->kref, _free_qp);
}
static void add_to_fc_list(struct list_head *head, struct list_head *entry)
@@ -1081,9 +1089,10 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
qhp->ep->hwtid);
- skb = alloc_skb(sizeof *wqe, gfp);
- if (!skb)
+ skb = skb_dequeue(&qhp->ep->com.ep_skb_list);
+ if (WARN_ON(!skb))
return;
+
set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
@@ -1202,9 +1211,10 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
ep->hwtid);
- skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
- if (!skb)
+ skb = skb_dequeue(&ep->com.ep_skb_list);
+ if (WARN_ON(!skb))
return -ENOMEM;
+
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
@@ -1592,8 +1602,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
wait_event(qhp->wait, !qhp->ep);
remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
- atomic_dec(&qhp->refcnt);
- wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
spin_lock_irq(&rhp->lock);
if (!list_empty(&qhp->db_fc_entry))
@@ -1606,8 +1614,9 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+ c4iw_qp_rem_ref(ib_qp);
+
PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
- kfree(qhp);
return 0;
}
@@ -1704,7 +1713,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
init_completion(&qhp->rq_drained);
mutex_init(&qhp->mutex);
init_waitqueue_head(&qhp->wait);
- atomic_set(&qhp->refcnt, 1);
+ kref_init(&qhp->kref);
ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
if (ret)
@@ -1896,12 +1905,20 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
return 0;
}
+static void move_qp_to_err(struct c4iw_qp *qp)
+{
+ struct c4iw_qp_attributes attrs = { .next_state = C4IW_QP_STATE_ERROR };
+
+ (void)c4iw_modify_qp(qp->rhp, qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+}
+
void c4iw_drain_sq(struct ib_qp *ibqp)
{
struct c4iw_qp *qp = to_c4iw_qp(ibqp);
unsigned long flag;
bool need_to_wait;
+ move_qp_to_err(qp);
spin_lock_irqsave(&qp->lock, flag);
need_to_wait = !t4_sq_empty(&qp->wq);
spin_unlock_irqrestore(&qp->lock, flag);
@@ -1916,6 +1933,7 @@ void c4iw_drain_rq(struct ib_qp *ibqp)
unsigned long flag;
bool need_to_wait;
+ move_qp_to_err(qp);
spin_lock_irqsave(&qp->lock, flag);
need_to_wait = !t4_rq_empty(&qp->wq);
spin_unlock_irqrestore(&qp->lock, flag);
diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig
index a925fb0db706..f6ea0881765a 100644
--- a/drivers/infiniband/hw/hfi1/Kconfig
+++ b/drivers/infiniband/hw/hfi1/Kconfig
@@ -1,9 +1,9 @@
config INFINIBAND_HFI1
tristate "Intel OPA Gen1 support"
- depends on X86_64 && INFINIBAND_RDMAVT
+ depends on X86_64 && INFINIBAND_RDMAVT && I2C
select MMU_NOTIFIER
select CRC32
- default m
+ select I2C_ALGOBIT
---help---
This is a low-level driver for Intel OPA Gen1 adapter.
config HFI1_DEBUG_SDMA_ORDER
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 9b5382c94b0c..0cf97a09b64b 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
eprom.o file_ops.o firmware.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
- qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \
+ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
verbs_txreq.o
hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 14d7eeb09be6..79575ee873f2 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -47,12 +47,18 @@
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/module.h>
+#include <linux/cpumask.h>
#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
#include "trace.h"
+struct hfi1_affinity_node_list node_affinity = {
+ .list = LIST_HEAD_INIT(node_affinity.list),
+ .lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock),
+};
+
/* Name of IRQ types, indexed by enum irq_type */
static const char * const irq_type_names[] = {
"SDMA",
@@ -61,6 +67,9 @@ static const char * const irq_type_names[] = {
"OTHER",
};
+/* Per NUMA node count of HFI devices */
+static unsigned int *hfi1_per_node_cntr;
+
static inline void init_cpu_mask_set(struct cpu_mask_set *set)
{
cpumask_clear(&set->mask);
@@ -69,47 +78,136 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set)
}
/* Initialize non-HT cpu cores mask */
-int init_real_cpu_mask(struct hfi1_devdata *dd)
+void init_real_cpu_mask(void)
{
- struct hfi1_affinity *info;
int possible, curr_cpu, i, ht;
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- return -ENOMEM;
-
- cpumask_clear(&info->real_cpu_mask);
+ cpumask_clear(&node_affinity.real_cpu_mask);
/* Start with cpu online mask as the real cpu mask */
- cpumask_copy(&info->real_cpu_mask, cpu_online_mask);
+ cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);
/*
* Remove HT cores from the real cpu mask. Do this in two steps below.
*/
- possible = cpumask_weight(&info->real_cpu_mask);
+ possible = cpumask_weight(&node_affinity.real_cpu_mask);
ht = cpumask_weight(topology_sibling_cpumask(
- cpumask_first(&info->real_cpu_mask)));
+ cpumask_first(&node_affinity.real_cpu_mask)));
/*
* Step 1. Skip over the first N HT siblings and use them as the
* "real" cores. Assumes that HT cores are not enumerated in
* succession (except in the single core case).
*/
- curr_cpu = cpumask_first(&info->real_cpu_mask);
+ curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
for (i = 0; i < possible / ht; i++)
- curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
+ curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
/*
* Step 2. Remove the remaining HT siblings. Use cpumask_next() to
* skip any gaps.
*/
for (; i < possible; i++) {
- cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask);
- curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
+ cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
+ curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
+ }
+}
+
+int node_affinity_init(void)
+{
+ int node;
+ struct pci_dev *dev = NULL;
+ const struct pci_device_id *ids = hfi1_pci_tbl;
+
+ cpumask_clear(&node_affinity.proc.used);
+ cpumask_copy(&node_affinity.proc.mask, cpu_online_mask);
+
+ node_affinity.proc.gen = 0;
+ node_affinity.num_core_siblings =
+ cpumask_weight(topology_sibling_cpumask(
+ cpumask_first(&node_affinity.proc.mask)
+ ));
+ node_affinity.num_online_nodes = num_online_nodes();
+ node_affinity.num_online_cpus = num_online_cpus();
+
+ /*
+ * The real cpu mask is part of the affinity struct but it has to be
+ * initialized early. It is needed to calculate the number of user
+ * contexts in set_up_context_variables().
+ */
+ init_real_cpu_mask();
+
+ hfi1_per_node_cntr = kcalloc(num_possible_nodes(),
+ sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
+ if (!hfi1_per_node_cntr)
+ return -ENOMEM;
+
+ while (ids->vendor) {
+ dev = NULL;
+ while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
+ node = pcibus_to_node(dev->bus);
+ if (node < 0)
+ node = numa_node_id();
+
+ hfi1_per_node_cntr[node]++;
+ }
+ ids++;
}
- dd->affinity = info;
return 0;
}
+void node_affinity_destroy(void)
+{
+ struct list_head *pos, *q;
+ struct hfi1_affinity_node *entry;
+
+ spin_lock(&node_affinity.lock);
+ list_for_each_safe(pos, q, &node_affinity.list) {
+ entry = list_entry(pos, struct hfi1_affinity_node,
+ list);
+ list_del(pos);
+ kfree(entry);
+ }
+ spin_unlock(&node_affinity.lock);
+ kfree(hfi1_per_node_cntr);
+}
+
+static struct hfi1_affinity_node *node_affinity_allocate(int node)
+{
+ struct hfi1_affinity_node *entry;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+ entry->node = node;
+ INIT_LIST_HEAD(&entry->list);
+
+ return entry;
+}
+
+/*
+ * It appends an entry to the list.
+ * It *must* be called with node_affinity.lock held.
+ */
+static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
+{
+ list_add_tail(&entry->list, &node_affinity.list);
+}
+
+/* It must be called with node_affinity.lock held */
+static struct hfi1_affinity_node *node_affinity_lookup(int node)
+{
+ struct list_head *pos;
+ struct hfi1_affinity_node *entry;
+
+ list_for_each(pos, &node_affinity.list) {
+ entry = list_entry(pos, struct hfi1_affinity_node, list);
+ if (entry->node == node)
+ return entry;
+ }
+
+ return NULL;
+}
+
/*
* Interrupt affinity.
*
@@ -121,10 +219,10 @@ int init_real_cpu_mask(struct hfi1_devdata *dd)
* to the node relative 1 as necessary.
*
*/
-void hfi1_dev_affinity_init(struct hfi1_devdata *dd)
+int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
int node = pcibus_to_node(dd->pcidev->bus);
- struct hfi1_affinity *info = dd->affinity;
+ struct hfi1_affinity_node *entry;
const struct cpumask *local_mask;
int curr_cpu, possible, i;
@@ -132,56 +230,93 @@ void hfi1_dev_affinity_init(struct hfi1_devdata *dd)
node = numa_node_id();
dd->node = node;
- spin_lock_init(&info->lock);
-
- init_cpu_mask_set(&info->def_intr);
- init_cpu_mask_set(&info->rcv_intr);
- init_cpu_mask_set(&info->proc);
-
local_mask = cpumask_of_node(dd->node);
if (cpumask_first(local_mask) >= nr_cpu_ids)
local_mask = topology_core_cpumask(0);
- /* Use the "real" cpu mask of this node as the default */
- cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask);
-
- /* fill in the receive list */
- possible = cpumask_weight(&info->def_intr.mask);
- curr_cpu = cpumask_first(&info->def_intr.mask);
- if (possible == 1) {
- /* only one CPU, everyone will use it */
- cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
- } else {
- /*
- * Retain the first CPU in the default list for the control
- * context.
- */
- curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
- /*
- * Remove the remaining kernel receive queues from
- * the default list and add them to the receive list.
- */
- for (i = 0; i < dd->n_krcv_queues - 1; i++) {
- cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
- cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
- curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
- if (curr_cpu >= nr_cpu_ids)
- break;
+
+ spin_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ spin_unlock(&node_affinity.lock);
+
+ /*
+ * If this is the first time this NUMA node's affinity is used,
+ * create an entry in the global affinity structure and initialize it.
+ */
+ if (!entry) {
+ entry = node_affinity_allocate(node);
+ if (!entry) {
+ dd_dev_err(dd,
+ "Unable to allocate global affinity node\n");
+ return -ENOMEM;
}
- }
+ init_cpu_mask_set(&entry->def_intr);
+ init_cpu_mask_set(&entry->rcv_intr);
+ cpumask_clear(&entry->general_intr_mask);
+ /* Use the "real" cpu mask of this node as the default */
+ cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
+ local_mask);
+
+ /* fill in the receive list */
+ possible = cpumask_weight(&entry->def_intr.mask);
+ curr_cpu = cpumask_first(&entry->def_intr.mask);
+
+ if (possible == 1) {
+ /* only one CPU, everyone will use it */
+ cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
+ cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
+ } else {
+ /*
+ * The general/control context will be the first CPU in
+ * the default list, so it is removed from the default
+ * list and added to the general interrupt list.
+ */
+ cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask);
+ cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
+ curr_cpu = cpumask_next(curr_cpu,
+ &entry->def_intr.mask);
- cpumask_copy(&info->proc.mask, cpu_online_mask);
-}
+ /*
+ * Remove the remaining kernel receive queues from
+ * the default list and add them to the receive list.
+ */
+ for (i = 0;
+ i < (dd->n_krcv_queues - 1) *
+ hfi1_per_node_cntr[dd->node];
+ i++) {
+ cpumask_clear_cpu(curr_cpu,
+ &entry->def_intr.mask);
+ cpumask_set_cpu(curr_cpu,
+ &entry->rcv_intr.mask);
+ curr_cpu = cpumask_next(curr_cpu,
+ &entry->def_intr.mask);
+ if (curr_cpu >= nr_cpu_ids)
+ break;
+ }
-void hfi1_dev_affinity_free(struct hfi1_devdata *dd)
-{
- kfree(dd->affinity);
+ /*
+ * If there ends up being 0 CPU cores leftover for SDMA
+ * engines, use the same CPU cores as general/control
+ * context.
+ */
+ if (cpumask_weight(&entry->def_intr.mask) == 0)
+ cpumask_copy(&entry->def_intr.mask,
+ &entry->general_intr_mask);
+ }
+
+ spin_lock(&node_affinity.lock);
+ node_affinity_add_tail(entry);
+ spin_unlock(&node_affinity.lock);
+ }
+
+ return 0;
}
int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
{
int ret;
cpumask_var_t diff;
- struct cpu_mask_set *set;
+ struct hfi1_affinity_node *entry;
+ struct cpu_mask_set *set = NULL;
struct sdma_engine *sde = NULL;
struct hfi1_ctxtdata *rcd = NULL;
char extra[64];
@@ -194,22 +329,25 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
if (!ret)
return -ENOMEM;
+ spin_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ spin_unlock(&node_affinity.lock);
+
switch (msix->type) {
case IRQ_SDMA:
sde = (struct sdma_engine *)msix->arg;
scnprintf(extra, 64, "engine %u", sde->this_idx);
- /* fall through */
+ set = &entry->def_intr;
+ break;
case IRQ_GENERAL:
- set = &dd->affinity->def_intr;
+ cpu = cpumask_first(&entry->general_intr_mask);
break;
case IRQ_RCVCTXT:
rcd = (struct hfi1_ctxtdata *)msix->arg;
- if (rcd->ctxt == HFI1_CTRL_CTXT) {
- set = &dd->affinity->def_intr;
- cpu = cpumask_first(&set->mask);
- } else {
- set = &dd->affinity->rcv_intr;
- }
+ if (rcd->ctxt == HFI1_CTRL_CTXT)
+ cpu = cpumask_first(&entry->general_intr_mask);
+ else
+ set = &entry->rcv_intr;
scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
break;
default:
@@ -218,12 +356,12 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
}
/*
- * The control receive context is placed on a particular CPU, which
- * is set above. Skip accounting for it. Everything else finds its
- * CPU here.
+ * The general and control contexts are placed on a particular
+ * CPU, which is set above. Skip accounting for it. Everything else
+ * finds its CPU here.
*/
- if (cpu == -1) {
- spin_lock(&dd->affinity->lock);
+ if (cpu == -1 && set) {
+ spin_lock(&node_affinity.lock);
if (cpumask_equal(&set->mask, &set->used)) {
/*
* We've used up all the CPUs, bump up the generation
@@ -235,7 +373,7 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
cpumask_andnot(diff, &set->mask, &set->used);
cpu = cpumask_first(diff);
cpumask_set_cpu(cpu, &set->used);
- spin_unlock(&dd->affinity->lock);
+ spin_unlock(&node_affinity.lock);
}
switch (msix->type) {
@@ -263,43 +401,84 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
{
struct cpu_mask_set *set = NULL;
struct hfi1_ctxtdata *rcd;
+ struct hfi1_affinity_node *entry;
+
+ spin_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ spin_unlock(&node_affinity.lock);
switch (msix->type) {
case IRQ_SDMA:
+ set = &entry->def_intr;
+ break;
case IRQ_GENERAL:
- set = &dd->affinity->def_intr;
+ /* Don't do accounting for general contexts */
break;
case IRQ_RCVCTXT:
rcd = (struct hfi1_ctxtdata *)msix->arg;
- /* only do accounting for non control contexts */
+ /* Don't do accounting for control contexts */
if (rcd->ctxt != HFI1_CTRL_CTXT)
- set = &dd->affinity->rcv_intr;
+ set = &entry->rcv_intr;
break;
default:
return;
}
if (set) {
- spin_lock(&dd->affinity->lock);
+ spin_lock(&node_affinity.lock);
cpumask_andnot(&set->used, &set->used, &msix->mask);
if (cpumask_empty(&set->used) && set->gen) {
set->gen--;
cpumask_copy(&set->used, &set->mask);
}
- spin_unlock(&dd->affinity->lock);
+ spin_unlock(&node_affinity.lock);
}
irq_set_affinity_hint(msix->msix.vector, NULL);
cpumask_clear(&msix->mask);
}
-int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
+/* This should be called with node_affinity.lock held */
+static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
+ struct hfi1_affinity_node_list *affinity)
{
- int cpu = -1, ret;
- cpumask_var_t diff, mask, intrs;
+ int possible, curr_cpu, i;
+ uint num_cores_per_socket = node_affinity.num_online_cpus /
+ affinity->num_core_siblings /
+ node_affinity.num_online_nodes;
+
+ cpumask_copy(hw_thread_mask, &affinity->proc.mask);
+ if (affinity->num_core_siblings > 0) {
+ /* Removing other siblings not needed for now */
+ possible = cpumask_weight(hw_thread_mask);
+ curr_cpu = cpumask_first(hw_thread_mask);
+ for (i = 0;
+ i < num_cores_per_socket * node_affinity.num_online_nodes;
+ i++)
+ curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
+
+ for (; i < possible; i++) {
+ cpumask_clear_cpu(curr_cpu, hw_thread_mask);
+ curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
+ }
+
+ /* Identifying correct HW threads within physical cores */
+ cpumask_shift_left(hw_thread_mask, hw_thread_mask,
+ num_cores_per_socket *
+ node_affinity.num_online_nodes *
+ hw_thread_no);
+ }
+}
+
+int hfi1_get_proc_affinity(int node)
+{
+ int cpu = -1, ret, i;
+ struct hfi1_affinity_node *entry;
+ cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
const struct cpumask *node_mask,
*proc_mask = tsk_cpus_allowed(current);
- struct cpu_mask_set *set = &dd->affinity->proc;
+ struct hfi1_affinity_node_list *affinity = &node_affinity;
+ struct cpu_mask_set *set = &affinity->proc;
/*
* check whether process/context affinity has already
@@ -325,22 +504,41 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
/*
* The process does not have a preset CPU affinity so find one to
- * recommend. We prefer CPUs on the same NUMA as the device.
+ * recommend using the following algorithm:
+ *
+ * For each user process that is opening a context on HFI Y:
+ * a) If all cores are filled, reinitialize the bitmask
+ * b) Fill real cores first, then HT cores (First set of HT
+ * cores on all physical cores, then second set of HT core,
+ * and, so on) in the following order:
+ *
+ * 1. Same NUMA node as HFI Y and not running an IRQ
+ * handler
+ * 2. Same NUMA node as HFI Y and running an IRQ handler
+ * 3. Different NUMA node to HFI Y and not running an IRQ
+ * handler
+ * 4. Different NUMA node to HFI Y and running an IRQ
+ * handler
+ * c) Mark core as filled in the bitmask. As user processes are
+ * done, clear cores from the bitmask.
*/
ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
if (!ret)
goto done;
- ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+ ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL);
if (!ret)
goto free_diff;
- ret = zalloc_cpumask_var(&intrs, GFP_KERNEL);
+ ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL);
+ if (!ret)
+ goto free_hw_thread_mask;
+ ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL);
if (!ret)
- goto free_mask;
+ goto free_available_mask;
- spin_lock(&dd->affinity->lock);
+ spin_lock(&affinity->lock);
/*
- * If we've used all available CPUs, clear the mask and start
+ * If we've used all available HW threads, clear the mask and start
* overloading.
*/
if (cpumask_equal(&set->mask, &set->used)) {
@@ -348,81 +546,198 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
cpumask_clear(&set->used);
}
- /* CPUs used by interrupt handlers */
- cpumask_copy(intrs, (dd->affinity->def_intr.gen ?
- &dd->affinity->def_intr.mask :
- &dd->affinity->def_intr.used));
- cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ?
- &dd->affinity->rcv_intr.mask :
- &dd->affinity->rcv_intr.used));
+ /*
+ * If NUMA node has CPUs used by interrupt handlers, include them in the
+ * interrupt handler mask.
+ */
+ entry = node_affinity_lookup(node);
+ if (entry) {
+ cpumask_copy(intrs_mask, (entry->def_intr.gen ?
+ &entry->def_intr.mask :
+ &entry->def_intr.used));
+ cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ?
+ &entry->rcv_intr.mask :
+ &entry->rcv_intr.used));
+ cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask);
+ }
hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
- cpumask_pr_args(intrs));
+ cpumask_pr_args(intrs_mask));
+
+ cpumask_copy(hw_thread_mask, &set->mask);
/*
- * If we don't have a NUMA node requested, preference is towards
- * device NUMA node
+ * If HT cores are enabled, identify which HW threads within the
+ * physical cores should be used.
*/
- if (node == -1)
- node = dd->node;
+ if (affinity->num_core_siblings > 0) {
+ for (i = 0; i < affinity->num_core_siblings; i++) {
+ find_hw_thread_mask(i, hw_thread_mask, affinity);
+
+ /*
+ * If there's at least one available core for this HW
+ * thread number, stop looking for a core.
+ *
+ * diff will always be not empty at least once in this
+ * loop as the used mask gets reset when
+ * (set->mask == set->used) before this loop.
+ */
+ cpumask_andnot(diff, hw_thread_mask, &set->used);
+ if (!cpumask_empty(diff))
+ break;
+ }
+ }
+ hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
+ cpumask_pr_args(hw_thread_mask));
+
node_mask = cpumask_of_node(node);
- hfi1_cdbg(PROC, "device on NUMA %u, CPUs %*pbl", node,
+ hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node,
cpumask_pr_args(node_mask));
- /* diff will hold all unused cpus */
- cpumask_andnot(diff, &set->mask, &set->used);
- hfi1_cdbg(PROC, "unused CPUs (all) %*pbl", cpumask_pr_args(diff));
-
- /* get cpumask of available CPUs on preferred NUMA */
- cpumask_and(mask, diff, node_mask);
- hfi1_cdbg(PROC, "available cpus on NUMA %*pbl", cpumask_pr_args(mask));
+ /* Get cpumask of available CPUs on preferred NUMA */
+ cpumask_and(available_mask, hw_thread_mask, node_mask);
+ cpumask_andnot(available_mask, available_mask, &set->used);
+ hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node,
+ cpumask_pr_args(available_mask));
/*
* At first, we don't want to place processes on the same
- * CPUs as interrupt handlers.
+ * CPUs as interrupt handlers. Then, CPUs running interrupt
+ * handlers are used.
+ *
+ * 1) If diff is not empty, then there are CPUs not running
+ * non-interrupt handlers available, so diff gets copied
+ * over to available_mask.
+ * 2) If diff is empty, then all CPUs not running interrupt
+ * handlers are taken, so available_mask contains all
+ * available CPUs running interrupt handlers.
+ * 3) If available_mask is empty, then all CPUs on the
+ * preferred NUMA node are taken, so other NUMA nodes are
+ * used for process assignments using the same method as
+ * the preferred NUMA node.
*/
- cpumask_andnot(diff, mask, intrs);
+ cpumask_andnot(diff, available_mask, intrs_mask);
if (!cpumask_empty(diff))
- cpumask_copy(mask, diff);
+ cpumask_copy(available_mask, diff);
- /*
- * if we don't have a cpu on the preferred NUMA, get
- * the list of the remaining available CPUs
- */
- if (cpumask_empty(mask)) {
- cpumask_andnot(diff, &set->mask, &set->used);
- cpumask_andnot(mask, diff, node_mask);
+ /* If we don't have CPUs on the preferred node, use other NUMA nodes */
+ if (cpumask_empty(available_mask)) {
+ cpumask_andnot(available_mask, hw_thread_mask, &set->used);
+ /* Excluding preferred NUMA cores */
+ cpumask_andnot(available_mask, available_mask, node_mask);
+ hfi1_cdbg(PROC,
+ "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl",
+ cpumask_pr_args(available_mask));
+
+ /*
+ * At first, we don't want to place processes on the same
+ * CPUs as interrupt handlers.
+ */
+ cpumask_andnot(diff, available_mask, intrs_mask);
+ if (!cpumask_empty(diff))
+ cpumask_copy(available_mask, diff);
}
- hfi1_cdbg(PROC, "possible CPUs for process %*pbl",
- cpumask_pr_args(mask));
+ hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
+ cpumask_pr_args(available_mask));
- cpu = cpumask_first(mask);
+ cpu = cpumask_first(available_mask);
if (cpu >= nr_cpu_ids) /* empty */
cpu = -1;
else
cpumask_set_cpu(cpu, &set->used);
- spin_unlock(&dd->affinity->lock);
-
- free_cpumask_var(intrs);
-free_mask:
- free_cpumask_var(mask);
+ spin_unlock(&affinity->lock);
+ hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);
+
+ free_cpumask_var(intrs_mask);
+free_available_mask:
+ free_cpumask_var(available_mask);
+free_hw_thread_mask:
+ free_cpumask_var(hw_thread_mask);
free_diff:
free_cpumask_var(diff);
done:
return cpu;
}
-void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu)
+void hfi1_put_proc_affinity(int cpu)
{
- struct cpu_mask_set *set = &dd->affinity->proc;
+ struct hfi1_affinity_node_list *affinity = &node_affinity;
+ struct cpu_mask_set *set = &affinity->proc;
if (cpu < 0)
return;
- spin_lock(&dd->affinity->lock);
+ spin_lock(&affinity->lock);
cpumask_clear_cpu(cpu, &set->used);
+ hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
if (cpumask_empty(&set->used) && set->gen) {
set->gen--;
cpumask_copy(&set->used, &set->mask);
}
- spin_unlock(&dd->affinity->lock);
+ spin_unlock(&affinity->lock);
}
+/* Prevents concurrent reads and writes of the sdma_affinity attrib */
+static DEFINE_MUTEX(sdma_affinity_mutex);
+
+int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
+ size_t count)
+{
+ struct hfi1_affinity_node *entry;
+ struct cpumask mask;
+ int ret, i;
+
+ spin_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ spin_unlock(&node_affinity.lock);
+
+ if (!entry)
+ return -EINVAL;
+
+ ret = cpulist_parse(buf, &mask);
+ if (ret)
+ return ret;
+
+ if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) {
+ dd_dev_warn(dd, "Invalid CPU mask\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&sdma_affinity_mutex);
+ /* reset the SDMA interrupt affinity details */
+ init_cpu_mask_set(&entry->def_intr);
+ cpumask_copy(&entry->def_intr.mask, &mask);
+ /*
+ * Reassign the affinity for each SDMA interrupt.
+ */
+ for (i = 0; i < dd->num_msix_entries; i++) {
+ struct hfi1_msix_entry *msix;
+
+ msix = &dd->msix_entries[i];
+ if (msix->type != IRQ_SDMA)
+ continue;
+
+ ret = hfi1_get_irq_affinity(dd, msix);
+
+ if (ret)
+ break;
+ }
+
+ mutex_unlock(&sdma_affinity_mutex);
+ return ret ? ret : strnlen(buf, PAGE_SIZE);
+}
+
+int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf)
+{
+ struct hfi1_affinity_node *entry;
+
+ spin_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ spin_unlock(&node_affinity.lock);
+
+ if (!entry)
+ return -EINVAL;
+
+ mutex_lock(&sdma_affinity_mutex);
+ cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask);
+ mutex_unlock(&sdma_affinity_mutex);
+ return strnlen(buf, PAGE_SIZE);
+}
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 20f52fe74091..8879cf7a8cac 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -73,7 +73,6 @@ struct cpu_mask_set {
struct hfi1_affinity {
struct cpu_mask_set def_intr;
struct cpu_mask_set rcv_intr;
- struct cpu_mask_set proc;
struct cpumask real_cpu_mask;
/* spin lock to protect affinity struct */
spinlock_t lock;
@@ -82,11 +81,9 @@ struct hfi1_affinity {
struct hfi1_msix_entry;
/* Initialize non-HT cpu cores mask */
-int init_real_cpu_mask(struct hfi1_devdata *);
+void init_real_cpu_mask(void);
/* Initialize driver affinity data */
-void hfi1_dev_affinity_init(struct hfi1_devdata *);
-/* Free driver affinity data */
-void hfi1_dev_affinity_free(struct hfi1_devdata *);
+int hfi1_dev_affinity_init(struct hfi1_devdata *);
/*
* Set IRQ affinity to a CPU. The function will determine the
* CPU and set the affinity to it.
@@ -101,8 +98,35 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
* Determine a CPU affinity for a user process, if the process does not
* have an affinity set yet.
*/
-int hfi1_get_proc_affinity(struct hfi1_devdata *, int);
+int hfi1_get_proc_affinity(int);
/* Release a CPU used by a user process. */
-void hfi1_put_proc_affinity(struct hfi1_devdata *, int);
+void hfi1_put_proc_affinity(int);
+
+int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf);
+int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
+ size_t count);
+
+struct hfi1_affinity_node {
+ int node;
+ struct cpu_mask_set def_intr;
+ struct cpu_mask_set rcv_intr;
+ struct cpumask general_intr_mask;
+ struct list_head list;
+};
+
+struct hfi1_affinity_node_list {
+ struct list_head list;
+ struct cpumask real_cpu_mask;
+ struct cpu_mask_set proc;
+ int num_core_siblings;
+ int num_online_nodes;
+ int num_online_cpus;
+ /* protect affinity node list */
+ spinlock_t lock;
+};
+
+int node_affinity_init(void);
+void node_affinity_destroy(void);
+extern struct hfi1_affinity_node_list node_affinity;
#endif /* _HFI1_AFFINITY_H */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index dad4d0ebbdff..b32638d58ae8 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -63,6 +63,7 @@
#include "efivar.h"
#include "platform.h"
#include "aspm.h"
+#include "affinity.h"
#define NUM_IB_PORTS 1
@@ -121,6 +122,7 @@ struct flag_table {
#define SEC_SC_HALTED 0x4 /* per-context only */
#define SEC_SPC_FREEZE 0x8 /* per-HFI only */
+#define DEFAULT_KRCVQS 2
#define MIN_KERNEL_KCTXTS 2
#define FIRST_KERNEL_KCTXT 1
/* sizes for both the QP and RSM map tables */
@@ -238,6 +240,9 @@ struct flag_table {
/* all CceStatus sub-block RXE pause bits */
#define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK
+#define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL
+#define CNTR_32BIT_MAX 0x00000000FFFFFFFF
+
/*
* CCE Error flags.
*/
@@ -3947,6 +3952,28 @@ static u64 access_sdma_wrong_dw_err_cnt(const struct cntr_entry *entry,
return dd->sw_send_dma_eng_err_status_cnt[0];
}
+static u64 access_dc_rcv_err_cnt(const struct cntr_entry *entry,
+ void *context, int vl, int mode,
+ u64 data)
+{
+ struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+ u64 val = 0;
+ u64 csr = entry->csr;
+
+ val = read_write_csr(dd, csr, mode, data);
+ if (mode == CNTR_MODE_R) {
+ val = val > CNTR_MAX - dd->sw_rcv_bypass_packet_errors ?
+ CNTR_MAX : val + dd->sw_rcv_bypass_packet_errors;
+ } else if (mode == CNTR_MODE_W) {
+ dd->sw_rcv_bypass_packet_errors = 0;
+ } else {
+ dd_dev_err(dd, "Invalid cntr register access mode");
+ return 0;
+ }
+ return val;
+}
+
#define def_access_sw_cpu(cntr) \
static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry, \
void *context, int vl, int mode, u64 data) \
@@ -4020,7 +4047,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL),
[C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT,
CNTR_SYNTH),
-[C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH),
+[C_DC_RCV_ERR] = CNTR_ELEM("DcRecvErr", DCC_ERR_PORTRCV_ERR_CNT, 0, CNTR_SYNTH,
+ access_dc_rcv_err_cnt),
[C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT,
CNTR_SYNTH),
[C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT,
@@ -8798,30 +8826,6 @@ static int write_tx_settings(struct hfi1_devdata *dd,
return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame);
}
-static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
-{
- u32 frame, version, prod_id;
- int ret, lane;
-
- /* 4 lanes */
- for (lane = 0; lane < 4; lane++) {
- ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
- if (ret) {
- dd_dev_err(dd,
- "Unable to read lane %d firmware details\n",
- lane);
- continue;
- }
- version = (frame >> SPICO_ROM_VERSION_SHIFT)
- & SPICO_ROM_VERSION_MASK;
- prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
- & SPICO_ROM_PROD_ID_MASK;
- dd_dev_info(dd,
- "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
- lane, version, prod_id);
- }
-}
-
/*
* Read an idle LCB message.
*
@@ -9187,17 +9191,24 @@ static void wait_for_qsfp_init(struct hfi1_pportdata *ppd)
unsigned long timeout;
/*
- * Check for QSFP interrupt for t_init (SFF 8679)
+ * Some QSFP cables have a quirk that asserts the IntN line as a side
+ * effect of power up on plug-in. We ignore this false positive
+ * interrupt until the module has finished powering up by waiting for
+ * a minimum timeout of the module inrush initialization time of
+ * 500 ms (SFF 8679 Table 5-6) to ensure the voltage rails in the
+ * module have stabilized.
+ */
+ msleep(500);
+
+ /*
+ * Check for QSFP interrupt for t_init (SFF 8679 Table 8-1)
*/
timeout = jiffies + msecs_to_jiffies(2000);
while (1) {
mask = read_csr(dd, dd->hfi1_id ?
ASIC_QSFP2_IN : ASIC_QSFP1_IN);
- if (!(mask & QSFP_HFI0_INT_N)) {
- write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR :
- ASIC_QSFP1_CLEAR, QSFP_HFI0_INT_N);
+ if (!(mask & QSFP_HFI0_INT_N))
break;
- }
if (time_after(jiffies, timeout)) {
dd_dev_info(dd, "%s: No IntN detected, reset complete\n",
__func__);
@@ -9213,10 +9224,17 @@ static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable)
u64 mask;
mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK);
- if (enable)
+ if (enable) {
+ /*
+ * Clear the status register to avoid an immediate interrupt
+ * when we re-enable the IntN pin
+ */
+ write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : ASIC_QSFP1_CLEAR,
+ QSFP_HFI0_INT_N);
mask |= (u64)QSFP_HFI0_INT_N;
- else
+ } else {
mask &= ~(u64)QSFP_HFI0_INT_N;
+ }
write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
}
@@ -9630,14 +9648,6 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
}
-int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd,
- struct hfi1_ctxt_info *kinfo)
-{
- kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) |
- HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U);
- return 0;
-}
-
struct hfi1_message_header *hfi1_get_msgheader(
struct hfi1_devdata *dd, __le32 *rhf_addr)
{
@@ -9890,6 +9900,131 @@ static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
return 0;
}
+static const char *state_completed_string(u32 completed)
+{
+ static const char * const state_completed[] = {
+ "EstablishComm",
+ "OptimizeEQ",
+ "VerifyCap"
+ };
+
+ if (completed < ARRAY_SIZE(state_completed))
+ return state_completed[completed];
+
+ return "unknown";
+}
+
+static const char all_lanes_dead_timeout_expired[] =
+ "All lanes were inactive – was the interconnect media removed?";
+static const char tx_out_of_policy[] =
+ "Passing lanes on local port do not meet the local link width policy";
+static const char no_state_complete[] =
+ "State timeout occurred before link partner completed the state";
+static const char * const state_complete_reasons[] = {
+ [0x00] = "Reason unknown",
+ [0x01] = "Link was halted by driver, refer to LinkDownReason",
+ [0x02] = "Link partner reported failure",
+ [0x10] = "Unable to achieve frame sync on any lane",
+ [0x11] =
+ "Unable to find a common bit rate with the link partner",
+ [0x12] =
+ "Unable to achieve frame sync on sufficient lanes to meet the local link width policy",
+ [0x13] =
+ "Unable to identify preset equalization on sufficient lanes to meet the local link width policy",
+ [0x14] = no_state_complete,
+ [0x15] =
+ "State timeout occurred before link partner identified equalization presets",
+ [0x16] =
+ "Link partner completed the EstablishComm state, but the passing lanes do not meet the local link width policy",
+ [0x17] = tx_out_of_policy,
+ [0x20] = all_lanes_dead_timeout_expired,
+ [0x21] =
+ "Unable to achieve acceptable BER on sufficient lanes to meet the local link width policy",
+ [0x22] = no_state_complete,
+ [0x23] =
+ "Link partner completed the OptimizeEq state, but the passing lanes do not meet the local link width policy",
+ [0x24] = tx_out_of_policy,
+ [0x30] = all_lanes_dead_timeout_expired,
+ [0x31] =
+ "State timeout occurred waiting for host to process received frames",
+ [0x32] = no_state_complete,
+ [0x33] =
+ "Link partner completed the VerifyCap state, but the passing lanes do not meet the local link width policy",
+ [0x34] = tx_out_of_policy,
+};
+
+static const char *state_complete_reason_code_string(struct hfi1_pportdata *ppd,
+ u32 code)
+{
+ const char *str = NULL;
+
+ if (code < ARRAY_SIZE(state_complete_reasons))
+ str = state_complete_reasons[code];
+
+ if (str)
+ return str;
+ return "Reserved";
+}
+
+/* describe the given last state complete frame */
+static void decode_state_complete(struct hfi1_pportdata *ppd, u32 frame,
+ const char *prefix)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+ u32 success;
+ u32 state;
+ u32 reason;
+ u32 lanes;
+
+ /*
+ * Decode frame:
+ * [ 0: 0] - success
+ * [ 3: 1] - state
+ * [ 7: 4] - next state timeout
+ * [15: 8] - reason code
+ * [31:16] - lanes
+ */
+ success = frame & 0x1;
+ state = (frame >> 1) & 0x7;
+ reason = (frame >> 8) & 0xff;
+ lanes = (frame >> 16) & 0xffff;
+
+ dd_dev_err(dd, "Last %s LNI state complete frame 0x%08x:\n",
+ prefix, frame);
+ dd_dev_err(dd, " last reported state state: %s (0x%x)\n",
+ state_completed_string(state), state);
+ dd_dev_err(dd, " state successfully completed: %s\n",
+ success ? "yes" : "no");
+ dd_dev_err(dd, " fail reason 0x%x: %s\n",
+ reason, state_complete_reason_code_string(ppd, reason));
+ dd_dev_err(dd, " passing lane mask: 0x%x", lanes);
+}
+
+/*
+ * Read the last state complete frames and explain them. This routine
+ * expects to be called if the link went down during link negotiation
+ * and initialization (LNI). That is, anywhere between polling and link up.
+ */
+static void check_lni_states(struct hfi1_pportdata *ppd)
+{
+ u32 last_local_state;
+ u32 last_remote_state;
+
+ read_last_local_state(ppd->dd, &last_local_state);
+ read_last_remote_state(ppd->dd, &last_remote_state);
+
+ /*
+ * Don't report anything if there is nothing to report. A value of
+ * 0 means the link was taken down while polling and there was no
+ * training in-process.
+ */
+ if (last_local_state == 0 && last_remote_state == 0)
+ return;
+
+ decode_state_complete(ppd, last_local_state, "transmitted");
+ decode_state_complete(ppd, last_remote_state, "received");
+}
+
/*
* Helper for set_link_state(). Do not call except from that routine.
* Expects ppd->hls_mutex to be held.
@@ -9902,8 +10037,6 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
{
struct hfi1_devdata *dd = ppd->dd;
u32 pstate, previous_state;
- u32 last_local_state;
- u32 last_remote_state;
int ret;
int do_transition;
int do_wait;
@@ -10003,12 +10136,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
} else if (previous_state
& (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
/* went down while attempting link up */
- /* byte 1 of last_*_state is the failure reason */
- read_last_local_state(dd, &last_local_state);
- read_last_remote_state(dd, &last_remote_state);
- dd_dev_err(dd,
- "LNI failure last states: local 0x%08x, remote 0x%08x\n",
- last_local_state, last_remote_state);
+ check_lni_states(ppd);
}
/* the active link width (downgrade) is 0 on link down */
@@ -11668,9 +11796,6 @@ static void free_cntrs(struct hfi1_devdata *dd)
dd->cntrnames = NULL;
}
-#define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL
-#define CNTR_32BIT_MAX 0x00000000FFFFFFFF
-
static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
u64 *psval, void *context, int vl)
{
@@ -12325,37 +12450,6 @@ u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
return ib_pstate;
}
-/*
- * Read/modify/write ASIC_QSFP register bits as selected by mask
- * data: 0 or 1 in the positions depending on what needs to be written
- * dir: 0 for read, 1 for write
- * mask: select by setting
- * I2CCLK (bit 0)
- * I2CDATA (bit 1)
- */
-u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
- u32 mask)
-{
- u64 qsfp_oe, target_oe;
-
- target_oe = target ? ASIC_QSFP2_OE : ASIC_QSFP1_OE;
- if (mask) {
- /* We are writing register bits, so lock access */
- dir &= mask;
- data &= mask;
-
- qsfp_oe = read_csr(dd, target_oe);
- qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir;
- write_csr(dd, target_oe, qsfp_oe);
- }
- /* We are exclusively reading bits here, but it is unlikely
- * we'll get valid data when we set the direction of the pin
- * in the same call, so read should call this function again
- * to get valid data
- */
- return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN);
-}
-
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
@@ -12780,7 +12874,6 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
/*
* Kernel receive contexts:
- * - min of 2 or 1 context/numa (excluding control context)
* - Context 0 - control context (VL15/multicast/error)
* - Context 1 - first kernel context
* - Context 2 - second kernel context
@@ -12794,9 +12887,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
*/
num_kernel_contexts = n_krcvqs + 1;
else
- num_kernel_contexts = num_online_nodes() + 1;
- num_kernel_contexts =
- max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
+ num_kernel_contexts = DEFAULT_KRCVQS + 1;
/*
* Every kernel receive context needs an ACK send context.
* one send context is allocated for each VL{0-7} and VL15
@@ -12815,7 +12906,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
*/
if (num_user_contexts < 0)
num_user_contexts =
- cpumask_weight(&dd->affinity->real_cpu_mask);
+ cpumask_weight(&node_affinity.real_cpu_mask);
total_contexts = num_kernel_contexts + num_user_contexts;
@@ -14141,6 +14232,11 @@ static int init_asic_data(struct hfi1_devdata *dd)
}
dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */
spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+
+ /* first one through - set up i2c devices */
+ if (!peer)
+ ret = set_up_i2c(dd, dd->asic_data);
+
return ret;
}
@@ -14445,19 +14541,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
(dd->revision >> CCE_REVISION_SW_SHIFT)
& CCE_REVISION_SW_MASK);
- /*
- * The real cpu mask is part of the affinity struct but has to be
- * initialized earlier than the rest of the affinity struct because it
- * is needed to calculate the number of user contexts in
- * set_up_context_variables(). However, hfi1_dev_affinity_init(),
- * which initializes the rest of the affinity struct members,
- * depends on set_up_context_variables() for the number of kernel
- * contexts, so it cannot be called before set_up_context_variables().
- */
- ret = init_real_cpu_mask(dd);
- if (ret)
- goto bail_cleanup;
-
ret = set_up_context_variables(dd);
if (ret)
goto bail_cleanup;
@@ -14471,7 +14554,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
/* set up KDETH QP prefix in both RX and TX CSRs */
init_kdeth_qp(dd);
- hfi1_dev_affinity_init(dd);
+ ret = hfi1_dev_affinity_init(dd);
+ if (ret)
+ goto bail_cleanup;
/* send contexts must be set up before receive contexts */
ret = init_send_contexts(dd);
@@ -14508,8 +14593,14 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
/* set up LCB access - must be after set_up_interrupts() */
init_lcb_access(dd);
+ /*
+ * Serial number is created from the base guid:
+ * [27:24] = base guid [38:35]
+ * [23: 0] = base guid [23: 0]
+ */
snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n",
- dd->base_guid & 0xFFFFFF);
+ (dd->base_guid & 0xFFFFFF) |
+ ((dd->base_guid >> 11) & 0xF000000));
dd->oui1 = dd->base_guid >> 56 & 0xFF;
dd->oui2 = dd->base_guid >> 48 & 0xFF;
@@ -14518,7 +14609,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
ret = load_firmware(dd); /* asymmetric with dispose_firmware() */
if (ret)
goto bail_clear_intr;
- check_fabric_firmware_versions(dd);
thermal_init(dd);
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 66a327978739..ed11107c50fe 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -640,6 +640,7 @@ extern uint platform_config_load;
/* SBus commands */
#define RESET_SBUS_RECEIVER 0x20
#define WRITE_SBUS_RECEIVER 0x21
+#define READ_SBUS_RECEIVER 0x22
void sbus_request(struct hfi1_devdata *dd,
u8 receiver_addr, u8 data_addr, u8 command, u32 data_in);
int sbus_request_slow(struct hfi1_devdata *dd,
@@ -1336,10 +1337,6 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd);
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
struct hfi1_message_header *hfi1_get_msgheader(
struct hfi1_devdata *dd, __le32 *rhf_addr);
-int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd,
- struct hfi1_ctxt_info *kinfo);
-u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
- u32 mask);
int hfi1_init_ctxt(struct send_context *sc);
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
u32 type, unsigned long pa, u16 order);
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 8744de6667c2..5b9993899789 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -471,6 +471,10 @@
#define ASIC_STS_SBUS_RESULT (ASIC + 0x000000000010)
#define ASIC_STS_SBUS_RESULT_DONE_SMASK 0x1ull
#define ASIC_STS_SBUS_RESULT_RCV_DATA_VALID_SMASK 0x2ull
+#define ASIC_STS_SBUS_RESULT_RESULT_CODE_SHIFT 2
+#define ASIC_STS_SBUS_RESULT_RESULT_CODE_MASK 0x7ull
+#define ASIC_STS_SBUS_RESULT_DATA_OUT_SHIFT 32
+#define ASIC_STS_SBUS_RESULT_DATA_OUT_MASK 0xFFFFFFFFull
#define ASIC_STS_THERM (ASIC + 0x000000000058)
#define ASIC_STS_THERM_CRIT_TEMP_MASK 0x7FFull
#define ASIC_STS_THERM_CRIT_TEMP_SHIFT 18
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index c75b0ae688f8..8246dc7d0573 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -392,9 +392,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
u16 rlid;
u8 svc_type, sl, sc5;
- sc5 = (be16_to_cpu(rhdr->lrh[0]) >> 12) & 0xf;
- if (rhf_dc_info(packet->rhf))
- sc5 |= 0x10;
+ sc5 = hdr2sc(rhdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
@@ -450,14 +448,20 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd,
packet->rcv_flags = 0;
}
-static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr,
- struct hfi1_other_headers *ohdr,
- u64 rhf, u32 bth1, struct ib_grh *grh)
+void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
+ bool do_cnp)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- u32 rqpn = 0;
- u16 rlid;
- u8 sc5, svc_type;
+ struct hfi1_ib_header *hdr = pkt->hdr;
+ struct hfi1_other_headers *ohdr = pkt->ohdr;
+ struct ib_grh *grh = NULL;
+ u32 rqpn = 0, bth1;
+ u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]);
+ u8 sc, svc_type;
+ bool is_mcast = false;
+
+ if (pkt->rcv_flags & HFI1_HAS_GRH)
+ grh = &hdr->u.l.grh;
switch (qp->ibqp.qp_type) {
case IB_QPT_SMI:
@@ -466,6 +470,8 @@ static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr,
rlid = be16_to_cpu(hdr->lrh[3]);
rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
svc_type = IB_CC_SVCTYPE_UD;
+ is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+ (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
break;
case IB_QPT_UC:
rlid = qp->remote_ah_attr.dlid;
@@ -481,24 +487,23 @@ static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr,
return;
}
- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
- if (rhf_dc_info(rhf))
- sc5 |= 0x10;
+ sc = hdr2sc((struct hfi1_message_header *)hdr, pkt->rhf);
- if (bth1 & HFI1_FECN_SMASK) {
+ bth1 = be32_to_cpu(ohdr->bth[1]);
+ if (do_cnp && (bth1 & HFI1_FECN_SMASK)) {
u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
- u16 dlid = be16_to_cpu(hdr->lrh[1]);
- return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc5, grh);
+ return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh);
}
- if (bth1 & HFI1_BECN_SMASK) {
+ if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 lqpn = bth1 & RVT_QPN_MASK;
- u8 sl = ibp->sc_to_sl[sc5];
+ u8 sl = ibp->sc_to_sl[sc];
process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
}
+
}
struct ps_mdata {
@@ -596,7 +601,6 @@ static void __prescan_rxq(struct hfi1_packet *packet)
struct rvt_qp *qp;
struct hfi1_ib_header *hdr;
struct hfi1_other_headers *ohdr;
- struct ib_grh *grh = NULL;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn, bth1;
@@ -616,14 +620,13 @@ static void __prescan_rxq(struct hfi1_packet *packet)
hfi1_get_msgheader(dd, rhf_addr);
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
- if (lnh == HFI1_LRH_BTH) {
+ if (lnh == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
- } else if (lnh == HFI1_LRH_GRH) {
+ else if (lnh == HFI1_LRH_GRH)
ohdr = &hdr->u.l.oth;
- grh = &hdr->u.l.grh;
- } else {
+ else
goto next; /* just in case */
- }
+
bth1 = be32_to_cpu(ohdr->bth[1]);
is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
@@ -639,7 +642,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
goto next;
}
- process_ecn(qp, hdr, ohdr, rhf, bth1, grh);
+ process_ecn(qp, packet, true);
rcu_read_unlock();
/* turn off BECN, FECN */
@@ -1362,6 +1365,7 @@ int process_receive_bypass(struct hfi1_packet *packet)
dd_dev_err(packet->rcd->dd,
"Bypass packets are not supported in normal operation. Dropping\n");
+ incr_cntr64(&packet->rcd->dd->sw_rcv_bypass_packet_errors);
return RHF_RCV_CONTINUE;
}
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index c702a009608f..1ecbec192358 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -168,6 +168,7 @@ static inline int is_valid_mmap(u64 token)
static int hfi1_file_open(struct inode *inode, struct file *fp)
{
+ struct hfi1_filedata *fd;
struct hfi1_devdata *dd = container_of(inode->i_cdev,
struct hfi1_devdata,
user_cdev);
@@ -176,10 +177,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
kobject_get(&dd->kobj);
/* The real work is performed later in assign_ctxt() */
- fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL);
- if (fp->private_data) /* no cpu affinity by default */
- ((struct hfi1_filedata *)fp->private_data)->rec_cpu_num = -1;
- return fp->private_data ? 0 : -ENOMEM;
+
+ fd = kzalloc(sizeof(*fd), GFP_KERNEL);
+
+ if (fd) {
+ fd->rec_cpu_num = -1; /* no cpu affinity by default */
+ fd->mm = current->mm;
+ }
+
+ fp->private_data = fd;
+
+ return fd ? 0 : -ENOMEM;
}
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
@@ -228,7 +236,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(struct hfi1_base_info));
break;
case HFI1_IOCTL_CREDIT_UPD:
- if (uctxt && uctxt->sc)
+ if (uctxt)
sc_return_credits(uctxt->sc);
break;
@@ -392,41 +400,38 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
struct hfi1_user_sdma_pkt_q *pq = fd->pq;
struct hfi1_user_sdma_comp_q *cq = fd->cq;
- int ret = 0, done = 0, reqs = 0;
+ int done = 0, reqs = 0;
unsigned long dim = from->nr_segs;
- if (!cq || !pq) {
- ret = -EIO;
- goto done;
- }
+ if (!cq || !pq)
+ return -EIO;
- if (!iter_is_iovec(from) || !dim) {
- ret = -EINVAL;
- goto done;
- }
+ if (!iter_is_iovec(from) || !dim)
+ return -EINVAL;
hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)",
fd->uctxt->ctxt, fd->subctxt, dim);
- if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) {
- ret = -ENOSPC;
- goto done;
- }
+ if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
+ return -ENOSPC;
while (dim) {
+ int ret;
unsigned long count = 0;
ret = hfi1_user_sdma_process_request(
kiocb->ki_filp, (struct iovec *)(from->iov + done),
dim, &count);
- if (ret)
- goto done;
+ if (ret) {
+ reqs = ret;
+ break;
+ }
dim -= count;
done += count;
reqs++;
}
-done:
- return ret ? ret : reqs;
+
+ return reqs;
}
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
@@ -718,7 +723,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
hfi1_user_sdma_free_queues(fdata);
/* release the cpu */
- hfi1_put_proc_affinity(dd, fdata->rec_cpu_num);
+ hfi1_put_proc_affinity(fdata->rec_cpu_num);
/*
* Clear any left over, unhandled events so the next process that
@@ -730,7 +735,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
if (--uctxt->cnt) {
uctxt->active_slaves &= ~(1 << fdata->subctxt);
- uctxt->subpid[fdata->subctxt] = 0;
mutex_unlock(&hfi1_mutex);
goto done;
}
@@ -756,7 +760,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type));
sc_disable(uctxt->sc);
- uctxt->pid = 0;
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
dd->rcd[uctxt->ctxt] = NULL;
@@ -818,9 +821,10 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
ret = find_shared_ctxt(fp, uinfo);
if (ret < 0)
goto done_unlock;
- if (ret)
- fd->rec_cpu_num = hfi1_get_proc_affinity(
- fd->uctxt->dd, fd->uctxt->numa_id);
+ if (ret) {
+ fd->rec_cpu_num =
+ hfi1_get_proc_affinity(fd->uctxt->numa_id);
+ }
}
/*
@@ -895,7 +899,6 @@ static int find_shared_ctxt(struct file *fp,
}
fd->uctxt = uctxt;
fd->subctxt = uctxt->cnt++;
- uctxt->subpid[fd->subctxt] = current->pid;
uctxt->active_slaves |= 1 << fd->subctxt;
ret = 1;
goto done;
@@ -932,7 +935,11 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
if (ctxt == dd->num_rcv_contexts)
return -EBUSY;
- fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1);
+ /*
+ * If we don't have a NUMA node requested, preference is towards
+ * device NUMA node.
+ */
+ fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node);
if (fd->rec_cpu_num != -1)
numa = cpu_to_node(fd->rec_cpu_num);
else
@@ -976,8 +983,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
return ret;
}
uctxt->userversion = uinfo->userversion;
- uctxt->pid = current->pid;
- uctxt->flags = HFI1_CAP_UGET(MASK);
+ uctxt->flags = hfi1_cap_mask; /* save current flag state */
init_waitqueue_head(&uctxt->wait);
strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
@@ -1080,18 +1086,18 @@ static int user_init(struct file *fp)
hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey);
rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, HDRSUPP))
+ if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
/*
* Ignore the bit in the flags for now until proper
* support for multiple packet per rcv array entry is
* added.
*/
- if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
+ if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR))
rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
+ if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL))
rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
+ if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
/*
* The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
@@ -1099,7 +1105,7 @@ static int user_init(struct file *fp)
* uses of the chip or ctxt. Therefore, add the rcvctrl op
* for both cases.
*/
- if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
+ if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL))
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
else
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
@@ -1122,9 +1128,14 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
int ret = 0;
memset(&cinfo, 0, sizeof(cinfo));
- ret = hfi1_get_base_kinfo(uctxt, &cinfo);
- if (ret < 0)
- goto done;
+ cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) &
+ HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) |
+ HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
+ HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
+ /* adjust flag if this fd is not able to cache */
+ if (!fd->handler)
+ cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */
+
cinfo.num_active = hfi1_count_active_units();
cinfo.unit = uctxt->dd->unit;
cinfo.ctxt = uctxt->ctxt;
@@ -1146,7 +1157,7 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
if (copy_to_user(ubase, &cinfo, sizeof(cinfo)))
ret = -EFAULT;
-done:
+
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index ed680fda611d..13db8eb4f4ec 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -206,6 +206,9 @@ static const struct firmware *platform_config;
/* the number of fabric SerDes on the SBus */
#define NUM_FABRIC_SERDES 4
+/* ASIC_STS_SBUS_RESULT.RESULT_CODE value */
+#define SBUS_READ_COMPLETE 0x4
+
/* SBus fabric SerDes addresses, one set per HFI */
static const u8 fabric_serdes_addrs[2][NUM_FABRIC_SERDES] = {
{ 0x01, 0x02, 0x03, 0x04 },
@@ -240,6 +243,7 @@ static const u8 all_pcie_serdes_broadcast = 0xe0;
static void dispose_one_firmware(struct firmware_details *fdet);
static int load_fabric_serdes_firmware(struct hfi1_devdata *dd,
struct firmware_details *fdet);
+static void dump_fw_version(struct hfi1_devdata *dd);
/*
* Read a single 64-bit value from 8051 data memory.
@@ -1079,6 +1083,44 @@ void sbus_request(struct hfi1_devdata *dd,
}
/*
+ * Read a value from the SBus.
+ *
+ * Requires the caller to be in fast mode
+ */
+static u32 sbus_read(struct hfi1_devdata *dd, u8 receiver_addr, u8 data_addr,
+ u32 data_in)
+{
+ u64 reg;
+ int retries;
+ int success = 0;
+ u32 result = 0;
+ u32 result_code = 0;
+
+ sbus_request(dd, receiver_addr, data_addr, READ_SBUS_RECEIVER, data_in);
+
+ for (retries = 0; retries < 100; retries++) {
+ usleep_range(1000, 1200); /* arbitrary */
+ reg = read_csr(dd, ASIC_STS_SBUS_RESULT);
+ result_code = (reg >> ASIC_STS_SBUS_RESULT_RESULT_CODE_SHIFT)
+ & ASIC_STS_SBUS_RESULT_RESULT_CODE_MASK;
+ if (result_code != SBUS_READ_COMPLETE)
+ continue;
+
+ success = 1;
+ result = (reg >> ASIC_STS_SBUS_RESULT_DATA_OUT_SHIFT)
+ & ASIC_STS_SBUS_RESULT_DATA_OUT_MASK;
+ break;
+ }
+
+ if (!success) {
+ dd_dev_err(dd, "%s: read failed, result code 0x%x\n", __func__,
+ result_code);
+ }
+
+ return result;
+}
+
+/*
* Turn off the SBus and fabric serdes spicos.
*
* + Must be called with Sbus fast mode turned on.
@@ -1636,6 +1678,7 @@ int load_firmware(struct hfi1_devdata *dd)
return ret;
}
+ dump_fw_version(dd);
return 0;
}
@@ -2054,3 +2097,85 @@ void read_guid(struct hfi1_devdata *dd)
dd_dev_info(dd, "GUID %llx",
(unsigned long long)dd->base_guid);
}
+
+/* read and display firmware version info */
+static void dump_fw_version(struct hfi1_devdata *dd)
+{
+ u32 pcie_vers[NUM_PCIE_SERDES];
+ u32 fabric_vers[NUM_FABRIC_SERDES];
+ u32 sbus_vers;
+ int i;
+ int all_same;
+ int ret;
+ u8 rcv_addr;
+
+ ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+ if (ret) {
+ dd_dev_err(dd, "Unable to acquire SBus to read firmware versions\n");
+ return;
+ }
+
+ /* set fast mode */
+ set_sbus_fast_mode(dd);
+
+ /* read version for SBus Master */
+ sbus_request(dd, SBUS_MASTER_BROADCAST, 0x02, WRITE_SBUS_RECEIVER, 0);
+ sbus_request(dd, SBUS_MASTER_BROADCAST, 0x07, WRITE_SBUS_RECEIVER, 0x1);
+ /* wait for interrupt to be processed */
+ usleep_range(10000, 11000);
+ sbus_vers = sbus_read(dd, SBUS_MASTER_BROADCAST, 0x08, 0x1);
+ dd_dev_info(dd, "SBus Master firmware version 0x%08x\n", sbus_vers);
+
+ /* read version for PCIe SerDes */
+ all_same = 1;
+ pcie_vers[0] = 0;
+ for (i = 0; i < NUM_PCIE_SERDES; i++) {
+ rcv_addr = pcie_serdes_addrs[dd->hfi1_id][i];
+ sbus_request(dd, rcv_addr, 0x03, WRITE_SBUS_RECEIVER, 0);
+ /* wait for interrupt to be processed */
+ usleep_range(10000, 11000);
+ pcie_vers[i] = sbus_read(dd, rcv_addr, 0x04, 0x0);
+ if (i > 0 && pcie_vers[0] != pcie_vers[i])
+ all_same = 0;
+ }
+
+ if (all_same) {
+ dd_dev_info(dd, "PCIe SerDes firmware version 0x%x\n",
+ pcie_vers[0]);
+ } else {
+ dd_dev_warn(dd, "PCIe SerDes do not have the same firmware version\n");
+ for (i = 0; i < NUM_PCIE_SERDES; i++) {
+ dd_dev_info(dd,
+ "PCIe SerDes lane %d firmware version 0x%x\n",
+ i, pcie_vers[i]);
+ }
+ }
+
+ /* read version for fabric SerDes */
+ all_same = 1;
+ fabric_vers[0] = 0;
+ for (i = 0; i < NUM_FABRIC_SERDES; i++) {
+ rcv_addr = fabric_serdes_addrs[dd->hfi1_id][i];
+ sbus_request(dd, rcv_addr, 0x03, WRITE_SBUS_RECEIVER, 0);
+ /* wait for interrupt to be processed */
+ usleep_range(10000, 11000);
+ fabric_vers[i] = sbus_read(dd, rcv_addr, 0x04, 0x0);
+ if (i > 0 && fabric_vers[0] != fabric_vers[i])
+ all_same = 0;
+ }
+
+ if (all_same) {
+ dd_dev_info(dd, "Fabric SerDes firmware version 0x%x\n",
+ fabric_vers[0]);
+ } else {
+ dd_dev_warn(dd, "Fabric SerDes do not have the same firmware version\n");
+ for (i = 0; i < NUM_FABRIC_SERDES; i++) {
+ dd_dev_info(dd,
+ "Fabric SerDes lane %d firmware version 0x%x\n",
+ i, fabric_vers[i]);
+ }
+ }
+
+ clear_sbus_fast_mode(dd);
+ release_chip_resource(dd, CR_SBUS);
+}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 4417a0fd3ef9..1000e0fd96d9 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -62,6 +62,8 @@
#include <linux/cdev.h>
#include <linux/delay.h>
#include <linux/kthread.h>
+#include <linux/i2c.h>
+#include <linux/i2c-algo-bit.h>
#include <rdma/rdma_vt.h>
#include "chip_registers.h"
@@ -253,7 +255,7 @@ struct hfi1_ctxtdata {
/* chip offset of PIO buffers for this ctxt */
u32 piobufs;
/* per-context configuration flags */
- u32 flags;
+ unsigned long flags;
/* per-context event flags for fileops/intr communication */
unsigned long event_flags;
/* WAIT_RCV that timed out, no interrupt */
@@ -268,9 +270,6 @@ struct hfi1_ctxtdata {
u32 urgent;
/* saved total number of polled urgent packets for poll edge trigger */
u32 urgent_poll;
- /* pid of process using this ctxt */
- pid_t pid;
- pid_t subpid[HFI1_MAX_SHARED_CTXTS];
/* same size as task_struct .comm[], command that opened context */
char comm[TASK_COMM_LEN];
/* so file ops can get at unit */
@@ -366,11 +365,6 @@ struct hfi1_packet {
u8 etype;
};
-static inline bool has_sc4_bit(struct hfi1_packet *p)
-{
- return !!rhf_dc_info(p->rhf);
-}
-
/*
* Private data for snoop/capture support.
*/
@@ -805,10 +799,19 @@ struct hfi1_temp {
u8 triggers; /* temperature triggers */
};
+struct hfi1_i2c_bus {
+ struct hfi1_devdata *controlling_dd; /* current controlling device */
+ struct i2c_adapter adapter; /* bus details */
+ struct i2c_algo_bit_data algo; /* bus algorithm details */
+ int num; /* bus number, 0 or 1 */
+};
+
/* common data between shared ASIC HFIs */
struct hfi1_asic_data {
struct hfi1_devdata *dds[2]; /* back pointers */
struct mutex asic_resource_mutex;
+ struct hfi1_i2c_bus *i2c_bus0;
+ struct hfi1_i2c_bus *i2c_bus1;
};
/* device data struct now contains only "general per-device" info.
@@ -1128,7 +1131,8 @@ struct hfi1_devdata {
NUM_SEND_DMA_ENG_ERR_STATUS_COUNTERS];
/* Software counter that aggregates all cce_err_status errors */
u64 sw_cce_err_status_aggregate;
-
+ /* Software counter that aggregates all bypass packet rcv errors */
+ u64 sw_rcv_bypass_packet_errors;
/* receive interrupt functions */
rhf_rcv_function_ptr *rhf_rcv_function_map;
rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
@@ -1174,6 +1178,8 @@ struct hfi1_devdata {
/* 8051 firmware version helper */
#define dc8051_ver(a, b) ((a) << 8 | (b))
+#define dc8051_ver_maj(a) ((a & 0xff00) >> 8)
+#define dc8051_ver_min(a) (a & 0x00ff)
/* f_put_tid types */
#define PT_EXPECTED 0
@@ -1182,6 +1188,7 @@ struct hfi1_devdata {
struct tid_rb_node;
struct mmu_rb_node;
+struct mmu_rb_handler;
/* Private data for file operations */
struct hfi1_filedata {
@@ -1192,7 +1199,7 @@ struct hfi1_filedata {
/* for cpu affinity; -1 if none */
int rec_cpu_num;
u32 tid_n_pinned;
- struct rb_root tid_rb_root;
+ struct mmu_rb_handler *handler;
struct tid_rb_node **entry_to_rb;
spinlock_t tid_lock; /* protect tid_[limit,used] counters */
u32 tid_limit;
@@ -1201,6 +1208,7 @@ struct hfi1_filedata {
u32 invalid_tid_idx;
/* protect invalid_tids array and invalid_tid_idx */
spinlock_t invalid_lock;
+ struct mm_struct *mm;
};
extern struct list_head hfi1_dev_list;
@@ -1234,6 +1242,8 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
void set_all_slowpath(struct hfi1_devdata *dd);
+extern const struct pci_device_id hfi1_pci_tbl[];
+
/* receive packet handler dispositions */
#define RCV_PKT_OK 0x0 /* keep going */
#define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */
@@ -1259,7 +1269,7 @@ void receive_interrupt_work(struct work_struct *work);
static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
{
return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
- ((!!(rhf & RHF_DC_INFO_SMASK)) << 4);
+ ((!!(rhf_dc_info(rhf))) << 4);
}
static inline u16 generate_jkey(kuid_t uid)
@@ -1569,6 +1579,22 @@ static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port)
return &dd->pport[pidx].ibport_data;
}
+void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
+ bool do_cnp);
+static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
+ bool do_cnp)
+{
+ struct hfi1_other_headers *ohdr = pkt->ohdr;
+ u32 bth1;
+
+ bth1 = be32_to_cpu(ohdr->bth[1]);
+ if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) {
+ hfi1_process_ecn_slowpath(qp, pkt, do_cnp);
+ return bth1 & HFI1_FECN_SMASK;
+ }
+ return false;
+}
+
/*
* Return the indexed PKEY from the port PKEY table.
*/
@@ -1586,8 +1612,7 @@ static inline u16 hfi1_get_pkey(struct hfi1_ibport *ibp, unsigned index)
}
/*
- * Readers of cc_state must call get_cc_state() under rcu_read_lock().
- * Writers of cc_state must call get_cc_state() under cc_state_lock.
+ * Called by readers of cc_state only, must call under rcu_read_lock().
*/
static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd)
{
@@ -1595,6 +1620,16 @@ static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd)
}
/*
+ * Called by writers of cc_state only, must call under cc_state_lock.
+ */
+static inline
+struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
+{
+ return rcu_dereference_protected(ppd->cc_state,
+ lockdep_is_held(&ppd->cc_state_lock));
+}
+
+/*
* values for dd->flags (_device_ related flags)
*/
#define HFI1_INITTED 0x1 /* chip and driver up and initted */
@@ -1669,9 +1704,12 @@ void shutdown_led_override(struct hfi1_pportdata *ppd);
*/
#define DEFAULT_RCVHDR_ENTSIZE 32
-bool hfi1_can_pin_pages(struct hfi1_devdata *, u32, u32);
-int hfi1_acquire_user_pages(unsigned long, size_t, bool, struct page **);
-void hfi1_release_user_pages(struct mm_struct *, struct page **, size_t, bool);
+bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
+ u32 nlocked, u32 npages);
+int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr,
+ size_t npages, bool writable, struct page **pages);
+void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
+ size_t npages, bool dirty);
static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
{
@@ -1947,4 +1985,55 @@ static inline u32 qsfp_resource(struct hfi1_devdata *dd)
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
+#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
+#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev))
+
+#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
+#define show_packettype(etype) \
+__print_symbolic(etype, \
+ packettype_name(EXPECTED), \
+ packettype_name(EAGER), \
+ packettype_name(IB), \
+ packettype_name(ERROR), \
+ packettype_name(BYPASS))
+
+#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode }
+#define show_ib_opcode(opcode) \
+__print_symbolic(opcode, \
+ ib_opcode_name(RC_SEND_FIRST), \
+ ib_opcode_name(RC_SEND_MIDDLE), \
+ ib_opcode_name(RC_SEND_LAST), \
+ ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_SEND_ONLY), \
+ ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_WRITE_FIRST), \
+ ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \
+ ib_opcode_name(RC_RDMA_WRITE_LAST), \
+ ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_WRITE_ONLY), \
+ ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_READ_REQUEST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \
+ ib_opcode_name(RC_ACKNOWLEDGE), \
+ ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
+ ib_opcode_name(RC_COMPARE_SWAP), \
+ ib_opcode_name(RC_FETCH_ADD), \
+ ib_opcode_name(UC_SEND_FIRST), \
+ ib_opcode_name(UC_SEND_MIDDLE), \
+ ib_opcode_name(UC_SEND_LAST), \
+ ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_SEND_ONLY), \
+ ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_RDMA_WRITE_FIRST), \
+ ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \
+ ib_opcode_name(UC_RDMA_WRITE_LAST), \
+ ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_RDMA_WRITE_ONLY), \
+ ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(UD_SEND_ONLY), \
+ ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(CNP))
#endif /* _HFI1_KERNEL_H */
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index eed971ccd2a1..a358d23ecd54 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -64,6 +64,7 @@
#include "debugfs.h"
#include "verbs.h"
#include "aspm.h"
+#include "affinity.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -474,8 +475,9 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t)
void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
struct hfi1_devdata *dd, u8 hw_pidx, u8 port)
{
- int i, size;
+ int i;
uint default_pkey_idx;
+ struct cc_state *cc_state;
ppd->dd = dd;
ppd->hw_pidx = hw_pidx;
@@ -526,9 +528,9 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
spin_lock_init(&ppd->cc_state_lock);
spin_lock_init(&ppd->cc_log_lock);
- size = sizeof(struct cc_state);
- RCU_INIT_POINTER(ppd->cc_state, kzalloc(size, GFP_KERNEL));
- if (!rcu_dereference(ppd->cc_state))
+ cc_state = kzalloc(sizeof(*cc_state), GFP_KERNEL);
+ RCU_INIT_POINTER(ppd->cc_state, cc_state);
+ if (!cc_state)
goto bail;
return;
@@ -972,39 +974,49 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
/*
* Release our hold on the shared asic data. If we are the last one,
- * free the structure. Must be holding hfi1_devs_lock.
+ * return the structure to be finalized outside the lock. Must be
+ * holding hfi1_devs_lock.
*/
-static void release_asic_data(struct hfi1_devdata *dd)
+static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd)
{
+ struct hfi1_asic_data *ad;
int other;
if (!dd->asic_data)
- return;
+ return NULL;
dd->asic_data->dds[dd->hfi1_id] = NULL;
other = dd->hfi1_id ? 0 : 1;
- if (!dd->asic_data->dds[other]) {
- /* we are the last holder, free it */
- kfree(dd->asic_data);
- }
+ ad = dd->asic_data;
dd->asic_data = NULL;
+ /* return NULL if the other dd still has a link */
+ return ad->dds[other] ? NULL : ad;
+}
+
+static void finalize_asic_data(struct hfi1_devdata *dd,
+ struct hfi1_asic_data *ad)
+{
+ clean_up_i2c(dd, ad);
+ kfree(ad);
}
static void __hfi1_free_devdata(struct kobject *kobj)
{
struct hfi1_devdata *dd =
container_of(kobj, struct hfi1_devdata, kobj);
+ struct hfi1_asic_data *ad;
unsigned long flags;
spin_lock_irqsave(&hfi1_devs_lock, flags);
idr_remove(&hfi1_unit_table, dd->unit);
list_del(&dd->list);
- release_asic_data(dd);
+ ad = release_asic_data(dd);
spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+ if (ad)
+ finalize_asic_data(dd, ad);
free_platform_config(dd);
rcu_barrier(); /* wait for rcu callbacks to complete */
free_percpu(dd->int_counter);
free_percpu(dd->rcv_limit);
- hfi1_dev_affinity_free(dd);
free_percpu(dd->send_schedule);
rvt_dealloc_device(&dd->verbs_dev.rdi);
}
@@ -1162,7 +1174,7 @@ static int init_one(struct pci_dev *, const struct pci_device_id *);
#define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: "
#define PFX DRIVER_NAME ": "
-static const struct pci_device_id hfi1_pci_tbl[] = {
+const struct pci_device_id hfi1_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL0) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL1) },
{ 0, }
@@ -1198,6 +1210,10 @@ static int __init hfi1_mod_init(void)
if (ret)
goto bail;
+ ret = node_affinity_init();
+ if (ret)
+ goto bail;
+
/* validate max MTU before any devices start */
if (!valid_opa_max_mtu(hfi1_max_mtu)) {
pr_err("Invalid max_mtu 0x%x, using 0x%x instead\n",
@@ -1278,6 +1294,7 @@ module_init(hfi1_mod_init);
static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
+ node_affinity_destroy();
hfi1_wss_exit();
hfi1_dbg_exit();
hfi1_cpulist_count = 0;
@@ -1311,7 +1328,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
hrtimer_cancel(&ppd->cca_timer[i].hrtimer);
spin_lock(&ppd->cc_state_lock);
- cc_state = get_cc_state(ppd);
+ cc_state = get_cc_state_protected(ppd);
RCU_INIT_POINTER(ppd->cc_state, NULL);
spin_unlock(&ppd->cc_state_lock);
@@ -1760,8 +1777,8 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
hfi1_cdbg(PROC,
"ctxt%u: Alloced %u rcv tid entries @ %uKB, total %zuKB\n",
- rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size,
- rcd->egrbufs.size);
+ rcd->ctxt, rcd->egrbufs.alloced,
+ rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024);
/*
* Set the contexts rcv array head update threshold to the closest
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index fca07a1d6c28..1263abe01999 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -588,7 +588,6 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
pi->port_phys_conf = (ppd->port_type & 0xf);
-#if PI_LED_ENABLE_SUP
pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
pi->port_states.ledenable_offlinereason |=
ppd->is_sm_config_started << 5;
@@ -602,11 +601,6 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
pi->port_states.ledenable_offlinereason |=
ppd->offline_disabled_reason;
-#else
- pi->port_states.offline_reason = ppd->neighbor_normal << 4;
- pi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
- pi->port_states.offline_reason |= ppd->offline_disabled_reason;
-#endif /* PI_LED_ENABLE_SUP */
pi->port_states.portphysstate_portstate =
(hfi1_ibphys_portstate(ppd) << 4) | state;
@@ -1752,17 +1746,11 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
if (start_of_sm_config && (lstate == IB_PORT_INIT))
ppd->is_sm_config_started = 1;
-#if PI_LED_ENABLE_SUP
psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
psi->port_states.ledenable_offlinereason |=
ppd->is_sm_config_started << 5;
psi->port_states.ledenable_offlinereason |=
ppd->offline_disabled_reason;
-#else
- psi->port_states.offline_reason = ppd->neighbor_normal << 4;
- psi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
- psi->port_states.offline_reason |= ppd->offline_disabled_reason;
-#endif /* PI_LED_ENABLE_SUP */
psi->port_states.portphysstate_portstate =
(hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf);
@@ -2430,14 +2418,9 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
rsp->port_rcv_remote_physical_errors =
cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
CNTR_INVALID_VL));
- tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
- tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
- if (tmp2 < tmp) {
- /* overflow/wrapped */
- rsp->local_link_integrity_errors = cpu_to_be64(~0);
- } else {
- rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
- }
+ rsp->local_link_integrity_errors =
+ cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
+ CNTR_INVALID_VL));
tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
CNTR_INVALID_VL);
@@ -2499,6 +2482,9 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
idx_from_vl(vl)));
+ rsp->vls[vfi].port_vl_xmit_discards =
+ cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
+ idx_from_vl(vl)));
vlinfo++;
vfi++;
}
@@ -2529,9 +2515,8 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
CNTR_INVALID_VL);
/* local link integrity must be right-shifted by the lli resolution */
- tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
- tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
- error_counter_summary += (tmp >> res_lli);
+ error_counter_summary += (read_dev_cntr(dd, C_DC_RX_REPLAY,
+ CNTR_INVALID_VL) >> res_lli);
/* link error recovery must b right-shifted by the ler resolution */
tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
@@ -2800,14 +2785,9 @@ static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
rsp->port_rcv_constraint_errors =
cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
CNTR_INVALID_VL));
- tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
- tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
- if (tmp2 < tmp) {
- /* overflow/wrapped */
- rsp->local_link_integrity_errors = cpu_to_be64(~0);
- } else {
- rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
- }
+ rsp->local_link_integrity_errors =
+ cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
+ CNTR_INVALID_VL));
rsp->excessive_buffer_overruns =
cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
}
@@ -2883,14 +2863,17 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
-
+ rsp->port_rcv_errors =
+ cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
vlinfo = &rsp->vls[0];
vfi = 0;
vl_select_mask = be32_to_cpu(req->vl_select_mask);
for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
8 * sizeof(req->vl_select_mask)) {
memset(vlinfo, 0, sizeof(*vlinfo));
- /* vlinfo->vls[vfi].port_vl_xmit_discards ??? */
+ rsp->vls[vfi].port_vl_xmit_discards =
+ cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
+ idx_from_vl(vl)));
vlinfo += 1;
vfi++;
}
@@ -3162,10 +3145,8 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
- if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS) {
- write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0);
+ if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS)
write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
- }
if (counter_select & CS_LINK_ERROR_RECOVERY) {
write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
@@ -3223,7 +3204,9 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
/* if (counter_select & CS_PORT_MARK_FECN)
* write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
*/
- /* port_vl_xmit_discards ??? */
+ if (counter_select & C_SW_XMIT_DSCD_VL)
+ write_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
+ idx_from_vl(vl), 0);
}
if (resp_len)
@@ -3392,7 +3375,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd)
*/
spin_lock(&ppd->cc_state_lock);
- old_cc_state = get_cc_state(ppd);
+ old_cc_state = get_cc_state_protected(ppd);
if (!old_cc_state) {
/* never active, or shutting down */
spin_unlock(&ppd->cc_state_lock);
@@ -3960,7 +3943,6 @@ void clear_linkup_counters(struct hfi1_devdata *dd)
write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
/* LocalLinkIntegrityErrors */
- write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0);
write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
/* ExcessiveBufferOverruns */
write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 8b734aaae88a..5aa3fd1be653 100644
--- a/drivers/infiniband/hw/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
@@ -48,15 +48,8 @@
#define _HFI1_MAD_H
#include <rdma/ib_pma.h>
-#define USE_PI_LED_ENABLE 1 /*
- * use led enabled bit in struct
- * opa_port_states, if available
- */
#include <rdma/opa_smi.h>
#include <rdma/opa_port_info.h>
-#ifndef PI_LED_ENABLE_SUP
-#define PI_LED_ENABLE_SUP 0
-#endif
#include "opa_compat.h"
/*
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index b7a80aa1ae30..7ad30898fc19 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -53,19 +53,20 @@
#include "trace.h"
struct mmu_rb_handler {
- struct list_head list;
struct mmu_notifier mn;
- struct rb_root *root;
+ struct rb_root root;
+ void *ops_arg;
spinlock_t lock; /* protect the RB tree */
struct mmu_rb_ops *ops;
+ struct mm_struct *mm;
+ struct list_head lru_list;
+ struct work_struct del_work;
+ struct list_head del_list;
+ struct workqueue_struct *wq;
};
-static LIST_HEAD(mmu_rb_handlers);
-static DEFINE_SPINLOCK(mmu_rb_lock); /* protect mmu_rb_handlers list */
-
static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
-static struct mmu_rb_handler *find_mmu_handler(struct rb_root *);
static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *,
unsigned long);
static inline void mmu_notifier_range_start(struct mmu_notifier *,
@@ -76,6 +77,9 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
unsigned long, unsigned long);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
unsigned long, unsigned long);
+static void do_remove(struct mmu_rb_handler *handler,
+ struct list_head *del_list);
+static void handle_remove(struct work_struct *work);
static struct mmu_notifier_ops mn_opts = {
.invalidate_page = mmu_notifier_page,
@@ -95,73 +99,79 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node)
return PAGE_ALIGN(node->addr + node->len) - 1;
}
-int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
+int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
+ struct mmu_rb_ops *ops,
+ struct workqueue_struct *wq,
+ struct mmu_rb_handler **handler)
{
struct mmu_rb_handler *handlr;
-
- if (!ops->invalidate)
- return -EINVAL;
+ int ret;
handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
if (!handlr)
return -ENOMEM;
- handlr->root = root;
+ handlr->root = RB_ROOT;
handlr->ops = ops;
+ handlr->ops_arg = ops_arg;
INIT_HLIST_NODE(&handlr->mn.hlist);
spin_lock_init(&handlr->lock);
handlr->mn.ops = &mn_opts;
- spin_lock(&mmu_rb_lock);
- list_add_tail_rcu(&handlr->list, &mmu_rb_handlers);
- spin_unlock(&mmu_rb_lock);
+ handlr->mm = mm;
+ INIT_WORK(&handlr->del_work, handle_remove);
+ INIT_LIST_HEAD(&handlr->del_list);
+ INIT_LIST_HEAD(&handlr->lru_list);
+ handlr->wq = wq;
+
+ ret = mmu_notifier_register(&handlr->mn, handlr->mm);
+ if (ret) {
+ kfree(handlr);
+ return ret;
+ }
- return mmu_notifier_register(&handlr->mn, current->mm);
+ *handler = handlr;
+ return 0;
}
-void hfi1_mmu_rb_unregister(struct rb_root *root)
+void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
{
- struct mmu_rb_handler *handler = find_mmu_handler(root);
+ struct mmu_rb_node *rbnode;
+ struct rb_node *node;
unsigned long flags;
-
- if (!handler)
- return;
+ struct list_head del_list;
/* Unregister first so we don't get any more notifications. */
- if (current->mm)
- mmu_notifier_unregister(&handler->mn, current->mm);
+ mmu_notifier_unregister(&handler->mn, handler->mm);
- spin_lock(&mmu_rb_lock);
- list_del_rcu(&handler->list);
- spin_unlock(&mmu_rb_lock);
- synchronize_rcu();
+ /*
+ * Make sure the wq delete handler is finished running. It will not
+ * be triggered once the mmu notifiers are unregistered above.
+ */
+ flush_work(&handler->del_work);
+
+ INIT_LIST_HEAD(&del_list);
spin_lock_irqsave(&handler->lock, flags);
- if (!RB_EMPTY_ROOT(root)) {
- struct rb_node *node;
- struct mmu_rb_node *rbnode;
-
- while ((node = rb_first(root))) {
- rbnode = rb_entry(node, struct mmu_rb_node, node);
- rb_erase(node, root);
- if (handler->ops->remove)
- handler->ops->remove(root, rbnode, NULL);
- }
+ while ((node = rb_first(&handler->root))) {
+ rbnode = rb_entry(node, struct mmu_rb_node, node);
+ rb_erase(node, &handler->root);
+ /* move from LRU list to delete list */
+ list_move(&rbnode->list, &del_list);
}
spin_unlock_irqrestore(&handler->lock, flags);
+ do_remove(handler, &del_list);
+
kfree(handler);
}
-int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
+int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
+ struct mmu_rb_node *mnode)
{
- struct mmu_rb_handler *handler = find_mmu_handler(root);
struct mmu_rb_node *node;
unsigned long flags;
int ret = 0;
- if (!handler)
- return -EINVAL;
-
spin_lock_irqsave(&handler->lock, flags);
hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr,
mnode->len);
@@ -170,12 +180,13 @@ int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
ret = -EINVAL;
goto unlock;
}
- __mmu_int_rb_insert(mnode, root);
+ __mmu_int_rb_insert(mnode, &handler->root);
+ list_add(&mnode->list, &handler->lru_list);
- if (handler->ops->insert) {
- ret = handler->ops->insert(root, mnode);
- if (ret)
- __mmu_int_rb_remove(mnode, root);
+ ret = handler->ops->insert(handler->ops_arg, mnode);
+ if (ret) {
+ __mmu_int_rb_remove(mnode, &handler->root);
+ list_del(&mnode->list); /* remove from LRU list */
}
unlock:
spin_unlock_irqrestore(&handler->lock, flags);
@@ -191,10 +202,10 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len);
if (!handler->ops->filter) {
- node = __mmu_int_rb_iter_first(handler->root, addr,
+ node = __mmu_int_rb_iter_first(&handler->root, addr,
(addr + len) - 1);
} else {
- for (node = __mmu_int_rb_iter_first(handler->root, addr,
+ for (node = __mmu_int_rb_iter_first(&handler->root, addr,
(addr + len) - 1);
node;
node = __mmu_int_rb_iter_next(node, addr,
@@ -206,82 +217,72 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
return node;
}
-/* Caller must *not* hold handler lock. */
-static void __mmu_rb_remove(struct mmu_rb_handler *handler,
- struct mmu_rb_node *node, struct mm_struct *mm)
-{
- unsigned long flags;
-
- /* Validity of handler and node pointers has been checked by caller. */
- hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
- node->len);
- spin_lock_irqsave(&handler->lock, flags);
- __mmu_int_rb_remove(node, handler->root);
- spin_unlock_irqrestore(&handler->lock, flags);
-
- if (handler->ops->remove)
- handler->ops->remove(handler->root, node, mm);
-}
-
-struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
- unsigned long len)
+struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler,
+ unsigned long addr, unsigned long len)
{
- struct mmu_rb_handler *handler = find_mmu_handler(root);
struct mmu_rb_node *node;
unsigned long flags;
- if (!handler)
- return ERR_PTR(-EINVAL);
-
spin_lock_irqsave(&handler->lock, flags);
node = __mmu_rb_search(handler, addr, len);
+ if (node) {
+ __mmu_int_rb_remove(node, &handler->root);
+ list_del(&node->list); /* remove from LRU list */
+ }
spin_unlock_irqrestore(&handler->lock, flags);
return node;
}
-struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *root,
- unsigned long addr, unsigned long len)
+void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
{
- struct mmu_rb_handler *handler = find_mmu_handler(root);
- struct mmu_rb_node *node;
+ struct mmu_rb_node *rbnode, *ptr;
+ struct list_head del_list;
unsigned long flags;
+ bool stop = false;
- if (!handler)
- return ERR_PTR(-EINVAL);
+ INIT_LIST_HEAD(&del_list);
spin_lock_irqsave(&handler->lock, flags);
- node = __mmu_rb_search(handler, addr, len);
- if (node)
- __mmu_int_rb_remove(node, handler->root);
+ list_for_each_entry_safe_reverse(rbnode, ptr, &handler->lru_list,
+ list) {
+ if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
+ &stop)) {
+ __mmu_int_rb_remove(rbnode, &handler->root);
+ /* move from LRU list to delete list */
+ list_move(&rbnode->list, &del_list);
+ }
+ if (stop)
+ break;
+ }
spin_unlock_irqrestore(&handler->lock, flags);
- return node;
+ while (!list_empty(&del_list)) {
+ rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
+ list_del(&rbnode->list);
+ handler->ops->remove(handler->ops_arg, rbnode);
+ }
}
-void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
+/*
+ * It is up to the caller to ensure that this function does not race with the
+ * mmu invalidate notifier which may be calling the users remove callback on
+ * 'node'.
+ */
+void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
+ struct mmu_rb_node *node)
{
- struct mmu_rb_handler *handler = find_mmu_handler(root);
-
- if (!handler || !node)
- return;
-
- __mmu_rb_remove(handler, node, NULL);
-}
+ unsigned long flags;
-static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
-{
- struct mmu_rb_handler *handler;
+ /* Validity of handler and node pointers has been checked by caller. */
+ hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
+ node->len);
+ spin_lock_irqsave(&handler->lock, flags);
+ __mmu_int_rb_remove(node, &handler->root);
+ list_del(&node->list); /* remove from LRU list */
+ spin_unlock_irqrestore(&handler->lock, flags);
- rcu_read_lock();
- list_for_each_entry_rcu(handler, &mmu_rb_handlers, list) {
- if (handler->root == root)
- goto unlock;
- }
- handler = NULL;
-unlock:
- rcu_read_unlock();
- return handler;
+ handler->ops->remove(handler->ops_arg, node);
}
static inline void mmu_notifier_page(struct mmu_notifier *mn,
@@ -304,9 +305,10 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
{
struct mmu_rb_handler *handler =
container_of(mn, struct mmu_rb_handler, mn);
- struct rb_root *root = handler->root;
+ struct rb_root *root = &handler->root;
struct mmu_rb_node *node, *ptr = NULL;
unsigned long flags;
+ bool added = false;
spin_lock_irqsave(&handler->lock, flags);
for (node = __mmu_int_rb_iter_first(root, start, end - 1);
@@ -315,11 +317,53 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
ptr = __mmu_int_rb_iter_next(node, start, end - 1);
hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
node->addr, node->len);
- if (handler->ops->invalidate(root, node)) {
+ if (handler->ops->invalidate(handler->ops_arg, node)) {
__mmu_int_rb_remove(node, root);
- if (handler->ops->remove)
- handler->ops->remove(root, node, mm);
+ /* move from LRU list to delete list */
+ list_move(&node->list, &handler->del_list);
+ added = true;
}
}
spin_unlock_irqrestore(&handler->lock, flags);
+
+ if (added)
+ queue_work(handler->wq, &handler->del_work);
+}
+
+/*
+ * Call the remove function for the given handler and the list. This
+ * is expected to be called with a delete list extracted from handler.
+ * The caller should not be holding the handler lock.
+ */
+static void do_remove(struct mmu_rb_handler *handler,
+ struct list_head *del_list)
+{
+ struct mmu_rb_node *node;
+
+ while (!list_empty(del_list)) {
+ node = list_first_entry(del_list, struct mmu_rb_node, list);
+ list_del(&node->list);
+ handler->ops->remove(handler->ops_arg, node);
+ }
+}
+
+/*
+ * Work queue function to remove all nodes that have been queued up to
+ * be removed. The key feature is that mm->mmap_sem is not being held
+ * and the remove callback can sleep while taking it, if needed.
+ */
+static void handle_remove(struct work_struct *work)
+{
+ struct mmu_rb_handler *handler = container_of(work,
+ struct mmu_rb_handler,
+ del_work);
+ struct list_head del_list;
+ unsigned long flags;
+
+ /* remove anything that is queued to get removed */
+ spin_lock_irqsave(&handler->lock, flags);
+ list_replace_init(&handler->del_list, &del_list);
+ spin_unlock_irqrestore(&handler->lock, flags);
+
+ do_remove(handler, &del_list);
}
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
index 7a57b9c49d27..754f6ebf13fb 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.h
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
@@ -54,23 +54,34 @@ struct mmu_rb_node {
unsigned long len;
unsigned long __last;
struct rb_node node;
+ struct list_head list;
};
+/*
+ * NOTE: filter, insert, invalidate, and evict must not sleep. Only remove is
+ * allowed to sleep.
+ */
struct mmu_rb_ops {
- bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long);
- int (*insert)(struct rb_root *, struct mmu_rb_node *);
- void (*remove)(struct rb_root *, struct mmu_rb_node *,
- struct mm_struct *);
- int (*invalidate)(struct rb_root *, struct mmu_rb_node *);
+ bool (*filter)(struct mmu_rb_node *node, unsigned long addr,
+ unsigned long len);
+ int (*insert)(void *ops_arg, struct mmu_rb_node *mnode);
+ void (*remove)(void *ops_arg, struct mmu_rb_node *mnode);
+ int (*invalidate)(void *ops_arg, struct mmu_rb_node *node);
+ int (*evict)(void *ops_arg, struct mmu_rb_node *mnode,
+ void *evict_arg, bool *stop);
};
-int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops);
-void hfi1_mmu_rb_unregister(struct rb_root *);
-int hfi1_mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
-void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *);
-struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long,
- unsigned long);
-struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *, unsigned long,
- unsigned long);
+int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
+ struct mmu_rb_ops *ops,
+ struct workqueue_struct *wq,
+ struct mmu_rb_handler **handler);
+void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler);
+int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
+ struct mmu_rb_node *mnode);
+void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg);
+void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
+ struct mmu_rb_node *mnode);
+struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler,
+ unsigned long addr, unsigned long len);
#endif /* _HFI1_MMU_RB_H */
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 0bac21e6a658..89c68da1c273 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -679,6 +679,10 @@ static uint pcie_pset = UNSET_PSET;
module_param(pcie_pset, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_pset, "PCIe Eq Pset value to use, range is 0-10");
+static uint pcie_ctle = 1; /* discrete on, integrated off */
+module_param(pcie_ctle, uint, S_IRUGO);
+MODULE_PARM_DESC(pcie_ctle, "PCIe static CTLE mode, bit 0 - discrete on/off, bit 1 - integrated on/off");
+
/* equalization columns */
#define PREC 0
#define ATTN 1
@@ -716,6 +720,36 @@ static const u8 integrated_preliminary_eq[11][3] = {
{ 0x00, 0x1e, 0x0a }, /* p10 */
};
+static const u8 discrete_ctle_tunings[11][4] = {
+ /* DC LF HF BW */
+ { 0x48, 0x0b, 0x04, 0x04 }, /* p0 */
+ { 0x60, 0x05, 0x0f, 0x0a }, /* p1 */
+ { 0x50, 0x09, 0x06, 0x06 }, /* p2 */
+ { 0x68, 0x05, 0x0f, 0x0a }, /* p3 */
+ { 0x80, 0x05, 0x0f, 0x0a }, /* p4 */
+ { 0x70, 0x05, 0x0f, 0x0a }, /* p5 */
+ { 0x68, 0x05, 0x0f, 0x0a }, /* p6 */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p7 */
+ { 0x48, 0x09, 0x06, 0x06 }, /* p8 */
+ { 0x60, 0x05, 0x0f, 0x0a }, /* p9 */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p10 */
+};
+
+static const u8 integrated_ctle_tunings[11][4] = {
+ /* DC LF HF BW */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p0 */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p1 */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p2 */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p3 */
+ { 0x58, 0x0a, 0x05, 0x05 }, /* p4 */
+ { 0x48, 0x0a, 0x05, 0x05 }, /* p5 */
+ { 0x40, 0x0a, 0x05, 0x05 }, /* p6 */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p7 */
+ { 0x38, 0x0f, 0x00, 0x00 }, /* p8 */
+ { 0x38, 0x09, 0x06, 0x06 }, /* p9 */
+ { 0x38, 0x0e, 0x01, 0x01 }, /* p10 */
+};
+
/* helper to format the value to write to hardware */
#define eq_value(pre, curr, post) \
((((u32)(pre)) << \
@@ -951,11 +985,14 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
u32 status, err;
int ret;
int do_retry, retry_count = 0;
+ int intnum = 0;
uint default_pset;
u16 target_vector, target_speed;
u16 lnkctl2, vendor;
u8 div;
const u8 (*eq)[3];
+ const u8 (*ctle_tunings)[4];
+ uint static_ctle_mode;
int return_error = 0;
/* PCIe Gen3 is for the ASIC only */
@@ -1089,6 +1126,9 @@ retry:
div = 3;
eq = discrete_preliminary_eq;
default_pset = DEFAULT_DISCRETE_PSET;
+ ctle_tunings = discrete_ctle_tunings;
+ /* bit 0 - discrete on/off */
+ static_ctle_mode = pcie_ctle & 0x1;
} else {
/* 400mV, FS=29, LF = 9 */
fs = 29;
@@ -1096,6 +1136,9 @@ retry:
div = 1;
eq = integrated_preliminary_eq;
default_pset = DEFAULT_MCP_PSET;
+ ctle_tunings = integrated_ctle_tunings;
+ /* bit 1 - integrated on/off */
+ static_ctle_mode = (pcie_ctle >> 1) & 0x1;
}
pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL101,
(fs <<
@@ -1135,16 +1178,33 @@ retry:
* step 5c: Program gasket interrupts
*/
/* set the Rx Bit Rate to REFCLK ratio */
- write_gasket_interrupt(dd, 0, 0x0006, 0x0050);
+ write_gasket_interrupt(dd, intnum++, 0x0006, 0x0050);
/* disable pCal for PCIe Gen3 RX equalization */
- write_gasket_interrupt(dd, 1, 0x0026, 0x5b01);
+ /* select adaptive or static CTLE */
+ write_gasket_interrupt(dd, intnum++, 0x0026,
+ 0x5b01 | (static_ctle_mode << 3));
/*
* Enable iCal for PCIe Gen3 RX equalization, and set which
* evaluation of RX_EQ_EVAL will launch the iCal procedure.
*/
- write_gasket_interrupt(dd, 2, 0x0026, 0x5202);
+ write_gasket_interrupt(dd, intnum++, 0x0026, 0x5202);
+
+ if (static_ctle_mode) {
+ /* apply static CTLE tunings */
+ u8 pcie_dc, pcie_lf, pcie_hf, pcie_bw;
+
+ pcie_dc = ctle_tunings[pcie_pset][0];
+ pcie_lf = ctle_tunings[pcie_pset][1];
+ pcie_hf = ctle_tunings[pcie_pset][2];
+ pcie_bw = ctle_tunings[pcie_pset][3];
+ write_gasket_interrupt(dd, intnum++, 0x0026, 0x0200 | pcie_dc);
+ write_gasket_interrupt(dd, intnum++, 0x0026, 0x0100 | pcie_lf);
+ write_gasket_interrupt(dd, intnum++, 0x0026, 0x0000 | pcie_hf);
+ write_gasket_interrupt(dd, intnum++, 0x0026, 0x5500 | pcie_bw);
+ }
+
/* terminate list */
- write_gasket_interrupt(dd, 3, 0x0000, 0x0000);
+ write_gasket_interrupt(dd, intnum++, 0x0000, 0x0000);
/*
* step 5d: program XMT margin
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index d4022450b73f..ac1bf4a73571 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1952,13 +1952,17 @@ int init_pervl_scs(struct hfi1_devdata *dd)
dd->vld[15].sc = sc_alloc(dd, SC_VL15,
dd->rcd[0]->rcvhdrqentsize, dd->node);
if (!dd->vld[15].sc)
- goto nomem;
+ return -ENOMEM;
+
hfi1_init_ctxt(dd->vld[15].sc);
dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048);
- dd->kernel_send_context = kmalloc_node(dd->num_send_contexts *
+ dd->kernel_send_context = kzalloc_node(dd->num_send_contexts *
sizeof(struct send_context *),
GFP_KERNEL, dd->node);
+ if (!dd->kernel_send_context)
+ goto freesc15;
+
dd->kernel_send_context[0] = dd->vld[15].sc;
for (i = 0; i < num_vls; i++) {
@@ -2010,12 +2014,21 @@ int init_pervl_scs(struct hfi1_devdata *dd)
if (pio_map_init(dd, ppd->port - 1, num_vls, NULL))
goto nomem;
return 0;
+
nomem:
- sc_free(dd->vld[15].sc);
- for (i = 0; i < num_vls; i++)
+ for (i = 0; i < num_vls; i++) {
sc_free(dd->vld[i].sc);
+ dd->vld[i].sc = NULL;
+ }
+
for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++)
sc_free(dd->kernel_send_context[i + 1]);
+
+ kfree(dd->kernel_send_context);
+ dd->kernel_send_context = NULL;
+
+freesc15:
+ sc_free(dd->vld[15].sc);
return -ENOMEM;
}
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 03df9322f862..965c8aef0c60 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -537,20 +537,6 @@ static void apply_tunings(
u8 precur = 0, attn = 0, postcur = 0, external_device_config = 0;
u8 *cache = ppd->qsfp_info.cache;
- /* Enable external device config if channel is limiting active */
- read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
- GENERAL_CONFIG, &config_data);
- config_data &= ~(0xff << ENABLE_EXT_DEV_CONFIG_SHIFT);
- config_data |= ((u32)limiting_active << ENABLE_EXT_DEV_CONFIG_SHIFT);
- ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
- GENERAL_CONFIG, config_data);
- if (ret != HCMD_SUCCESS)
- dd_dev_err(
- ppd->dd,
- "%s: Failed to set enable external device config\n",
- __func__);
-
- config_data = 0; /* re-init */
/* Pass tuning method to 8051 */
read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
&config_data);
@@ -638,9 +624,13 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
if (ret)
return ret;
+ /*
+ * We'll change the QSFP memory contents from here on out, thus we set a
+ * flag here to remind ourselves to reset the QSFP module. This prevents
+ * reuse of stale settings established in our previous pass through.
+ */
if (ppd->qsfp_info.reset_needed) {
reset_qsfp(ppd);
- ppd->qsfp_info.reset_needed = 0;
refresh_qsfp_cache(ppd, &ppd->qsfp_info);
} else {
ppd->qsfp_info.reset_needed = 1;
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 1a942ffba4cb..a5aa3517e7d5 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -52,6 +52,7 @@
#include <linux/seq_file.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
+#include <rdma/ib_verbs.h>
#include "hfi.h"
#include "qp.h"
@@ -115,6 +116,66 @@ static const u16 credit_table[31] = {
32768 /* 1E */
};
+const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
+[IB_WR_RDMA_WRITE] = {
+ .length = sizeof(struct ib_rdma_wr),
+ .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+[IB_WR_RDMA_READ] = {
+ .length = sizeof(struct ib_rdma_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_ATOMIC,
+},
+
+[IB_WR_ATOMIC_CMP_AND_SWP] = {
+ .length = sizeof(struct ib_atomic_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
+},
+
+[IB_WR_ATOMIC_FETCH_AND_ADD] = {
+ .length = sizeof(struct ib_atomic_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
+},
+
+[IB_WR_RDMA_WRITE_WITH_IMM] = {
+ .length = sizeof(struct ib_rdma_wr),
+ .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+[IB_WR_SEND] = {
+ .length = sizeof(struct ib_send_wr),
+ .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
+ BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+[IB_WR_SEND_WITH_IMM] = {
+ .length = sizeof(struct ib_send_wr),
+ .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
+ BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+[IB_WR_REG_MR] = {
+ .length = sizeof(struct ib_reg_wr),
+ .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_LOCAL,
+},
+
+[IB_WR_LOCAL_INV] = {
+ .length = sizeof(struct ib_send_wr),
+ .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_LOCAL,
+},
+
+[IB_WR_SEND_WITH_INV] = {
+ .length = sizeof(struct ib_send_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+},
+
+};
+
static void flush_tx_list(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
@@ -745,8 +806,9 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
priv->owner = qp;
- priv->s_hdr = kzalloc_node(sizeof(*priv->s_hdr), gfp, rdi->dparms.node);
- if (!priv->s_hdr) {
+ priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), gfp,
+ rdi->dparms.node);
+ if (!priv->s_ahg) {
kfree(priv);
return ERR_PTR(-ENOMEM);
}
@@ -759,7 +821,7 @@ void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- kfree(priv->s_hdr);
+ kfree(priv->s_ahg);
kfree(priv);
}
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index e7bc8d6cf681..587d84d65bb8 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -54,6 +54,8 @@
extern unsigned int hfi1_qp_table_size;
+extern const struct rvt_operation_params hfi1_post_parms[];
+
/*
* free_ahg - clear ahg from QP
*/
@@ -61,7 +63,7 @@ static inline void clear_ahg(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- priv->s_hdr->ahgcount = 0;
+ priv->s_ahg->ahgcount = 0;
qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR);
if (priv->s_sde && qp->s_ahgidx >= 0)
sdma_ahg_free(priv->s_sde, qp->s_ahgidx);
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 9fb561682c66..a207717ade2a 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -50,46 +50,285 @@
#include <linux/vmalloc.h>
#include "hfi.h"
-#include "twsi.h"
+
+/* for the given bus number, return the CSR for reading an i2c line */
+static inline u32 i2c_in_csr(u32 bus_num)
+{
+ return bus_num ? ASIC_QSFP2_IN : ASIC_QSFP1_IN;
+}
+
+/* for the given bus number, return the CSR for writing an i2c line */
+static inline u32 i2c_oe_csr(u32 bus_num)
+{
+ return bus_num ? ASIC_QSFP2_OE : ASIC_QSFP1_OE;
+}
+
+static void hfi1_setsda(void *data, int state)
+{
+ struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data;
+ struct hfi1_devdata *dd = bus->controlling_dd;
+ u64 reg;
+ u32 target_oe;
+
+ target_oe = i2c_oe_csr(bus->num);
+ reg = read_csr(dd, target_oe);
+ /*
+ * The OE bit value is inverted and connected to the pin. When
+ * OE is 0 the pin is left to be pulled up, when the OE is 1
+ * the pin is driven low. This matches the "open drain" or "open
+ * collector" convention.
+ */
+ if (state)
+ reg &= ~QSFP_HFI0_I2CDAT;
+ else
+ reg |= QSFP_HFI0_I2CDAT;
+ write_csr(dd, target_oe, reg);
+ /* do a read to force the write into the chip */
+ (void)read_csr(dd, target_oe);
+}
+
+static void hfi1_setscl(void *data, int state)
+{
+ struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data;
+ struct hfi1_devdata *dd = bus->controlling_dd;
+ u64 reg;
+ u32 target_oe;
+
+ target_oe = i2c_oe_csr(bus->num);
+ reg = read_csr(dd, target_oe);
+ /*
+ * The OE bit value is inverted and connected to the pin. When
+ * OE is 0 the pin is left to be pulled up, when the OE is 1
+ * the pin is driven low. This matches the "open drain" or "open
+ * collector" convention.
+ */
+ if (state)
+ reg &= ~QSFP_HFI0_I2CCLK;
+ else
+ reg |= QSFP_HFI0_I2CCLK;
+ write_csr(dd, target_oe, reg);
+ /* do a read to force the write into the chip */
+ (void)read_csr(dd, target_oe);
+}
+
+static int hfi1_getsda(void *data)
+{
+ struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data;
+ u64 reg;
+ u32 target_in;
+
+ hfi1_setsda(data, 1); /* clear OE so we do not pull line down */
+ udelay(2); /* 1us pull up + 250ns hold */
+
+ target_in = i2c_in_csr(bus->num);
+ reg = read_csr(bus->controlling_dd, target_in);
+ return !!(reg & QSFP_HFI0_I2CDAT);
+}
+
+static int hfi1_getscl(void *data)
+{
+ struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data;
+ u64 reg;
+ u32 target_in;
+
+ hfi1_setscl(data, 1); /* clear OE so we do not pull line down */
+ udelay(2); /* 1us pull up + 250ns hold */
+
+ target_in = i2c_in_csr(bus->num);
+ reg = read_csr(bus->controlling_dd, target_in);
+ return !!(reg & QSFP_HFI0_I2CCLK);
+}
/*
- * QSFP support for hfi driver, using "Two Wire Serial Interface" driver
- * in twsi.c
+ * Allocate and initialize the given i2c bus number.
+ * Returns NULL on failure.
*/
-#define I2C_MAX_RETRY 4
+static struct hfi1_i2c_bus *init_i2c_bus(struct hfi1_devdata *dd,
+ struct hfi1_asic_data *ad, int num)
+{
+ struct hfi1_i2c_bus *bus;
+ int ret;
+
+ bus = kzalloc(sizeof(*bus), GFP_KERNEL);
+ if (!bus)
+ return NULL;
+
+ bus->controlling_dd = dd;
+ bus->num = num; /* our bus number */
+
+ bus->algo.setsda = hfi1_setsda;
+ bus->algo.setscl = hfi1_setscl;
+ bus->algo.getsda = hfi1_getsda;
+ bus->algo.getscl = hfi1_getscl;
+ bus->algo.udelay = 5;
+ bus->algo.timeout = usecs_to_jiffies(50);
+ bus->algo.data = bus;
+
+ bus->adapter.owner = THIS_MODULE;
+ bus->adapter.algo_data = &bus->algo;
+ bus->adapter.dev.parent = &dd->pcidev->dev;
+ snprintf(bus->adapter.name, sizeof(bus->adapter.name),
+ "hfi1_i2c%d", num);
+
+ ret = i2c_bit_add_bus(&bus->adapter);
+ if (ret) {
+ dd_dev_info(dd, "%s: unable to add i2c bus %d, err %d\n",
+ __func__, num, ret);
+ kfree(bus);
+ return NULL;
+ }
+
+ return bus;
+}
/*
- * Raw i2c write. No set-up or lock checking.
+ * Initialize i2c buses.
+ * Return 0 on success, -errno on error.
*/
-static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
- int offset, void *bp, int len)
+int set_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad)
{
- struct hfi1_devdata *dd = ppd->dd;
- int ret, cnt;
- u8 *buff = bp;
+ ad->i2c_bus0 = init_i2c_bus(dd, ad, 0);
+ ad->i2c_bus1 = init_i2c_bus(dd, ad, 1);
+ if (!ad->i2c_bus0 || !ad->i2c_bus1)
+ return -ENOMEM;
+ return 0;
+};
- cnt = 0;
- while (cnt < len) {
- int wlen = len - cnt;
+static void clean_i2c_bus(struct hfi1_i2c_bus *bus)
+{
+ if (bus) {
+ i2c_del_adapter(&bus->adapter);
+ kfree(bus);
+ }
+}
- ret = hfi1_twsi_blk_wr(dd, target, i2c_addr, offset,
- buff + cnt, wlen);
- if (ret) {
- /* hfi1_twsi_blk_wr() 1 for error, else 0 */
- return -EIO;
- }
- offset += wlen;
- cnt += wlen;
+void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad)
+{
+ clean_i2c_bus(ad->i2c_bus0);
+ ad->i2c_bus0 = NULL;
+ clean_i2c_bus(ad->i2c_bus1);
+ ad->i2c_bus1 = NULL;
+}
+
+static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c,
+ u8 slave_addr, int offset, int offset_size,
+ u8 *data, u16 len)
+{
+ int ret;
+ int num_msgs;
+ u8 offset_bytes[2];
+ struct i2c_msg msgs[2];
+
+ switch (offset_size) {
+ case 0:
+ num_msgs = 1;
+ msgs[0].addr = slave_addr;
+ msgs[0].flags = 0;
+ msgs[0].len = len;
+ msgs[0].buf = data;
+ break;
+ case 2:
+ offset_bytes[1] = (offset >> 8) & 0xff;
+ /* fall through */
+ case 1:
+ num_msgs = 2;
+ offset_bytes[0] = offset & 0xff;
+
+ msgs[0].addr = slave_addr;
+ msgs[0].flags = 0;
+ msgs[0].len = offset_size;
+ msgs[0].buf = offset_bytes;
+
+ msgs[1].addr = slave_addr;
+ msgs[1].flags = I2C_M_NOSTART,
+ msgs[1].len = len;
+ msgs[1].buf = data;
+ break;
+ default:
+ return -EINVAL;
}
- /* Must wait min 20us between qsfp i2c transactions */
- udelay(20);
+ i2c->controlling_dd = dd;
+ ret = i2c_transfer(&i2c->adapter, msgs, num_msgs);
+ if (ret != num_msgs) {
+ dd_dev_err(dd, "%s: bus %d, i2c slave 0x%x, offset 0x%x, len 0x%x; write failed, ret %d\n",
+ __func__, i2c->num, slave_addr, offset, len, ret);
+ return ret < 0 ? ret : -EIO;
+ }
+ return 0;
+}
+
+static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus,
+ u8 slave_addr, int offset, int offset_size,
+ u8 *data, u16 len)
+{
+ int ret;
+ int num_msgs;
+ u8 offset_bytes[2];
+ struct i2c_msg msgs[2];
+
+ switch (offset_size) {
+ case 0:
+ num_msgs = 1;
+ msgs[0].addr = slave_addr;
+ msgs[0].flags = I2C_M_RD;
+ msgs[0].len = len;
+ msgs[0].buf = data;
+ break;
+ case 2:
+ offset_bytes[1] = (offset >> 8) & 0xff;
+ /* fall through */
+ case 1:
+ num_msgs = 2;
+ offset_bytes[0] = offset & 0xff;
+
+ msgs[0].addr = slave_addr;
+ msgs[0].flags = 0;
+ msgs[0].len = offset_size;
+ msgs[0].buf = offset_bytes;
+
+ msgs[1].addr = slave_addr;
+ msgs[1].flags = I2C_M_RD,
+ msgs[1].len = len;
+ msgs[1].buf = data;
+ break;
+ default:
+ return -EINVAL;
+ }
- return cnt;
+ bus->controlling_dd = dd;
+ ret = i2c_transfer(&bus->adapter, msgs, num_msgs);
+ if (ret != num_msgs) {
+ dd_dev_err(dd, "%s: bus %d, i2c slave 0x%x, offset 0x%x, len 0x%x; read failed, ret %d\n",
+ __func__, bus->num, slave_addr, offset, len, ret);
+ return ret < 0 ? ret : -EIO;
+ }
+ return 0;
+}
+
+/*
+ * Raw i2c write. No set-up or lock checking.
+ *
+ * Return 0 on success, -errno on error.
+ */
+static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
+ int offset, void *bp, int len)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+ struct hfi1_i2c_bus *bus;
+ u8 slave_addr;
+ int offset_size;
+
+ bus = target ? dd->asic_data->i2c_bus1 : dd->asic_data->i2c_bus0;
+ slave_addr = (i2c_addr & 0xff) >> 1; /* convert to 7-bit addr */
+ offset_size = (i2c_addr >> 8) & 0x3;
+ return i2c_bus_write(dd, bus, slave_addr, offset, offset_size, bp, len);
}
/*
* Caller must hold the i2c chain resource.
+ *
+ * Return number of bytes written, or -errno.
*/
int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
void *bp, int len)
@@ -99,63 +338,36 @@ int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES;
- /* make sure the TWSI bus is in a sane state */
- ret = hfi1_twsi_reset(ppd->dd, target);
- if (ret) {
- hfi1_dev_porterr(ppd->dd, ppd->port,
- "I2C chain %d write interface reset failed\n",
- target);
+ ret = __i2c_write(ppd, target, i2c_addr, offset, bp, len);
+ if (ret)
return ret;
- }
- return __i2c_write(ppd, target, i2c_addr, offset, bp, len);
+ return len;
}
/*
* Raw i2c read. No set-up or lock checking.
+ *
+ * Return 0 on success, -errno on error.
*/
static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
int offset, void *bp, int len)
{
struct hfi1_devdata *dd = ppd->dd;
- int ret, cnt, pass = 0;
- int orig_offset = offset;
-
- cnt = 0;
- while (cnt < len) {
- int rlen = len - cnt;
-
- ret = hfi1_twsi_blk_rd(dd, target, i2c_addr, offset,
- bp + cnt, rlen);
- /* Some QSFP's fail first try. Retry as experiment */
- if (ret && cnt == 0 && ++pass < I2C_MAX_RETRY)
- continue;
- if (ret) {
- /* hfi1_twsi_blk_rd() 1 for error, else 0 */
- ret = -EIO;
- goto exit;
- }
- offset += rlen;
- cnt += rlen;
- }
-
- ret = cnt;
-
-exit:
- if (ret < 0) {
- hfi1_dev_porterr(dd, ppd->port,
- "I2C chain %d read failed, addr 0x%x, offset 0x%x, len %d\n",
- target, i2c_addr, orig_offset, len);
- }
-
- /* Must wait min 20us between qsfp i2c transactions */
- udelay(20);
-
- return ret;
+ struct hfi1_i2c_bus *bus;
+ u8 slave_addr;
+ int offset_size;
+
+ bus = target ? dd->asic_data->i2c_bus1 : dd->asic_data->i2c_bus0;
+ slave_addr = (i2c_addr & 0xff) >> 1; /* convert to 7-bit addr */
+ offset_size = (i2c_addr >> 8) & 0x3;
+ return i2c_bus_read(dd, bus, slave_addr, offset, offset_size, bp, len);
}
/*
* Caller must hold the i2c chain resource.
+ *
+ * Return number of bytes read, or -errno.
*/
int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
void *bp, int len)
@@ -165,16 +377,11 @@ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES;
- /* make sure the TWSI bus is in a sane state */
- ret = hfi1_twsi_reset(ppd->dd, target);
- if (ret) {
- hfi1_dev_porterr(ppd->dd, ppd->port,
- "I2C chain %d read interface reset failed\n",
- target);
+ ret = __i2c_read(ppd, target, i2c_addr, offset, bp, len);
+ if (ret)
return ret;
- }
- return __i2c_read(ppd, target, i2c_addr, offset, bp, len);
+ return len;
}
/*
@@ -182,6 +389,8 @@ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
* by writing @addr = ((256 * n) + m)
*
* Caller must hold the i2c chain resource.
+ *
+ * Return number of bytes written or -errno.
*/
int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len)
@@ -189,21 +398,12 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int count = 0;
int offset;
int nwrite;
- int ret;
+ int ret = 0;
u8 page;
if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES;
- /* make sure the TWSI bus is in a sane state */
- ret = hfi1_twsi_reset(ppd->dd, target);
- if (ret) {
- hfi1_dev_porterr(ppd->dd, ppd->port,
- "QSFP chain %d write interface reset failed\n",
- target);
- return ret;
- }
-
while (count < len) {
/*
* Set the qsfp page based on a zero-based address
@@ -213,11 +413,12 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
- if (ret != 1) {
+ /* QSFPs require a 5-10msec delay after write operations */
+ mdelay(5);
+ if (ret) {
hfi1_dev_porterr(ppd->dd, ppd->port,
"QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n",
target, ret);
- ret = -EIO;
break;
}
@@ -229,11 +430,13 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
offset, bp + count, nwrite);
- if (ret <= 0) /* stop on error or nothing written */
+ /* QSFPs require a 5-10msec delay after write operations */
+ mdelay(5);
+ if (ret) /* stop on error */
break;
- count += ret;
- addr += ret;
+ count += nwrite;
+ addr += nwrite;
}
if (ret < 0)
@@ -243,7 +446,7 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
/*
* Perform a stand-alone single QSFP write. Acquire the resource, do the
- * read, then release the resource.
+ * write, then release the resource.
*/
int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len)
@@ -266,6 +469,8 @@ int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
* by reading @addr = ((256 * n) + m)
*
* Caller must hold the i2c chain resource.
+ *
+ * Return the number of bytes read or -errno.
*/
int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len)
@@ -273,21 +478,12 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int count = 0;
int offset;
int nread;
- int ret;
+ int ret = 0;
u8 page;
if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES;
- /* make sure the TWSI bus is in a sane state */
- ret = hfi1_twsi_reset(ppd->dd, target);
- if (ret) {
- hfi1_dev_porterr(ppd->dd, ppd->port,
- "QSFP chain %d read interface reset failed\n",
- target);
- return ret;
- }
-
while (count < len) {
/*
* Set the qsfp page based on a zero-based address
@@ -296,11 +492,12 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
page = (u8)(addr / QSFP_PAGESIZE);
ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
- if (ret != 1) {
+ /* QSFPs require a 5-10msec delay after write operations */
+ mdelay(5);
+ if (ret) {
hfi1_dev_porterr(ppd->dd, ppd->port,
"QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n",
target, ret);
- ret = -EIO;
break;
}
@@ -310,15 +507,13 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
if (((addr % QSFP_RW_BOUNDARY) + nread) > QSFP_RW_BOUNDARY)
nread = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY);
- /* QSFPs require a 5-10msec delay after write operations */
- mdelay(5);
ret = __i2c_read(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
offset, bp + count, nread);
- if (ret <= 0) /* stop on error or nothing read */
+ if (ret) /* stop on error */
break;
- count += ret;
- addr += ret;
+ count += nread;
+ addr += nread;
}
if (ret < 0)
diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h
index dadc66c442b9..69275ebd9597 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.h
+++ b/drivers/infiniband/hw/hfi1/qsfp.h
@@ -238,3 +238,6 @@ int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
+struct hfi1_asic_data;
+int set_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad);
+void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 792f15eb8efe..5da190e6011b 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -477,6 +477,37 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_flags |= RVT_S_WAIT_FENCE;
goto bail;
}
+ /*
+ * Local operations are processed immediately
+ * after all prior requests have completed
+ */
+ if (wqe->wr.opcode == IB_WR_REG_MR ||
+ wqe->wr.opcode == IB_WR_LOCAL_INV) {
+ int local_ops = 0;
+ int err = 0;
+
+ if (qp->s_last != qp->s_cur)
+ goto bail;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ if (++qp->s_tail == qp->s_size)
+ qp->s_tail = 0;
+ if (!(wqe->wr.send_flags &
+ RVT_SEND_COMPLETION_ONLY)) {
+ err = rvt_invalidate_rkey(
+ qp,
+ wqe->wr.ex.invalidate_rkey);
+ local_ops = 1;
+ }
+ hfi1_send_complete(qp, wqe,
+ err ? IB_WC_LOC_PROT_ERR
+ : IB_WC_SUCCESS);
+ if (local_ops)
+ atomic_dec(&qp->local_ops_pending);
+ qp->s_hdrwords = 0;
+ goto done_free_tx;
+ }
+
newreq = 1;
qp->s_psn = wqe->psn;
}
@@ -491,6 +522,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
switch (wqe->wr.opcode) {
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
+ case IB_WR_SEND_WITH_INV:
/* If no credit, return. */
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
@@ -504,11 +536,17 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
if (wqe->wr.opcode == IB_WR_SEND) {
qp->s_state = OP(SEND_ONLY);
- } else {
+ } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
+ } else {
+ qp->s_state = OP(SEND_ONLY_WITH_INVALIDATE);
+ /* Invalidate rkey comes after the BTH */
+ ohdr->u.ieth = cpu_to_be32(
+ wqe->wr.ex.invalidate_rkey);
+ hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
@@ -671,11 +709,16 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
if (wqe->wr.opcode == IB_WR_SEND) {
qp->s_state = OP(SEND_LAST);
- } else {
+ } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
+ } else {
+ qp->s_state = OP(SEND_LAST_WITH_INVALIDATE);
+ /* invalidate data comes after the BTH */
+ ohdr->u.ieth = cpu_to_be32(wqe->wr.ex.invalidate_rkey);
+ hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= IB_BTH_SOLICITED;
@@ -1047,7 +1090,7 @@ void hfi1_rc_timeout(unsigned long arg)
ibp->rvp.n_rc_timeouts++;
qp->s_flags &= ~RVT_S_TIMER;
del_timer(&qp->s_timer);
- trace_hfi1_rc_timeout(qp, qp->s_last_psn + 1);
+ trace_hfi1_timeout(qp, qp->s_last_psn + 1);
restart_rc(qp, qp->s_last_psn + 1, 1);
hfi1_schedule_send(qp);
}
@@ -1171,7 +1214,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr)
* If we were waiting for sends to complete before re-sending,
* and they are now complete, restart sending.
*/
- trace_hfi1_rc_sendcomplete(qp, psn);
+ trace_hfi1_sendcomplete(qp, psn);
if (qp->s_flags & RVT_S_WAIT_PSN &&
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
qp->s_flags &= ~RVT_S_WAIT_PSN;
@@ -1567,7 +1610,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
spin_lock_irqsave(&qp->s_lock, flags);
- trace_hfi1_rc_ack(qp, psn);
+ trace_hfi1_ack(qp, psn);
/* Ignore invalid responses. */
smp_read_barrier_depends(); /* see post_one_send */
@@ -1782,7 +1825,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
u8 i, prev;
int old_req;
- trace_hfi1_rc_rcv_error(qp, psn);
+ trace_hfi1_rcv_error(qp, psn);
if (diff > 0) {
/*
* Packet sequence error.
@@ -2086,7 +2129,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 hdrsize = packet->hlen;
@@ -2097,30 +2139,15 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
int diff;
struct ib_reth *reth;
unsigned long flags;
- u32 bth1;
int ret, is_fecn = 0;
int copy_last = 0;
+ u32 rkey;
bth0 = be32_to_cpu(ohdr->bth[0]);
if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
return;
- bth1 = be32_to_cpu(ohdr->bth[1]);
- if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) {
- if (bth1 & HFI1_BECN_SMASK) {
- u16 rlid = qp->remote_ah_attr.dlid;
- u32 lqpn, rqpn;
-
- lqpn = qp->ibqp.qp_num;
- rqpn = qp->remote_qpn;
- process_becn(
- ppd,
- qp->remote_ah_attr.sl,
- rlid, lqpn, rqpn,
- IB_CC_SVCTYPE_RC);
- }
- is_fecn = bth1 & HFI1_FECN_SMASK;
- }
+ is_fecn = process_ecn(qp, packet, false);
psn = be32_to_cpu(ohdr->bth[2]);
opcode = (bth0 >> 24) & 0xff;
@@ -2154,7 +2181,8 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
case OP(SEND_MIDDLE):
if (opcode == OP(SEND_MIDDLE) ||
opcode == OP(SEND_LAST) ||
- opcode == OP(SEND_LAST_WITH_IMMEDIATE))
+ opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
+ opcode == OP(SEND_LAST_WITH_INVALIDATE))
break;
goto nack_inv;
@@ -2170,6 +2198,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
if (opcode == OP(SEND_MIDDLE) ||
opcode == OP(SEND_LAST) ||
opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
+ opcode == OP(SEND_LAST_WITH_INVALIDATE) ||
opcode == OP(RDMA_WRITE_MIDDLE) ||
opcode == OP(RDMA_WRITE_LAST) ||
opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
@@ -2218,6 +2247,7 @@ send_middle:
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
+ case OP(SEND_ONLY_WITH_INVALIDATE):
ret = hfi1_rvt_get_rwqe(qp, 0);
if (ret < 0)
goto nack_op_err;
@@ -2226,12 +2256,22 @@ send_middle:
qp->r_rcv_len = 0;
if (opcode == OP(SEND_ONLY))
goto no_immediate_data;
+ if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
+ goto send_last_inv;
/* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
goto send_last;
+ case OP(SEND_LAST_WITH_INVALIDATE):
+send_last_inv:
+ rkey = be32_to_cpu(ohdr->u.ieth);
+ if (rvt_invalidate_rkey(qp, rkey))
+ goto no_immediate_data;
+ wc.ex.invalidate_rkey = rkey;
+ wc.wc_flags = IB_WC_WITH_INVALIDATE;
+ goto send_last;
case OP(RDMA_WRITE_LAST):
copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
/* fall through */
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index a659aec3c3c6..48d5094f98e2 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -372,6 +372,7 @@ static void ruc_loopback(struct rvt_qp *sqp)
int ret;
int copy_last = 0;
u32 to;
+ int local_ops = 0;
rcu_read_lock();
@@ -440,11 +441,31 @@ again:
sqp->s_sge.num_sge = wqe->wr.num_sge;
sqp->s_len = wqe->length;
switch (wqe->wr.opcode) {
+ case IB_WR_REG_MR:
+ goto send_comp;
+
+ case IB_WR_LOCAL_INV:
+ if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
+ if (rvt_invalidate_rkey(sqp,
+ wqe->wr.ex.invalidate_rkey))
+ send_status = IB_WC_LOC_PROT_ERR;
+ local_ops = 1;
+ }
+ goto send_comp;
+
+ case IB_WR_SEND_WITH_INV:
+ if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
+ wc.wc_flags = IB_WC_WITH_INVALIDATE;
+ wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
+ }
+ goto send;
+
case IB_WR_SEND_WITH_IMM:
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = wqe->wr.ex.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
+send:
ret = hfi1_rvt_get_rwqe(qp, 0);
if (ret < 0)
goto op_err;
@@ -583,6 +604,10 @@ send_comp:
flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
hfi1_send_complete(sqp, wqe, send_status);
+ if (local_ops) {
+ atomic_dec(&sqp->local_ops_pending);
+ local_ops = 0;
+ }
goto again;
rnr_nak:
@@ -683,10 +708,10 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
return sizeof(struct ib_grh) / sizeof(u32);
}
-#define BTH2_OFFSET (offsetof(struct hfi1_pio_header, hdr.u.oth.bth[2]) / 4)
+#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, hdr.u.oth.bth[2]) / 4)
/**
- * build_ahg - create ahg in s_hdr
+ * build_ahg - create ahg in s_ahg
* @qp: a pointer to QP
* @npsn: the next PSN for the request/response
*
@@ -708,19 +733,18 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
if (qp->s_ahgidx >= 0) {
qp->s_ahgpsn = npsn;
- priv->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
+ priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
/* save to protect a change in another thread */
- priv->s_hdr->sde = priv->s_sde;
- priv->s_hdr->ahgidx = qp->s_ahgidx;
+ priv->s_ahg->ahgidx = qp->s_ahgidx;
qp->s_flags |= RVT_S_AHG_VALID;
}
} else {
/* subsequent middle after valid */
if (qp->s_ahgidx >= 0) {
- priv->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG;
- priv->s_hdr->ahgidx = qp->s_ahgidx;
- priv->s_hdr->ahgcount++;
- priv->s_hdr->ahgdesc[0] =
+ priv->s_ahg->tx_flags |= SDMA_TXREQ_F_USE_AHG;
+ priv->s_ahg->ahgidx = qp->s_ahgidx;
+ priv->s_ahg->ahgcount++;
+ priv->s_ahg->ahgdesc[0] =
sdma_build_ahg_descriptor(
(__force u16)cpu_to_be16((u16)npsn),
BTH2_OFFSET,
@@ -728,8 +752,8 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
16);
if ((npsn & 0xffff0000) !=
(qp->s_ahgpsn & 0xffff0000)) {
- priv->s_hdr->ahgcount++;
- priv->s_hdr->ahgdesc[1] =
+ priv->s_ahg->ahgcount++;
+ priv->s_ahg->ahgdesc[1] =
sdma_build_ahg_descriptor(
(__force u16)cpu_to_be16(
(u16)(npsn >> 16)),
@@ -766,7 +790,7 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
}
lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
/*
- * reset s_hdr/AHG fields
+ * reset s_ahg/AHG fields
*
* This insures that the ahgentry/ahgcount
* are at a non-AHG default to protect
@@ -776,10 +800,9 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
* build_ahg() will modify as appropriate
* to use the AHG feature.
*/
- priv->s_hdr->tx_flags = 0;
- priv->s_hdr->ahgcount = 0;
- priv->s_hdr->ahgidx = 0;
- priv->s_hdr->sde = NULL;
+ priv->s_ahg->tx_flags = 0;
+ priv->s_ahg->ahgcount = 0;
+ priv->s_ahg->ahgidx = 0;
if (qp->s_mig_state == IB_MIG_MIGRATED)
bth0 |= IB_BTH_MIG_REQ;
else
@@ -890,7 +913,7 @@ void hfi1_do_send(struct rvt_qp *qp)
*/
if (hfi1_verbs_send(qp, &ps))
return;
- /* Record that s_hdr is empty. */
+ /* Record that s_ahg is empty. */
qp->s_hdrwords = 0;
/* allow other tasks to run */
if (unlikely(time_after(jiffies, timeout))) {
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 91fc2aed6aed..74c84c655f7e 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -49,6 +49,7 @@
#include "hfi.h"
#include "mad.h"
#include "trace.h"
+#include "affinity.h"
/*
* Start of per-port congestion control structures and support code
@@ -622,6 +623,27 @@ static ssize_t show_tempsense(struct device *device,
return ret;
}
+static ssize_t show_sdma_affinity(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct hfi1_ibdev *dev =
+ container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+ struct hfi1_devdata *dd = dd_from_dev(dev);
+
+ return hfi1_get_sdma_affinity(dd, buf);
+}
+
+static ssize_t store_sdma_affinity(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hfi1_ibdev *dev =
+ container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+ struct hfi1_devdata *dd = dd_from_dev(dev);
+
+ return hfi1_set_sdma_affinity(dd, buf, count);
+}
+
/*
* end of per-unit (or driver, in some cases, but replicated
* per unit) functions
@@ -636,6 +658,8 @@ static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
+static DEVICE_ATTR(sdma_affinity, S_IWUSR | S_IRUGO, show_sdma_affinity,
+ store_sdma_affinity);
static struct device_attribute *hfi1_attributes[] = {
&dev_attr_hw_rev,
@@ -646,6 +670,7 @@ static struct device_attribute *hfi1_attributes[] = {
&dev_attr_boardversion,
&dev_attr_tempsense,
&dev_attr_chip_reset,
+ &dev_attr_sdma_affinity,
};
int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h
index 28c1d0832886..92dc88f013c9 100644
--- a/drivers/infiniband/hw/hfi1/trace.h
+++ b/drivers/infiniband/hw/hfi1/trace.h
@@ -44,1329 +44,10 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
-#undef TRACE_SYSTEM_VAR
-#define TRACE_SYSTEM_VAR hfi1
-
-#if !defined(__HFI1_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define __HFI1_TRACE_H
-
-#include <linux/tracepoint.h>
-#include <linux/trace_seq.h>
-
-#include "hfi.h"
-#include "mad.h"
-#include "sdma.h"
-
-#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
-#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev))
-
-#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
-#define show_packettype(etype) \
-__print_symbolic(etype, \
- packettype_name(EXPECTED), \
- packettype_name(EAGER), \
- packettype_name(IB), \
- packettype_name(ERROR), \
- packettype_name(BYPASS))
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_rx
-
-TRACE_EVENT(hfi1_rcvhdr,
- TP_PROTO(struct hfi1_devdata *dd,
- u32 ctxt,
- u64 eflags,
- u32 etype,
- u32 hlen,
- u32 tlen,
- u32 updegr,
- u32 etail
- ),
- TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
- __field(u64, eflags)
- __field(u32, ctxt)
- __field(u32, etype)
- __field(u32, hlen)
- __field(u32, tlen)
- __field(u32, updegr)
- __field(u32, etail)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(dd);
- __entry->eflags = eflags;
- __entry->ctxt = ctxt;
- __entry->etype = etype;
- __entry->hlen = hlen;
- __entry->tlen = tlen;
- __entry->updegr = updegr;
- __entry->etail = etail;
- ),
- TP_printk(
- "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d",
- __get_str(dev),
- __entry->ctxt,
- __entry->eflags,
- __entry->etype, show_packettype(__entry->etype),
- __entry->hlen,
- __entry->tlen,
- __entry->updegr,
- __entry->etail
- )
-);
-
-TRACE_EVENT(hfi1_receive_interrupt,
- TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
- TP_ARGS(dd, ctxt),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
- __field(u32, ctxt)
- __field(u8, slow_path)
- __field(u8, dma_rtail)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- if (dd->rcd[ctxt]->do_interrupt ==
- &handle_receive_interrupt) {
- __entry->slow_path = 1;
- __entry->dma_rtail = 0xFF;
- } else if (dd->rcd[ctxt]->do_interrupt ==
- &handle_receive_interrupt_dma_rtail){
- __entry->dma_rtail = 1;
- __entry->slow_path = 0;
- } else if (dd->rcd[ctxt]->do_interrupt ==
- &handle_receive_interrupt_nodma_rtail) {
- __entry->dma_rtail = 0;
- __entry->slow_path = 0;
- }
- ),
- TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d",
- __get_str(dev),
- __entry->ctxt,
- __entry->slow_path,
- __entry->dma_rtail
- )
-);
-
-TRACE_EVENT(hfi1_exp_tid_reg,
- TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr,
- u32 npages, unsigned long va, unsigned long pa,
- dma_addr_t dma),
- TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
- TP_STRUCT__entry(
- __field(unsigned, ctxt)
- __field(u16, subctxt)
- __field(u32, rarr)
- __field(u32, npages)
- __field(unsigned long, va)
- __field(unsigned long, pa)
- __field(dma_addr_t, dma)
- ),
- TP_fast_assign(
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->rarr = rarr;
- __entry->npages = npages;
- __entry->va = va;
- __entry->pa = pa;
- __entry->dma = dma;
- ),
- TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
- __entry->ctxt,
- __entry->subctxt,
- __entry->rarr,
- __entry->npages,
- __entry->pa,
- __entry->va,
- __entry->dma
- )
- );
-
-TRACE_EVENT(hfi1_exp_tid_unreg,
- TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, u32 npages,
- unsigned long va, unsigned long pa, dma_addr_t dma),
- TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
- TP_STRUCT__entry(
- __field(unsigned, ctxt)
- __field(u16, subctxt)
- __field(u32, rarr)
- __field(u32, npages)
- __field(unsigned long, va)
- __field(unsigned long, pa)
- __field(dma_addr_t, dma)
- ),
- TP_fast_assign(
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->rarr = rarr;
- __entry->npages = npages;
- __entry->va = va;
- __entry->pa = pa;
- __entry->dma = dma;
- ),
- TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
- __entry->ctxt,
- __entry->subctxt,
- __entry->rarr,
- __entry->npages,
- __entry->pa,
- __entry->va,
- __entry->dma
- )
- );
-
-TRACE_EVENT(hfi1_exp_tid_inval,
- TP_PROTO(unsigned ctxt, u16 subctxt, unsigned long va, u32 rarr,
- u32 npages, dma_addr_t dma),
- TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
- TP_STRUCT__entry(
- __field(unsigned, ctxt)
- __field(u16, subctxt)
- __field(unsigned long, va)
- __field(u32, rarr)
- __field(u32, npages)
- __field(dma_addr_t, dma)
- ),
- TP_fast_assign(
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->va = va;
- __entry->rarr = rarr;
- __entry->npages = npages;
- __entry->dma = dma;
- ),
- TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
- __entry->ctxt,
- __entry->subctxt,
- __entry->rarr,
- __entry->npages,
- __entry->va,
- __entry->dma
- )
- );
-
-TRACE_EVENT(hfi1_mmu_invalidate,
- TP_PROTO(unsigned ctxt, u16 subctxt, const char *type,
- unsigned long start, unsigned long end),
- TP_ARGS(ctxt, subctxt, type, start, end),
- TP_STRUCT__entry(
- __field(unsigned, ctxt)
- __field(u16, subctxt)
- __string(type, type)
- __field(unsigned long, start)
- __field(unsigned long, end)
- ),
- TP_fast_assign(
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __assign_str(type, type);
- __entry->start = start;
- __entry->end = end;
- ),
- TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx",
- __entry->ctxt,
- __entry->subctxt,
- __get_str(type),
- __entry->start,
- __entry->end
- )
- );
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_tx
-
-TRACE_EVENT(hfi1_piofree,
- TP_PROTO(struct send_context *sc, int extra),
- TP_ARGS(sc, extra),
- TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
- __field(u32, sw_index)
- __field(u32, hw_context)
- __field(int, extra)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
- __entry->sw_index = sc->sw_index;
- __entry->hw_context = sc->hw_context;
- __entry->extra = extra;
- ),
- TP_printk("[%s] ctxt %u(%u) extra %d",
- __get_str(dev),
- __entry->sw_index,
- __entry->hw_context,
- __entry->extra
- )
-);
-
-TRACE_EVENT(hfi1_wantpiointr,
- TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl),
- TP_ARGS(sc, needint, credit_ctrl),
- TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
- __field(u32, sw_index)
- __field(u32, hw_context)
- __field(u32, needint)
- __field(u64, credit_ctrl)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
- __entry->sw_index = sc->sw_index;
- __entry->hw_context = sc->hw_context;
- __entry->needint = needint;
- __entry->credit_ctrl = credit_ctrl;
- ),
- TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx",
- __get_str(dev),
- __entry->sw_index,
- __entry->hw_context,
- __entry->needint,
- (unsigned long long)__entry->credit_ctrl
- )
-);
-
-DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
- TP_PROTO(struct rvt_qp *qp, u32 flags),
- TP_ARGS(qp, flags),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
- __field(u32, qpn)
- __field(u32, flags)
- __field(u32, s_flags)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
- __entry->flags = flags;
- __entry->qpn = qp->ibqp.qp_num;
- __entry->s_flags = qp->s_flags;
- ),
- TP_printk(
- "[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
- __get_str(dev),
- __entry->qpn,
- __entry->flags,
- __entry->s_flags
- )
-);
-
-DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup,
- TP_PROTO(struct rvt_qp *qp, u32 flags),
- TP_ARGS(qp, flags));
-
-DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep,
- TP_PROTO(struct rvt_qp *qp, u32 flags),
- TP_ARGS(qp, flags));
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_ibhdrs
-
-u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr);
-const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
-
-#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
-
-const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1);
-
-#define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1)
-
-#define lrh_name(lrh) { HFI1_##lrh, #lrh }
-#define show_lnh(lrh) \
-__print_symbolic(lrh, \
- lrh_name(LRH_BTH), \
- lrh_name(LRH_GRH))
-
-#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode }
-#define show_ib_opcode(opcode) \
-__print_symbolic(opcode, \
- ib_opcode_name(RC_SEND_FIRST), \
- ib_opcode_name(RC_SEND_MIDDLE), \
- ib_opcode_name(RC_SEND_LAST), \
- ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(RC_SEND_ONLY), \
- ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_WRITE_FIRST), \
- ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \
- ib_opcode_name(RC_RDMA_WRITE_LAST), \
- ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_WRITE_ONLY), \
- ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_READ_REQUEST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \
- ib_opcode_name(RC_ACKNOWLEDGE), \
- ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
- ib_opcode_name(RC_COMPARE_SWAP), \
- ib_opcode_name(RC_FETCH_ADD), \
- ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \
- ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \
- ib_opcode_name(UC_SEND_FIRST), \
- ib_opcode_name(UC_SEND_MIDDLE), \
- ib_opcode_name(UC_SEND_LAST), \
- ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(UC_SEND_ONLY), \
- ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(UC_RDMA_WRITE_FIRST), \
- ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \
- ib_opcode_name(UC_RDMA_WRITE_LAST), \
- ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(UC_RDMA_WRITE_ONLY), \
- ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(UD_SEND_ONLY), \
- ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(CNP))
-
-#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x"
-#define BTH_PRN \
- "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \
- "f %d b %d qpn 0x%.6x a %d psn 0x%.8x"
-#define EHDR_PRN "%s"
-
-DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
- TP_PROTO(struct hfi1_devdata *dd,
- struct hfi1_ib_header *hdr),
- TP_ARGS(dd, hdr),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- /* LRH */
- __field(u8, vl)
- __field(u8, lver)
- __field(u8, sl)
- __field(u8, lnh)
- __field(u16, dlid)
- __field(u16, len)
- __field(u16, slid)
- /* BTH */
- __field(u8, opcode)
- __field(u8, se)
- __field(u8, m)
- __field(u8, pad)
- __field(u8, tver)
- __field(u16, pkey)
- __field(u8, f)
- __field(u8, b)
- __field(u32, qpn)
- __field(u8, a)
- __field(u32, psn)
- /* extended headers */
- __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
- ),
- TP_fast_assign(
- struct hfi1_other_headers *ohdr;
-
- DD_DEV_ASSIGN(dd);
- /* LRH */
- __entry->vl =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
- __entry->lver =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
- __entry->sl =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- __entry->lnh =
- (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- __entry->dlid =
- be16_to_cpu(hdr->lrh[1]);
- /* allow for larger len */
- __entry->len =
- be16_to_cpu(hdr->lrh[2]);
- __entry->slid =
- be16_to_cpu(hdr->lrh[3]);
- /* BTH */
- if (__entry->lnh == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth;
- else
- ohdr = &hdr->u.l.oth;
- __entry->opcode =
- (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
- __entry->se =
- (be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
- __entry->m =
- (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
- __entry->pad =
- (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
- __entry->tver =
- (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
- __entry->pkey =
- be32_to_cpu(ohdr->bth[0]) & 0xffff;
- __entry->f =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
- HFI1_FECN_MASK;
- __entry->b =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
- HFI1_BECN_MASK;
- __entry->qpn =
- be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
- __entry->a =
- (be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
- /* allow for larger PSN */
- __entry->psn =
- be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
- /* extended headers */
- memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
- ibhdr_exhdr_len(hdr));
- ),
- TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
- __get_str(dev),
- /* LRH */
- __entry->vl,
- __entry->lver,
- __entry->sl,
- __entry->lnh, show_lnh(__entry->lnh),
- __entry->dlid,
- __entry->len,
- __entry->slid,
- /* BTH */
- __entry->opcode, show_ib_opcode(__entry->opcode),
- __entry->se,
- __entry->m,
- __entry->pad,
- __entry->tver,
- __entry->pkey,
- __entry->f,
- __entry->b,
- __entry->qpn,
- __entry->a,
- __entry->psn,
- /* extended headers */
- __parse_ib_ehdrs(
- __entry->opcode,
- (void *)__get_dynamic_array(ehdrs))
- )
-);
-
-DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
- TP_ARGS(dd, hdr));
-
-DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
- TP_ARGS(dd, hdr));
-
-DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
- TP_ARGS(dd, hdr));
-
-DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
- TP_ARGS(dd, hdr));
-
-#define SNOOP_PRN \
- "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \
- "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]"
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_snoop
-
-TRACE_EVENT(snoop_capture,
- TP_PROTO(struct hfi1_devdata *dd,
- int hdr_len,
- struct hfi1_ib_header *hdr,
- int data_len,
- void *data),
- TP_ARGS(dd, hdr_len, hdr, data_len, data),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __field(u16, slid)
- __field(u16, dlid)
- __field(u32, qpn)
- __field(u8, opcode)
- __field(u8, sl)
- __field(u16, pkey)
- __field(u32, hdr_len)
- __field(u32, data_len)
- __field(u8, lnh)
- __dynamic_array(u8, raw_hdr, hdr_len)
- __dynamic_array(u8, raw_pkt, data_len)
- ),
- TP_fast_assign(
- struct hfi1_other_headers *ohdr;
-
- __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- if (__entry->lnh == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth;
- else
- ohdr = &hdr->u.l.oth;
- DD_DEV_ASSIGN(dd);
- __entry->slid = be16_to_cpu(hdr->lrh[3]);
- __entry->dlid = be16_to_cpu(hdr->lrh[1]);
- __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
- __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
- __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff;
- __entry->hdr_len = hdr_len;
- __entry->data_len = data_len;
- memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len);
- memcpy(__get_dynamic_array(raw_pkt), data, data_len);
- ),
- TP_printk(
- "[%s] " SNOOP_PRN,
- __get_str(dev),
- __entry->slid,
- __entry->dlid,
- __entry->qpn,
- __entry->opcode,
- show_ib_opcode(__entry->opcode),
- __entry->sl,
- __entry->pkey,
- __entry->hdr_len,
- __entry->data_len
- )
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_ctxts
-
-#define UCTXT_FMT \
- "cred:%u, credaddr:0x%llx, piobase:0x%llx, rcvhdr_cnt:%u, " \
- "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx"
-TRACE_EVENT(hfi1_uctxtdata,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt),
- TP_ARGS(dd, uctxt),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
- __field(unsigned, ctxt)
- __field(u32, credits)
- __field(u64, hw_free)
- __field(u64, piobase)
- __field(u16, rcvhdrq_cnt)
- __field(u64, rcvhdrq_phys)
- __field(u32, eager_cnt)
- __field(u64, rcvegr_phys)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(dd);
- __entry->ctxt = uctxt->ctxt;
- __entry->credits = uctxt->sc->credits;
- __entry->hw_free = (u64)uctxt->sc->hw_free;
- __entry->piobase = (u64)uctxt->sc->base_addr;
- __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
- __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
- __entry->eager_cnt = uctxt->egrbufs.alloced;
- __entry->rcvegr_phys =
- uctxt->egrbufs.rcvtids[0].phys;
- ),
- TP_printk("[%s] ctxt %u " UCTXT_FMT,
- __get_str(dev),
- __entry->ctxt,
- __entry->credits,
- __entry->hw_free,
- __entry->piobase,
- __entry->rcvhdrq_cnt,
- __entry->rcvhdrq_phys,
- __entry->eager_cnt,
- __entry->rcvegr_phys
- )
-);
-
-#define CINFO_FMT \
- "egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u"
-TRACE_EVENT(hfi1_ctxt_info,
- TP_PROTO(struct hfi1_devdata *dd, unsigned ctxt, unsigned subctxt,
- struct hfi1_ctxt_info cinfo),
- TP_ARGS(dd, ctxt, subctxt, cinfo),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
- __field(unsigned, ctxt)
- __field(unsigned, subctxt)
- __field(u16, egrtids)
- __field(u16, rcvhdrq_cnt)
- __field(u16, rcvhdrq_size)
- __field(u16, sdma_ring_size)
- __field(u32, rcvegr_size)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->egrtids = cinfo.egrtids;
- __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
- __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
- __entry->sdma_ring_size = cinfo.sdma_ring_size;
- __entry->rcvegr_size = cinfo.rcvegr_size;
- ),
- TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
- __get_str(dev),
- __entry->ctxt,
- __entry->subctxt,
- __entry->egrtids,
- __entry->rcvegr_size,
- __entry->rcvhdrq_cnt,
- __entry->rcvhdrq_size,
- __entry->sdma_ring_size
- )
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_sma
-
-#define BCT_FORMAT \
- "shared_limit %x vls 0-7 [%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x] 15 [%x,%x]"
-
-#define BCT(field) \
- be16_to_cpu( \
- ((struct buffer_control *)__get_dynamic_array(bct))->field \
- )
-
-DECLARE_EVENT_CLASS(hfi1_bct_template,
- TP_PROTO(struct hfi1_devdata *dd,
- struct buffer_control *bc),
- TP_ARGS(dd, bc),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
- __dynamic_array(u8, bct, sizeof(*bc))
- ),
- TP_fast_assign(DD_DEV_ASSIGN(dd);
- memcpy(__get_dynamic_array(bct), bc,
- sizeof(*bc));
- ),
- TP_printk(BCT_FORMAT,
- BCT(overall_shared_limit),
-
- BCT(vl[0].dedicated),
- BCT(vl[0].shared),
-
- BCT(vl[1].dedicated),
- BCT(vl[1].shared),
-
- BCT(vl[2].dedicated),
- BCT(vl[2].shared),
-
- BCT(vl[3].dedicated),
- BCT(vl[3].shared),
-
- BCT(vl[4].dedicated),
- BCT(vl[4].shared),
-
- BCT(vl[5].dedicated),
- BCT(vl[5].shared),
-
- BCT(vl[6].dedicated),
- BCT(vl[6].shared),
-
- BCT(vl[7].dedicated),
- BCT(vl[7].shared),
-
- BCT(vl[15].dedicated),
- BCT(vl[15].shared)
- )
-);
-
-DEFINE_EVENT(hfi1_bct_template, bct_set,
- TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
- TP_ARGS(dd, bc));
-
-DEFINE_EVENT(hfi1_bct_template, bct_get,
- TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
- TP_ARGS(dd, bc));
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_sdma
-
-TRACE_EVENT(hfi1_sdma_descriptor,
- TP_PROTO(struct sdma_engine *sde,
- u64 desc0,
- u64 desc1,
- u16 e,
- void *descp),
- TP_ARGS(sde, desc0, desc1, e, descp),
- TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
- __field(void *, descp)
- __field(u64, desc0)
- __field(u64, desc1)
- __field(u16, e)
- __field(u8, idx)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __entry->desc0 = desc0;
- __entry->desc1 = desc1;
- __entry->idx = sde->this_idx;
- __entry->descp = descp;
- __entry->e = e;
- ),
- TP_printk(
- "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u",
- __get_str(dev),
- __entry->idx,
- __parse_sdma_flags(__entry->desc0, __entry->desc1),
- (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) &
- SDMA_DESC0_PHY_ADDR_MASK,
- (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) &
- SDMA_DESC1_GENERATION_MASK),
- (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) &
- SDMA_DESC0_BYTE_COUNT_MASK),
- __entry->desc0,
- __entry->desc1,
- __entry->descp,
- __entry->e
- )
-);
-
-TRACE_EVENT(hfi1_sdma_engine_select,
- TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx),
- TP_ARGS(dd, sel, vl, idx),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
- __field(u32, sel)
- __field(u8, vl)
- __field(u8, idx)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(dd);
- __entry->sel = sel;
- __entry->vl = vl;
- __entry->idx = idx;
- ),
- TP_printk("[%s] selecting SDE %u sel 0x%x vl %u",
- __get_str(dev),
- __entry->idx,
- __entry->sel,
- __entry->vl
- )
-);
-
-DECLARE_EVENT_CLASS(hfi1_sdma_engine_class,
- TP_PROTO(struct sdma_engine *sde, u64 status),
- TP_ARGS(sde, status),
- TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
- __field(u64, status)
- __field(u8, idx)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __entry->status = status;
- __entry->idx = sde->this_idx;
- ),
- TP_printk("[%s] SDE(%u) status %llx",
- __get_str(dev),
- __entry->idx,
- (unsigned long long)__entry->status
- )
-);
-
-DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt,
- TP_PROTO(struct sdma_engine *sde, u64 status),
- TP_ARGS(sde, status)
-);
-
-DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress,
- TP_PROTO(struct sdma_engine *sde, u64 status),
- TP_ARGS(sde, status)
-);
-
-DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad,
- TP_PROTO(struct sdma_engine *sde, int aidx),
- TP_ARGS(sde, aidx),
- TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
- __field(int, aidx)
- __field(u8, idx)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __entry->idx = sde->this_idx;
- __entry->aidx = aidx;
- ),
- TP_printk("[%s] SDE(%u) aidx %d",
- __get_str(dev),
- __entry->idx,
- __entry->aidx
- )
-);
-
-DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate,
- TP_PROTO(struct sdma_engine *sde, int aidx),
- TP_ARGS(sde, aidx));
-
-DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate,
- TP_PROTO(struct sdma_engine *sde, int aidx),
- TP_ARGS(sde, aidx));
-
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
-TRACE_EVENT(hfi1_sdma_progress,
- TP_PROTO(struct sdma_engine *sde,
- u16 hwhead,
- u16 swhead,
- struct sdma_txreq *txp
- ),
- TP_ARGS(sde, hwhead, swhead, txp),
- TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
- __field(u64, sn)
- __field(u16, hwhead)
- __field(u16, swhead)
- __field(u16, txnext)
- __field(u16, tx_tail)
- __field(u16, tx_head)
- __field(u8, idx)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __entry->hwhead = hwhead;
- __entry->swhead = swhead;
- __entry->tx_tail = sde->tx_tail;
- __entry->tx_head = sde->tx_head;
- __entry->txnext = txp ? txp->next_descq_idx : ~0;
- __entry->idx = sde->this_idx;
- __entry->sn = txp ? txp->sn : ~0;
- ),
- TP_printk(
- "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
- __get_str(dev),
- __entry->idx,
- __entry->sn,
- __entry->hwhead,
- __entry->swhead,
- __entry->txnext,
- __entry->tx_head,
- __entry->tx_tail
- )
-);
-#else
-TRACE_EVENT(hfi1_sdma_progress,
- TP_PROTO(struct sdma_engine *sde,
- u16 hwhead, u16 swhead,
- struct sdma_txreq *txp
- ),
- TP_ARGS(sde, hwhead, swhead, txp),
- TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
- __field(u16, hwhead)
- __field(u16, swhead)
- __field(u16, txnext)
- __field(u16, tx_tail)
- __field(u16, tx_head)
- __field(u8, idx)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __entry->hwhead = hwhead;
- __entry->swhead = swhead;
- __entry->tx_tail = sde->tx_tail;
- __entry->tx_head = sde->tx_head;
- __entry->txnext = txp ? txp->next_descq_idx : ~0;
- __entry->idx = sde->this_idx;
- ),
- TP_printk(
- "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
- __get_str(dev),
- __entry->idx,
- __entry->hwhead,
- __entry->swhead,
- __entry->txnext,
- __entry->tx_head,
- __entry->tx_tail
- )
-);
-#endif
-
-DECLARE_EVENT_CLASS(hfi1_sdma_sn,
- TP_PROTO(struct sdma_engine *sde, u64 sn),
- TP_ARGS(sde, sn),
- TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
- __field(u64, sn)
- __field(u8, idx)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __entry->sn = sn;
- __entry->idx = sde->this_idx;
- ),
- TP_printk("[%s] SDE(%u) sn %llu",
- __get_str(dev),
- __entry->idx,
- __entry->sn
- )
-);
-
-DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn,
- TP_PROTO(
- struct sdma_engine *sde,
- u64 sn
- ),
- TP_ARGS(sde, sn)
-);
-
-DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn,
- TP_PROTO(struct sdma_engine *sde, u64 sn),
- TP_ARGS(sde, sn)
-);
-
-#define USDMA_HDR_FORMAT \
- "[%s:%u:%u:%u] PBC=(0x%x 0x%x) LRH=(0x%x 0x%x) BTH=(0x%x 0x%x 0x%x) KDETH=(0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x) TIDVal=0x%x"
-
-TRACE_EVENT(hfi1_sdma_user_header,
- TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req,
- struct hfi1_pkt_header *hdr, u32 tidval),
- TP_ARGS(dd, ctxt, subctxt, req, hdr, tidval),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __field(u16, ctxt)
- __field(u8, subctxt)
- __field(u16, req)
- __field(__le32, pbc0)
- __field(__le32, pbc1)
- __field(__be32, lrh0)
- __field(__be32, lrh1)
- __field(__be32, bth0)
- __field(__be32, bth1)
- __field(__be32, bth2)
- __field(__le32, kdeth0)
- __field(__le32, kdeth1)
- __field(__le32, kdeth2)
- __field(__le32, kdeth3)
- __field(__le32, kdeth4)
- __field(__le32, kdeth5)
- __field(__le32, kdeth6)
- __field(__le32, kdeth7)
- __field(__le32, kdeth8)
- __field(u32, tidval)
- ),
- TP_fast_assign(
- __le32 *pbc = (__le32 *)hdr->pbc;
- __be32 *lrh = (__be32 *)hdr->lrh;
- __be32 *bth = (__be32 *)hdr->bth;
- __le32 *kdeth = (__le32 *)&hdr->kdeth;
-
- DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->req = req;
- __entry->pbc0 = pbc[0];
- __entry->pbc1 = pbc[1];
- __entry->lrh0 = be32_to_cpu(lrh[0]);
- __entry->lrh1 = be32_to_cpu(lrh[1]);
- __entry->bth0 = be32_to_cpu(bth[0]);
- __entry->bth1 = be32_to_cpu(bth[1]);
- __entry->bth2 = be32_to_cpu(bth[2]);
- __entry->kdeth0 = kdeth[0];
- __entry->kdeth1 = kdeth[1];
- __entry->kdeth2 = kdeth[2];
- __entry->kdeth3 = kdeth[3];
- __entry->kdeth4 = kdeth[4];
- __entry->kdeth5 = kdeth[5];
- __entry->kdeth6 = kdeth[6];
- __entry->kdeth7 = kdeth[7];
- __entry->kdeth8 = kdeth[8];
- __entry->tidval = tidval;
- ),
- TP_printk(USDMA_HDR_FORMAT,
- __get_str(dev),
- __entry->ctxt,
- __entry->subctxt,
- __entry->req,
- __entry->pbc1,
- __entry->pbc0,
- __entry->lrh0,
- __entry->lrh1,
- __entry->bth0,
- __entry->bth1,
- __entry->bth2,
- __entry->kdeth0,
- __entry->kdeth1,
- __entry->kdeth2,
- __entry->kdeth3,
- __entry->kdeth4,
- __entry->kdeth5,
- __entry->kdeth6,
- __entry->kdeth7,
- __entry->kdeth8,
- __entry->tidval
- )
- );
-
-#define SDMA_UREQ_FMT \
- "[%s:%u:%u] ver/op=0x%x, iovcnt=%u, npkts=%u, frag=%u, idx=%u"
-TRACE_EVENT(hfi1_sdma_user_reqinfo,
- TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i),
- TP_ARGS(dd, ctxt, subctxt, i),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd);
- __field(u16, ctxt)
- __field(u8, subctxt)
- __field(u8, ver_opcode)
- __field(u8, iovcnt)
- __field(u16, npkts)
- __field(u16, fragsize)
- __field(u16, comp_idx)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->ver_opcode = i[0] & 0xff;
- __entry->iovcnt = (i[0] >> 8) & 0xff;
- __entry->npkts = i[1];
- __entry->fragsize = i[2];
- __entry->comp_idx = i[3];
- ),
- TP_printk(SDMA_UREQ_FMT,
- __get_str(dev),
- __entry->ctxt,
- __entry->subctxt,
- __entry->ver_opcode,
- __entry->iovcnt,
- __entry->npkts,
- __entry->fragsize,
- __entry->comp_idx
- )
- );
-
-#define usdma_complete_name(st) { st, #st }
-#define show_usdma_complete_state(st) \
- __print_symbolic(st, \
- usdma_complete_name(FREE), \
- usdma_complete_name(QUEUED), \
- usdma_complete_name(COMPLETE), \
- usdma_complete_name(ERROR))
-
-TRACE_EVENT(hfi1_sdma_user_completion,
- TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 idx,
- u8 state, int code),
- TP_ARGS(dd, ctxt, subctxt, idx, state, code),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __field(u16, ctxt)
- __field(u8, subctxt)
- __field(u16, idx)
- __field(u8, state)
- __field(int, code)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->idx = idx;
- __entry->state = state;
- __entry->code = code;
- ),
- TP_printk("[%s:%u:%u:%u] SDMA completion state %s (%d)",
- __get_str(dev), __entry->ctxt, __entry->subctxt,
- __entry->idx, show_usdma_complete_state(__entry->state),
- __entry->code)
- );
-
-const char *print_u32_array(struct trace_seq *, u32 *, int);
-#define __print_u32_hex(arr, len) print_u32_array(p, arr, len)
-
-TRACE_EVENT(hfi1_sdma_user_header_ahg,
- TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req,
- u8 sde, u8 ahgidx, u32 *ahg, int len, u32 tidval),
- TP_ARGS(dd, ctxt, subctxt, req, sde, ahgidx, ahg, len, tidval),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __field(u16, ctxt)
- __field(u8, subctxt)
- __field(u16, req)
- __field(u8, sde)
- __field(u8, idx)
- __field(int, len)
- __field(u32, tidval)
- __array(u32, ahg, 10)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->req = req;
- __entry->sde = sde;
- __entry->idx = ahgidx;
- __entry->len = len;
- __entry->tidval = tidval;
- memcpy(__entry->ahg, ahg, len * sizeof(u32));
- ),
- TP_printk("[%s:%u:%u:%u] (SDE%u/AHG%u) ahg[0-%d]=(%s) TIDVal=0x%x",
- __get_str(dev),
- __entry->ctxt,
- __entry->subctxt,
- __entry->req,
- __entry->sde,
- __entry->idx,
- __entry->len - 1,
- __print_u32_hex(__entry->ahg, __entry->len),
- __entry->tidval
- )
- );
-
-TRACE_EVENT(hfi1_sdma_state,
- TP_PROTO(struct sdma_engine *sde,
- const char *cstate,
- const char *nstate
- ),
- TP_ARGS(sde, cstate, nstate),
- TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
- __string(curstate, cstate)
- __string(newstate, nstate)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __assign_str(curstate, cstate);
- __assign_str(newstate, nstate);
- ),
- TP_printk("[%s] current state %s new state %s",
- __get_str(dev),
- __get_str(curstate),
- __get_str(newstate)
- )
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_rc
-
-DECLARE_EVENT_CLASS(hfi1_rc_template,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
- __field(u32, qpn)
- __field(u32, s_flags)
- __field(u32, psn)
- __field(u32, s_psn)
- __field(u32, s_next_psn)
- __field(u32, s_sending_psn)
- __field(u32, s_sending_hpsn)
- __field(u32, r_psn)
- ),
- TP_fast_assign(
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
- __entry->qpn = qp->ibqp.qp_num;
- __entry->s_flags = qp->s_flags;
- __entry->psn = psn;
- __entry->s_psn = qp->s_psn;
- __entry->s_next_psn = qp->s_next_psn;
- __entry->s_sending_psn = qp->s_sending_psn;
- __entry->s_sending_hpsn = qp->s_sending_hpsn;
- __entry->r_psn = qp->r_psn;
- ),
- TP_printk(
- "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
- __get_str(dev),
- __entry->qpn,
- __entry->s_flags,
- __entry->psn,
- __entry->s_psn,
- __entry->s_next_psn,
- __entry->s_sending_psn,
- __entry->s_sending_hpsn,
- __entry->r_psn
- )
-);
-
-DEFINE_EVENT(hfi1_rc_template, hfi1_rc_sendcomplete,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn)
-);
-
-DEFINE_EVENT(hfi1_rc_template, hfi1_rc_ack,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn)
-);
-
-DEFINE_EVENT(hfi1_rc_template, hfi1_rc_timeout,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn)
-);
-
-DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn)
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_misc
-
-TRACE_EVENT(hfi1_interrupt,
- TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry,
- int src),
- TP_ARGS(dd, is_entry, src),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
- __array(char, buf, 64)
- __field(int, src)
- ),
- TP_fast_assign(DD_DEV_ASSIGN(dd)
- is_entry->is_name(__entry->buf, 64,
- src - is_entry->start);
- __entry->src = src;
- ),
- TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf,
- __entry->src)
-);
-
-/*
- * Note:
- * This produces a REALLY ugly trace in the console output when the string is
- * too long.
- */
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_trace
-
-#define MAX_MSG_LEN 512
-
-DECLARE_EVENT_CLASS(hfi1_trace_template,
- TP_PROTO(const char *function, struct va_format *vaf),
- TP_ARGS(function, vaf),
- TP_STRUCT__entry(__string(function, function)
- __dynamic_array(char, msg, MAX_MSG_LEN)
- ),
- TP_fast_assign(__assign_str(function, function);
- WARN_ON_ONCE(vsnprintf
- (__get_dynamic_array(msg),
- MAX_MSG_LEN, vaf->fmt,
- *vaf->va) >=
- MAX_MSG_LEN);
- ),
- TP_printk("(%s) %s",
- __get_str(function),
- __get_str(msg))
-);
-
-/*
- * It may be nice to macroize the __hfi1_trace but the va_* stuff requires an
- * actual function to work and can not be in a macro.
- */
-#define __hfi1_trace_def(lvl) \
-void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \
- \
-DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \
- TP_PROTO(const char *function, struct va_format *vaf), \
- TP_ARGS(function, vaf))
-
-#define __hfi1_trace_fn(lvl) \
-void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \
-{ \
- struct va_format vaf = { \
- .fmt = fmt, \
- }; \
- va_list args; \
- \
- va_start(args, fmt); \
- vaf.va = &args; \
- trace_hfi1_ ##lvl(func, &vaf); \
- va_end(args); \
- return; \
-}
-
-/*
- * To create a new trace level simply define it below and as a __hfi1_trace_fn
- * in trace.c. This will create all the hooks for calling
- * hfi1_cdbg(LVL, fmt, ...); as well as take care of all
- * the debugfs stuff.
- */
-__hfi1_trace_def(PKT);
-__hfi1_trace_def(PROC);
-__hfi1_trace_def(SDMA);
-__hfi1_trace_def(LINKVERB);
-__hfi1_trace_def(DEBUG);
-__hfi1_trace_def(SNOOP);
-__hfi1_trace_def(CNTR);
-__hfi1_trace_def(PIO);
-__hfi1_trace_def(DC8051);
-__hfi1_trace_def(FIRMWARE);
-__hfi1_trace_def(RCVCTRL);
-__hfi1_trace_def(TID);
-__hfi1_trace_def(MMU);
-__hfi1_trace_def(IOCTL);
-
-#define hfi1_cdbg(which, fmt, ...) \
- __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__)
-
-#define hfi1_dbg(fmt, ...) \
- hfi1_cdbg(DEBUG, fmt, ##__VA_ARGS__)
-
-/*
- * Define HFI1_EARLY_DBG at compile time or here to enable early trace
- * messages. Do not check in an enablement for this.
- */
-
-#ifdef HFI1_EARLY_DBG
-#define hfi1_dbg_early(fmt, ...) \
- trace_printk(fmt, ##__VA_ARGS__)
-#else
-#define hfi1_dbg_early(fmt, ...)
-#endif
-
-#endif /* __HFI1_TRACE_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace
-#include <trace/define_trace.h>
+#include "trace_dbg.h"
+#include "trace_misc.h"
+#include "trace_ctxts.h"
+#include "trace_ibhdrs.h"
+#include "trace_rc.h"
+#include "trace_rx.h"
+#include "trace_tx.h"
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
new file mode 100644
index 000000000000..31654bbac1cf
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -0,0 +1,141 @@
+/*
+* Copyright(c) 2015, 2016 Intel Corporation.
+*
+* This file is provided under a dual BSD/GPLv2 license. When using or
+* redistributing this file, you may do so under either license.
+*
+* GPL LICENSE SUMMARY
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of version 2 of the GNU General Public License as
+* published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* BSD LICENSE
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* - Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* - Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in
+* the documentation and/or other materials provided with the
+* distribution.
+* - Neither the name of Intel Corporation nor the names of its
+* contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+#if !defined(__HFI1_TRACE_CTXTS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_CTXTS_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_ctxts
+
+#define UCTXT_FMT \
+ "cred:%u, credaddr:0x%llx, piobase:0x%p, rcvhdr_cnt:%u, " \
+ "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx"
+TRACE_EVENT(hfi1_uctxtdata,
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt),
+ TP_ARGS(dd, uctxt),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(unsigned int, ctxt)
+ __field(u32, credits)
+ __field(u64, hw_free)
+ __field(void __iomem *, piobase)
+ __field(u16, rcvhdrq_cnt)
+ __field(u64, rcvhdrq_phys)
+ __field(u32, eager_cnt)
+ __field(u64, rcvegr_phys)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->ctxt = uctxt->ctxt;
+ __entry->credits = uctxt->sc->credits;
+ __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
+ __entry->piobase = uctxt->sc->base_addr;
+ __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
+ __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
+ __entry->eager_cnt = uctxt->egrbufs.alloced;
+ __entry->rcvegr_phys =
+ uctxt->egrbufs.rcvtids[0].phys;
+ ),
+ TP_printk("[%s] ctxt %u " UCTXT_FMT,
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->credits,
+ __entry->hw_free,
+ __entry->piobase,
+ __entry->rcvhdrq_cnt,
+ __entry->rcvhdrq_phys,
+ __entry->eager_cnt,
+ __entry->rcvegr_phys
+ )
+);
+
+#define CINFO_FMT \
+ "egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u"
+TRACE_EVENT(hfi1_ctxt_info,
+ TP_PROTO(struct hfi1_devdata *dd, unsigned int ctxt,
+ unsigned int subctxt,
+ struct hfi1_ctxt_info cinfo),
+ TP_ARGS(dd, ctxt, subctxt, cinfo),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(unsigned int, ctxt)
+ __field(unsigned int, subctxt)
+ __field(u16, egrtids)
+ __field(u16, rcvhdrq_cnt)
+ __field(u16, rcvhdrq_size)
+ __field(u16, sdma_ring_size)
+ __field(u32, rcvegr_size)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->egrtids = cinfo.egrtids;
+ __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
+ __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
+ __entry->sdma_ring_size = cinfo.sdma_ring_size;
+ __entry->rcvegr_size = cinfo.rcvegr_size;
+ ),
+ TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->egrtids,
+ __entry->rcvegr_size,
+ __entry->rcvhdrq_cnt,
+ __entry->rcvhdrq_size,
+ __entry->sdma_ring_size
+ )
+);
+
+#endif /* __HFI1_TRACE_CTXTS_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_ctxts
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h
new file mode 100644
index 000000000000..0e7d929530c5
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_dbg.h
@@ -0,0 +1,155 @@
+/*
+* Copyright(c) 2015, 2016 Intel Corporation.
+*
+* This file is provided under a dual BSD/GPLv2 license. When using or
+* redistributing this file, you may do so under either license.
+*
+* GPL LICENSE SUMMARY
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of version 2 of the GNU General Public License as
+* published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* BSD LICENSE
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* - Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* - Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in
+* the documentation and/or other materials provided with the
+* distribution.
+* - Neither the name of Intel Corporation nor the names of its
+* contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+#if !defined(__HFI1_TRACE_EXTRA_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_EXTRA_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+/*
+ * Note:
+ * This produces a REALLY ugly trace in the console output when the string is
+ * too long.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_dbg
+
+#define MAX_MSG_LEN 512
+
+DECLARE_EVENT_CLASS(hfi1_trace_template,
+ TP_PROTO(const char *function, struct va_format *vaf),
+ TP_ARGS(function, vaf),
+ TP_STRUCT__entry(__string(function, function)
+ __dynamic_array(char, msg, MAX_MSG_LEN)
+ ),
+ TP_fast_assign(__assign_str(function, function);
+ WARN_ON_ONCE(vsnprintf
+ (__get_dynamic_array(msg),
+ MAX_MSG_LEN, vaf->fmt,
+ *vaf->va) >=
+ MAX_MSG_LEN);
+ ),
+ TP_printk("(%s) %s",
+ __get_str(function),
+ __get_str(msg))
+);
+
+/*
+ * It may be nice to macroize the __hfi1_trace but the va_* stuff requires an
+ * actual function to work and can not be in a macro.
+ */
+#define __hfi1_trace_def(lvl) \
+void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \
+ \
+DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \
+ TP_PROTO(const char *function, struct va_format *vaf), \
+ TP_ARGS(function, vaf))
+
+#define __hfi1_trace_fn(lvl) \
+void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \
+{ \
+ struct va_format vaf = { \
+ .fmt = fmt, \
+ }; \
+ va_list args; \
+ \
+ va_start(args, fmt); \
+ vaf.va = &args; \
+ trace_hfi1_ ##lvl(func, &vaf); \
+ va_end(args); \
+ return; \
+}
+
+/*
+ * To create a new trace level simply define it below and as a __hfi1_trace_fn
+ * in trace.c. This will create all the hooks for calling
+ * hfi1_cdbg(LVL, fmt, ...); as well as take care of all
+ * the debugfs stuff.
+ */
+__hfi1_trace_def(PKT);
+__hfi1_trace_def(PROC);
+__hfi1_trace_def(SDMA);
+__hfi1_trace_def(LINKVERB);
+__hfi1_trace_def(DEBUG);
+__hfi1_trace_def(SNOOP);
+__hfi1_trace_def(CNTR);
+__hfi1_trace_def(PIO);
+__hfi1_trace_def(DC8051);
+__hfi1_trace_def(FIRMWARE);
+__hfi1_trace_def(RCVCTRL);
+__hfi1_trace_def(TID);
+__hfi1_trace_def(MMU);
+__hfi1_trace_def(IOCTL);
+
+#define hfi1_cdbg(which, fmt, ...) \
+ __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__)
+
+#define hfi1_dbg(fmt, ...) \
+ hfi1_cdbg(DEBUG, fmt, ##__VA_ARGS__)
+
+/*
+ * Define HFI1_EARLY_DBG at compile time or here to enable early trace
+ * messages. Do not check in an enablement for this.
+ */
+
+#ifdef HFI1_EARLY_DBG
+#define hfi1_dbg_early(fmt, ...) \
+ trace_printk(fmt, ##__VA_ARGS__)
+#else
+#define hfi1_dbg_early(fmt, ...)
+#endif
+
+#endif /* __HFI1_TRACE_EXTRA_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_dbg
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
new file mode 100644
index 000000000000..c3e41aed0034
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__HFI1_TRACE_IBHDRS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_IBHDRS_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_ibhdrs
+
+u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr);
+const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
+
+#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
+
+#define lrh_name(lrh) { HFI1_##lrh, #lrh }
+#define show_lnh(lrh) \
+__print_symbolic(lrh, \
+ lrh_name(LRH_BTH), \
+ lrh_name(LRH_GRH))
+
+#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x"
+#define BTH_PRN \
+ "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \
+ "f %d b %d qpn 0x%.6x a %d psn 0x%.8x"
+#define EHDR_PRN "%s"
+
+DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct hfi1_ib_header *hdr),
+ TP_ARGS(dd, hdr),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ /* LRH */
+ __field(u8, vl)
+ __field(u8, lver)
+ __field(u8, sl)
+ __field(u8, lnh)
+ __field(u16, dlid)
+ __field(u16, len)
+ __field(u16, slid)
+ /* BTH */
+ __field(u8, opcode)
+ __field(u8, se)
+ __field(u8, m)
+ __field(u8, pad)
+ __field(u8, tver)
+ __field(u16, pkey)
+ __field(u8, f)
+ __field(u8, b)
+ __field(u32, qpn)
+ __field(u8, a)
+ __field(u32, psn)
+ /* extended headers */
+ __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
+ ),
+ TP_fast_assign(
+ struct hfi1_other_headers *ohdr;
+
+ DD_DEV_ASSIGN(dd);
+ /* LRH */
+ __entry->vl =
+ (u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
+ __entry->lver =
+ (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
+ __entry->sl =
+ (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
+ __entry->lnh =
+ (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
+ __entry->dlid =
+ be16_to_cpu(hdr->lrh[1]);
+ /* allow for larger len */
+ __entry->len =
+ be16_to_cpu(hdr->lrh[2]);
+ __entry->slid =
+ be16_to_cpu(hdr->lrh[3]);
+ /* BTH */
+ if (__entry->lnh == HFI1_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else
+ ohdr = &hdr->u.l.oth;
+ __entry->opcode =
+ (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
+ __entry->se =
+ (be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
+ __entry->m =
+ (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
+ __entry->pad =
+ (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ __entry->tver =
+ (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
+ __entry->pkey =
+ be32_to_cpu(ohdr->bth[0]) & 0xffff;
+ __entry->f =
+ (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
+ HFI1_FECN_MASK;
+ __entry->b =
+ (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
+ HFI1_BECN_MASK;
+ __entry->qpn =
+ be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
+ __entry->a =
+ (be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
+ /* allow for larger PSN */
+ __entry->psn =
+ be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
+ /* extended headers */
+ memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
+ ibhdr_exhdr_len(hdr));
+ ),
+ TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
+ __get_str(dev),
+ /* LRH */
+ __entry->vl,
+ __entry->lver,
+ __entry->sl,
+ __entry->lnh, show_lnh(__entry->lnh),
+ __entry->dlid,
+ __entry->len,
+ __entry->slid,
+ /* BTH */
+ __entry->opcode, show_ib_opcode(__entry->opcode),
+ __entry->se,
+ __entry->m,
+ __entry->pad,
+ __entry->tver,
+ __entry->pkey,
+ __entry->f,
+ __entry->b,
+ __entry->qpn,
+ __entry->a,
+ __entry->psn,
+ /* extended headers */
+ __parse_ib_ehdrs(
+ __entry->opcode,
+ (void *)__get_dynamic_array(ehdrs))
+ )
+);
+
+DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_ARGS(dd, hdr));
+
+DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_ARGS(dd, hdr));
+
+DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_ARGS(dd, hdr));
+
+DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_ARGS(dd, hdr));
+
+#endif /* __HFI1_TRACE_IBHDRS_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_ibhdrs
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h
new file mode 100644
index 000000000000..d308454af7fd
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_misc.h
@@ -0,0 +1,81 @@
+/*
+* Copyright(c) 2015, 2016 Intel Corporation.
+*
+* This file is provided under a dual BSD/GPLv2 license. When using or
+* redistributing this file, you may do so under either license.
+*
+* GPL LICENSE SUMMARY
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of version 2 of the GNU General Public License as
+* published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* BSD LICENSE
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* - Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* - Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in
+* the documentation and/or other materials provided with the
+* distribution.
+* - Neither the name of Intel Corporation nor the names of its
+* contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+#if !defined(__HFI1_TRACE_MISC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_MISC_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_misc
+
+TRACE_EVENT(hfi1_interrupt,
+ TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry,
+ int src),
+ TP_ARGS(dd, is_entry, src),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __array(char, buf, 64)
+ __field(int, src)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd)
+ is_entry->is_name(__entry->buf, 64,
+ src - is_entry->start);
+ __entry->src = src;
+ ),
+ TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf,
+ __entry->src)
+);
+
+#endif /* __HFI1_TRACE_MISC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_misc
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h
new file mode 100644
index 000000000000..5ea5005f9f41
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_rc.h
@@ -0,0 +1,123 @@
+/*
+* Copyright(c) 2015, 2016 Intel Corporation.
+*
+* This file is provided under a dual BSD/GPLv2 license. When using or
+* redistributing this file, you may do so under either license.
+*
+* GPL LICENSE SUMMARY
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of version 2 of the GNU General Public License as
+* published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* BSD LICENSE
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* - Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* - Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in
+* the documentation and/or other materials provided with the
+* distribution.
+* - Neither the name of Intel Corporation nor the names of its
+* contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+#if !defined(__HFI1_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_RC_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_rc
+
+DECLARE_EVENT_CLASS(hfi1_rc_template,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, s_flags)
+ __field(u32, psn)
+ __field(u32, s_psn)
+ __field(u32, s_next_psn)
+ __field(u32, s_sending_psn)
+ __field(u32, s_sending_hpsn)
+ __field(u32, r_psn)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->s_flags = qp->s_flags;
+ __entry->psn = psn;
+ __entry->s_psn = qp->s_psn;
+ __entry->s_next_psn = qp->s_next_psn;
+ __entry->s_sending_psn = qp->s_sending_psn;
+ __entry->s_sending_hpsn = qp->s_sending_hpsn;
+ __entry->r_psn = qp->r_psn;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->s_flags,
+ __entry->psn,
+ __entry->s_psn,
+ __entry->s_next_psn,
+ __entry->s_sending_psn,
+ __entry->s_sending_hpsn,
+ __entry->r_psn
+ )
+);
+
+DEFINE_EVENT(hfi1_rc_template, hfi1_sendcomplete,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(hfi1_rc_template, hfi1_ack,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(hfi1_rc_template, hfi1_timeout,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+#endif /* __HFI1_TRACE_RC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_rc
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
new file mode 100644
index 000000000000..9ba1f615ec95
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -0,0 +1,322 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__HFI1_TRACE_RX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_RX_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_rx
+
+TRACE_EVENT(hfi1_rcvhdr,
+ TP_PROTO(struct hfi1_devdata *dd,
+ u32 ctxt,
+ u64 eflags,
+ u32 etype,
+ u32 hlen,
+ u32 tlen,
+ u32 updegr,
+ u32 etail
+ ),
+ TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(u64, eflags)
+ __field(u32, ctxt)
+ __field(u32, etype)
+ __field(u32, hlen)
+ __field(u32, tlen)
+ __field(u32, updegr)
+ __field(u32, etail)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->eflags = eflags;
+ __entry->ctxt = ctxt;
+ __entry->etype = etype;
+ __entry->hlen = hlen;
+ __entry->tlen = tlen;
+ __entry->updegr = updegr;
+ __entry->etail = etail;
+ ),
+ TP_printk(
+ "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->eflags,
+ __entry->etype, show_packettype(__entry->etype),
+ __entry->hlen,
+ __entry->tlen,
+ __entry->updegr,
+ __entry->etail
+ )
+);
+
+TRACE_EVENT(hfi1_receive_interrupt,
+ TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
+ TP_ARGS(dd, ctxt),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(u32, ctxt)
+ __field(u8, slow_path)
+ __field(u8, dma_rtail)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ if (dd->rcd[ctxt]->do_interrupt ==
+ &handle_receive_interrupt) {
+ __entry->slow_path = 1;
+ __entry->dma_rtail = 0xFF;
+ } else if (dd->rcd[ctxt]->do_interrupt ==
+ &handle_receive_interrupt_dma_rtail){
+ __entry->dma_rtail = 1;
+ __entry->slow_path = 0;
+ } else if (dd->rcd[ctxt]->do_interrupt ==
+ &handle_receive_interrupt_nodma_rtail) {
+ __entry->dma_rtail = 0;
+ __entry->slow_path = 0;
+ }
+ ),
+ TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->slow_path,
+ __entry->dma_rtail
+ )
+);
+
+TRACE_EVENT(hfi1_exp_tid_reg,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr,
+ u32 npages, unsigned long va, unsigned long pa,
+ dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
+ TP_STRUCT__entry(
+ __field(unsigned int, ctxt)
+ __field(u16, subctxt)
+ __field(u32, rarr)
+ __field(u32, npages)
+ __field(unsigned long, va)
+ __field(unsigned long, pa)
+ __field(dma_addr_t, dma)
+ ),
+ TP_fast_assign(
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->rarr = rarr;
+ __entry->npages = npages;
+ __entry->va = va;
+ __entry->pa = pa;
+ __entry->dma = dma;
+ ),
+ TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->rarr,
+ __entry->npages,
+ __entry->pa,
+ __entry->va,
+ __entry->dma
+ )
+ );
+
+TRACE_EVENT(hfi1_exp_tid_unreg,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
+ unsigned long va, unsigned long pa, dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
+ TP_STRUCT__entry(
+ __field(unsigned int, ctxt)
+ __field(u16, subctxt)
+ __field(u32, rarr)
+ __field(u32, npages)
+ __field(unsigned long, va)
+ __field(unsigned long, pa)
+ __field(dma_addr_t, dma)
+ ),
+ TP_fast_assign(
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->rarr = rarr;
+ __entry->npages = npages;
+ __entry->va = va;
+ __entry->pa = pa;
+ __entry->dma = dma;
+ ),
+ TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->rarr,
+ __entry->npages,
+ __entry->pa,
+ __entry->va,
+ __entry->dma
+ )
+ );
+
+TRACE_EVENT(hfi1_exp_tid_inval,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr,
+ u32 npages, dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
+ TP_STRUCT__entry(
+ __field(unsigned int, ctxt)
+ __field(u16, subctxt)
+ __field(unsigned long, va)
+ __field(u32, rarr)
+ __field(u32, npages)
+ __field(dma_addr_t, dma)
+ ),
+ TP_fast_assign(
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->va = va;
+ __entry->rarr = rarr;
+ __entry->npages = npages;
+ __entry->dma = dma;
+ ),
+ TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->rarr,
+ __entry->npages,
+ __entry->va,
+ __entry->dma
+ )
+ );
+
+TRACE_EVENT(hfi1_mmu_invalidate,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, const char *type,
+ unsigned long start, unsigned long end),
+ TP_ARGS(ctxt, subctxt, type, start, end),
+ TP_STRUCT__entry(
+ __field(unsigned int, ctxt)
+ __field(u16, subctxt)
+ __string(type, type)
+ __field(unsigned long, start)
+ __field(unsigned long, end)
+ ),
+ TP_fast_assign(
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __assign_str(type, type);
+ __entry->start = start;
+ __entry->end = end;
+ ),
+ TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx",
+ __entry->ctxt,
+ __entry->subctxt,
+ __get_str(type),
+ __entry->start,
+ __entry->end
+ )
+ );
+
+#define SNOOP_PRN \
+ "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \
+ "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]"
+
+TRACE_EVENT(snoop_capture,
+ TP_PROTO(struct hfi1_devdata *dd,
+ int hdr_len,
+ struct hfi1_ib_header *hdr,
+ int data_len,
+ void *data),
+ TP_ARGS(dd, hdr_len, hdr, data_len, data),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ __field(u16, slid)
+ __field(u16, dlid)
+ __field(u32, qpn)
+ __field(u8, opcode)
+ __field(u8, sl)
+ __field(u16, pkey)
+ __field(u32, hdr_len)
+ __field(u32, data_len)
+ __field(u8, lnh)
+ __dynamic_array(u8, raw_hdr, hdr_len)
+ __dynamic_array(u8, raw_pkt, data_len)
+ ),
+ TP_fast_assign(
+ struct hfi1_other_headers *ohdr;
+
+ __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
+ if (__entry->lnh == HFI1_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else
+ ohdr = &hdr->u.l.oth;
+ DD_DEV_ASSIGN(dd);
+ __entry->slid = be16_to_cpu(hdr->lrh[3]);
+ __entry->dlid = be16_to_cpu(hdr->lrh[1]);
+ __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
+ __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
+ __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
+ __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff;
+ __entry->hdr_len = hdr_len;
+ __entry->data_len = data_len;
+ memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len);
+ memcpy(__get_dynamic_array(raw_pkt), data, data_len);
+ ),
+ TP_printk(
+ "[%s] " SNOOP_PRN,
+ __get_str(dev),
+ __entry->slid,
+ __entry->dlid,
+ __entry->qpn,
+ __entry->opcode,
+ show_ib_opcode(__entry->opcode),
+ __entry->sl,
+ __entry->pkey,
+ __entry->hdr_len,
+ __entry->data_len
+ )
+);
+
+#endif /* __HFI1_TRACE_RX_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_rx
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
new file mode 100644
index 000000000000..415d6be42c5d
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -0,0 +1,642 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__HFI1_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_TX_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+#include "mad.h"
+#include "sdma.h"
+
+const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1);
+
+#define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1)
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_tx
+
+TRACE_EVENT(hfi1_piofree,
+ TP_PROTO(struct send_context *sc, int extra),
+ TP_ARGS(sc, extra),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
+ __field(u32, sw_index)
+ __field(u32, hw_context)
+ __field(int, extra)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
+ __entry->sw_index = sc->sw_index;
+ __entry->hw_context = sc->hw_context;
+ __entry->extra = extra;
+ ),
+ TP_printk("[%s] ctxt %u(%u) extra %d",
+ __get_str(dev),
+ __entry->sw_index,
+ __entry->hw_context,
+ __entry->extra
+ )
+);
+
+TRACE_EVENT(hfi1_wantpiointr,
+ TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl),
+ TP_ARGS(sc, needint, credit_ctrl),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
+ __field(u32, sw_index)
+ __field(u32, hw_context)
+ __field(u32, needint)
+ __field(u64, credit_ctrl)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
+ __entry->sw_index = sc->sw_index;
+ __entry->hw_context = sc->hw_context;
+ __entry->needint = needint;
+ __entry->credit_ctrl = credit_ctrl;
+ ),
+ TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx",
+ __get_str(dev),
+ __entry->sw_index,
+ __entry->hw_context,
+ __entry->needint,
+ (unsigned long long)__entry->credit_ctrl
+ )
+);
+
+DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
+ TP_PROTO(struct rvt_qp *qp, u32 flags),
+ TP_ARGS(qp, flags),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, flags)
+ __field(u32, s_flags)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->flags = flags;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->s_flags = qp->s_flags;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->flags,
+ __entry->s_flags
+ )
+);
+
+DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup,
+ TP_PROTO(struct rvt_qp *qp, u32 flags),
+ TP_ARGS(qp, flags));
+
+DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep,
+ TP_PROTO(struct rvt_qp *qp, u32 flags),
+ TP_ARGS(qp, flags));
+
+TRACE_EVENT(hfi1_sdma_descriptor,
+ TP_PROTO(struct sdma_engine *sde,
+ u64 desc0,
+ u64 desc1,
+ u16 e,
+ void *descp),
+ TP_ARGS(sde, desc0, desc1, e, descp),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(void *, descp)
+ __field(u64, desc0)
+ __field(u64, desc1)
+ __field(u16, e)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->desc0 = desc0;
+ __entry->desc1 = desc1;
+ __entry->idx = sde->this_idx;
+ __entry->descp = descp;
+ __entry->e = e;
+ ),
+ TP_printk(
+ "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u",
+ __get_str(dev),
+ __entry->idx,
+ __parse_sdma_flags(__entry->desc0, __entry->desc1),
+ (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) &
+ SDMA_DESC0_PHY_ADDR_MASK,
+ (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) &
+ SDMA_DESC1_GENERATION_MASK),
+ (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) &
+ SDMA_DESC0_BYTE_COUNT_MASK),
+ __entry->desc0,
+ __entry->desc1,
+ __entry->descp,
+ __entry->e
+ )
+);
+
+TRACE_EVENT(hfi1_sdma_engine_select,
+ TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx),
+ TP_ARGS(dd, sel, vl, idx),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(u32, sel)
+ __field(u8, vl)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->sel = sel;
+ __entry->vl = vl;
+ __entry->idx = idx;
+ ),
+ TP_printk("[%s] selecting SDE %u sel 0x%x vl %u",
+ __get_str(dev),
+ __entry->idx,
+ __entry->sel,
+ __entry->vl
+ )
+);
+
+DECLARE_EVENT_CLASS(hfi1_sdma_engine_class,
+ TP_PROTO(struct sdma_engine *sde, u64 status),
+ TP_ARGS(sde, status),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(u64, status)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->status = status;
+ __entry->idx = sde->this_idx;
+ ),
+ TP_printk("[%s] SDE(%u) status %llx",
+ __get_str(dev),
+ __entry->idx,
+ (unsigned long long)__entry->status
+ )
+);
+
+DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt,
+ TP_PROTO(struct sdma_engine *sde, u64 status),
+ TP_ARGS(sde, status)
+);
+
+DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress,
+ TP_PROTO(struct sdma_engine *sde, u64 status),
+ TP_ARGS(sde, status)
+);
+
+DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad,
+ TP_PROTO(struct sdma_engine *sde, int aidx),
+ TP_ARGS(sde, aidx),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(int, aidx)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->idx = sde->this_idx;
+ __entry->aidx = aidx;
+ ),
+ TP_printk("[%s] SDE(%u) aidx %d",
+ __get_str(dev),
+ __entry->idx,
+ __entry->aidx
+ )
+);
+
+DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate,
+ TP_PROTO(struct sdma_engine *sde, int aidx),
+ TP_ARGS(sde, aidx));
+
+DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate,
+ TP_PROTO(struct sdma_engine *sde, int aidx),
+ TP_ARGS(sde, aidx));
+
+#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
+TRACE_EVENT(hfi1_sdma_progress,
+ TP_PROTO(struct sdma_engine *sde,
+ u16 hwhead,
+ u16 swhead,
+ struct sdma_txreq *txp
+ ),
+ TP_ARGS(sde, hwhead, swhead, txp),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(u64, sn)
+ __field(u16, hwhead)
+ __field(u16, swhead)
+ __field(u16, txnext)
+ __field(u16, tx_tail)
+ __field(u16, tx_head)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->hwhead = hwhead;
+ __entry->swhead = swhead;
+ __entry->tx_tail = sde->tx_tail;
+ __entry->tx_head = sde->tx_head;
+ __entry->txnext = txp ? txp->next_descq_idx : ~0;
+ __entry->idx = sde->this_idx;
+ __entry->sn = txp ? txp->sn : ~0;
+ ),
+ TP_printk(
+ "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
+ __get_str(dev),
+ __entry->idx,
+ __entry->sn,
+ __entry->hwhead,
+ __entry->swhead,
+ __entry->txnext,
+ __entry->tx_head,
+ __entry->tx_tail
+ )
+);
+#else
+TRACE_EVENT(hfi1_sdma_progress,
+ TP_PROTO(struct sdma_engine *sde,
+ u16 hwhead, u16 swhead,
+ struct sdma_txreq *txp
+ ),
+ TP_ARGS(sde, hwhead, swhead, txp),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(u16, hwhead)
+ __field(u16, swhead)
+ __field(u16, txnext)
+ __field(u16, tx_tail)
+ __field(u16, tx_head)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->hwhead = hwhead;
+ __entry->swhead = swhead;
+ __entry->tx_tail = sde->tx_tail;
+ __entry->tx_head = sde->tx_head;
+ __entry->txnext = txp ? txp->next_descq_idx : ~0;
+ __entry->idx = sde->this_idx;
+ ),
+ TP_printk(
+ "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
+ __get_str(dev),
+ __entry->idx,
+ __entry->hwhead,
+ __entry->swhead,
+ __entry->txnext,
+ __entry->tx_head,
+ __entry->tx_tail
+ )
+);
+#endif
+
+DECLARE_EVENT_CLASS(hfi1_sdma_sn,
+ TP_PROTO(struct sdma_engine *sde, u64 sn),
+ TP_ARGS(sde, sn),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __field(u64, sn)
+ __field(u8, idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __entry->sn = sn;
+ __entry->idx = sde->this_idx;
+ ),
+ TP_printk("[%s] SDE(%u) sn %llu",
+ __get_str(dev),
+ __entry->idx,
+ __entry->sn
+ )
+);
+
+DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn,
+ TP_PROTO(
+ struct sdma_engine *sde,
+ u64 sn
+ ),
+ TP_ARGS(sde, sn)
+);
+
+DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn,
+ TP_PROTO(struct sdma_engine *sde, u64 sn),
+ TP_ARGS(sde, sn)
+);
+
+#define USDMA_HDR_FORMAT \
+ "[%s:%u:%u:%u] PBC=(0x%x 0x%x) LRH=(0x%x 0x%x) BTH=(0x%x 0x%x 0x%x) KDETH=(0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x) TIDVal=0x%x"
+
+TRACE_EVENT(hfi1_sdma_user_header,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req,
+ struct hfi1_pkt_header *hdr, u32 tidval),
+ TP_ARGS(dd, ctxt, subctxt, req, hdr, tidval),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ __field(u16, ctxt)
+ __field(u8, subctxt)
+ __field(u16, req)
+ __field(u32, pbc0)
+ __field(u32, pbc1)
+ __field(u32, lrh0)
+ __field(u32, lrh1)
+ __field(u32, bth0)
+ __field(u32, bth1)
+ __field(u32, bth2)
+ __field(u32, kdeth0)
+ __field(u32, kdeth1)
+ __field(u32, kdeth2)
+ __field(u32, kdeth3)
+ __field(u32, kdeth4)
+ __field(u32, kdeth5)
+ __field(u32, kdeth6)
+ __field(u32, kdeth7)
+ __field(u32, kdeth8)
+ __field(u32, tidval)
+ ),
+ TP_fast_assign(
+ __le32 *pbc = (__le32 *)hdr->pbc;
+ __be32 *lrh = (__be32 *)hdr->lrh;
+ __be32 *bth = (__be32 *)hdr->bth;
+ __le32 *kdeth = (__le32 *)&hdr->kdeth;
+
+ DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->req = req;
+ __entry->pbc0 = le32_to_cpu(pbc[0]);
+ __entry->pbc1 = le32_to_cpu(pbc[1]);
+ __entry->lrh0 = be32_to_cpu(lrh[0]);
+ __entry->lrh1 = be32_to_cpu(lrh[1]);
+ __entry->bth0 = be32_to_cpu(bth[0]);
+ __entry->bth1 = be32_to_cpu(bth[1]);
+ __entry->bth2 = be32_to_cpu(bth[2]);
+ __entry->kdeth0 = le32_to_cpu(kdeth[0]);
+ __entry->kdeth1 = le32_to_cpu(kdeth[1]);
+ __entry->kdeth2 = le32_to_cpu(kdeth[2]);
+ __entry->kdeth3 = le32_to_cpu(kdeth[3]);
+ __entry->kdeth4 = le32_to_cpu(kdeth[4]);
+ __entry->kdeth5 = le32_to_cpu(kdeth[5]);
+ __entry->kdeth6 = le32_to_cpu(kdeth[6]);
+ __entry->kdeth7 = le32_to_cpu(kdeth[7]);
+ __entry->kdeth8 = le32_to_cpu(kdeth[8]);
+ __entry->tidval = tidval;
+ ),
+ TP_printk(USDMA_HDR_FORMAT,
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->req,
+ __entry->pbc1,
+ __entry->pbc0,
+ __entry->lrh0,
+ __entry->lrh1,
+ __entry->bth0,
+ __entry->bth1,
+ __entry->bth2,
+ __entry->kdeth0,
+ __entry->kdeth1,
+ __entry->kdeth2,
+ __entry->kdeth3,
+ __entry->kdeth4,
+ __entry->kdeth5,
+ __entry->kdeth6,
+ __entry->kdeth7,
+ __entry->kdeth8,
+ __entry->tidval
+ )
+);
+
+#define SDMA_UREQ_FMT \
+ "[%s:%u:%u] ver/op=0x%x, iovcnt=%u, npkts=%u, frag=%u, idx=%u"
+TRACE_EVENT(hfi1_sdma_user_reqinfo,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i),
+ TP_ARGS(dd, ctxt, subctxt, i),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd);
+ __field(u16, ctxt)
+ __field(u8, subctxt)
+ __field(u8, ver_opcode)
+ __field(u8, iovcnt)
+ __field(u16, npkts)
+ __field(u16, fragsize)
+ __field(u16, comp_idx)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->ver_opcode = i[0] & 0xff;
+ __entry->iovcnt = (i[0] >> 8) & 0xff;
+ __entry->npkts = i[1];
+ __entry->fragsize = i[2];
+ __entry->comp_idx = i[3];
+ ),
+ TP_printk(SDMA_UREQ_FMT,
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->ver_opcode,
+ __entry->iovcnt,
+ __entry->npkts,
+ __entry->fragsize,
+ __entry->comp_idx
+ )
+);
+
+#define usdma_complete_name(st) { st, #st }
+#define show_usdma_complete_state(st) \
+ __print_symbolic(st, \
+ usdma_complete_name(FREE), \
+ usdma_complete_name(QUEUED), \
+ usdma_complete_name(COMPLETE), \
+ usdma_complete_name(ERROR))
+
+TRACE_EVENT(hfi1_sdma_user_completion,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 idx,
+ u8 state, int code),
+ TP_ARGS(dd, ctxt, subctxt, idx, state, code),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ __field(u16, ctxt)
+ __field(u8, subctxt)
+ __field(u16, idx)
+ __field(u8, state)
+ __field(int, code)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->idx = idx;
+ __entry->state = state;
+ __entry->code = code;
+ ),
+ TP_printk("[%s:%u:%u:%u] SDMA completion state %s (%d)",
+ __get_str(dev), __entry->ctxt, __entry->subctxt,
+ __entry->idx, show_usdma_complete_state(__entry->state),
+ __entry->code)
+);
+
+const char *print_u32_array(struct trace_seq *, u32 *, int);
+#define __print_u32_hex(arr, len) print_u32_array(p, arr, len)
+
+TRACE_EVENT(hfi1_sdma_user_header_ahg,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req,
+ u8 sde, u8 ahgidx, u32 *ahg, int len, u32 tidval),
+ TP_ARGS(dd, ctxt, subctxt, req, sde, ahgidx, ahg, len, tidval),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ __field(u16, ctxt)
+ __field(u8, subctxt)
+ __field(u16, req)
+ __field(u8, sde)
+ __field(u8, idx)
+ __field(int, len)
+ __field(u32, tidval)
+ __array(u32, ahg, 10)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->req = req;
+ __entry->sde = sde;
+ __entry->idx = ahgidx;
+ __entry->len = len;
+ __entry->tidval = tidval;
+ memcpy(__entry->ahg, ahg, len * sizeof(u32));
+ ),
+ TP_printk("[%s:%u:%u:%u] (SDE%u/AHG%u) ahg[0-%d]=(%s) TIDVal=0x%x",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->req,
+ __entry->sde,
+ __entry->idx,
+ __entry->len - 1,
+ __print_u32_hex(__entry->ahg, __entry->len),
+ __entry->tidval
+ )
+);
+
+TRACE_EVENT(hfi1_sdma_state,
+ TP_PROTO(struct sdma_engine *sde,
+ const char *cstate,
+ const char *nstate
+ ),
+ TP_ARGS(sde, cstate, nstate),
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+ __string(curstate, cstate)
+ __string(newstate, nstate)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+ __assign_str(curstate, cstate);
+ __assign_str(newstate, nstate);
+ ),
+ TP_printk("[%s] current state %s new state %s",
+ __get_str(dev),
+ __get_str(curstate),
+ __get_str(newstate)
+ )
+);
+
+#define BCT_FORMAT \
+ "shared_limit %x vls 0-7 [%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x] 15 [%x,%x]"
+
+#define BCT(field) \
+ be16_to_cpu( \
+ ((struct buffer_control *)__get_dynamic_array(bct))->field \
+ )
+
+DECLARE_EVENT_CLASS(hfi1_bct_template,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct buffer_control *bc),
+ TP_ARGS(dd, bc),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __dynamic_array(u8, bct, sizeof(*bc))
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ memcpy(__get_dynamic_array(bct), bc,
+ sizeof(*bc));
+ ),
+ TP_printk(BCT_FORMAT,
+ BCT(overall_shared_limit),
+
+ BCT(vl[0].dedicated),
+ BCT(vl[0].shared),
+
+ BCT(vl[1].dedicated),
+ BCT(vl[1].shared),
+
+ BCT(vl[2].dedicated),
+ BCT(vl[2].shared),
+
+ BCT(vl[3].dedicated),
+ BCT(vl[3].shared),
+
+ BCT(vl[4].dedicated),
+ BCT(vl[4].shared),
+
+ BCT(vl[5].dedicated),
+ BCT(vl[5].shared),
+
+ BCT(vl[6].dedicated),
+ BCT(vl[6].shared),
+
+ BCT(vl[7].dedicated),
+ BCT(vl[7].shared),
+
+ BCT(vl[15].dedicated),
+ BCT(vl[15].shared)
+ )
+);
+
+DEFINE_EVENT(hfi1_bct_template, bct_set,
+ TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
+ TP_ARGS(dd, bc));
+
+DEFINE_EVENT(hfi1_bct_template, bct_get,
+ TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
+ TP_ARGS(dd, bc));
+
+#endif /* __HFI1_TRACE_TX_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_tx
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/hfi1/twsi.c b/drivers/infiniband/hw/hfi1/twsi.c
deleted file mode 100644
index e82e52a63d35..000000000000
--- a/drivers/infiniband/hw/hfi1/twsi.c
+++ /dev/null
@@ -1,489 +0,0 @@
-/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-
-#include "hfi.h"
-#include "twsi.h"
-
-/*
- * "Two Wire Serial Interface" support.
- *
- * Originally written for a not-quite-i2c serial eeprom, which is
- * still used on some supported boards. Later boards have added a
- * variety of other uses, most board-specific, so the bit-boffing
- * part has been split off to this file, while the other parts
- * have been moved to chip-specific files.
- *
- * We have also dropped all pretense of fully generic (e.g. pretend
- * we don't know whether '1' is the higher voltage) interface, as
- * the restrictions of the generic i2c interface (e.g. no access from
- * driver itself) make it unsuitable for this use.
- */
-
-#define READ_CMD 1
-#define WRITE_CMD 0
-
-/**
- * i2c_wait_for_writes - wait for a write
- * @dd: the hfi1_ib device
- *
- * We use this instead of udelay directly, so we can make sure
- * that previous register writes have been flushed all the way
- * to the chip. Since we are delaying anyway, the cost doesn't
- * hurt, and makes the bit twiddling more regular
- */
-static void i2c_wait_for_writes(struct hfi1_devdata *dd, u32 target)
-{
- /*
- * implicit read of EXTStatus is as good as explicit
- * read of scratch, if all we want to do is flush
- * writes.
- */
- hfi1_gpio_mod(dd, target, 0, 0, 0);
- rmb(); /* inlined, so prevent compiler reordering */
-}
-
-/*
- * QSFP modules are allowed to hold SCL low for 500uSec. Allow twice that
- * for "almost compliant" modules
- */
-#define SCL_WAIT_USEC 1000
-
-/* BUF_WAIT is time bus must be free between STOP or ACK and to next START.
- * Should be 20, but some chips need more.
- */
-#define TWSI_BUF_WAIT_USEC 60
-
-static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit)
-{
- u32 mask;
-
- udelay(1);
-
- mask = QSFP_HFI0_I2CCLK;
-
- /* SCL is meant to be bare-drain, so never set "OUT", just DIR */
- hfi1_gpio_mod(dd, target, 0, bit ? 0 : mask, mask);
-
- /*
- * Allow for slow slaves by simple
- * delay for falling edge, sampling on rise.
- */
- if (!bit) {
- udelay(2);
- } else {
- int rise_usec;
-
- for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) {
- if (mask & hfi1_gpio_mod(dd, target, 0, 0, 0))
- break;
- udelay(2);
- }
- if (rise_usec <= 0)
- dd_dev_err(dd, "SCL interface stuck low > %d uSec\n",
- SCL_WAIT_USEC);
- }
- i2c_wait_for_writes(dd, target);
-}
-
-static u8 scl_in(struct hfi1_devdata *dd, u32 target, int wait)
-{
- u32 read_val, mask;
-
- mask = QSFP_HFI0_I2CCLK;
- /* SCL is meant to be bare-drain, so never set "OUT", just DIR */
- hfi1_gpio_mod(dd, target, 0, 0, mask);
- read_val = hfi1_gpio_mod(dd, target, 0, 0, 0);
- if (wait)
- i2c_wait_for_writes(dd, target);
- return (read_val & mask) >> GPIO_SCL_NUM;
-}
-
-static void sda_out(struct hfi1_devdata *dd, u32 target, u8 bit)
-{
- u32 mask;
-
- mask = QSFP_HFI0_I2CDAT;
-
- /* SDA is meant to be bare-drain, so never set "OUT", just DIR */
- hfi1_gpio_mod(dd, target, 0, bit ? 0 : mask, mask);
-
- i2c_wait_for_writes(dd, target);
- udelay(2);
-}
-
-static u8 sda_in(struct hfi1_devdata *dd, u32 target, int wait)
-{
- u32 read_val, mask;
-
- mask = QSFP_HFI0_I2CDAT;
- /* SDA is meant to be bare-drain, so never set "OUT", just DIR */
- hfi1_gpio_mod(dd, target, 0, 0, mask);
- read_val = hfi1_gpio_mod(dd, target, 0, 0, 0);
- if (wait)
- i2c_wait_for_writes(dd, target);
- return (read_val & mask) >> GPIO_SDA_NUM;
-}
-
-/**
- * i2c_ackrcv - see if ack following write is true
- * @dd: the hfi1_ib device
- */
-static int i2c_ackrcv(struct hfi1_devdata *dd, u32 target)
-{
- u8 ack_received;
-
- /* AT ENTRY SCL = LOW */
- /* change direction, ignore data */
- ack_received = sda_in(dd, target, 1);
- scl_out(dd, target, 1);
- ack_received = sda_in(dd, target, 1) == 0;
- scl_out(dd, target, 0);
- return ack_received;
-}
-
-static void stop_cmd(struct hfi1_devdata *dd, u32 target);
-
-/**
- * rd_byte - read a byte, sending STOP on last, else ACK
- * @dd: the hfi1_ib device
- *
- * Returns byte shifted out of device
- */
-static int rd_byte(struct hfi1_devdata *dd, u32 target, int last)
-{
- int bit_cntr, data;
-
- data = 0;
-
- for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
- data <<= 1;
- scl_out(dd, target, 1);
- data |= sda_in(dd, target, 0);
- scl_out(dd, target, 0);
- }
- if (last) {
- scl_out(dd, target, 1);
- stop_cmd(dd, target);
- } else {
- sda_out(dd, target, 0);
- scl_out(dd, target, 1);
- scl_out(dd, target, 0);
- sda_out(dd, target, 1);
- }
- return data;
-}
-
-/**
- * wr_byte - write a byte, one bit at a time
- * @dd: the hfi1_ib device
- * @data: the byte to write
- *
- * Returns 0 if we got the following ack, otherwise 1
- */
-static int wr_byte(struct hfi1_devdata *dd, u32 target, u8 data)
-{
- int bit_cntr;
- u8 bit;
-
- for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
- bit = (data >> bit_cntr) & 1;
- sda_out(dd, target, bit);
- scl_out(dd, target, 1);
- scl_out(dd, target, 0);
- }
- return (!i2c_ackrcv(dd, target)) ? 1 : 0;
-}
-
-/*
- * issue TWSI start sequence:
- * (both clock/data high, clock high, data low while clock is high)
- */
-static void start_seq(struct hfi1_devdata *dd, u32 target)
-{
- sda_out(dd, target, 1);
- scl_out(dd, target, 1);
- sda_out(dd, target, 0);
- udelay(1);
- scl_out(dd, target, 0);
-}
-
-/**
- * stop_seq - transmit the stop sequence
- * @dd: the hfi1_ib device
- *
- * (both clock/data low, clock high, data high while clock is high)
- */
-static void stop_seq(struct hfi1_devdata *dd, u32 target)
-{
- scl_out(dd, target, 0);
- sda_out(dd, target, 0);
- scl_out(dd, target, 1);
- sda_out(dd, target, 1);
-}
-
-/**
- * stop_cmd - transmit the stop condition
- * @dd: the hfi1_ib device
- *
- * (both clock/data low, clock high, data high while clock is high)
- */
-static void stop_cmd(struct hfi1_devdata *dd, u32 target)
-{
- stop_seq(dd, target);
- udelay(TWSI_BUF_WAIT_USEC);
-}
-
-/**
- * hfi1_twsi_reset - reset I2C communication
- * @dd: the hfi1_ib device
- * returns 0 if ok, -EIO on error
- */
-int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target)
-{
- int clock_cycles_left = 9;
- u32 mask;
-
- /* Both SCL and SDA should be high. If not, there
- * is something wrong.
- */
- mask = QSFP_HFI0_I2CCLK | QSFP_HFI0_I2CDAT;
-
- /*
- * Force pins to desired innocuous state.
- * This is the default power-on state with out=0 and dir=0,
- * So tri-stated and should be floating high (barring HW problems)
- */
- hfi1_gpio_mod(dd, target, 0, 0, mask);
-
- /* Check if SCL is low, if it is low then we have a slave device
- * misbehaving and there is not much we can do.
- */
- if (!scl_in(dd, target, 0))
- return -EIO;
-
- /* Check if SDA is low, if it is low then we have to clock SDA
- * up to 9 times for the device to release the bus
- */
- while (clock_cycles_left--) {
- if (sda_in(dd, target, 0))
- return 0;
- scl_out(dd, target, 0);
- scl_out(dd, target, 1);
- }
-
- return -EIO;
-}
-
-#define HFI1_TWSI_START 0x100
-#define HFI1_TWSI_STOP 0x200
-
-/* Write byte to TWSI, optionally prefixed with START or suffixed with
- * STOP.
- * returns 0 if OK (ACK received), else != 0
- */
-static int twsi_wr(struct hfi1_devdata *dd, u32 target, int data, int flags)
-{
- int ret = 1;
-
- if (flags & HFI1_TWSI_START)
- start_seq(dd, target);
-
- /* Leaves SCL low (from i2c_ackrcv()) */
- ret = wr_byte(dd, target, data);
-
- if (flags & HFI1_TWSI_STOP)
- stop_cmd(dd, target);
- return ret;
-}
-
-/* Added functionality for IBA7220-based cards */
-#define HFI1_TEMP_DEV 0x98
-
-/*
- * hfi1_twsi_blk_rd
- * General interface for data transfer from twsi devices.
- * One vestige of its former role is that it recognizes a device
- * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part,
- * which responded to all TWSI device codes, interpreting them as
- * address within device. On all other devices found on board handled by
- * this driver, the device is followed by a N-byte "address" which selects
- * the "register" or "offset" within the device from which data should
- * be read.
- */
-int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
- void *buffer, int len)
-{
- u8 *bp = buffer;
- int ret = 1;
- int i;
- int offset_size;
-
- /* obtain the offset size, strip it from the device address */
- offset_size = (dev >> 8) & 0xff;
- dev &= 0xff;
-
- /* allow at most a 2 byte offset */
- if (offset_size > 2)
- goto bail;
-
- if (dev == HFI1_TWSI_NO_DEV) {
- /* legacy not-really-I2C */
- addr = (addr << 1) | READ_CMD;
- ret = twsi_wr(dd, target, addr, HFI1_TWSI_START);
- } else {
- /* Actual I2C */
- if (offset_size) {
- ret = twsi_wr(dd, target,
- dev | WRITE_CMD, HFI1_TWSI_START);
- if (ret) {
- stop_cmd(dd, target);
- goto bail;
- }
-
- for (i = 0; i < offset_size; i++) {
- ret = twsi_wr(dd, target,
- (addr >> (i * 8)) & 0xff, 0);
- udelay(TWSI_BUF_WAIT_USEC);
- if (ret) {
- dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n",
- i, addr);
- goto bail;
- }
- }
- }
- ret = twsi_wr(dd, target, dev | READ_CMD, HFI1_TWSI_START);
- }
- if (ret) {
- stop_cmd(dd, target);
- goto bail;
- }
-
- /*
- * block devices keeps clocking data out as long as we ack,
- * automatically incrementing the address. Some have "pages"
- * whose boundaries will not be crossed, but the handling
- * of these is left to the caller, who is in a better
- * position to know.
- */
- while (len-- > 0) {
- /*
- * Get and store data, sending ACK if length remaining,
- * else STOP
- */
- *bp++ = rd_byte(dd, target, !len);
- }
-
- ret = 0;
-
-bail:
- return ret;
-}
-
-/*
- * hfi1_twsi_blk_wr
- * General interface for data transfer to twsi devices.
- * One vestige of its former role is that it recognizes a device
- * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part,
- * which responded to all TWSI device codes, interpreting them as
- * address within device. On all other devices found on board handled by
- * this driver, the device is followed by a N-byte "address" which selects
- * the "register" or "offset" within the device to which data should
- * be written.
- */
-int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr,
- const void *buffer, int len)
-{
- const u8 *bp = buffer;
- int ret = 1;
- int i;
- int offset_size;
-
- /* obtain the offset size, strip it from the device address */
- offset_size = (dev >> 8) & 0xff;
- dev &= 0xff;
-
- /* allow at most a 2 byte offset */
- if (offset_size > 2)
- goto bail;
-
- if (dev == HFI1_TWSI_NO_DEV) {
- if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD,
- HFI1_TWSI_START)) {
- goto failed_write;
- }
- } else {
- /* Real I2C */
- if (twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START))
- goto failed_write;
- }
-
- for (i = 0; i < offset_size; i++) {
- ret = twsi_wr(dd, target, (addr >> (i * 8)) & 0xff, 0);
- udelay(TWSI_BUF_WAIT_USEC);
- if (ret) {
- dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n",
- i, addr);
- goto bail;
- }
- }
-
- for (i = 0; i < len; i++)
- if (twsi_wr(dd, target, *bp++, 0))
- goto failed_write;
-
- ret = 0;
-
-failed_write:
- stop_cmd(dd, target);
-
-bail:
- return ret;
-}
diff --git a/drivers/infiniband/hw/hfi1/twsi.h b/drivers/infiniband/hw/hfi1/twsi.h
deleted file mode 100644
index 5b8a5b5e7eae..000000000000
--- a/drivers/infiniband/hw/hfi1/twsi.h
+++ /dev/null
@@ -1,65 +0,0 @@
-#ifndef _TWSI_H
-#define _TWSI_H
-/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#define HFI1_TWSI_NO_DEV 0xFF
-
-struct hfi1_devdata;
-
-/* Bit position of SDA/SCL pins in ASIC_QSFP* registers */
-#define GPIO_SDA_NUM 1
-#define GPIO_SCL_NUM 0
-
-/* these functions must be called with qsfp_lock held */
-int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target);
-int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
- void *buffer, int len);
-int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr,
- const void *buffer, int len);
-
-#endif /* _TWSI_H */
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index df773d433297..a726d96d185f 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -119,6 +119,31 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail;
}
/*
+ * Local operations are processed immediately
+ * after all prior requests have completed.
+ */
+ if (wqe->wr.opcode == IB_WR_REG_MR ||
+ wqe->wr.opcode == IB_WR_LOCAL_INV) {
+ int local_ops = 0;
+ int err = 0;
+
+ if (qp->s_last != qp->s_cur)
+ goto bail;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
+ err = rvt_invalidate_rkey(
+ qp, wqe->wr.ex.invalidate_rkey);
+ local_ops = 1;
+ }
+ hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
+ : IB_WC_SUCCESS);
+ if (local_ops)
+ atomic_dec(&qp->local_ops_pending);
+ qp->s_hdrwords = 0;
+ goto done_free_tx;
+ }
+ /*
* Start a new request.
*/
qp->s_psn = wqe->psn;
@@ -294,46 +319,12 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
struct ib_reth *reth;
int has_grh = rcv_flags & HFI1_HAS_GRH;
int ret;
- u32 bth1;
bth0 = be32_to_cpu(ohdr->bth[0]);
if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0))
return;
- bth1 = be32_to_cpu(ohdr->bth[1]);
- if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) {
- if (bth1 & HFI1_BECN_SMASK) {
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- u32 rqpn, lqpn;
- u16 rlid = be16_to_cpu(hdr->lrh[3]);
- u8 sl, sc5;
-
- lqpn = bth1 & RVT_QPN_MASK;
- rqpn = qp->remote_qpn;
-
- sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
- sl = ibp->sc_to_sl[sc5];
-
- process_becn(ppd, sl, rlid, lqpn, rqpn,
- IB_CC_SVCTYPE_UC);
- }
-
- if (bth1 & HFI1_FECN_SMASK) {
- struct ib_grh *grh = NULL;
- u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
- u16 slid = be16_to_cpu(hdr->lrh[3]);
- u16 dlid = be16_to_cpu(hdr->lrh[1]);
- u32 src_qp = qp->remote_qpn;
- u8 sc5;
-
- sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
- if (has_grh)
- grh = &hdr->u.l.grh;
-
- return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5,
- grh);
- }
- }
+ process_ecn(qp, packet, true);
psn = be32_to_cpu(ohdr->bth[2]);
opcode = (bth0 >> 24) & 0xff;
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index be91f6fa1c87..f01e8e1d62d3 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -184,8 +184,12 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
}
if (ah_attr->ah_flags & IB_AH_GRH) {
- hfi1_copy_sge(&qp->r_sge, &ah_attr->grh,
- sizeof(struct ib_grh), 1, 0);
+ struct ib_grh grh;
+ struct ib_global_route grd = ah_attr->grh;
+
+ hfi1_make_grh(ibp, &grh, &grd, 0, 0);
+ hfi1_copy_sge(&qp->r_sge, &grh,
+ sizeof(grh), 1, 0);
wc.wc_flags |= IB_WC_GRH;
} else {
hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
@@ -430,10 +434,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->qkey : wqe->ud_wr.remote_qkey);
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
/* disarm any ahg */
- priv->s_hdr->ahgcount = 0;
- priv->s_hdr->ahgidx = 0;
- priv->s_hdr->tx_flags = 0;
- priv->s_hdr->sde = NULL;
+ priv->s_ahg->ahgcount = 0;
+ priv->s_ahg->ahgidx = 0;
+ priv->s_ahg->tx_flags = 0;
/* pbc */
ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
@@ -665,13 +668,13 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
struct hfi1_other_headers *ohdr = packet->ohdr;
int opcode;
u32 hdrsize = packet->hlen;
- u32 pad;
struct ib_wc wc;
u32 qkey;
u32 src_qp;
u16 dlid, pkey;
int mgmt_pkey_idx = -1;
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
@@ -680,52 +683,33 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
bool has_grh = rcv_flags & HFI1_HAS_GRH;
u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
u32 bth1;
- int is_mcast;
- struct ib_grh *grh = NULL;
+ u8 sl_from_sc, sl;
+ u16 slid;
+ u8 extra_bytes;
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
dlid = be16_to_cpu(hdr->lrh[1]);
- is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
- (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
bth1 = be32_to_cpu(ohdr->bth[1]);
- if (unlikely(bth1 & HFI1_BECN_SMASK)) {
- /*
- * In pre-B0 h/w the CNP_OPCODE is handled via an
- * error path.
- */
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
- u8 sl;
-
- sl = ibp->sc_to_sl[sc5];
-
- process_becn(ppd, sl, 0, lqpn, 0, IB_CC_SVCTYPE_UD);
- }
+ slid = be16_to_cpu(hdr->lrh[3]);
+ pkey = (u16)be32_to_cpu(ohdr->bth[0]);
+ sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
+ extra_bytes = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ extra_bytes += (SIZE_OF_CRC << 2);
+ sl_from_sc = ibp->sc_to_sl[sc5];
- /*
- * The opcode is in the low byte when its in network order
- * (top byte when in host order).
- */
opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
opcode &= 0xff;
- pkey = (u16)be32_to_cpu(ohdr->bth[0]);
-
- if (!is_mcast && (opcode != IB_OPCODE_CNP) && bth1 & HFI1_FECN_SMASK) {
- u16 slid = be16_to_cpu(hdr->lrh[3]);
-
- return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, grh);
- }
+ process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
/*
* Get the number of bytes the message was padded by
* and drop incomplete packets.
*/
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
- if (unlikely(tlen < (hdrsize + pad + 4)))
+ if (unlikely(tlen < (hdrsize + extra_bytes)))
goto drop;
- tlen -= hdrsize + pad + 4;
+ tlen -= hdrsize + extra_bytes;
/*
* Check that the permissive LID is only used on QP0
@@ -736,10 +720,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
hdr->lrh[3] == IB_LID_PERMISSIVE))
goto drop;
if (qp->ibqp.qp_num > 1) {
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- u16 slid;
-
- slid = be16_to_cpu(hdr->lrh[3]);
if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) {
/*
* Traps will not be sent for packets dropped
@@ -748,12 +728,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
* IB spec (release 1.3, section 10.9.4)
*/
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
- pkey,
- (be16_to_cpu(hdr->lrh[0]) >> 4) &
- 0xF,
+ pkey, sl,
src_qp, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
+ slid, dlid);
return;
}
} else {
@@ -763,22 +740,18 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
goto drop;
}
if (unlikely(qkey != qp->qkey)) {
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, sl,
src_qp, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
+ slid, dlid);
return;
}
/* Drop invalid MAD packets (see 13.5.3.1). */
if (unlikely(qp->ibqp.qp_num == 1 &&
- (tlen > 2048 ||
- (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))
+ (tlen > 2048 || (sc5 == 0xF))))
goto drop;
} else {
/* Received on QP0, and so by definition, this is an SMP */
struct opa_smp *smp = (struct opa_smp *)data;
- u16 slid = be16_to_cpu(hdr->lrh[3]);
if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp))
goto drop;
@@ -861,7 +834,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
qp->ibqp.qp_type == IB_QPT_SMI) {
if (mgmt_pkey_idx < 0) {
if (net_ratelimit()) {
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_devdata *dd = ppd->dd;
dd_dev_err(dd, "QP type %d mgmt_pkey_idx < 0 and packet not dropped???\n",
@@ -874,8 +846,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
wc.pkey_index = 0;
}
- wc.slid = be16_to_cpu(hdr->lrh[3]);
- wc.sl = ibp->sc_to_sl[sc5];
+ wc.slid = slid;
+ wc.sl = sl_from_sc;
/*
* Save the LMC lower bits if the destination LID is a unicast LID.
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 1b640a35b3fe..64d26525435a 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -82,24 +82,25 @@ struct tid_pageset {
((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *,
- struct rb_root *);
+ struct hfi1_filedata *);
static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
static int set_rcvarray_entry(struct file *, unsigned long, u32,
struct tid_group *, struct page **, unsigned);
-static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
-static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *,
- struct mm_struct *);
-static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
+static int tid_rb_insert(void *, struct mmu_rb_node *);
+static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
+ struct tid_rb_node *tnode);
+static void tid_rb_remove(void *, struct mmu_rb_node *);
+static int tid_rb_invalidate(void *, struct mmu_rb_node *);
static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
struct tid_pageset *, unsigned, u16, struct page **,
u32 *, unsigned *, unsigned *);
static int unprogram_rcvarray(struct file *, u32, struct tid_group **);
-static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *);
+static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
static struct mmu_rb_ops tid_rb_ops = {
- .insert = mmu_rb_insert,
- .remove = mmu_rb_remove,
- .invalidate = mmu_rb_invalidate
+ .insert = tid_rb_insert,
+ .remove = tid_rb_remove,
+ .invalidate = tid_rb_invalidate
};
static inline u32 rcventry2tidinfo(u32 rcventry)
@@ -162,7 +163,6 @@ int hfi1_user_exp_rcv_init(struct file *fp)
spin_lock_init(&fd->tid_lock);
spin_lock_init(&fd->invalid_lock);
- fd->tid_rb_root = RB_ROOT;
if (!uctxt->subctxt_cnt || !fd->subctxt) {
exp_tid_group_init(&uctxt->tid_group_list);
@@ -197,7 +197,7 @@ int hfi1_user_exp_rcv_init(struct file *fp)
if (!fd->entry_to_rb)
return -ENOMEM;
- if (!HFI1_CAP_IS_USET(TID_UNMAP)) {
+ if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) {
fd->invalid_tid_idx = 0;
fd->invalid_tids = kzalloc(uctxt->expected_count *
sizeof(u32), GFP_KERNEL);
@@ -208,15 +208,15 @@ int hfi1_user_exp_rcv_init(struct file *fp)
/*
* Register MMU notifier callbacks. If the registration
- * fails, continue but turn off the TID caching for
- * all user contexts.
+ * fails, continue without TID caching for this context.
*/
- ret = hfi1_mmu_rb_register(&fd->tid_rb_root, &tid_rb_ops);
+ ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
+ dd->pport->hfi1_wq,
+ &fd->handler);
if (ret) {
dd_dev_info(dd,
"Failed MMU notifier registration %d\n",
ret);
- HFI1_CAP_USET(TID_UNMAP);
ret = 0;
}
}
@@ -235,7 +235,7 @@ int hfi1_user_exp_rcv_init(struct file *fp)
* init.
*/
spin_lock(&fd->tid_lock);
- if (uctxt->subctxt_cnt && !HFI1_CAP_IS_USET(TID_UNMAP)) {
+ if (uctxt->subctxt_cnt && fd->handler) {
u16 remainder;
fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
@@ -261,18 +261,16 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
* The notifier would have been removed when the process'es mm
* was freed.
*/
- if (!HFI1_CAP_IS_USET(TID_UNMAP))
- hfi1_mmu_rb_unregister(&fd->tid_rb_root);
+ if (fd->handler)
+ hfi1_mmu_rb_unregister(fd->handler);
kfree(fd->invalid_tids);
if (!uctxt->cnt) {
if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
- unlock_exp_tids(uctxt, &uctxt->tid_full_list,
- &fd->tid_rb_root);
+ unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
- unlock_exp_tids(uctxt, &uctxt->tid_used_list,
- &fd->tid_rb_root);
+ unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
list) {
list_del_init(&grp->list);
@@ -399,12 +397,12 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
* pages, accept the amount pinned so far and program only that.
* User space knows how to deal with partially programmed buffers.
*/
- if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) {
+ if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
ret = -ENOMEM;
goto bail;
}
- pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages);
+ pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
if (pinned <= 0) {
ret = pinned;
goto bail;
@@ -559,7 +557,7 @@ nomem:
* for example), unpin all unmapped pages so we can pin them nex time.
*/
if (mapped_pages != pinned) {
- hfi1_release_user_pages(current->mm, &pages[mapped_pages],
+ hfi1_release_user_pages(fd->mm, &pages[mapped_pages],
pinned - mapped_pages,
false);
fd->tid_n_pinned -= pinned - mapped_pages;
@@ -829,7 +827,6 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct tid_rb_node *node;
struct hfi1_devdata *dd = uctxt->dd;
- struct rb_root *root = &fd->tid_rb_root;
dma_addr_t phys;
/*
@@ -861,10 +858,10 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
node->freed = false;
memcpy(node->pages, pages, sizeof(struct page *) * npages);
- if (HFI1_CAP_IS_USET(TID_UNMAP))
- ret = mmu_rb_insert(root, &node->mmu);
+ if (!fd->handler)
+ ret = tid_rb_insert(fd, &node->mmu);
else
- ret = hfi1_mmu_rb_insert(root, &node->mmu);
+ ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
if (ret) {
hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
@@ -904,19 +901,19 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
node = fd->entry_to_rb[rcventry];
if (!node || node->rcventry != (uctxt->expected_base + rcventry))
return -EBADF;
- if (HFI1_CAP_IS_USET(TID_UNMAP))
- mmu_rb_remove(&fd->tid_rb_root, &node->mmu, NULL);
- else
- hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu);
if (grp)
*grp = node->grp;
- clear_tid_node(fd, fd->subctxt, node);
+
+ if (!fd->handler)
+ cacheless_tid_rb_remove(fd, node);
+ else
+ hfi1_mmu_rb_remove(fd->handler, &node->mmu);
+
return 0;
}
-static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt,
- struct tid_rb_node *node)
+static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
@@ -934,7 +931,7 @@ static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt,
pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
PCI_DMA_FROMDEVICE);
- hfi1_release_user_pages(current->mm, node->pages, node->npages, true);
+ hfi1_release_user_pages(fd->mm, node->pages, node->npages, true);
fd->tid_n_pinned -= node->npages;
node->grp->used--;
@@ -949,12 +946,15 @@ static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt,
kfree(node);
}
+/*
+ * As a simple helper for hfi1_user_exp_rcv_free, this function deals with
+ * clearing nodes in the non-cached case.
+ */
static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
- struct exp_tid_set *set, struct rb_root *root)
+ struct exp_tid_set *set,
+ struct hfi1_filedata *fd)
{
struct tid_group *grp, *ptr;
- struct hfi1_filedata *fd = container_of(root, struct hfi1_filedata,
- tid_rb_root);
int i;
list_for_each_entry_safe(grp, ptr, &set->list, list) {
@@ -969,22 +969,23 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
uctxt->expected_base];
if (!node || node->rcventry != rcventry)
continue;
- if (HFI1_CAP_IS_USET(TID_UNMAP))
- mmu_rb_remove(&fd->tid_rb_root,
- &node->mmu, NULL);
- else
- hfi1_mmu_rb_remove(&fd->tid_rb_root,
- &node->mmu);
- clear_tid_node(fd, -1, node);
+
+ cacheless_tid_rb_remove(fd, node);
}
}
}
}
-static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
+/*
+ * Always return 0 from this function. A non-zero return indicates that the
+ * remove operation will be called and that memory should be unpinned.
+ * However, the driver cannot unpin out from under PSM. Instead, retain the
+ * memory (by returning 0) and inform PSM that the memory is going away. PSM
+ * will call back later when it has removed the memory from its list.
+ */
+static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
{
- struct hfi1_filedata *fdata =
- container_of(root, struct hfi1_filedata, tid_rb_root);
+ struct hfi1_filedata *fdata = arg;
struct hfi1_ctxtdata *uctxt = fdata->uctxt;
struct tid_rb_node *node =
container_of(mnode, struct tid_rb_node, mmu);
@@ -1025,10 +1026,9 @@ static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
return 0;
}
-static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
+static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
{
- struct hfi1_filedata *fdata =
- container_of(root, struct hfi1_filedata, tid_rb_root);
+ struct hfi1_filedata *fdata = arg;
struct tid_rb_node *tnode =
container_of(node, struct tid_rb_node, mmu);
u32 base = fdata->uctxt->expected_base;
@@ -1037,14 +1037,20 @@ static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
return 0;
}
-static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node,
- struct mm_struct *mm)
+static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
+ struct tid_rb_node *tnode)
{
- struct hfi1_filedata *fdata =
- container_of(root, struct hfi1_filedata, tid_rb_root);
- struct tid_rb_node *tnode =
- container_of(node, struct tid_rb_node, mmu);
u32 base = fdata->uctxt->expected_base;
fdata->entry_to_rb[tnode->rcventry - base] = NULL;
+ clear_tid_node(fdata, tnode);
+}
+
+static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
+{
+ struct hfi1_filedata *fdata = arg;
+ struct tid_rb_node *tnode =
+ container_of(node, struct tid_rb_node, mmu);
+
+ cacheless_tid_rb_remove(fdata, tnode);
}
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 88e10b5f55f1..20f4ddcac3b0 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -68,7 +68,8 @@ MODULE_PARM_DESC(cache_size, "Send and receive side cache size limit (in MB)");
* could keeping caching buffers.
*
*/
-bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages)
+bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
+ u32 nlocked, u32 npages)
{
unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
size = (cache_size * (1UL << 20)); /* convert to bytes */
@@ -89,9 +90,9 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages)
/* Convert to number of pages */
size = DIV_ROUND_UP(size, PAGE_SIZE);
- down_read(&current->mm->mmap_sem);
- pinned = current->mm->pinned_vm;
- up_read(&current->mm->mmap_sem);
+ down_read(&mm->mmap_sem);
+ pinned = mm->pinned_vm;
+ up_read(&mm->mmap_sem);
/* First, check the absolute limit against all pinned pages. */
if (pinned + npages >= ulimit && !can_lock)
@@ -100,8 +101,8 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages)
return ((nlocked + npages) <= size) || can_lock;
}
-int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
- struct page **pages)
+int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t npages,
+ bool writable, struct page **pages)
{
int ret;
@@ -109,9 +110,9 @@ int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
if (ret < 0)
return ret;
- down_write(&current->mm->mmap_sem);
- current->mm->pinned_vm += ret;
- up_write(&current->mm->mmap_sem);
+ down_write(&mm->mmap_sem);
+ mm->pinned_vm += ret;
+ up_write(&mm->mmap_sem);
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 47ffd273ecbd..0ecf27903dc2 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -145,7 +145,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
/* Last packet in the request */
#define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
-#define SDMA_REQ_IN_USE 0
+/* SDMA request flag bits */
#define SDMA_REQ_FOR_THREAD 1
#define SDMA_REQ_SEND_DONE 2
#define SDMA_REQ_HAVE_AHG 3
@@ -183,16 +183,18 @@ struct user_sdma_iovec {
struct sdma_mmu_node *node;
};
-#define SDMA_CACHE_NODE_EVICT 0
-
struct sdma_mmu_node {
struct mmu_rb_node rb;
- struct list_head list;
struct hfi1_user_sdma_pkt_q *pq;
atomic_t refcount;
struct page **pages;
unsigned npages;
- unsigned long flags;
+};
+
+/* evict operation argument */
+struct evict_data {
+ u32 cleared; /* count evicted so far */
+ u32 target; /* target count to evict */
};
struct user_sdma_request {
@@ -305,14 +307,16 @@ static int defer_packet_queue(
unsigned seq);
static void activate_packet_queue(struct iowait *, int);
static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
-static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *);
-static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *,
- struct mm_struct *);
-static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
+static int sdma_rb_insert(void *, struct mmu_rb_node *);
+static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
+ void *arg2, bool *stop);
+static void sdma_rb_remove(void *, struct mmu_rb_node *);
+static int sdma_rb_invalidate(void *, struct mmu_rb_node *);
static struct mmu_rb_ops sdma_rb_ops = {
.filter = sdma_rb_filter,
.insert = sdma_rb_insert,
+ .evict = sdma_rb_evict,
.remove = sdma_rb_remove,
.invalidate = sdma_rb_invalidate
};
@@ -397,6 +401,11 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
if (!pq->reqs)
goto pq_reqs_nomem;
+ memsize = BITS_TO_LONGS(hfi1_sdma_comp_ring_size) * sizeof(long);
+ pq->req_in_use = kzalloc(memsize, GFP_KERNEL);
+ if (!pq->req_in_use)
+ goto pq_reqs_no_in_use;
+
INIT_LIST_HEAD(&pq->list);
pq->dd = dd;
pq->ctxt = uctxt->ctxt;
@@ -405,9 +414,8 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
pq->state = SDMA_PKT_Q_INACTIVE;
atomic_set(&pq->n_reqs, 0);
init_waitqueue_head(&pq->wait);
- pq->sdma_rb_root = RB_ROOT;
- INIT_LIST_HEAD(&pq->evict);
- spin_lock_init(&pq->evict_lock);
+ atomic_set(&pq->n_locked, 0);
+ pq->mm = fd->mm;
iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
activate_packet_queue, NULL);
@@ -437,7 +445,8 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
cq->nentries = hfi1_sdma_comp_ring_size;
fd->cq = cq;
- ret = hfi1_mmu_rb_register(&pq->sdma_rb_root, &sdma_rb_ops);
+ ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
+ &pq->handler);
if (ret) {
dd_dev_err(dd, "Failed to register with MMU %d", ret);
goto done;
@@ -453,6 +462,8 @@ cq_comps_nomem:
cq_nomem:
kmem_cache_destroy(pq->txreq_cache);
pq_txreq_nomem:
+ kfree(pq->req_in_use);
+pq_reqs_no_in_use:
kfree(pq->reqs);
pq_reqs_nomem:
kfree(pq);
@@ -472,8 +483,9 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
uctxt->ctxt, fd->subctxt);
pq = fd->pq;
- hfi1_mmu_rb_unregister(&pq->sdma_rb_root);
if (pq) {
+ if (pq->handler)
+ hfi1_mmu_rb_unregister(pq->handler);
spin_lock_irqsave(&uctxt->sdma_qlock, flags);
if (!list_empty(&pq->list))
list_del_init(&pq->list);
@@ -484,6 +496,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
pq->wait,
(ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE));
kfree(pq->reqs);
+ kfree(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
kfree(pq);
fd->pq = NULL;
@@ -496,10 +509,31 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
return 0;
}
+static u8 dlid_to_selector(u16 dlid)
+{
+ static u8 mapping[256];
+ static int initialized;
+ static u8 next;
+ int hash;
+
+ if (!initialized) {
+ memset(mapping, 0xFF, 256);
+ initialized = 1;
+ }
+
+ hash = ((dlid >> 8) ^ dlid) & 0xFF;
+ if (mapping[hash] == 0xFF) {
+ mapping[hash] = next;
+ next = (next + 1) & 0x7F;
+ }
+
+ return mapping[hash];
+}
+
int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
unsigned long dim, unsigned long *count)
{
- int ret = 0, i = 0;
+ int ret = 0, i;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_user_sdma_pkt_q *pq = fd->pq;
@@ -511,6 +545,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
struct user_sdma_request *req;
u8 opcode, sc, vl;
int req_queued = 0;
+ u16 dlid;
+ u8 selector;
if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
hfi1_cdbg(
@@ -529,30 +565,48 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
(u16 *)&info);
- if (cq->comps[info.comp_idx].status == QUEUED ||
- test_bit(SDMA_REQ_IN_USE, &pq->reqs[info.comp_idx].flags)) {
- hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in QUEUED state",
- dd->unit, uctxt->ctxt, fd->subctxt,
- info.comp_idx);
- return -EBADSLT;
+
+ if (info.comp_idx >= hfi1_sdma_comp_ring_size) {
+ hfi1_cdbg(SDMA,
+ "[%u:%u:%u:%u] Invalid comp index",
+ dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
+ return -EINVAL;
}
+
+ /*
+ * Sanity check the header io vector count. Need at least 1 vector
+ * (header) and cannot be larger than the actual io vector count.
+ */
+ if (req_iovcnt(info.ctrl) < 1 || req_iovcnt(info.ctrl) > dim) {
+ hfi1_cdbg(SDMA,
+ "[%u:%u:%u:%u] Invalid iov count %d, dim %ld",
+ dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx,
+ req_iovcnt(info.ctrl), dim);
+ return -EINVAL;
+ }
+
if (!info.fragsize) {
hfi1_cdbg(SDMA,
"[%u:%u:%u:%u] Request does not specify fragsize",
dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
return -EINVAL;
}
+
+ /* Try to claim the request. */
+ if (test_and_set_bit(info.comp_idx, pq->req_in_use)) {
+ hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use",
+ dd->unit, uctxt->ctxt, fd->subctxt,
+ info.comp_idx);
+ return -EBADSLT;
+ }
/*
- * We've done all the safety checks that we can up to this point,
- * "allocate" the request entry.
+ * All safety checks have been done and this request has been claimed.
*/
hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit,
uctxt->ctxt, fd->subctxt, info.comp_idx);
req = pq->reqs + info.comp_idx;
memset(req, 0, sizeof(*req));
- /* Mark the request as IN_USE before we start filling it in. */
- set_bit(SDMA_REQ_IN_USE, &req->flags);
- req->data_iovs = req_iovcnt(info.ctrl) - 1;
+ req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
req->pq = pq;
req->cq = cq;
req->status = -1;
@@ -560,13 +614,22 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
memcpy(&req->info, &info, sizeof(info));
- if (req_opcode(info.ctrl) == EXPECTED)
+ if (req_opcode(info.ctrl) == EXPECTED) {
+ /* expected must have a TID info and at least one data vector */
+ if (req->data_iovs < 2) {
+ SDMA_DBG(req,
+ "Not enough vectors for expected request");
+ ret = -EINVAL;
+ goto free_req;
+ }
req->data_iovs--;
+ }
if (!info.npkts || req->data_iovs > MAX_VECTORS_PER_REQ) {
SDMA_DBG(req, "Too many vectors (%u/%u)", req->data_iovs,
MAX_VECTORS_PER_REQ);
- return -EINVAL;
+ ret = -EINVAL;
+ goto free_req;
}
/* Copy the header from the user buffer */
ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info),
@@ -634,7 +697,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
idx++;
/* Save all the IO vector structures */
- while (i < req->data_iovs) {
+ for (i = 0; i < req->data_iovs; i++) {
INIT_LIST_HEAD(&req->iovs[i].list);
memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
ret = pin_vector_pages(req, &req->iovs[i]);
@@ -642,7 +705,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
req->status = ret;
goto free_req;
}
- req->data_len += req->iovs[i++].iov.iov_len;
+ req->data_len += req->iovs[i].iov.iov_len;
}
SDMA_DBG(req, "total data length %u", req->data_len);
@@ -686,9 +749,13 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
idx++;
}
+ dlid = be16_to_cpu(req->hdr.lrh[1]);
+ selector = dlid_to_selector(dlid);
+
/* Have to select the engine */
req->sde = sdma_select_engine_vl(dd,
- (u32)(uctxt->ctxt + fd->subctxt),
+ (u32)(uctxt->ctxt + fd->subctxt +
+ selector),
vl);
if (!req->sde || !sdma_running(req->sde)) {
ret = -ECOMM;
@@ -766,14 +833,21 @@ static inline u32 compute_data_length(struct user_sdma_request *req,
* The size of the data of the first packet is in the header
* template. However, it includes the header and ICRC, which need
* to be subtracted.
+ * The minimum representable packet data length in a header is 4 bytes,
+ * therefore, when the data length request is less than 4 bytes, there's
+ * only one packet, and the packet data length is equal to that of the
+ * request data length.
* The size of the remaining packets is the minimum of the frag
* size (MTU) or remaining data in the request.
*/
u32 len;
if (!req->seqnum) {
- len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) -
- (sizeof(tx->hdr) - 4));
+ if (req->data_len < sizeof(u32))
+ len = req->data_len;
+ else
+ len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) -
+ (sizeof(tx->hdr) - 4));
} else if (req_opcode(req->info.ctrl) == EXPECTED) {
u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) *
PAGE_SIZE;
@@ -803,6 +877,13 @@ static inline u32 compute_data_length(struct user_sdma_request *req,
return len;
}
+static inline u32 pad_len(u32 len)
+{
+ if (len & (sizeof(u32) - 1))
+ len += sizeof(u32) - (len & (sizeof(u32) - 1));
+ return len;
+}
+
static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
{
/* (Size of complete header - size of PBC) + 4B ICRC + data length */
@@ -894,7 +975,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) {
if (!req->seqnum) {
u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
- u32 lrhlen = get_lrh_len(req->hdr, datalen);
+ u32 lrhlen = get_lrh_len(req->hdr,
+ pad_len(datalen));
/*
* Copy the request header into the tx header
* because the HW needs a cacheline-aligned
@@ -1048,39 +1130,24 @@ static inline int num_user_pages(const struct iovec *iov)
static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
{
- u32 cleared = 0;
- struct sdma_mmu_node *node, *ptr;
- struct list_head to_evict = LIST_HEAD_INIT(to_evict);
-
- spin_lock(&pq->evict_lock);
- list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) {
- /* Make sure that no one is still using the node. */
- if (!atomic_read(&node->refcount)) {
- set_bit(SDMA_CACHE_NODE_EVICT, &node->flags);
- list_del_init(&node->list);
- list_add(&node->list, &to_evict);
- cleared += node->npages;
- if (cleared >= npages)
- break;
- }
- }
- spin_unlock(&pq->evict_lock);
-
- list_for_each_entry_safe(node, ptr, &to_evict, list)
- hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
+ struct evict_data evict_data;
- return cleared;
+ evict_data.cleared = 0;
+ evict_data.target = npages;
+ hfi1_mmu_rb_evict(pq->handler, &evict_data);
+ return evict_data.cleared;
}
static int pin_vector_pages(struct user_sdma_request *req,
- struct user_sdma_iovec *iovec) {
+ struct user_sdma_iovec *iovec)
+{
int ret = 0, pinned, npages, cleared;
struct page **pages;
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct sdma_mmu_node *node = NULL;
struct mmu_rb_node *rb_node;
- rb_node = hfi1_mmu_rb_extract(&pq->sdma_rb_root,
+ rb_node = hfi1_mmu_rb_extract(pq->handler,
(unsigned long)iovec->iov.iov_base,
iovec->iov.iov_len);
if (rb_node && !IS_ERR(rb_node))
@@ -1096,7 +1163,6 @@ static int pin_vector_pages(struct user_sdma_request *req,
node->rb.addr = (unsigned long)iovec->iov.iov_base;
node->pq = pq;
atomic_set(&node->refcount, 0);
- INIT_LIST_HEAD(&node->list);
}
npages = num_user_pages(&iovec->iov);
@@ -1111,28 +1177,14 @@ static int pin_vector_pages(struct user_sdma_request *req,
npages -= node->npages;
- /*
- * If rb_node is NULL, it means that this is brand new node
- * and, therefore not on the eviction list.
- * If, however, the rb_node is non-NULL, it means that the
- * node is already in RB tree and, therefore on the eviction
- * list (nodes are unconditionally inserted in the eviction
- * list). In that case, we have to remove the node prior to
- * calling the eviction function in order to prevent it from
- * freeing this node.
- */
- if (rb_node) {
- spin_lock(&pq->evict_lock);
- list_del_init(&node->list);
- spin_unlock(&pq->evict_lock);
- }
retry:
- if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) {
+ if (!hfi1_can_pin_pages(pq->dd, pq->mm,
+ atomic_read(&pq->n_locked), npages)) {
cleared = sdma_cache_evict(pq, npages);
if (cleared >= npages)
goto retry;
}
- pinned = hfi1_acquire_user_pages(
+ pinned = hfi1_acquire_user_pages(pq->mm,
((unsigned long)iovec->iov.iov_base +
(node->npages * PAGE_SIZE)), npages, 0,
pages + node->npages);
@@ -1142,7 +1194,7 @@ retry:
goto bail;
}
if (pinned != npages) {
- unpin_vector_pages(current->mm, pages, node->npages,
+ unpin_vector_pages(pq->mm, pages, node->npages,
pinned);
ret = -EFAULT;
goto bail;
@@ -1152,28 +1204,22 @@ retry:
node->pages = pages;
node->npages += pinned;
npages = node->npages;
- spin_lock(&pq->evict_lock);
- list_add(&node->list, &pq->evict);
- pq->n_locked += pinned;
- spin_unlock(&pq->evict_lock);
+ atomic_add(pinned, &pq->n_locked);
}
iovec->pages = node->pages;
iovec->npages = npages;
iovec->node = node;
- ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
+ ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb);
if (ret) {
- spin_lock(&pq->evict_lock);
- if (!list_empty(&node->list))
- list_del(&node->list);
- pq->n_locked -= node->npages;
- spin_unlock(&pq->evict_lock);
+ atomic_sub(node->npages, &pq->n_locked);
+ iovec->node = NULL;
goto bail;
}
return 0;
bail:
if (rb_node)
- unpin_vector_pages(current->mm, node->pages, 0, node->npages);
+ unpin_vector_pages(pq->mm, node->pages, 0, node->npages);
kfree(node);
return ret;
}
@@ -1181,7 +1227,7 @@ bail:
static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
unsigned start, unsigned npages)
{
- hfi1_release_user_pages(mm, pages + start, npages, 0);
+ hfi1_release_user_pages(mm, pages + start, npages, false);
kfree(pages);
}
@@ -1192,16 +1238,14 @@ static int check_header_template(struct user_sdma_request *req,
/*
* Perform safety checks for any type of packet:
* - transfer size is multiple of 64bytes
- * - packet length is multiple of 4bytes
- * - entire request length is multiple of 4bytes
+ * - packet length is multiple of 4 bytes
* - packet length is not larger than MTU size
*
* These checks are only done for the first packet of the
* transfer since the header is "given" to us by user space.
* For the remainder of the packets we compute the values.
*/
- if (req->info.fragsize % PIO_BLOCK_SIZE ||
- lrhlen & 0x3 || req->data_len & 0x3 ||
+ if (req->info.fragsize % PIO_BLOCK_SIZE || lrhlen & 0x3 ||
lrhlen > get_lrh_len(*hdr, req->info.fragsize))
return -EINVAL;
@@ -1263,7 +1307,7 @@ static int set_txreq_header(struct user_sdma_request *req,
struct hfi1_pkt_header *hdr = &tx->hdr;
u16 pbclen;
int ret;
- u32 tidval = 0, lrhlen = get_lrh_len(*hdr, datalen);
+ u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
/* Copy the header template to the request before modification */
memcpy(hdr, &req->hdr, sizeof(*hdr));
@@ -1374,7 +1418,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct hfi1_pkt_header *hdr = &req->hdr;
u16 pbclen = le16_to_cpu(hdr->pbc[0]);
- u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, len);
+ u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len));
if (PBC2LRH(pbclen) != lrhlen) {
/* PBC.PbcLengthDWs */
@@ -1534,14 +1578,14 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
continue;
if (unpin)
- hfi1_mmu_rb_remove(&req->pq->sdma_rb_root,
+ hfi1_mmu_rb_remove(req->pq->handler,
&node->rb);
else
atomic_dec(&node->refcount);
}
}
kfree(req->tids);
- clear_bit(SDMA_REQ_IN_USE, &req->flags);
+ clear_bit(req->info.comp_idx, req->pq->req_in_use);
}
static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
@@ -1564,7 +1608,7 @@ static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
return (bool)(node->addr == addr);
}
-static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
+static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode)
{
struct sdma_mmu_node *node =
container_of(mnode, struct sdma_mmu_node, rb);
@@ -1573,48 +1617,45 @@ static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
return 0;
}
-static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
- struct mm_struct *mm)
+/*
+ * Return 1 to remove the node from the rb tree and call the remove op.
+ *
+ * Called with the rb tree lock held.
+ */
+static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
+ void *evict_arg, bool *stop)
+{
+ struct sdma_mmu_node *node =
+ container_of(mnode, struct sdma_mmu_node, rb);
+ struct evict_data *evict_data = evict_arg;
+
+ /* is this node still being used? */
+ if (atomic_read(&node->refcount))
+ return 0; /* keep this node */
+
+ /* this node will be evicted, add its pages to our count */
+ evict_data->cleared += node->npages;
+
+ /* have enough pages been cleared? */
+ if (evict_data->cleared >= evict_data->target)
+ *stop = true;
+
+ return 1; /* remove this node */
+}
+
+static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
{
struct sdma_mmu_node *node =
container_of(mnode, struct sdma_mmu_node, rb);
- spin_lock(&node->pq->evict_lock);
- /*
- * We've been called by the MMU notifier but this node has been
- * scheduled for eviction. The eviction function will take care
- * of freeing this node.
- * We have to take the above lock first because we are racing
- * against the setting of the bit in the eviction function.
- */
- if (mm && test_bit(SDMA_CACHE_NODE_EVICT, &node->flags)) {
- spin_unlock(&node->pq->evict_lock);
- return;
- }
+ atomic_sub(node->npages, &node->pq->n_locked);
- if (!list_empty(&node->list))
- list_del(&node->list);
- node->pq->n_locked -= node->npages;
- spin_unlock(&node->pq->evict_lock);
+ unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages);
- /*
- * If mm is set, we are being called by the MMU notifier and we
- * should not pass a mm_struct to unpin_vector_page(). This is to
- * prevent a deadlock when hfi1_release_user_pages() attempts to
- * take the mmap_sem, which the MMU notifier has already taken.
- */
- unpin_vector_pages(mm ? NULL : current->mm, node->pages, 0,
- node->npages);
- /*
- * If called by the MMU notifier, we have to adjust the pinned
- * page count ourselves.
- */
- if (mm)
- mm->pinned_vm -= node->npages;
kfree(node);
}
-static int sdma_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
+static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
{
struct sdma_mmu_node *node =
container_of(mnode, struct sdma_mmu_node, rb);
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index b9240e351161..39001714f551 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -63,14 +63,14 @@ struct hfi1_user_sdma_pkt_q {
struct hfi1_devdata *dd;
struct kmem_cache *txreq_cache;
struct user_sdma_request *reqs;
+ unsigned long *req_in_use;
struct iowait busy;
unsigned state;
wait_queue_head_t wait;
unsigned long unpinned;
- struct rb_root sdma_rb_root;
- u32 n_locked;
- struct list_head evict;
- spinlock_t evict_lock; /* protect evict and n_locked */
+ struct mmu_rb_handler *handler;
+ atomic_t n_locked;
+ struct mm_struct *mm;
};
struct hfi1_user_sdma_comp_q {
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 849c4b9399d4..2b359540901d 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -306,7 +306,10 @@ const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
- [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
+ [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
+ [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
+ [IB_WR_REG_MR] = IB_WC_REG_MR
};
/*
@@ -378,6 +381,8 @@ static const opcode_handler opcode_handler_tbl[256] = {
[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = &hfi1_rc_rcv,
[IB_OPCODE_RC_COMPARE_SWAP] = &hfi1_rc_rcv,
[IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv,
+ [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv,
+ [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv,
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
[IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv,
@@ -540,19 +545,15 @@ void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
/*
* Make sure the QP is ready and able to accept the given opcode.
*/
-static inline int qp_ok(int opcode, struct hfi1_packet *packet)
+static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
{
- struct hfi1_ibport *ibp;
-
if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
- goto dropit;
+ return NULL;
if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
(opcode == IB_OPCODE_CNP))
- return 1;
-dropit:
- ibp = &packet->rcd->ppd->ibport_data;
- ibp->rvp.n_pkt_drops++;
- return 0;
+ return opcode_handler_tbl[opcode];
+
+ return NULL;
}
/**
@@ -571,6 +572,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
+ opcode_handler packet_handler;
unsigned long flags;
u32 qp_num;
int lnh;
@@ -616,8 +618,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
packet->qp = p->qp;
spin_lock_irqsave(&packet->qp->r_lock, flags);
- if (likely((qp_ok(opcode, packet))))
- opcode_handler_tbl[opcode](packet);
+ packet_handler = qp_ok(opcode, packet);
+ if (likely(packet_handler))
+ packet_handler(packet);
+ else
+ ibp->rvp.n_pkt_drops++;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
}
/*
@@ -634,8 +639,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
goto drop;
}
spin_lock_irqsave(&packet->qp->r_lock, flags);
- if (likely((qp_ok(opcode, packet))))
- opcode_handler_tbl[opcode](packet);
+ packet_handler = qp_ok(opcode, packet);
+ if (likely(packet_handler))
+ packet_handler(packet);
+ else
+ ibp->rvp.n_pkt_drops++;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
rcu_read_unlock();
}
@@ -808,19 +816,19 @@ static int build_verbs_tx_desc(
struct rvt_sge_state *ss,
u32 length,
struct verbs_txreq *tx,
- struct ahg_ib_header *ahdr,
+ struct hfi1_ahg_info *ahg_info,
u64 pbc)
{
int ret = 0;
- struct hfi1_pio_header *phdr = &tx->phdr;
+ struct hfi1_sdma_header *phdr = &tx->phdr;
u16 hdrbytes = tx->hdr_dwords << 2;
- if (!ahdr->ahgcount) {
+ if (!ahg_info->ahgcount) {
ret = sdma_txinit_ahg(
&tx->txreq,
- ahdr->tx_flags,
+ ahg_info->tx_flags,
hdrbytes + length,
- ahdr->ahgidx,
+ ahg_info->ahgidx,
0,
NULL,
0,
@@ -838,11 +846,11 @@ static int build_verbs_tx_desc(
} else {
ret = sdma_txinit_ahg(
&tx->txreq,
- ahdr->tx_flags,
+ ahg_info->tx_flags,
length,
- ahdr->ahgidx,
- ahdr->ahgcount,
- ahdr->ahgdesc,
+ ahg_info->ahgidx,
+ ahg_info->ahgcount,
+ ahg_info->ahgdesc,
hdrbytes,
verbs_sdma_complete);
if (ret)
@@ -860,7 +868,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc)
{
struct hfi1_qp_priv *priv = qp->priv;
- struct ahg_ib_header *ahdr = priv->s_hdr;
+ struct hfi1_ahg_info *ahg_info = priv->s_ahg;
u32 hdrwords = qp->s_hdrwords;
struct rvt_sge_state *ss = qp->s_cur_sge;
u32 len = qp->s_cur_size;
@@ -888,7 +896,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
plen);
}
tx->wqe = qp->s_wqe;
- ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc);
+ ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahg_info, pbc);
if (unlikely(ret))
goto bail_build;
}
@@ -1291,19 +1299,24 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+ u16 ver = dd->dc8051_ver;
memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
+ rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) |
+ (u64)dc8051_ver_min(ver);
rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
- IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
+ IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;
rdi->dparms.props.hw_ver = dd->minrev;
rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid;
- rdi->dparms.props.max_mr_size = ~0ULL;
+ rdi->dparms.props.max_mr_size = U64_MAX;
+ rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
rdi->dparms.props.max_qp = hfi1_max_qps;
rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
rdi->dparms.props.max_sge = hfi1_max_sges;
@@ -1567,6 +1580,17 @@ static void init_ibport(struct hfi1_pportdata *ppd)
RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
}
+static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
+ size_t str_len)
+{
+ struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+ struct hfi1_ibdev *dev = dev_from_rdi(rdi);
+ u16 ver = dd_from_dev(dev)->dc8051_ver;
+
+ snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver),
+ dc8051_ver_min(ver));
+}
+
/**
* hfi1_register_ib_device - register our device with the infiniband core
* @dd: the device data structure
@@ -1613,6 +1637,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
/* keep process mad in the driver */
ibdev->process_mad = hfi1_process_mad;
+ ibdev->get_dev_fw_str = hfi1_get_dev_fw_str;
strncpy(ibdev->node_desc, init_utsname()->nodename,
sizeof(ibdev->node_desc));
@@ -1680,6 +1705,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
+ /* post send table */
+ dd->verbs_dev.rdi.post_parms = hfi1_post_parms;
+
ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++)
rvt_init_port(&dd->verbs_dev.rdi,
@@ -1730,8 +1758,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
struct rvt_qp *qp = packet->qp;
u32 lqpn, rqpn = 0;
u16 rlid = 0;
- u8 sl, sc5, sc4_bit, svc_type;
- bool sc4_set = has_sc4_bit(packet);
+ u8 sl, sc5, svc_type;
switch (packet->qp->ibqp.qp_type) {
case IB_QPT_UC:
@@ -1754,9 +1781,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
return;
}
- sc4_bit = sc4_set << 4;
- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
- sc5 |= sc4_bit;
+ sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = qp->ibqp.qp_num;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 488356775627..d1b101c54828 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -178,16 +178,14 @@ struct hfi1_ib_header {
} u;
} __packed;
-struct ahg_ib_header {
- struct sdma_engine *sde;
+struct hfi1_ahg_info {
u32 ahgdesc[2];
u16 tx_flags;
u8 ahgcount;
u8 ahgidx;
- struct hfi1_ib_header ibh;
};
-struct hfi1_pio_header {
+struct hfi1_sdma_header {
__le64 pbc;
struct hfi1_ib_header hdr;
} __packed;
@@ -197,7 +195,7 @@ struct hfi1_pio_header {
* pair is made common
*/
struct hfi1_qp_priv {
- struct ahg_ib_header *s_hdr; /* next header to send */
+ struct hfi1_ahg_info *s_ahg; /* ahg info for next header */
struct sdma_engine *s_sde; /* current sde */
struct send_context *s_sendcontext; /* current sendcontext */
u8 s_sc; /* SC[0..4] for next packet */
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index a1d6e0807f97..5660897593ba 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -56,7 +56,7 @@
#include "iowait.h"
struct verbs_txreq {
- struct hfi1_pio_header phdr;
+ struct hfi1_sdma_header phdr;
struct sdma_txreq txreq;
struct rvt_qp *qp;
struct rvt_swqe *wqe;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index d2fa72516960..5026dc79978a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -1567,12 +1567,12 @@ static enum i40iw_status_code i40iw_del_multiple_qhash(
ret = i40iw_manage_qhash(iwdev, cm_info,
I40IW_QHASH_TYPE_TCP_SYN,
I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false);
- kfree(child_listen_node);
- cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
i40iw_debug(&iwdev->sc_dev,
I40IW_DEBUG_CM,
"freed pointer = %p\n",
child_listen_node);
+ kfree(child_listen_node);
+ cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
}
spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
index bd942da91a27..2fac1db0e0a0 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -1557,6 +1557,9 @@ enum i40iw_alignment {
#define I40IW_RING_MOVE_TAIL(_ring) \
(_ring).tail = ((_ring).tail + 1) % (_ring).size
+#define I40IW_RING_MOVE_HEAD_NOCHECK(_ring) \
+ (_ring).head = ((_ring).head + 1) % (_ring).size
+
#define I40IW_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
(_ring).tail = ((_ring).tail + (_count)) % (_ring).size
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index e9c6e82af9c7..c62d354f7810 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -1025,6 +1025,8 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq,
u16 txoffset, bufoffset;
buf = i40iw_puda_get_listbuf(pbufl);
+ if (!buf)
+ return;
nextseqnum = buf->seqnum + fpdu_len;
txbuf->totallen = buf->hdrlen + fpdu_len;
txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen;
@@ -1048,6 +1050,8 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq,
fpdu_len -= buf->datalen;
i40iw_puda_ret_bufpool(ieq, buf);
buf = i40iw_puda_get_listbuf(pbufl);
+ if (!buf)
+ return;
bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
} while (1);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index 16cc61720b53..2b1a04e9ca3c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -667,7 +667,7 @@ struct i40iw_tcp_offload_info {
bool time_stamp;
u8 cwnd_inc_limit;
bool drop_ooo_seg;
- bool dup_ack_thresh;
+ u8 dup_ack_thresh;
u8 ttl;
u8 src_mac_addr_idx;
bool avoid_stretch_ack;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index e35faea88c13..4d28c3cb03cc 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -291,9 +291,9 @@ static enum i40iw_status_code i40iw_rdma_write(struct i40iw_qp_uk *qp,
i40iw_set_fragment(wqe, 0, op_info->lo_sg_list);
- for (i = 1; i < op_info->num_lo_sges; i++) {
- byte_off = 32 + (i - 1) * 16;
+ for (i = 1, byte_off = 32; i < op_info->num_lo_sges; i++) {
i40iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i]);
+ byte_off += 16;
}
wmb(); /* make sure WQE is populated before valid bit is set */
@@ -401,9 +401,9 @@ static enum i40iw_status_code i40iw_send(struct i40iw_qp_uk *qp,
i40iw_set_fragment(wqe, 0, op_info->sg_list);
- for (i = 1; i < op_info->num_sges; i++) {
- byte_off = 32 + (i - 1) * 16;
+ for (i = 1, byte_off = 32; i < op_info->num_sges; i++) {
i40iw_set_fragment(wqe, byte_off, &op_info->sg_list[i]);
+ byte_off += 16;
}
wmb(); /* make sure WQE is populated before valid bit is set */
@@ -685,9 +685,9 @@ static enum i40iw_status_code i40iw_post_receive(struct i40iw_qp_uk *qp,
i40iw_set_fragment(wqe, 0, info->sg_list);
- for (i = 1; i < info->num_sges; i++) {
- byte_off = 32 + (i - 1) * 16;
+ for (i = 1, byte_off = 32; i < info->num_sges; i++) {
i40iw_set_fragment(wqe, byte_off, &info->sg_list[i]);
+ byte_off += 16;
}
wmb(); /* make sure WQE is populated before valid bit is set */
@@ -753,8 +753,7 @@ static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq,
* @post_cq: update cq tail
*/
static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
- struct i40iw_cq_poll_info *info,
- bool post_cq)
+ struct i40iw_cq_poll_info *info)
{
u64 comp_ctx, qword0, qword2, qword3, wqe_qword;
u64 *cqe, *sw_wqe;
@@ -762,7 +761,6 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
struct i40iw_ring *pring = NULL;
u32 wqe_idx, q_type, array_idx = 0;
enum i40iw_status_code ret_code = 0;
- enum i40iw_status_code ret_code2 = 0;
bool move_cq_head = true;
u8 polarity;
u8 addl_wqes = 0;
@@ -870,19 +868,14 @@ exit:
move_cq_head = false;
if (move_cq_head) {
- I40IW_RING_MOVE_HEAD(cq->cq_ring, ret_code2);
-
- if (ret_code2 && !ret_code)
- ret_code = ret_code2;
+ I40IW_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
if (I40IW_RING_GETCURRENT_HEAD(cq->cq_ring) == 0)
cq->polarity ^= 1;
- if (post_cq) {
- I40IW_RING_MOVE_TAIL(cq->cq_ring);
- set_64bit_val(cq->shadow_area, 0,
- I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
- }
+ I40IW_RING_MOVE_TAIL(cq->cq_ring);
+ set_64bit_val(cq->shadow_area, 0,
+ I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
} else {
if (info->is_srq)
return ret_code;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
index 4627646fe8cd..276bcefffd7e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -327,7 +327,7 @@ struct i40iw_cq_ops {
void (*iw_cq_request_notification)(struct i40iw_cq_uk *,
enum i40iw_completion_notify);
enum i40iw_status_code (*iw_cq_poll_completion)(struct i40iw_cq_uk *,
- struct i40iw_cq_poll_info *, bool);
+ struct i40iw_cq_poll_info *);
enum i40iw_status_code (*iw_cq_post_entries)(struct i40iw_cq_uk *, u8 count);
void (*iw_cq_clean)(void *, struct i40iw_cq_uk *);
};
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 283b64c942ee..2360338877bf 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -529,7 +529,7 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, 0, &rqshift);
if (status)
- return -ENOSYS;
+ return -ENOMEM;
sqdepth = sq_size << sqshift;
rqdepth = rq_size << rqshift;
@@ -671,7 +671,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
if (init_attr->qp_type != IB_QPT_RC) {
- err_code = -ENOSYS;
+ err_code = -EINVAL;
goto error;
}
if (iwdev->push_mode)
@@ -1840,6 +1840,7 @@ struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *pd,
iwmr->ibmr.lkey = stag;
iwmr->page_cnt = 1;
iwmr->pgaddrmem[0] = addr;
+ iwmr->length = size;
status = i40iw_hwreg_mr(iwdev, iwmr, access);
if (status) {
i40iw_free_stag(iwdev, stag);
@@ -1863,7 +1864,7 @@ static struct ib_mr *i40iw_get_dma_mr(struct ib_pd *pd, int acc)
{
u64 kva = 0;
- return i40iw_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
+ return i40iw_reg_phys_mr(pd, 0, 0, acc, &kva);
}
/**
@@ -1975,18 +1976,6 @@ static ssize_t i40iw_show_rev(struct device *dev,
}
/**
- * i40iw_show_fw_ver
- */
-static ssize_t i40iw_show_fw_ver(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- u32 firmware_version = I40IW_FW_VERSION;
-
- return sprintf(buf, "%u.%u\n", firmware_version,
- (firmware_version & 0x000000ff));
-}
-
-/**
* i40iw_show_hca
*/
static ssize_t i40iw_show_hca(struct device *dev,
@@ -2006,13 +1995,11 @@ static ssize_t i40iw_show_board(struct device *dev,
}
static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, i40iw_show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL);
static struct device_attribute *i40iw_dev_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id
};
@@ -2091,8 +2078,12 @@ static int i40iw_post_send(struct ib_qp *ibqp,
ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
}
- if (ret)
- err = -EIO;
+ if (ret) {
+ if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
+ err = -ENOMEM;
+ else
+ err = -EINVAL;
+ }
break;
case IB_WR_RDMA_WRITE:
info.op_type = I40IW_OP_TYPE_RDMA_WRITE;
@@ -2113,8 +2104,12 @@ static int i40iw_post_send(struct ib_qp *ibqp,
ret = ukqp->ops.iw_rdma_write(ukqp, &info, false);
}
- if (ret)
- err = -EIO;
+ if (ret) {
+ if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
+ err = -ENOMEM;
+ else
+ err = -EINVAL;
+ }
break;
case IB_WR_RDMA_READ_WITH_INV:
inv_stag = true;
@@ -2132,15 +2127,19 @@ static int i40iw_post_send(struct ib_qp *ibqp,
info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false);
- if (ret)
- err = -EIO;
+ if (ret) {
+ if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
+ err = -ENOMEM;
+ else
+ err = -EINVAL;
+ }
break;
case IB_WR_LOCAL_INV:
info.op_type = I40IW_OP_TYPE_INV_STAG;
info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true);
if (ret)
- err = -EIO;
+ err = -ENOMEM;
break;
case IB_WR_REG_MR:
{
@@ -2174,7 +2173,7 @@ static int i40iw_post_send(struct ib_qp *ibqp,
ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true);
if (ret)
- err = -EIO;
+ err = -ENOMEM;
break;
}
default:
@@ -2214,6 +2213,7 @@ static int i40iw_post_recv(struct ib_qp *ibqp,
struct i40iw_sge sg_list[I40IW_MAX_WQ_FRAGMENT_COUNT];
enum i40iw_status_code ret = 0;
unsigned long flags;
+ int err = 0;
iwqp = (struct i40iw_qp *)ibqp;
ukqp = &iwqp->sc_qp.qp_uk;
@@ -2228,6 +2228,10 @@ static int i40iw_post_recv(struct ib_qp *ibqp,
ret = ukqp->ops.iw_post_receive(ukqp, &post_recv);
if (ret) {
i40iw_pr_err(" post_recv err %d\n", ret);
+ if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
+ err = -ENOMEM;
+ else
+ err = -EINVAL;
*bad_wr = ib_wr;
goto out;
}
@@ -2235,9 +2239,7 @@ static int i40iw_post_recv(struct ib_qp *ibqp,
}
out:
spin_unlock_irqrestore(&iwqp->lock, flags);
- if (ret)
- return -ENOSYS;
- return 0;
+ return err;
}
/**
@@ -2264,7 +2266,7 @@ static int i40iw_poll_cq(struct ib_cq *ibcq,
spin_lock_irqsave(&iwcq->lock, flags);
while (cqe_count < num_entries) {
- ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true);
+ ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info);
if (ret == I40IW_ERR_QUEUE_EMPTY) {
break;
} else if (ret == I40IW_ERR_QUEUE_DESTROYED) {
@@ -2437,6 +2439,15 @@ static const char * const i40iw_hw_stat_names[] = {
"iwRdmaInv"
};
+static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str,
+ size_t str_len)
+{
+ u32 firmware_version = I40IW_FW_VERSION;
+
+ snprintf(str, str_len, "%u.%u", firmware_version,
+ (firmware_version & 0x000000ff));
+}
+
/**
* i40iw_alloc_hw_stats - Allocate a hw stats structure
* @ibdev: device pointer from stack
@@ -2528,7 +2539,7 @@ static int i40iw_modify_port(struct ib_device *ibdev,
int port_modify_mask,
struct ib_port_modify *props)
{
- return 0;
+ return -ENOSYS;
}
/**
@@ -2660,6 +2671,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name,
sizeof(iwibdev->ibdev.iwcm->ifname));
iwibdev->ibdev.get_port_immutable = i40iw_port_immutable;
+ iwibdev->ibdev.get_dev_fw_str = i40iw_get_dev_fw_str;
iwibdev->ibdev.poll_cq = i40iw_poll_cq;
iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq;
iwibdev->ibdev.post_send = i40iw_post_send;
@@ -2723,7 +2735,7 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
if (!iwdev->iwibdev)
- return -ENOSYS;
+ return -ENOMEM;
iwibdev = iwdev->iwibdev;
ret = ib_register_device(&iwibdev->ibdev, NULL);
@@ -2748,5 +2760,5 @@ error:
kfree(iwdev->iwibdev->ibdev.iwcm);
iwdev->iwibdev->ibdev.iwcm = NULL;
ib_dealloc_device(&iwdev->iwibdev->ibdev);
- return -ENOSYS;
+ return ret;
}
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 9f8b516eb2b0..d6fc8a6e8c33 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -288,7 +288,7 @@ static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
if (cq->resize_buf)
return -EBUSY;
- cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
+ cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL);
if (!cq->resize_buf)
return -ENOMEM;
@@ -316,7 +316,7 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
return -EFAULT;
- cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
+ cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL);
if (!cq->resize_buf)
return -ENOMEM;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 42a46078d7d5..2af44c2de262 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2025,16 +2025,6 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
}
-static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct mlx4_ib_dev *dev =
- container_of(device, struct mlx4_ib_dev, ib_dev.dev);
- return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32),
- (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
- (int) dev->dev->caps.fw_ver & 0xffff);
-}
-
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
@@ -2053,17 +2043,204 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr,
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id
};
+struct diag_counter {
+ const char *name;
+ u32 offset;
+};
+
+#define DIAG_COUNTER(_name, _offset) \
+ { .name = #_name, .offset = _offset }
+
+static const struct diag_counter diag_basic[] = {
+ DIAG_COUNTER(rq_num_lle, 0x00),
+ DIAG_COUNTER(sq_num_lle, 0x04),
+ DIAG_COUNTER(rq_num_lqpoe, 0x08),
+ DIAG_COUNTER(sq_num_lqpoe, 0x0C),
+ DIAG_COUNTER(rq_num_lpe, 0x18),
+ DIAG_COUNTER(sq_num_lpe, 0x1C),
+ DIAG_COUNTER(rq_num_wrfe, 0x20),
+ DIAG_COUNTER(sq_num_wrfe, 0x24),
+ DIAG_COUNTER(sq_num_mwbe, 0x2C),
+ DIAG_COUNTER(sq_num_bre, 0x34),
+ DIAG_COUNTER(sq_num_rire, 0x44),
+ DIAG_COUNTER(rq_num_rire, 0x48),
+ DIAG_COUNTER(sq_num_rae, 0x4C),
+ DIAG_COUNTER(rq_num_rae, 0x50),
+ DIAG_COUNTER(sq_num_roe, 0x54),
+ DIAG_COUNTER(sq_num_tree, 0x5C),
+ DIAG_COUNTER(sq_num_rree, 0x64),
+ DIAG_COUNTER(rq_num_rnr, 0x68),
+ DIAG_COUNTER(sq_num_rnr, 0x6C),
+ DIAG_COUNTER(rq_num_oos, 0x100),
+ DIAG_COUNTER(sq_num_oos, 0x104),
+};
+
+static const struct diag_counter diag_ext[] = {
+ DIAG_COUNTER(rq_num_dup, 0x130),
+ DIAG_COUNTER(sq_num_to, 0x134),
+};
+
+static const struct diag_counter diag_device_only[] = {
+ DIAG_COUNTER(num_cqovf, 0x1A0),
+ DIAG_COUNTER(rq_num_udsdprd, 0x118),
+};
+
+static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct mlx4_ib_diag_counters *diag = dev->diag_counters;
+
+ if (!diag[!!port_num].name)
+ return NULL;
+
+ return rdma_alloc_hw_stats_struct(diag[!!port_num].name,
+ diag[!!port_num].num_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port, int index)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct mlx4_ib_diag_counters *diag = dev->diag_counters;
+ u32 hw_value[ARRAY_SIZE(diag_device_only) +
+ ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {};
+ int ret;
+ int i;
+
+ ret = mlx4_query_diag_counters(dev->dev,
+ MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,
+ diag[!!port].offset, hw_value,
+ diag[!!port].num_counters, port);
+
+ if (ret)
+ return ret;
+
+ for (i = 0; i < diag[!!port].num_counters; i++)
+ stats->value[i] = hw_value[i];
+
+ return diag[!!port].num_counters;
+}
+
+static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
+ const char ***name,
+ u32 **offset,
+ u32 *num,
+ bool port)
+{
+ u32 num_counters;
+
+ num_counters = ARRAY_SIZE(diag_basic);
+
+ if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)
+ num_counters += ARRAY_SIZE(diag_ext);
+
+ if (!port)
+ num_counters += ARRAY_SIZE(diag_device_only);
+
+ *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL);
+ if (!*name)
+ return -ENOMEM;
+
+ *offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
+ if (!*offset)
+ goto err_name;
+
+ *num = num_counters;
+
+ return 0;
+
+err_name:
+ kfree(*name);
+ return -ENOMEM;
+}
+
+static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
+ const char **name,
+ u32 *offset,
+ bool port)
+{
+ int i;
+ int j;
+
+ for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) {
+ name[i] = diag_basic[i].name;
+ offset[i] = diag_basic[i].offset;
+ }
+
+ if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
+ for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {
+ name[j] = diag_ext[i].name;
+ offset[j] = diag_ext[i].offset;
+ }
+ }
+
+ if (!port) {
+ for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {
+ name[j] = diag_device_only[i].name;
+ offset[j] = diag_device_only[i].offset;
+ }
+ }
+}
+
+static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
+{
+ struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
+ int i;
+ int ret;
+ bool per_port = !!(ibdev->dev->caps.flags2 &
+ MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
+
+ for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
+ /* i == 1 means we are building port counters */
+ if (i && !per_port)
+ continue;
+
+ ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name,
+ &diag[i].offset,
+ &diag[i].num_counters, i);
+ if (ret)
+ goto err_alloc;
+
+ mlx4_ib_fill_diag_counters(ibdev, diag[i].name,
+ diag[i].offset, i);
+ }
+
+ ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats;
+ ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats;
+
+ return 0;
+
+err_alloc:
+ if (i) {
+ kfree(diag[i - 1].name);
+ kfree(diag[i - 1].offset);
+ }
+
+ return ret;
+}
+
+static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
+{
+ int i;
+
+ for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
+ kfree(ibdev->diag_counters[i].offset);
+ kfree(ibdev->diag_counters[i].name);
+ }
+}
+
#define MLX4_IB_INVALID_MAC ((u64)-1)
static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
struct net_device *dev,
@@ -2280,6 +2457,17 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+static void get_fw_ver_str(struct ib_device *device, char *str,
+ size_t str_len)
+{
+ struct mlx4_ib_dev *dev =
+ container_of(device, struct mlx4_ib_dev, ib_dev);
+ snprintf(str, str_len, "%d.%d.%d",
+ (int) (dev->dev->caps.fw_ver >> 32),
+ (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
+ (int) dev->dev->caps.fw_ver & 0xffff);
+}
+
static void *mlx4_ib_add(struct mlx4_dev *dev)
{
struct mlx4_ib_dev *ibdev;
@@ -2413,6 +2601,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
+ ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str;
ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
if (!mlx4_is_slave(ibdev->dev)) {
@@ -2555,9 +2744,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
- if (ib_register_device(&ibdev->ib_dev, NULL))
+ if (mlx4_ib_alloc_diag_counters(ibdev))
goto err_steer_free_bitmap;
+ if (ib_register_device(&ibdev->ib_dev, NULL))
+ goto err_diag_counters;
+
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
@@ -2623,6 +2815,9 @@ err_mad:
err_reg:
ib_unregister_device(&ibdev->ib_dev);
+err_diag_counters:
+ mlx4_ib_diag_cleanup(ibdev);
+
err_steer_free_bitmap:
kfree(ibdev->ib_uc_qpns_bitmap);
@@ -2726,6 +2921,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
mlx4_ib_close_sriov(ibdev);
mlx4_ib_mad_cleanup(ibdev);
ib_unregister_device(&ibdev->ib_dev);
+ mlx4_ib_diag_cleanup(ibdev);
if (ibdev->iboe.nb.notifier_call) {
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 29acda249612..7c5832ede4bd 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -549,6 +549,14 @@ struct mlx4_ib_counters {
u32 default_counter;
};
+#define MLX4_DIAG_COUNTERS_TYPES 2
+
+struct mlx4_ib_diag_counters {
+ const char **name;
+ u32 *offset;
+ u32 num_counters;
+};
+
struct mlx4_ib_dev {
struct ib_device ib_dev;
struct mlx4_dev *dev;
@@ -585,6 +593,7 @@ struct mlx4_ib_dev {
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
+ struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES];
};
struct ib_event_work {
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 9c0e67bd2ba7..308a358e5b46 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -424,6 +424,83 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
item->key = be32_to_cpu(cqe->mkey);
}
+static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries,
+ struct ib_wc *wc, int *npolled)
+{
+ struct mlx5_ib_wq *wq;
+ unsigned int cur;
+ unsigned int idx;
+ int np;
+ int i;
+
+ wq = &qp->sq;
+ cur = wq->head - wq->tail;
+ np = *npolled;
+
+ if (cur == 0)
+ return;
+
+ for (i = 0; i < cur && np < num_entries; i++) {
+ idx = wq->last_poll & (wq->wqe_cnt - 1);
+ wc->wr_id = wq->wrid[idx];
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
+ wq->tail++;
+ np++;
+ wc->qp = &qp->ibqp;
+ wc++;
+ wq->last_poll = wq->w_list[idx].next;
+ }
+ *npolled = np;
+}
+
+static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries,
+ struct ib_wc *wc, int *npolled)
+{
+ struct mlx5_ib_wq *wq;
+ unsigned int cur;
+ int np;
+ int i;
+
+ wq = &qp->rq;
+ cur = wq->head - wq->tail;
+ np = *npolled;
+
+ if (cur == 0)
+ return;
+
+ for (i = 0; i < cur && np < num_entries; i++) {
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
+ wq->tail++;
+ np++;
+ wc->qp = &qp->ibqp;
+ wc++;
+ }
+ *npolled = np;
+}
+
+static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
+ struct ib_wc *wc, int *npolled)
+{
+ struct mlx5_ib_qp *qp;
+
+ *npolled = 0;
+ /* Find uncompleted WQEs belonging to that cq and retrun mmics ones */
+ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
+ sw_send_comp(qp, num_entries, wc + *npolled, npolled);
+ if (*npolled >= num_entries)
+ return;
+ }
+
+ list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
+ sw_recv_comp(qp, num_entries, wc + *npolled, npolled);
+ if (*npolled >= num_entries)
+ return;
+ }
+}
+
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_ib_qp **cur_qp,
struct ib_wc *wc)
@@ -594,12 +671,18 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
struct mlx5_ib_cq *cq = to_mcq(ibcq);
struct mlx5_ib_qp *cur_qp = NULL;
+ struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+ struct mlx5_core_dev *mdev = dev->mdev;
unsigned long flags;
int soft_polled = 0;
int npolled;
int err = 0;
spin_lock_irqsave(&cq->lock, flags);
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled);
+ goto out;
+ }
if (unlikely(!list_empty(&cq->wc_list)))
soft_polled = poll_soft_wc(cq, num_entries, wc);
@@ -612,7 +695,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
if (npolled)
mlx5_cq_set_ci(&cq->mcq);
-
+out:
spin_unlock_irqrestore(&cq->lock, flags);
if (err == 0 || err == -EAGAIN)
@@ -843,6 +926,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
cq->resize_buf = NULL;
cq->resize_umem = NULL;
cq->create_flags = attr->flags;
+ INIT_LIST_HEAD(&cq->list_send_qp);
+ INIT_LIST_HEAD(&cq->list_recv_qp);
if (context) {
err = create_cq_user(dev, udata, context, cq, entries,
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 53e03c8ede79..79e6309460dc 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -69,15 +69,6 @@ static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
}
-static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index)
-{
- return ++index % gsi->cap.max_send_wr;
-}
-
-#define for_each_outstanding_wr(gsi, index) \
- for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \
- index = next_outstanding(gsi, index))
-
/* Call with gsi->lock locked */
static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
{
@@ -85,8 +76,9 @@ static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
struct mlx5_ib_gsi_wr *wr;
u32 index;
- for_each_outstanding_wr(gsi, index) {
- wr = &gsi->outstanding_wrs[index];
+ for (index = gsi->outstanding_ci; index != gsi->outstanding_pi;
+ index++) {
+ wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr];
if (!wr->completed)
break;
@@ -430,8 +422,9 @@ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
return -ENOMEM;
}
- gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi];
- gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi);
+ gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi %
+ gsi->cap.max_send_wr];
+ gsi->outstanding_pi++;
if (!wc) {
memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index dad63f038bb8..a84bb766fc62 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -42,11 +42,13 @@
#include <asm/pat.h>
#endif
#include <linux/sched.h>
+#include <linux/delay.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
+#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <linux/in.h>
@@ -457,8 +459,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
int max_rq_sg;
int max_sq_sg;
u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
+ struct mlx5_ib_query_device_resp resp = {};
+ size_t resp_len;
+ u64 max_tso;
- if (uhw->inlen || uhw->outlen)
+ resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
+ if (uhw->outlen && uhw->outlen < resp_len)
+ return -EINVAL;
+ else
+ resp.response_length = resp_len;
+
+ if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
return -EINVAL;
memset(props, 0, sizeof(*props));
@@ -511,10 +522,21 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, block_lb_mc))
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
- if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
- (MLX5_CAP_ETH(dev->mdev, csum_cap)))
+ if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
+ if (MLX5_CAP_ETH(mdev, csum_cap))
props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+ if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
+ max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
+ if (max_tso) {
+ resp.tso_caps.max_tso = 1 << max_tso;
+ resp.tso_caps.supported_qpts |=
+ 1 << IB_QPT_RAW_PACKET;
+ resp.response_length += sizeof(resp.tso_caps);
+ }
+ }
+ }
+
if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
props->device_cap_flags |= IB_DEVICE_UD_TSO;
@@ -576,6 +598,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (!mlx5_core_is_pf(mdev))
props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
+ if (uhw->outlen) {
+ err = ib_copy_to_udata(uhw, &resp, resp.response_length);
+
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -983,6 +1012,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
goto out_uars;
}
+ INIT_LIST_HEAD(&context->vma_private_list);
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
@@ -992,6 +1022,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (field_avail(typeof(resp), cqe_version, udata->outlen))
resp.response_length += sizeof(resp.cqe_version);
+ if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
+ resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE;
+ resp.response_length += sizeof(resp.cmds_supp_uhw);
+ }
+
/*
* We don't want to expose information from the PCI bar that is located
* after 4096 bytes, so if the arch only supports larger pages, let's
@@ -1006,8 +1041,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
offsetof(struct mlx5_init_seg, internal_timer_h) %
PAGE_SIZE;
resp.response_length += sizeof(resp.hca_core_clock_offset) +
- sizeof(resp.reserved2) +
- sizeof(resp.reserved3);
+ sizeof(resp.reserved2);
}
err = ib_copy_to_udata(udata, &resp, resp.response_length);
@@ -1086,6 +1120,125 @@ static int get_index(unsigned long offset)
return get_arg(offset);
}
+static void mlx5_ib_vma_open(struct vm_area_struct *area)
+{
+ /* vma_open is called when a new VMA is created on top of our VMA. This
+ * is done through either mremap flow or split_vma (usually due to
+ * mlock, madvise, munmap, etc.) We do not support a clone of the VMA,
+ * as this VMA is strongly hardware related. Therefore we set the
+ * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
+ * calling us again and trying to do incorrect actions. We assume that
+ * the original VMA size is exactly a single page, and therefore all
+ * "splitting" operation will not happen to it.
+ */
+ area->vm_ops = NULL;
+}
+
+static void mlx5_ib_vma_close(struct vm_area_struct *area)
+{
+ struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
+
+ /* It's guaranteed that all VMAs opened on a FD are closed before the
+ * file itself is closed, therefore no sync is needed with the regular
+ * closing flow. (e.g. mlx5 ib_dealloc_ucontext)
+ * However need a sync with accessing the vma as part of
+ * mlx5_ib_disassociate_ucontext.
+ * The close operation is usually called under mm->mmap_sem except when
+ * process is exiting.
+ * The exiting case is handled explicitly as part of
+ * mlx5_ib_disassociate_ucontext.
+ */
+ mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
+
+ /* setting the vma context pointer to null in the mlx5_ib driver's
+ * private data, to protect a race condition in
+ * mlx5_ib_disassociate_ucontext().
+ */
+ mlx5_ib_vma_priv_data->vma = NULL;
+ list_del(&mlx5_ib_vma_priv_data->list);
+ kfree(mlx5_ib_vma_priv_data);
+}
+
+static const struct vm_operations_struct mlx5_ib_vm_ops = {
+ .open = mlx5_ib_vma_open,
+ .close = mlx5_ib_vma_close
+};
+
+static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
+ struct mlx5_ib_ucontext *ctx)
+{
+ struct mlx5_ib_vma_private_data *vma_prv;
+ struct list_head *vma_head = &ctx->vma_private_list;
+
+ vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
+ if (!vma_prv)
+ return -ENOMEM;
+
+ vma_prv->vma = vma;
+ vma->vm_private_data = vma_prv;
+ vma->vm_ops = &mlx5_ib_vm_ops;
+
+ list_add(&vma_prv->list, vma_head);
+
+ return 0;
+}
+
+static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+ int ret;
+ struct vm_area_struct *vma;
+ struct mlx5_ib_vma_private_data *vma_private, *n;
+ struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
+ struct task_struct *owning_process = NULL;
+ struct mm_struct *owning_mm = NULL;
+
+ owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
+ if (!owning_process)
+ return;
+
+ owning_mm = get_task_mm(owning_process);
+ if (!owning_mm) {
+ pr_info("no mm, disassociate ucontext is pending task termination\n");
+ while (1) {
+ put_task_struct(owning_process);
+ usleep_range(1000, 2000);
+ owning_process = get_pid_task(ibcontext->tgid,
+ PIDTYPE_PID);
+ if (!owning_process ||
+ owning_process->state == TASK_DEAD) {
+ pr_info("disassociate ucontext done, task was terminated\n");
+ /* in case task was dead need to release the
+ * task struct.
+ */
+ if (owning_process)
+ put_task_struct(owning_process);
+ return;
+ }
+ }
+ }
+
+ /* need to protect from a race on closing the vma as part of
+ * mlx5_ib_vma_close.
+ */
+ down_read(&owning_mm->mmap_sem);
+ list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
+ list) {
+ vma = vma_private->vma;
+ ret = zap_vma_ptes(vma, vma->vm_start,
+ PAGE_SIZE);
+ WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__);
+ /* context going to be destroyed, should
+ * not access ops any more.
+ */
+ vma->vm_ops = NULL;
+ list_del(&vma_private->list);
+ kfree(vma_private);
+ }
+ up_read(&owning_mm->mmap_sem);
+ mmput(owning_mm);
+ put_task_struct(owning_process);
+}
+
static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
{
switch (cmd) {
@@ -1101,8 +1254,10 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
}
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
- struct vm_area_struct *vma, struct mlx5_uuar_info *uuari)
+ struct vm_area_struct *vma,
+ struct mlx5_ib_ucontext *context)
{
+ struct mlx5_uuar_info *uuari = &context->uuari;
int err;
unsigned long idx;
phys_addr_t pfn, pa;
@@ -1152,14 +1307,13 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
vma->vm_start, &pa);
- return 0;
+ return mlx5_ib_set_vma_data(vma, context);
}
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
- struct mlx5_uuar_info *uuari = &context->uuari;
unsigned long command;
phys_addr_t pfn;
@@ -1168,7 +1322,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
case MLX5_IB_MMAP_WC_PAGE:
case MLX5_IB_MMAP_NC_PAGE:
case MLX5_IB_MMAP_REGULAR_PAGE:
- return uar_mmap(dev, command, vma, uuari);
+ return uar_mmap(dev, command, vma, context);
case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
return -ENOSYS;
@@ -1331,6 +1485,32 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
&ib_spec->ipv4.val.dst_ip,
sizeof(ib_spec->ipv4.val.dst_ip));
break;
+ case IB_FLOW_SPEC_IPV6:
+ if (ib_spec->size != sizeof(ib_spec->ipv6))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ ethertype, ETH_P_IPV6);
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.mask.src_ip,
+ sizeof(ib_spec->ipv6.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.val.src_ip,
+ sizeof(ib_spec->ipv6.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.mask.dst_ip,
+ sizeof(ib_spec->ipv6.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.val.dst_ip,
+ sizeof(ib_spec->ipv6.val.dst_ip));
+ break;
case IB_FLOW_SPEC_TCP:
if (ib_spec->size != sizeof(ib_spec->tcp_udp))
return -EINVAL;
@@ -1801,15 +1981,6 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}
-static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
- return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
- fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
-}
-
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
@@ -1828,7 +1999,6 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr,
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
@@ -1836,7 +2006,6 @@ static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
static struct device_attribute *mlx5_class_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id,
&dev_attr_fw_pages,
@@ -1854,6 +2023,65 @@ static void pkey_change_handler(struct work_struct *work)
mutex_unlock(&ports->devr->mutex);
}
+static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
+{
+ struct mlx5_ib_qp *mqp;
+ struct mlx5_ib_cq *send_mcq, *recv_mcq;
+ struct mlx5_core_cq *mcq;
+ struct list_head cq_armed_list;
+ unsigned long flags_qp;
+ unsigned long flags_cq;
+ unsigned long flags;
+
+ INIT_LIST_HEAD(&cq_armed_list);
+
+ /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/
+ spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
+ list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
+ spin_lock_irqsave(&mqp->sq.lock, flags_qp);
+ if (mqp->sq.tail != mqp->sq.head) {
+ send_mcq = to_mcq(mqp->ibqp.send_cq);
+ spin_lock_irqsave(&send_mcq->lock, flags_cq);
+ if (send_mcq->mcq.comp &&
+ mqp->ibqp.send_cq->comp_handler) {
+ if (!send_mcq->mcq.reset_notify_added) {
+ send_mcq->mcq.reset_notify_added = 1;
+ list_add_tail(&send_mcq->mcq.reset_notify,
+ &cq_armed_list);
+ }
+ }
+ spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
+ }
+ spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
+ spin_lock_irqsave(&mqp->rq.lock, flags_qp);
+ /* no handling is needed for SRQ */
+ if (!mqp->ibqp.srq) {
+ if (mqp->rq.tail != mqp->rq.head) {
+ recv_mcq = to_mcq(mqp->ibqp.recv_cq);
+ spin_lock_irqsave(&recv_mcq->lock, flags_cq);
+ if (recv_mcq->mcq.comp &&
+ mqp->ibqp.recv_cq->comp_handler) {
+ if (!recv_mcq->mcq.reset_notify_added) {
+ recv_mcq->mcq.reset_notify_added = 1;
+ list_add_tail(&recv_mcq->mcq.reset_notify,
+ &cq_armed_list);
+ }
+ }
+ spin_unlock_irqrestore(&recv_mcq->lock,
+ flags_cq);
+ }
+ }
+ spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
+ }
+ /*At that point all inflight post send were put to be executed as of we
+ * lock/unlock above locks Now need to arm all involved CQs.
+ */
+ list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
+ mcq->comp(mcq);
+ }
+ spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
+}
+
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param)
{
@@ -1866,6 +2094,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
case MLX5_DEV_EVENT_SYS_ERROR:
ibdev->ib_active = false;
ibev.event = IB_EVENT_DEVICE_FATAL;
+ mlx5_ib_handle_internal_error(ibdev);
break;
case MLX5_DEV_EVENT_PORT_UP:
@@ -2272,6 +2501,15 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+static void get_dev_fw_str(struct ib_device *ibdev, char *str,
+ size_t str_len)
+{
+ struct mlx5_ib_dev *dev =
+ container_of(ibdev, struct mlx5_ib_dev, ib_dev);
+ snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev),
+ fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
+}
+
static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
{
int err;
@@ -2298,6 +2536,113 @@ static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
unregister_netdevice_notifier(&dev->roce.nb);
}
+static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
+{
+ unsigned int i;
+
+ for (i = 0; i < dev->num_ports; i++)
+ mlx5_core_dealloc_q_counter(dev->mdev,
+ dev->port[i].q_cnt_id);
+}
+
+static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
+{
+ int i;
+ int ret;
+
+ for (i = 0; i < dev->num_ports; i++) {
+ ret = mlx5_core_alloc_q_counter(dev->mdev,
+ &dev->port[i].q_cnt_id);
+ if (ret) {
+ mlx5_ib_warn(dev,
+ "couldn't allocate queue counter for port %d, err %d\n",
+ i + 1, ret);
+ goto dealloc_counters;
+ }
+ }
+
+ return 0;
+
+dealloc_counters:
+ while (--i >= 0)
+ mlx5_core_dealloc_q_counter(dev->mdev,
+ dev->port[i].q_cnt_id);
+
+ return ret;
+}
+
+static const char * const names[] = {
+ "rx_write_requests",
+ "rx_read_requests",
+ "rx_atomic_requests",
+ "out_of_buffer",
+ "out_of_sequence",
+ "duplicate_request",
+ "rnr_nak_retry_err",
+ "packet_seq_err",
+ "implied_nak_seq_err",
+ "local_ack_timeout_err",
+};
+
+static const size_t stats_offsets[] = {
+ MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
+ MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
+ MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
+ MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
+ MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
+ MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
+ MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
+ MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
+ MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
+ MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
+};
+
+static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));
+
+ /* We support only per port stats */
+ if (port_num == 0)
+ return NULL;
+
+ return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port, int index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
+ void *out;
+ __be32 val;
+ int ret;
+ int i;
+
+ if (!port || !stats)
+ return -ENOSYS;
+
+ out = mlx5_vzalloc(outlen);
+ if (!out)
+ return -ENOMEM;
+
+ ret = mlx5_core_query_q_counter(dev->mdev,
+ dev->port[port - 1].q_cnt_id, 0,
+ out, outlen);
+ if (ret)
+ goto free;
+
+ for (i = 0; i < ARRAY_SIZE(names); i++) {
+ val = *(__be32 *)(out + stats_offsets[i]);
+ stats->value[i] = (u64)be32_to_cpu(val);
+ }
+free:
+ kvfree(out);
+ return ARRAY_SIZE(names);
+}
+
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
@@ -2320,10 +2665,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->mdev = mdev;
+ dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
+ GFP_KERNEL);
+ if (!dev->port)
+ goto err_dealloc;
+
rwlock_init(&dev->roce.netdev_lock);
err = get_port_caps(dev);
if (err)
- goto err_dealloc;
+ goto err_free_port;
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
@@ -2418,6 +2768,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
+ dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
if (mlx5_core_is_pf(mdev)) {
dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
@@ -2425,6 +2776,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
}
+ dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;
+
mlx5_ib_internal_fill_odp_caps(dev);
if (MLX5_CAP_GEN(mdev, imaicl)) {
@@ -2435,6 +2788,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
}
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
+ MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
+ dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
+ }
+
if (MLX5_CAP_GEN(mdev, xrc)) {
dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
@@ -2447,9 +2806,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
IB_LINK_LAYER_ETHERNET) {
dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
+ dev->ib_dev.create_wq = mlx5_ib_create_wq;
+ dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
+ dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
+ dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
+ dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
}
err = init_node_data(dev);
if (err)
@@ -2457,6 +2826,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
+ INIT_LIST_HEAD(&dev->qp_list);
+ spin_lock_init(&dev->reset_flow_resource_lock);
if (ll == IB_LINK_LAYER_ETHERNET) {
err = mlx5_enable_roce(dev);
@@ -2472,10 +2843,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (err)
goto err_rsrc;
- err = ib_register_device(&dev->ib_dev, NULL);
+ err = mlx5_ib_alloc_q_counters(dev);
if (err)
goto err_odp;
+ err = ib_register_device(&dev->ib_dev, NULL);
+ if (err)
+ goto err_q_cnt;
+
err = create_umr_res(dev);
if (err)
goto err_dev;
@@ -2497,6 +2872,9 @@ err_umrc:
err_dev:
ib_unregister_device(&dev->ib_dev);
+err_q_cnt:
+ mlx5_ib_dealloc_q_counters(dev);
+
err_odp:
mlx5_ib_odp_remove_one(dev);
@@ -2507,6 +2885,9 @@ err_disable_roce:
if (ll == IB_LINK_LAYER_ETHERNET)
mlx5_disable_roce(dev);
+err_free_port:
+ kfree(dev->port);
+
err_dealloc:
ib_dealloc_device((struct ib_device *)dev);
@@ -2519,11 +2900,13 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
ib_unregister_device(&dev->ib_dev);
+ mlx5_ib_dealloc_q_counters(dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
if (ll == IB_LINK_LAYER_ETHERNET)
mlx5_disable_roce(dev);
+ kfree(dev->port);
ib_dealloc_device(&dev->ib_dev);
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index c4a9825828bc..372385d0f993 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -105,6 +105,11 @@ enum {
MLX5_CQE_VERSION_V1,
};
+struct mlx5_ib_vma_private_data {
+ struct list_head list;
+ struct vm_area_struct *vma;
+};
+
struct mlx5_ib_ucontext {
struct ib_ucontext ibucontext;
struct list_head db_page_list;
@@ -116,6 +121,7 @@ struct mlx5_ib_ucontext {
u8 cqe_version;
/* Transport Domain number */
u32 tdn;
+ struct list_head vma_private_list;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -217,12 +223,41 @@ struct mlx5_ib_wq {
void *qend;
};
+struct mlx5_ib_rwq {
+ struct ib_wq ibwq;
+ u32 rqn;
+ u32 rq_num_pas;
+ u32 log_rq_stride;
+ u32 log_rq_size;
+ u32 rq_page_offset;
+ u32 log_page_size;
+ struct ib_umem *umem;
+ size_t buf_size;
+ unsigned int page_shift;
+ int create_type;
+ struct mlx5_db db;
+ u32 user_index;
+ u32 wqe_count;
+ u32 wqe_shift;
+ int wq_sig;
+};
+
enum {
MLX5_QP_USER,
MLX5_QP_KERNEL,
MLX5_QP_EMPTY
};
+enum {
+ MLX5_WQ_USER,
+ MLX5_WQ_KERNEL
+};
+
+struct mlx5_ib_rwq_ind_table {
+ struct ib_rwq_ind_table ib_rwq_ind_tbl;
+ u32 rqtn;
+};
+
/*
* Connect-IB can trigger up to four concurrent pagefaults
* per-QP.
@@ -266,6 +301,10 @@ struct mlx5_ib_qp_trans {
u8 resp_depth;
};
+struct mlx5_ib_rss_qp {
+ u32 tirn;
+};
+
struct mlx5_ib_rq {
struct mlx5_ib_qp_base base;
struct mlx5_ib_wq *rq;
@@ -294,6 +333,7 @@ struct mlx5_ib_qp {
union {
struct mlx5_ib_qp_trans trans_qp;
struct mlx5_ib_raw_packet_qp raw_packet_qp;
+ struct mlx5_ib_rss_qp rss_qp;
};
struct mlx5_buf buf;
@@ -340,6 +380,9 @@ struct mlx5_ib_qp {
spinlock_t disable_page_faults_lock;
struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
#endif
+ struct list_head qps_list;
+ struct list_head cq_recv_list;
+ struct list_head cq_send_list;
};
struct mlx5_ib_cq_buf {
@@ -401,6 +444,8 @@ struct mlx5_ib_cq {
struct mlx5_ib_cq_buf *resize_buf;
struct ib_umem *resize_umem;
int cqe_size;
+ struct list_head list_send_qp;
+ struct list_head list_recv_qp;
u32 create_flags;
struct list_head wc_list;
enum ib_cq_notify_flags notify_flags;
@@ -546,6 +591,10 @@ struct mlx5_ib_resources {
struct mutex mutex;
};
+struct mlx5_ib_port {
+ u16 q_cnt_id;
+};
+
struct mlx5_roce {
/* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
* netdev pointer
@@ -581,6 +630,11 @@ struct mlx5_ib_dev {
struct srcu_struct mr_srcu;
#endif
struct mlx5_ib_flow_db flow_db;
+ /* protect resources needed as part of reset flow */
+ spinlock_t reset_flow_resource_lock;
+ struct list_head qp_list;
+ /* Array with num_ports elements */
+ struct mlx5_ib_port *port;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -628,6 +682,16 @@ static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp)
return container_of(ibqp, struct mlx5_ib_qp, ibqp);
}
+static inline struct mlx5_ib_rwq *to_mrwq(struct ib_wq *ibwq)
+{
+ return container_of(ibwq, struct mlx5_ib_rwq, ibwq);
+}
+
+static inline struct mlx5_ib_rwq_ind_table *to_mrwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
+{
+ return container_of(ib_rwq_ind_tbl, struct mlx5_ib_rwq_ind_table, ib_rwq_ind_tbl);
+}
+
static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
{
return container_of(msrq, struct mlx5_ib_srq, msrq);
@@ -762,6 +826,16 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
+struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx5_ib_destroy_wq(struct ib_wq *wq);
+int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata);
+struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
extern struct workqueue_struct *mlx5_ib_page_fault_wq;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 8cf2ce50511f..4b021305c321 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1193,12 +1193,16 @@ error:
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
+ struct mlx5_core_dev *mdev = dev->mdev;
struct umr_common *umrc = &dev->umrc;
struct mlx5_ib_umr_context umr_context;
struct mlx5_umr_wr umrwr = {};
struct ib_send_wr *bad;
int err;
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ return 0;
+
mlx5_ib_init_umr_context(&umr_context);
umrwr.wr.wr_cqe = &umr_context.cqe;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index ce0a7ab35a22..0dd7d93cac95 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -77,6 +77,10 @@ struct mlx5_wqe_eth_pad {
u8 rsvd0[16];
};
+static void get_cqs(enum ib_qp_type qp_type,
+ struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
+ struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq);
+
static int is_qp0(enum ib_qp_type qp_type)
{
return qp_type == IB_QPT_SMI;
@@ -609,6 +613,11 @@ static int to_mlx5_st(enum ib_qp_type type)
}
}
+static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
+ struct mlx5_ib_cq *recv_cq);
+static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
+ struct mlx5_ib_cq *recv_cq);
+
static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
{
return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
@@ -649,6 +658,71 @@ err_umem:
return err;
}
+static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq)
+{
+ struct mlx5_ib_ucontext *context;
+
+ context = to_mucontext(pd->uobject->context);
+ mlx5_ib_db_unmap_user(context, &rwq->db);
+ if (rwq->umem)
+ ib_umem_release(rwq->umem);
+}
+
+static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_rwq *rwq,
+ struct mlx5_ib_create_wq *ucmd)
+{
+ struct mlx5_ib_ucontext *context;
+ int page_shift = 0;
+ int npages;
+ u32 offset = 0;
+ int ncont = 0;
+ int err;
+
+ if (!ucmd->buf_addr)
+ return -EINVAL;
+
+ context = to_mucontext(pd->uobject->context);
+ rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr,
+ rwq->buf_size, 0, 0);
+ if (IS_ERR(rwq->umem)) {
+ mlx5_ib_dbg(dev, "umem_get failed\n");
+ err = PTR_ERR(rwq->umem);
+ return err;
+ }
+
+ mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift,
+ &ncont, NULL);
+ err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift,
+ &rwq->rq_page_offset);
+ if (err) {
+ mlx5_ib_warn(dev, "bad offset\n");
+ goto err_umem;
+ }
+
+ rwq->rq_num_pas = ncont;
+ rwq->page_shift = page_shift;
+ rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE);
+
+ mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n",
+ (unsigned long long)ucmd->buf_addr, rwq->buf_size,
+ npages, page_shift, ncont, offset);
+
+ err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db);
+ if (err) {
+ mlx5_ib_dbg(dev, "map failed\n");
+ goto err_umem;
+ }
+
+ rwq->create_type = MLX5_WQ_USER;
+ return 0;
+
+err_umem:
+ ib_umem_release(rwq->umem);
+ return err;
+}
+
static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_qp *qp, struct ib_udata *udata,
struct ib_qp_init_attr *attr,
@@ -1201,6 +1275,187 @@ static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
rq->doorbell = &qp->db;
}
+static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
+{
+ mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn);
+}
+
+static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ib_uobject *uobj = pd->uobject;
+ struct ib_ucontext *ucontext = uobj->context;
+ struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
+ struct mlx5_ib_create_qp_resp resp = {};
+ int inlen;
+ int err;
+ u32 *in;
+ void *tirc;
+ void *hfso;
+ u32 selected_fields = 0;
+ size_t min_resp_len;
+ u32 tdn = mucontext->tdn;
+ struct mlx5_ib_create_qp_rss ucmd = {};
+ size_t required_cmd_sz;
+
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+ return -EOPNOTSUPP;
+
+ if (init_attr->create_flags || init_attr->send_cq)
+ return -EINVAL;
+
+ min_resp_len = offsetof(typeof(resp), uuar_index) + sizeof(resp.uuar_index);
+ if (udata->outlen < min_resp_len)
+ return -EINVAL;
+
+ required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1);
+ if (udata->inlen < required_cmd_sz) {
+ mlx5_ib_dbg(dev, "invalid inlen\n");
+ return -EINVAL;
+ }
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd))) {
+ mlx5_ib_dbg(dev, "inlen is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
+ mlx5_ib_dbg(dev, "copy failed\n");
+ return -EFAULT;
+ }
+
+ if (ucmd.comp_mask) {
+ mlx5_ib_dbg(dev, "invalid comp mask\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)) || ucmd.reserved1) {
+ mlx5_ib_dbg(dev, "invalid reserved\n");
+ return -EOPNOTSUPP;
+ }
+
+ err = ib_copy_to_udata(udata, &resp, min_resp_len);
+ if (err) {
+ mlx5_ib_dbg(dev, "copy failed\n");
+ return -EINVAL;
+ }
+
+ inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ MLX5_SET(tirc, tirc, disp_type,
+ MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table,
+ init_attr->rwq_ind_tbl->ind_tbl_num);
+ MLX5_SET(tirc, tirc, transport_domain, tdn);
+
+ hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
+ switch (ucmd.rx_hash_function) {
+ case MLX5_RX_HASH_FUNC_TOEPLITZ:
+ {
+ void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
+ size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
+
+ if (len != ucmd.rx_key_len) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
+ MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
+ memcpy(rss_key, ucmd.rx_hash_key, len);
+ break;
+ }
+ default:
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ if (!ucmd.rx_hash_fields_mask) {
+ /* special case when this TIR serves as steering entry without hashing */
+ if (!init_attr->rwq_ind_tbl->log_ind_tbl_size)
+ goto create_tir;
+ err = -EINVAL;
+ goto err;
+ }
+
+ if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) &&
+ ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ /* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */
+ if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4))
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+
+ if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) &&
+ ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ /* If none of TCP & UDP SRC/DST was set - this bit field is ignored */
+ if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_TCP);
+ else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_UDP);
+
+ if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6))
+ selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP;
+
+ if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
+ selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP;
+
+ if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP))
+ selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT;
+
+ if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
+ selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT;
+
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
+
+create_tir:
+ err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
+
+ if (err)
+ goto err;
+
+ kvfree(in);
+ /* qpn is reserved for that QP */
+ qp->trans_qp.base.mqp.qpn = 0;
+ return 0;
+
+err:
+ kvfree(in);
+ return err;
+}
+
static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, struct mlx5_ib_qp *qp)
@@ -1211,6 +1466,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_create_qp_resp resp;
struct mlx5_create_qp_mbox_in *in;
struct mlx5_ib_create_qp ucmd;
+ struct mlx5_ib_cq *send_cq;
+ struct mlx5_ib_cq *recv_cq;
+ unsigned long flags;
int inlen = sizeof(*in);
int err;
u32 uidx = MLX5_IB_DEFAULT_UIDX;
@@ -1227,6 +1485,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
+ if (init_attr->rwq_ind_tbl) {
+ if (!udata)
+ return -ENOSYS;
+
+ err = create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata);
+ return err;
+ }
+
if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
if (!MLX5_CAP_GEN(mdev, block_lb_mc)) {
mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n");
@@ -1460,6 +1726,23 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
base->container_mibqp = qp;
base->mqp.event = mlx5_ib_qp_event;
+ get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq,
+ &send_cq, &recv_cq);
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ mlx5_ib_lock_cqs(send_cq, recv_cq);
+ /* Maintain device to QPs access, needed for further handling via reset
+ * flow
+ */
+ list_add_tail(&qp->qps_list, &dev->qp_list);
+ /* Maintain CQ to QPs access, needed for further handling via reset flow
+ */
+ if (send_cq)
+ list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+ if (recv_cq)
+ list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+ mlx5_ib_unlock_cqs(send_cq, recv_cq);
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
return 0;
err_create:
@@ -1478,23 +1761,23 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv
if (send_cq) {
if (recv_cq) {
if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
spin_lock_nested(&recv_cq->lock,
SINGLE_DEPTH_NESTING);
} else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
} else {
- spin_lock_irq(&recv_cq->lock);
+ spin_lock(&recv_cq->lock);
spin_lock_nested(&send_cq->lock,
SINGLE_DEPTH_NESTING);
}
} else {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
}
} else if (recv_cq) {
- spin_lock_irq(&recv_cq->lock);
+ spin_lock(&recv_cq->lock);
__acquire(&send_cq->lock);
} else {
__acquire(&send_cq->lock);
@@ -1509,21 +1792,21 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re
if (recv_cq) {
if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_unlock(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
} else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
__release(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
} else {
spin_unlock(&send_cq->lock);
- spin_unlock_irq(&recv_cq->lock);
+ spin_unlock(&recv_cq->lock);
}
} else {
__release(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
}
} else if (recv_cq) {
__release(&send_cq->lock);
- spin_unlock_irq(&recv_cq->lock);
+ spin_unlock(&recv_cq->lock);
} else {
__release(&recv_cq->lock);
__release(&send_cq->lock);
@@ -1535,17 +1818,18 @@ static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
return to_mpd(qp->ibqp.pd);
}
-static void get_cqs(struct mlx5_ib_qp *qp,
+static void get_cqs(enum ib_qp_type qp_type,
+ struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
{
- switch (qp->ibqp.qp_type) {
+ switch (qp_type) {
case IB_QPT_XRC_TGT:
*send_cq = NULL;
*recv_cq = NULL;
break;
case MLX5_IB_QPT_REG_UMR:
case IB_QPT_XRC_INI:
- *send_cq = to_mcq(qp->ibqp.send_cq);
+ *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
*recv_cq = NULL;
break;
@@ -1557,8 +1841,8 @@ static void get_cqs(struct mlx5_ib_qp *qp,
case IB_QPT_RAW_IPV6:
case IB_QPT_RAW_ETHERTYPE:
case IB_QPT_RAW_PACKET:
- *send_cq = to_mcq(qp->ibqp.send_cq);
- *recv_cq = to_mcq(qp->ibqp.recv_cq);
+ *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
+ *recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL;
break;
case IB_QPT_MAX:
@@ -1577,8 +1861,14 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_modify_qp_mbox_in *in;
+ unsigned long flags;
int err;
+ if (qp->ibqp.rwq_ind_tbl) {
+ destroy_rss_raw_qp_tir(dev, qp);
+ return;
+ }
+
base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
@@ -1602,17 +1892,28 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
base->mqp.qpn);
}
- get_cqs(qp, &send_cq, &recv_cq);
+ get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
+ &send_cq, &recv_cq);
+
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ mlx5_ib_lock_cqs(send_cq, recv_cq);
+ /* del from lists under both locks above to protect reset flow paths */
+ list_del(&qp->qps_list);
+ if (send_cq)
+ list_del(&qp->cq_send_list);
+
+ if (recv_cq)
+ list_del(&qp->cq_recv_list);
if (qp->create_type == MLX5_QP_KERNEL) {
- mlx5_ib_lock_cqs(send_cq, recv_cq);
__mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
if (send_cq != recv_cq)
__mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
NULL);
- mlx5_ib_unlock_cqs(send_cq, recv_cq);
}
+ mlx5_ib_unlock_cqs(send_cq, recv_cq);
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
destroy_raw_packet_qp(dev, qp);
@@ -2300,7 +2601,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
}
pd = get_pd(qp);
- get_cqs(qp, &send_cq, &recv_cq);
+ get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
+ &send_cq, &recv_cq);
context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
@@ -2349,6 +2651,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
else
sqd_event = 0;
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
+ qp->port) - 1;
+ struct mlx5_ib_port *mibport = &dev->port[port_num];
+
+ context->qp_counter_set_usr_page |=
+ cpu_to_be32((u32)(mibport->q_cnt_id) << 24);
+ }
+
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->sq_crq_size |= cpu_to_be16(1 << 4);
@@ -2439,6 +2750,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int port;
enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
+ if (ibqp->rwq_ind_tbl)
+ return -ENOSYS;
+
if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
@@ -3397,6 +3711,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
{
struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp;
struct mlx5_ib_mr *mr;
struct mlx5_wqe_data_seg *dpseg;
@@ -3424,6 +3739,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_lock_irqsave(&qp->sq.lock, flags);
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ err = -EIO;
+ *bad_wr = wr;
+ nreq = 0;
+ goto out;
+ }
+
for (nreq = 0; wr; nreq++, wr = wr->next) {
if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
mlx5_ib_warn(dev, "\n");
@@ -3725,6 +4047,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_wqe_data_seg *scat;
struct mlx5_rwqe_sig *sig;
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
unsigned long flags;
int err = 0;
int nreq;
@@ -3736,6 +4060,13 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_lock_irqsave(&qp->rq.lock, flags);
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ err = -EIO;
+ *bad_wr = wr;
+ nreq = 0;
+ goto out;
+ }
+
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; nreq++, wr = wr->next) {
@@ -4055,6 +4386,9 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int err = 0;
u8 raw_packet_qp_state;
+ if (ibqp->rwq_ind_tbl)
+ return -ENOSYS;
+
if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
qp_init_attr);
@@ -4164,3 +4498,322 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
return 0;
}
+
+static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr)
+{
+ struct mlx5_ib_dev *dev;
+ __be64 *rq_pas0;
+ void *in;
+ void *rqc;
+ void *wq;
+ int inlen;
+ int err;
+
+ dev = to_mdev(pd->device);
+
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas;
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, mem_rq_type,
+ MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
+ MLX5_SET(rqc, rqc, user_index, rwq->user_index);
+ MLX5_SET(rqc, rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, flush_in_error_en, 1);
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
+ MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride);
+ MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size);
+ MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn);
+ MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset);
+ MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
+ MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
+ MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
+ rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+ mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
+ err = mlx5_core_create_rq(dev->mdev, in, inlen, &rwq->rqn);
+ kvfree(in);
+ return err;
+}
+
+static int set_user_rq_size(struct mlx5_ib_dev *dev,
+ struct ib_wq_init_attr *wq_init_attr,
+ struct mlx5_ib_create_wq *ucmd,
+ struct mlx5_ib_rwq *rwq)
+{
+ /* Sanity check RQ size before proceeding */
+ if (wq_init_attr->max_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_wq_sz)))
+ return -EINVAL;
+
+ if (!ucmd->rq_wqe_count)
+ return -EINVAL;
+
+ rwq->wqe_count = ucmd->rq_wqe_count;
+ rwq->wqe_shift = ucmd->rq_wqe_shift;
+ rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift);
+ rwq->log_rq_stride = rwq->wqe_shift;
+ rwq->log_rq_size = ilog2(rwq->wqe_count);
+ return 0;
+}
+
+static int prepare_user_rq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct mlx5_ib_rwq *rwq)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_create_wq ucmd = {};
+ int err;
+ size_t required_cmd_sz;
+
+ required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
+ if (udata->inlen < required_cmd_sz) {
+ mlx5_ib_dbg(dev, "invalid inlen\n");
+ return -EINVAL;
+ }
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd))) {
+ mlx5_ib_dbg(dev, "inlen is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
+ mlx5_ib_dbg(dev, "copy failed\n");
+ return -EFAULT;
+ }
+
+ if (ucmd.comp_mask) {
+ mlx5_ib_dbg(dev, "invalid comp mask\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (ucmd.reserved) {
+ mlx5_ib_dbg(dev, "invalid reserved\n");
+ return -EOPNOTSUPP;
+ }
+
+ err = set_user_rq_size(dev, init_attr, &ucmd, rwq);
+ if (err) {
+ mlx5_ib_dbg(dev, "err %d\n", err);
+ return err;
+ }
+
+ err = create_user_rq(dev, pd, rwq, &ucmd);
+ if (err) {
+ mlx5_ib_dbg(dev, "err %d\n", err);
+ if (err)
+ return err;
+ }
+
+ rwq->user_index = ucmd.user_index;
+ return 0;
+}
+
+struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev;
+ struct mlx5_ib_rwq *rwq;
+ struct mlx5_ib_create_wq_resp resp = {};
+ size_t min_resp_len;
+ int err;
+
+ if (!udata)
+ return ERR_PTR(-ENOSYS);
+
+ min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ if (udata->outlen && udata->outlen < min_resp_len)
+ return ERR_PTR(-EINVAL);
+
+ dev = to_mdev(pd->device);
+ switch (init_attr->wq_type) {
+ case IB_WQT_RQ:
+ rwq = kzalloc(sizeof(*rwq), GFP_KERNEL);
+ if (!rwq)
+ return ERR_PTR(-ENOMEM);
+ err = prepare_user_rq(pd, init_attr, udata, rwq);
+ if (err)
+ goto err;
+ err = create_rq(rwq, pd, init_attr);
+ if (err)
+ goto err_user_rq;
+ break;
+ default:
+ mlx5_ib_dbg(dev, "unsupported wq type %d\n",
+ init_attr->wq_type);
+ return ERR_PTR(-EINVAL);
+ }
+
+ rwq->ibwq.wq_num = rwq->rqn;
+ rwq->ibwq.state = IB_WQS_RESET;
+ if (udata->outlen) {
+ resp.response_length = offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length);
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
+ if (err)
+ goto err_copy;
+ }
+
+ return &rwq->ibwq;
+
+err_copy:
+ mlx5_core_destroy_rq(dev->mdev, rwq->rqn);
+err_user_rq:
+ destroy_user_rq(pd, rwq);
+err:
+ kfree(rwq);
+ return ERR_PTR(err);
+}
+
+int mlx5_ib_destroy_wq(struct ib_wq *wq)
+{
+ struct mlx5_ib_dev *dev = to_mdev(wq->device);
+ struct mlx5_ib_rwq *rwq = to_mrwq(wq);
+
+ mlx5_core_destroy_rq(dev->mdev, rwq->rqn);
+ destroy_user_rq(wq->pd, rwq);
+ kfree(rwq);
+
+ return 0;
+}
+
+struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_ib_rwq_ind_table *rwq_ind_tbl;
+ int sz = 1 << init_attr->log_ind_tbl_size;
+ struct mlx5_ib_create_rwq_ind_tbl_resp resp = {};
+ size_t min_resp_len;
+ int inlen;
+ int err;
+ int i;
+ u32 *in;
+ void *rqtc;
+
+ if (udata->inlen > 0 &&
+ !ib_is_udata_cleared(udata, 0,
+ udata->inlen))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ if (udata->outlen && udata->outlen < min_resp_len)
+ return ERR_PTR(-EINVAL);
+
+ rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL);
+ if (!rwq_ind_tbl)
+ return ERR_PTR(-ENOMEM);
+
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
+ MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
+
+ for (i = 0; i < sz; i++)
+ MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num);
+
+ err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
+ kvfree(in);
+
+ if (err)
+ goto err;
+
+ rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn;
+ if (udata->outlen) {
+ resp.response_length = offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length);
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
+ if (err)
+ goto err_copy;
+ }
+
+ return &rwq_ind_tbl->ib_rwq_ind_tbl;
+
+err_copy:
+ mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
+err:
+ kfree(rwq_ind_tbl);
+ return ERR_PTR(err);
+}
+
+int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
+{
+ struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
+ struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
+
+ mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
+
+ kfree(rwq_ind_tbl);
+ return 0;
+}
+
+int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(wq->device);
+ struct mlx5_ib_rwq *rwq = to_mrwq(wq);
+ struct mlx5_ib_modify_wq ucmd = {};
+ size_t required_cmd_sz;
+ int curr_wq_state;
+ int wq_state;
+ int inlen;
+ int err;
+ void *rqc;
+ void *in;
+
+ required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
+ if (udata->inlen < required_cmd_sz)
+ return -EINVAL;
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd)))
+ return -EOPNOTSUPP;
+
+ if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)))
+ return -EFAULT;
+
+ if (ucmd.comp_mask || ucmd.reserved)
+ return -EOPNOTSUPP;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ curr_wq_state = (wq_attr_mask & IB_WQ_CUR_STATE) ?
+ wq_attr->curr_wq_state : wq->state;
+ wq_state = (wq_attr_mask & IB_WQ_STATE) ?
+ wq_attr->wq_state : curr_wq_state;
+ if (curr_wq_state == IB_WQS_ERR)
+ curr_wq_state = MLX5_RQC_STATE_ERR;
+ if (wq_state == IB_WQS_ERR)
+ wq_state = MLX5_RQC_STATE_ERR;
+ MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
+ MLX5_SET(rqc, rqc, state, wq_state);
+
+ err = mlx5_core_modify_rq(dev->mdev, rwq->rqn, in, inlen);
+ kvfree(in);
+ if (!err)
+ rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 3b2ddd64a371..ed6ac52355f1 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -74,14 +74,12 @@ static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
}
static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
- struct mlx5_create_srq_mbox_in **in,
- struct ib_udata *udata, int buf_size, int *inlen,
- int is_xrc)
+ struct mlx5_srq_attr *in,
+ struct ib_udata *udata, int buf_size)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_create_srq ucmd = {};
size_t ucmdlen;
- void *xsrqc;
int err;
int npages;
int page_shift;
@@ -104,7 +102,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
udata->inlen - sizeof(ucmd)))
return -EINVAL;
- if (is_xrc) {
+ if (in->type == IB_SRQT_XRC) {
err = get_srq_user_index(to_mucontext(pd->uobject->context),
&ucmd, udata->inlen, &uidx);
if (err)
@@ -130,14 +128,13 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
goto err_umem;
}
- *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
- *in = mlx5_vzalloc(*inlen);
- if (!(*in)) {
+ in->pas = mlx5_vzalloc(sizeof(*in->pas) * ncont);
+ if (!in->pas) {
err = -ENOMEM;
goto err_umem;
}
- mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0);
+ mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0);
err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
ucmd.db_addr, &srq->db);
@@ -146,20 +143,16 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
goto err_in;
}
- (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
- (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
-
- if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
- is_xrc){
- xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
- xrc_srq_context_entry);
- MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
- }
+ in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ in->page_offset = offset;
+ if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
+ in->type == IB_SRQT_XRC)
+ in->user_index = uidx;
return 0;
err_in:
- kvfree(*in);
+ kvfree(in->pas);
err_umem:
ib_umem_release(srq->umem);
@@ -168,15 +161,13 @@ err_umem:
}
static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
- struct mlx5_create_srq_mbox_in **in, int buf_size,
- int *inlen, int is_xrc)
+ struct mlx5_srq_attr *in, int buf_size)
{
int err;
int i;
struct mlx5_wqe_srq_next_seg *next;
int page_shift;
int npages;
- void *xsrqc;
err = mlx5_db_alloc(dev->mdev, &srq->db);
if (err) {
@@ -204,13 +195,12 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
buf_size, page_shift, srq->buf.npages, npages);
- *inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages;
- *in = mlx5_vzalloc(*inlen);
- if (!*in) {
+ in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages);
+ if (!in->pas) {
err = -ENOMEM;
goto err_buf;
}
- mlx5_fill_page_array(&srq->buf, (*in)->pas);
+ mlx5_fill_page_array(&srq->buf, in->pas);
srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
if (!srq->wrid) {
@@ -221,20 +211,15 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
}
srq->wq_sig = !!srq_signature;
- (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
-
- if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
- is_xrc){
- xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
- xrc_srq_context_entry);
- /* 0xffffff means we ask to work with cqe version 0 */
- MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX);
- }
+ in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
+ in->type == IB_SRQT_XRC)
+ in->user_index = MLX5_IB_DEFAULT_UIDX;
return 0;
err_in:
- kvfree(*in);
+ kvfree(in->pas);
err_buf:
mlx5_buf_free(dev->mdev, &srq->buf);
@@ -267,10 +252,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
int desc_size;
int buf_size;
int err;
- struct mlx5_create_srq_mbox_in *uninitialized_var(in);
- int uninitialized_var(inlen);
- int is_xrc;
- u32 flgs, xrcdn;
+ struct mlx5_srq_attr in = {0};
__u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
/* Sanity check SRQ size before proceeding */
@@ -302,14 +284,10 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
srq->msrq.max_avail_gather);
- is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
-
if (pd->uobject)
- err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen,
- is_xrc);
+ err = create_srq_user(pd, srq, &in, udata, buf_size);
else
- err = create_srq_kernel(dev, srq, &in, buf_size, &inlen,
- is_xrc);
+ err = create_srq_kernel(dev, srq, &in, buf_size);
if (err) {
mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
@@ -317,23 +295,23 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
goto err_srq;
}
- in->ctx.state_log_sz = ilog2(srq->msrq.max);
- flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
- xrcdn = 0;
- if (is_xrc) {
- xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
- in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn);
+ in.type = init_attr->srq_type;
+ in.log_size = ilog2(srq->msrq.max);
+ in.wqe_shift = srq->msrq.wqe_shift - 4;
+ if (srq->wq_sig)
+ in.flags |= MLX5_SRQ_FLAG_WQ_SIG;
+ if (init_attr->srq_type == IB_SRQT_XRC) {
+ in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
+ in.cqn = to_mcq(init_attr->ext.xrc.cq)->mcq.cqn;
} else if (init_attr->srq_type == IB_SRQT_BASIC) {
- xrcdn = to_mxrcd(dev->devr.x0)->xrcdn;
- in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn);
+ in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn;
+ in.cqn = to_mcq(dev->devr.c0)->mcq.cqn;
}
- in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF));
-
- in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn);
- in->ctx.db_record = cpu_to_be64(srq->db.dma);
- err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen, is_xrc);
- kvfree(in);
+ in.pd = to_mpd(pd)->pdn;
+ in.db_record = srq->db.dma;
+ err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);
+ kvfree(in.pas);
if (err) {
mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
goto err_usr_kern_srq;
@@ -401,7 +379,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
struct mlx5_ib_srq *srq = to_msrq(ibsrq);
int ret;
- struct mlx5_query_srq_mbox_out *out;
+ struct mlx5_srq_attr *out;
out = kzalloc(sizeof(*out), GFP_KERNEL);
if (!out)
@@ -411,7 +389,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
if (ret)
goto out_box;
- srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm);
+ srq_attr->srq_limit = out->lwm;
srq_attr->max_wr = srq->msrq.max - 1;
srq_attr->max_sge = srq->msrq.max_gs;
@@ -458,6 +436,8 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct mlx5_ib_srq *srq = to_msrq(ibsrq);
struct mlx5_wqe_srq_next_seg *next;
struct mlx5_wqe_data_seg *scat;
+ struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
unsigned long flags;
int err = 0;
int nreq;
@@ -465,6 +445,12 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
spin_lock_irqsave(&srq->lock, flags);
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ err = -EIO;
+ *bad_wr = wr;
+ goto out;
+ }
+
for (nreq = 0; wr; nreq++, wr = wr->next) {
if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
err = -EINVAL;
@@ -507,7 +493,7 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
*srq->db.db = cpu_to_be32(srq->wqe_ctr);
}
-
+out:
spin_unlock_irqrestore(&srq->lock, flags);
return err;
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
index 61bc308bb802..188dac4301b5 100644
--- a/drivers/infiniband/hw/mlx5/user.h
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -46,6 +46,10 @@ enum {
MLX5_SRQ_FLAG_SIGNATURE = 1 << 0,
};
+enum {
+ MLX5_WQ_FLAG_SIGNATURE = 1 << 0,
+};
+
/* Increment this value if any changes that break userspace ABI
* compatibility are made.
@@ -79,6 +83,10 @@ enum mlx5_ib_alloc_ucontext_resp_mask {
MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
};
+enum mlx5_user_cmds_supp_uhw {
+ MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0,
+};
+
struct mlx5_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 bf_reg_size;
@@ -94,8 +102,8 @@ struct mlx5_ib_alloc_ucontext_resp {
__u32 comp_mask;
__u32 response_length;
__u8 cqe_version;
- __u8 reserved2;
- __u16 reserved3;
+ __u8 cmds_supp_uhw;
+ __u16 reserved2;
__u64 hca_core_clock_offset;
};
@@ -103,6 +111,22 @@ struct mlx5_ib_alloc_pd_resp {
__u32 pdn;
};
+struct mlx5_ib_tso_caps {
+ __u32 max_tso; /* Maximum tso payload size in bytes */
+
+ /* Corresponding bit will be set if qp type from
+ * 'enum ib_qp_type' is supported, e.g.
+ * supported_qpts |= 1 << IB_QPT_UD
+ */
+ __u32 supported_qpts;
+};
+
+struct mlx5_ib_query_device_resp {
+ __u32 comp_mask;
+ __u32 response_length;
+ struct mlx5_ib_tso_caps tso_caps;
+};
+
struct mlx5_ib_create_cq {
__u64 buf_addr;
__u64 db_addr;
@@ -148,6 +172,40 @@ struct mlx5_ib_create_qp {
__u64 sq_buf_addr;
};
+/* RX Hash function flags */
+enum mlx5_rx_hash_function_flags {
+ MLX5_RX_HASH_FUNC_TOEPLITZ = 1 << 0,
+};
+
+/*
+ * RX Hash flags, these flags allows to set which incoming packet's field should
+ * participates in RX Hash. Each flag represent certain packet's field,
+ * when the flag is set the field that is represented by the flag will
+ * participate in RX Hash calculation.
+ * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP
+ * and *TCP and *UDP flags can't be enabled together on the same QP.
+*/
+enum mlx5_rx_hash_fields {
+ MLX5_RX_HASH_SRC_IPV4 = 1 << 0,
+ MLX5_RX_HASH_DST_IPV4 = 1 << 1,
+ MLX5_RX_HASH_SRC_IPV6 = 1 << 2,
+ MLX5_RX_HASH_DST_IPV6 = 1 << 3,
+ MLX5_RX_HASH_SRC_PORT_TCP = 1 << 4,
+ MLX5_RX_HASH_DST_PORT_TCP = 1 << 5,
+ MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6,
+ MLX5_RX_HASH_DST_PORT_UDP = 1 << 7
+};
+
+struct mlx5_ib_create_qp_rss {
+ __u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */
+ __u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */
+ __u8 rx_key_len; /* valid only for Toeplitz */
+ __u8 reserved[6];
+ __u8 rx_hash_key[128]; /* valid only for Toeplitz */
+ __u32 comp_mask;
+ __u32 reserved1;
+};
+
struct mlx5_ib_create_qp_resp {
__u32 uuar_index;
};
@@ -159,6 +217,32 @@ struct mlx5_ib_alloc_mw {
__u16 reserved2;
};
+struct mlx5_ib_create_wq {
+ __u64 buf_addr;
+ __u64 db_addr;
+ __u32 rq_wqe_count;
+ __u32 rq_wqe_shift;
+ __u32 user_index;
+ __u32 flags;
+ __u32 comp_mask;
+ __u32 reserved;
+};
+
+struct mlx5_ib_create_wq_resp {
+ __u32 response_length;
+ __u32 reserved;
+};
+
+struct mlx5_ib_create_rwq_ind_tbl_resp {
+ __u32 response_length;
+ __u32 reserved;
+};
+
+struct mlx5_ib_modify_wq {
+ __u32 comp_mask;
+ __u32 reserved;
+};
+
static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
struct mlx5_ib_create_qp *ucmd,
int inlen,
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 9866c35cc977..da2335f7f7c3 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1081,16 +1081,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
return sprintf(buf, "%x\n", dev->rev_id);
}
-static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct mthca_dev *dev =
- container_of(device, struct mthca_dev, ib_dev.dev);
- return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32),
- (int) (dev->fw_ver >> 16) & 0xffff,
- (int) dev->fw_ver & 0xffff);
-}
-
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
{
@@ -1120,13 +1110,11 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr,
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *mthca_dev_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id
};
@@ -1187,6 +1175,17 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+static void get_dev_fw_str(struct ib_device *device, char *str,
+ size_t str_len)
+{
+ struct mthca_dev *dev =
+ container_of(device, struct mthca_dev, ib_dev);
+ snprintf(str, str_len, "%d.%d.%d",
+ (int) (dev->fw_ver >> 32),
+ (int) (dev->fw_ver >> 16) & 0xffff,
+ (int) dev->fw_ver & 0xffff);
+}
+
int mthca_register_device(struct mthca_dev *dev)
{
int ret;
@@ -1266,6 +1265,7 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
dev->ib_dev.dereg_mr = mthca_dereg_mr;
dev->ib_dev.get_port_immutable = mthca_port_immutable;
+ dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
if (dev->mthca_flags & MTHCA_FLAG_FMR) {
dev->ib_dev.alloc_fmr = mthca_alloc_fmr;
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index 74c6a9426047..6727af27c017 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -98,7 +98,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENOMEM;
mthca_err(mdev, "Couldn't allocate memory to save HCA "
"PCI header, aborting.\n");
- goto out;
+ goto put_dev;
}
for (i = 0; i < 64; ++i) {
@@ -108,7 +108,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't save HCA "
"PCI header, aborting.\n");
- goto out;
+ goto free_hca;
}
}
@@ -121,7 +121,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENOMEM;
mthca_err(mdev, "Couldn't allocate memory to save HCA "
"bridge PCI header, aborting.\n");
- goto out;
+ goto free_hca;
}
for (i = 0; i < 64; ++i) {
@@ -131,7 +131,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't save HCA bridge "
"PCI header, aborting.\n");
- goto out;
+ goto free_bh;
}
}
bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
@@ -139,7 +139,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't locate HCA bridge "
"PCI-X capability, aborting.\n");
- goto out;
+ goto free_bh;
}
}
@@ -152,7 +152,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENOMEM;
mthca_err(mdev, "Couldn't map HCA reset register, "
"aborting.\n");
- goto out;
+ goto free_bh;
}
writel(MTHCA_RESET_VALUE, reset);
@@ -172,7 +172,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "Couldn't access HCA after reset, "
"aborting.\n");
- goto out;
+ goto free_bh;
}
if (v != 0xffffffff)
@@ -184,7 +184,7 @@ int mthca_reset(struct mthca_dev *mdev)
err = -ENODEV;
mthca_err(mdev, "PCI device did not come back after reset, "
"aborting.\n");
- goto out;
+ goto free_bh;
}
good:
@@ -195,14 +195,14 @@ good:
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge Upstream "
"split transaction control, aborting.\n");
- goto out;
+ goto free_bh;
}
if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc,
bridge_header[(bridge_pcix_cap + 0xc) / 4])) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge Downstream "
"split transaction control, aborting.\n");
- goto out;
+ goto free_bh;
}
/*
* Bridge control register is at 0x3e, so we'll
@@ -216,7 +216,7 @@ good:
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge reg %x, "
"aborting.\n", i);
- goto out;
+ goto free_bh;
}
}
@@ -225,7 +225,7 @@ good:
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, "
"aborting.\n");
- goto out;
+ goto free_bh;
}
}
@@ -235,7 +235,7 @@ good:
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI-X "
"command register, aborting.\n");
- goto out;
+ goto free_bh;
}
}
@@ -246,7 +246,7 @@ good:
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI Express "
"Device Control register, aborting.\n");
- goto out;
+ goto free_bh;
}
linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4];
if (pcie_capability_write_word(mdev->pdev, PCI_EXP_LNKCTL,
@@ -254,7 +254,7 @@ good:
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI Express "
"Link control register, aborting.\n");
- goto out;
+ goto free_bh;
}
}
@@ -266,7 +266,7 @@ good:
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA reg %x, "
"aborting.\n", i);
- goto out;
+ goto free_bh;
}
}
@@ -275,14 +275,12 @@ good:
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA COMMAND, "
"aborting.\n");
- goto out;
}
-
-out:
- if (bridge)
- pci_dev_put(bridge);
+free_bh:
kfree(bridge_header);
+free_hca:
kfree(hca_header);
-
+put_dev:
+ pci_dev_put(bridge);
return err;
}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 464d6da5fe91..bd69125731c1 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -2606,23 +2606,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
/**
- * show_fw_ver
- */
-static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct nes_ib_device *nesibdev =
- container_of(dev, struct nes_ib_device, ibdev.dev);
- struct nes_vnic *nesvnic = nesibdev->nesvnic;
-
- nes_debug(NES_DBG_INIT, "\n");
- return sprintf(buf, "%u.%u\n",
- (nesvnic->nesdev->nesadapter->firmware_version >> 16),
- (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff));
-}
-
-
-/**
* show_hca
*/
static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
@@ -2645,13 +2628,11 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *nes_dev_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id
};
@@ -3703,6 +3684,19 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+static void get_dev_fw_str(struct ib_device *dev, char *str,
+ size_t str_len)
+{
+ struct nes_ib_device *nesibdev =
+ container_of(dev, struct nes_ib_device, ibdev);
+ struct nes_vnic *nesvnic = nesibdev->nesvnic;
+
+ nes_debug(NES_DBG_INIT, "\n");
+ snprintf(str, str_len, "%u.%u",
+ (nesvnic->nesdev->nesadapter->firmware_version >> 16),
+ (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff));
+}
+
/**
* nes_init_ofa_device
*/
@@ -3802,6 +3796,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.iwcm->create_listen = nes_create_listen;
nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen;
nesibdev->ibdev.get_port_immutable = nes_port_immutable;
+ nesibdev->ibdev.get_dev_fw_str = get_dev_fw_str;
memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name,
sizeof(nesibdev->ibdev.iwcm->ifname));
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 3d75f65ce87e..07d0c6c5b046 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -107,6 +107,14 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+static void get_dev_fw_str(struct ib_device *device, char *str,
+ size_t str_len)
+{
+ struct ocrdma_dev *dev = get_ocrdma_dev(device);
+
+ snprintf(str, str_len, "%s", &dev->attr.fw_ver[0]);
+}
+
static int ocrdma_register_device(struct ocrdma_dev *dev)
{
strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX);
@@ -193,6 +201,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.process_mad = ocrdma_process_mad;
dev->ibdev.get_port_immutable = ocrdma_port_immutable;
+ dev->ibdev.get_dev_fw_str = get_dev_fw_str;
if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
dev->ibdev.uverbs_cmd_mask |=
@@ -262,14 +271,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
}
-static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct ocrdma_dev *dev = dev_get_drvdata(device);
-
- return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]);
-}
-
static ssize_t show_hca_type(struct device *device,
struct device_attribute *attr, char *buf)
{
@@ -279,12 +280,10 @@ static ssize_t show_hca_type(struct device *device,
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
static struct device_attribute *ocrdma_attributes[] = {
&dev_attr_hw_rev,
- &dev_attr_fw_ver,
&dev_attr_hca_type
};
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 575b737d9ef3..9cc0aae1d781 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -106,6 +106,49 @@ static u32 credit_table[31] = {
32768 /* 1E */
};
+const struct rvt_operation_params qib_post_parms[RVT_OPERATION_MAX] = {
+[IB_WR_RDMA_WRITE] = {
+ .length = sizeof(struct ib_rdma_wr),
+ .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+[IB_WR_RDMA_READ] = {
+ .length = sizeof(struct ib_rdma_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_ATOMIC,
+},
+
+[IB_WR_ATOMIC_CMP_AND_SWP] = {
+ .length = sizeof(struct ib_atomic_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
+},
+
+[IB_WR_ATOMIC_FETCH_AND_ADD] = {
+ .length = sizeof(struct ib_atomic_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
+},
+
+[IB_WR_RDMA_WRITE_WITH_IMM] = {
+ .length = sizeof(struct ib_rdma_wr),
+ .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+[IB_WR_SEND] = {
+ .length = sizeof(struct ib_send_wr),
+ .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
+ BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+[IB_WR_SEND_WITH_IMM] = {
+ .length = sizeof(struct ib_send_wr),
+ .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
+ BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
+},
+
+};
+
static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map,
gfp_t gfp)
{
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 846e6c726df7..10d062561bd9 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -169,8 +169,12 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
}
if (ah_attr->ah_flags & IB_AH_GRH) {
- qib_copy_sge(&qp->r_sge, &ah_attr->grh,
- sizeof(struct ib_grh), 1);
+ struct ib_grh grh;
+ struct ib_global_route grd = ah_attr->grh;
+
+ qib_make_grh(ibp, &grh, &grd, 0, 0);
+ qib_copy_sge(&qp->r_sge, &grh,
+ sizeof(grh), 1);
wc.wc_flags |= IB_WC_GRH;
} else
qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index cbf6200e6afc..fd1dfbce5539 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1582,6 +1582,8 @@ static void qib_fill_device_attr(struct qib_devdata *dd)
rdi->dparms.props.max_total_mcast_qp_attach =
rdi->dparms.props.max_mcast_qp_attach *
rdi->dparms.props.max_mcast_grp;
+ /* post send table */
+ dd->verbs_dev.rdi.post_parms = qib_post_parms;
}
/**
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 4f878151f81f..736ced684842 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -497,4 +497,6 @@ extern unsigned int ib_qib_max_srq_wrs;
extern const u32 ib_qib_rnr_table[];
+extern const struct rvt_operation_params qib_post_parms[];
+
#endif /* QIB_VERBS_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 565c881a44ba..c229b9f4a52d 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -331,6 +331,21 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+static void usnic_get_dev_fw_str(struct ib_device *device,
+ char *str,
+ size_t str_len)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(device, struct usnic_ib_dev, ib_dev);
+ struct ethtool_drvinfo info;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ snprintf(str, str_len, "%s", info.fw_version);
+}
+
/* Start of PF discovery section */
static void *usnic_ib_device_add(struct pci_dev *dev)
{
@@ -414,6 +429,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq;
us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr;
us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable;
+ us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str;
if (ib_register_device(&us_ibdev->ib_dev, NULL))
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index 3412ea06116e..80ef3f8998c8 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -45,21 +45,6 @@
#include "usnic_ib_verbs.h"
#include "usnic_log.h"
-static ssize_t usnic_ib_show_fw_ver(struct device *device,
- struct device_attribute *attr,
- char *buf)
-{
- struct usnic_ib_dev *us_ibdev =
- container_of(device, struct usnic_ib_dev, ib_dev.dev);
- struct ethtool_drvinfo info;
-
- mutex_lock(&us_ibdev->usdev_lock);
- us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
- mutex_unlock(&us_ibdev->usdev_lock);
-
- return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version);
-}
-
static ssize_t usnic_ib_show_board(struct device *device,
struct device_attribute *attr,
char *buf)
@@ -192,7 +177,6 @@ usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
}
-static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL);
static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL);
static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL);
@@ -201,7 +185,6 @@ static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL);
static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL);
static struct device_attribute *usnic_class_attributes[] = {
- &dev_attr_fw_ver,
&dev_attr_board_id,
&dev_attr_config,
&dev_attr_iface,