aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/infiniband/Kconfig2
-rw-r--r--drivers/infiniband/core/Makefile12
-rw-r--r--drivers/infiniband/core/addr.c226
-rw-r--r--drivers/infiniband/core/core_priv.h16
-rw-r--r--drivers/infiniband/core/device.c58
-rw-r--r--drivers/infiniband/core/mad.c13
-rw-r--r--drivers/infiniband/core/multicast.c23
-rw-r--r--drivers/infiniband/core/sa_query.c211
-rw-r--r--drivers/infiniband/core/sysfs.c366
-rw-r--r--drivers/infiniband/hw/Makefile1
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c147
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c58
-rw-r--r--drivers/infiniband/hw/hfi1/Kconfig (renamed from drivers/staging/rdma/hfi1/Kconfig)0
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile (renamed from drivers/staging/rdma/hfi1/Makefile)2
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c (renamed from drivers/staging/rdma/hfi1/affinity.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h (renamed from drivers/staging/rdma/hfi1/affinity.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/aspm.h (renamed from drivers/staging/rdma/hfi1/aspm.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c (renamed from drivers/staging/rdma/hfi1/chip.c)41
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h (renamed from drivers/staging/rdma/hfi1/chip.h)6
-rw-r--r--drivers/infiniband/hw/hfi1/chip_registers.h (renamed from drivers/staging/rdma/hfi1/chip_registers.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/common.h (renamed from drivers/staging/rdma/hfi1/common.h)5
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c (renamed from drivers/staging/rdma/hfi1/debugfs.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.h (renamed from drivers/staging/rdma/hfi1/debugfs.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/device.c (renamed from drivers/staging/rdma/hfi1/device.c)18
-rw-r--r--drivers/infiniband/hw/hfi1/device.h (renamed from drivers/staging/rdma/hfi1/device.h)3
-rw-r--r--drivers/infiniband/hw/hfi1/dma.c (renamed from drivers/staging/rdma/hfi1/dma.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c (renamed from drivers/staging/rdma/hfi1/driver.c)2
-rw-r--r--drivers/infiniband/hw/hfi1/efivar.c (renamed from drivers/staging/rdma/hfi1/efivar.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/efivar.h (renamed from drivers/staging/rdma/hfi1/efivar.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.c102
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.h (renamed from drivers/staging/rdma/hfi1/eprom.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c (renamed from drivers/staging/rdma/hfi1/file_ops.c)549
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c (renamed from drivers/staging/rdma/hfi1/firmware.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h (renamed from drivers/staging/rdma/hfi1/hfi.h)7
-rw-r--r--drivers/infiniband/hw/hfi1/init.c (renamed from drivers/staging/rdma/hfi1/init.c)22
-rw-r--r--drivers/infiniband/hw/hfi1/intr.c (renamed from drivers/staging/rdma/hfi1/intr.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.h (renamed from drivers/staging/rdma/hfi1/iowait.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c (renamed from drivers/staging/rdma/hfi1/mad.c)99
-rw-r--r--drivers/infiniband/hw/hfi1/mad.h (renamed from drivers/staging/rdma/hfi1/mad.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.c (renamed from drivers/staging/rdma/hfi1/mmu_rb.c)22
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.h (renamed from drivers/staging/rdma/hfi1/mmu_rb.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/opa_compat.h (renamed from drivers/staging/rdma/hfi1/opa_compat.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c (renamed from drivers/staging/rdma/hfi1/pcie.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c (renamed from drivers/staging/rdma/hfi1/pio.c)3
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h (renamed from drivers/staging/rdma/hfi1/pio.h)8
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c (renamed from drivers/staging/rdma/hfi1/pio_copy.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c (renamed from drivers/staging/rdma/hfi1/platform.c)27
-rw-r--r--drivers/infiniband/hw/hfi1/platform.h (renamed from drivers/staging/rdma/hfi1/platform.h)1
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c (renamed from drivers/staging/rdma/hfi1/qp.c)9
-rw-r--r--drivers/infiniband/hw/hfi1/qp.h (renamed from drivers/staging/rdma/hfi1/qp.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.c (renamed from drivers/staging/rdma/hfi1/qsfp.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.h (renamed from drivers/staging/rdma/hfi1/qsfp.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c (renamed from drivers/staging/rdma/hfi1/rc.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c (renamed from drivers/staging/rdma/hfi1/ruc.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c (renamed from drivers/staging/rdma/hfi1/sdma.c)4
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h (renamed from drivers/staging/rdma/hfi1/sdma.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/sdma_txreq.h (renamed from drivers/staging/rdma/hfi1/sdma_txreq.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c (renamed from drivers/staging/rdma/hfi1/sysfs.c)4
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c (renamed from drivers/staging/rdma/hfi1/trace.c)8
-rw-r--r--drivers/infiniband/hw/hfi1/trace.h (renamed from drivers/staging/rdma/hfi1/trace.h)5
-rw-r--r--drivers/infiniband/hw/hfi1/twsi.c (renamed from drivers/staging/rdma/hfi1/twsi.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/twsi.h (renamed from drivers/staging/rdma/hfi1/twsi.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c (renamed from drivers/staging/rdma/hfi1/uc.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c (renamed from drivers/staging/rdma/hfi1/ud.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c (renamed from drivers/staging/rdma/hfi1/user_exp_rcv.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.h (renamed from drivers/staging/rdma/hfi1/user_exp_rcv.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/user_pages.c (renamed from drivers/staging/rdma/hfi1/user_pages.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c (renamed from drivers/staging/rdma/hfi1/user_sdma.c)18
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.h (renamed from drivers/staging/rdma/hfi1/user_sdma.h)0
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c (renamed from drivers/staging/rdma/hfi1/verbs.c)4
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h (renamed from drivers/staging/rdma/hfi1/verbs.h)1
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.c (renamed from drivers/staging/rdma/hfi1/verbs_txreq.c)0
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.h (renamed from drivers/staging/rdma/hfi1/verbs_txreq.h)0
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c145
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c15
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h1
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c1
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c4
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c30
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c109
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c140
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c48
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c3
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c2
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c2
-rw-r--r--drivers/staging/Kconfig2
-rw-r--r--drivers/staging/Makefile1
-rw-r--r--drivers/staging/rdma/Kconfig27
-rw-r--r--drivers/staging/rdma/Makefile2
-rw-r--r--drivers/staging/rdma/hfi1/TODO6
-rw-r--r--drivers/staging/rdma/hfi1/diag.c1925
-rw-r--r--drivers/staging/rdma/hfi1/eprom.c471
95 files changed, 1748 insertions, 3301 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 6425c0e5d18a..2137adfbd8c3 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -85,4 +85,6 @@ source "drivers/infiniband/ulp/isert/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
+source "drivers/infiniband/hw/hfi1/Kconfig"
+
endif # INFINIBAND
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 26987d9d7e1c..edaae9f9853c 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,8 +1,7 @@
infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o
user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
-obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
- ib_cm.o iw_cm.o ib_addr.o \
+obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \
$(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
@@ -10,14 +9,11 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
- roce_gid_mgmt.o mr_pool.o
+ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
+ multicast.o mad.o smi.o agent.o mad_rmpp.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
-ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
-
-ib_sa-y := sa_query.o multicast.o
-
ib_cm-y := cm.o
iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
@@ -28,8 +24,6 @@ rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
rdma_ucm-y := ucma.o
-ib_addr-y := addr.o
-
ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 337353d86cfa..1374541a4528 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -46,10 +46,10 @@
#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>
+#include <rdma/rdma_netlink.h>
+#include <net/netlink.h>
-MODULE_AUTHOR("Sean Hefty");
-MODULE_DESCRIPTION("IB Address Translation");
-MODULE_LICENSE("Dual BSD/GPL");
+#include "core_priv.h"
struct addr_req {
struct list_head list;
@@ -62,8 +62,11 @@ struct addr_req {
struct rdma_dev_addr *addr, void *context);
unsigned long timeout;
int status;
+ u32 seq;
};
+static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);
+
static void process_req(struct work_struct *work);
static DEFINE_MUTEX(lock);
@@ -71,6 +74,126 @@ static LIST_HEAD(req_list);
static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq;
+static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
+ [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
+ .len = sizeof(struct rdma_nla_ls_gid)},
+};
+
+static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
+{
+ struct nlattr *tb[LS_NLA_TYPE_MAX] = {};
+ int ret;
+
+ if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
+ return false;
+
+ ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
+ nlmsg_len(nlh), ib_nl_addr_policy);
+ if (ret)
+ return false;
+
+ return true;
+}
+
+static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
+{
+ const struct nlattr *head, *curr;
+ union ib_gid gid;
+ struct addr_req *req;
+ int len, rem;
+ int found = 0;
+
+ head = (const struct nlattr *)nlmsg_data(nlh);
+ len = nlmsg_len(nlh);
+
+ nla_for_each_attr(curr, head, len, rem) {
+ if (curr->nla_type == LS_NLA_TYPE_DGID)
+ memcpy(&gid, nla_data(curr), nla_len(curr));
+ }
+
+ mutex_lock(&lock);
+ list_for_each_entry(req, &req_list, list) {
+ if (nlh->nlmsg_seq != req->seq)
+ continue;
+ /* We set the DGID part, the rest was set earlier */
+ rdma_addr_set_dgid(req->addr, &gid);
+ req->status = 0;
+ found = 1;
+ break;
+ }
+ mutex_unlock(&lock);
+
+ if (!found)
+ pr_info("Couldn't find request waiting for DGID: %pI6\n",
+ &gid);
+}
+
+int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
+
+ if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
+ !(NETLINK_CB(skb).sk) ||
+ !netlink_capable(skb, CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (ib_nl_is_good_ip_resp(nlh))
+ ib_nl_process_good_ip_rsep(nlh);
+
+ return skb->len;
+}
+
+static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
+ const void *daddr,
+ u32 seq, u16 family)
+{
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ struct rdma_ls_ip_resolve_header *header;
+ void *data;
+ size_t size;
+ int attrtype;
+ int len;
+
+ if (family == AF_INET) {
+ size = sizeof(struct in_addr);
+ attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4;
+ } else {
+ size = sizeof(struct in6_addr);
+ attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6;
+ }
+
+ len = nla_total_size(sizeof(size));
+ len += NLMSG_ALIGN(sizeof(*header));
+
+ skb = nlmsg_new(len, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS,
+ RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST);
+ if (!data) {
+ nlmsg_free(skb);
+ return -ENODATA;
+ }
+
+ /* Construct the family header first */
+ header = (struct rdma_ls_ip_resolve_header *)
+ skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
+ header->ifindex = dev_addr->bound_dev_if;
+ nla_put(skb, attrtype, size, daddr);
+
+ /* Repair the nlmsg header length */
+ nlmsg_end(skb, nlh);
+ ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);
+
+ /* Make the request retry, so when we get the response from userspace
+ * we will have something.
+ */
+ return -ENODATA;
+}
+
int rdma_addr_size(struct sockaddr *addr)
{
switch (addr->sa_family) {
@@ -199,6 +322,17 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
+static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+ const void *daddr, u32 seq, u16 family)
+{
+ if (ibnl_chk_listeners(RDMA_NL_GROUP_LS))
+ return -EADDRNOTAVAIL;
+
+ /* We fill in what we can, the response will fill the rest */
+ rdma_copy_addr(dev_addr, dst->dev, NULL);
+ return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
+}
+
static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
const void *daddr)
{
@@ -223,6 +357,39 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
return ret;
}
+static bool has_gateway(struct dst_entry *dst, sa_family_t family)
+{
+ struct rtable *rt;
+ struct rt6_info *rt6;
+
+ if (family == AF_INET) {
+ rt = container_of(dst, struct rtable, dst);
+ return rt->rt_uses_gateway;
+ }
+
+ rt6 = container_of(dst, struct rt6_info, dst);
+ return rt6->rt6i_flags & RTF_GATEWAY;
+}
+
+static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+ const struct sockaddr *dst_in, u32 seq)
+{
+ const struct sockaddr_in *dst_in4 =
+ (const struct sockaddr_in *)dst_in;
+ const struct sockaddr_in6 *dst_in6 =
+ (const struct sockaddr_in6 *)dst_in;
+ const void *daddr = (dst_in->sa_family == AF_INET) ?
+ (const void *)&dst_in4->sin_addr.s_addr :
+ (const void *)&dst_in6->sin6_addr;
+ sa_family_t family = dst_in->sa_family;
+
+ /* Gateway + ARPHRD_INFINIBAND -> IB router */
+ if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
+ return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
+ else
+ return dst_fetch_ha(dst, dev_addr, daddr);
+}
+
static int addr4_resolve(struct sockaddr_in *src_in,
const struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr,
@@ -246,10 +413,11 @@ static int addr4_resolve(struct sockaddr_in *src_in,
src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = fl4.saddr;
- /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
- * routable) and we could set the network type accordingly.
+ /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
+ * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
+ * type accordingly.
*/
- if (rt->rt_uses_gateway)
+ if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
addr->network = RDMA_NETWORK_IPV4;
addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
@@ -291,10 +459,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
src_in->sin6_addr = fl6.saddr;
}
- /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
- * routable) and we could set the network type accordingly.
+ /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
+ * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
+ * type accordingly.
*/
- if (rt->rt6i_flags & RTF_GATEWAY)
+ if (rt->rt6i_flags & RTF_GATEWAY &&
+ ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
addr->network = RDMA_NETWORK_IPV6;
addr->hoplimit = ip6_dst_hoplimit(dst);
@@ -317,7 +487,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
static int addr_resolve_neigh(struct dst_entry *dst,
const struct sockaddr *dst_in,
- struct rdma_dev_addr *addr)
+ struct rdma_dev_addr *addr,
+ u32 seq)
{
if (dst->dev->flags & IFF_LOOPBACK) {
int ret;
@@ -331,17 +502,8 @@ static int addr_resolve_neigh(struct dst_entry *dst,
}
/* If the device doesn't do ARP internally */
- if (!(dst->dev->flags & IFF_NOARP)) {
- const struct sockaddr_in *dst_in4 =
- (const struct sockaddr_in *)dst_in;
- const struct sockaddr_in6 *dst_in6 =
- (const struct sockaddr_in6 *)dst_in;
-
- return dst_fetch_ha(dst, addr,
- dst_in->sa_family == AF_INET ?
- (const void *)&dst_in4->sin_addr.s_addr :
- (const void *)&dst_in6->sin6_addr);
- }
+ if (!(dst->dev->flags & IFF_NOARP))
+ return fetch_ha(dst, addr, dst_in, seq);
return rdma_copy_addr(addr, dst->dev, NULL);
}
@@ -349,7 +511,8 @@ static int addr_resolve_neigh(struct dst_entry *dst,
static int addr_resolve(struct sockaddr *src_in,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
- bool resolve_neigh)
+ bool resolve_neigh,
+ u32 seq)
{
struct net_device *ndev;
struct dst_entry *dst;
@@ -366,7 +529,7 @@ static int addr_resolve(struct sockaddr *src_in,
return ret;
if (resolve_neigh)
- ret = addr_resolve_neigh(&rt->dst, dst_in, addr);
+ ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
ndev = rt->dst.dev;
dev_hold(ndev);
@@ -383,7 +546,7 @@ static int addr_resolve(struct sockaddr *src_in,
return ret;
if (resolve_neigh)
- ret = addr_resolve_neigh(dst, dst_in, addr);
+ ret = addr_resolve_neigh(dst, dst_in, addr, seq);
ndev = dst->dev;
dev_hold(ndev);
@@ -412,7 +575,7 @@ static void process_req(struct work_struct *work)
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
req->status = addr_resolve(src_in, dst_in, req->addr,
- true);
+ true, req->seq);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
@@ -471,8 +634,9 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
req->context = context;
req->client = client;
atomic_inc(&client->refcount);
+ req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
- req->status = addr_resolve(src_in, dst_in, addr, true);
+ req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
switch (req->status) {
case 0:
req->timeout = jiffies;
@@ -510,7 +674,7 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
src_in->sa_family = dst_addr->sa_family;
}
- return addr_resolve(src_in, dst_addr, addr, false);
+ return addr_resolve(src_in, dst_addr, addr, false, 0);
}
EXPORT_SYMBOL(rdma_resolve_ip_route);
@@ -634,7 +798,7 @@ static struct notifier_block nb = {
.notifier_call = netevent_callback
};
-static int __init addr_init(void)
+int addr_init(void)
{
addr_wq = create_singlethread_workqueue("ib_addr");
if (!addr_wq)
@@ -642,15 +806,13 @@ static int __init addr_init(void)
register_netevent_notifier(&nb);
rdma_addr_register_client(&self);
+
return 0;
}
-static void __exit addr_cleanup(void)
+void addr_cleanup(void)
{
rdma_addr_unregister_client(&self);
unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq);
}
-
-module_init(addr_init);
-module_exit(addr_cleanup);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index eab32215756b..19d499dcab76 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -137,4 +137,20 @@ static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
return _upper == upper;
}
+int addr_init(void);
+void addr_cleanup(void);
+
+int ib_mad_init(void);
+void ib_mad_cleanup(void);
+
+int ib_sa_init(void);
+void ib_sa_cleanup(void);
+
+int ib_nl_handle_resolve_resp(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int ib_nl_handle_set_timeout(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
+ struct netlink_callback *cb);
+
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 10979844026a..5516fb070344 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -955,6 +955,29 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
}
EXPORT_SYMBOL(ib_get_net_dev_by_params);
+static struct ibnl_client_cbs ibnl_ls_cb_table[] = {
+ [RDMA_NL_LS_OP_RESOLVE] = {
+ .dump = ib_nl_handle_resolve_resp,
+ .module = THIS_MODULE },
+ [RDMA_NL_LS_OP_SET_TIMEOUT] = {
+ .dump = ib_nl_handle_set_timeout,
+ .module = THIS_MODULE },
+ [RDMA_NL_LS_OP_IP_RESOLVE] = {
+ .dump = ib_nl_handle_ip_res_resp,
+ .module = THIS_MODULE },
+};
+
+static int ib_add_ibnl_clients(void)
+{
+ return ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ibnl_ls_cb_table),
+ ibnl_ls_cb_table);
+}
+
+static void ib_remove_ibnl_clients(void)
+{
+ ibnl_remove_client(RDMA_NL_LS);
+}
+
static int __init ib_core_init(void)
{
int ret;
@@ -983,10 +1006,41 @@ static int __init ib_core_init(void)
goto err_sysfs;
}
+ ret = addr_init();
+ if (ret) {
+ pr_warn("Could't init IB address resolution\n");
+ goto err_ibnl;
+ }
+
+ ret = ib_mad_init();
+ if (ret) {
+ pr_warn("Couldn't init IB MAD\n");
+ goto err_addr;
+ }
+
+ ret = ib_sa_init();
+ if (ret) {
+ pr_warn("Couldn't init SA\n");
+ goto err_mad;
+ }
+
+ if (ib_add_ibnl_clients()) {
+ pr_warn("Couldn't register ibnl clients\n");
+ goto err_sa;
+ }
+
ib_cache_setup();
return 0;
+err_sa:
+ ib_sa_cleanup();
+err_mad:
+ ib_mad_cleanup();
+err_addr:
+ addr_cleanup();
+err_ibnl:
+ ibnl_cleanup();
err_sysfs:
class_unregister(&ib_class);
err_comp:
@@ -999,6 +1053,10 @@ err:
static void __exit ib_core_cleanup(void)
{
ib_cache_cleanup();
+ ib_remove_ibnl_clients();
+ ib_sa_cleanup();
+ ib_mad_cleanup();
+ addr_cleanup();
ibnl_cleanup();
class_unregister(&ib_class);
destroy_workqueue(ib_comp_wq);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 9fa5bf33f5a3..82fb511112da 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -47,11 +47,7 @@
#include "smi.h"
#include "opa_smi.h"
#include "agent.h"
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("kernel IB MAD API");
-MODULE_AUTHOR("Hal Rosenstock");
-MODULE_AUTHOR("Sean Hefty");
+#include "core_priv.h"
static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
@@ -3316,7 +3312,7 @@ static struct ib_client mad_client = {
.remove = ib_mad_remove_device
};
-static int __init ib_mad_init_module(void)
+int ib_mad_init(void)
{
mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
@@ -3334,10 +3330,7 @@ static int __init ib_mad_init_module(void)
return 0;
}
-static void __exit ib_mad_cleanup_module(void)
+void ib_mad_cleanup(void)
{
ib_unregister_client(&mad_client);
}
-
-module_init(ib_mad_init_module);
-module_exit(ib_mad_cleanup_module);
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 250937cb9a1a..a83ec28a147b 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -93,6 +93,18 @@ enum {
struct mcast_member;
+/*
+* There are 4 types of join states:
+* FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember.
+*/
+enum {
+ FULLMEMBER_JOIN,
+ NONMEMBER_JOIN,
+ SENDONLY_NONMEBER_JOIN,
+ SENDONLY_FULLMEMBER_JOIN,
+ NUM_JOIN_MEMBERSHIP_TYPES,
+};
+
struct mcast_group {
struct ib_sa_mcmember_rec rec;
struct rb_node node;
@@ -102,7 +114,7 @@ struct mcast_group {
struct list_head pending_list;
struct list_head active_list;
struct mcast_member *last_join;
- int members[3];
+ int members[NUM_JOIN_MEMBERSHIP_TYPES];
atomic_t refcount;
enum mcast_group_state state;
struct ib_sa_query *query;
@@ -220,8 +232,9 @@ static void queue_join(struct mcast_member *member)
}
/*
- * A multicast group has three types of members: full member, non member, and
- * send only member. We need to keep track of the number of members of each
+ * A multicast group has four types of members: full member, non member,
+ * sendonly non member and sendonly full member.
+ * We need to keep track of the number of members of each
* type based on their join state. Adjust the number of members the belong to
* the specified join states.
*/
@@ -229,7 +242,7 @@ static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
{
int i;
- for (i = 0; i < 3; i++, join_state >>= 1)
+ for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++, join_state >>= 1)
if (join_state & 0x1)
group->members[i] += inc;
}
@@ -245,7 +258,7 @@ static u8 get_leave_state(struct mcast_group *group)
u8 leave_state = 0;
int i;
- for (i = 0; i < 3; i++)
+ for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++)
if (!group->members[i])
leave_state |= (0x1 << i);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 3ebd108bcc5f..e95538650dc6 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -53,10 +53,6 @@
#include "sa.h"
#include "core_priv.h"
-MODULE_AUTHOR("Roland Dreier");
-MODULE_DESCRIPTION("InfiniBand subnet administration query support");
-MODULE_LICENSE("Dual BSD/GPL");
-
#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000
@@ -119,6 +115,12 @@ struct ib_sa_guidinfo_query {
struct ib_sa_query sa_query;
};
+struct ib_sa_classport_info_query {
+ void (*callback)(int, struct ib_class_port_info *, void *);
+ void *context;
+ struct ib_sa_query sa_query;
+};
+
struct ib_sa_mcmember_query {
void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
void *context;
@@ -392,6 +394,82 @@ static const struct ib_field service_rec_table[] = {
.size_bits = 2*64 },
};
+#define CLASSPORTINFO_REC_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct ib_class_port_info, field), \
+ .struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \
+ .field_name = "ib_class_port_info:" #field
+
+static const struct ib_field classport_info_rec_table[] = {
+ { CLASSPORTINFO_REC_FIELD(base_version),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { CLASSPORTINFO_REC_FIELD(class_version),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { CLASSPORTINFO_REC_FIELD(capability_mask),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { CLASSPORTINFO_REC_FIELD(redirect_gid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { CLASSPORTINFO_REC_FIELD(redirect_tcslfl),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { CLASSPORTINFO_REC_FIELD(redirect_lid),
+ .offset_words = 7,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { CLASSPORTINFO_REC_FIELD(redirect_pkey),
+ .offset_words = 7,
+ .offset_bits = 16,
+ .size_bits = 16 },
+
+ { CLASSPORTINFO_REC_FIELD(redirect_qp),
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { CLASSPORTINFO_REC_FIELD(redirect_qkey),
+ .offset_words = 9,
+ .offset_bits = 0,
+ .size_bits = 32 },
+
+ { CLASSPORTINFO_REC_FIELD(trap_gid),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { CLASSPORTINFO_REC_FIELD(trap_tcslfl),
+ .offset_words = 14,
+ .offset_bits = 0,
+ .size_bits = 32 },
+
+ { CLASSPORTINFO_REC_FIELD(trap_lid),
+ .offset_words = 15,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { CLASSPORTINFO_REC_FIELD(trap_pkey),
+ .offset_words = 15,
+ .offset_bits = 16,
+ .size_bits = 16 },
+
+ { CLASSPORTINFO_REC_FIELD(trap_hlqp),
+ .offset_words = 16,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { CLASSPORTINFO_REC_FIELD(trap_qkey),
+ .offset_words = 17,
+ .offset_bits = 0,
+ .size_bits = 32 },
+};
+
#define GUIDINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
.struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
@@ -705,8 +783,8 @@ static void ib_nl_request_timeout(struct work_struct *work)
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
}
-static int ib_nl_handle_set_timeout(struct sk_buff *skb,
- struct netlink_callback *cb)
+int ib_nl_handle_set_timeout(struct sk_buff *skb,
+ struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
int timeout, delta, abs_delta;
@@ -782,8 +860,8 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
return 1;
}
-static int ib_nl_handle_resolve_resp(struct sk_buff *skb,
- struct netlink_callback *cb)
+int ib_nl_handle_resolve_resp(struct sk_buff *skb,
+ struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
unsigned long flags;
@@ -838,15 +916,6 @@ resp_out:
return skb->len;
}
-static struct ibnl_client_cbs ib_sa_cb_table[] = {
- [RDMA_NL_LS_OP_RESOLVE] = {
- .dump = ib_nl_handle_resolve_resp,
- .module = THIS_MODULE },
- [RDMA_NL_LS_OP_SET_TIMEOUT] = {
- .dump = ib_nl_handle_set_timeout,
- .module = THIS_MODULE },
-};
-
static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
@@ -1645,6 +1714,97 @@ err1:
}
EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
+/* Support get SA ClassPortInfo */
+static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
+ int status,
+ struct ib_sa_mad *mad)
+{
+ struct ib_sa_classport_info_query *query =
+ container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
+
+ if (mad) {
+ struct ib_class_port_info rec;
+
+ ib_unpack(classport_info_rec_table,
+ ARRAY_SIZE(classport_info_rec_table),
+ mad->data, &rec);
+ query->callback(status, &rec, query->context);
+ } else {
+ query->callback(status, NULL, query->context);
+ }
+}
+
+static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query)
+{
+ kfree(container_of(sa_query, struct ib_sa_classport_info_query,
+ sa_query));
+}
+
+int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_class_port_info *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ struct ib_sa_classport_info_query *query;
+ struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+ struct ib_sa_port *port;
+ struct ib_mad_agent *agent;
+ struct ib_sa_mad *mad;
+ int ret;
+
+ if (!sa_dev)
+ return -ENODEV;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ agent = port->agent;
+
+ query = kzalloc(sizeof(*query), gfp_mask);
+ if (!query)
+ return -ENOMEM;
+
+ query->sa_query.port = port;
+ ret = alloc_mad(&query->sa_query, gfp_mask);
+ if (ret)
+ goto err1;
+
+ ib_sa_client_get(client);
+ query->sa_query.client = client;
+ query->callback = callback;
+ query->context = context;
+
+ mad = query->sa_query.mad_buf->mad;
+ init_mad(mad, agent);
+
+ query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL;
+
+ query->sa_query.release = ib_sa_portclass_info_rec_release;
+ /* support GET only */
+ mad->mad_hdr.method = IB_MGMT_METHOD_GET;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
+ mad->sa_hdr.comp_mask = 0;
+ *sa_query = &query->sa_query;
+
+ ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+ if (ret < 0)
+ goto err2;
+
+ return ret;
+
+err2:
+ *sa_query = NULL;
+ ib_sa_client_put(query->sa_query.client);
+ free_mad(&query->sa_query);
+
+err1:
+ kfree(query);
+ return ret;
+}
+EXPORT_SYMBOL(ib_sa_classport_info_rec_query);
+
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
@@ -1794,7 +1954,7 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
kfree(sa_dev);
}
-static int __init ib_sa_init(void)
+int ib_sa_init(void)
{
int ret;
@@ -1820,17 +1980,10 @@ static int __init ib_sa_init(void)
goto err3;
}
- if (ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ib_sa_cb_table),
- ib_sa_cb_table)) {
- pr_err("Failed to add netlink callback\n");
- ret = -EINVAL;
- goto err4;
- }
INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
return 0;
-err4:
- destroy_workqueue(ib_nl_wq);
+
err3:
mcast_cleanup();
err2:
@@ -1839,9 +1992,8 @@ err1:
return ret;
}
-static void __exit ib_sa_cleanup(void)
+void ib_sa_cleanup(void)
{
- ibnl_remove_client(RDMA_NL_LS);
cancel_delayed_work(&ib_nl_timed_work);
flush_workqueue(ib_nl_wq);
destroy_workqueue(ib_nl_wq);
@@ -1849,6 +2001,3 @@ static void __exit ib_sa_cleanup(void)
ib_unregister_client(&sa_client);
idr_destroy(&query_idr);
}
-
-module_init(ib_sa_init);
-module_exit(ib_sa_cleanup);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 14606afbfaa8..5e573bb18660 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -56,8 +56,10 @@ struct ib_port {
struct gid_attr_group *gid_attr_group;
struct attribute_group gid_group;
struct attribute_group pkey_group;
- u8 port_num;
struct attribute_group *pma_table;
+ struct attribute_group *hw_stats_ag;
+ struct rdma_hw_stats *hw_stats;
+ u8 port_num;
};
struct port_attribute {
@@ -80,6 +82,18 @@ struct port_table_attribute {
__be16 attr_id;
};
+struct hw_stats_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct kobject *kobj,
+ struct attribute *attr, char *buf);
+ ssize_t (*store)(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf,
+ size_t count);
+ int index;
+ u8 port_num;
+};
+
static ssize_t port_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
@@ -733,6 +747,212 @@ static struct attribute_group *get_counter_table(struct ib_device *dev,
return &pma_group;
}
+static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
+ u8 port_num, int index)
+{
+ int ret;
+
+ if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan))
+ return 0;
+ ret = dev->get_hw_stats(dev, stats, port_num, index);
+ if (ret < 0)
+ return ret;
+ if (ret == stats->num_counters)
+ stats->timestamp = jiffies;
+
+ return 0;
+}
+
+static ssize_t print_hw_stat(struct rdma_hw_stats *stats, int index, char *buf)
+{
+ return sprintf(buf, "%llu\n", stats->value[index]);
+}
+
+static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct ib_device *dev;
+ struct ib_port *port;
+ struct hw_stats_attribute *hsa;
+ struct rdma_hw_stats *stats;
+ int ret;
+
+ hsa = container_of(attr, struct hw_stats_attribute, attr);
+ if (!hsa->port_num) {
+ dev = container_of((struct device *)kobj,
+ struct ib_device, dev);
+ stats = dev->hw_stats;
+ } else {
+ port = container_of(kobj, struct ib_port, kobj);
+ dev = port->ibdev;
+ stats = port->hw_stats;
+ }
+ ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index);
+ if (ret)
+ return ret;
+ return print_hw_stat(stats, hsa->index, buf);
+}
+
+static ssize_t show_stats_lifespan(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct hw_stats_attribute *hsa;
+ int msecs;
+
+ hsa = container_of(attr, struct hw_stats_attribute, attr);
+ if (!hsa->port_num) {
+ struct ib_device *dev = container_of((struct device *)kobj,
+ struct ib_device, dev);
+ msecs = jiffies_to_msecs(dev->hw_stats->lifespan);
+ } else {
+ struct ib_port *p = container_of(kobj, struct ib_port, kobj);
+ msecs = jiffies_to_msecs(p->hw_stats->lifespan);
+ }
+ return sprintf(buf, "%d\n", msecs);
+}
+
+static ssize_t set_stats_lifespan(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hw_stats_attribute *hsa;
+ int msecs;
+ int jiffies;
+ int ret;
+
+ ret = kstrtoint(buf, 10, &msecs);
+ if (ret)
+ return ret;
+ if (msecs < 0 || msecs > 10000)
+ return -EINVAL;
+ jiffies = msecs_to_jiffies(msecs);
+ hsa = container_of(attr, struct hw_stats_attribute, attr);
+ if (!hsa->port_num) {
+ struct ib_device *dev = container_of((struct device *)kobj,
+ struct ib_device, dev);
+ dev->hw_stats->lifespan = jiffies;
+ } else {
+ struct ib_port *p = container_of(kobj, struct ib_port, kobj);
+ p->hw_stats->lifespan = jiffies;
+ }
+ return count;
+}
+
+static void free_hsag(struct kobject *kobj, struct attribute_group *attr_group)
+{
+ struct attribute **attr;
+
+ sysfs_remove_group(kobj, attr_group);
+
+ for (attr = attr_group->attrs; *attr; attr++)
+ kfree(*attr);
+ kfree(attr_group);
+}
+
+static struct attribute *alloc_hsa(int index, u8 port_num, const char *name)
+{
+ struct hw_stats_attribute *hsa;
+
+ hsa = kmalloc(sizeof(*hsa), GFP_KERNEL);
+ if (!hsa)
+ return NULL;
+
+ hsa->attr.name = (char *)name;
+ hsa->attr.mode = S_IRUGO;
+ hsa->show = show_hw_stats;
+ hsa->store = NULL;
+ hsa->index = index;
+ hsa->port_num = port_num;
+
+ return &hsa->attr;
+}
+
+static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num)
+{
+ struct hw_stats_attribute *hsa;
+
+ hsa = kmalloc(sizeof(*hsa), GFP_KERNEL);
+ if (!hsa)
+ return NULL;
+
+ hsa->attr.name = name;
+ hsa->attr.mode = S_IWUSR | S_IRUGO;
+ hsa->show = show_stats_lifespan;
+ hsa->store = set_stats_lifespan;
+ hsa->index = 0;
+ hsa->port_num = port_num;
+
+ return &hsa->attr;
+}
+
+static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
+ u8 port_num)
+{
+ struct attribute_group *hsag = NULL;
+ struct rdma_hw_stats *stats;
+ int i = 0, ret;
+
+ stats = device->alloc_hw_stats(device, port_num);
+
+ if (!stats)
+ return;
+
+ if (!stats->names || stats->num_counters <= 0)
+ goto err;
+
+ hsag = kzalloc(sizeof(*hsag) +
+ // 1 extra for the lifespan config entry
+ sizeof(void *) * (stats->num_counters + 1),
+ GFP_KERNEL);
+ if (!hsag)
+ return;
+
+ ret = device->get_hw_stats(device, stats, port_num,
+ stats->num_counters);
+ if (ret != stats->num_counters)
+ goto err;
+
+ stats->timestamp = jiffies;
+
+ hsag->name = "hw_counters";
+ hsag->attrs = (void *)hsag + sizeof(*hsag);
+
+ for (i = 0; i < stats->num_counters; i++) {
+ hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]);
+ if (!hsag->attrs[i])
+ goto err;
+ }
+
+ /* treat an error here as non-fatal */
+ hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num);
+
+ if (port) {
+ struct kobject *kobj = &port->kobj;
+ ret = sysfs_create_group(kobj, hsag);
+ if (ret)
+ goto err;
+ port->hw_stats_ag = hsag;
+ port->hw_stats = stats;
+ } else {
+ struct kobject *kobj = &device->dev.kobj;
+ ret = sysfs_create_group(kobj, hsag);
+ if (ret)
+ goto err;
+ device->hw_stats_ag = hsag;
+ device->hw_stats = stats;
+ }
+
+ return;
+
+err:
+ kfree(stats);
+ for (; i >= 0; i--)
+ kfree(hsag->attrs[i]);
+ kfree(hsag);
+ return;
+}
+
static int add_port(struct ib_device *device, int port_num,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
@@ -835,6 +1055,14 @@ static int add_port(struct ib_device *device, int port_num,
goto err_remove_pkey;
}
+ /*
+ * If port == 0, it means we have only one port and the parent
+ * device, not this port device, should be the holder of the
+ * hw_counters
+ */
+ if (device->alloc_hw_stats && port_num)
+ setup_hw_stats(device, p, port_num);
+
list_add_tail(&p->kobj.entry, &device->port_list);
kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -972,120 +1200,6 @@ static struct device_attribute *ib_class_attributes[] = {
&dev_attr_node_desc
};
-/* Show a given an attribute in the statistics group */
-static ssize_t show_protocol_stat(const struct device *device,
- struct device_attribute *attr, char *buf,
- unsigned offset)
-{
- struct ib_device *dev = container_of(device, struct ib_device, dev);
- union rdma_protocol_stats stats;
- ssize_t ret;
-
- ret = dev->get_protocol_stats(dev, &stats);
- if (ret)
- return ret;
-
- return sprintf(buf, "%llu\n",
- (unsigned long long) ((u64 *) &stats)[offset]);
-}
-
-/* generate a read-only iwarp statistics attribute */
-#define IW_STATS_ENTRY(name) \
-static ssize_t show_##name(struct device *device, \
- struct device_attribute *attr, char *buf) \
-{ \
- return show_protocol_stat(device, attr, buf, \
- offsetof(struct iw_protocol_stats, name) / \
- sizeof (u64)); \
-} \
-static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
-
-IW_STATS_ENTRY(ipInReceives);
-IW_STATS_ENTRY(ipInHdrErrors);
-IW_STATS_ENTRY(ipInTooBigErrors);
-IW_STATS_ENTRY(ipInNoRoutes);
-IW_STATS_ENTRY(ipInAddrErrors);
-IW_STATS_ENTRY(ipInUnknownProtos);
-IW_STATS_ENTRY(ipInTruncatedPkts);
-IW_STATS_ENTRY(ipInDiscards);
-IW_STATS_ENTRY(ipInDelivers);
-IW_STATS_ENTRY(ipOutForwDatagrams);
-IW_STATS_ENTRY(ipOutRequests);
-IW_STATS_ENTRY(ipOutDiscards);
-IW_STATS_ENTRY(ipOutNoRoutes);
-IW_STATS_ENTRY(ipReasmTimeout);
-IW_STATS_ENTRY(ipReasmReqds);
-IW_STATS_ENTRY(ipReasmOKs);
-IW_STATS_ENTRY(ipReasmFails);
-IW_STATS_ENTRY(ipFragOKs);
-IW_STATS_ENTRY(ipFragFails);
-IW_STATS_ENTRY(ipFragCreates);
-IW_STATS_ENTRY(ipInMcastPkts);
-IW_STATS_ENTRY(ipOutMcastPkts);
-IW_STATS_ENTRY(ipInBcastPkts);
-IW_STATS_ENTRY(ipOutBcastPkts);
-IW_STATS_ENTRY(tcpRtoAlgorithm);
-IW_STATS_ENTRY(tcpRtoMin);
-IW_STATS_ENTRY(tcpRtoMax);
-IW_STATS_ENTRY(tcpMaxConn);
-IW_STATS_ENTRY(tcpActiveOpens);
-IW_STATS_ENTRY(tcpPassiveOpens);
-IW_STATS_ENTRY(tcpAttemptFails);
-IW_STATS_ENTRY(tcpEstabResets);
-IW_STATS_ENTRY(tcpCurrEstab);
-IW_STATS_ENTRY(tcpInSegs);
-IW_STATS_ENTRY(tcpOutSegs);
-IW_STATS_ENTRY(tcpRetransSegs);
-IW_STATS_ENTRY(tcpInErrs);
-IW_STATS_ENTRY(tcpOutRsts);
-
-static struct attribute *iw_proto_stats_attrs[] = {
- &dev_attr_ipInReceives.attr,
- &dev_attr_ipInHdrErrors.attr,
- &dev_attr_ipInTooBigErrors.attr,
- &dev_attr_ipInNoRoutes.attr,
- &dev_attr_ipInAddrErrors.attr,
- &dev_attr_ipInUnknownProtos.attr,
- &dev_attr_ipInTruncatedPkts.attr,
- &dev_attr_ipInDiscards.attr,
- &dev_attr_ipInDelivers.attr,
- &dev_attr_ipOutForwDatagrams.attr,
- &dev_attr_ipOutRequests.attr,
- &dev_attr_ipOutDiscards.attr,
- &dev_attr_ipOutNoRoutes.attr,
- &dev_attr_ipReasmTimeout.attr,
- &dev_attr_ipReasmReqds.attr,
- &dev_attr_ipReasmOKs.attr,
- &dev_attr_ipReasmFails.attr,
- &dev_attr_ipFragOKs.attr,
- &dev_attr_ipFragFails.attr,
- &dev_attr_ipFragCreates.attr,
- &dev_attr_ipInMcastPkts.attr,
- &dev_attr_ipOutMcastPkts.attr,
- &dev_attr_ipInBcastPkts.attr,
- &dev_attr_ipOutBcastPkts.attr,
- &dev_attr_tcpRtoAlgorithm.attr,
- &dev_attr_tcpRtoMin.attr,
- &dev_attr_tcpRtoMax.attr,
- &dev_attr_tcpMaxConn.attr,
- &dev_attr_tcpActiveOpens.attr,
- &dev_attr_tcpPassiveOpens.attr,
- &dev_attr_tcpAttemptFails.attr,
- &dev_attr_tcpEstabResets.attr,
- &dev_attr_tcpCurrEstab.attr,
- &dev_attr_tcpInSegs.attr,
- &dev_attr_tcpOutSegs.attr,
- &dev_attr_tcpRetransSegs.attr,
- &dev_attr_tcpInErrs.attr,
- &dev_attr_tcpOutRsts.attr,
- NULL
-};
-
-static struct attribute_group iw_stats_group = {
- .name = "proto_stats",
- .attrs = iw_proto_stats_attrs,
-};
-
static void free_port_list_attributes(struct ib_device *device)
{
struct kobject *p, *t;
@@ -1093,6 +1207,10 @@ static void free_port_list_attributes(struct ib_device *device)
list_for_each_entry_safe(p, t, &device->port_list, entry) {
struct ib_port *port = container_of(p, struct ib_port, kobj);
list_del(&p->entry);
+ if (port->hw_stats) {
+ kfree(port->hw_stats);
+ free_hsag(&port->kobj, port->hw_stats_ag);
+ }
sysfs_remove_group(p, port->pma_table);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
@@ -1149,11 +1267,8 @@ int ib_device_register_sysfs(struct ib_device *device,
}
}
- if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) {
- ret = sysfs_create_group(&class_dev->kobj, &iw_stats_group);
- if (ret)
- goto err_put;
- }
+ if (device->alloc_hw_stats)
+ setup_hw_stats(device, NULL, 0);
return 0;
@@ -1169,15 +1284,18 @@ err:
void ib_device_unregister_sysfs(struct ib_device *device)
{
- /* Hold kobject until ib_dealloc_device() */
- struct kobject *kobj_dev = kobject_get(&device->dev.kobj);
int i;
- if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats)
- sysfs_remove_group(kobj_dev, &iw_stats_group);
+ /* Hold kobject until ib_dealloc_device() */
+ kobject_get(&device->dev.kobj);
free_port_list_attributes(device);
+ if (device->hw_stats) {
+ kfree(device->hw_stats);
+ free_hsag(&device->dev.kobj, device->hw_stats_ag);
+ }
+
for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
device_remove_file(&device->dev, ib_class_attributes[i]);
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index c7ad0a4c8b15..c0c7cf8af3f4 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
obj-$(CONFIG_INFINIBAND_NES) += nes/
obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
+obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index de1c61b417d6..ada2e5009c86 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -327,7 +327,7 @@ int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
kfree(cq->sw_queue);
dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
(1UL << (cq->size_log2))
- * sizeof(struct t3_cqe), cq->queue,
+ * sizeof(struct t3_cqe) + 1, cq->queue,
dma_unmap_addr(cq, mapping));
cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
return err;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 47cb927a0dd6..bb1a839d4d6d 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1218,59 +1218,119 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
iwch_dev->rdev.rnic_info.pdev->device);
}
-static int iwch_get_mib(struct ib_device *ibdev,
- union rdma_protocol_stats *stats)
+enum counters {
+ IPINRECEIVES,
+ IPINHDRERRORS,
+ IPINADDRERRORS,
+ IPINUNKNOWNPROTOS,
+ IPINDISCARDS,
+ IPINDELIVERS,
+ IPOUTREQUESTS,
+ IPOUTDISCARDS,
+ IPOUTNOROUTES,
+ IPREASMTIMEOUT,
+ IPREASMREQDS,
+ IPREASMOKS,
+ IPREASMFAILS,
+ TCPACTIVEOPENS,
+ TCPPASSIVEOPENS,
+ TCPATTEMPTFAILS,
+ TCPESTABRESETS,
+ TCPCURRESTAB,
+ TCPINSEGS,
+ TCPOUTSEGS,
+ TCPRETRANSSEGS,
+ TCPINERRS,
+ TCPOUTRSTS,
+ TCPRTOMIN,
+ TCPRTOMAX,
+ NR_COUNTERS
+};
+
+static const char * const names[] = {
+ [IPINRECEIVES] = "ipInReceives",
+ [IPINHDRERRORS] = "ipInHdrErrors",
+ [IPINADDRERRORS] = "ipInAddrErrors",
+ [IPINUNKNOWNPROTOS] = "ipInUnknownProtos",
+ [IPINDISCARDS] = "ipInDiscards",
+ [IPINDELIVERS] = "ipInDelivers",
+ [IPOUTREQUESTS] = "ipOutRequests",
+ [IPOUTDISCARDS] = "ipOutDiscards",
+ [IPOUTNOROUTES] = "ipOutNoRoutes",
+ [IPREASMTIMEOUT] = "ipReasmTimeout",
+ [IPREASMREQDS] = "ipReasmReqds",
+ [IPREASMOKS] = "ipReasmOKs",
+ [IPREASMFAILS] = "ipReasmFails",
+ [TCPACTIVEOPENS] = "tcpActiveOpens",
+ [TCPPASSIVEOPENS] = "tcpPassiveOpens",
+ [TCPATTEMPTFAILS] = "tcpAttemptFails",
+ [TCPESTABRESETS] = "tcpEstabResets",
+ [TCPCURRESTAB] = "tcpCurrEstab",
+ [TCPINSEGS] = "tcpInSegs",
+ [TCPOUTSEGS] = "tcpOutSegs",
+ [TCPRETRANSSEGS] = "tcpRetransSegs",
+ [TCPINERRS] = "tcpInErrs",
+ [TCPOUTRSTS] = "tcpOutRsts",
+ [TCPRTOMIN] = "tcpRtoMin",
+ [TCPRTOMAX] = "tcpRtoMax",
+};
+
+static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
+
+ /* Our driver only supports device level stats */
+ if (port_num != 0)
+ return NULL;
+
+ return rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u8 port, int index)
{
struct iwch_dev *dev;
struct tp_mib_stats m;
int ret;
+ if (port != 0 || !stats)
+ return -ENOSYS;
+
PDBG("%s ibdev %p\n", __func__, ibdev);
dev = to_iwch_dev(ibdev);
ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m);
if (ret)
return -ENOSYS;
- memset(stats, 0, sizeof *stats);
- stats->iw.ipInReceives = ((u64) m.ipInReceive_hi << 32) +
- m.ipInReceive_lo;
- stats->iw.ipInHdrErrors = ((u64) m.ipInHdrErrors_hi << 32) +
- m.ipInHdrErrors_lo;
- stats->iw.ipInAddrErrors = ((u64) m.ipInAddrErrors_hi << 32) +
- m.ipInAddrErrors_lo;
- stats->iw.ipInUnknownProtos = ((u64) m.ipInUnknownProtos_hi << 32) +
- m.ipInUnknownProtos_lo;
- stats->iw.ipInDiscards = ((u64) m.ipInDiscards_hi << 32) +
- m.ipInDiscards_lo;
- stats->iw.ipInDelivers = ((u64) m.ipInDelivers_hi << 32) +
- m.ipInDelivers_lo;
- stats->iw.ipOutRequests = ((u64) m.ipOutRequests_hi << 32) +
- m.ipOutRequests_lo;
- stats->iw.ipOutDiscards = ((u64) m.ipOutDiscards_hi << 32) +
- m.ipOutDiscards_lo;
- stats->iw.ipOutNoRoutes = ((u64) m.ipOutNoRoutes_hi << 32) +
- m.ipOutNoRoutes_lo;
- stats->iw.ipReasmTimeout = (u64) m.ipReasmTimeout;
- stats->iw.ipReasmReqds = (u64) m.ipReasmReqds;
- stats->iw.ipReasmOKs = (u64) m.ipReasmOKs;
- stats->iw.ipReasmFails = (u64) m.ipReasmFails;
- stats->iw.tcpActiveOpens = (u64) m.tcpActiveOpens;
- stats->iw.tcpPassiveOpens = (u64) m.tcpPassiveOpens;
- stats->iw.tcpAttemptFails = (u64) m.tcpAttemptFails;
- stats->iw.tcpEstabResets = (u64) m.tcpEstabResets;
- stats->iw.tcpOutRsts = (u64) m.tcpOutRsts;
- stats->iw.tcpCurrEstab = (u64) m.tcpCurrEstab;
- stats->iw.tcpInSegs = ((u64) m.tcpInSegs_hi << 32) +
- m.tcpInSegs_lo;
- stats->iw.tcpOutSegs = ((u64) m.tcpOutSegs_hi << 32) +
- m.tcpOutSegs_lo;
- stats->iw.tcpRetransSegs = ((u64) m.tcpRetransSeg_hi << 32) +
- m.tcpRetransSeg_lo;
- stats->iw.tcpInErrs = ((u64) m.tcpInErrs_hi << 32) +
- m.tcpInErrs_lo;
- stats->iw.tcpRtoMin = (u64) m.tcpRtoMin;
- stats->iw.tcpRtoMax = (u64) m.tcpRtoMax;
- return 0;
+ stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) + m.ipInReceive_lo;
+ stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo;
+ stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) + m.ipInAddrErrors_lo;
+ stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo;
+ stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo;
+ stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo;
+ stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + m.ipOutRequests_lo;
+ stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo;
+ stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo;
+ stats->value[IPREASMTIMEOUT] = m.ipReasmTimeout;
+ stats->value[IPREASMREQDS] = m.ipReasmReqds;
+ stats->value[IPREASMOKS] = m.ipReasmOKs;
+ stats->value[IPREASMFAILS] = m.ipReasmFails;
+ stats->value[TCPACTIVEOPENS] = m.tcpActiveOpens;
+ stats->value[TCPPASSIVEOPENS] = m.tcpPassiveOpens;
+ stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails;
+ stats->value[TCPESTABRESETS] = m.tcpEstabResets;
+ stats->value[TCPCURRESTAB] = m.tcpOutRsts;
+ stats->value[TCPINSEGS] = m.tcpCurrEstab;
+ stats->value[TCPOUTSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo;
+ stats->value[TCPRETRANSSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo;
+ stats->value[TCPINERRS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo,
+ stats->value[TCPOUTRSTS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo;
+ stats->value[TCPRTOMIN] = m.tcpRtoMin;
+ stats->value[TCPRTOMAX] = m.tcpRtoMax;
+
+ return stats->num_counters;
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
@@ -1373,7 +1433,8 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.req_notify_cq = iwch_arm_cq;
dev->ibdev.post_send = iwch_post_send;
dev->ibdev.post_recv = iwch_post_receive;
- dev->ibdev.get_protocol_stats = iwch_get_mib;
+ dev->ibdev.alloc_hw_stats = iwch_alloc_stats;
+ dev->ibdev.get_hw_stats = iwch_get_mib;
dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
dev->ibdev.get_port_immutable = iwch_port_immutable;
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 7574f394fdac..dd8a86b726d2 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -446,20 +446,59 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
c4iw_dev->rdev.lldi.pdev->device);
}
+enum counters {
+ IP4INSEGS,
+ IP4OUTSEGS,
+ IP4RETRANSSEGS,
+ IP4OUTRSTS,
+ IP6INSEGS,
+ IP6OUTSEGS,
+ IP6RETRANSSEGS,
+ IP6OUTRSTS,
+ NR_COUNTERS
+};
+
+static const char * const names[] = {
+ [IP4INSEGS] = "ip4InSegs",
+ [IP4OUTSEGS] = "ip4OutSegs",
+ [IP4RETRANSSEGS] = "ip4RetransSegs",
+ [IP4OUTRSTS] = "ip4OutRsts",
+ [IP6INSEGS] = "ip6InSegs",
+ [IP6OUTSEGS] = "ip6OutSegs",
+ [IP6RETRANSSEGS] = "ip6RetransSegs",
+ [IP6OUTRSTS] = "ip6OutRsts"
+};
+
+static struct rdma_hw_stats *c4iw_alloc_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
+
+ if (port_num != 0)
+ return NULL;
+
+ return rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
static int c4iw_get_mib(struct ib_device *ibdev,
- union rdma_protocol_stats *stats)
+ struct rdma_hw_stats *stats,
+ u8 port, int index)
{
struct tp_tcp_stats v4, v6;
struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev);
cxgb4_get_tcp_stats(c4iw_dev->rdev.lldi.pdev, &v4, &v6);
- memset(stats, 0, sizeof *stats);
- stats->iw.tcpInSegs = v4.tcp_in_segs + v6.tcp_in_segs;
- stats->iw.tcpOutSegs = v4.tcp_out_segs + v6.tcp_out_segs;
- stats->iw.tcpRetransSegs = v4.tcp_retrans_segs + v6.tcp_retrans_segs;
- stats->iw.tcpOutRsts = v4.tcp_out_rsts + v6.tcp_out_rsts;
-
- return 0;
+ stats->value[IP4INSEGS] = v4.tcp_in_segs;
+ stats->value[IP4OUTSEGS] = v4.tcp_out_segs;
+ stats->value[IP4RETRANSSEGS] = v4.tcp_retrans_segs;
+ stats->value[IP4OUTRSTS] = v4.tcp_out_rsts;
+ stats->value[IP6INSEGS] = v6.tcp_in_segs;
+ stats->value[IP6OUTSEGS] = v6.tcp_out_segs;
+ stats->value[IP6RETRANSSEGS] = v6.tcp_retrans_segs;
+ stats->value[IP6OUTRSTS] = v6.tcp_out_rsts;
+
+ return stats->num_counters;
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
@@ -562,7 +601,8 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.req_notify_cq = c4iw_arm_cq;
dev->ibdev.post_send = c4iw_post_send;
dev->ibdev.post_recv = c4iw_post_receive;
- dev->ibdev.get_protocol_stats = c4iw_get_mib;
+ dev->ibdev.alloc_hw_stats = c4iw_alloc_stats;
+ dev->ibdev.get_hw_stats = c4iw_get_mib;
dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
dev->ibdev.get_port_immutable = c4iw_port_immutable;
dev->ibdev.drain_sq = c4iw_drain_sq;
diff --git a/drivers/staging/rdma/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig
index a925fb0db706..a925fb0db706 100644
--- a/drivers/staging/rdma/hfi1/Kconfig
+++ b/drivers/infiniband/hw/hfi1/Kconfig
diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 8dc59382ee96..9b5382c94b0c 100644
--- a/drivers/staging/rdma/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -7,7 +7,7 @@
#
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
-hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \
+hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
eprom.o file_ops.o firmware.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \
diff --git a/drivers/staging/rdma/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 6e7050ab9e16..6e7050ab9e16 100644
--- a/drivers/staging/rdma/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
diff --git a/drivers/staging/rdma/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 20f52fe74091..20f52fe74091 100644
--- a/drivers/staging/rdma/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
diff --git a/drivers/staging/rdma/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3b49b5..0d58fe3b49b5 100644
--- a/drivers/staging/rdma/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index dcae8e723f98..3b876da745a1 100644
--- a/drivers/staging/rdma/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1037,6 +1037,7 @@ static void dc_shutdown(struct hfi1_devdata *);
static void dc_start(struct hfi1_devdata *);
static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
unsigned int *np);
+static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd);
/*
* Error interrupt table entry. This is used as input to the interrupt
@@ -6105,7 +6106,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
}
/* this access is valid only when the link is up */
- if ((ppd->host_link_state & HLS_UP) == 0) {
+ if (ppd->host_link_state & HLS_DOWN) {
dd_dev_info(dd, "%s: link state %s not up\n",
__func__, link_state_name(ppd->host_link_state));
ret = -EBUSY;
@@ -6961,6 +6962,8 @@ void handle_link_down(struct work_struct *work)
}
reset_neighbor_info(ppd);
+ if (ppd->mgmt_allowed)
+ remove_full_mgmt_pkey(ppd);
/* disable the port */
clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
@@ -7069,6 +7072,12 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
}
+static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd)
+{
+ ppd->pkeys[2] = 0;
+ (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+}
+
/*
* Convert the given link width to the OPA link width bitmask.
*/
@@ -7429,7 +7438,7 @@ void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths)
retry:
mutex_lock(&ppd->hls_lock);
/* only apply if the link is up */
- if (!(ppd->host_link_state & HLS_UP)) {
+ if (ppd->host_link_state & HLS_DOWN) {
/* still going up..wait and retry */
if (ppd->host_link_state & HLS_GOING_UP) {
if (++tries < 1000) {
@@ -9212,9 +9221,6 @@ void reset_qsfp(struct hfi1_pportdata *ppd)
/* Reset the QSFP */
mask = (u64)QSFP_HFI0_RESET_N;
- qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
- qsfp_mask |= mask;
- write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask);
qsfp_mask = read_csr(dd,
dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
@@ -9252,6 +9258,12 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
dd_dev_info(dd, "%s: QSFP cable temperature too low\n",
__func__);
+ /*
+ * The remaining alarms/warnings don't matter if the link is down.
+ */
+ if (ppd->host_link_state & HLS_DOWN)
+ return 0;
+
if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
(qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
dd_dev_info(dd, "%s: QSFP supply voltage too high\n",
@@ -9346,9 +9358,8 @@ void qsfp_event(struct work_struct *work)
return;
/*
- * Turn DC back on after cables has been
- * re-inserted. Up until now, the DC has been in
- * reset to save power.
+ * Turn DC back on after cable has been re-inserted. Up until
+ * now, the DC has been in reset to save power.
*/
dc_start(dd);
@@ -9480,7 +9491,15 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
return ret;
}
- /* tune the SERDES to a ballpark setting for
+ get_port_type(ppd);
+ if (ppd->port_type == PORT_TYPE_QSFP) {
+ set_qsfp_int_n(ppd, 0);
+ wait_for_qsfp_init(ppd);
+ set_qsfp_int_n(ppd, 1);
+ }
+
+ /*
+ * Tune the SerDes to a ballpark setting for
* optimal signal and bit error rate
* Needs to be done before starting the link
*/
@@ -10074,7 +10093,7 @@ u32 driver_physical_state(struct hfi1_pportdata *ppd)
*/
u32 driver_logical_state(struct hfi1_pportdata *ppd)
{
- if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP))
+ if (ppd->host_link_state && (ppd->host_link_state & HLS_DOWN))
return IB_PORT_DOWN;
switch (ppd->host_link_state & HLS_UP) {
@@ -14578,7 +14597,7 @@ u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
(reason), (ret))
/*
- * Initialize the Avago Thermal sensor.
+ * Initialize the thermal sensor.
*
* After initialization, enable polling of thermal sensor through
* SBus interface. In order for this to work, the SBus Master
diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 1948706fff1a..66a327978739 100644
--- a/drivers/staging/rdma/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -398,6 +398,12 @@
/* Lane ID for general configuration registers */
#define GENERAL_CONFIG 4
+/* LINK_TUNING_PARAMETERS fields */
+#define TUNING_METHOD_SHIFT 24
+
+/* LINK_OPTIMIZATION_SETTINGS fields */
+#define ENABLE_EXT_DEV_CONFIG_SHIFT 24
+
/* LOAD_DATA 8051 command shifts and fields */
#define LOAD_DATA_FIELD_ID_SHIFT 40
#define LOAD_DATA_FIELD_ID_MASK 0xfull
diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 8744de6667c2..8744de6667c2 100644
--- a/drivers/staging/rdma/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index e9b6bb322025..fcc9c217a97a 100644
--- a/drivers/staging/rdma/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -178,7 +178,8 @@
HFI1_CAP_PKEY_CHECK | \
HFI1_CAP_NO_INTEGRITY)
-#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << 16) | HFI1_USER_SWMINOR)
+#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \
+ HFI1_USER_SWMINOR)
#ifndef HFI1_KERN_TYPE
#define HFI1_KERN_TYPE 0
@@ -349,6 +350,8 @@ struct hfi1_message_header {
#define HFI1_BECN_MASK 1
#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT)
+#define HFI1_PSM_IOC_BASE_SEQ 0x0
+
static inline __u64 rhf_to_cpu(const __le32 *rbuf)
{
return __le64_to_cpu(*((__le64 *)rbuf));
diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index dbab9d9cc288..dbab9d9cc288 100644
--- a/drivers/staging/rdma/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
diff --git a/drivers/staging/rdma/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index b6fb6814f1b8..b6fb6814f1b8 100644
--- a/drivers/staging/rdma/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c
index c05c39da83b1..bf64b5a7bfd7 100644
--- a/drivers/staging/rdma/hfi1/device.c
+++ b/drivers/infiniband/hw/hfi1/device.c
@@ -60,7 +60,8 @@ static dev_t hfi1_dev;
int hfi1_cdev_init(int minor, const char *name,
const struct file_operations *fops,
struct cdev *cdev, struct device **devp,
- bool user_accessible)
+ bool user_accessible,
+ struct kobject *parent)
{
const dev_t dev = MKDEV(MAJOR(hfi1_dev), minor);
struct device *device = NULL;
@@ -68,6 +69,7 @@ int hfi1_cdev_init(int minor, const char *name,
cdev_init(cdev, fops);
cdev->owner = THIS_MODULE;
+ cdev->kobj.parent = parent;
kobject_set_name(&cdev->kobj, name);
ret = cdev_add(cdev, dev, 1);
@@ -82,13 +84,13 @@ int hfi1_cdev_init(int minor, const char *name,
else
device = device_create(class, NULL, dev, NULL, "%s", name);
- if (!IS_ERR(device))
- goto done;
- ret = PTR_ERR(device);
- device = NULL;
- pr_err("Could not create device for minor %d, %s (err %d)\n",
- minor, name, -ret);
- cdev_del(cdev);
+ if (IS_ERR(device)) {
+ ret = PTR_ERR(device);
+ device = NULL;
+ pr_err("Could not create device for minor %d, %s (err %d)\n",
+ minor, name, -ret);
+ cdev_del(cdev);
+ }
done:
*devp = device;
return ret;
diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/infiniband/hw/hfi1/device.h
index 5bb3e83cf2da..c3ec19cb0ac9 100644
--- a/drivers/staging/rdma/hfi1/device.h
+++ b/drivers/infiniband/hw/hfi1/device.h
@@ -50,7 +50,8 @@
int hfi1_cdev_init(int minor, const char *name,
const struct file_operations *fops,
struct cdev *cdev, struct device **devp,
- bool user_accessible);
+ bool user_accessible,
+ struct kobject *parent);
void hfi1_cdev_cleanup(struct cdev *cdev, struct device **devp);
const char *class_name(void);
int __init dev_init(void);
diff --git a/drivers/staging/rdma/hfi1/dma.c b/drivers/infiniband/hw/hfi1/dma.c
index 7e8dab892848..7e8dab892848 100644
--- a/drivers/staging/rdma/hfi1/dma.c
+++ b/drivers/infiniband/hw/hfi1/dma.c
diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 700c6fa3a633..c75b0ae688f8 100644
--- a/drivers/staging/rdma/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1161,7 +1161,7 @@ int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc)
ppd->lmc = lmc;
hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LIDLMC, 0);
- dd_dev_info(dd, "IB%u:%u got a lid: 0x%x\n", dd->unit, ppd->port, lid);
+ dd_dev_info(dd, "port %u: got a lid: 0x%x\n", ppd->port, lid);
return 0;
}
diff --git a/drivers/staging/rdma/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c
index 106349fc1fb9..106349fc1fb9 100644
--- a/drivers/staging/rdma/hfi1/efivar.c
+++ b/drivers/infiniband/hw/hfi1/efivar.c
diff --git a/drivers/staging/rdma/hfi1/efivar.h b/drivers/infiniband/hw/hfi1/efivar.h
index 94e9e70de568..94e9e70de568 100644
--- a/drivers/staging/rdma/hfi1/efivar.h
+++ b/drivers/infiniband/hw/hfi1/efivar.h
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
new file mode 100644
index 000000000000..36b77943cbfd
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/delay.h>
+#include "hfi.h"
+#include "common.h"
+#include "eprom.h"
+
+#define CMD_SHIFT 24
+#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT))
+
+/* controller interface speeds */
+#define EP_SPEED_FULL 0x2 /* full speed */
+
+/*
+ * How long to wait for the EPROM to become available, in ms.
+ * The spec 32 Mb EPROM takes around 40s to erase then write.
+ * Double it for safety.
+ */
+#define EPROM_TIMEOUT 80000 /* ms */
+/*
+ * Initialize the EPROM handler.
+ */
+int eprom_init(struct hfi1_devdata *dd)
+{
+ int ret = 0;
+
+ /* only the discrete chip has an EPROM */
+ if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0)
+ return 0;
+
+ /*
+ * It is OK if both HFIs reset the EPROM as long as they don't
+ * do it at the same time.
+ */
+ ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
+ if (ret) {
+ dd_dev_err(dd,
+ "%s: unable to acquire EPROM resource, no EPROM support\n",
+ __func__);
+ goto done_asic;
+ }
+
+ /* reset EPROM to be sure it is in a good state */
+
+ /* set reset */
+ write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
+ /* clear reset, set speed */
+ write_csr(dd, ASIC_EEP_CTL_STAT,
+ EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);
+
+ /* wake the device with command "release powerdown NoID" */
+ write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID);
+
+ dd->eprom_available = true;
+ release_chip_resource(dd, CR_EPROM);
+done_asic:
+ return ret;
+}
diff --git a/drivers/staging/rdma/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h
index d41f0b1afb15..d41f0b1afb15 100644
--- a/drivers/staging/rdma/hfi1/eprom.h
+++ b/drivers/infiniband/hw/hfi1/eprom.h
diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index c1c5bf82addb..7a5b0e676cc7 100644
--- a/drivers/staging/rdma/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -72,8 +72,6 @@
*/
static int hfi1_file_open(struct inode *, struct file *);
static int hfi1_file_close(struct inode *, struct file *);
-static ssize_t hfi1_file_write(struct file *, const char __user *,
- size_t, loff_t *);
static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *);
static unsigned int hfi1_poll(struct file *, struct poll_table_struct *);
static int hfi1_file_mmap(struct file *, struct vm_area_struct *);
@@ -86,8 +84,7 @@ static int get_ctxt_info(struct file *, void __user *, __u32);
static int get_base_info(struct file *, void __user *, __u32);
static int setup_ctxt(struct file *);
static int setup_subctxt(struct hfi1_ctxtdata *);
-static int get_user_context(struct file *, struct hfi1_user_info *,
- int, unsigned);
+static int get_user_context(struct file *, struct hfi1_user_info *, int);
static int find_shared_ctxt(struct file *, const struct hfi1_user_info *);
static int allocate_ctxt(struct file *, struct hfi1_devdata *,
struct hfi1_user_info *);
@@ -97,13 +94,15 @@ static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
static int vma_fault(struct vm_area_struct *, struct vm_fault *);
+static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
+ unsigned long arg);
static const struct file_operations hfi1_file_ops = {
.owner = THIS_MODULE,
- .write = hfi1_file_write,
.write_iter = hfi1_write_iter,
.open = hfi1_file_open,
.release = hfi1_file_close,
+ .unlocked_ioctl = hfi1_file_ioctl,
.poll = hfi1_poll,
.mmap = hfi1_file_mmap,
.llseek = noop_llseek,
@@ -169,6 +168,13 @@ static inline int is_valid_mmap(u64 token)
static int hfi1_file_open(struct inode *inode, struct file *fp)
{
+ struct hfi1_devdata *dd = container_of(inode->i_cdev,
+ struct hfi1_devdata,
+ user_cdev);
+
+ /* Just take a ref now. Not all opens result in a context assign */
+ kobject_get(&dd->kobj);
+
/* The real work is performed later in assign_ctxt() */
fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL);
if (fp->private_data) /* no cpu affinity by default */
@@ -176,127 +182,59 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
return fp->private_data ? 0 : -ENOMEM;
}
-static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
- size_t count, loff_t *offset)
+static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
+ unsigned long arg)
{
- const struct hfi1_cmd __user *ucmd;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_cmd cmd;
struct hfi1_user_info uinfo;
struct hfi1_tid_info tinfo;
+ int ret = 0;
unsigned long addr;
- ssize_t consumed = 0, copy = 0, ret = 0;
- void *dest = NULL;
- __u64 user_val = 0;
- int uctxt_required = 1;
- int must_be_root = 0;
-
- /* FIXME: This interface cannot continue out of staging */
- if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
- return -EACCES;
-
- if (count < sizeof(cmd)) {
- ret = -EINVAL;
- goto bail;
- }
-
- ucmd = (const struct hfi1_cmd __user *)data;
- if (copy_from_user(&cmd, ucmd, sizeof(cmd))) {
- ret = -EFAULT;
- goto bail;
- }
-
- consumed = sizeof(cmd);
-
- switch (cmd.type) {
- case HFI1_CMD_ASSIGN_CTXT:
- uctxt_required = 0; /* assigned user context not required */
- copy = sizeof(uinfo);
- dest = &uinfo;
- break;
- case HFI1_CMD_SDMA_STATUS_UPD:
- case HFI1_CMD_CREDIT_UPD:
- copy = 0;
- break;
- case HFI1_CMD_TID_UPDATE:
- case HFI1_CMD_TID_FREE:
- case HFI1_CMD_TID_INVAL_READ:
- copy = sizeof(tinfo);
- dest = &tinfo;
- break;
- case HFI1_CMD_USER_INFO:
- case HFI1_CMD_RECV_CTRL:
- case HFI1_CMD_POLL_TYPE:
- case HFI1_CMD_ACK_EVENT:
- case HFI1_CMD_CTXT_INFO:
- case HFI1_CMD_SET_PKEY:
- case HFI1_CMD_CTXT_RESET:
- copy = 0;
- user_val = cmd.addr;
- break;
- case HFI1_CMD_EP_INFO:
- case HFI1_CMD_EP_ERASE_CHIP:
- case HFI1_CMD_EP_ERASE_RANGE:
- case HFI1_CMD_EP_READ_RANGE:
- case HFI1_CMD_EP_WRITE_RANGE:
- uctxt_required = 0; /* assigned user context not required */
- must_be_root = 1; /* validate user */
- copy = 0;
- break;
- default:
- ret = -EINVAL;
- goto bail;
- }
-
- /* If the command comes with user data, copy it. */
- if (copy) {
- if (copy_from_user(dest, (void __user *)cmd.addr, copy)) {
- ret = -EFAULT;
- goto bail;
- }
- consumed += copy;
- }
-
- /*
- * Make sure there is a uctxt when needed.
- */
- if (uctxt_required && !uctxt) {
- ret = -EINVAL;
- goto bail;
- }
+ int uval = 0;
+ unsigned long ul_uval = 0;
+ u16 uval16 = 0;
+
+ hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
+ if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
+ cmd != HFI1_IOCTL_GET_VERS &&
+ !uctxt)
+ return -EINVAL;
- /* only root can do these operations */
- if (must_be_root && !capable(CAP_SYS_ADMIN)) {
- ret = -EPERM;
- goto bail;
- }
+ switch (cmd) {
+ case HFI1_IOCTL_ASSIGN_CTXT:
+ if (copy_from_user(&uinfo,
+ (struct hfi1_user_info __user *)arg,
+ sizeof(uinfo)))
+ return -EFAULT;
- switch (cmd.type) {
- case HFI1_CMD_ASSIGN_CTXT:
ret = assign_ctxt(fp, &uinfo);
if (ret < 0)
- goto bail;
- ret = setup_ctxt(fp);
+ return ret;
+ setup_ctxt(fp);
if (ret)
- goto bail;
+ return ret;
ret = user_init(fp);
break;
- case HFI1_CMD_CTXT_INFO:
- ret = get_ctxt_info(fp, (void __user *)(unsigned long)
- user_val, cmd.len);
- break;
- case HFI1_CMD_USER_INFO:
- ret = get_base_info(fp, (void __user *)(unsigned long)
- user_val, cmd.len);
+ case HFI1_IOCTL_CTXT_INFO:
+ ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg,
+ sizeof(struct hfi1_ctxt_info));
break;
- case HFI1_CMD_SDMA_STATUS_UPD:
+ case HFI1_IOCTL_USER_INFO:
+ ret = get_base_info(fp, (void __user *)(unsigned long)arg,
+ sizeof(struct hfi1_base_info));
break;
- case HFI1_CMD_CREDIT_UPD:
+ case HFI1_IOCTL_CREDIT_UPD:
if (uctxt && uctxt->sc)
sc_return_credits(uctxt->sc);
break;
- case HFI1_CMD_TID_UPDATE:
+
+ case HFI1_IOCTL_TID_UPDATE:
+ if (copy_from_user(&tinfo,
+ (struct hfi11_tid_info __user *)arg,
+ sizeof(tinfo)))
+ return -EFAULT;
+
ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
if (!ret) {
/*
@@ -305,57 +243,82 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
* These fields are adjacent in the structure so
* we can copy them at the same time.
*/
- addr = (unsigned long)cmd.addr +
- offsetof(struct hfi1_tid_info, tidcnt);
+ addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
sizeof(tinfo.tidcnt) +
sizeof(tinfo.length)))
ret = -EFAULT;
}
break;
- case HFI1_CMD_TID_INVAL_READ:
- ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
+
+ case HFI1_IOCTL_TID_FREE:
+ if (copy_from_user(&tinfo,
+ (struct hfi11_tid_info __user *)arg,
+ sizeof(tinfo)))
+ return -EFAULT;
+
+ ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
if (ret)
break;
- addr = (unsigned long)cmd.addr +
- offsetof(struct hfi1_tid_info, tidcnt);
+ addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
sizeof(tinfo.tidcnt)))
ret = -EFAULT;
break;
- case HFI1_CMD_TID_FREE:
- ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
+
+ case HFI1_IOCTL_TID_INVAL_READ:
+ if (copy_from_user(&tinfo,
+ (struct hfi11_tid_info __user *)arg,
+ sizeof(tinfo)))
+ return -EFAULT;
+
+ ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
if (ret)
break;
- addr = (unsigned long)cmd.addr +
- offsetof(struct hfi1_tid_info, tidcnt);
+ addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
sizeof(tinfo.tidcnt)))
ret = -EFAULT;
break;
- case HFI1_CMD_RECV_CTRL:
- ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val);
+
+ case HFI1_IOCTL_RECV_CTRL:
+ ret = get_user(uval, (int __user *)arg);
+ if (ret != 0)
+ return -EFAULT;
+ ret = manage_rcvq(uctxt, fd->subctxt, uval);
break;
- case HFI1_CMD_POLL_TYPE:
- uctxt->poll_type = (typeof(uctxt->poll_type))user_val;
+
+ case HFI1_IOCTL_POLL_TYPE:
+ ret = get_user(uval, (int __user *)arg);
+ if (ret != 0)
+ return -EFAULT;
+ uctxt->poll_type = (typeof(uctxt->poll_type))uval;
break;
- case HFI1_CMD_ACK_EVENT:
- ret = user_event_ack(uctxt, fd->subctxt, user_val);
+
+ case HFI1_IOCTL_ACK_EVENT:
+ ret = get_user(ul_uval, (unsigned long __user *)arg);
+ if (ret != 0)
+ return -EFAULT;
+ ret = user_event_ack(uctxt, fd->subctxt, ul_uval);
break;
- case HFI1_CMD_SET_PKEY:
+
+ case HFI1_IOCTL_SET_PKEY:
+ ret = get_user(uval16, (u16 __user *)arg);
+ if (ret != 0)
+ return -EFAULT;
if (HFI1_CAP_IS_USET(PKEY_CHECK))
- ret = set_ctxt_pkey(uctxt, fd->subctxt, user_val);
+ ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16);
else
- ret = -EPERM;
+ return -EPERM;
break;
- case HFI1_CMD_CTXT_RESET: {
+
+ case HFI1_IOCTL_CTXT_RESET: {
struct send_context *sc;
struct hfi1_devdata *dd;
- if (!uctxt || !uctxt->dd || !uctxt->sc) {
- ret = -EINVAL;
- break;
- }
+ if (!uctxt || !uctxt->dd || !uctxt->sc)
+ return -EINVAL;
+
/*
* There is no protection here. User level has to
* guarantee that no one will be writing to the send
@@ -373,10 +336,9 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
wait_event_interruptible_timeout(
sc->halt_wait, (sc->flags & SCF_HALTED),
msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
- if (!(sc->flags & SCF_HALTED)) {
- ret = -ENOLCK;
- break;
- }
+ if (!(sc->flags & SCF_HALTED))
+ return -ENOLCK;
+
/*
* If the send context was halted due to a Freeze,
* wait until the device has been "unfrozen" before
@@ -387,18 +349,16 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
dd->event_queue,
!(ACCESS_ONCE(dd->flags) & HFI1_FROZEN),
msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
- if (dd->flags & HFI1_FROZEN) {
- ret = -ENOLCK;
- break;
- }
- if (dd->flags & HFI1_FORCED_FREEZE) {
+ if (dd->flags & HFI1_FROZEN)
+ return -ENOLCK;
+
+ if (dd->flags & HFI1_FORCED_FREEZE)
/*
* Don't allow context reset if we are into
* forced freeze
*/
- ret = -ENODEV;
- break;
- }
+ return -ENODEV;
+
sc_disable(sc);
ret = sc_enable(sc);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
@@ -410,18 +370,17 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
sc_return_credits(sc);
break;
}
- case HFI1_CMD_EP_INFO:
- case HFI1_CMD_EP_ERASE_CHIP:
- case HFI1_CMD_EP_ERASE_RANGE:
- case HFI1_CMD_EP_READ_RANGE:
- case HFI1_CMD_EP_WRITE_RANGE:
- ret = handle_eprom_command(fp, &cmd);
+
+ case HFI1_IOCTL_GET_VERS:
+ uval = HFI1_USER_SWVERSION;
+ if (put_user(uval, (int __user *)arg))
+ return -EFAULT;
break;
+
+ default:
+ return -EINVAL;
}
- if (ret >= 0)
- ret = consumed;
-bail:
return ret;
}
@@ -738,7 +697,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
{
struct hfi1_filedata *fdata = fp->private_data;
struct hfi1_ctxtdata *uctxt = fdata->uctxt;
- struct hfi1_devdata *dd;
+ struct hfi1_devdata *dd = container_of(inode->i_cdev,
+ struct hfi1_devdata,
+ user_cdev);
unsigned long flags, *ev;
fp->private_data = NULL;
@@ -747,7 +708,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
goto done;
hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);
- dd = uctxt->dd;
mutex_lock(&hfi1_mutex);
flush_wc();
@@ -813,6 +773,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
mutex_unlock(&hfi1_mutex);
hfi1_free_ctxtdata(dd, uctxt);
done:
+ kobject_put(&dd->kobj);
kfree(fdata);
return 0;
}
@@ -836,7 +797,7 @@ static u64 kvirt_to_phys(void *addr)
static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
{
int i_minor, ret = 0;
- unsigned swmajor, swminor, alg = HFI1_ALG_ACROSS;
+ unsigned int swmajor, swminor;
swmajor = uinfo->userversion >> 16;
if (swmajor != HFI1_USER_SWMAJOR) {
@@ -846,9 +807,6 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
swminor = uinfo->userversion & 0xffff;
- if (uinfo->hfi1_alg < HFI1_ALG_COUNT)
- alg = uinfo->hfi1_alg;
-
mutex_lock(&hfi1_mutex);
/* First, lets check if we need to setup a shared context? */
if (uinfo->subctxt_cnt) {
@@ -868,7 +826,7 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
*/
if (!ret) {
i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
- ret = get_user_context(fp, uinfo, i_minor - 1, alg);
+ ret = get_user_context(fp, uinfo, i_minor);
}
done_unlock:
mutex_unlock(&hfi1_mutex);
@@ -876,71 +834,26 @@ done:
return ret;
}
-/* return true if the device available for general use */
-static int usable_device(struct hfi1_devdata *dd)
-{
- struct hfi1_pportdata *ppd = dd->pport;
-
- return driver_lstate(ppd) == IB_PORT_ACTIVE;
-}
-
static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo,
- int devno, unsigned alg)
+ int devno)
{
struct hfi1_devdata *dd = NULL;
- int ret = 0, devmax, npresent, nup, dev;
+ int devmax, npresent, nup;
devmax = hfi1_count_units(&npresent, &nup);
- if (!npresent) {
- ret = -ENXIO;
- goto done;
- }
- if (!nup) {
- ret = -ENETDOWN;
- goto done;
- }
- if (devno >= 0) {
- dd = hfi1_lookup(devno);
- if (!dd)
- ret = -ENODEV;
- else if (!dd->freectxts)
- ret = -EBUSY;
- } else {
- struct hfi1_devdata *pdd;
-
- if (alg == HFI1_ALG_ACROSS) {
- unsigned free = 0U;
-
- for (dev = 0; dev < devmax; dev++) {
- pdd = hfi1_lookup(dev);
- if (!pdd)
- continue;
- if (!usable_device(pdd))
- continue;
- if (pdd->freectxts &&
- pdd->freectxts > free) {
- dd = pdd;
- free = pdd->freectxts;
- }
- }
- } else {
- for (dev = 0; dev < devmax; dev++) {
- pdd = hfi1_lookup(dev);
- if (!pdd)
- continue;
- if (!usable_device(pdd))
- continue;
- if (pdd->freectxts) {
- dd = pdd;
- break;
- }
- }
- }
- if (!dd)
- ret = -EBUSY;
- }
-done:
- return ret ? ret : allocate_ctxt(fp, dd, uinfo);
+ if (!npresent)
+ return -ENXIO;
+
+ if (!nup)
+ return -ENETDOWN;
+
+ dd = hfi1_lookup(devno);
+ if (!dd)
+ return -ENODEV;
+ else if (!dd->freectxts)
+ return -EBUSY;
+
+ return allocate_ctxt(fp, dd, uinfo);
}
static int find_shared_ctxt(struct file *fp,
@@ -1546,170 +1459,10 @@ done:
return ret;
}
-static int ui_open(struct inode *inode, struct file *filp)
-{
- struct hfi1_devdata *dd;
-
- dd = container_of(inode->i_cdev, struct hfi1_devdata, ui_cdev);
- filp->private_data = dd; /* for other methods */
- return 0;
-}
-
-static int ui_release(struct inode *inode, struct file *filp)
-{
- /* nothing to do */
- return 0;
-}
-
-static loff_t ui_lseek(struct file *filp, loff_t offset, int whence)
-{
- struct hfi1_devdata *dd = filp->private_data;
-
- return fixed_size_llseek(filp, offset, whence,
- (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE);
-}
-
-/* NOTE: assumes unsigned long is 8 bytes */
-static ssize_t ui_read(struct file *filp, char __user *buf, size_t count,
- loff_t *f_pos)
-{
- struct hfi1_devdata *dd = filp->private_data;
- void __iomem *base = dd->kregbase;
- unsigned long total, csr_off,
- barlen = (dd->kregend - dd->kregbase);
- u64 data;
-
- /* only read 8 byte quantities */
- if ((count % 8) != 0)
- return -EINVAL;
- /* offset must be 8-byte aligned */
- if ((*f_pos % 8) != 0)
- return -EINVAL;
- /* destination buffer must be 8-byte aligned */
- if ((unsigned long)buf % 8 != 0)
- return -EINVAL;
- /* must be in range */
- if (*f_pos + count > (barlen + DC8051_DATA_MEM_SIZE))
- return -EINVAL;
- /* only set the base if we are not starting past the BAR */
- if (*f_pos < barlen)
- base += *f_pos;
- csr_off = *f_pos;
- for (total = 0; total < count; total += 8, csr_off += 8) {
- /* accessing LCB CSRs requires more checks */
- if (is_lcb_offset(csr_off)) {
- if (read_lcb_csr(dd, csr_off, (u64 *)&data))
- break; /* failed */
- }
- /*
- * Cannot read ASIC GPIO/QSFP* clear and force CSRs without a
- * false parity error. Avoid the whole issue by not reading
- * them. These registers are defined as having a read value
- * of 0.
- */
- else if (csr_off == ASIC_GPIO_CLEAR ||
- csr_off == ASIC_GPIO_FORCE ||
- csr_off == ASIC_QSFP1_CLEAR ||
- csr_off == ASIC_QSFP1_FORCE ||
- csr_off == ASIC_QSFP2_CLEAR ||
- csr_off == ASIC_QSFP2_FORCE)
- data = 0;
- else if (csr_off >= barlen) {
- /*
- * read_8051_data can read more than just 8 bytes at
- * a time. However, folding this into the loop and
- * handling the reads in 8 byte increments allows us
- * to smoothly transition from chip memory to 8051
- * memory.
- */
- if (read_8051_data(dd,
- (u32)(csr_off - barlen),
- sizeof(data), &data))
- break; /* failed */
- } else
- data = readq(base + total);
- if (put_user(data, (unsigned long __user *)(buf + total)))
- break;
- }
- *f_pos += total;
- return total;
-}
-
-/* NOTE: assumes unsigned long is 8 bytes */
-static ssize_t ui_write(struct file *filp, const char __user *buf,
- size_t count, loff_t *f_pos)
-{
- struct hfi1_devdata *dd = filp->private_data;
- void __iomem *base;
- unsigned long total, data, csr_off;
- int in_lcb;
-
- /* only write 8 byte quantities */
- if ((count % 8) != 0)
- return -EINVAL;
- /* offset must be 8-byte aligned */
- if ((*f_pos % 8) != 0)
- return -EINVAL;
- /* source buffer must be 8-byte aligned */
- if ((unsigned long)buf % 8 != 0)
- return -EINVAL;
- /* must be in range */
- if (*f_pos + count > dd->kregend - dd->kregbase)
- return -EINVAL;
-
- base = (void __iomem *)dd->kregbase + *f_pos;
- csr_off = *f_pos;
- in_lcb = 0;
- for (total = 0; total < count; total += 8, csr_off += 8) {
- if (get_user(data, (unsigned long __user *)(buf + total)))
- break;
- /* accessing LCB CSRs requires a special procedure */
- if (is_lcb_offset(csr_off)) {
- if (!in_lcb) {
- int ret = acquire_lcb_access(dd, 1);
-
- if (ret)
- break;
- in_lcb = 1;
- }
- } else {
- if (in_lcb) {
- release_lcb_access(dd, 1);
- in_lcb = 0;
- }
- }
- writeq(data, base + total);
- }
- if (in_lcb)
- release_lcb_access(dd, 1);
- *f_pos += total;
- return total;
-}
-
-static const struct file_operations ui_file_ops = {
- .owner = THIS_MODULE,
- .llseek = ui_lseek,
- .read = ui_read,
- .write = ui_write,
- .open = ui_open,
- .release = ui_release,
-};
-
-#define UI_OFFSET 192 /* device minor offset for UI devices */
-static int create_ui = 1;
-
-static struct cdev wildcard_cdev;
-static struct device *wildcard_device;
-
-static atomic_t user_count = ATOMIC_INIT(0);
-
static void user_remove(struct hfi1_devdata *dd)
{
- if (atomic_dec_return(&user_count) == 0)
- hfi1_cdev_cleanup(&wildcard_cdev, &wildcard_device);
hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device);
- hfi1_cdev_cleanup(&dd->ui_cdev, &dd->ui_device);
}
static int user_add(struct hfi1_devdata *dd)
@@ -1717,34 +1470,13 @@ static int user_add(struct hfi1_devdata *dd)
char name[10];
int ret;
- if (atomic_inc_return(&user_count) == 1) {
- ret = hfi1_cdev_init(0, class_name(), &hfi1_file_ops,
- &wildcard_cdev, &wildcard_device,
- true);
- if (ret)
- goto done;
- }
-
snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
- ret = hfi1_cdev_init(dd->unit + 1, name, &hfi1_file_ops,
+ ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
&dd->user_cdev, &dd->user_device,
- true);
+ true, &dd->kobj);
if (ret)
- goto done;
+ user_remove(dd);
- if (create_ui) {
- snprintf(name, sizeof(name),
- "%s_ui%d", class_name(), dd->unit);
- ret = hfi1_cdev_init(dd->unit + UI_OFFSET, name, &ui_file_ops,
- &dd->ui_cdev, &dd->ui_device,
- false);
- if (ret)
- goto done;
- }
-
- return 0;
-done:
- user_remove(dd);
return ret;
}
@@ -1753,13 +1485,7 @@ done:
*/
int hfi1_device_create(struct hfi1_devdata *dd)
{
- int r, ret;
-
- r = user_add(dd);
- ret = hfi1_diag_add(dd);
- if (r && !ret)
- ret = r;
- return ret;
+ return user_add(dd);
}
/*
@@ -1769,5 +1495,4 @@ int hfi1_device_create(struct hfi1_devdata *dd)
void hfi1_device_remove(struct hfi1_devdata *dd)
{
user_remove(dd);
- hfi1_diag_remove(dd);
}
diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index ed680fda611d..ed680fda611d 100644
--- a/drivers/staging/rdma/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 7b78d56de7f5..4417a0fd3ef9 100644
--- a/drivers/staging/rdma/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -453,6 +453,7 @@ struct rvt_sge_state;
#define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP)
#define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)
+#define HLS_DOWN ~(HLS_UP)
/* use this MTU size if none other is given */
#define HFI1_DEFAULT_ACTIVE_MTU 10240
@@ -1168,6 +1169,7 @@ struct hfi1_devdata {
atomic_t aspm_disabled_cnt;
struct hfi1_affinity *affinity;
+ struct kobject kobj;
};
/* 8051 firmware version helper */
@@ -1882,9 +1884,8 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
get_unit_name((dd)->unit), ##__VA_ARGS__)
#define hfi1_dev_porterr(dd, port, fmt, ...) \
- dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
- get_unit_name((dd)->unit), (dd)->unit, (port), \
- ##__VA_ARGS__)
+ dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \
+ get_unit_name((dd)->unit), (port), ##__VA_ARGS__)
/*
* this is used for formatting hw error messages...
diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 502b7cf4647d..5cc492e5776d 100644
--- a/drivers/staging/rdma/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -732,12 +732,12 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
lastfail = hfi1_create_rcvhdrq(dd, rcd);
if (!lastfail)
lastfail = hfi1_setup_eagerbufs(rcd);
- if (lastfail)
+ if (lastfail) {
dd_dev_err(dd,
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
+ ret = lastfail;
+ }
}
- if (lastfail)
- ret = lastfail;
/* Allocate enough memory for user event notification. */
len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS *
@@ -989,8 +989,10 @@ static void release_asic_data(struct hfi1_devdata *dd)
dd->asic_data = NULL;
}
-void hfi1_free_devdata(struct hfi1_devdata *dd)
+static void __hfi1_free_devdata(struct kobject *kobj)
{
+ struct hfi1_devdata *dd =
+ container_of(kobj, struct hfi1_devdata, kobj);
unsigned long flags;
spin_lock_irqsave(&hfi1_devs_lock, flags);
@@ -1007,6 +1009,15 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
rvt_dealloc_device(&dd->verbs_dev.rdi);
}
+static struct kobj_type hfi1_devdata_type = {
+ .release = __hfi1_free_devdata,
+};
+
+void hfi1_free_devdata(struct hfi1_devdata *dd)
+{
+ kobject_put(&dd->kobj);
+}
+
/*
* Allocate our primary per-unit data structure. Must be done via verbs
* allocator, because the verbs cleanup process both does cleanup and
@@ -1102,6 +1113,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
&pdev->dev,
"Could not alloc cpulist info, cpu affinity might be wrong\n");
}
+ kobject_init(&dd->kobj, &hfi1_devdata_type);
return dd;
bail:
@@ -1300,7 +1312,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
spin_lock(&ppd->cc_state_lock);
cc_state = get_cc_state(ppd);
- rcu_assign_pointer(ppd->cc_state, NULL);
+ RCU_INIT_POINTER(ppd->cc_state, NULL);
spin_unlock(&ppd->cc_state_lock);
if (cc_state)
diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 65348d16ab2f..65348d16ab2f 100644
--- a/drivers/staging/rdma/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 2ec6ef38d389..2ec6ef38d389 100644
--- a/drivers/staging/rdma/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index ed58cf21e790..219029576ba0 100644
--- a/drivers/staging/rdma/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1403,6 +1403,12 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
if (key == okey)
continue;
/*
+ * Don't update pkeys[2], if an HFI port without MgmtAllowed
+ * by neighbor is a switch.
+ */
+ if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1)
+ continue;
+ /*
* The SM gives us the complete PKey table. We have
* to ensure that we put the PKeys in the matching
* slots.
@@ -3363,6 +3369,50 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
return reply((struct ib_mad_hdr *)smp);
}
+/*
+ * Apply congestion control information stored in the ppd to the
+ * active structure.
+ */
+static void apply_cc_state(struct hfi1_pportdata *ppd)
+{
+ struct cc_state *old_cc_state, *new_cc_state;
+
+ new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
+ if (!new_cc_state)
+ return;
+
+ /*
+ * Hold the lock for updating *and* to prevent ppd information
+ * from changing during the update.
+ */
+ spin_lock(&ppd->cc_state_lock);
+
+ old_cc_state = get_cc_state(ppd);
+ if (!old_cc_state) {
+ /* never active, or shutting down */
+ spin_unlock(&ppd->cc_state_lock);
+ kfree(new_cc_state);
+ return;
+ }
+
+ *new_cc_state = *old_cc_state;
+
+ new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
+ memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
+ ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
+
+ new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
+ new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
+ memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
+ OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
+
+ rcu_assign_pointer(ppd->cc_state, new_cc_state);
+
+ spin_unlock(&ppd->cc_state_lock);
+
+ call_rcu(&old_cc_state->rcu, cc_state_reclaim);
+}
+
static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
u32 *resp_len)
@@ -3374,6 +3424,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
struct opa_congestion_setting_entry_shadow *entries;
int i;
+ /*
+ * Save details from packet into the ppd. Hold the cc_state_lock so
+ * our information is consistent with anyone trying to apply the state.
+ */
+ spin_lock(&ppd->cc_state_lock);
ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
entries = ppd->congestion_entries;
@@ -3384,6 +3439,10 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
p->entries[i].trigger_threshold;
entries[i].ccti_min = p->entries[i].ccti_min;
}
+ spin_unlock(&ppd->cc_state_lock);
+
+ /* now apply the information */
+ apply_cc_state(ppd);
return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
resp_len);
@@ -3526,7 +3585,6 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
int i, j;
u32 sentry, eentry;
u16 ccti_limit;
- struct cc_state *old_cc_state, *new_cc_state;
/* sanity check n_blocks, start_block */
if (n_blocks == 0 ||
@@ -3546,45 +3604,20 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
return reply((struct ib_mad_hdr *)smp);
}
- new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
- if (!new_cc_state)
- goto getit;
-
+ /*
+ * Save details from packet into the ppd. Hold the cc_state_lock so
+ * our information is consistent with anyone trying to apply the state.
+ */
spin_lock(&ppd->cc_state_lock);
-
- old_cc_state = get_cc_state(ppd);
-
- if (!old_cc_state) {
- spin_unlock(&ppd->cc_state_lock);
- kfree(new_cc_state);
- return reply((struct ib_mad_hdr *)smp);
- }
-
- *new_cc_state = *old_cc_state;
-
- new_cc_state->cct.ccti_limit = ccti_limit;
-
- entries = ppd->ccti_entries;
ppd->total_cct_entry = ccti_limit + 1;
-
+ entries = ppd->ccti_entries;
for (j = 0, i = sentry; i < eentry; j++, i++)
entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
-
- memcpy(new_cc_state->cct.entries, entries,
- eentry * sizeof(struct ib_cc_table_entry));
-
- new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
- new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
- memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
- OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
-
- rcu_assign_pointer(ppd->cc_state, new_cc_state);
-
spin_unlock(&ppd->cc_state_lock);
- call_rcu(&old_cc_state->rcu, cc_state_reclaim);
+ /* now apply the information */
+ apply_cc_state(ppd);
-getit:
return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len);
}
diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 55ee08675333..55ee08675333 100644
--- a/drivers/staging/rdma/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 2b0e91d3093d..b7a80aa1ae30 100644
--- a/drivers/staging/rdma/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -45,6 +45,7 @@
*
*/
#include <linux/list.h>
+#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/interval_tree_generic.h>
@@ -97,7 +98,6 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node)
int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
{
struct mmu_rb_handler *handlr;
- unsigned long flags;
if (!ops->invalidate)
return -EINVAL;
@@ -111,9 +111,9 @@ int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
INIT_HLIST_NODE(&handlr->mn.hlist);
spin_lock_init(&handlr->lock);
handlr->mn.ops = &mn_opts;
- spin_lock_irqsave(&mmu_rb_lock, flags);
- list_add_tail(&handlr->list, &mmu_rb_handlers);
- spin_unlock_irqrestore(&mmu_rb_lock, flags);
+ spin_lock(&mmu_rb_lock);
+ list_add_tail_rcu(&handlr->list, &mmu_rb_handlers);
+ spin_unlock(&mmu_rb_lock);
return mmu_notifier_register(&handlr->mn, current->mm);
}
@@ -130,9 +130,10 @@ void hfi1_mmu_rb_unregister(struct rb_root *root)
if (current->mm)
mmu_notifier_unregister(&handler->mn, current->mm);
- spin_lock_irqsave(&mmu_rb_lock, flags);
- list_del(&handler->list);
- spin_unlock_irqrestore(&mmu_rb_lock, flags);
+ spin_lock(&mmu_rb_lock);
+ list_del_rcu(&handler->list);
+ spin_unlock(&mmu_rb_lock);
+ synchronize_rcu();
spin_lock_irqsave(&handler->lock, flags);
if (!RB_EMPTY_ROOT(root)) {
@@ -271,16 +272,15 @@ void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
{
struct mmu_rb_handler *handler;
- unsigned long flags;
- spin_lock_irqsave(&mmu_rb_lock, flags);
- list_for_each_entry(handler, &mmu_rb_handlers, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(handler, &mmu_rb_handlers, list) {
if (handler->root == root)
goto unlock;
}
handler = NULL;
unlock:
- spin_unlock_irqrestore(&mmu_rb_lock, flags);
+ rcu_read_unlock();
return handler;
}
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
index 7a57b9c49d27..7a57b9c49d27 100644
--- a/drivers/staging/rdma/hfi1/mmu_rb.h
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
diff --git a/drivers/staging/rdma/hfi1/opa_compat.h b/drivers/infiniband/hw/hfi1/opa_compat.h
index 6ef3c1cbdcd7..6ef3c1cbdcd7 100644
--- a/drivers/staging/rdma/hfi1/opa_compat.h
+++ b/drivers/infiniband/hw/hfi1/opa_compat.h
diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 0bac21e6a658..0bac21e6a658 100644
--- a/drivers/staging/rdma/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index c67b9ad3fcf4..d5edb1afbb8f 100644
--- a/drivers/staging/rdma/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1835,8 +1835,7 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
struct pio_vl_map *oldmap, *newmap;
if (!vl_scontexts) {
- /* send context 0 reserved for VL15 */
- for (i = 1; i < dd->num_send_contexts; i++)
+ for (i = 0; i < dd->num_send_contexts; i++)
if (dd->send_contexts[i].type == SC_KERNEL)
num_kernel_send_contexts++;
/* truncate divide */
diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 53a08edb7f64..464cbd27b975 100644
--- a/drivers/staging/rdma/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -49,10 +49,10 @@
/* send context types */
#define SC_KERNEL 0
-#define SC_ACK 1
-#define SC_USER 2
-#define SC_VL15 3
-#define SC_MAX 4
+#define SC_VL15 1
+#define SC_ACK 2
+#define SC_USER 3 /* must be the last one: it may take all left */
+#define SC_MAX 4 /* count of send context types */
/* invalid send context index */
#define INVALID_SCI 0xff
diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 8c25e1b58849..8c25e1b58849 100644
--- a/drivers/staging/rdma/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 8fe8a205b5bb..03df9322f862 100644
--- a/drivers/staging/rdma/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -87,6 +87,17 @@ void free_platform_config(struct hfi1_devdata *dd)
*/
}
+void get_port_type(struct hfi1_pportdata *ppd)
+{
+ int ret;
+
+ ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+ PORT_TABLE_PORT_TYPE, &ppd->port_type,
+ 4);
+ if (ret)
+ ppd->port_type = PORT_TYPE_UNKNOWN;
+}
+
int set_qsfp_tx(struct hfi1_pportdata *ppd, int on)
{
u8 tx_ctrl_byte = on ? 0x0 : 0xF;
@@ -529,7 +540,8 @@ static void apply_tunings(
/* Enable external device config if channel is limiting active */
read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
GENERAL_CONFIG, &config_data);
- config_data |= limiting_active;
+ config_data &= ~(0xff << ENABLE_EXT_DEV_CONFIG_SHIFT);
+ config_data |= ((u32)limiting_active << ENABLE_EXT_DEV_CONFIG_SHIFT);
ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
GENERAL_CONFIG, config_data);
if (ret != HCMD_SUCCESS)
@@ -542,7 +554,8 @@ static void apply_tunings(
/* Pass tuning method to 8051 */
read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
&config_data);
- config_data |= tuning_method;
+ config_data &= ~(0xff << TUNING_METHOD_SHIFT);
+ config_data |= ((u32)tuning_method << TUNING_METHOD_SHIFT);
ret = load_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
config_data);
if (ret != HCMD_SUCCESS)
@@ -564,8 +577,8 @@ static void apply_tunings(
ret = read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
GENERAL_CONFIG, &config_data);
/* Clear, then set the external device config field */
- config_data &= ~(0xFF << 24);
- config_data |= (external_device_config << 24);
+ config_data &= ~(u32)0xFF;
+ config_data |= external_device_config;
ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
GENERAL_CONFIG, config_data);
if (ret != HCMD_SUCCESS)
@@ -784,12 +797,6 @@ void tune_serdes(struct hfi1_pportdata *ppd)
return;
}
- ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
- PORT_TABLE_PORT_TYPE, &ppd->port_type,
- 4);
- if (ret)
- ppd->port_type = PORT_TYPE_UNKNOWN;
-
switch (ppd->port_type) {
case PORT_TYPE_DISCONNECTED:
ppd->offline_disabled_reason =
diff --git a/drivers/staging/rdma/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h
index 19620cf546d5..e2c21613c326 100644
--- a/drivers/staging/rdma/hfi1/platform.h
+++ b/drivers/infiniband/hw/hfi1/platform.h
@@ -298,6 +298,7 @@ enum link_tuning_encoding {
/* platform.c */
void get_platform_config(struct hfi1_devdata *dd);
void free_platform_config(struct hfi1_devdata *dd);
+void get_port_type(struct hfi1_pportdata *ppd);
int set_qsfp_tx(struct hfi1_pportdata *ppd, int on);
void tune_serdes(struct hfi1_pportdata *ppd);
diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 91eb42316df9..1a942ffba4cb 100644
--- a/drivers/staging/rdma/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -49,7 +49,6 @@
#include <linux/vmalloc.h>
#include <linux/hash.h>
#include <linux/module.h>
-#include <linux/random.h>
#include <linux/seq_file.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
@@ -161,9 +160,6 @@ static inline int opa_mtu_enum_to_int(int mtu)
* This function is what we would push to the core layer if we wanted to be a
* "first class citizen". Instead we hide this here and rely on Verbs ULPs
* to blindly pass the MTU enum value from the PathRecord to us.
- *
- * The actual flag used to determine "8k MTU" will change and is currently
- * unknown.
*/
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
@@ -516,6 +512,7 @@ static void iowait_wakeup(struct iowait *wait, int reason)
static void iowait_sdma_drained(struct iowait *wait)
{
struct rvt_qp *qp = iowait_to_qp(wait);
+ unsigned long flags;
/*
* This happens when the send engine notes
@@ -523,12 +520,12 @@ static void iowait_sdma_drained(struct iowait *wait)
* do the flush work until that QP's
* sdma work has finished.
*/
- spin_lock(&qp->s_lock);
+ spin_lock_irqsave(&qp->s_lock, flags);
if (qp->s_flags & RVT_S_WAIT_DMA) {
qp->s_flags &= ~RVT_S_WAIT_DMA;
hfi1_schedule_send(qp);
}
- spin_unlock(&qp->s_lock);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
}
/**
diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index e7bc8d6cf681..e7bc8d6cf681 100644
--- a/drivers/staging/rdma/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 2441669f0817..2441669f0817 100644
--- a/drivers/staging/rdma/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h
index dadc66c442b9..dadc66c442b9 100644
--- a/drivers/staging/rdma/hfi1/qsfp.h
+++ b/drivers/infiniband/hw/hfi1/qsfp.h
diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 792f15eb8efe..792f15eb8efe 100644
--- a/drivers/staging/rdma/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index a659aec3c3c6..a659aec3c3c6 100644
--- a/drivers/staging/rdma/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index abb8ebc1fcac..f9befc05b349 100644
--- a/drivers/staging/rdma/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -134,6 +134,7 @@ static const char * const sdma_state_names[] = {
[sdma_state_s99_running] = "s99_Running",
};
+#ifdef CONFIG_SDMA_VERBOSITY
static const char * const sdma_event_names[] = {
[sdma_event_e00_go_hw_down] = "e00_GoHwDown",
[sdma_event_e10_go_hw_start] = "e10_GoHwStart",
@@ -150,6 +151,7 @@ static const char * const sdma_event_names[] = {
[sdma_event_e85_link_down] = "e85_LinkDown",
[sdma_event_e90_sw_halted] = "e90_SwHalted",
};
+#endif
static const struct sdma_set_state_action sdma_action_table[] = {
[sdma_state_s00_hw_down] = {
@@ -376,7 +378,7 @@ static inline void complete_tx(struct sdma_engine *sde,
sdma_txclean(sde->dd, tx);
if (complete)
(*complete)(tx, res);
- if (iowait_sdma_dec(wait) && wait)
+ if (wait && iowait_sdma_dec(wait))
iowait_drain_wakeup(wait);
}
diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 8f50c99fe711..8f50c99fe711 100644
--- a/drivers/staging/rdma/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
diff --git a/drivers/staging/rdma/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h
index bf7d777d756e..bf7d777d756e 100644
--- a/drivers/staging/rdma/hfi1/sdma_txreq.h
+++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h
diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 8cd6df8634ad..91fc2aed6aed 100644
--- a/drivers/staging/rdma/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -721,8 +721,8 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
}
dd_dev_info(dd,
- "IB%u: Congestion Control Agent enabled for port %d\n",
- dd->unit, port_num);
+ "Congestion Control Agent enabled for port %d\n",
+ port_num);
return 0;
diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 8b62fefcf903..79b2952c0dfb 100644
--- a/drivers/staging/rdma/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -66,6 +66,7 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr)
#define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x"
#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x"
#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x"
+#define IETH_PRN "ieth rkey 0x%.8x"
#define ATOMICACKETH_PRN "origdata %lld"
#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld"
@@ -166,6 +167,12 @@ const char *parse_everbs_hdrs(
be32_to_cpu(eh->ud.deth[0]),
be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK);
break;
+ /* ieth */
+ case OP(RC, SEND_LAST_WITH_INVALIDATE):
+ case OP(RC, SEND_ONLY_WITH_INVALIDATE):
+ trace_seq_printf(p, IETH_PRN,
+ be32_to_cpu(eh->ieth));
+ break;
}
trace_seq_putc(p, 0);
return ret;
@@ -233,3 +240,4 @@ __hfi1_trace_fn(FIRMWARE);
__hfi1_trace_fn(RCVCTRL);
__hfi1_trace_fn(TID);
__hfi1_trace_fn(MMU);
+__hfi1_trace_fn(IOCTL);
diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h
index 963dc948c38a..28c1d0832886 100644
--- a/drivers/staging/rdma/hfi1/trace.h
+++ b/drivers/infiniband/hw/hfi1/trace.h
@@ -74,8 +74,8 @@ __print_symbolic(etype, \
TRACE_EVENT(hfi1_rcvhdr,
TP_PROTO(struct hfi1_devdata *dd,
- u64 eflags,
u32 ctxt,
+ u64 eflags,
u32 etype,
u32 hlen,
u32 tlen,
@@ -392,6 +392,8 @@ __print_symbolic(opcode, \
ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
ib_opcode_name(RC_COMPARE_SWAP), \
ib_opcode_name(RC_FETCH_ADD), \
+ ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \
+ ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \
ib_opcode_name(UC_SEND_FIRST), \
ib_opcode_name(UC_SEND_MIDDLE), \
ib_opcode_name(UC_SEND_LAST), \
@@ -1341,6 +1343,7 @@ __hfi1_trace_def(FIRMWARE);
__hfi1_trace_def(RCVCTRL);
__hfi1_trace_def(TID);
__hfi1_trace_def(MMU);
+__hfi1_trace_def(IOCTL);
#define hfi1_cdbg(which, fmt, ...) \
__hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__)
diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/infiniband/hw/hfi1/twsi.c
index e82e52a63d35..e82e52a63d35 100644
--- a/drivers/staging/rdma/hfi1/twsi.c
+++ b/drivers/infiniband/hw/hfi1/twsi.c
diff --git a/drivers/staging/rdma/hfi1/twsi.h b/drivers/infiniband/hw/hfi1/twsi.h
index 5b8a5b5e7eae..5b8a5b5e7eae 100644
--- a/drivers/staging/rdma/hfi1/twsi.h
+++ b/drivers/infiniband/hw/hfi1/twsi.h
diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index df773d433297..df773d433297 100644
--- a/drivers/staging/rdma/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 1e503ad0bebb..1e503ad0bebb 100644
--- a/drivers/staging/rdma/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 1b640a35b3fe..1b640a35b3fe 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 9bc8d9fba87e..9bc8d9fba87e 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 88e10b5f55f1..88e10b5f55f1 100644
--- a/drivers/staging/rdma/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 0014c9c0e967..29f4795f866c 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -166,6 +166,8 @@ static unsigned initial_pkt_count = 8;
#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
+struct sdma_mmu_node;
+
struct user_sdma_iovec {
struct list_head list;
struct iovec iov;
@@ -178,6 +180,7 @@ struct user_sdma_iovec {
* which we last left off.
*/
u64 offset;
+ struct sdma_mmu_node *node;
};
#define SDMA_CACHE_NODE_EVICT BIT(0)
@@ -507,6 +510,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
struct sdma_req_info info;
struct user_sdma_request *req;
u8 opcode, sc, vl;
+ int req_queued = 0;
if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
hfi1_cdbg(
@@ -703,6 +707,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
atomic_inc(&pq->n_reqs);
+ req_queued = 1;
/* Send the first N packets in the request to buy us some time */
ret = user_sdma_send_pkts(req, pcount);
if (unlikely(ret < 0 && ret != -EBUSY)) {
@@ -747,7 +752,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
return 0;
free_req:
user_sdma_free_request(req, true);
- pq_update(pq);
+ if (req_queued)
+ pq_update(pq);
set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
return ret;
}
@@ -1153,6 +1159,7 @@ retry:
}
iovec->pages = node->pages;
iovec->npages = npages;
+ iovec->node = node;
ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
if (ret) {
@@ -1519,18 +1526,13 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
}
if (req->data_iovs) {
struct sdma_mmu_node *node;
- struct mmu_rb_node *mnode;
int i;
for (i = 0; i < req->data_iovs; i++) {
- mnode = hfi1_mmu_rb_search(
- &req->pq->sdma_rb_root,
- (unsigned long)req->iovs[i].iov.iov_base,
- req->iovs[i].iov.iov_len);
- if (!mnode || IS_ERR(mnode))
+ node = req->iovs[i].node;
+ if (!node)
continue;
- node = container_of(mnode, struct sdma_mmu_node, rb);
if (unpin)
hfi1_mmu_rb_remove(&req->pq->sdma_rb_root,
&node->rb);
diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index b9240e351161..b9240e351161 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 9cdc85fa366f..849c4b9399d4 100644
--- a/drivers/staging/rdma/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -52,7 +52,6 @@
#include <linux/utsname.h>
#include <linux/rculist.h>
#include <linux/mm.h>
-#include <linux/random.h>
#include <linux/vmalloc.h>
#include "hfi.h"
@@ -336,6 +335,8 @@ const u8 hdr_len_by_opcode[256] = {
[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4,
[IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28,
[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
+ [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
+ [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4,
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
[IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
@@ -946,7 +947,6 @@ static int pio_wait(struct rvt_qp *qp,
dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
- dev->n_piowait++;
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
list_add_tail(&priv->s_iowait.list, &sc->piowait);
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 3ee223983b20..488356775627 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -152,6 +152,7 @@ union ib_ehdrs {
} at;
__be32 imm_data;
__be32 aeth;
+ __be32 ieth;
struct ib_atomic_eth atomic_eth;
} __packed;
diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index bc95c4112c61..bc95c4112c61 100644
--- a/drivers/staging/rdma/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 1cf69b2fe4a5..1cf69b2fe4a5 100644
--- a/drivers/staging/rdma/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 4a740f7a0519..02a735b64208 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2361,58 +2361,130 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+static const char * const i40iw_hw_stat_names[] = {
+ // 32bit names
+ [I40IW_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards",
+ [I40IW_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts",
+ [I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes",
+ [I40IW_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards",
+ [I40IW_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts",
+ [I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes",
+ [I40IW_HW_STAT_INDEX_TCPRTXSEG] = "tcpRetransSegs",
+ [I40IW_HW_STAT_INDEX_TCPRXOPTERR] = "tcpInOptErrors",
+ [I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = "tcpInProtoErrors",
+ // 64bit names
+ [I40IW_HW_STAT_INDEX_IP4RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip4InOctets",
+ [I40IW_HW_STAT_INDEX_IP4RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip4InPkts",
+ [I40IW_HW_STAT_INDEX_IP4RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip4InReasmRqd",
+ [I40IW_HW_STAT_INDEX_IP4RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip4InMcastPkts",
+ [I40IW_HW_STAT_INDEX_IP4TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip4OutOctets",
+ [I40IW_HW_STAT_INDEX_IP4TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip4OutPkts",
+ [I40IW_HW_STAT_INDEX_IP4TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip4OutSegRqd",
+ [I40IW_HW_STAT_INDEX_IP4TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip4OutMcastPkts",
+ [I40IW_HW_STAT_INDEX_IP6RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip6InOctets",
+ [I40IW_HW_STAT_INDEX_IP6RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip6InPkts",
+ [I40IW_HW_STAT_INDEX_IP6RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip6InReasmRqd",
+ [I40IW_HW_STAT_INDEX_IP6RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip6InMcastPkts",
+ [I40IW_HW_STAT_INDEX_IP6TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip6OutOctets",
+ [I40IW_HW_STAT_INDEX_IP6TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip6OutPkts",
+ [I40IW_HW_STAT_INDEX_IP6TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip6OutSegRqd",
+ [I40IW_HW_STAT_INDEX_IP6TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "ip6OutMcastPkts",
+ [I40IW_HW_STAT_INDEX_TCPRXSEGS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "tcpInSegs",
+ [I40IW_HW_STAT_INDEX_TCPTXSEG + I40IW_HW_STAT_INDEX_MAX_32] =
+ "tcpOutSegs",
+ [I40IW_HW_STAT_INDEX_RDMARXRDS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "iwInRdmaReads",
+ [I40IW_HW_STAT_INDEX_RDMARXSNDS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "iwInRdmaSends",
+ [I40IW_HW_STAT_INDEX_RDMARXWRS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "iwInRdmaWrites",
+ [I40IW_HW_STAT_INDEX_RDMATXRDS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "iwOutRdmaReads",
+ [I40IW_HW_STAT_INDEX_RDMATXSNDS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "iwOutRdmaSends",
+ [I40IW_HW_STAT_INDEX_RDMATXWRS + I40IW_HW_STAT_INDEX_MAX_32] =
+ "iwOutRdmaWrites",
+ [I40IW_HW_STAT_INDEX_RDMAVBND + I40IW_HW_STAT_INDEX_MAX_32] =
+ "iwRdmaBnd",
+ [I40IW_HW_STAT_INDEX_RDMAVINV + I40IW_HW_STAT_INDEX_MAX_32] =
+ "iwRdmaInv"
+};
+
/**
- * i40iw_get_protocol_stats - Populates the rdma_stats structure
- * @ibdev: ib dev struct
- * @stats: iw protocol stats struct
+ * i40iw_alloc_hw_stats - Allocate a hw stats structure
+ * @ibdev: device pointer from stack
+ * @port_num: port number
*/
-static int i40iw_get_protocol_stats(struct ib_device *ibdev,
- union rdma_protocol_stats *stats)
+static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ struct i40iw_device *iwdev = to_iwdev(ibdev);
+ struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ int num_counters = I40IW_HW_STAT_INDEX_MAX_32 +
+ I40IW_HW_STAT_INDEX_MAX_64;
+ unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN;
+
+ BUILD_BUG_ON(ARRAY_SIZE(i40iw_hw_stat_names) !=
+ (I40IW_HW_STAT_INDEX_MAX_32 +
+ I40IW_HW_STAT_INDEX_MAX_64));
+
+ /*
+ * PFs get the default update lifespan, but VFs only update once
+ * per second
+ */
+ if (!dev->is_pf)
+ lifespan = 1000;
+ return rdma_alloc_hw_stats_struct(i40iw_hw_stat_names, num_counters,
+ lifespan);
+}
+
+/**
+ * i40iw_get_hw_stats - Populates the rdma_hw_stats structure
+ * @ibdev: device pointer from stack
+ * @stats: stats pointer from stack
+ * @port_num: port number
+ * @index: which hw counter the stack is requesting we update
+ */
+static int i40iw_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port_num, int index)
{
struct i40iw_device *iwdev = to_iwdev(ibdev);
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
- struct timespec curr_time;
- static struct timespec last_rd_time = {0, 0};
unsigned long flags;
- curr_time = current_kernel_time();
- memset(stats, 0, sizeof(*stats));
-
if (dev->is_pf) {
spin_lock_irqsave(&devstat->stats_lock, flags);
devstat->ops.iw_hw_stat_read_all(devstat,
&devstat->hw_stats);
spin_unlock_irqrestore(&devstat->stats_lock, flags);
} else {
- if (((u64)curr_time.tv_sec - (u64)last_rd_time.tv_sec) > 1)
- if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats))
- return -ENOSYS;
+ if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats))
+ return -ENOSYS;
}
- stats->iw.ipInReceives = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] +
- hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXPKTS];
- stats->iw.ipInTruncatedPkts = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] +
- hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC];
- stats->iw.ipInDiscards = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] +
- hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD];
- stats->iw.ipOutNoRoutes = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] +
- hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE];
- stats->iw.ipReasmReqds = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] +
- hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS];
- stats->iw.ipFragCreates = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] +
- hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS];
- stats->iw.ipInMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] +
- hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS];
- stats->iw.ipOutMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] +
- hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXMCPKTS];
- stats->iw.tcpOutSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPTXSEG];
- stats->iw.tcpInSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPRXSEGS];
- stats->iw.tcpRetransSegs = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_TCPRTXSEG];
-
- last_rd_time = curr_time;
- return 0;
+ memcpy(&stats->value[0], &hw_stats, sizeof(*hw_stats));
+
+ return stats->num_counters;
}
/**
@@ -2551,7 +2623,8 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr;
iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr;
iwibdev->ibdev.dereg_mr = i40iw_dereg_mr;
- iwibdev->ibdev.get_protocol_stats = i40iw_get_protocol_stats;
+ iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats;
+ iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats;
iwibdev->ibdev.query_device = i40iw_query_device;
iwibdev->ibdev.create_ah = i40iw_create_ah;
iwibdev->ibdev.destroy_ah = i40iw_destroy_ah;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 82d7c4bf5970..ce4034071f9c 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1308,21 +1308,6 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = {
SYM_LSB(IntMask, fldname##17IntMask)), \
.msg = #fldname "_C", .sz = sizeof(#fldname "_C") }
-static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = {
- INTR_AUTO_P(SDmaInt),
- INTR_AUTO_P(SDmaProgressInt),
- INTR_AUTO_P(SDmaIdleInt),
- INTR_AUTO_P(SDmaCleanupDone),
- INTR_AUTO_C(RcvUrg),
- INTR_AUTO_P(ErrInt),
- INTR_AUTO(ErrInt), /* non-port-specific errs */
- INTR_AUTO(AssertGPIOInt),
- INTR_AUTO_P(SendDoneInt),
- INTR_AUTO(SendBufAvailInt),
- INTR_AUTO_C(RcvAvail),
- { .mask = 0, .sz = 0 }
-};
-
#define TXSYMPTOM_AUTO_P(fldname) \
{ .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), \
.msg = #fldname, .sz = sizeof(#fldname) }
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 0bd18375d7df..d2ac29861af5 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -1172,11 +1172,13 @@ static int pma_get_classportinfo(struct ib_pma_mad *pmp,
* Set the most significant bit of CM2 to indicate support for
* congestion statistics
*/
- p->reserved[0] = dd->psxmitwait_supported << 7;
+ ib_set_cpi_capmask2(p,
+ dd->psxmitwait_supported <<
+ (31 - IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE));
/*
* Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
*/
- p->resp_time_value = 18;
+ ib_set_cpi_resp_time(p, 18);
return reply((struct ib_smp *) pmp);
}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 6888f03c6d61..4f878151f81f 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -159,6 +159,7 @@ struct qib_other_headers {
} at;
__be32 imm_data;
__be32 aeth;
+ __be32 ieth;
struct ib_atomic_eth atomic_eth;
} u;
} __packed;
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index b1ffc8b4a6c0..6ca6fa80dd6e 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -525,6 +525,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi)
return PTR_ERR(task);
}
+ set_user_nice(task, MIN_NICE);
cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
kthread_bind(task, cpu);
wake_up_process(task);
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 0ff765bfd619..0f4d4500f45e 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -124,11 +124,13 @@ static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
int count)
{
int m, i = 0;
+ struct rvt_dev_info *dev = ib_to_rvt(pd->device);
mr->mapsz = 0;
m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
for (; i < m; i++) {
- mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
+ mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
+ dev->dparms.node);
if (!mr->map[i]) {
rvt_deinit_mregion(mr);
return -ENOMEM;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 0f12c211c385..5fa4d4d81ee0 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -397,6 +397,7 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
{
unsigned n;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
rvt_put_ss(&qp->s_rdma_read_sge);
@@ -431,7 +432,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
if (qp->ibqp.qp_type != IB_QPT_RC)
return;
- for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
+ for (n = 0; n < rvt_max_atomic(rdi); n++) {
struct rvt_ack_entry *e = &qp->s_ack_queue[n];
if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
@@ -569,7 +570,12 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
qp->s_ssn = 1;
qp->s_lsn = 0;
qp->s_mig_state = IB_MIG_MIGRATED;
- memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
+ if (qp->s_ack_queue)
+ memset(
+ qp->s_ack_queue,
+ 0,
+ rvt_max_atomic(rdi) *
+ sizeof(*qp->s_ack_queue));
qp->r_head_ack_queue = 0;
qp->s_tail_ack_queue = 0;
qp->s_num_rd_atomic = 0;
@@ -653,9 +659,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
if (gfp == GFP_NOIO)
swq = __vmalloc(
(init_attr->cap.max_send_wr + 1) * sz,
- gfp, PAGE_KERNEL);
+ gfp | __GFP_ZERO, PAGE_KERNEL);
else
- swq = vmalloc_node(
+ swq = vzalloc_node(
(init_attr->cap.max_send_wr + 1) * sz,
rdi->dparms.node);
if (!swq)
@@ -677,6 +683,16 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
goto bail_swq;
RCU_INIT_POINTER(qp->next, NULL);
+ if (init_attr->qp_type == IB_QPT_RC) {
+ qp->s_ack_queue =
+ kzalloc_node(
+ sizeof(*qp->s_ack_queue) *
+ rvt_max_atomic(rdi),
+ gfp,
+ rdi->dparms.node);
+ if (!qp->s_ack_queue)
+ goto bail_qp;
+ }
/*
* Driver needs to set up it's private QP structure and do any
@@ -704,9 +720,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
qp->r_rq.wq = __vmalloc(
sizeof(struct rvt_rwq) +
qp->r_rq.size * sz,
- gfp, PAGE_KERNEL);
+ gfp | __GFP_ZERO, PAGE_KERNEL);
else
- qp->r_rq.wq = vmalloc_node(
+ qp->r_rq.wq = vzalloc_node(
sizeof(struct rvt_rwq) +
qp->r_rq.size * sz,
rdi->dparms.node);
@@ -857,6 +873,7 @@ bail_driver_priv:
rdi->driver_f.qp_priv_free(rdi, qp);
bail_qp:
+ kfree(qp->s_ack_queue);
kfree(qp);
bail_swq:
@@ -1284,6 +1301,7 @@ int rvt_destroy_qp(struct ib_qp *ibqp)
vfree(qp->r_rq.wq);
vfree(qp->s_wq);
rdi->driver_f.qp_priv_free(rdi, qp);
+ kfree(qp->s_ack_queue);
kfree(qp);
return 0;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index caec8e9c4666..bab7db6fa9ab 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -92,6 +92,8 @@ enum {
IPOIB_FLAG_UMCAST = 10,
IPOIB_STOP_NEIGH_GC = 11,
IPOIB_NEIGH_TBL_FLUSH = 12,
+ IPOIB_FLAG_DEV_ADDR_SET = 13,
+ IPOIB_FLAG_DEV_ADDR_CTRL = 14,
IPOIB_MAX_BACKOFF_SECONDS = 16,
@@ -392,6 +394,7 @@ struct ipoib_dev_priv {
struct ipoib_ethtool_st ethtool;
struct timer_list poll_timer;
unsigned max_send_sge;
+ bool sm_fullmember_sendonly_support;
};
struct ipoib_ah {
@@ -476,6 +479,7 @@ void ipoib_reap_ah(struct work_struct *work);
void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev);
+int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 418e5a1c8744..45c40a17d6a6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -997,6 +997,106 @@ static inline int update_child_pkey(struct ipoib_dev_priv *priv)
return 0;
}
+/*
+ * returns true if the device address of the ipoib interface has changed and the
+ * new address is a valid one (i.e in the gid table), return false otherwise.
+ */
+static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
+{
+ union ib_gid search_gid;
+ union ib_gid gid0;
+ union ib_gid *netdev_gid;
+ int err;
+ u16 index;
+ u8 port;
+ bool ret = false;
+
+ netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4);
+ if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL))
+ return false;
+
+ netif_addr_lock(priv->dev);
+
+ /* The subnet prefix may have changed, update it now so we won't have
+ * to do it later
+ */
+ priv->local_gid.global.subnet_prefix = gid0.global.subnet_prefix;
+ netdev_gid->global.subnet_prefix = gid0.global.subnet_prefix;
+ search_gid.global.subnet_prefix = gid0.global.subnet_prefix;
+
+ search_gid.global.interface_id = priv->local_gid.global.interface_id;
+
+ netif_addr_unlock(priv->dev);
+
+ err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB,
+ priv->dev, &port, &index);
+
+ netif_addr_lock(priv->dev);
+
+ if (search_gid.global.interface_id !=
+ priv->local_gid.global.interface_id)
+ /* There was a change while we were looking up the gid, bail
+ * here and let the next work sort this out
+ */
+ goto out;
+
+ /* The next section of code needs some background:
+ * Per IB spec the port GUID can't change if the HCA is powered on.
+ * port GUID is the basis for GID at index 0 which is the basis for
+ * the default device address of a ipoib interface.
+ *
+ * so it seems the flow should be:
+ * if user_changed_dev_addr && gid in gid tbl
+ * set bit dev_addr_set
+ * return true
+ * else
+ * return false
+ *
+ * The issue is that there are devices that don't follow the spec,
+ * they change the port GUID when the HCA is powered, so in order
+ * not to break userspace applications, We need to check if the
+ * user wanted to control the device address and we assume that
+ * if he sets the device address back to be based on GID index 0,
+ * he no longer wishs to control it.
+ *
+ * If the user doesn't control the the device address,
+ * IPOIB_FLAG_DEV_ADDR_SET is set and ib_find_gid failed it means
+ * the port GUID has changed and GID at index 0 has changed
+ * so we need to change priv->local_gid and priv->dev->dev_addr
+ * to reflect the new GID.
+ */
+ if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
+ if (!err && port == priv->port) {
+ set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
+ if (index == 0)
+ clear_bit(IPOIB_FLAG_DEV_ADDR_CTRL,
+ &priv->flags);
+ else
+ set_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags);
+ ret = true;
+ } else {
+ ret = false;
+ }
+ } else {
+ if (!err && port == priv->port) {
+ ret = true;
+ } else {
+ if (!test_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags)) {
+ memcpy(&priv->local_gid, &gid0,
+ sizeof(priv->local_gid));
+ memcpy(priv->dev->dev_addr + 4, &gid0,
+ sizeof(priv->local_gid));
+ ret = true;
+ }
+ }
+ }
+
+out:
+ netif_addr_unlock(priv->dev);
+
+ return ret;
+}
+
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
enum ipoib_flush_level level,
int nesting)
@@ -1018,6 +1118,9 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
level != IPOIB_FLUSH_HEAVY) {
+ /* Make sure the dev_addr is set even if not flushing */
+ if (level == IPOIB_FLUSH_LIGHT)
+ ipoib_dev_addr_changed_valid(priv);
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
return;
}
@@ -1029,7 +1132,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
update_parent_pkey(priv);
else
update_child_pkey(priv);
- }
+ } else if (level == IPOIB_FLUSH_LIGHT)
+ ipoib_dev_addr_changed_valid(priv);
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
return;
}
@@ -1081,7 +1185,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
if (level >= IPOIB_FLUSH_NORMAL)
ipoib_ib_dev_up(dev);
- ipoib_mcast_restart_task(&priv->restart_task);
+ if (ipoib_dev_addr_changed_valid(priv))
+ ipoib_mcast_restart_task(&priv->restart_task);
}
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index b940ef1c19c7..2d7c16346648 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -99,6 +99,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
struct ib_device *dev, u8 port, u16 pkey,
const union ib_gid *gid, const struct sockaddr *addr,
void *client_data);
+static int ipoib_set_mac(struct net_device *dev, void *addr);
static struct ib_client ipoib_client = {
.name = "ipoib",
@@ -117,6 +118,8 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
+ priv->sm_fullmember_sendonly_support = false;
+
if (ipoib_ib_dev_open(dev)) {
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return 0;
@@ -629,6 +632,77 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
spin_unlock_irq(&priv->lock);
}
+struct classport_info_context {
+ struct ipoib_dev_priv *priv;
+ struct completion done;
+ struct ib_sa_query *sa_query;
+};
+
+static void classport_info_query_cb(int status, struct ib_class_port_info *rec,
+ void *context)
+{
+ struct classport_info_context *cb_ctx = context;
+ struct ipoib_dev_priv *priv;
+
+ WARN_ON(!context);
+
+ priv = cb_ctx->priv;
+
+ if (status || !rec) {
+ pr_debug("device: %s failed query classport_info status: %d\n",
+ priv->dev->name, status);
+ /* keeps the default, will try next mcast_restart */
+ priv->sm_fullmember_sendonly_support = false;
+ goto out;
+ }
+
+ if (ib_get_cpi_capmask2(rec) &
+ IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) {
+ pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n",
+ priv->dev->name);
+ priv->sm_fullmember_sendonly_support = true;
+ } else {
+ pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n",
+ priv->dev->name);
+ priv->sm_fullmember_sendonly_support = false;
+ }
+
+out:
+ complete(&cb_ctx->done);
+}
+
+int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv)
+{
+ struct classport_info_context *callback_context;
+ int ret;
+
+ callback_context = kmalloc(sizeof(*callback_context), GFP_KERNEL);
+ if (!callback_context)
+ return -ENOMEM;
+
+ callback_context->priv = priv;
+ init_completion(&callback_context->done);
+
+ ret = ib_sa_classport_info_rec_query(&ipoib_sa_client,
+ priv->ca, priv->port, 3000,
+ GFP_KERNEL,
+ classport_info_query_cb,
+ callback_context,
+ &callback_context->sa_query);
+ if (ret < 0) {
+ pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n",
+ priv->dev->name, ret);
+ kfree(callback_context);
+ return ret;
+ }
+
+ /* waiting for the callback to finish before returnning */
+ wait_for_completion(&callback_context->done);
+ kfree(callback_context);
+
+ return ret;
+}
+
void ipoib_flush_paths(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -1649,6 +1723,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
.ndo_get_vf_config = ipoib_get_vf_config,
.ndo_get_vf_stats = ipoib_get_vf_stats,
.ndo_set_vf_guid = ipoib_set_vf_guid,
+ .ndo_set_mac_address = ipoib_set_mac,
};
static const struct net_device_ops ipoib_netdev_ops_vf = {
@@ -1771,6 +1846,70 @@ int ipoib_add_umcast_attr(struct net_device *dev)
return device_create_file(&dev->dev, &dev_attr_umcast);
}
+static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid)
+{
+ struct ipoib_dev_priv *child_priv;
+ struct net_device *netdev = priv->dev;
+
+ netif_addr_lock(netdev);
+
+ memcpy(&priv->local_gid.global.interface_id,
+ &gid->global.interface_id,
+ sizeof(gid->global.interface_id));
+ memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid));
+ clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
+
+ netif_addr_unlock(netdev);
+
+ if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
+ down_read(&priv->vlan_rwsem);
+ list_for_each_entry(child_priv, &priv->child_intfs, list)
+ set_base_guid(child_priv, gid);
+ up_read(&priv->vlan_rwsem);
+ }
+}
+
+static int ipoib_check_lladdr(struct net_device *dev,
+ struct sockaddr_storage *ss)
+{
+ union ib_gid *gid = (union ib_gid *)(ss->__data + 4);
+ int ret = 0;
+
+ netif_addr_lock(dev);
+
+ /* Make sure the QPN, reserved and subnet prefix match the current
+ * lladdr, it also makes sure the lladdr is unicast.
+ */
+ if (memcmp(dev->dev_addr, ss->__data,
+ 4 + sizeof(gid->global.subnet_prefix)) ||
+ gid->global.interface_id == 0)
+ ret = -EINVAL;
+
+ netif_addr_unlock(dev);
+
+ return ret;
+}
+
+static int ipoib_set_mac(struct net_device *dev, void *addr)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct sockaddr_storage *ss = addr;
+ int ret;
+
+ if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev))
+ return -EBUSY;
+
+ ret = ipoib_check_lladdr(dev, ss);
+ if (ret)
+ return ret;
+
+ set_base_guid(priv, (union ib_gid *)(ss->__data + 4));
+
+ queue_work(ipoib_workqueue, &priv->flush_light);
+
+ return 0;
+}
+
static ssize_t create_child(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
@@ -1894,6 +2033,7 @@ static struct net_device *ipoib_add_port(const char *format,
goto device_init_failed;
} else
memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
+ set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
result = ipoib_dev_init(priv->dev, hca, port);
if (result < 0) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 25889311b1e9..82fbc9442608 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -64,6 +64,9 @@ struct ipoib_mcast_iter {
unsigned int send_only;
};
+/* join state that allows creating mcg with sendonly member request */
+#define SENDONLY_FULLMEMBER_JOIN 8
+
/*
* This should be called with the priv->lock held
*/
@@ -326,12 +329,23 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
carrier_on_task);
struct ib_port_attr attr;
+ int ret;
if (ib_query_port(priv->ca, priv->port, &attr) ||
attr.state != IB_PORT_ACTIVE) {
ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
return;
}
+ /*
+ * Check if can send sendonly MCG's with sendonly-fullmember join state.
+ * It done here after the successfully join to the broadcast group,
+ * because the broadcast group must always be joined first and is always
+ * re-joined if the SM changes substantially.
+ */
+ ret = ipoib_check_sm_sendonly_fullmember_support(priv);
+ if (ret < 0)
+ pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n",
+ priv->dev->name, ret);
/*
* Take rtnl_lock to avoid racing with ipoib_stop() and
@@ -515,22 +529,20 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
rec.hop_limit = priv->broadcast->mcmember.hop_limit;
/*
- * Send-only IB Multicast joins do not work at the core
- * IB layer yet, so we can't use them here. However,
- * we are emulating an Ethernet multicast send, which
- * does not require a multicast subscription and will
- * still send properly. The most appropriate thing to
+ * Send-only IB Multicast joins work at the core IB layer but
+ * require specific SM support.
+ * We can use such joins here only if the current SM supports that feature.
+ * However, if not, we emulate an Ethernet multicast send,
+ * which does not require a multicast subscription and will
+ * still send properly. The most appropriate thing to
* do is to create the group if it doesn't exist as that
* most closely emulates the behavior, from a user space
- * application perspecitive, of Ethernet multicast
- * operation. For now, we do a full join, maybe later
- * when the core IB layers support send only joins we
- * will use them.
+ * application perspective, of Ethernet multicast operation.
*/
-#if 0
- if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
- rec.join_state = 4;
-#endif
+ if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
+ priv->sm_fullmember_sendonly_support)
+ /* SM supports sendonly-fullmember, otherwise fallback to full-member */
+ rec.join_state = SENDONLY_FULLMEMBER_JOIN;
}
spin_unlock_irq(&priv->lock);
@@ -570,11 +582,13 @@ void ipoib_mcast_join_task(struct work_struct *work)
return;
}
priv->local_lid = port_attr.lid;
+ netif_addr_lock(dev);
- if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL))
- ipoib_warn(priv, "ib_query_gid() failed\n");
- else
- memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
+ if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
+ netif_addr_unlock(dev);
+ return;
+ }
+ netif_addr_unlock(dev);
spin_lock_irq(&priv->lock);
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index b809c373e40e..1e7cbbaa15bd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -307,5 +307,8 @@ void ipoib_event(struct ib_event_handler *handler,
queue_work(ipoib_workqueue, &priv->flush_normal);
} else if (record->event == IB_EVENT_PKEY_CHANGE) {
queue_work(ipoib_workqueue, &priv->flush_heavy);
+ } else if (record->event == IB_EVENT_GID_CHANGE &&
+ !test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
+ queue_work(ipoib_workqueue, &priv->flush_light);
}
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index fca1a882de27..64a35595eab8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -68,6 +68,8 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
priv->pkey = pkey;
memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
+ memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid));
+ set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
priv->dev->broadcast[8] = pkey >> 8;
priv->dev->broadcast[9] = pkey & 0xff;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 2843f1ae75bd..887ebadd4774 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -254,8 +254,8 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad)
memset(cif, 0, sizeof(*cif));
cif->base_version = 1;
cif->class_version = 1;
- cif->resp_time_value = 20;
+ ib_set_cpi_resp_time(cif, 20);
mad->mad_hdr.status = 0;
}
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 5bac28a3944e..7c197d1a1231 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -66,8 +66,6 @@ source "drivers/staging/nvec/Kconfig"
source "drivers/staging/media/Kconfig"
-source "drivers/staging/rdma/Kconfig"
-
source "drivers/staging/android/Kconfig"
source "drivers/staging/board/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index a954242b0f2c..a470c7276142 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -23,7 +23,6 @@ obj-$(CONFIG_FB_XGI) += xgifb/
obj-$(CONFIG_USB_EMXX) += emxx_udc/
obj-$(CONFIG_SPEAKUP) += speakup/
obj-$(CONFIG_MFD_NVEC) += nvec/
-obj-$(CONFIG_STAGING_RDMA) += rdma/
obj-$(CONFIG_ANDROID) += android/
obj-$(CONFIG_STAGING_BOARD) += board/
obj-$(CONFIG_LTE_GDM724X) += gdm724x/
diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig
deleted file mode 100644
index f1f3ecadf0fb..000000000000
--- a/drivers/staging/rdma/Kconfig
+++ /dev/null
@@ -1,27 +0,0 @@
-menuconfig STAGING_RDMA
- tristate "RDMA staging drivers"
- depends on INFINIBAND
- depends on PCI || BROKEN
- depends on HAS_IOMEM
- depends on NET
- depends on INET
- default n
- ---help---
- This option allows you to select a number of RDMA drivers that
- fall into one of two categories: deprecated drivers being held
- here before finally being removed or new drivers that still need
- some work before being moved to the normal RDMA driver area.
-
- If you wish to work on these drivers, to help improve them, or
- to report problems you have with them, please use the
- linux-rdma@vger.kernel.org mailing list.
-
- If in doubt, say N here.
-
-
-# Please keep entries in alphabetic order
-if STAGING_RDMA
-
-source "drivers/staging/rdma/hfi1/Kconfig"
-
-endif
diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile
deleted file mode 100644
index 8c7fc1de48a7..000000000000
--- a/drivers/staging/rdma/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# Entries for RDMA_STAGING tree
-obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
diff --git a/drivers/staging/rdma/hfi1/TODO b/drivers/staging/rdma/hfi1/TODO
deleted file mode 100644
index 4c6f1d7d2eaf..000000000000
--- a/drivers/staging/rdma/hfi1/TODO
+++ /dev/null
@@ -1,6 +0,0 @@
-July, 2015
-
-- Remove unneeded file entries in sysfs
-- Remove software processing of IB protocol and place in library for use
- by qib, ipath (if still present), hfi1, and eventually soft-roce
-- Replace incorrect uAPI
diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c
deleted file mode 100644
index bb2409ad891a..000000000000
--- a/drivers/staging/rdma/hfi1/diag.c
+++ /dev/null
@@ -1,1925 +0,0 @@
-/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-/*
- * This file contains support for diagnostic functions. It is accessed by
- * opening the hfi1_diag device, normally minor number 129. Diagnostic use
- * of the chip may render the chip or board unusable until the driver
- * is unloaded, or in some cases, until the system is rebooted.
- *
- * Accesses to the chip through this interface are not similar to going
- * through the /sys/bus/pci resource mmap interface.
- */
-
-#include <linux/io.h>
-#include <linux/pci.h>
-#include <linux/poll.h>
-#include <linux/vmalloc.h>
-#include <linux/export.h>
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-#include <linux/module.h>
-#include <rdma/ib_smi.h>
-#include "hfi.h"
-#include "device.h"
-#include "common.h"
-#include "verbs_txreq.h"
-#include "trace.h"
-
-#undef pr_fmt
-#define pr_fmt(fmt) DRIVER_NAME ": " fmt
-#define snoop_dbg(fmt, ...) \
- hfi1_cdbg(SNOOP, fmt, ##__VA_ARGS__)
-
-/* Snoop option mask */
-#define SNOOP_DROP_SEND BIT(0)
-#define SNOOP_USE_METADATA BIT(1)
-#define SNOOP_SET_VL0TOVL15 BIT(2)
-
-static u8 snoop_flags;
-
-/*
- * Extract packet length from LRH header.
- * This is in Dwords so multiply by 4 to get size in bytes
- */
-#define HFI1_GET_PKT_LEN(x) (((be16_to_cpu((x)->lrh[2]) & 0xFFF)) << 2)
-
-enum hfi1_filter_status {
- HFI1_FILTER_HIT,
- HFI1_FILTER_ERR,
- HFI1_FILTER_MISS
-};
-
-/* snoop processing functions */
-rhf_rcv_function_ptr snoop_rhf_rcv_functions[8] = {
- [RHF_RCV_TYPE_EXPECTED] = snoop_recv_handler,
- [RHF_RCV_TYPE_EAGER] = snoop_recv_handler,
- [RHF_RCV_TYPE_IB] = snoop_recv_handler,
- [RHF_RCV_TYPE_ERROR] = snoop_recv_handler,
- [RHF_RCV_TYPE_BYPASS] = snoop_recv_handler,
- [RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
- [RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
- [RHF_RCV_TYPE_INVALID7] = process_receive_invalid
-};
-
-/* Snoop packet structure */
-struct snoop_packet {
- struct list_head list;
- u32 total_len;
- u8 data[];
-};
-
-/* Do not make these an enum or it will blow up the capture_md */
-#define PKT_DIR_EGRESS 0x0
-#define PKT_DIR_INGRESS 0x1
-
-/* Packet capture metadata returned to the user with the packet. */
-struct capture_md {
- u8 port;
- u8 dir;
- u8 reserved[6];
- union {
- u64 pbc;
- u64 rhf;
- } u;
-};
-
-static atomic_t diagpkt_count = ATOMIC_INIT(0);
-static struct cdev diagpkt_cdev;
-static struct device *diagpkt_device;
-
-static ssize_t diagpkt_write(struct file *fp, const char __user *data,
- size_t count, loff_t *off);
-
-static const struct file_operations diagpkt_file_ops = {
- .owner = THIS_MODULE,
- .write = diagpkt_write,
- .llseek = noop_llseek,
-};
-
-/*
- * This is used for communication with user space for snoop extended IOCTLs
- */
-struct hfi1_link_info {
- __be64 node_guid;
- u8 port_mode;
- u8 port_state;
- u16 link_speed_active;
- u16 link_width_active;
- u16 vl15_init;
- u8 port_number;
- /*
- * Add padding to make this a full IB SMP payload. Note: changing the
- * size of this structure will make the IOCTLs created with _IOWR
- * change.
- * Be sure to run tests on all IOCTLs when making changes to this
- * structure.
- */
- u8 res[47];
-};
-
-/*
- * This starts our ioctl sequence numbers *way* off from the ones
- * defined in ib_core.
- */
-#define SNOOP_CAPTURE_VERSION 0x1
-
-#define IB_IOCTL_MAGIC 0x1b /* See Documentation/ioctl-number.txt */
-#define HFI1_SNOOP_IOC_MAGIC IB_IOCTL_MAGIC
-#define HFI1_SNOOP_IOC_BASE_SEQ 0x80
-
-#define HFI1_SNOOP_IOCGETLINKSTATE \
- _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ)
-#define HFI1_SNOOP_IOCSETLINKSTATE \
- _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 1)
-#define HFI1_SNOOP_IOCCLEARQUEUE \
- _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 2)
-#define HFI1_SNOOP_IOCCLEARFILTER \
- _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 3)
-#define HFI1_SNOOP_IOCSETFILTER \
- _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 4)
-#define HFI1_SNOOP_IOCGETVERSION \
- _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 5)
-#define HFI1_SNOOP_IOCSET_OPTS \
- _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 6)
-
-/*
- * These offsets +6/+7 could change, but these are already known and used
- * IOCTL numbers so don't change them without a good reason.
- */
-#define HFI1_SNOOP_IOCGETLINKSTATE_EXTRA \
- _IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 6, \
- struct hfi1_link_info)
-#define HFI1_SNOOP_IOCSETLINKSTATE_EXTRA \
- _IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 7, \
- struct hfi1_link_info)
-
-static int hfi1_snoop_open(struct inode *in, struct file *fp);
-static ssize_t hfi1_snoop_read(struct file *fp, char __user *data,
- size_t pkt_len, loff_t *off);
-static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data,
- size_t count, loff_t *off);
-static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg);
-static unsigned int hfi1_snoop_poll(struct file *fp,
- struct poll_table_struct *wait);
-static int hfi1_snoop_release(struct inode *in, struct file *fp);
-
-struct hfi1_packet_filter_command {
- int opcode;
- int length;
- void *value_ptr;
-};
-
-/* Can't re-use PKT_DIR_*GRESS here because 0 means no packets for this */
-#define HFI1_SNOOP_INGRESS 0x1
-#define HFI1_SNOOP_EGRESS 0x2
-
-enum hfi1_packet_filter_opcodes {
- FILTER_BY_LID,
- FILTER_BY_DLID,
- FILTER_BY_MAD_MGMT_CLASS,
- FILTER_BY_QP_NUMBER,
- FILTER_BY_PKT_TYPE,
- FILTER_BY_SERVICE_LEVEL,
- FILTER_BY_PKEY,
- FILTER_BY_DIRECTION,
-};
-
-static const struct file_operations snoop_file_ops = {
- .owner = THIS_MODULE,
- .open = hfi1_snoop_open,
- .read = hfi1_snoop_read,
- .unlocked_ioctl = hfi1_ioctl,
- .poll = hfi1_snoop_poll,
- .write = hfi1_snoop_write,
- .release = hfi1_snoop_release
-};
-
-struct hfi1_filter_array {
- int (*filter)(void *, void *, void *);
-};
-
-static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value);
-static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value);
-static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data,
- void *value);
-static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value);
-static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data,
- void *value);
-static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data,
- void *value);
-static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value);
-static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value);
-
-static const struct hfi1_filter_array hfi1_filters[] = {
- { hfi1_filter_lid },
- { hfi1_filter_dlid },
- { hfi1_filter_mad_mgmt_class },
- { hfi1_filter_qp_number },
- { hfi1_filter_ibpacket_type },
- { hfi1_filter_ib_service_level },
- { hfi1_filter_ib_pkey },
- { hfi1_filter_direction },
-};
-
-#define HFI1_MAX_FILTERS ARRAY_SIZE(hfi1_filters)
-#define HFI1_DIAG_MINOR_BASE 129
-
-static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name);
-
-int hfi1_diag_add(struct hfi1_devdata *dd)
-{
- char name[16];
- int ret = 0;
-
- snprintf(name, sizeof(name), "%s_diagpkt%d", class_name(),
- dd->unit);
- /*
- * Do this for each device as opposed to the normal diagpkt
- * interface which is one per host
- */
- ret = hfi1_snoop_add(dd, name);
- if (ret)
- dd_dev_err(dd, "Unable to init snoop/capture device");
-
- snprintf(name, sizeof(name), "%s_diagpkt", class_name());
- if (atomic_inc_return(&diagpkt_count) == 1) {
- ret = hfi1_cdev_init(HFI1_DIAGPKT_MINOR, name,
- &diagpkt_file_ops, &diagpkt_cdev,
- &diagpkt_device, false);
- }
-
- return ret;
-}
-
-/* this must be called w/ dd->snoop_in_lock held */
-static void drain_snoop_list(struct list_head *queue)
-{
- struct list_head *pos, *q;
- struct snoop_packet *packet;
-
- list_for_each_safe(pos, q, queue) {
- packet = list_entry(pos, struct snoop_packet, list);
- list_del(pos);
- kfree(packet);
- }
-}
-
-static void hfi1_snoop_remove(struct hfi1_devdata *dd)
-{
- unsigned long flags = 0;
-
- spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
- drain_snoop_list(&dd->hfi1_snoop.queue);
- hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev);
- spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-}
-
-void hfi1_diag_remove(struct hfi1_devdata *dd)
-{
- hfi1_snoop_remove(dd);
- if (atomic_dec_and_test(&diagpkt_count))
- hfi1_cdev_cleanup(&diagpkt_cdev, &diagpkt_device);
- hfi1_cdev_cleanup(&dd->diag_cdev, &dd->diag_device);
-}
-
-/*
- * Allocated structure shared between the credit return mechanism and
- * diagpkt_send().
- */
-struct diagpkt_wait {
- struct completion credits_returned;
- int code;
- atomic_t count;
-};
-
-/*
- * When each side is finished with the structure, they call this.
- * The last user frees the structure.
- */
-static void put_diagpkt_wait(struct diagpkt_wait *wait)
-{
- if (atomic_dec_and_test(&wait->count))
- kfree(wait);
-}
-
-/*
- * Callback from the credit return code. Set the complete, which
- * will let diapkt_send() continue.
- */
-static void diagpkt_complete(void *arg, int code)
-{
- struct diagpkt_wait *wait = (struct diagpkt_wait *)arg;
-
- wait->code = code;
- complete(&wait->credits_returned);
- put_diagpkt_wait(wait); /* finished with the structure */
-}
-
-/**
- * diagpkt_send - send a packet
- * @dp: diag packet descriptor
- */
-static ssize_t diagpkt_send(struct diag_pkt *dp)
-{
- struct hfi1_devdata *dd;
- struct send_context *sc;
- struct pio_buf *pbuf;
- u32 *tmpbuf = NULL;
- ssize_t ret = 0;
- u32 pkt_len, total_len;
- pio_release_cb credit_cb = NULL;
- void *credit_arg = NULL;
- struct diagpkt_wait *wait = NULL;
- int trycount = 0;
-
- dd = hfi1_lookup(dp->unit);
- if (!dd || !(dd->flags & HFI1_PRESENT) || !dd->kregbase) {
- ret = -ENODEV;
- goto bail;
- }
- if (!(dd->flags & HFI1_INITTED)) {
- /* no hardware, freeze, etc. */
- ret = -ENODEV;
- goto bail;
- }
-
- if (dp->version != _DIAG_PKT_VERS) {
- dd_dev_err(dd, "Invalid version %u for diagpkt_write\n",
- dp->version);
- ret = -EINVAL;
- goto bail;
- }
-
- /* send count must be an exact number of dwords */
- if (dp->len & 3) {
- ret = -EINVAL;
- goto bail;
- }
-
- /* there is only port 1 */
- if (dp->port != 1) {
- ret = -EINVAL;
- goto bail;
- }
-
- /* need a valid context */
- if (dp->sw_index >= dd->num_send_contexts) {
- ret = -EINVAL;
- goto bail;
- }
- /* can only use kernel contexts */
- if (dd->send_contexts[dp->sw_index].type != SC_KERNEL &&
- dd->send_contexts[dp->sw_index].type != SC_VL15) {
- ret = -EINVAL;
- goto bail;
- }
- /* must be allocated */
- sc = dd->send_contexts[dp->sw_index].sc;
- if (!sc) {
- ret = -EINVAL;
- goto bail;
- }
- /* must be enabled */
- if (!(sc->flags & SCF_ENABLED)) {
- ret = -EINVAL;
- goto bail;
- }
-
- /* allocate a buffer and copy the data in */
- tmpbuf = vmalloc(dp->len);
- if (!tmpbuf) {
- ret = -ENOMEM;
- goto bail;
- }
-
- if (copy_from_user(tmpbuf,
- (const void __user *)(unsigned long)dp->data,
- dp->len)) {
- ret = -EFAULT;
- goto bail;
- }
-
- /*
- * pkt_len is how much data we have to write, includes header and data.
- * total_len is length of the packet in Dwords plus the PBC should not
- * include the CRC.
- */
- pkt_len = dp->len >> 2;
- total_len = pkt_len + 2; /* PBC + packet */
-
- /* if 0, fill in a default */
- if (dp->pbc == 0) {
- struct hfi1_pportdata *ppd = dd->pport;
-
- hfi1_cdbg(PKT, "Generating PBC");
- dp->pbc = create_pbc(ppd, 0, 0, 0, total_len);
- } else {
- hfi1_cdbg(PKT, "Using passed in PBC");
- }
-
- hfi1_cdbg(PKT, "Egress PBC content is 0x%llx", dp->pbc);
-
- /*
- * The caller wants to wait until the packet is sent and to
- * check for errors. The best we can do is wait until
- * the buffer credits are returned and check if any packet
- * error has occurred. If there are any late errors, this
- * could miss it. If there are other senders who generate
- * an error, this may find it. However, in general, it
- * should catch most.
- */
- if (dp->flags & F_DIAGPKT_WAIT) {
- /* always force a credit return */
- dp->pbc |= PBC_CREDIT_RETURN;
- /* turn on credit return interrupts */
- sc_add_credit_return_intr(sc);
- wait = kmalloc(sizeof(*wait), GFP_KERNEL);
- if (!wait) {
- ret = -ENOMEM;
- goto bail;
- }
- init_completion(&wait->credits_returned);
- atomic_set(&wait->count, 2);
- wait->code = PRC_OK;
-
- credit_cb = diagpkt_complete;
- credit_arg = wait;
- }
-
-retry:
- pbuf = sc_buffer_alloc(sc, total_len, credit_cb, credit_arg);
- if (!pbuf) {
- if (trycount == 0) {
- /* force a credit return and try again */
- sc_return_credits(sc);
- trycount = 1;
- goto retry;
- }
- /*
- * No send buffer means no credit callback. Undo
- * the wait set-up that was done above. We free wait
- * because the callback will never be called.
- */
- if (dp->flags & F_DIAGPKT_WAIT) {
- sc_del_credit_return_intr(sc);
- kfree(wait);
- wait = NULL;
- }
- ret = -ENOSPC;
- goto bail;
- }
-
- pio_copy(dd, pbuf, dp->pbc, tmpbuf, pkt_len);
- /* no flush needed as the HW knows the packet size */
-
- ret = sizeof(*dp);
-
- if (dp->flags & F_DIAGPKT_WAIT) {
- /* wait for credit return */
- ret = wait_for_completion_interruptible(
- &wait->credits_returned);
- /*
- * If the wait returns an error, the wait was interrupted,
- * e.g. with a ^C in the user program. The callback is
- * still pending. This is OK as the wait structure is
- * kmalloc'ed and the structure will free itself when
- * all users are done with it.
- *
- * A context disable occurs on a send context restart, so
- * include that in the list of errors below to check for.
- * NOTE: PRC_FILL_ERR is at best informational and cannot
- * be depended on.
- */
- if (!ret && (((wait->code & PRC_STATUS_ERR) ||
- (wait->code & PRC_FILL_ERR) ||
- (wait->code & PRC_SC_DISABLE))))
- ret = -EIO;
-
- put_diagpkt_wait(wait); /* finished with the structure */
- sc_del_credit_return_intr(sc);
- }
-
-bail:
- vfree(tmpbuf);
- return ret;
-}
-
-static ssize_t diagpkt_write(struct file *fp, const char __user *data,
- size_t count, loff_t *off)
-{
- struct hfi1_devdata *dd;
- struct send_context *sc;
- u8 vl;
-
- struct diag_pkt dp;
-
- if (count != sizeof(dp))
- return -EINVAL;
-
- if (copy_from_user(&dp, data, sizeof(dp)))
- return -EFAULT;
-
- /*
- * The Send Context is derived from the PbcVL value
- * if PBC is populated
- */
- if (dp.pbc) {
- dd = hfi1_lookup(dp.unit);
- if (!dd)
- return -ENODEV;
- vl = (dp.pbc >> PBC_VL_SHIFT) & PBC_VL_MASK;
- sc = dd->vld[vl].sc;
- if (sc) {
- dp.sw_index = sc->sw_index;
- hfi1_cdbg(
- PKT,
- "Packet sent over VL %d via Send Context %u(%u)",
- vl, sc->sw_index, sc->hw_context);
- }
- }
-
- return diagpkt_send(&dp);
-}
-
-static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name)
-{
- int ret = 0;
-
- dd->hfi1_snoop.mode_flag = 0;
- spin_lock_init(&dd->hfi1_snoop.snoop_lock);
- INIT_LIST_HEAD(&dd->hfi1_snoop.queue);
- init_waitqueue_head(&dd->hfi1_snoop.waitq);
-
- ret = hfi1_cdev_init(HFI1_SNOOP_CAPTURE_BASE + dd->unit, name,
- &snoop_file_ops,
- &dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev,
- false);
-
- if (ret) {
- dd_dev_err(dd, "Couldn't create %s device: %d", name, ret);
- hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev,
- &dd->hfi1_snoop.class_dev);
- }
-
- return ret;
-}
-
-static struct hfi1_devdata *hfi1_dd_from_sc_inode(struct inode *in)
-{
- int unit = iminor(in) - HFI1_SNOOP_CAPTURE_BASE;
- struct hfi1_devdata *dd;
-
- dd = hfi1_lookup(unit);
- return dd;
-}
-
-/* clear or restore send context integrity checks */
-static void adjust_integrity_checks(struct hfi1_devdata *dd)
-{
- struct send_context *sc;
- unsigned long sc_flags;
- int i;
-
- spin_lock_irqsave(&dd->sc_lock, sc_flags);
- for (i = 0; i < dd->num_send_contexts; i++) {
- int enable;
-
- sc = dd->send_contexts[i].sc;
-
- if (!sc)
- continue; /* not allocated */
-
- enable = likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) &&
- dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE;
-
- set_pio_integrity(sc);
-
- if (enable) /* take HFI_CAP_* flags into account */
- hfi1_init_ctxt(sc);
- }
- spin_unlock_irqrestore(&dd->sc_lock, sc_flags);
-}
-
-static int hfi1_snoop_open(struct inode *in, struct file *fp)
-{
- int ret;
- int mode_flag = 0;
- unsigned long flags = 0;
- struct hfi1_devdata *dd;
- struct list_head *queue;
-
- mutex_lock(&hfi1_mutex);
-
- dd = hfi1_dd_from_sc_inode(in);
- if (!dd) {
- ret = -ENODEV;
- goto bail;
- }
-
- /*
- * File mode determines snoop or capture. Some existing user
- * applications expect the capture device to be able to be opened RDWR
- * because they expect a dedicated capture device. For this reason we
- * support a module param to force capture mode even if the file open
- * mode matches snoop.
- */
- if ((fp->f_flags & O_ACCMODE) == O_RDONLY) {
- snoop_dbg("Capture Enabled");
- mode_flag = HFI1_PORT_CAPTURE_MODE;
- } else if ((fp->f_flags & O_ACCMODE) == O_RDWR) {
- snoop_dbg("Snoop Enabled");
- mode_flag = HFI1_PORT_SNOOP_MODE;
- } else {
- snoop_dbg("Invalid");
- ret = -EINVAL;
- goto bail;
- }
- queue = &dd->hfi1_snoop.queue;
-
- /*
- * We are not supporting snoop and capture at the same time.
- */
- spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
- if (dd->hfi1_snoop.mode_flag) {
- ret = -EBUSY;
- spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
- goto bail;
- }
-
- dd->hfi1_snoop.mode_flag = mode_flag;
- drain_snoop_list(queue);
-
- dd->hfi1_snoop.filter_callback = NULL;
- dd->hfi1_snoop.filter_value = NULL;
-
- /*
- * Send side packet integrity checks are not helpful when snooping so
- * disable and re-enable when we stop snooping.
- */
- if (mode_flag == HFI1_PORT_SNOOP_MODE) {
- /* clear after snoop mode is on */
- adjust_integrity_checks(dd); /* clear */
-
- /*
- * We also do not want to be doing the DLID LMC check for
- * ingressed packets.
- */
- dd->hfi1_snoop.dcc_cfg = read_csr(dd, DCC_CFG_PORT_CONFIG1);
- write_csr(dd, DCC_CFG_PORT_CONFIG1,
- (dd->hfi1_snoop.dcc_cfg >> 32) << 32);
- }
-
- /*
- * As soon as we set these function pointers the recv and send handlers
- * are active. This is a race condition so we must make sure to drain
- * the queue and init filter values above. Technically we should add
- * locking here but all that will happen is on recv a packet will get
- * allocated and get stuck on the snoop_lock before getting added to the
- * queue. Same goes for send.
- */
- dd->rhf_rcv_function_map = snoop_rhf_rcv_functions;
- dd->process_pio_send = snoop_send_pio_handler;
- dd->process_dma_send = snoop_send_pio_handler;
- dd->pio_inline_send = snoop_inline_pio_send;
-
- spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
- ret = 0;
-
-bail:
- mutex_unlock(&hfi1_mutex);
-
- return ret;
-}
-
-static int hfi1_snoop_release(struct inode *in, struct file *fp)
-{
- unsigned long flags = 0;
- struct hfi1_devdata *dd;
- int mode_flag;
-
- dd = hfi1_dd_from_sc_inode(in);
- if (!dd)
- return -ENODEV;
-
- spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
-
- /* clear the snoop mode before re-adjusting send context CSRs */
- mode_flag = dd->hfi1_snoop.mode_flag;
- dd->hfi1_snoop.mode_flag = 0;
-
- /*
- * Drain the queue and clear the filters we are done with it. Don't
- * forget to restore the packet integrity checks
- */
- drain_snoop_list(&dd->hfi1_snoop.queue);
- if (mode_flag == HFI1_PORT_SNOOP_MODE) {
- /* restore after snoop mode is clear */
- adjust_integrity_checks(dd); /* restore */
-
- /*
- * Also should probably reset the DCC_CONFIG1 register for DLID
- * checking on incoming packets again. Use the value saved when
- * opening the snoop device.
- */
- write_csr(dd, DCC_CFG_PORT_CONFIG1, dd->hfi1_snoop.dcc_cfg);
- }
-
- dd->hfi1_snoop.filter_callback = NULL;
- kfree(dd->hfi1_snoop.filter_value);
- dd->hfi1_snoop.filter_value = NULL;
-
- /*
- * User is done snooping and capturing, return control to the normal
- * handler. Re-enable SDMA handling.
- */
- dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions;
- dd->process_pio_send = hfi1_verbs_send_pio;
- dd->process_dma_send = hfi1_verbs_send_dma;
- dd->pio_inline_send = pio_copy;
-
- spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-
- snoop_dbg("snoop/capture device released");
-
- return 0;
-}
-
-static unsigned int hfi1_snoop_poll(struct file *fp,
- struct poll_table_struct *wait)
-{
- int ret = 0;
- unsigned long flags = 0;
-
- struct hfi1_devdata *dd;
-
- dd = hfi1_dd_from_sc_inode(fp->f_inode);
- if (!dd)
- return -ENODEV;
-
- spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
-
- poll_wait(fp, &dd->hfi1_snoop.waitq, wait);
- if (!list_empty(&dd->hfi1_snoop.queue))
- ret |= POLLIN | POLLRDNORM;
-
- spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
- return ret;
-}
-
-static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data,
- size_t count, loff_t *off)
-{
- struct diag_pkt dpkt;
- struct hfi1_devdata *dd;
- size_t ret;
- u8 byte_two, sl, sc5, sc4, vl, byte_one;
- struct send_context *sc;
- u32 len;
- u64 pbc;
- struct hfi1_ibport *ibp;
- struct hfi1_pportdata *ppd;
-
- dd = hfi1_dd_from_sc_inode(fp->f_inode);
- if (!dd)
- return -ENODEV;
-
- ppd = dd->pport;
- snoop_dbg("received %lu bytes from user", count);
-
- memset(&dpkt, 0, sizeof(struct diag_pkt));
- dpkt.version = _DIAG_PKT_VERS;
- dpkt.unit = dd->unit;
- dpkt.port = 1;
-
- if (likely(!(snoop_flags & SNOOP_USE_METADATA))) {
- /*
- * We need to generate the PBC and not let diagpkt_send do it,
- * to do this we need the VL and the length in dwords.
- * The VL can be determined by using the SL and looking up the
- * SC. Then the SC can be converted into VL. The exception to
- * this is those packets which are from an SMI queue pair.
- * Since we can't detect anything about the QP here we have to
- * rely on the SC. If its 0xF then we assume its SMI and
- * do not look at the SL.
- */
- if (copy_from_user(&byte_one, data, 1))
- return -EINVAL;
-
- if (copy_from_user(&byte_two, data + 1, 1))
- return -EINVAL;
-
- sc4 = (byte_one >> 4) & 0xf;
- if (sc4 == 0xF) {
- snoop_dbg("Detected VL15 packet ignoring SL in packet");
- vl = sc4;
- } else {
- sl = (byte_two >> 4) & 0xf;
- ibp = to_iport(&dd->verbs_dev.rdi.ibdev, 1);
- sc5 = ibp->sl_to_sc[sl];
- vl = sc_to_vlt(dd, sc5);
- if (vl != sc4) {
- snoop_dbg("VL %d does not match SC %d of packet",
- vl, sc4);
- return -EINVAL;
- }
- }
-
- sc = dd->vld[vl].sc; /* Look up the context based on VL */
- if (sc) {
- dpkt.sw_index = sc->sw_index;
- snoop_dbg("Sending on context %u(%u)", sc->sw_index,
- sc->hw_context);
- } else {
- snoop_dbg("Could not find context for vl %d", vl);
- return -EINVAL;
- }
-
- len = (count >> 2) + 2; /* Add in PBC */
- pbc = create_pbc(ppd, 0, 0, vl, len);
- } else {
- if (copy_from_user(&pbc, data, sizeof(pbc)))
- return -EINVAL;
- vl = (pbc >> PBC_VL_SHIFT) & PBC_VL_MASK;
- sc = dd->vld[vl].sc; /* Look up the context based on VL */
- if (sc) {
- dpkt.sw_index = sc->sw_index;
- } else {
- snoop_dbg("Could not find context for vl %d", vl);
- return -EINVAL;
- }
- data += sizeof(pbc);
- count -= sizeof(pbc);
- }
- dpkt.len = count;
- dpkt.data = (unsigned long)data;
-
- snoop_dbg("PBC: vl=0x%llx Length=0x%llx",
- (pbc >> 12) & 0xf,
- (pbc & 0xfff));
-
- dpkt.pbc = pbc;
- ret = diagpkt_send(&dpkt);
- /*
- * diagpkt_send only returns number of bytes in the diagpkt so patch
- * that up here before returning.
- */
- if (ret == sizeof(dpkt))
- return count;
-
- return ret;
-}
-
-static ssize_t hfi1_snoop_read(struct file *fp, char __user *data,
- size_t pkt_len, loff_t *off)
-{
- ssize_t ret = 0;
- unsigned long flags = 0;
- struct snoop_packet *packet = NULL;
- struct hfi1_devdata *dd;
-
- dd = hfi1_dd_from_sc_inode(fp->f_inode);
- if (!dd)
- return -ENODEV;
-
- spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
-
- while (list_empty(&dd->hfi1_snoop.queue)) {
- spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
-
- if (fp->f_flags & O_NONBLOCK)
- return -EAGAIN;
-
- if (wait_event_interruptible(
- dd->hfi1_snoop.waitq,
- !list_empty(&dd->hfi1_snoop.queue)))
- return -EINTR;
-
- spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
- }
-
- if (!list_empty(&dd->hfi1_snoop.queue)) {
- packet = list_entry(dd->hfi1_snoop.queue.next,
- struct snoop_packet, list);
- list_del(&packet->list);
- spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
- if (pkt_len >= packet->total_len) {
- if (copy_to_user(data, packet->data,
- packet->total_len))
- ret = -EFAULT;
- else
- ret = packet->total_len;
- } else {
- ret = -EINVAL;
- }
-
- kfree(packet);
- } else {
- spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
- }
-
- return ret;
-}
-
-/**
- * hfi1_assign_snoop_link_credits -- Set up credits for VL15 and others
- * @ppd : ptr to hfi1 port data
- * @value : options from user space
- *
- * Assumes the rest of the CM credit registers are zero from a
- * previous global or credit reset.
- * Leave shared count at zero for both global and all vls.
- * In snoop mode ideally we don't use shared credits
- * Reserve 8.5k for VL15
- * If total credits less than 8.5kbytes return error.
- * Divide the rest of the credits across VL0 to VL7 and if
- * each of these levels has less than 34 credits (at least 2048 + 128 bytes)
- * return with an error.
- * The credit registers will be reset to zero on link negotiation or link up
- * so this function should be activated from user space only if the port has
- * gone past link negotiation and link up.
- *
- * Return -- 0 if successful else error condition
- *
- */
-static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd,
- int value)
-{
-#define OPA_MIN_PER_VL_CREDITS 34 /* 2048 + 128 bytes */
- struct buffer_control t;
- int i;
- struct hfi1_devdata *dd = ppd->dd;
- u16 total_credits = (value >> 16) & 0xffff;
- u16 vl15_credits = dd->vl15_init / 2;
- u16 per_vl_credits;
- __be16 be_per_vl_credits;
-
- if (!(ppd->host_link_state & HLS_UP))
- goto err_exit;
- if (total_credits < vl15_credits)
- goto err_exit;
-
- per_vl_credits = (total_credits - vl15_credits) / TXE_NUM_DATA_VL;
-
- if (per_vl_credits < OPA_MIN_PER_VL_CREDITS)
- goto err_exit;
-
- memset(&t, 0, sizeof(t));
- be_per_vl_credits = cpu_to_be16(per_vl_credits);
-
- for (i = 0; i < TXE_NUM_DATA_VL; i++)
- t.vl[i].dedicated = be_per_vl_credits;
-
- t.vl[15].dedicated = cpu_to_be16(vl15_credits);
- return set_buffer_control(ppd, &t);
-
-err_exit:
- snoop_dbg("port_state = 0x%x, total_credits = %d, vl15_credits = %d",
- ppd->host_link_state, total_credits, vl15_credits);
-
- return -EINVAL;
-}
-
-static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
-{
- struct hfi1_devdata *dd;
- void *filter_value = NULL;
- long ret = 0;
- int value = 0;
- u8 phys_state = 0;
- u8 link_state = 0;
- u16 dev_state = 0;
- unsigned long flags = 0;
- unsigned long *argp = NULL;
- struct hfi1_packet_filter_command filter_cmd = {0};
- int mode_flag = 0;
- struct hfi1_pportdata *ppd = NULL;
- unsigned int index;
- struct hfi1_link_info link_info;
- int read_cmd, write_cmd, read_ok, write_ok;
-
- dd = hfi1_dd_from_sc_inode(fp->f_inode);
- if (!dd)
- return -ENODEV;
-
- mode_flag = dd->hfi1_snoop.mode_flag;
- read_cmd = _IOC_DIR(cmd) & _IOC_READ;
- write_cmd = _IOC_DIR(cmd) & _IOC_WRITE;
- write_ok = access_ok(VERIFY_WRITE, (void __user *)arg, _IOC_SIZE(cmd));
- read_ok = access_ok(VERIFY_READ, (void __user *)arg, _IOC_SIZE(cmd));
-
- if ((read_cmd && !write_ok) || (write_cmd && !read_ok))
- return -EFAULT;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if ((mode_flag & HFI1_PORT_CAPTURE_MODE) &&
- (cmd != HFI1_SNOOP_IOCCLEARQUEUE) &&
- (cmd != HFI1_SNOOP_IOCCLEARFILTER) &&
- (cmd != HFI1_SNOOP_IOCSETFILTER))
- /* Capture devices are allowed only 3 operations
- * 1.Clear capture queue
- * 2.Clear capture filter
- * 3.Set capture filter
- * Other are invalid.
- */
- return -EINVAL;
-
- switch (cmd) {
- case HFI1_SNOOP_IOCSETLINKSTATE_EXTRA:
- memset(&link_info, 0, sizeof(link_info));
-
- if (copy_from_user(&link_info,
- (struct hfi1_link_info __user *)arg,
- sizeof(link_info)))
- return -EFAULT;
-
- value = link_info.port_state;
- index = link_info.port_number;
- if (index > dd->num_pports - 1)
- return -EINVAL;
-
- ppd = &dd->pport[index];
- if (!ppd)
- return -EINVAL;
-
- /* What we want to transition to */
- phys_state = (value >> 4) & 0xF;
- link_state = value & 0xF;
- snoop_dbg("Setting link state 0x%x", value);
-
- switch (link_state) {
- case IB_PORT_NOP:
- if (phys_state == 0)
- break;
- /* fall through */
- case IB_PORT_DOWN:
- switch (phys_state) {
- case 0:
- dev_state = HLS_DN_DOWNDEF;
- break;
- case 2:
- dev_state = HLS_DN_POLL;
- break;
- case 3:
- dev_state = HLS_DN_DISABLE;
- break;
- default:
- return -EINVAL;
- }
- ret = set_link_state(ppd, dev_state);
- break;
- case IB_PORT_ARMED:
- ret = set_link_state(ppd, HLS_UP_ARMED);
- if (!ret)
- send_idle_sma(dd, SMA_IDLE_ARM);
- break;
- case IB_PORT_ACTIVE:
- ret = set_link_state(ppd, HLS_UP_ACTIVE);
- if (!ret)
- send_idle_sma(dd, SMA_IDLE_ACTIVE);
- break;
- default:
- return -EINVAL;
- }
-
- if (ret)
- break;
- /* fall through */
- case HFI1_SNOOP_IOCGETLINKSTATE:
- case HFI1_SNOOP_IOCGETLINKSTATE_EXTRA:
- if (cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) {
- memset(&link_info, 0, sizeof(link_info));
- if (copy_from_user(&link_info,
- (struct hfi1_link_info __user *)arg,
- sizeof(link_info)))
- return -EFAULT;
- index = link_info.port_number;
- } else {
- ret = __get_user(index, (int __user *)arg);
- if (ret != 0)
- break;
- }
-
- if (index > dd->num_pports - 1)
- return -EINVAL;
-
- ppd = &dd->pport[index];
- if (!ppd)
- return -EINVAL;
-
- value = hfi1_ibphys_portstate(ppd);
- value <<= 4;
- value |= driver_lstate(ppd);
-
- snoop_dbg("Link port | Link State: %d", value);
-
- if ((cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) ||
- (cmd == HFI1_SNOOP_IOCSETLINKSTATE_EXTRA)) {
- link_info.port_state = value;
- link_info.node_guid = cpu_to_be64(ppd->guid);
- link_info.link_speed_active =
- ppd->link_speed_active;
- link_info.link_width_active =
- ppd->link_width_active;
- if (copy_to_user((struct hfi1_link_info __user *)arg,
- &link_info, sizeof(link_info)))
- return -EFAULT;
- } else {
- ret = __put_user(value, (int __user *)arg);
- }
- break;
-
- case HFI1_SNOOP_IOCCLEARQUEUE:
- snoop_dbg("Clearing snoop queue");
- spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
- drain_snoop_list(&dd->hfi1_snoop.queue);
- spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
- break;
-
- case HFI1_SNOOP_IOCCLEARFILTER:
- snoop_dbg("Clearing filter");
- spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
- if (dd->hfi1_snoop.filter_callback) {
- /* Drain packets first */
- drain_snoop_list(&dd->hfi1_snoop.queue);
- dd->hfi1_snoop.filter_callback = NULL;
- }
- kfree(dd->hfi1_snoop.filter_value);
- dd->hfi1_snoop.filter_value = NULL;
- spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
- break;
-
- case HFI1_SNOOP_IOCSETFILTER:
- snoop_dbg("Setting filter");
- /* just copy command structure */
- argp = (unsigned long *)arg;
- if (copy_from_user(&filter_cmd, (void __user *)argp,
- sizeof(filter_cmd)))
- return -EFAULT;
-
- if (filter_cmd.opcode >= HFI1_MAX_FILTERS) {
- pr_alert("Invalid opcode in request\n");
- return -EINVAL;
- }
-
- snoop_dbg("Opcode %d Len %d Ptr %p",
- filter_cmd.opcode, filter_cmd.length,
- filter_cmd.value_ptr);
-
- filter_value = kcalloc(filter_cmd.length, sizeof(u8),
- GFP_KERNEL);
- if (!filter_value)
- return -ENOMEM;
-
- /* copy remaining data from userspace */
- if (copy_from_user((u8 *)filter_value,
- (void __user *)filter_cmd.value_ptr,
- filter_cmd.length)) {
- kfree(filter_value);
- return -EFAULT;
- }
- /* Drain packets first */
- spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
- drain_snoop_list(&dd->hfi1_snoop.queue);
- dd->hfi1_snoop.filter_callback =
- hfi1_filters[filter_cmd.opcode].filter;
- /* just in case we see back to back sets */
- kfree(dd->hfi1_snoop.filter_value);
- dd->hfi1_snoop.filter_value = filter_value;
- spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
- break;
- case HFI1_SNOOP_IOCGETVERSION:
- value = SNOOP_CAPTURE_VERSION;
- snoop_dbg("Getting version: %d", value);
- ret = __put_user(value, (int __user *)arg);
- break;
- case HFI1_SNOOP_IOCSET_OPTS:
- snoop_flags = 0;
- ret = __get_user(value, (int __user *)arg);
- if (ret != 0)
- break;
-
- snoop_dbg("Setting snoop option %d", value);
- if (value & SNOOP_DROP_SEND)
- snoop_flags |= SNOOP_DROP_SEND;
- if (value & SNOOP_USE_METADATA)
- snoop_flags |= SNOOP_USE_METADATA;
- if (value & (SNOOP_SET_VL0TOVL15)) {
- ppd = &dd->pport[0]; /* first port will do */
- ret = hfi1_assign_snoop_link_credits(ppd, value);
- }
- break;
- default:
- return -ENOTTY;
- }
-
- return ret;
-}
-
-static void snoop_list_add_tail(struct snoop_packet *packet,
- struct hfi1_devdata *dd)
-{
- unsigned long flags = 0;
-
- spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags);
- if (likely((dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) ||
- (dd->hfi1_snoop.mode_flag & HFI1_PORT_CAPTURE_MODE))) {
- list_add_tail(&packet->list, &dd->hfi1_snoop.queue);
- snoop_dbg("Added packet to list");
- }
-
- /*
- * Technically we can could have closed the snoop device while waiting
- * on the above lock and it is gone now. The snoop mode_flag will
- * prevent us from adding the packet to the queue though.
- */
-
- spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags);
- wake_up_interruptible(&dd->hfi1_snoop.waitq);
-}
-
-static inline int hfi1_filter_check(void *val, const char *msg)
-{
- if (!val) {
- snoop_dbg("Error invalid %s value for filter", msg);
- return HFI1_FILTER_ERR;
- }
- return 0;
-}
-
-static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value)
-{
- struct hfi1_ib_header *hdr;
- int ret;
-
- ret = hfi1_filter_check(ibhdr, "header");
- if (ret)
- return ret;
- ret = hfi1_filter_check(value, "user");
- if (ret)
- return ret;
- hdr = (struct hfi1_ib_header *)ibhdr;
-
- if (*((u16 *)value) == be16_to_cpu(hdr->lrh[3])) /* matches slid */
- return HFI1_FILTER_HIT; /* matched */
-
- return HFI1_FILTER_MISS; /* Not matched */
-}
-
-static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value)
-{
- struct hfi1_ib_header *hdr;
- int ret;
-
- ret = hfi1_filter_check(ibhdr, "header");
- if (ret)
- return ret;
- ret = hfi1_filter_check(value, "user");
- if (ret)
- return ret;
-
- hdr = (struct hfi1_ib_header *)ibhdr;
-
- if (*((u16 *)value) == be16_to_cpu(hdr->lrh[1]))
- return HFI1_FILTER_HIT;
-
- return HFI1_FILTER_MISS;
-}
-
-/* Not valid for outgoing packets, send handler passes null for data*/
-static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data,
- void *value)
-{
- struct hfi1_ib_header *hdr;
- struct hfi1_other_headers *ohdr = NULL;
- struct ib_smp *smp = NULL;
- u32 qpn = 0;
- int ret;
-
- ret = hfi1_filter_check(ibhdr, "header");
- if (ret)
- return ret;
- ret = hfi1_filter_check(packet_data, "packet_data");
- if (ret)
- return ret;
- ret = hfi1_filter_check(value, "user");
- if (ret)
- return ret;
-
- hdr = (struct hfi1_ib_header *)ibhdr;
-
- /* Check for GRH */
- if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth; /* LRH + BTH + DETH */
- else
- ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */
-
- qpn = be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF;
- if (qpn <= 1) {
- smp = (struct ib_smp *)packet_data;
- if (*((u8 *)value) == smp->mgmt_class)
- return HFI1_FILTER_HIT;
- else
- return HFI1_FILTER_MISS;
- }
- return HFI1_FILTER_ERR;
-}
-
-static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value)
-{
- struct hfi1_ib_header *hdr;
- struct hfi1_other_headers *ohdr = NULL;
- int ret;
-
- ret = hfi1_filter_check(ibhdr, "header");
- if (ret)
- return ret;
- ret = hfi1_filter_check(value, "user");
- if (ret)
- return ret;
-
- hdr = (struct hfi1_ib_header *)ibhdr;
-
- /* Check for GRH */
- if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth; /* LRH + BTH + DETH */
- else
- ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */
- if (*((u32 *)value) == (be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF))
- return HFI1_FILTER_HIT;
-
- return HFI1_FILTER_MISS;
-}
-
-static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data,
- void *value)
-{
- u32 lnh = 0;
- u8 opcode = 0;
- struct hfi1_ib_header *hdr;
- struct hfi1_other_headers *ohdr = NULL;
- int ret;
-
- ret = hfi1_filter_check(ibhdr, "header");
- if (ret)
- return ret;
- ret = hfi1_filter_check(value, "user");
- if (ret)
- return ret;
-
- hdr = (struct hfi1_ib_header *)ibhdr;
-
- lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
-
- if (lnh == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth;
- else if (lnh == HFI1_LRH_GRH)
- ohdr = &hdr->u.l.oth;
- else
- return HFI1_FILTER_ERR;
-
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-
- if (*((u8 *)value) == ((opcode >> 5) & 0x7))
- return HFI1_FILTER_HIT;
-
- return HFI1_FILTER_MISS;
-}
-
-static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data,
- void *value)
-{
- struct hfi1_ib_header *hdr;
- int ret;
-
- ret = hfi1_filter_check(ibhdr, "header");
- if (ret)
- return ret;
- ret = hfi1_filter_check(value, "user");
- if (ret)
- return ret;
-
- hdr = (struct hfi1_ib_header *)ibhdr;
-
- if ((*((u8 *)value)) == ((be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF))
- return HFI1_FILTER_HIT;
-
- return HFI1_FILTER_MISS;
-}
-
-static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value)
-{
- u32 lnh = 0;
- struct hfi1_ib_header *hdr;
- struct hfi1_other_headers *ohdr = NULL;
- int ret;
-
- ret = hfi1_filter_check(ibhdr, "header");
- if (ret)
- return ret;
- ret = hfi1_filter_check(value, "user");
- if (ret)
- return ret;
-
- hdr = (struct hfi1_ib_header *)ibhdr;
-
- lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
- if (lnh == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth;
- else if (lnh == HFI1_LRH_GRH)
- ohdr = &hdr->u.l.oth;
- else
- return HFI1_FILTER_ERR;
-
- /* P_key is 16-bit entity, however top most bit indicates
- * type of membership. 0 for limited and 1 for Full.
- * Limited members cannot accept information from other
- * Limited members, but communication is allowed between
- * every other combination of membership.
- * Hence we'll omit comparing top-most bit while filtering
- */
-
- if ((*(u16 *)value & 0x7FFF) ==
- ((be32_to_cpu(ohdr->bth[0])) & 0x7FFF))
- return HFI1_FILTER_HIT;
-
- return HFI1_FILTER_MISS;
-}
-
-/*
- * If packet_data is NULL then this is coming from one of the send functions.
- * Thus we know if its an ingressed or egressed packet.
- */
-static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value)
-{
- u8 user_dir = *(u8 *)value;
- int ret;
-
- ret = hfi1_filter_check(value, "user");
- if (ret)
- return ret;
-
- if (packet_data) {
- /* Incoming packet */
- if (user_dir & HFI1_SNOOP_INGRESS)
- return HFI1_FILTER_HIT;
- } else {
- /* Outgoing packet */
- if (user_dir & HFI1_SNOOP_EGRESS)
- return HFI1_FILTER_HIT;
- }
-
- return HFI1_FILTER_MISS;
-}
-
-/*
- * Allocate a snoop packet. The structure that is stored in the ring buffer, not
- * to be confused with an hfi packet type.
- */
-static struct snoop_packet *allocate_snoop_packet(u32 hdr_len,
- u32 data_len,
- u32 md_len)
-{
- struct snoop_packet *packet;
-
- packet = kzalloc(sizeof(*packet) + hdr_len + data_len
- + md_len,
- GFP_ATOMIC | __GFP_NOWARN);
- if (likely(packet))
- INIT_LIST_HEAD(&packet->list);
-
- return packet;
-}
-
-/*
- * Instead of having snoop and capture code intermixed with the recv functions,
- * both the interrupt handler and hfi1_ib_rcv() we are going to hijack the call
- * and land in here for snoop/capture but if not enabled the call will go
- * through as before. This gives us a single point to constrain all of the snoop
- * snoop recv logic. There is nothing special that needs to happen for bypass
- * packets. This routine should not try to look into the packet. It just copied
- * it. There is no guarantee for filters when it comes to bypass packets as
- * there is no specific support. Bottom line is this routine does now even know
- * what a bypass packet is.
- */
-int snoop_recv_handler(struct hfi1_packet *packet)
-{
- struct hfi1_pportdata *ppd = packet->rcd->ppd;
- struct hfi1_ib_header *hdr = packet->hdr;
- int header_size = packet->hlen;
- void *data = packet->ebuf;
- u32 tlen = packet->tlen;
- struct snoop_packet *s_packet = NULL;
- int ret;
- int snoop_mode = 0;
- u32 md_len = 0;
- struct capture_md md;
-
- snoop_dbg("PACKET IN: hdr size %d tlen %d data %p", header_size, tlen,
- data);
-
- trace_snoop_capture(ppd->dd, header_size, hdr, tlen - header_size,
- data);
-
- if (!ppd->dd->hfi1_snoop.filter_callback) {
- snoop_dbg("filter not set");
- ret = HFI1_FILTER_HIT;
- } else {
- ret = ppd->dd->hfi1_snoop.filter_callback(hdr, data,
- ppd->dd->hfi1_snoop.filter_value);
- }
-
- switch (ret) {
- case HFI1_FILTER_ERR:
- snoop_dbg("Error in filter call");
- break;
- case HFI1_FILTER_MISS:
- snoop_dbg("Filter Miss");
- break;
- case HFI1_FILTER_HIT:
-
- if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE)
- snoop_mode = 1;
- if ((snoop_mode == 0) ||
- unlikely(snoop_flags & SNOOP_USE_METADATA))
- md_len = sizeof(struct capture_md);
-
- s_packet = allocate_snoop_packet(header_size,
- tlen - header_size,
- md_len);
-
- if (unlikely(!s_packet)) {
- dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n");
- break;
- }
-
- if (md_len > 0) {
- memset(&md, 0, sizeof(struct capture_md));
- md.port = 1;
- md.dir = PKT_DIR_INGRESS;
- md.u.rhf = packet->rhf;
- memcpy(s_packet->data, &md, md_len);
- }
-
- /* We should always have a header */
- if (hdr) {
- memcpy(s_packet->data + md_len, hdr, header_size);
- } else {
- dd_dev_err(ppd->dd, "Unable to copy header to snoop/capture packet\n");
- kfree(s_packet);
- break;
- }
-
- /*
- * Packets with no data are possible. If there is no data needed
- * to take care of the last 4 bytes which are normally included
- * with data buffers and are included in tlen. Since we kzalloc
- * the buffer we do not need to set any values but if we decide
- * not to use kzalloc we should zero them.
- */
- if (data)
- memcpy(s_packet->data + header_size + md_len, data,
- tlen - header_size);
-
- s_packet->total_len = tlen + md_len;
- snoop_list_add_tail(s_packet, ppd->dd);
-
- /*
- * If we are snooping the packet not capturing then throw away
- * after adding to the list.
- */
- snoop_dbg("Capturing packet");
- if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) {
- snoop_dbg("Throwing packet away");
- /*
- * If we are dropping the packet we still may need to
- * handle the case where error flags are set, this is
- * normally done by the type specific handler but that
- * won't be called in this case.
- */
- if (unlikely(rhf_err_flags(packet->rhf)))
- handle_eflags(packet);
-
- /* throw the packet on the floor */
- return RHF_RCV_CONTINUE;
- }
- break;
- default:
- break;
- }
-
- /*
- * We do not care what type of packet came in here - just pass it off
- * to the normal handler.
- */
- return ppd->dd->normal_rhf_rcv_functions[rhf_rcv_type(packet->rhf)]
- (packet);
-}
-
-/*
- * Handle snooping and capturing packets when sdma is being used.
- */
-int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
- u64 pbc)
-{
- pr_alert("Snooping/Capture of Send DMA Packets Is Not Supported!\n");
- snoop_dbg("Unsupported Operation");
- return hfi1_verbs_send_dma(qp, ps, 0);
-}
-
-/*
- * Handle snooping and capturing packets when pio is being used. Does not handle
- * bypass packets. The only way to send a bypass packet currently is to use the
- * diagpkt interface. When that interface is enable snoop/capture is not.
- */
-int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
- u64 pbc)
-{
- u32 hdrwords = qp->s_hdrwords;
- struct rvt_sge_state *ss = qp->s_cur_sge;
- u32 len = qp->s_cur_size;
- u32 dwords = (len + 3) >> 2;
- u32 plen = hdrwords + dwords + 2; /* includes pbc */
- struct hfi1_pportdata *ppd = ps->ppd;
- struct snoop_packet *s_packet = NULL;
- u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
- u32 length = 0;
- struct rvt_sge_state temp_ss;
- void *data = NULL;
- void *data_start = NULL;
- int ret;
- int snoop_mode = 0;
- int md_len = 0;
- struct capture_md md;
- u32 vl;
- u32 hdr_len = hdrwords << 2;
- u32 tlen = HFI1_GET_PKT_LEN(&ps->s_txreq->phdr.hdr);
-
- md.u.pbc = 0;
-
- snoop_dbg("PACKET OUT: hdrword %u len %u plen %u dwords %u tlen %u",
- hdrwords, len, plen, dwords, tlen);
- if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE)
- snoop_mode = 1;
- if ((snoop_mode == 0) ||
- unlikely(snoop_flags & SNOOP_USE_METADATA))
- md_len = sizeof(struct capture_md);
-
- /* not using ss->total_len as arg 2 b/c that does not count CRC */
- s_packet = allocate_snoop_packet(hdr_len, tlen - hdr_len, md_len);
-
- if (unlikely(!s_packet)) {
- dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n");
- goto out;
- }
-
- s_packet->total_len = tlen + md_len;
-
- if (md_len > 0) {
- memset(&md, 0, sizeof(struct capture_md));
- md.port = 1;
- md.dir = PKT_DIR_EGRESS;
- if (likely(pbc == 0)) {
- vl = be16_to_cpu(ps->s_txreq->phdr.hdr.lrh[0]) >> 12;
- md.u.pbc = create_pbc(ppd, 0, qp->s_srate, vl, plen);
- } else {
- md.u.pbc = 0;
- }
- memcpy(s_packet->data, &md, md_len);
- } else {
- md.u.pbc = pbc;
- }
-
- /* Copy header */
- if (likely(hdr)) {
- memcpy(s_packet->data + md_len, hdr, hdr_len);
- } else {
- dd_dev_err(ppd->dd,
- "Unable to copy header to snoop/capture packet\n");
- kfree(s_packet);
- goto out;
- }
-
- if (ss) {
- data = s_packet->data + hdr_len + md_len;
- data_start = data;
-
- /*
- * Copy SGE State
- * The update_sge() function below will not modify the
- * individual SGEs in the array. It will make a copy each time
- * and operate on that. So we only need to copy this instance
- * and it won't impact PIO.
- */
- temp_ss = *ss;
- length = len;
-
- snoop_dbg("Need to copy %d bytes", length);
- while (length) {
- void *addr = temp_ss.sge.vaddr;
- u32 slen = temp_ss.sge.length;
-
- if (slen > length) {
- slen = length;
- snoop_dbg("slen %d > len %d", slen, length);
- }
- snoop_dbg("copy %d to %p", slen, addr);
- memcpy(data, addr, slen);
- update_sge(&temp_ss, slen);
- length -= slen;
- data += slen;
- snoop_dbg("data is now %p bytes left %d", data, length);
- }
- snoop_dbg("Completed SGE copy");
- }
-
- /*
- * Why do the filter check down here? Because the event tracing has its
- * own filtering and we need to have the walked the SGE list.
- */
- if (!ppd->dd->hfi1_snoop.filter_callback) {
- snoop_dbg("filter not set\n");
- ret = HFI1_FILTER_HIT;
- } else {
- ret = ppd->dd->hfi1_snoop.filter_callback(
- &ps->s_txreq->phdr.hdr,
- NULL,
- ppd->dd->hfi1_snoop.filter_value);
- }
-
- switch (ret) {
- case HFI1_FILTER_ERR:
- snoop_dbg("Error in filter call");
- /* fall through */
- case HFI1_FILTER_MISS:
- snoop_dbg("Filter Miss");
- kfree(s_packet);
- break;
- case HFI1_FILTER_HIT:
- snoop_dbg("Capturing packet");
- snoop_list_add_tail(s_packet, ppd->dd);
-
- if (unlikely((snoop_flags & SNOOP_DROP_SEND) &&
- (ppd->dd->hfi1_snoop.mode_flag &
- HFI1_PORT_SNOOP_MODE))) {
- unsigned long flags;
-
- snoop_dbg("Dropping packet");
- if (qp->s_wqe) {
- spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_send_complete(
- qp,
- qp->s_wqe,
- IB_WC_SUCCESS);
- spin_unlock_irqrestore(&qp->s_lock, flags);
- } else if (qp->ibqp.qp_type == IB_QPT_RC) {
- spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_rc_send_complete(qp,
- &ps->s_txreq->phdr.hdr);
- spin_unlock_irqrestore(&qp->s_lock, flags);
- }
-
- /*
- * If snoop is dropping the packet we need to put the
- * txreq back because no one else will.
- */
- hfi1_put_txreq(ps->s_txreq);
- return 0;
- }
- break;
- default:
- kfree(s_packet);
- break;
- }
-out:
- return hfi1_verbs_send_pio(qp, ps, md.u.pbc);
-}
-
-/*
- * Callers of this must pass a hfi1_ib_header type for the from ptr. Currently
- * this can be used anywhere, but the intention is for inline ACKs for RC and
- * CCA packets. We don't restrict this usage though.
- */
-void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf,
- u64 pbc, const void *from, size_t count)
-{
- int snoop_mode = 0;
- int md_len = 0;
- struct capture_md md;
- struct snoop_packet *s_packet = NULL;
-
- /*
- * count is in dwords so we need to convert to bytes.
- * We also need to account for CRC which would be tacked on by hardware.
- */
- int packet_len = (count << 2) + 4;
- int ret;
-
- snoop_dbg("ACK OUT: len %d", packet_len);
-
- if (!dd->hfi1_snoop.filter_callback) {
- snoop_dbg("filter not set");
- ret = HFI1_FILTER_HIT;
- } else {
- ret = dd->hfi1_snoop.filter_callback(
- (struct hfi1_ib_header *)from,
- NULL,
- dd->hfi1_snoop.filter_value);
- }
-
- switch (ret) {
- case HFI1_FILTER_ERR:
- snoop_dbg("Error in filter call");
- /* fall through */
- case HFI1_FILTER_MISS:
- snoop_dbg("Filter Miss");
- break;
- case HFI1_FILTER_HIT:
- snoop_dbg("Capturing packet");
- if (dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE)
- snoop_mode = 1;
- if ((snoop_mode == 0) ||
- unlikely(snoop_flags & SNOOP_USE_METADATA))
- md_len = sizeof(struct capture_md);
-
- s_packet = allocate_snoop_packet(packet_len, 0, md_len);
-
- if (unlikely(!s_packet)) {
- dd_dev_warn_ratelimited(dd, "Unable to allocate snoop/capture packet\n");
- goto inline_pio_out;
- }
-
- s_packet->total_len = packet_len + md_len;
-
- /* Fill in the metadata for the packet */
- if (md_len > 0) {
- memset(&md, 0, sizeof(struct capture_md));
- md.port = 1;
- md.dir = PKT_DIR_EGRESS;
- md.u.pbc = pbc;
- memcpy(s_packet->data, &md, md_len);
- }
-
- /* Add the packet data which is a single buffer */
- memcpy(s_packet->data + md_len, from, packet_len);
-
- snoop_list_add_tail(s_packet, dd);
-
- if (unlikely((snoop_flags & SNOOP_DROP_SEND) && snoop_mode)) {
- snoop_dbg("Dropping packet");
- return;
- }
- break;
- default:
- break;
- }
-
-inline_pio_out:
- pio_copy(dd, pbuf, pbc, from, count);
-}
diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c
deleted file mode 100644
index bd8771570f81..000000000000
--- a/drivers/staging/rdma/hfi1/eprom.c
+++ /dev/null
@@ -1,471 +0,0 @@
-/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-#include <linux/delay.h>
-#include "hfi.h"
-#include "common.h"
-#include "eprom.h"
-
-/*
- * The EPROM is logically divided into three partitions:
- * partition 0: the first 128K, visible from PCI ROM BAR
- * partition 1: 4K config file (sector size)
- * partition 2: the rest
- */
-#define P0_SIZE (128 * 1024)
-#define P1_SIZE (4 * 1024)
-#define P1_START P0_SIZE
-#define P2_START (P0_SIZE + P1_SIZE)
-
-/* erase sizes supported by the controller */
-#define SIZE_4KB (4 * 1024)
-#define MASK_4KB (SIZE_4KB - 1)
-
-#define SIZE_32KB (32 * 1024)
-#define MASK_32KB (SIZE_32KB - 1)
-
-#define SIZE_64KB (64 * 1024)
-#define MASK_64KB (SIZE_64KB - 1)
-
-/* controller page size, in bytes */
-#define EP_PAGE_SIZE 256
-#define EEP_PAGE_MASK (EP_PAGE_SIZE - 1)
-
-/* controller commands */
-#define CMD_SHIFT 24
-#define CMD_NOP (0)
-#define CMD_PAGE_PROGRAM(addr) ((0x02 << CMD_SHIFT) | addr)
-#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | addr)
-#define CMD_READ_SR1 ((0x05 << CMD_SHIFT))
-#define CMD_WRITE_ENABLE ((0x06 << CMD_SHIFT))
-#define CMD_SECTOR_ERASE_4KB(addr) ((0x20 << CMD_SHIFT) | addr)
-#define CMD_SECTOR_ERASE_32KB(addr) ((0x52 << CMD_SHIFT) | addr)
-#define CMD_CHIP_ERASE ((0x60 << CMD_SHIFT))
-#define CMD_READ_MANUF_DEV_ID ((0x90 << CMD_SHIFT))
-#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT))
-#define CMD_SECTOR_ERASE_64KB(addr) ((0xd8 << CMD_SHIFT) | addr)
-
-/* controller interface speeds */
-#define EP_SPEED_FULL 0x2 /* full speed */
-
-/* controller status register 1 bits */
-#define SR1_BUSY 0x1ull /* the BUSY bit in SR1 */
-
-/* sleep length while waiting for controller */
-#define WAIT_SLEEP_US 100 /* must be larger than 5 (see usage) */
-#define COUNT_DELAY_SEC(n) ((n) * (1000000 / WAIT_SLEEP_US))
-
-/* GPIO pins */
-#define EPROM_WP_N BIT_ULL(14) /* EPROM write line */
-
-/*
- * How long to wait for the EPROM to become available, in ms.
- * The spec 32 Mb EPROM takes around 40s to erase then write.
- * Double it for safety.
- */
-#define EPROM_TIMEOUT 80000 /* ms */
-
-/*
- * Turn on external enable line that allows writing on the flash.
- */
-static void write_enable(struct hfi1_devdata *dd)
-{
- /* raise signal */
- write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N);
- /* raise enable */
- write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N);
-}
-
-/*
- * Turn off external enable line that allows writing on the flash.
- */
-static void write_disable(struct hfi1_devdata *dd)
-{
- /* lower signal */
- write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N);
- /* lower enable */
- write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N);
-}
-
-/*
- * Wait for the device to become not busy. Must be called after all
- * write or erase operations.
- */
-static int wait_for_not_busy(struct hfi1_devdata *dd)
-{
- unsigned long count = 0;
- u64 reg;
- int ret = 0;
-
- /* starts page mode */
- write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_SR1);
- while (1) {
- udelay(WAIT_SLEEP_US);
- usleep_range(WAIT_SLEEP_US - 5, WAIT_SLEEP_US + 5);
- count++;
- reg = read_csr(dd, ASIC_EEP_DATA);
- if ((reg & SR1_BUSY) == 0)
- break;
- /* 200s is the largest time for a 128Mb device */
- if (count > COUNT_DELAY_SEC(200)) {
- dd_dev_err(dd, "waited too long for SPI FLASH busy to clear - failing\n");
- ret = -ETIMEDOUT;
- break; /* break, not goto - must stop page mode */
- }
- }
-
- /* stop page mode with a NOP */
- write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP);
-
- return ret;
-}
-
-/*
- * Read the device ID from the SPI controller.
- */
-static u32 read_device_id(struct hfi1_devdata *dd)
-{
- /* read the Manufacture Device ID */
- write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_MANUF_DEV_ID);
- return (u32)read_csr(dd, ASIC_EEP_DATA);
-}
-
-/*
- * Erase the whole flash.
- */
-static int erase_chip(struct hfi1_devdata *dd)
-{
- int ret;
-
- write_enable(dd);
-
- write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
- write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_CHIP_ERASE);
- ret = wait_for_not_busy(dd);
-
- write_disable(dd);
-
- return ret;
-}
-
-/*
- * Erase a range.
- */
-static int erase_range(struct hfi1_devdata *dd, u32 start, u32 len)
-{
- u32 end = start + len;
- int ret = 0;
-
- if (end < start)
- return -EINVAL;
-
- /* check the end points for the minimum erase */
- if ((start & MASK_4KB) || (end & MASK_4KB)) {
- dd_dev_err(dd,
- "%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n",
- __func__, start, end);
- return -EINVAL;
- }
-
- write_enable(dd);
-
- while (start < end) {
- write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
- /* check in order of largest to smallest */
- if (((start & MASK_64KB) == 0) && (start + SIZE_64KB <= end)) {
- write_csr(dd, ASIC_EEP_ADDR_CMD,
- CMD_SECTOR_ERASE_64KB(start));
- start += SIZE_64KB;
- } else if (((start & MASK_32KB) == 0) &&
- (start + SIZE_32KB <= end)) {
- write_csr(dd, ASIC_EEP_ADDR_CMD,
- CMD_SECTOR_ERASE_32KB(start));
- start += SIZE_32KB;
- } else { /* 4KB will work */
- write_csr(dd, ASIC_EEP_ADDR_CMD,
- CMD_SECTOR_ERASE_4KB(start));
- start += SIZE_4KB;
- }
- ret = wait_for_not_busy(dd);
- if (ret)
- goto done;
- }
-
-done:
- write_disable(dd);
-
- return ret;
-}
-
-/*
- * Read a 256 byte (64 dword) EPROM page.
- * All callers have verified the offset is at a page boundary.
- */
-static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
-{
- int i;
-
- write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset));
- for (i = 0; i < EP_PAGE_SIZE / sizeof(u32); i++)
- result[i] = (u32)read_csr(dd, ASIC_EEP_DATA);
- write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */
-}
-
-/*
- * Read length bytes starting at offset. Copy to user address addr.
- */
-static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
-{
- u32 offset;
- u32 buffer[EP_PAGE_SIZE / sizeof(u32)];
- int ret = 0;
-
- /* reject anything not on an EPROM page boundary */
- if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK))
- return -EINVAL;
-
- for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
- read_page(dd, start + offset, buffer);
- if (copy_to_user((void __user *)(addr + offset),
- buffer, EP_PAGE_SIZE)) {
- ret = -EFAULT;
- goto done;
- }
- }
-
-done:
- return ret;
-}
-
-/*
- * Write a 256 byte (64 dword) EPROM page.
- * All callers have verified the offset is at a page boundary.
- */
-static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data)
-{
- int i;
-
- write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
- write_csr(dd, ASIC_EEP_DATA, data[0]);
- write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_PAGE_PROGRAM(offset));
- for (i = 1; i < EP_PAGE_SIZE / sizeof(u32); i++)
- write_csr(dd, ASIC_EEP_DATA, data[i]);
- /* will close the open page */
- return wait_for_not_busy(dd);
-}
-
-/*
- * Write length bytes starting at offset. Read from user address addr.
- */
-static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
-{
- u32 offset;
- u32 buffer[EP_PAGE_SIZE / sizeof(u32)];
- int ret = 0;
-
- /* reject anything not on an EPROM page boundary */
- if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK))
- return -EINVAL;
-
- write_enable(dd);
-
- for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
- if (copy_from_user(buffer, (void __user *)(addr + offset),
- EP_PAGE_SIZE)) {
- ret = -EFAULT;
- goto done;
- }
- ret = write_page(dd, start + offset, buffer);
- if (ret)
- goto done;
- }
-
-done:
- write_disable(dd);
- return ret;
-}
-
-/* convert an range composite to a length, in bytes */
-static inline u32 extract_rlen(u32 composite)
-{
- return (composite & 0xffff) * EP_PAGE_SIZE;
-}
-
-/* convert an range composite to a start, in bytes */
-static inline u32 extract_rstart(u32 composite)
-{
- return (composite >> 16) * EP_PAGE_SIZE;
-}
-
-/*
- * Perform the given operation on the EPROM. Called from user space. The
- * user credentials have already been checked.
- *
- * Return 0 on success, -ERRNO on error
- */
-int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd)
-{
- struct hfi1_devdata *dd;
- u32 dev_id;
- u32 rlen; /* range length */
- u32 rstart; /* range start */
- int i_minor;
- int ret = 0;
-
- /*
- * Map the device file to device data using the relative minor.
- * The device file minor number is the unit number + 1. 0 is
- * the generic device file - reject it.
- */
- i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
- if (i_minor <= 0)
- return -EINVAL;
- dd = hfi1_lookup(i_minor - 1);
- if (!dd) {
- pr_err("%s: cannot find unit %d!\n", __func__, i_minor);
- return -EINVAL;
- }
-
- /* some devices do not have an EPROM */
- if (!dd->eprom_available)
- return -EOPNOTSUPP;
-
- ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
- if (ret) {
- dd_dev_err(dd, "%s: unable to acquire EPROM resource\n",
- __func__);
- goto done_asic;
- }
-
- dd_dev_info(dd, "%s: cmd: type %d, len 0x%x, addr 0x%016llx\n",
- __func__, cmd->type, cmd->len, cmd->addr);
-
- switch (cmd->type) {
- case HFI1_CMD_EP_INFO:
- if (cmd->len != sizeof(u32)) {
- ret = -ERANGE;
- break;
- }
- dev_id = read_device_id(dd);
- /* addr points to a u32 user buffer */
- if (copy_to_user((void __user *)cmd->addr, &dev_id,
- sizeof(u32)))
- ret = -EFAULT;
- break;
-
- case HFI1_CMD_EP_ERASE_CHIP:
- ret = erase_chip(dd);
- break;
-
- case HFI1_CMD_EP_ERASE_RANGE:
- rlen = extract_rlen(cmd->len);
- rstart = extract_rstart(cmd->len);
- ret = erase_range(dd, rstart, rlen);
- break;
-
- case HFI1_CMD_EP_READ_RANGE:
- rlen = extract_rlen(cmd->len);
- rstart = extract_rstart(cmd->len);
- ret = read_length(dd, rstart, rlen, cmd->addr);
- break;
-
- case HFI1_CMD_EP_WRITE_RANGE:
- rlen = extract_rlen(cmd->len);
- rstart = extract_rstart(cmd->len);
- ret = write_length(dd, rstart, rlen, cmd->addr);
- break;
-
- default:
- dd_dev_err(dd, "%s: unexpected command %d\n",
- __func__, cmd->type);
- ret = -EINVAL;
- break;
- }
-
- release_chip_resource(dd, CR_EPROM);
-done_asic:
- return ret;
-}
-
-/*
- * Initialize the EPROM handler.
- */
-int eprom_init(struct hfi1_devdata *dd)
-{
- int ret = 0;
-
- /* only the discrete chip has an EPROM */
- if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0)
- return 0;
-
- /*
- * It is OK if both HFIs reset the EPROM as long as they don't
- * do it at the same time.
- */
- ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
- if (ret) {
- dd_dev_err(dd,
- "%s: unable to acquire EPROM resource, no EPROM support\n",
- __func__);
- goto done_asic;
- }
-
- /* reset EPROM to be sure it is in a good state */
-
- /* set reset */
- write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
- /* clear reset, set speed */
- write_csr(dd, ASIC_EEP_CTL_STAT,
- EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);
-
- /* wake the device with command "release powerdown NoID" */
- write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID);
-
- dd->eprom_available = true;
- release_chip_resource(dd, CR_EPROM);
-done_asic:
- return ret;
-}