From 663912a6378a34fd4f43b8d873f0c6c6322d9d0e Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 16 Sep 2019 10:11:52 +0300 Subject: RDMA/counter: Prevent QP counter manual binding in auto mode If auto mode is configured, manual counter allocation and QP bind is not allowed. Fixes: 1bd8e0a9d0fd ("RDMA/counter: Allow manual mode configuration support") Link: https://lore.kernel.org/r/20190916071154.20383-3-leon@kernel.org Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 680ad27f497d..736ab760025d 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -463,10 +463,15 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, u32 qp_num, u32 counter_id) { + struct rdma_port_counter *port_counter; struct rdma_counter *counter; struct ib_qp *qp; int ret; + port_counter = &dev->port_data[port].port_counter; + if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) + return -EINVAL; + qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; @@ -503,6 +508,7 @@ err: int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, u32 qp_num, u32 *counter_id) { + struct rdma_port_counter *port_counter; struct rdma_counter *counter; struct ib_qp *qp; int ret; @@ -510,9 +516,13 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, if (!rdma_is_port_valid(dev, port)) return -EINVAL; - if (!dev->port_data[port].port_counter.hstats) + port_counter = &dev->port_data[port].port_counter; + if (!port_counter->hstats) return -EOPNOTSUPP; + if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) + return -EINVAL; + qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; -- cgit v1.2.3-59-g8ed1b From 70bcc63f84eaa08c53a14e669550cffc947ca3b7 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 24 Sep 2019 00:32:49 +0300 Subject: IB/iser: add unlikely checks in the fast path ib_post_send, ib_post_recv and ib_dma_map_sg operations should succeed unless something unusual happened to the ib device. 
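For illustration, a minimal sketch of the pattern being applied (not part of this patch): error-path checks on the posting verbs are wrapped in unlikely() so the compiler lays out the code for the common success case. The helper name below is hypothetical.

    #include <rdma/ib_verbs.h>

    /* Hypothetical helper showing the unlikely() error-path annotation. */
    static int post_one_recv(struct ib_qp *qp, struct ib_recv_wr *wr)
    {
            int ret;

            ret = ib_post_recv(qp, wr, NULL);
            if (unlikely(ret))              /* posting normally succeeds */
                    pr_err("ib_post_recv failed, ret=%d\n", ret);

            return ret;
    }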
Link: https://lore.kernel.org/r/1569274369-29217-1-git-send-email-maxg@mellanox.com Signed-off-by: Max Gurtovoy Acked-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iser_memory.c | 2 +- drivers/infiniband/ulp/iser/iser_verbs.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 2cc89a9b9e9b..3a26e5be1717 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -170,7 +170,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, dev = iser_task->iser_conn->ib_conn.device->ib_device; data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir); - if (data->dma_nents == 0) { + if (unlikely(data->dma_nents == 0)) { iser_err("dma_map_sg failed!!!\n"); return -EINVAL; } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index a6548de0e218..94b50112cdb2 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1019,7 +1019,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) ib_conn->post_recv_buf_count += count; ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, NULL); - if (ib_ret) { + if (unlikely(ib_ret)) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); ib_conn->post_recv_buf_count -= count; } else @@ -1060,7 +1060,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, first_wr = wr; ib_ret = ib_post_send(ib_conn->qp, first_wr, NULL); - if (ib_ret) + if (unlikely(ib_ret)) iser_err("ib_post_send failed, ret:%d opcode:%d\n", ib_ret, wr->opcode); -- cgit v1.2.3-59-g8ed1b From 7718cf03c3ce4b6ebd90107643ccd01c952a1fce Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 25 Sep 2019 00:03:47 +0300 Subject: IB/iser: bound protection_sg size by data_sg size In case we don't set the sg_prot_tablesize, the scsi layer assign the default size (65535 entries). We should limit this size since we should take into consideration the underlaying device capability. This cap is considered when calculating the sg_tablesize. Otherwise, for example, we can get that /sys/block/sdb/queue/max_segments is 128 and /sys/block/sdb/queue/max_integrity_segments is 65535. Link: https://lore.kernel.org/r/1569359027-10987-1-git-send-email-maxg@mellanox.com Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iscsi_iser.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 2e72fc5af157..c4c015c60446 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -646,6 +646,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, if (ib_conn->pi_support) { u32 sig_caps = ib_dev->attrs.sig_prot_cap; + shost->sg_prot_tablesize = shost->sg_tablesize; scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP | SHOST_DIX_GUARD_CRC); -- cgit v1.2.3-59-g8ed1b From 6eeff06db999bf9b17aaa52fddda4fd449726a64 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 25 Sep 2019 00:05:48 +0300 Subject: IB/iser: remove redundant macro definitions Use the general linux definition for 4K and retrieve the rest from it. 
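For illustration, a minimal sketch of the mapping this cleanup relies on (the function below is hypothetical and only demonstrates the generic definitions from <linux/sizes.h> and <linux/log2.h>):

    #include <linux/log2.h>
    #include <linux/sizes.h>

    /* SHIFT_4K -> ilog2(SZ_4K) (== 12), SIZE_4K -> SZ_4K (== 4096).
     * MASK_4K is dropped; virt_boundary_mask takes the inclusive
     * mask SZ_4K - 1 directly.
     */
    static inline unsigned int iser_4k_page_shift(void)
    {
            return ilog2(SZ_4K);
    }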
Link: https://lore.kernel.org/r/1569359148-12312-1-git-send-email-maxg@mellanox.com Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 +- drivers/infiniband/ulp/iser/iscsi_iser.h | 8 ++------ drivers/infiniband/ulp/iser/iser_memory.c | 4 ++-- drivers/infiniband/ulp/iser/iser_verbs.c | 4 ++-- 4 files changed, 7 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index c4c015c60446..55f45edeb5f9 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -653,7 +653,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, } if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) - shost->virt_boundary_mask = ~MASK_4K; + shost->virt_boundary_mask = SZ_4K - 1; if (iscsi_host_add(shost, ib_dev->dev.parent)) { mutex_unlock(&iser_conn->state_mutex); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 52ce63592dcf..2500c0df2670 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -96,16 +96,12 @@ #define iser_err(fmt, arg...) \ pr_err(PFX "%s: " fmt, __func__ , ## arg) -#define SHIFT_4K 12 -#define SIZE_4K (1ULL << SHIFT_4K) -#define MASK_4K (~(SIZE_4K-1)) - /* Default support is 512KB I/O size */ #define ISER_DEF_MAX_SECTORS 1024 #define ISCSI_ISER_DEF_SG_TABLESIZE \ - ((ISER_DEF_MAX_SECTORS * SECTOR_SIZE) >> SHIFT_4K) + ((ISER_DEF_MAX_SECTORS * SECTOR_SIZE) >> ilog2(SZ_4K)) /* Maximum support is 16MB I/O size */ -#define ISCSI_ISER_MAX_SG_TABLESIZE ((32768 * SECTOR_SIZE) >> SHIFT_4K) +#define ISCSI_ISER_MAX_SG_TABLESIZE ((32768 * SECTOR_SIZE) >> ilog2(SZ_4K)) #define ISER_DEF_XMIT_CMDS_DEFAULT 512 #if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 3a26e5be1717..0f74dc6d12fa 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -237,7 +237,7 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, int ret, plen; page_vec->npages = 0; - page_vec->fake_mr.page_size = SIZE_4K; + page_vec->fake_mr.page_size = SZ_4K; plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg, mem->dma_nents, NULL, iser_set_page); if (unlikely(plen < mem->dma_nents)) { @@ -451,7 +451,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); - n = ib_map_mr_sg(mr, mem->sg, mem->dma_nents, NULL, SIZE_4K); + n = ib_map_mr_sg(mr, mem->sg, mem->dma_nents, NULL, SZ_4K); if (unlikely(n != mem->dma_nents)) { iser_err("failed to map sg (%d/%d)\n", n, mem->dma_nents); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 94b50112cdb2..17f8e914b531 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -180,7 +180,7 @@ int iser_alloc_fmr_pool(struct ib_conn *ib_conn, page_vec->pages = (u64 *)(page_vec + 1); - params.page_shift = SHIFT_4K; + params.page_shift = ilog2(SZ_4K); params.max_pages_per_fmr = size; /* make the pool size twice the max number of SCSI commands * * the ML is expected to queue, watermark for unmap at 50% */ @@ -670,7 +670,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, else max_num_sg = attr->max_fast_reg_page_list_len; - sg_tablesize = DIV_ROUND_UP(max_sectors 
* 512, SIZE_4K); + sg_tablesize = DIV_ROUND_UP(max_sectors * SECTOR_SIZE, SZ_4K); if (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) sup_sg_tablesize = min_t( -- cgit v1.2.3-59-g8ed1b From 23c1c13cdd556027ed6f886f37975bd59180efe7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:16:55 -0700 Subject: RDMA/siw: Simplify several debug messages Do not print the remote address if it is not used. Use %pISp instead of %pI4 %d. Link: https://lore.kernel.org/r/20190930231707.48259-4-bvanassche@acm.org Signed-off-by: Bart Van Assche Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_cm.c | 36 +++++++----------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 8c1931a57f4a..5a75deb9870b 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1373,22 +1373,8 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) rv = -EINVAL; goto error; } - if (v4) - siw_dbg_qp(qp, - "pd_len %d, laddr %pI4 %d, raddr %pI4 %d\n", - pd_len, - &((struct sockaddr_in *)(laddr))->sin_addr, - ntohs(((struct sockaddr_in *)(laddr))->sin_port), - &((struct sockaddr_in *)(raddr))->sin_addr, - ntohs(((struct sockaddr_in *)(raddr))->sin_port)); - else - siw_dbg_qp(qp, - "pd_len %d, laddr %pI6 %d, raddr %pI6 %d\n", - pd_len, - &((struct sockaddr_in6 *)(laddr))->sin6_addr, - ntohs(((struct sockaddr_in6 *)(laddr))->sin6_port), - &((struct sockaddr_in6 *)(raddr))->sin6_addr, - ntohs(((struct sockaddr_in6 *)(raddr))->sin6_port)); + siw_dbg_qp(qp, "pd_len %d, laddr %pISp, raddr %pISp\n", pd_len, laddr, + raddr); rv = sock_create(v4 ? AF_INET : AF_INET6, SOCK_STREAM, IPPROTO_TCP, &s); if (rv < 0) @@ -1935,7 +1921,7 @@ static void siw_drop_listeners(struct iw_cm_id *id) /* * siw_create_listen - Create resources for a listener's IWCM ID @id * - * Listens on the socket addresses id->local_addr and id->remote_addr. + * Listens on the socket address id->local_addr. * * If the listener's @id provides a specific local IP address, at most one * listening socket is created and associated with @id. 
@@ -1959,7 +1945,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) */ if (id->local_addr.ss_family == AF_INET) { struct in_device *in_dev = in_dev_get(dev); - struct sockaddr_in s_laddr, *s_raddr; + struct sockaddr_in s_laddr; const struct in_ifaddr *ifa; if (!in_dev) { @@ -1967,12 +1953,8 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) goto out; } memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr)); - s_raddr = (struct sockaddr_in *)&id->remote_addr; - siw_dbg(id->device, - "laddr %pI4:%d, raddr %pI4:%d\n", - &s_laddr.sin_addr, ntohs(s_laddr.sin_port), - &s_raddr->sin_addr, ntohs(s_raddr->sin_port)); + siw_dbg(id->device, "laddr %pISp\n", &s_laddr); rtnl_lock(); in_dev_for_each_ifa_rtnl(ifa, in_dev) { @@ -1992,17 +1974,13 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) } else if (id->local_addr.ss_family == AF_INET6) { struct inet6_dev *in6_dev = in6_dev_get(dev); struct inet6_ifaddr *ifp; - struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr), - *s_raddr = &to_sockaddr_in6(id->remote_addr); + struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr); if (!in6_dev) { rv = -ENODEV; goto out; } - siw_dbg(id->device, - "laddr %pI6:%d, raddr %pI6:%d\n", - &s_laddr->sin6_addr, ntohs(s_laddr->sin6_port), - &s_raddr->sin6_addr, ntohs(s_raddr->sin6_port)); + siw_dbg(id->device, "laddr %pISp\n", &s_laddr); rtnl_lock(); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { -- cgit v1.2.3-59-g8ed1b From 050dbddf249eee3e936b5734c30b2e1b427efdc3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:16:56 -0700 Subject: RDMA/siw: Fix port number endianness in a debug message sin_port and sin6_port are big endian member variables. Convert these port numbers into CPU endianness before printing. Link: https://lore.kernel.org/r/20190930231707.48259-5-bvanassche@acm.org Fixes: 6c52fdc244b5 ("rdma/siw: connection management") Signed-off-by: Bart Van Assche Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_cm.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 5a75deb9870b..3bccfef40e7e 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1853,14 +1853,7 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, list_add_tail(&cep->listenq, (struct list_head *)id->provider_data); cep->state = SIW_EPSTATE_LISTENING; - if (addr_family == AF_INET) - siw_dbg(id->device, "Listen at laddr %pI4 %u\n", - &(((struct sockaddr_in *)laddr)->sin_addr), - ((struct sockaddr_in *)laddr)->sin_port); - else - siw_dbg(id->device, "Listen at laddr %pI6 %u\n", - &(((struct sockaddr_in6 *)laddr)->sin6_addr), - ((struct sockaddr_in6 *)laddr)->sin6_port); + siw_dbg(id->device, "Listen at laddr %pISp\n", laddr); return 0; -- cgit v1.2.3-59-g8ed1b From d0f3ef36bf49bbc7de04da0d7f65ef95749b8b30 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 23 Sep 2019 13:41:56 +0300 Subject: RDMA/core: Fix return code when modify_device isn't supported The proper return code is "-EOPNOTSUPP" when modify_device callback is not supported. 
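For illustration, a minimal sketch of the convention this patch applies (not part of the patch itself): optional ib_device_ops callbacks are NULL-checked before use, and an unimplemented operation reports -EOPNOTSUPP rather than -ENOSYS or -EIO. The wrapper name is hypothetical; the callback signature matches the one used in the hunks below.

    #include <linux/errno.h>
    #include <rdma/ib_verbs.h>

    /* Hypothetical wrapper around the optional modify_device callback. */
    static int example_modify_device(struct ib_device *dev, int mask,
                                     struct ib_device_modify *modify)
    {
            if (!dev->ops.modify_device)
                    return -EOPNOTSUPP;     /* callback not provided */

            return dev->ops.modify_device(dev, mask, modify);
    }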
Link: https://lore.kernel.org/r/20190923104158.5331-2-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 2 +- drivers/infiniband/core/sysfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 99c4a55545cf..a667636f74bf 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2365,7 +2365,7 @@ int ib_modify_device(struct ib_device *device, struct ib_device_modify *device_modify) { if (!device->ops.modify_device) - return -ENOSYS; + return -EOPNOTSUPP; return device->ops.modify_device(device, device_modify_mask, device_modify); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 7a50cedcef1f..92c932c067cb 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1268,7 +1268,7 @@ static ssize_t node_desc_store(struct device *device, int ret; if (!dev->ops.modify_device) - return -EIO; + return -EOPNOTSUPP; memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX)); ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc); -- cgit v1.2.3-59-g8ed1b From 39ce85f3b18597b17a3db74906db2c6360d66c16 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 23 Sep 2019 13:41:57 +0300 Subject: RDMA/bnxt_re: Remove unsupported modify_device callback There is no need to return always zero for function which is not supported. Link: https://lore.kernel.org/r/20190923104158.5331-3-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 18 ------------------ drivers/infiniband/hw/bnxt_re/ib_verbs.h | 3 --- drivers/infiniband/hw/bnxt_re/main.c | 1 - 3 files changed, 22 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b4149dc9e824..8afd7d93cfe4 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -191,24 +191,6 @@ int bnxt_re_query_device(struct ib_device *ibdev, return 0; } -int bnxt_re_modify_device(struct ib_device *ibdev, - int device_modify_mask, - struct ib_device_modify *device_modify) -{ - switch (device_modify_mask) { - case IB_DEVICE_MODIFY_SYS_IMAGE_GUID: - /* Modify the GUID requires the modification of the GID table */ - /* GUID should be made as READ-ONLY */ - break; - case IB_DEVICE_MODIFY_NODE_DESC: - /* Node Desc should be made as READ-ONLY */ - break; - default: - break; - } - return 0; -} - /* Port */ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 31662b1ee35a..23d972da5652 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -145,9 +145,6 @@ struct bnxt_re_ucontext { int bnxt_re_query_device(struct ib_device *ibdev, struct ib_device_attr *ib_attr, struct ib_udata *udata); -int bnxt_re_modify_device(struct ib_device *ibdev, - int device_modify_mask, - struct ib_device_modify *device_modify); int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr); int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 
30a54f8aa42c..7b914bdd7c13 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -625,7 +625,6 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .map_mr_sg = bnxt_re_map_mr_sg, .mmap = bnxt_re_mmap, .modify_ah = bnxt_re_modify_ah, - .modify_device = bnxt_re_modify_device, .modify_qp = bnxt_re_modify_qp, .modify_srq = bnxt_re_modify_srq, .poll_cq = bnxt_re_poll_cq, -- cgit v1.2.3-59-g8ed1b From f3fceba5da5e149ecd8e58c1e1c60464629de27b Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 23 Sep 2019 13:41:58 +0300 Subject: RDMA/rxe: Verify modify_device mask Verify that the passed mask to rxe_modify_device() is supported. Link: https://lore.kernel.org/r/20190923104158.5331-4-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 623129f27f5a..fa47bdcc7f54 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -106,6 +106,10 @@ static int rxe_modify_device(struct ib_device *dev, { struct rxe_dev *rxe = to_rdev(dev); + if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | + IB_DEVICE_MODIFY_NODE_DESC)) + return -EOPNOTSUPP; + if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid); -- cgit v1.2.3-59-g8ed1b From 30e0f6cf5acb39cd04316d1eecbf4c6087c7ee02 Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Mon, 30 Sep 2019 13:12:52 +0530 Subject: RDMA/iw_cxgb3: Remove the iw_cxgb3 module from kernel Remove iw_cxgb3 module from kernel as the corresponding HW Chelsio T3 has reached EOL. Link: https://lore.kernel.org/r/20190930074252.20133-1-bharat@chelsio.com Signed-off-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe --- Documentation/ABI/stable/sysfs-class-infiniband | 19 - MAINTAINERS | 8 - drivers/infiniband/Kconfig | 1 - drivers/infiniband/hw/Makefile | 1 - drivers/infiniband/hw/cxgb3/Kconfig | 19 - drivers/infiniband/hw/cxgb3/Makefile | 7 - drivers/infiniband/hw/cxgb3/cxio_hal.c | 1312 ------------- drivers/infiniband/hw/cxgb3/cxio_hal.h | 204 -- drivers/infiniband/hw/cxgb3/cxio_resource.c | 344 ---- drivers/infiniband/hw/cxgb3/cxio_resource.h | 69 - drivers/infiniband/hw/cxgb3/cxio_wr.h | 802 -------- drivers/infiniband/hw/cxgb3/iwch.c | 282 --- drivers/infiniband/hw/cxgb3/iwch.h | 155 -- drivers/infiniband/hw/cxgb3/iwch_cm.c | 2258 ----------------------- drivers/infiniband/hw/cxgb3/iwch_cm.h | 233 --- drivers/infiniband/hw/cxgb3/iwch_cq.c | 230 --- drivers/infiniband/hw/cxgb3/iwch_ev.c | 232 --- drivers/infiniband/hw/cxgb3/iwch_mem.c | 101 - drivers/infiniband/hw/cxgb3/iwch_provider.c | 1321 ------------- drivers/infiniband/hw/cxgb3/iwch_provider.h | 347 ---- drivers/infiniband/hw/cxgb3/iwch_qp.c | 1082 ----------- drivers/infiniband/hw/cxgb3/tcb.h | 632 ------- include/uapi/rdma/cxgb3-abi.h | 82 - include/uapi/rdma/rdma_user_ioctl_cmds.h | 1 - 24 files changed, 9742 deletions(-) delete mode 100644 drivers/infiniband/hw/cxgb3/Kconfig delete mode 100644 drivers/infiniband/hw/cxgb3/Makefile delete mode 100644 drivers/infiniband/hw/cxgb3/cxio_hal.c delete mode 100644 drivers/infiniband/hw/cxgb3/cxio_hal.h delete mode 100644 drivers/infiniband/hw/cxgb3/cxio_resource.c delete mode 100644 drivers/infiniband/hw/cxgb3/cxio_resource.h delete mode 100644 drivers/infiniband/hw/cxgb3/cxio_wr.h delete mode 100644 
drivers/infiniband/hw/cxgb3/iwch.c delete mode 100644 drivers/infiniband/hw/cxgb3/iwch.h delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_cm.c delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_cm.h delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_cq.c delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_ev.c delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_mem.c delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_provider.c delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_provider.h delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_qp.c delete mode 100644 drivers/infiniband/hw/cxgb3/tcb.h delete mode 100644 include/uapi/rdma/cxgb3-abi.h (limited to 'drivers') diff --git a/Documentation/ABI/stable/sysfs-class-infiniband b/Documentation/ABI/stable/sysfs-class-infiniband index aed21b8916a2..96dfe1926b76 100644 --- a/Documentation/ABI/stable/sysfs-class-infiniband +++ b/Documentation/ABI/stable/sysfs-class-infiniband @@ -314,25 +314,6 @@ Description: board_id: (RO) Manufacturing board ID -sysfs interface for Chelsio T3 RDMA Driver (cxgb3) --------------------------------------------------- - -What: /sys/class/infiniband/cxgb3_X/hw_rev -What: /sys/class/infiniband/cxgb3_X/hca_type -What: /sys/class/infiniband/cxgb3_X/board_id -Date: Feb, 2007 -KernelVersion: v2.6.21 -Contact: linux-rdma@vger.kernel.org -Description: - hw_rev: (RO) Hardware revision number - - hca_type: (RO) HCA type. Here it is a driver short name. - It should normally match the name in its bus - driver structure (e.g. pci_driver::name). - - board_id: (RO) Manufacturing board id - - sysfs interface for Mellanox ConnectX HCA IB driver (mlx4) ---------------------------------------------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 296de2b51c83..4fd037a437ec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4459,14 +4459,6 @@ W: http://www.chelsio.com S: Supported F: drivers/scsi/cxgbi/cxgb3i -CXGB3 IWARP RNIC DRIVER (IW_CXGB3) -M: Potnuri Bharat Teja -L: linux-rdma@vger.kernel.org -W: http://www.openfabrics.org -S: Supported -F: drivers/infiniband/hw/cxgb3/ -F: include/uapi/rdma/cxgb3-abi.h - CXGB4 CRYPTO DRIVER (chcr) M: Atul Gupta L: linux-crypto@vger.kernel.org diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index b44b1c322ec8..ade86388434f 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -83,7 +83,6 @@ config INFINIBAND_ADDR_TRANS_CONFIGFS if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/qib/Kconfig" -source "drivers/infiniband/hw/cxgb3/Kconfig" source "drivers/infiniband/hw/cxgb4/Kconfig" source "drivers/infiniband/hw/efa/Kconfig" source "drivers/infiniband/hw/i40iw/Kconfig" diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index 433fca59febd..0aeccd984889 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/ obj-$(CONFIG_INFINIBAND_QIB) += qib/ -obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ obj-$(CONFIG_INFINIBAND_EFA) += efa/ obj-$(CONFIG_INFINIBAND_I40IW) += i40iw/ diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig deleted file mode 100644 index 8c1a72bff447..000000000000 --- a/drivers/infiniband/hw/cxgb3/Kconfig +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config INFINIBAND_CXGB3 - tristate "Chelsio RDMA Driver" - depends on 
CHELSIO_T3 - select GENERIC_ALLOCATOR - ---help--- - This is an iWARP/RDMA driver for the Chelsio T3 1GbE and - 10GbE adapters. - - For general information about Chelsio and our products, visit - our website at . - - For customer support, please visit our customer support page at - . - - Please send feedback to . - - To compile this driver as a module, choose M here: the module - will be called iw_cxgb3. diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile deleted file mode 100644 index 34bb86a6ae3a..000000000000 --- a/drivers/infiniband/hw/cxgb3/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb3 - -obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o - -iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ - iwch_provider.o iwch.o cxio_hal.o cxio_resource.o diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c deleted file mode 100644 index 95b22a651673..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ /dev/null @@ -1,1312 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cxio_resource.h" -#include "cxio_hal.h" -#include "cxgb3_offload.h" -#include "sge_defs.h" - -static LIST_HEAD(rdev_list); -static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL; - -static struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name) -{ - struct cxio_rdev *rdev; - - list_for_each_entry(rdev, &rdev_list, entry) - if (!strcmp(rdev->dev_name, dev_name)) - return rdev; - return NULL; -} - -static struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev) -{ - struct cxio_rdev *rdev; - - list_for_each_entry(rdev, &rdev_list, entry) - if (rdev->t3cdev_p == tdev) - return rdev; - return NULL; -} - -int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq, - enum t3_cq_opcode op, u32 credit) -{ - int ret; - struct t3_cqe *cqe; - u32 rptr; - - struct rdma_cq_op setup; - setup.id = cq->cqid; - setup.credits = (op == CQ_CREDIT_UPDATE) ? 
credit : 0; - setup.op = op; - ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup); - - if ((ret < 0) || (op == CQ_CREDIT_UPDATE)) - return ret; - - /* - * If the rearm returned an index other than our current index, - * then there might be CQE's in flight (being DMA'd). We must wait - * here for them to complete or the consumer can miss a notification. - */ - if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) { - int i=0; - - rptr = cq->rptr; - - /* - * Keep the generation correct by bumping rptr until it - * matches the index returned by the rearm - 1. - */ - while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret) - rptr++; - - /* - * Now rptr is the index for the (last) cqe that was - * in-flight at the time the HW rearmed the CQ. We - * spin until that CQE is valid. - */ - cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2); - while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) { - udelay(1); - if (i++ > 1000000) { - pr_err("%s: stalled rnic\n", rdev_p->dev_name); - return -EIO; - } - } - - return 1; - } - - return 0; -} - -static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid) -{ - struct rdma_cq_setup setup; - setup.id = cqid; - setup.base_addr = 0; /* NULL address */ - setup.size = 0; /* disaable the CQ */ - setup.credits = 0; - setup.credit_thres = 0; - setup.ovfl_mode = 0; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) -{ - u64 sge_cmd; - struct t3_modify_qp_wr *wqe; - struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); - if (!skb) { - pr_debug("%s alloc_skb failed\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, - T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7, - T3_SOPEOP); - wqe->flags = cpu_to_be32(MODQP_WRITE_EC); - sge_cmd = qpid << 8 | 3; - wqe->sge_cmd = cpu_to_be64(sge_cmd); - skb->priority = CPL_PRIORITY_CONTROL; - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -} - -int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel) -{ - struct rdma_cq_setup setup; - int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe); - - size += 1; /* one extra page for storing cq-in-err state */ - cq->cqid = cxio_hal_get_cqid(rdev_p->rscp); - if (!cq->cqid) - return -ENOMEM; - if (kernel) { - cq->sw_queue = kzalloc(size, GFP_KERNEL); - if (!cq->sw_queue) - return -ENOMEM; - } - cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size, - &(cq->dma_addr), GFP_KERNEL); - if (!cq->queue) { - kfree(cq->sw_queue); - return -ENOMEM; - } - dma_unmap_addr_set(cq, mapping, cq->dma_addr); - setup.id = cq->cqid; - setup.base_addr = (u64) (cq->dma_addr); - setup.size = 1UL << cq->size_log2; - setup.credits = 65535; - setup.credit_thres = 1; - if (rdev_p->t3cdev_p->type != T3A) - setup.ovfl_mode = 0; - else - setup.ovfl_mode = 1; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - struct cxio_qpid_list *entry; - u32 qpid; - int i; - - mutex_lock(&uctx->lock); - if (!list_empty(&uctx->qpids)) { - entry = list_entry(uctx->qpids.next, struct cxio_qpid_list, - entry); - list_del(&entry->entry); - qpid = entry->qpid; - kfree(entry); - } else { - qpid = cxio_hal_get_qpid(rdev_p->rscp); - if (!qpid) - goto out; - for (i = qpid+1; i & rdev_p->qpmask; i++) { - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - break; - entry->qpid = i; - 
list_add_tail(&entry->entry, &uctx->qpids); - } - } -out: - mutex_unlock(&uctx->lock); - pr_debug("%s qpid 0x%x\n", __func__, qpid); - return qpid; -} - -static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid, - struct cxio_ucontext *uctx) -{ - struct cxio_qpid_list *entry; - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return; - pr_debug("%s qpid 0x%x\n", __func__, qpid); - entry->qpid = qpid; - mutex_lock(&uctx->lock); - list_add_tail(&entry->entry, &uctx->qpids); - mutex_unlock(&uctx->lock); -} - -void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - struct list_head *pos, *nxt; - struct cxio_qpid_list *entry; - - mutex_lock(&uctx->lock); - list_for_each_safe(pos, nxt, &uctx->qpids) { - entry = list_entry(pos, struct cxio_qpid_list, entry); - list_del_init(&entry->entry); - if (!(entry->qpid & rdev_p->qpmask)) - cxio_hal_put_qpid(rdev_p->rscp, entry->qpid); - kfree(entry); - } - mutex_unlock(&uctx->lock); -} - -void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - INIT_LIST_HEAD(&uctx->qpids); - mutex_init(&uctx->lock); -} - -int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, - struct t3_wq *wq, struct cxio_ucontext *uctx) -{ - int depth = 1UL << wq->size_log2; - int rqsize = 1UL << wq->rq_size_log2; - - wq->qpid = get_qpid(rdev_p, uctx); - if (!wq->qpid) - return -ENOMEM; - - wq->rq = kcalloc(depth, sizeof(struct t3_swrq), GFP_KERNEL); - if (!wq->rq) - goto err1; - - wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize); - if (!wq->rq_addr) - goto err2; - - wq->sq = kcalloc(depth, sizeof(struct t3_swsq), GFP_KERNEL); - if (!wq->sq) - goto err3; - - wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), - depth * sizeof(union t3_wr), - &(wq->dma_addr), GFP_KERNEL); - if (!wq->queue) - goto err4; - - dma_unmap_addr_set(wq, mapping, wq->dma_addr); - wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; - if (!kernel_domain) - wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + - (wq->qpid << rdev_p->qpshift); - wq->rdev = rdev_p; - pr_debug("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", - __func__, wq->qpid, wq->doorbell, (unsigned long long)wq->udb); - return 0; -err4: - kfree(wq->sq); -err3: - cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize); -err2: - kfree(wq->rq); -err1: - put_qpid(rdev_p, wq->qpid, uctx); - return -ENOMEM; -} - -void cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) -{ - cxio_hal_clear_cq_ctx(rdev_p, cq->cqid); - kfree(cq->sw_queue); - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << (cq->size_log2)) - * sizeof(struct t3_cqe) + 1, cq->queue, - dma_unmap_addr(cq, mapping)); - cxio_hal_put_cqid(rdev_p->rscp, cq->cqid); -} - -int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq, - struct cxio_ucontext *uctx) -{ - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << (wq->size_log2)) - * sizeof(union t3_wr), wq->queue, - dma_unmap_addr(wq, mapping)); - kfree(wq->sq); - cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2)); - kfree(wq->rq); - put_qpid(rdev_p, wq->qpid, uctx); - return 0; -} - -static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq) -{ - struct t3_cqe cqe; - - pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__, - wq, cq, cq->sw_rptr, cq->sw_wptr); - memset(&cqe, 0, sizeof(cqe)); - cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | - V_CQE_OPCODE(T3_SEND) | - V_CQE_TYPE(0) | - V_CQE_SWCQE(1) | - V_CQE_QPID(wq->qpid) | - V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, - cq->size_log2))); - 
*(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; - cq->sw_wptr++; -} - -int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) -{ - u32 ptr; - int flushed = 0; - - pr_debug("%s wq %p cq %p\n", __func__, wq, cq); - - /* flush RQ */ - pr_debug("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__, - wq->rq_rptr, wq->rq_wptr, count); - ptr = wq->rq_rptr + count; - while (ptr++ != wq->rq_wptr) { - insert_recv_cqe(wq, cq); - flushed++; - } - return flushed; -} - -static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, - struct t3_swsq *sqp) -{ - struct t3_cqe cqe; - - pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__, - wq, cq, cq->sw_rptr, cq->sw_wptr); - memset(&cqe, 0, sizeof(cqe)); - cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | - V_CQE_OPCODE(sqp->opcode) | - V_CQE_TYPE(1) | - V_CQE_SWCQE(1) | - V_CQE_QPID(wq->qpid) | - V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, - cq->size_log2))); - cqe.u.scqe.wrid_hi = sqp->sq_wptr; - - *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; - cq->sw_wptr++; -} - -int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) -{ - __u32 ptr = wq->sq_rptr + count; - int flushed = 0; - struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - - while (ptr != wq->sq_wptr) { - sqp->signaled = 0; - insert_sq_cqe(wq, cq, sqp); - ptr++; - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - flushed++; - } - return flushed; -} - -/* - * Move all CQEs from the HWCQ into the SWCQ. - */ -void cxio_flush_hw_cq(struct t3_cq *cq) -{ - struct t3_cqe *cqe, *swcqe; - - pr_debug("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid); - cqe = cxio_next_hw_cqe(cq); - while (cqe) { - pr_debug("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n", - __func__, cq->rptr, cq->sw_wptr); - swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2); - *swcqe = *cqe; - swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); - cq->sw_wptr++; - cq->rptr++; - cqe = cxio_next_hw_cqe(cq); - } -} - -static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq) -{ - if (CQE_OPCODE(*cqe) == T3_TERMINATE) - return 0; - - if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe)) - return 0; - - if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe)) - return 0; - - if (CQE_SEND_OPCODE(*cqe) && RQ_TYPE(*cqe) && - Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) - return 0; - - return 1; -} - -void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count) -{ - struct t3_cqe *cqe; - u32 ptr; - - *count = 0; - ptr = cq->sw_rptr; - while (!Q_EMPTY(ptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); - if ((SQ_TYPE(*cqe) || - ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) && - (CQE_QPID(*cqe) == wq->qpid)) - (*count)++; - ptr++; - } - pr_debug("%s cq %p count %d\n", __func__, cq, *count); -} - -void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count) -{ - struct t3_cqe *cqe; - u32 ptr; - - *count = 0; - pr_debug("%s count zero %d\n", __func__, *count); - ptr = cq->sw_rptr; - while (!Q_EMPTY(ptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); - if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) && - (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq)) - (*count)++; - ptr++; - } - pr_debug("%s cq %p count %d\n", __func__, cq, *count); -} - -static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p) -{ - struct rdma_cq_setup setup; - setup.id = 0; - setup.base_addr = 0; /* NULL address */ - setup.size = 1; /* enable the CQ */ - setup.credits = 0; - - /* force SGE to redirect to 
RspQ and interrupt */ - setup.credit_thres = 0; - setup.ovfl_mode = 1; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) -{ - int err; - u64 sge_cmd, ctx0, ctx1; - u64 base_addr; - struct t3_modify_qp_wr *wqe; - struct sk_buff *skb; - - skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); - if (!skb) { - pr_debug("%s alloc_skb failed\n", __func__); - return -ENOMEM; - } - err = cxio_hal_init_ctrl_cq(rdev_p); - if (err) { - pr_debug("%s err %d initializing ctrl_cq\n", __func__, err); - goto err; - } - rdev_p->ctrl_qp.workq = dma_alloc_coherent( - &(rdev_p->rnic_info.pdev->dev), - (1 << T3_CTRL_QP_SIZE_LOG2) * - sizeof(union t3_wr), - &(rdev_p->ctrl_qp.dma_addr), - GFP_KERNEL); - if (!rdev_p->ctrl_qp.workq) { - pr_debug("%s dma_alloc_coherent failed\n", __func__); - err = -ENOMEM; - goto err; - } - dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping, - rdev_p->ctrl_qp.dma_addr); - rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; - - mutex_init(&rdev_p->ctrl_qp.lock); - init_waitqueue_head(&rdev_p->ctrl_qp.waitq); - - /* update HW Ctrl QP context */ - base_addr = rdev_p->ctrl_qp.dma_addr; - base_addr >>= 12; - ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) | - V_EC_BASE_LO((u32) base_addr & 0xffff)); - ctx0 <<= 32; - ctx0 |= V_EC_CREDITS(FW_WR_NUM); - base_addr >>= 16; - ctx1 = (u32) base_addr; - base_addr >>= 32; - ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) | - V_EC_TYPE(0) | V_EC_GEN(1) | - V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32; - wqe = skb_put_zero(skb, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0, - T3_CTL_QP_TID, 7, T3_SOPEOP); - wqe->flags = cpu_to_be32(MODQP_WRITE_EC); - sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; - wqe->sge_cmd = cpu_to_be64(sge_cmd); - wqe->ctx1 = cpu_to_be64(ctx1); - wqe->ctx0 = cpu_to_be64(ctx0); - pr_debug("CtrlQP dma_addr %pad workq %p size %d\n", - &rdev_p->ctrl_qp.dma_addr, rdev_p->ctrl_qp.workq, - 1 << T3_CTRL_QP_SIZE_LOG2); - skb->priority = CPL_PRIORITY_CONTROL; - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -err: - kfree_skb(skb); - return err; -} - -static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p) -{ - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << T3_CTRL_QP_SIZE_LOG2) - * sizeof(union t3_wr), rdev_p->ctrl_qp.workq, - dma_unmap_addr(&rdev_p->ctrl_qp, mapping)); - return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID); -} - -/* write len bytes of data into addr (32B aligned address) - * If data is NULL, clear len byte of memory to zero. - * caller acquires the ctrl_qp lock before the call - */ -static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, - u32 len, void *data) -{ - u32 i, nr_wqe, copy_len; - u8 *copy_data; - u8 wr_len, utx_len; /* length in 8 byte flit */ - enum t3_wr_flags flag; - __be64 *wqe; - u64 utx_cmd; - addr &= 0x7FFFFFF; - nr_wqe = len % 96 ? 
len / 96 + 1 : len / 96; /* 96B max per WQE */ - pr_debug("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n", - __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len, - nr_wqe, data, addr); - utx_len = 3; /* in 32B unit */ - for (i = 0; i < nr_wqe; i++) { - if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2)) { - pr_debug("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, wait for more space i %d\n", - __func__, - rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i); - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - !Q_FULL(rdev_p->ctrl_qp.rptr, - rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2))) { - pr_debug("%s ctrl_qp workq interrupted\n", - __func__); - return -ERESTARTSYS; - } - pr_debug("%s ctrl_qp wakeup, continue posting work request i %d\n", - __func__, i); - } - wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % - (1 << T3_CTRL_QP_SIZE_LOG2))); - flag = 0; - if (i == (nr_wqe - 1)) { - /* last WQE */ - flag = T3_COMPLETION_FLAG; - if (len % 32) - utx_len = len / 32 + 1; - else - utx_len = len / 32; - } - - /* - * Force a CQE to return the credit to the workq in case - * we posted more than half the max QP size of WRs - */ - if ((i != 0) && - (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) { - flag = T3_COMPLETION_FLAG; - pr_debug("%s force completion at i %d\n", __func__, i); - } - - /* build the utx mem command */ - wqe += (sizeof(struct t3_bypass_wr) >> 3); - utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3); - utx_cmd <<= 32; - utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1); - *wqe = cpu_to_be64(utx_cmd); - wqe++; - copy_data = (u8 *) data + i * 96; - copy_len = len > 96 ? 96 : len; - - /* clear memory content if data is NULL */ - if (data) - memcpy(wqe, copy_data, copy_len); - else - memset(wqe, 0, copy_len); - if (copy_len % 32) - memset(((u8 *) wqe) + copy_len, 0, - 32 - (copy_len % 32)); - wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 + - (utx_len << 2); - wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % - (1 << T3_CTRL_QP_SIZE_LOG2))); - - /* wptr in the WRID[31:0] */ - ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr; - - /* - * This must be the last write with a memory barrier - * for the genbit - */ - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag, - Q_GENBIT(rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID, - wr_len, T3_SOPEOP); - if (flag == T3_COMPLETION_FLAG) - ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID); - len -= 96; - rdev_p->ctrl_qp.wptr++; - } - return 0; -} - -/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr - * OUT: stag index - * TBD: shared memory region support - */ -static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, - u32 *stag, u8 stag_state, u32 pdid, - enum tpt_mem_type type, enum tpt_mem_perm perm, - u32 zbva, u64 to, u32 len, u8 page_size, - u32 pbl_size, u32 pbl_addr) -{ - int err; - struct tpt_entry tpt; - u32 stag_idx; - u32 wptr; - - if (cxio_fatal_error(rdev_p)) - return -EIO; - - stag_state = stag_state > 0; - stag_idx = (*stag) >> 8; - - if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) { - stag_idx = cxio_hal_get_stag(rdev_p->rscp); - if (!stag_idx) - return -ENOMEM; - *stag = (stag_idx << 8) | ((*stag) & 0xFF); - } - pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", - __func__, stag_state, type, pdid, stag_idx); - - mutex_lock(&rdev_p->ctrl_qp.lock); - - /* write TPT entry */ - if (reset_tpt_entry) - memset(&tpt, 0, sizeof(tpt)); - else { - tpt.valid_stag_pdid 
= cpu_to_be32(F_TPT_VALID | - V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) | - V_TPT_STAG_STATE(stag_state) | - V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid)); - BUG_ON(page_size >= 28); - tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) | - ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) | - V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | - V_TPT_PAGE_SIZE(page_size)); - tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); - tpt.len = cpu_to_be32(len); - tpt.va_hi = cpu_to_be32((u32) (to >> 32)); - tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL)); - tpt.rsvd_bind_cnt_or_pstag = 0; - tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2)); - } - err = cxio_hal_ctrl_qp_write_mem(rdev_p, - stag_idx + - (rdev_p->rnic_info.tpt_base >> 5), - sizeof(tpt), &tpt); - - /* release the stag index to free pool */ - if (reset_tpt_entry) - cxio_hal_put_stag(rdev_p->rscp, stag_idx); - - wptr = rdev_p->ctrl_qp.wptr; - mutex_unlock(&rdev_p->ctrl_qp.lock); - if (!err) - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - SEQ32_GE(rdev_p->ctrl_qp.rptr, - wptr))) - return -ERESTARTSYS; - return err; -} - -int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, - u32 pbl_addr, u32 pbl_size) -{ - u32 wptr; - int err; - - pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", - __func__, pbl_addr, rdev_p->rnic_info.pbl_base, - pbl_size); - - mutex_lock(&rdev_p->ctrl_qp.lock); - err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3, - pbl); - wptr = rdev_p->ctrl_qp.wptr; - mutex_unlock(&rdev_p->ctrl_qp.lock); - if (err) - return err; - - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - SEQ32_GE(rdev_p->ctrl_qp.rptr, - wptr))) - return -ERESTARTSYS; - - return 0; -} - -int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl_size, pbl_addr); -} - -int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr) -{ - return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl_size, pbl_addr); -} - -int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size, - u32 pbl_addr) -{ - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, - pbl_size, pbl_addr); -} - -int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0, - 0, 0); -} - -int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag) -{ - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, - 0, 0); -} - -int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR, - 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr); -} - -int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) -{ - struct t3_rdma_init_wr *wqe; - struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC); - if (!skb) - return -ENOMEM; - pr_debug("%s rdev_p %p\n", __func__, rdev_p); - wqe = __skb_put(skb, sizeof(*wqe)); - wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT)); - wqe->wrh.gen_tid_len = 
cpu_to_be32(V_FW_RIWR_TID(attr->tid) | - V_FW_RIWR_LEN(sizeof(*wqe) >> 3)); - wqe->wrid.id1 = 0; - wqe->qpid = cpu_to_be32(attr->qpid); - wqe->pdid = cpu_to_be32(attr->pdid); - wqe->scqid = cpu_to_be32(attr->scqid); - wqe->rcqid = cpu_to_be32(attr->rcqid); - wqe->rq_addr = cpu_to_be32(attr->rq_addr - rdev_p->rnic_info.rqt_base); - wqe->rq_size = cpu_to_be32(attr->rq_size); - wqe->mpaattrs = attr->mpaattrs; - wqe->qpcaps = attr->qpcaps; - wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss); - wqe->rqe_count = cpu_to_be16(attr->rqe_count); - wqe->flags_rtr_type = cpu_to_be16(attr->flags | - V_RTR_TYPE(attr->rtr_type) | - V_CHAN(attr->chan)); - wqe->ord = cpu_to_be32(attr->ord); - wqe->ird = cpu_to_be32(attr->ird); - wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr); - wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size); - wqe->irs = cpu_to_be32(attr->irs); - skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */ - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -} - -void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb) -{ - cxio_ev_cb = ev_cb; -} - -void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb) -{ - cxio_ev_cb = NULL; -} - -static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb) -{ - static int cnt; - struct cxio_rdev *rdev_p = NULL; - struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data; - pr_debug("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x se %0x notify %0x cqbranch %0x creditth %0x\n", - cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg), - RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg), - RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg), - RSPQ_CREDIT_THRESH(rsp_msg)); - pr_debug("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", - CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe), - CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), - CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe), - CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - rdev_p = (struct cxio_rdev *)t3cdev_p->ulp; - if (!rdev_p) { - pr_debug("%s called by t3cdev %p with null ulp\n", __func__, - t3cdev_p); - return 0; - } - if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) { - rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1; - wake_up_interruptible(&rdev_p->ctrl_qp.waitq); - dev_kfree_skb_irq(skb); - } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8) - dev_kfree_skb_irq(skb); - else if (cxio_ev_cb) - (*cxio_ev_cb) (rdev_p, skb); - else - dev_kfree_skb_irq(skb); - cnt++; - return 0; -} - -/* Caller takes care of locking if needed */ -int cxio_rdev_open(struct cxio_rdev *rdev_p) -{ - struct net_device *netdev_p = NULL; - int err = 0; - if (strlen(rdev_p->dev_name)) { - if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) { - return -EBUSY; - } - netdev_p = dev_get_by_name(&init_net, rdev_p->dev_name); - if (!netdev_p) { - return -EINVAL; - } - dev_put(netdev_p); - } else if (rdev_p->t3cdev_p) { - if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) { - return -EBUSY; - } - netdev_p = rdev_p->t3cdev_p->lldev; - strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name, - T3_MAX_DEV_NAME_LEN); - } else { - pr_debug("%s t3cdev_p or dev_name must be set\n", __func__); - return -EINVAL; - } - - list_add_tail(&rdev_p->entry, &rdev_list); - - pr_debug("%s opening rnic dev %s\n", __func__, rdev_p->dev_name); - memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp)); - if (!rdev_p->t3cdev_p) - rdev_p->t3cdev_p = dev2t3cdev(netdev_p); - rdev_p->t3cdev_p->ulp = (void *) rdev_p; - - err = 
rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO, - &(rdev_p->fw_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) { - pr_err("fatal firmware version mismatch: need version %u but adapter has version %u\n", - CXIO_FW_MAJ, - G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers)); - err = -EINVAL; - goto err1; - } - - err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS, - &(rdev_p->rnic_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS, - &(rdev_p->port_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - - /* - * qpshift is the number of bits to shift the qpid left in order - * to get the correct address of the doorbell for that qp. - */ - cxio_init_ucontext(rdev_p, &rdev_p->uctx); - rdev_p->qpshift = PAGE_SHIFT - - ilog2(65536 >> - ilog2(rdev_p->rnic_info.udbell_len >> - PAGE_SHIFT)); - rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT; - rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1; - pr_debug("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n", - __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base, - rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p), - rdev_p->rnic_info.pbl_base, - rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base, - rdev_p->rnic_info.rqt_top); - pr_debug("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu qpnr %d qpmask 0x%x\n", - rdev_p->rnic_info.udbell_len, - rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr, - rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask); - - err = cxio_hal_init_ctrl_qp(rdev_p); - if (err) { - pr_err("%s error %d initializing ctrl_qp\n", __func__, err); - goto err1; - } - err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0, - 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ, - T3_MAX_NUM_PD); - if (err) { - pr_err("%s error %d initializing hal resources\n", - __func__, err); - goto err2; - } - err = cxio_hal_pblpool_create(rdev_p); - if (err) { - pr_err("%s error %d initializing pbl mem pool\n", - __func__, err); - goto err3; - } - err = cxio_hal_rqtpool_create(rdev_p); - if (err) { - pr_err("%s error %d initializing rqt mem pool\n", - __func__, err); - goto err4; - } - return 0; -err4: - cxio_hal_pblpool_destroy(rdev_p); -err3: - cxio_hal_destroy_resource(rdev_p->rscp); -err2: - cxio_hal_destroy_ctrl_qp(rdev_p); -err1: - rdev_p->t3cdev_p->ulp = NULL; - list_del(&rdev_p->entry); - return err; -} - -void cxio_rdev_close(struct cxio_rdev *rdev_p) -{ - if (rdev_p) { - cxio_hal_pblpool_destroy(rdev_p); - cxio_hal_rqtpool_destroy(rdev_p); - list_del(&rdev_p->entry); - cxio_hal_destroy_ctrl_qp(rdev_p); - cxio_hal_destroy_resource(rdev_p->rscp); - rdev_p->t3cdev_p->ulp = NULL; - } -} - -int __init cxio_hal_init(void) -{ - if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI)) - return -ENOMEM; - t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler); - return 0; -} - -void __exit cxio_hal_exit(void) -{ - struct cxio_rdev *rdev, *tmp; - - t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL); - list_for_each_entry_safe(rdev, tmp, &rdev_list, entry) - cxio_rdev_close(rdev); - cxio_hal_destroy_rhdl_resource(); -} - -static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq) -{ - struct 
t3_swsq *sqp; - __u32 ptr = wq->sq_rptr; - int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr); - - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - while (count--) - if (!sqp->signaled) { - ptr++; - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - } else if (sqp->complete) { - - /* - * Insert this completed cqe into the swcq. - */ - pr_debug("%s moving cqe into swcq sq idx %ld cq idx %ld\n", - __func__, Q_PTR2IDX(ptr, wq->sq_size_log2), - Q_PTR2IDX(cq->sw_wptr, cq->size_log2)); - sqp->cqe.header |= htonl(V_CQE_SWCQE(1)); - *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) - = sqp->cqe; - cq->sw_wptr++; - sqp->signaled = 0; - break; - } else - break; -} - -static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe, - struct t3_cqe *read_cqe) -{ - read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr; - read_cqe->len = wq->oldest_read->read_len; - read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) | - V_CQE_SWCQE(SW_CQE(*hw_cqe)) | - V_CQE_OPCODE(T3_READ_REQ) | - V_CQE_TYPE(1)); -} - -/* - * Return a ptr to the next read wr in the SWSQ or NULL. - */ -static void advance_oldest_read(struct t3_wq *wq) -{ - - u32 rptr = wq->oldest_read - wq->sq + 1; - u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2); - - while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) { - wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2); - - if (wq->oldest_read->opcode == T3_READ_REQ) - return; - rptr++; - } - wq->oldest_read = NULL; -} - -/* - * cxio_poll_cq - * - * Caller must: - * check the validity of the first CQE, - * supply the wq assicated with the qpid. - * - * credit: cq credit to return to sge. - * cqe_flushed: 1 iff the CQE is flushed. - * cqe: copy of the polled CQE. - * - * return value: - * 0 CQE returned, - * -1 CQE skipped, try again. - */ -int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit) -{ - int ret = 0; - struct t3_cqe *hw_cqe, read_cqe; - - *cqe_flushed = 0; - *credit = 0; - hw_cqe = cxio_next_cqe(cq); - - pr_debug("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", - __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe), - CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe), - CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe), - CQE_WRID_LOW(*hw_cqe)); - - /* - * skip cqe's not affiliated with a QP. - */ - if (wq == NULL) { - ret = -1; - goto skip_cqe; - } - - /* - * Gotta tweak READ completions: - * 1) the cqe doesn't contain the sq_wptr from the wr. - * 2) opcode not reflected from the wr. - * 3) read_len not reflected from the wr. - * 4) cq_type is RQ_TYPE not SQ_TYPE. - */ - if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) { - - /* - * If this is an unsolicited read response, then the read - * was generated by the kernel driver as part of peer-2-peer - * connection setup. So ignore the completion. - */ - if (!wq->oldest_read) { - if (CQE_STATUS(*hw_cqe)) - wq->error = 1; - ret = -1; - goto skip_cqe; - } - - /* - * Don't write to the HWCQ, so create a new read req CQE - * in local memory. - */ - create_read_req_cqe(wq, hw_cqe, &read_cqe); - hw_cqe = &read_cqe; - advance_oldest_read(wq); - } - - /* - * T3A: Discard TERMINATE CQEs. - */ - if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) { - ret = -1; - wq->error = 1; - goto skip_cqe; - } - - if (CQE_STATUS(*hw_cqe) || wq->error) { - *cqe_flushed = wq->error; - wq->error = 1; - - /* - * T3A inserts errors into the CQE. We cannot return - * these as work completions. 
- */ - /* incoming write failures */ - if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE) - && RQ_TYPE(*hw_cqe)) { - ret = -1; - goto skip_cqe; - } - /* incoming read request failures */ - if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) { - ret = -1; - goto skip_cqe; - } - - /* incoming SEND with no receive posted failures */ - if (CQE_SEND_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) && - Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { - ret = -1; - goto skip_cqe; - } - BUG_ON((*cqe_flushed == 0) && !SW_CQE(*hw_cqe)); - goto proc_cqe; - } - - /* - * RECV completion. - */ - if (RQ_TYPE(*hw_cqe)) { - - /* - * HW only validates 4 bits of MSN. So we must validate that - * the MSN in the SEND is the next expected MSN. If its not, - * then we complete this with TPT_ERR_MSN and mark the wq in - * error. - */ - - if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { - wq->error = 1; - ret = -1; - goto skip_cqe; - } - - if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) { - wq->error = 1; - hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN)); - goto proc_cqe; - } - goto proc_cqe; - } - - /* - * If we get here its a send completion. - * - * Handle out of order completion. These get stuffed - * in the SW SQ. Then the SW SQ is walked to move any - * now in-order completions into the SW CQ. This handles - * 2 cases: - * 1) reaping unsignaled WRs when the first subsequent - * signaled WR is completed. - * 2) out of order read completions. - */ - if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) { - struct t3_swsq *sqp; - - pr_debug("%s out of order completion going in swsq at idx %ld\n", - __func__, - Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), - wq->sq_size_log2)); - sqp = wq->sq + - Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2); - sqp->cqe = *hw_cqe; - sqp->complete = 1; - ret = -1; - goto flush_wq; - } - -proc_cqe: - *cqe = *hw_cqe; - - /* - * Reap the associated WR(s) that are freed up with this - * completion. - */ - if (SQ_TYPE(*hw_cqe)) { - wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); - pr_debug("%s completing sq idx %ld\n", __func__, - Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); - *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id; - wq->sq_rptr++; - } else { - pr_debug("%s completing rq idx %ld\n", __func__, - Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); - *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id; - if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr) - cxio_hal_pblpool_free(wq->rdev, - wq->rq[Q_PTR2IDX(wq->rq_rptr, - wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE); - BUG_ON(Q_EMPTY(wq->rq_rptr, wq->rq_wptr)); - wq->rq_rptr++; - } - -flush_wq: - /* - * Flush any completed cqes that are now in-order. - */ - flush_completed_wrs(wq, cq); - -skip_cqe: - if (SW_CQE(*hw_cqe)) { - pr_debug("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n", - __func__, cq, cq->cqid, cq->sw_rptr); - ++cq->sw_rptr; - } else { - pr_debug("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n", - __func__, cq, cq->cqid, cq->rptr); - ++cq->rptr; - - /* - * T3A: compute credits. - */ - if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1))) - || ((cq->rptr - cq->wptr) >= 128)) { - *credit = cq->rptr - cq->wptr; - cq->wptr = cq->rptr; - } - } - return ret; -} diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h deleted file mode 100644 index 40c029ffa425..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 
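
The credit computation at the tail of cxio_poll_cq batches returns to the SGE: nothing is handed back until either half the CQ or 128 entries have been consumed since the last update, whichever comes first. A small stand-alone model of that policy; the function and variable names are illustrative, not the driver's:

#include <stdint.h>
#include <stdio.h>

/* Model of the T3A credit policy: hold back credit until the consumer has
 * moved at least half the ring, or 128 entries, past the last return. */
static uint32_t credit_to_return(uint32_t rptr, uint32_t wptr, unsigned size_log2)
{
	uint32_t consumed = rptr - wptr;	/* free-running pointers, wrap-safe */

	if (consumed > (1U << (size_log2 - 1)) || consumed >= 128)
		return consumed;
	return 0;
}

int main(void)
{
	/* 256-entry CQ: 100 consumed entries stay batched, 130 get returned. */
	printf("%u %u\n", credit_to_return(1000, 900, 8),
	       credit_to_return(1030, 900, 8));
	return 0;
}
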
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __CXIO_HAL_H__ -#define __CXIO_HAL_H__ - -#include -#include -#include - -#include "t3_cpl.h" -#include "t3cdev.h" -#include "cxgb3_ctl_defs.h" -#include "cxio_wr.h" - -#define T3_CTRL_QP_ID FW_RI_SGEEC_START -#define T3_CTL_QP_TID FW_RI_TID_START -#define T3_CTRL_QP_SIZE_LOG2 8 -#define T3_CTRL_CQ_ID 0 - -#define T3_MAX_NUM_RI (1<<15) -#define T3_MAX_NUM_QP (1<<15) -#define T3_MAX_NUM_CQ (1<<15) -#define T3_MAX_NUM_PD (1<<15) -#define T3_MAX_PBL_SIZE 256 -#define T3_MAX_RQ_SIZE 1024 -#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1) -#define T3_MAX_CQ_DEPTH 65536 -#define T3_MAX_NUM_STAG (1<<15) -#define T3_MAX_MR_SIZE 0x100000000ULL -#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ - -#define T3_STAG_UNSET 0xffffffff - -#define T3_MAX_DEV_NAME_LEN 32 - -#define CXIO_FW_MAJ 7 - -struct cxio_hal_ctrl_qp { - u32 wptr; - u32 rptr; - struct mutex lock; /* for the wtpr, can sleep */ - wait_queue_head_t waitq;/* wait for RspQ/CQE msg */ - union t3_wr *workq; /* the work request queue */ - dma_addr_t dma_addr; /* pci bus address of the workq */ - DEFINE_DMA_UNMAP_ADDR(mapping); - void __iomem *doorbell; -}; - -struct cxio_hal_resource { - struct kfifo tpt_fifo; - spinlock_t tpt_fifo_lock; - struct kfifo qpid_fifo; - spinlock_t qpid_fifo_lock; - struct kfifo cqid_fifo; - spinlock_t cqid_fifo_lock; - struct kfifo pdid_fifo; - spinlock_t pdid_fifo_lock; -}; - -struct cxio_qpid_list { - struct list_head entry; - u32 qpid; -}; - -struct cxio_ucontext { - struct list_head qpids; - struct mutex lock; -}; - -struct cxio_rdev { - char dev_name[T3_MAX_DEV_NAME_LEN]; - struct t3cdev *t3cdev_p; - struct rdma_info rnic_info; - struct adap_ports port_info; - struct cxio_hal_resource *rscp; - struct cxio_hal_ctrl_qp ctrl_qp; - void *ulp; - unsigned long qpshift; - u32 qpnr; - u32 qpmask; - struct cxio_ucontext uctx; - struct gen_pool *pbl_pool; - struct gen_pool *rqt_pool; - struct list_head entry; - struct ch_embedded_info fw_info; - u32 flags; -#define CXIO_ERROR_FATAL 1 -}; - -static inline int cxio_fatal_error(struct cxio_rdev *rdev_p) -{ - return rdev_p->flags & CXIO_ERROR_FATAL; -} - -static inline int cxio_num_stags(struct cxio_rdev *rdev_p) -{ - return min((int)T3_MAX_NUM_STAG, 
(int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5)); -} - -typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p, - struct sk_buff * skb); - -#define RSPQ_CQID(rsp) (be32_to_cpu(rsp->cq_ptrid) & 0xffff) -#define RSPQ_CQPTR(rsp) ((be32_to_cpu(rsp->cq_ptrid) >> 16) & 0xffff) -#define RSPQ_GENBIT(rsp) ((be32_to_cpu(rsp->flags) >> 16) & 1) -#define RSPQ_OVERFLOW(rsp) ((be32_to_cpu(rsp->flags) >> 17) & 1) -#define RSPQ_AN(rsp) ((be32_to_cpu(rsp->flags) >> 18) & 1) -#define RSPQ_SE(rsp) ((be32_to_cpu(rsp->flags) >> 19) & 1) -#define RSPQ_NOTIFY(rsp) ((be32_to_cpu(rsp->flags) >> 20) & 1) -#define RSPQ_CQBRANCH(rsp) ((be32_to_cpu(rsp->flags) >> 21) & 1) -#define RSPQ_CREDIT_THRESH(rsp) ((be32_to_cpu(rsp->flags) >> 22) & 1) - -struct respQ_msg_t { - __be32 flags; /* flit 0 */ - __be32 cq_ptrid; - __be64 rsvd; /* flit 1 */ - struct t3_cqe cqe; /* flits 2-3 */ -}; - -enum t3_cq_opcode { - CQ_ARM_AN = 0x2, - CQ_ARM_SE = 0x6, - CQ_FORCE_AN = 0x3, - CQ_CREDIT_UPDATE = 0x7 -}; - -int cxio_rdev_open(struct cxio_rdev *rdev); -void cxio_rdev_close(struct cxio_rdev *rdev); -int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq, - enum t3_cq_opcode op, u32 credit); -int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel); -void cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq); -void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); -void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); -int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq, - struct cxio_ucontext *uctx); -int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq, - struct cxio_ucontext *uctx); -int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode); -int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, - u32 pbl_addr, u32 pbl_size); -int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr); -int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr); -int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size, - u32 pbl_addr); -int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid); -int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr); -int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag); -int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr); -void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb); -void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb); -u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp); -void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid); -int __init cxio_hal_init(void); -void __exit cxio_hal_exit(void); -int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); -int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); -void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count); -void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count); -void cxio_flush_hw_cq(struct t3_cq *cq); -int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit); -int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb); - -#ifdef pr_fmt -#undef pr_fmt -#endif - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#endif diff --git 
a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c deleted file mode 100644 index c6e7bc4420b6..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -/* Crude resource management */ -#include -#include -#include -#include -#include -#include -#include "cxio_resource.h" -#include "cxio_hal.h" - -static struct kfifo rhdl_fifo; -static spinlock_t rhdl_fifo_lock; - -#define RANDOM_SIZE 16 - -static int __cxio_init_resource_fifo(struct kfifo *fifo, - spinlock_t *fifo_lock, - u32 nr, u32 skip_low, - u32 skip_high, - int random) -{ - u32 i, j, entry = 0, idx; - u32 random_bytes; - u32 rarray[16]; - spin_lock_init(fifo_lock); - - if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL)) - return -ENOMEM; - - for (i = 0; i < skip_low + skip_high; i++) - kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32)); - if (random) { - j = 0; - random_bytes = prandom_u32(); - for (i = 0; i < RANDOM_SIZE; i++) - rarray[i] = i + skip_low; - for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { - if (j >= RANDOM_SIZE) { - j = 0; - random_bytes = prandom_u32(); - } - idx = (random_bytes >> (j * 2)) & 0xF; - kfifo_in(fifo, - (unsigned char *) &rarray[idx], - sizeof(u32)); - rarray[idx] = i; - j++; - } - for (i = 0; i < RANDOM_SIZE; i++) - kfifo_in(fifo, - (unsigned char *) &rarray[i], - sizeof(u32)); - } else - for (i = skip_low; i < nr - skip_high; i++) - kfifo_in(fifo, (unsigned char *) &i, sizeof(u32)); - - for (i = 0; i < skip_low + skip_high; i++) - if (kfifo_out_locked(fifo, (unsigned char *) &entry, - sizeof(u32), fifo_lock) != sizeof(u32)) - break; - return 0; -} - -static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock, - u32 nr, u32 skip_low, u32 skip_high) -{ - return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, - skip_high, 0)); -} - -static int cxio_init_resource_fifo_random(struct kfifo *fifo, - spinlock_t * fifo_lock, - u32 nr, u32 skip_low, u32 skip_high) -{ - - return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, - skip_high, 1)); -} - -static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) -{ - 
u32 i; - - spin_lock_init(&rdev_p->rscp->qpid_fifo_lock); - - if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32), - GFP_KERNEL)) - return -ENOMEM; - - for (i = 16; i < T3_MAX_NUM_QP; i++) - if (!(i & rdev_p->qpmask)) - kfifo_in(&rdev_p->rscp->qpid_fifo, - (unsigned char *) &i, sizeof(u32)); - return 0; -} - -int cxio_hal_init_rhdl_resource(u32 nr_rhdl) -{ - return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1, - 0); -} - -void cxio_hal_destroy_rhdl_resource(void) -{ - kfifo_free(&rhdl_fifo); -} - -/* nr_* must be power of 2 */ -int cxio_hal_init_resource(struct cxio_rdev *rdev_p, - u32 nr_tpt, u32 nr_pbl, - u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid) -{ - int err = 0; - struct cxio_hal_resource *rscp; - - rscp = kmalloc(sizeof(*rscp), GFP_KERNEL); - if (!rscp) - return -ENOMEM; - rdev_p->rscp = rscp; - err = cxio_init_resource_fifo_random(&rscp->tpt_fifo, - &rscp->tpt_fifo_lock, - nr_tpt, 1, 0); - if (err) - goto tpt_err; - err = cxio_init_qpid_fifo(rdev_p); - if (err) - goto qpid_err; - err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, - nr_cqid, 1, 0); - if (err) - goto cqid_err; - err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, - nr_pdid, 1, 0); - if (err) - goto pdid_err; - return 0; -pdid_err: - kfifo_free(&rscp->cqid_fifo); -cqid_err: - kfifo_free(&rscp->qpid_fifo); -qpid_err: - kfifo_free(&rscp->tpt_fifo); -tpt_err: - return -ENOMEM; -} - -/* - * returns 0 if no resource available - */ -static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock) -{ - u32 entry; - if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) - return entry; - else - return 0; /* fifo emptry */ -} - -static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock, - u32 entry) -{ - BUG_ON( - kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock) - == 0); -} - -u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock); -} - -void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag) -{ - cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag); -} - -u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) -{ - u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo, - &rscp->qpid_fifo_lock); - pr_debug("%s qpid 0x%x\n", __func__, qpid); - return qpid; -} - -void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid) -{ - pr_debug("%s qpid 0x%x\n", __func__, qpid); - cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid); -} - -u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock); -} - -void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid) -{ - cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid); -} - -u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock); -} - -void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid) -{ - cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid); -} - -void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) -{ - kfifo_free(&rscp->tpt_fifo); - kfifo_free(&rscp->cqid_fifo); - kfifo_free(&rscp->qpid_fifo); - kfifo_free(&rscp->pdid_fifo); - kfree(rscp); -} - -/* - * PBL Memory Manager. Uses Linux generic allocator. 
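
The get/put helpers above treat each resource class as a pool of pre-loaded 32-bit IDs sitting in a kfifo behind a spinlock. The same pattern reduced to a sketch, with invented names and with 0 reserved as the "pool empty" value, as in the original:

#include <linux/errno.h>
#include <linux/kfifo.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct id_pool {
	struct kfifo fifo;
	spinlock_t lock;
};

/* Pre-load IDs [1, nr); 0 is reserved to mean "pool empty". */
static int id_pool_init(struct id_pool *p, u32 nr)
{
	u32 i;

	spin_lock_init(&p->lock);
	if (kfifo_alloc(&p->fifo, nr * sizeof(u32), GFP_KERNEL))
		return -ENOMEM;
	for (i = 1; i < nr; i++)
		kfifo_in(&p->fifo, (unsigned char *)&i, sizeof(u32));
	return 0;
}

static u32 id_pool_get(struct id_pool *p)
{
	u32 id;

	if (kfifo_out_locked(&p->fifo, (unsigned char *)&id, sizeof(u32), &p->lock))
		return id;
	return 0;				/* pool exhausted */
}

static void id_pool_put(struct id_pool *p, u32 id)
{
	kfifo_in_locked(&p->fifo, (unsigned char *)&id, sizeof(u32), &p->lock);
}
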
- */ - -#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */ - -u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size) -{ - unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size); - pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size); - return (u32)addr; -} - -void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) -{ - pr_debug("%s addr 0x%x size %d\n", __func__, addr, size); - gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); -} - -int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p) -{ - unsigned pbl_start, pbl_chunk; - - rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); - if (!rdev_p->pbl_pool) - return -ENOMEM; - - pbl_start = rdev_p->rnic_info.pbl_base; - pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1; - - while (pbl_start < rdev_p->rnic_info.pbl_top) { - pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1, - pbl_chunk); - if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) { - pr_debug("%s failed to add PBL chunk (%x/%x)\n", - __func__, pbl_start, pbl_chunk); - if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) { - pr_warn("%s: Failed to add all PBL chunks (%x/%x)\n", - __func__, pbl_start, - rdev_p->rnic_info.pbl_top - pbl_start); - return 0; - } - pbl_chunk >>= 1; - } else { - pr_debug("%s added PBL chunk (%x/%x)\n", - __func__, pbl_start, pbl_chunk); - pbl_start += pbl_chunk; - } - } - - return 0; -} - -void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) -{ - gen_pool_destroy(rdev_p->pbl_pool); -} - -/* - * RQT Memory Manager. Uses Linux generic allocator. - */ - -#define MIN_RQT_SHIFT 10 /* 1KB == mini RQT size (16 entries) */ -#define RQT_CHUNK 2*1024*1024 - -u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size) -{ - unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6); - pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); - return (u32)addr; -} - -void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) -{ - pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6); - gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); -} - -int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p) -{ - unsigned long i; - rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); - if (rdev_p->rqt_pool) - for (i = rdev_p->rnic_info.rqt_base; - i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1; - i += RQT_CHUNK) - gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1); - return rdev_p->rqt_pool ? 0 : -ENOMEM; -} - -void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p) -{ - gen_pool_destroy(rdev_p->rqt_pool); -} diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h deleted file mode 100644 index a2703a3d882d..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
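
cxio_hal_pblpool_create hands the adapter's PBL address range to a genalloc pool and halves the chunk size whenever gen_pool_add() fails, so a large or awkwardly sized range still ends up mostly covered. A compressed sketch of that retry loop; the bounds, names and 256-byte minimum order are placeholders chosen to mirror the PBL case:

#include <linux/genalloc.h>

#define DEMO_MIN_SHIFT 8	/* 256-byte minimum allocation, as for PBLs */

static struct gen_pool *demo_pool_create(unsigned long base, unsigned long top)
{
	struct gen_pool *pool = gen_pool_create(DEMO_MIN_SHIFT, -1);
	unsigned long start = base, chunk = top - base + 1;

	if (!pool)
		return NULL;
	while (start < top) {
		if (top - start + 1 < chunk)
			chunk = top - start + 1;
		if (gen_pool_add(pool, start, chunk, -1)) {
			/* Metadata allocation for this chunk failed. */
			if (chunk <= 1024UL << DEMO_MIN_SHIFT)
				break;		/* give up on the remainder */
			chunk >>= 1;		/* retry with a smaller chunk */
		} else {
			start += chunk;
		}
	}
	return pool;
}
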
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __CXIO_RESOURCE_H__ -#define __CXIO_RESOURCE_H__ - -#include -#include -#include -#include -#include -#include -#include -#include "cxio_hal.h" - -extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl); -extern void cxio_hal_destroy_rhdl_resource(void); -extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p, - u32 nr_tpt, u32 nr_pbl, - u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, - u32 nr_pdid); -extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag); -extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid); -extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid); -extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp); - -#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base ) -extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p); -extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p); -extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size); -extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size); - -#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base ) -extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p); -extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p); -extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size); -extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size); -#endif diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h deleted file mode 100644 index 53aa5c36247a..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ /dev/null @@ -1,802 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __CXIO_WR_H__ -#define __CXIO_WR_H__ - -#include -#include -#include -#include "firmware_exports.h" - -#define T3_MAX_SGE 4 -#define T3_MAX_INLINE 64 -#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3) -#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024) -#define T3_STAG0_PAGE_SHIFT 15 - -#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) -#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ - ((rptr)!=(wptr)) ) -#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1)) -#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<> S_FW_RIWR_OP)) & M_FW_RIWR_OP) - -#define S_FW_RIWR_SOPEOP 22 -#define M_FW_RIWR_SOPEOP 0x3 -#define V_FW_RIWR_SOPEOP(x) ((x) << S_FW_RIWR_SOPEOP) - -#define S_FW_RIWR_FLAGS 8 -#define M_FW_RIWR_FLAGS 0x3fffff -#define V_FW_RIWR_FLAGS(x) ((x) << S_FW_RIWR_FLAGS) -#define G_FW_RIWR_FLAGS(x) ((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS) - -#define S_FW_RIWR_TID 8 -#define V_FW_RIWR_TID(x) ((x) << S_FW_RIWR_TID) - -#define S_FW_RIWR_LEN 0 -#define V_FW_RIWR_LEN(x) ((x) << S_FW_RIWR_LEN) - -#define S_FW_RIWR_GEN 31 -#define V_FW_RIWR_GEN(x) ((x) << S_FW_RIWR_GEN) - -struct t3_sge { - __be32 stag; - __be32 len; - __be64 to; -}; - -/* If num_sgle is zero, flit 5+ contains immediate data.*/ -struct t3_send_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - - u8 rdmaop; /* 2 */ - u8 reserved[3]; - __be32 rem_stag; - __be32 plen; /* 3 */ - __be32 num_sgle; - struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */ -}; - -#define T3_MAX_FASTREG_DEPTH 10 -#define T3_MAX_FASTREG_FRAG 10 - -struct t3_fastreg_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 stag; /* 2 */ - __be32 len; - __be32 va_base_hi; /* 3 */ - __be32 va_base_lo_fbo; - __be32 page_type_perms; /* 4 */ - __be32 reserved1; - __be64 pbl_addrs[0]; /* 5+ */ -}; - -/* - * If a fastreg wr spans multiple wqes, then the 2nd fragment look like this. 
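
The queue macros above assume free-running 32-bit read/write pointers: pointers are only masked when converted to an index, emptiness and fullness fall out of plain pointer arithmetic, and a generation bit derived from the pointer flips on every wrap so the consumer can recognise newly written entries. An illustrative restatement of that convention in stand-alone C; the helper names are mine, only the Q_GENBIT polarity is copied from above:

#include <stdint.h>
#include <stdio.h>

/* Free-running pointers: mask only when turning a pointer into a ring index. */
static inline uint32_t ptr2idx(uint32_t ptr, unsigned size_log2)
{
	return ptr & ((1U << size_log2) - 1);
}

static inline int ring_empty(uint32_t rptr, uint32_t wptr)
{
	return rptr == wptr;
}

static inline int ring_full(uint32_t rptr, uint32_t wptr, unsigned size_log2)
{
	return (wptr - rptr) >= (1U << size_log2);
}

/* Same polarity as Q_GENBIT above: 1 on the first pass, flips on each wrap. */
static inline unsigned genbit(uint32_t ptr, unsigned size_log2)
{
	return !((ptr >> size_log2) & 1);
}

int main(void)
{
	unsigned log2 = 3;		/* an 8-entry ring */
	uint32_t rptr = 5, wptr = 11;

	printf("empty=%d full=%d idx=%u gen=%u\n",
	       ring_empty(rptr, wptr), ring_full(rptr, wptr, log2),
	       ptr2idx(wptr, log2), genbit(wptr, log2));
	return 0;
}
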
- */ -struct t3_pbl_frag { - struct fw_riwrh wrh; /* 0 */ - __be64 pbl_addrs[14]; /* 1..14 */ -}; - -#define S_FR_PAGE_COUNT 24 -#define M_FR_PAGE_COUNT 0xff -#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT) -#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT) - -#define S_FR_PAGE_SIZE 16 -#define M_FR_PAGE_SIZE 0x1f -#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE) -#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE) - -#define S_FR_TYPE 8 -#define M_FR_TYPE 0x1 -#define V_FR_TYPE(x) ((x) << S_FR_TYPE) -#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE) - -#define S_FR_PERMS 0 -#define M_FR_PERMS 0xff -#define V_FR_PERMS(x) ((x) << S_FR_PERMS) -#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS) - -struct t3_local_inv_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 stag; /* 2 */ - __be32 reserved; -}; - -struct t3_rdma_write_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 rdmaop; /* 2 */ - u8 reserved[3]; - __be32 stag_sink; - __be64 to_sink; /* 3 */ - __be32 plen; /* 4 */ - __be32 num_sgle; - struct t3_sge sgl[T3_MAX_SGE]; /* 5+ */ -}; - -struct t3_rdma_read_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 rdmaop; /* 2 */ - u8 local_inv; - u8 reserved[2]; - __be32 rem_stag; - __be64 rem_to; /* 3 */ - __be32 local_stag; /* 4 */ - __be32 local_len; - __be64 local_to; /* 5 */ -}; - -struct t3_bind_mw_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u16 reserved; /* 2 */ - u8 type; - u8 perms; - __be32 mr_stag; - __be32 mw_stag; /* 3 */ - __be32 mw_len; - __be64 mw_va; /* 4 */ - __be32 mr_pbl_addr; /* 5 */ - u8 reserved2[3]; - u8 mr_pagesz; -}; - -struct t3_receive_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 pagesz[T3_MAX_SGE]; - __be32 num_sgle; /* 2 */ - struct t3_sge sgl[T3_MAX_SGE]; /* 3+ */ - __be32 pbl_addr[T3_MAX_SGE]; -}; - -struct t3_bypass_wr { - struct fw_riwrh wrh; - union t3_wrid wrid; /* 1 */ -}; - -struct t3_modify_qp_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 flags; /* 2 */ - __be32 quiesce; /* 2 */ - __be32 max_ird; /* 3 */ - __be32 max_ord; /* 3 */ - __be64 sge_cmd; /* 4 */ - __be64 ctx1; /* 5 */ - __be64 ctx0; /* 6 */ -}; - -enum t3_modify_qp_flags { - MODQP_QUIESCE = 0x01, - MODQP_MAX_IRD = 0x02, - MODQP_MAX_ORD = 0x04, - MODQP_WRITE_EC = 0x08, - MODQP_READ_EC = 0x10, -}; - - -enum t3_mpa_attrs { - uP_RI_MPA_RX_MARKER_ENABLE = 0x1, - uP_RI_MPA_TX_MARKER_ENABLE = 0x2, - uP_RI_MPA_CRC_ENABLE = 0x4, - uP_RI_MPA_IETF_ENABLE = 0x8 -} __packed; - -enum t3_qp_caps { - uP_RI_QP_RDMA_READ_ENABLE = 0x01, - uP_RI_QP_RDMA_WRITE_ENABLE = 0x02, - uP_RI_QP_BIND_ENABLE = 0x04, - uP_RI_QP_FAST_REGISTER_ENABLE = 0x08, - uP_RI_QP_STAG0_ENABLE = 0x10 -} __packed; - -enum rdma_init_rtr_types { - RTR_READ = 1, - RTR_WRITE = 2, - RTR_SEND = 3, -}; - -#define S_RTR_TYPE 2 -#define M_RTR_TYPE 0x3 -#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE) -#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE) - -#define S_CHAN 4 -#define M_CHAN 0x3 -#define V_CHAN(x) ((x) << S_CHAN) -#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN) - -struct t3_rdma_init_attr { - u32 tid; - u32 qpid; - u32 pdid; - u32 scqid; - u32 rcqid; - u32 rq_addr; - u32 rq_size; - enum t3_mpa_attrs mpaattrs; - enum t3_qp_caps qpcaps; - u16 tcp_emss; - u32 ord; - u32 ird; - u64 qp_dma_addr; - u32 qp_dma_size; - enum rdma_init_rtr_types rtr_type; - u16 flags; - u16 rqe_count; - u32 irs; - u32 chan; -}; - -struct t3_rdma_init_wr { - 
struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 qpid; /* 2 */ - __be32 pdid; - __be32 scqid; /* 3 */ - __be32 rcqid; - __be32 rq_addr; /* 4 */ - __be32 rq_size; - u8 mpaattrs; /* 5 */ - u8 qpcaps; - __be16 ulpdu_size; - __be16 flags_rtr_type; - __be16 rqe_count; - __be32 ord; /* 6 */ - __be32 ird; - __be64 qp_dma_addr; /* 7 */ - __be32 qp_dma_size; /* 8 */ - __be32 irs; -}; - -struct t3_genbit { - u64 flit[15]; - __be64 genbit; -}; - -struct t3_wq_in_err { - u64 flit[13]; - u64 err; -}; - -enum rdma_init_wr_flags { - MPA_INITIATOR = (1<<0), - PRIV_QP = (1<<1), -}; - -union t3_wr { - struct t3_send_wr send; - struct t3_rdma_write_wr write; - struct t3_rdma_read_wr read; - struct t3_receive_wr recv; - struct t3_fastreg_wr fastreg; - struct t3_pbl_frag pbl_frag; - struct t3_local_inv_wr local_inv; - struct t3_bind_mw_wr bind; - struct t3_bypass_wr bypass; - struct t3_rdma_init_wr init; - struct t3_modify_qp_wr qp_mod; - struct t3_genbit genbit; - struct t3_wq_in_err wq_in_err; - __be64 flit[16]; -}; - -#define T3_SQ_CQE_FLIT 13 -#define T3_SQ_COOKIE_FLIT 14 - -#define T3_RQ_COOKIE_FLIT 13 -#define T3_RQ_CQE_FLIT 14 - -static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe) -{ - return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags)); -} - -enum t3_wr_hdr_bits { - T3_EOP = 1, - T3_SOP = 2, - T3_SOPEOP = T3_EOP|T3_SOP, -}; - -static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op, - enum t3_wr_flags flags, u8 genbit, u32 tid, - u8 len, u8 sopeop) -{ - wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) | - V_FW_RIWR_SOPEOP(sopeop) | - V_FW_RIWR_FLAGS(flags)); - wmb(); - wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) | - V_FW_RIWR_TID(tid) | - V_FW_RIWR_LEN(len)); - /* 2nd gen bit... */ - ((union t3_wr *)wqe)->genbit.genbit = cpu_to_be64(genbit); -} - -/* - * T3 ULP2_TX commands - */ -enum t3_utx_mem_op { - T3_UTX_MEM_READ = 2, - T3_UTX_MEM_WRITE = 3 -}; - -/* T3 MC7 RDMA TPT entry format */ - -enum tpt_mem_type { - TPT_NON_SHARED_MR = 0x0, - TPT_SHARED_MR = 0x1, - TPT_MW = 0x2, - TPT_MW_RELAXED_PROTECTION = 0x3 -}; - -enum tpt_addr_type { - TPT_ZBTO = 0, - TPT_VATO = 1 -}; - -enum tpt_mem_perm { - TPT_MW_BIND = 0x10, - TPT_LOCAL_READ = 0x8, - TPT_LOCAL_WRITE = 0x4, - TPT_REMOTE_READ = 0x2, - TPT_REMOTE_WRITE = 0x1 -}; - -struct tpt_entry { - __be32 valid_stag_pdid; - __be32 flags_pagesize_qpid; - - __be32 rsvd_pbl_addr; - __be32 len; - __be32 va_hi; - __be32 va_low_or_fbo; - - __be32 rsvd_bind_cnt_or_pstag; - __be32 rsvd_pbl_size; -}; - -#define S_TPT_VALID 31 -#define V_TPT_VALID(x) ((x) << S_TPT_VALID) -#define F_TPT_VALID V_TPT_VALID(1U) - -#define S_TPT_STAG_KEY 23 -#define M_TPT_STAG_KEY 0xFF -#define V_TPT_STAG_KEY(x) ((x) << S_TPT_STAG_KEY) -#define G_TPT_STAG_KEY(x) (((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY) - -#define S_TPT_STAG_STATE 22 -#define V_TPT_STAG_STATE(x) ((x) << S_TPT_STAG_STATE) -#define F_TPT_STAG_STATE V_TPT_STAG_STATE(1U) - -#define S_TPT_STAG_TYPE 20 -#define M_TPT_STAG_TYPE 0x3 -#define V_TPT_STAG_TYPE(x) ((x) << S_TPT_STAG_TYPE) -#define G_TPT_STAG_TYPE(x) (((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE) - -#define S_TPT_PDID 0 -#define M_TPT_PDID 0xFFFFF -#define V_TPT_PDID(x) ((x) << S_TPT_PDID) -#define G_TPT_PDID(x) (((x) >> S_TPT_PDID) & M_TPT_PDID) - -#define S_TPT_PERM 28 -#define M_TPT_PERM 0xF -#define V_TPT_PERM(x) ((x) << S_TPT_PERM) -#define G_TPT_PERM(x) (((x) >> S_TPT_PERM) & M_TPT_PERM) - -#define S_TPT_REM_INV_DIS 27 -#define V_TPT_REM_INV_DIS(x) ((x) << S_TPT_REM_INV_DIS) -#define F_TPT_REM_INV_DIS 
V_TPT_REM_INV_DIS(1U) - -#define S_TPT_ADDR_TYPE 26 -#define V_TPT_ADDR_TYPE(x) ((x) << S_TPT_ADDR_TYPE) -#define F_TPT_ADDR_TYPE V_TPT_ADDR_TYPE(1U) - -#define S_TPT_MW_BIND_ENABLE 25 -#define V_TPT_MW_BIND_ENABLE(x) ((x) << S_TPT_MW_BIND_ENABLE) -#define F_TPT_MW_BIND_ENABLE V_TPT_MW_BIND_ENABLE(1U) - -#define S_TPT_PAGE_SIZE 20 -#define M_TPT_PAGE_SIZE 0x1F -#define V_TPT_PAGE_SIZE(x) ((x) << S_TPT_PAGE_SIZE) -#define G_TPT_PAGE_SIZE(x) (((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE) - -#define S_TPT_PBL_ADDR 0 -#define M_TPT_PBL_ADDR 0x1FFFFFFF -#define V_TPT_PBL_ADDR(x) ((x) << S_TPT_PBL_ADDR) -#define G_TPT_PBL_ADDR(x) (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR) - -#define S_TPT_QPID 0 -#define M_TPT_QPID 0xFFFFF -#define V_TPT_QPID(x) ((x) << S_TPT_QPID) -#define G_TPT_QPID(x) (((x) >> S_TPT_QPID) & M_TPT_QPID) - -#define S_TPT_PSTAG 0 -#define M_TPT_PSTAG 0xFFFFFF -#define V_TPT_PSTAG(x) ((x) << S_TPT_PSTAG) -#define G_TPT_PSTAG(x) (((x) >> S_TPT_PSTAG) & M_TPT_PSTAG) - -#define S_TPT_PBL_SIZE 0 -#define M_TPT_PBL_SIZE 0xFFFFF -#define V_TPT_PBL_SIZE(x) ((x) << S_TPT_PBL_SIZE) -#define G_TPT_PBL_SIZE(x) (((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE) - -/* - * CQE defs - */ -struct t3_cqe { - __be32 header; - __be32 len; - union { - struct { - __be32 stag; - __be32 msn; - } rcqe; - struct { - u32 wrid_hi; - u32 wrid_low; - } scqe; - } u; -}; - -#define S_CQE_OOO 31 -#define M_CQE_OOO 0x1 -#define G_CQE_OOO(x) ((((x) >> S_CQE_OOO)) & M_CQE_OOO) -#define V_CEQ_OOO(x) ((x)<> S_CQE_QPID)) & M_CQE_QPID) -#define V_CQE_QPID(x) ((x)<> S_CQE_SWCQE)) & M_CQE_SWCQE) -#define V_CQE_SWCQE(x) ((x)<> S_CQE_GENBIT) & M_CQE_GENBIT) -#define V_CQE_GENBIT(x) ((x)<> S_CQE_STATUS)) & M_CQE_STATUS) -#define V_CQE_STATUS(x) ((x)<> S_CQE_TYPE)) & M_CQE_TYPE) -#define V_CQE_TYPE(x) ((x)<> S_CQE_OPCODE)) & M_CQE_OPCODE) -#define V_CQE_OPCODE(x) ((x)<queue[1 << cq->size_log2])->cq_err; -} - -static inline void cxio_set_cq_in_error(struct t3_cq *cq) -{ - ((struct t3_cq_status_page *) - &cq->queue[1 << cq->size_log2])->cq_err = 1; -} - -static inline void cxio_set_wq_in_error(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err |= 1; -} - -static inline void cxio_disable_wq_db(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err |= 2; -} - -static inline void cxio_enable_wq_db(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err &= ~2; -} - -static inline int cxio_wq_db_enabled(struct t3_wq *wq) -{ - return !(wq->queue->wq_in_err.err & 2); -} - -static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2)); - if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe)) - return cqe; - return NULL; -} - -static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2)); - return cqe; - } - return NULL; -} - -static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2)); - return cqe; - } - cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2)); - if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe)) - return cqe; - return NULL; -} - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c deleted file mode 100644 index 56a8ab6210cf..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. 
All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include -#include - -#include - -#include "cxgb3_offload.h" -#include "iwch_provider.h" -#include -#include "iwch.h" -#include "iwch_cm.h" - -#define DRV_VERSION "1.1" - -MODULE_AUTHOR("Boyd Faulkner, Steve Wise"); -MODULE_DESCRIPTION("Chelsio T3 RDMA Driver"); -MODULE_LICENSE("Dual BSD/GPL"); - -static void open_rnic_dev(struct t3cdev *); -static void close_rnic_dev(struct t3cdev *); -static void iwch_event_handler(struct t3cdev *, u32, u32); - -struct cxgb3_client t3c_client = { - .name = "iw_cxgb3", - .add = open_rnic_dev, - .remove = close_rnic_dev, - .handlers = t3c_handlers, - .redirect = iwch_ep_redirect, - .event_handler = iwch_event_handler -}; - -static LIST_HEAD(dev_list); -static DEFINE_MUTEX(dev_mutex); - -static void disable_dbs(struct iwch_dev *rnicp) -{ - unsigned long index; - struct iwch_qp *qhp; - - xa_lock_irq(&rnicp->qps); - xa_for_each(&rnicp->qps, index, qhp) - cxio_disable_wq_db(&qhp->wq); - xa_unlock_irq(&rnicp->qps); -} - -static void enable_dbs(struct iwch_dev *rnicp, int ring_db) -{ - unsigned long index; - struct iwch_qp *qhp; - - xa_lock_irq(&rnicp->qps); - xa_for_each(&rnicp->qps, index, qhp) { - if (ring_db) - ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, - qhp->wq.qpid); - cxio_enable_wq_db(&qhp->wq); - } - xa_unlock_irq(&rnicp->qps); -} - -static void iwch_db_drop_task(struct work_struct *work) -{ - struct iwch_dev *rnicp = container_of(work, struct iwch_dev, - db_drop_task.work); - enable_dbs(rnicp, 1); -} - -static void rnic_init(struct iwch_dev *rnicp) -{ - pr_debug("%s iwch_dev %p\n", __func__, rnicp); - xa_init_flags(&rnicp->cqs, XA_FLAGS_LOCK_IRQ); - xa_init_flags(&rnicp->qps, XA_FLAGS_LOCK_IRQ); - xa_init_flags(&rnicp->mrs, XA_FLAGS_LOCK_IRQ); - INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task); - - rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; - rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; - rnicp->attr.max_sge_per_wr = T3_MAX_SGE; - rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE; - rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1; - rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH; - rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev); - rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; - rnicp->attr.max_pds = 
T3_MAX_NUM_PD - 1; - rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK; - rnicp->attr.max_mr_size = T3_MAX_MR_SIZE; - rnicp->attr.can_resize_wq = 0; - rnicp->attr.max_rdma_reads_per_qp = 8; - rnicp->attr.max_rdma_read_resources = - rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps; - rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */ - rnicp->attr.max_rdma_read_depth = - rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps; - rnicp->attr.rq_overflow_handled = 0; - rnicp->attr.can_modify_ird = 0; - rnicp->attr.can_modify_ord = 0; - rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1; - rnicp->attr.stag0_value = 1; - rnicp->attr.zbva_support = 1; - rnicp->attr.local_invalidate_fence = 1; - rnicp->attr.cq_overflow_detection = 1; - return; -} - -static void open_rnic_dev(struct t3cdev *tdev) -{ - struct iwch_dev *rnicp; - - pr_debug("%s t3cdev %p\n", __func__, tdev); - pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION); - rnicp = ib_alloc_device(iwch_dev, ibdev); - if (!rnicp) { - pr_err("Cannot allocate ib device\n"); - return; - } - rnicp->rdev.ulp = rnicp; - rnicp->rdev.t3cdev_p = tdev; - - mutex_lock(&dev_mutex); - - if (cxio_rdev_open(&rnicp->rdev)) { - mutex_unlock(&dev_mutex); - pr_err("Unable to open CXIO rdev\n"); - ib_dealloc_device(&rnicp->ibdev); - return; - } - - rnic_init(rnicp); - - list_add_tail(&rnicp->entry, &dev_list); - mutex_unlock(&dev_mutex); - - if (iwch_register_device(rnicp)) { - pr_err("Unable to register device\n"); - close_rnic_dev(tdev); - } - pr_info("Initialized device %s\n", - pci_name(rnicp->rdev.rnic_info.pdev)); - return; -} - -static void close_rnic_dev(struct t3cdev *tdev) -{ - struct iwch_dev *dev, *tmp; - pr_debug("%s t3cdev %p\n", __func__, tdev); - mutex_lock(&dev_mutex); - list_for_each_entry_safe(dev, tmp, &dev_list, entry) { - if (dev->rdev.t3cdev_p == tdev) { - dev->rdev.flags = CXIO_ERROR_FATAL; - synchronize_net(); - cancel_delayed_work_sync(&dev->db_drop_task); - list_del(&dev->entry); - iwch_unregister_device(dev); - cxio_rdev_close(&dev->rdev); - WARN_ON(!xa_empty(&dev->cqs)); - WARN_ON(!xa_empty(&dev->qps)); - WARN_ON(!xa_empty(&dev->mrs)); - ib_dealloc_device(&dev->ibdev); - break; - } - } - mutex_unlock(&dev_mutex); -} - -static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id) -{ - struct cxio_rdev *rdev = tdev->ulp; - struct iwch_dev *rnicp; - struct ib_event event; - u32 portnum = port_id + 1; - int dispatch = 0; - - if (!rdev) - return; - rnicp = rdev_to_iwch_dev(rdev); - switch (evt) { - case OFFLOAD_STATUS_DOWN: { - rdev->flags = CXIO_ERROR_FATAL; - synchronize_net(); - event.event = IB_EVENT_DEVICE_FATAL; - dispatch = 1; - break; - } - case OFFLOAD_PORT_DOWN: { - event.event = IB_EVENT_PORT_ERR; - dispatch = 1; - break; - } - case OFFLOAD_PORT_UP: { - event.event = IB_EVENT_PORT_ACTIVE; - dispatch = 1; - break; - } - case OFFLOAD_DB_FULL: { - disable_dbs(rnicp); - break; - } - case OFFLOAD_DB_EMPTY: { - enable_dbs(rnicp, 1); - break; - } - case OFFLOAD_DB_DROP: { - unsigned long delay = 1000; - unsigned short r; - - disable_dbs(rnicp); - get_random_bytes(&r, 2); - delay += r & 1023; - - /* - * delay is between 1000-2023 usecs. 
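
On OFFLOAD_DB_DROP the driver stops ringing doorbells and re-enables them from delayed work after a randomized pause of 1000 to 2023 microseconds, so all users do not slam the doorbell FIFO again at the same moment. A bare sketch of that backoff; the demo_* names and the empty work handler are placeholders:

#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/workqueue.h>

static struct delayed_work demo_db_work;

static void demo_db_reenable(struct work_struct *work)
{
	/* The real handler re-enables doorbells and re-rings each active QP. */
}

static void demo_db_setup(void)
{
	INIT_DELAYED_WORK(&demo_db_work, demo_db_reenable);
}

static void demo_db_drop(void)
{
	unsigned short r;
	unsigned long delay_us = 1000;

	get_random_bytes(&r, sizeof(r));
	delay_us += r & 1023;			/* 1000..2023 usecs */
	schedule_delayed_work(&demo_db_work, usecs_to_jiffies(delay_us));
}
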
- */ - schedule_delayed_work(&rnicp->db_drop_task, - usecs_to_jiffies(delay)); - break; - } - } - - if (dispatch) { - event.device = &rnicp->ibdev; - event.element.port_num = portnum; - ib_dispatch_event(&event); - } - - return; -} - -static int __init iwch_init_module(void) -{ - int err; - - err = cxio_hal_init(); - if (err) - return err; - err = iwch_cm_init(); - if (err) - return err; - cxio_register_ev_cb(iwch_ev_dispatch); - cxgb3_register_client(&t3c_client); - return 0; -} - -static void __exit iwch_exit_module(void) -{ - cxgb3_unregister_client(&t3c_client); - cxio_unregister_ev_cb(iwch_ev_dispatch); - iwch_cm_term(); - cxio_hal_exit(); -} - -module_init(iwch_init_module); -module_exit(iwch_exit_module); diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h deleted file mode 100644 index 310a937bffcf..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __IWCH_H__ -#define __IWCH_H__ - -#include -#include -#include -#include -#include - -#include - -#include "cxio_hal.h" -#include "cxgb3_offload.h" - -struct iwch_pd; -struct iwch_cq; -struct iwch_qp; -struct iwch_mr; - -struct iwch_rnic_attributes { - u32 max_qps; - u32 max_wrs; /* Max for any SQ/RQ */ - u32 max_sge_per_wr; - u32 max_sge_per_rdma_write_wr; /* for RDMA Write WR */ - u32 max_cqs; - u32 max_cqes_per_cq; - u32 max_mem_regs; - u32 max_phys_buf_entries; /* for phys buf list */ - u32 max_pds; - - /* - * The memory page sizes supported by this RNIC. - * Bit position i in bitmap indicates page of - * size (4k)^i. Phys block list mode unsupported. - */ - u32 mem_pgsizes_bitmask; - u64 max_mr_size; - u8 can_resize_wq; - - /* - * The maximum number of RDMA Reads that can be outstanding - * per QP with this RNIC as the target. - */ - u32 max_rdma_reads_per_qp; - - /* - * The maximum number of resources used for RDMA Reads - * by this RNIC with this RNIC as the target. - */ - u32 max_rdma_read_resources; - - /* - * The max depth per QP for initiation of RDMA Read - * by this RNIC. 
- */ - u32 max_rdma_read_qp_depth; - - /* - * The maximum depth for initiation of RDMA Read - * operations by this RNIC on all QPs - */ - u32 max_rdma_read_depth; - u8 rq_overflow_handled; - u32 can_modify_ird; - u32 can_modify_ord; - u32 max_mem_windows; - u32 stag0_value; - u8 zbva_support; - u8 local_invalidate_fence; - u32 cq_overflow_detection; -}; - -struct iwch_dev { - struct ib_device ibdev; - struct cxio_rdev rdev; - u32 device_cap_flags; - struct iwch_rnic_attributes attr; - struct xarray cqs; - struct xarray qps; - struct xarray mrs; - struct list_head entry; - struct delayed_work db_drop_task; -}; - -static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) -{ - return container_of(ibdev, struct iwch_dev, ibdev); -} - -static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev) -{ - return container_of(rdev, struct iwch_dev, rdev); -} - -static inline int t3b_device(const struct iwch_dev *rhp) -{ - return rhp->rdev.t3cdev_p->type == T3B; -} - -static inline int t3a_device(const struct iwch_dev *rhp) -{ - return rhp->rdev.t3cdev_p->type == T3A; -} - -static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid) -{ - return xa_load(&rhp->cqs, cqid); -} - -static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid) -{ - return xa_load(&rhp->qps, qpid); -} - -static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid) -{ - return xa_load(&rhp->mrs, mmid); -} - -extern struct cxgb3_client t3c_client; -extern cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS]; -extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb); - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c deleted file mode 100644 index 0bca72cb4d9a..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ /dev/null @@ -1,2258 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
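
The lookup helpers above resolve hardware IDs (CQ ID, QP ID, memory handle) to driver objects through per-device xarrays created with XA_FLAGS_LOCK_IRQ, which keeps them safe to search from interrupt-driven event paths. A bare-bones sketch of the same publish/lookup pattern; the structure and function names are invented for illustration:

#include <linux/xarray.h>

struct demo_qp {
	u32 qpid;
	/* ... rest of the per-QP state ... */
};

static DEFINE_XARRAY_FLAGS(demo_qps, XA_FLAGS_LOCK_IRQ);

/* Publish a QP under its hardware ID; the xarray does its own locking. */
static int demo_insert_qp(struct demo_qp *qhp)
{
	return xa_err(xa_store_irq(&demo_qps, qhp->qpid, qhp, GFP_KERNEL));
}

/* Fast path: resolve a QP ID carried in a CQE or async event. */
static struct demo_qp *demo_get_qp(u32 qpid)
{
	return xa_load(&demo_qps, qpid);
}
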
- */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "tcb.h" -#include "cxgb3_offload.h" -#include "iwch.h" -#include "iwch_provider.h" -#include "iwch_cm.h" - -static char *states[] = { - "idle", - "listen", - "connecting", - "mpa_wait_req", - "mpa_req_sent", - "mpa_req_rcvd", - "mpa_rep_sent", - "fpdu_mode", - "aborting", - "closing", - "moribund", - "dead", - NULL, -}; - -int peer2peer = 0; -module_param(peer2peer, int, 0644); -MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); - -static int ep_timeout_secs = 60; -module_param(ep_timeout_secs, int, 0644); -MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " - "in seconds (default=60)"); - -static int mpa_rev = 1; -module_param(mpa_rev, int, 0644); -MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " - "1 is spec compliant. (default=1)"); - -static int markers_enabled = 0; -module_param(markers_enabled, int, 0644); -MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)"); - -static int crc_enabled = 1; -module_param(crc_enabled, int, 0644); -MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)"); - -static int rcv_win = 256 * 1024; -module_param(rcv_win, int, 0644); -MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)"); - -static int snd_win = 32 * 1024; -module_param(snd_win, int, 0644); -MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)"); - -static unsigned int nocong = 0; -module_param(nocong, uint, 0644); -MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)"); - -static unsigned int cong_flavor = 1; -module_param(cong_flavor, uint, 0644); -MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)"); - -static struct workqueue_struct *workq; - -static struct sk_buff_head rxq; - -static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); -static void ep_timeout(struct timer_list *t); -static void connect_reply_upcall(struct iwch_ep *ep, int status); - -static void start_ep_timer(struct iwch_ep *ep) -{ - pr_debug("%s ep %p\n", __func__, ep); - if (timer_pending(&ep->timer)) { - pr_debug("%s stopped / restarted timer ep %p\n", __func__, ep); - del_timer_sync(&ep->timer); - } else - get_ep(&ep->com); - ep->timer.expires = jiffies + ep_timeout_secs * HZ; - add_timer(&ep->timer); -} - -static void stop_ep_timer(struct iwch_ep *ep) -{ - pr_debug("%s ep %p\n", __func__, ep); - if (!timer_pending(&ep->timer)) { - WARN(1, "%s timer stopped when its not running! ep %p state %u\n", - __func__, ep, ep->com.state); - return; - } - del_timer_sync(&ep->timer); - put_ep(&ep->com); -} - -static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e) -{ - int error = 0; - struct cxio_rdev *rdev; - - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - kfree_skb(skb); - return -EIO; - } - error = l2t_send(tdev, skb, l2e); - if (error < 0) - kfree_skb(skb); - return error < 0 ? error : 0; -} - -int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) -{ - int error = 0; - struct cxio_rdev *rdev; - - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - kfree_skb(skb); - return -EIO; - } - error = cxgb3_ofld_send(tdev, skb); - if (error < 0) - kfree_skb(skb); - return error < 0 ? 
error : 0; -} - -static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) -{ - struct cpl_tid_release *req; - - skb = get_skb(skb, sizeof(*req), GFP_KERNEL); - if (!skb) - return; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); - skb->priority = CPL_PRIORITY_SETUP; - iwch_cxgb3_ofld_send(tdev, skb); - return; -} - -int iwch_quiesce_tid(struct iwch_ep *ep) -{ - struct cpl_set_tcb_field *req; - struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - - if (!skb) - return -ENOMEM; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); - req->reply = 0; - req->cpu_idx = 0; - req->word = htons(W_TCB_RX_QUIESCE); - req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); - req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE); - - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -int iwch_resume_tid(struct iwch_ep *ep) -{ - struct cpl_set_tcb_field *req; - struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - - if (!skb) - return -ENOMEM; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); - req->reply = 0; - req->cpu_idx = 0; - req->word = htons(W_TCB_RX_QUIESCE); - req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); - req->val = 0; - - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static void set_emss(struct iwch_ep *ep, u16 opt) -{ - pr_debug("%s ep %p opt %u\n", __func__, ep, opt); - ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40; - if (G_TCPOPT_TSTAMP(opt)) - ep->emss -= 12; - if (ep->emss < 128) - ep->emss = 128; - pr_debug("emss=%d\n", ep->emss); -} - -static enum iwch_ep_state state_read(struct iwch_ep_common *epc) -{ - unsigned long flags; - enum iwch_ep_state state; - - spin_lock_irqsave(&epc->lock, flags); - state = epc->state; - spin_unlock_irqrestore(&epc->lock, flags); - return state; -} - -static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) -{ - epc->state = new; -} - -static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) -{ - unsigned long flags; - - spin_lock_irqsave(&epc->lock, flags); - pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]); - __state_set(epc, new); - spin_unlock_irqrestore(&epc->lock, flags); - return; -} - -static void *alloc_ep(int size, gfp_t gfp) -{ - struct iwch_ep_common *epc; - - epc = kzalloc(size, gfp); - if (epc) { - kref_init(&epc->kref); - spin_lock_init(&epc->lock); - init_waitqueue_head(&epc->waitq); - } - pr_debug("%s alloc ep %p\n", __func__, epc); - return epc; -} - -void __free_ep(struct kref *kref) -{ - struct iwch_ep *ep; - ep = container_of(container_of(kref, struct iwch_ep_common, kref), - struct iwch_ep, com); - pr_debug("%s ep %p state %s\n", - __func__, ep, states[state_read(&ep->com)]); - if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { - cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); - dst_release(ep->dst); - l2t_release(ep->com.tdev, ep->l2t); - } - kfree(ep); -} - -static void release_ep_resources(struct iwch_ep *ep) -{ - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - set_bit(RELEASE_RESOURCES, &ep->com.flags); - 
put_ep(&ep->com); -} - -static int status2errno(int status) -{ - switch (status) { - case CPL_ERR_NONE: - return 0; - case CPL_ERR_CONN_RESET: - return -ECONNRESET; - case CPL_ERR_ARP_MISS: - return -EHOSTUNREACH; - case CPL_ERR_CONN_TIMEDOUT: - return -ETIMEDOUT; - case CPL_ERR_TCAM_FULL: - return -ENOMEM; - case CPL_ERR_CONN_EXIST: - return -EADDRINUSE; - default: - return -EIO; - } -} - -/* - * Try and reuse skbs already allocated... - */ -static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) -{ - if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) { - skb_trim(skb, 0); - skb_get(skb); - } else { - skb = alloc_skb(len, gfp); - } - return skb; -} - -static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip, - __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos) -{ - struct rtable *rt; - struct flowi4 fl4; - - rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, - peer_port, local_port, IPPROTO_TCP, - tos, 0); - if (IS_ERR(rt)) - return NULL; - return rt; -} - -static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu) -{ - int i = 0; - - while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) - ++i; - return i; -} - -static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb) -{ - pr_debug("%s t3cdev %p\n", __func__, dev); - kfree_skb(skb); -} - -/* - * Handle an ARP failure for an active open. - */ -static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb) -{ - pr_err("ARP failure during connect\n"); - kfree_skb(skb); -} - -/* - * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant - * and send it along. - */ -static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb) -{ - struct cpl_abort_req *req = cplhdr(skb); - - pr_debug("%s t3cdev %p\n", __func__, dev); - req->cmd = CPL_ABORT_NO_RST; - iwch_cxgb3_ofld_send(dev, skb); -} - -static int send_halfclose(struct iwch_ep *ep, gfp_t gfp) -{ - struct cpl_close_con_req *req; - struct sk_buff *skb; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), gfp); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, arp_failure_discard); - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid)); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) -{ - struct cpl_abort_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(skb, sizeof(*req), gfp); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, abort_arp_failure); - req = skb_put_zero(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); - req->cmd = CPL_ABORT_SEND_RST; - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_connect(struct iwch_ep *ep) -{ - struct cpl_act_open_req *req; - struct sk_buff *skb; - u32 opt0h, opt0l, opt2; - unsigned int mtu_idx; - int wscale; - - pr_debug("%s ep %p\n", __func__, ep); - - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - failed to alloc skb\n", 
__func__); - return -ENOMEM; - } - mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst)); - wscale = compute_wscale(rcv_win); - opt0h = V_NAGLE(0) | - V_NO_CONG(nocong) | - V_KEEP_ALIVE(1) | - F_TCAM_BYPASS | - V_WND_SCALE(wscale) | - V_MSS_IDX(mtu_idx) | - V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); - opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | - V_CONG_CONTROL_FLAVOR(cong_flavor); - skb->priority = CPL_PRIORITY_SETUP; - set_arp_failure_handler(skb, act_open_req_arp_failure); - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid)); - req->local_port = ep->com.local_addr.sin_port; - req->peer_port = ep->com.remote_addr.sin_port; - req->local_ip = ep->com.local_addr.sin_addr.s_addr; - req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; - req->opt0h = htonl(opt0h); - req->opt0l = htonl(opt0l); - req->params = 0; - req->opt2 = htonl(opt2); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - int len; - - pr_debug("%s ep %p pd_len %d\n", __func__, ep, ep->plen); - - BUG_ON(skb_cloned(skb)); - - mpalen = sizeof(*mpa) + ep->plen; - if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) { - kfree_skb(skb); - skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - connect_reply_upcall(ep, -ENOMEM); - return; - } - } - skb_trim(skb, 0); - skb_reserve(skb, sizeof(*req)); - skb_put(skb, mpalen); - skb->priority = CPL_PRIORITY_DATA; - mpa = (struct mpa_message *) skb->data; - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); - mpa->flags = (crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0); - mpa->private_data_size = htons(ep->plen); - mpa->revision = mpa_rev; - - if (ep->plen) - memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen); - - /* - * Reference the mpa skb. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. 
- */ - skb_get(skb); - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - len = skb->len; - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(len); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - BUG_ON(ep->mpa_skb); - ep->mpa_skb = skb; - iwch_l2t_send(ep->com.tdev, skb, ep->l2t); - start_ep_timer(ep); - state_set(&ep->com, MPA_REQ_SENT); - return; -} - -static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - struct sk_buff *skb; - - pr_debug("%s ep %p plen %d\n", __func__, ep, plen); - - mpalen = sizeof(*mpa) + plen; - - skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - cannot alloc skb!\n", __func__); - return -ENOMEM; - } - skb_reserve(skb, sizeof(*req)); - mpa = skb_put(skb, mpalen); - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); - mpa->flags = MPA_REJECT; - mpa->revision = mpa_rev; - mpa->private_data_size = htons(plen); - if (plen) - memcpy(mpa->private_data, pdata, plen); - - /* - * Reference the mpa skb again. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. - */ - skb_get(skb); - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(mpalen); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - BUG_ON(ep->mpa_skb); - ep->mpa_skb = skb; - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - int len; - struct sk_buff *skb; - - pr_debug("%s ep %p plen %d\n", __func__, ep, plen); - - mpalen = sizeof(*mpa) + plen; - - skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - cannot alloc skb!\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - skb_reserve(skb, sizeof(*req)); - mpa = skb_put(skb, mpalen); - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); - mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0); - mpa->revision = mpa_rev; - mpa->private_data_size = htons(plen); - if (plen) - memcpy(mpa->private_data, pdata, plen); - - /* - * Reference the mpa skb. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. 
- */ - skb_get(skb); - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - len = skb->len; - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(len); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - ep->mpa_skb = skb; - state_set(&ep->com, MPA_REP_SENT); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_act_establish *req = cplhdr(skb); - unsigned int tid = GET_TID(req); - - pr_debug("%s ep %p tid %d\n", __func__, ep, tid); - - dst_confirm(ep->dst); - - /* setup the hwtid for this connection */ - ep->hwtid = tid; - cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid); - - ep->snd_seq = ntohl(req->snd_isn); - ep->rcv_seq = ntohl(req->rcv_isn); - - set_emss(ep, ntohs(req->tcp_opt)); - - /* dealloc the atid */ - cxgb3_free_atid(ep->com.tdev, ep->atid); - - /* start MPA negotiation */ - send_mpa_req(ep, skb); - - return 0; -} - -static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) -{ - pr_debug("%s ep %p\n", __FILE__, ep); - state_set(&ep->com, ABORTING); - send_abort(ep, skb, gfp); -} - -static void close_complete_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CLOSE; - if (ep->com.cm_id) { - pr_debug("close complete delivered ep %p cm_id %p tid %d\n", - ep, ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void peer_close_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_DISCONNECT; - if (ep->com.cm_id) { - pr_debug("peer close delivered ep %p cm_id %p tid %d\n", - ep, ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } -} - -static void peer_abort_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CLOSE; - event.status = -ECONNRESET; - if (ep->com.cm_id) { - pr_debug("abort delivered ep %p cm_id %p tid %d\n", ep, - ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void connect_reply_upcall(struct iwch_ep *ep, int status) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p status %d\n", __func__, ep, status); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CONNECT_REPLY; - event.status = status; - memcpy(&event.local_addr, &ep->com.local_addr, - sizeof(ep->com.local_addr)); - memcpy(&event.remote_addr, &ep->com.remote_addr, - sizeof(ep->com.remote_addr)); - - if ((status == 0) || (status == -ECONNREFUSED)) { - event.private_data_len = ep->plen; - event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); - } - if (ep->com.cm_id) { - pr_debug("%s ep %p tid %d status %d\n", __func__, ep, - ep->hwtid, status); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } - if (status < 0) { - ep->com.cm_id->rem_ref(ep->com.cm_id); - 
ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void connect_request_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CONNECT_REQUEST; - memcpy(&event.local_addr, &ep->com.local_addr, - sizeof(ep->com.local_addr)); - memcpy(&event.remote_addr, &ep->com.remote_addr, - sizeof(ep->com.local_addr)); - event.private_data_len = ep->plen; - event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); - event.provider_data = ep; - /* - * Until ird/ord negotiation via MPAv2 support is added, send max - * supported values - */ - event.ird = event.ord = 8; - if (state_read(&ep->parent_ep->com) != DEAD) { - get_ep(&ep->com); - ep->parent_ep->com.cm_id->event_handler( - ep->parent_ep->com.cm_id, - &event); - } - put_ep(&ep->parent_ep->com); - ep->parent_ep = NULL; -} - -static void established_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_ESTABLISHED; - /* - * Until ird/ord negotiation via MPAv2 support is added, send max - * supported values - */ - event.ird = event.ord = 8; - if (ep->com.cm_id) { - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } -} - -static int update_rx_credits(struct iwch_ep *ep, u32 credits) -{ - struct cpl_rx_data_ack *req; - struct sk_buff *skb; - - pr_debug("%s ep %p credits %u\n", __func__, ep, credits); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("update_rx_credits - cannot alloc skb!\n"); - return 0; - } - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid)); - req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1)); - skb->priority = CPL_PRIORITY_ACK; - iwch_cxgb3_ofld_send(ep->com.tdev, skb); - return credits; -} - -static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) -{ - struct mpa_message *mpa; - u16 plen; - struct iwch_qp_attributes attrs; - enum iwch_qp_attr_mask mask; - int err; - - pr_debug("%s ep %p\n", __func__, ep); - - /* - * Stop mpa timer. If it expired, then the state has - * changed and we bail since ep_timeout already aborted - * the connection. - */ - stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_SENT) - return; - - /* - * If we get more than the supported amount of private data - * then we must fail this connection. - */ - if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { - err = -EINVAL; - goto err; - } - - /* - * copy the new data into our accumulation buffer. - */ - skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), - skb->len); - ep->mpa_pkt_len += skb->len; - - /* - * if we don't even have the mpa message, then bail. - */ - if (ep->mpa_pkt_len < sizeof(*mpa)) - return; - mpa = (struct mpa_message *) ep->mpa_pkt; - - /* Validate MPA header. */ - if (mpa->revision != mpa_rev) { - err = -EPROTO; - goto err; - } - if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { - err = -EPROTO; - goto err; - } - - plen = ntohs(mpa->private_data_size); - - /* - * Fail if there's too much private data. 
- */ - if (plen > MPA_MAX_PRIVATE_DATA) { - err = -EPROTO; - goto err; - } - - /* - * If plen does not account for pkt size - */ - if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { - err = -EPROTO; - goto err; - } - - ep->plen = (u8) plen; - - /* - * If we don't have all the pdata yet, then bail. - * We'll continue process when more data arrives. - */ - if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; - - if (mpa->flags & MPA_REJECT) { - err = -ECONNREFUSED; - goto err; - } - - /* - * If we get here we have accumulated the entire mpa - * start reply message including private data. And - * the MPA header is valid. - */ - state_set(&ep->com, FPDU_MODE); - ep->mpa_attr.initiator = 1; - ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; - ep->mpa_attr.recv_marker_enabled = markers_enabled; - ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; - ep->mpa_attr.version = mpa_rev; - pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n", - __func__, - ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, - ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); - - attrs.mpa_attr = ep->mpa_attr; - attrs.max_ird = ep->ird; - attrs.max_ord = ep->ord; - attrs.llp_stream_handle = ep; - attrs.next_state = IWCH_QP_STATE_RTS; - - mask = IWCH_QP_ATTR_NEXT_STATE | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD; - - /* bind QP and TID with INIT_WR */ - err = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, mask, &attrs, 1); - if (err) - goto err; - - if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) { - iwch_post_zb_read(ep); - } - - goto out; -err: - abort_connection(ep, skb, GFP_KERNEL); -out: - connect_reply_upcall(ep, err); - return; -} - -static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) -{ - struct mpa_message *mpa; - u16 plen; - - pr_debug("%s ep %p\n", __func__, ep); - - /* - * Stop mpa timer. If it expired, then the state has - * changed and we bail since ep_timeout already aborted - * the connection. - */ - stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_WAIT) - return; - - /* - * If we get more than the supported amount of private data - * then we must fail this connection. - */ - if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - - /* - * Copy the new data into our accumulation buffer. - */ - skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), - skb->len); - ep->mpa_pkt_len += skb->len; - - /* - * If we don't even have the mpa message, then bail. - * We'll continue process when more data arrives. - */ - if (ep->mpa_pkt_len < sizeof(*mpa)) - return; - pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - mpa = (struct mpa_message *) ep->mpa_pkt; - - /* - * Validate MPA Header. - */ - if (mpa->revision != mpa_rev) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - plen = ntohs(mpa->private_data_size); - - /* - * Fail if there's too much private data. 
- */ - if (plen > MPA_MAX_PRIVATE_DATA) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - /* - * If plen does not account for pkt size - */ - if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - ep->plen = (u8) plen; - - /* - * If we don't have all the pdata yet, then bail. - */ - if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; - - /* - * If we get here we have accumulated the entire mpa - * start reply message including private data. - */ - ep->mpa_attr.initiator = 0; - ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; - ep->mpa_attr.recv_marker_enabled = markers_enabled; - ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; - ep->mpa_attr.version = mpa_rev; - pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n", - __func__, - ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, - ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); - - state_set(&ep->com, MPA_REQ_RCVD); - - /* drive upcall */ - connect_request_upcall(ep); - return; -} - -static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_rx_data *hdr = cplhdr(skb); - unsigned int dlen = ntohs(hdr->len); - - pr_debug("%s ep %p dlen %u\n", __func__, ep, dlen); - - skb_pull(skb, sizeof(*hdr)); - skb_trim(skb, dlen); - - ep->rcv_seq += dlen; - BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen)); - - switch (state_read(&ep->com)) { - case MPA_REQ_SENT: - process_mpa_reply(ep, skb); - break; - case MPA_REQ_WAIT: - process_mpa_request(ep, skb); - break; - case MPA_REP_SENT: - break; - default: - pr_err("%s Unexpected streaming data. ep %p state %d tid %d\n", - __func__, ep, state_read(&ep->com), ep->hwtid); - - /* - * The ep will timeout and inform the ULP of the failure. - * See ep_timeout(). - */ - break; - } - - /* update RX credits */ - update_rx_credits(ep, dlen); - - return CPL_RET_BUF_DONE; -} - -/* - * Upcall from the adapter indicating data has been transmitted. - * For us its just the single MPA request or reply. We can now free - * the skb holding the mpa message. 
- */ -static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_wr_ack *hdr = cplhdr(skb); - unsigned int credits = ntohs(hdr->credits); - unsigned long flags; - int post_zb = 0; - - pr_debug("%s ep %p credits %u\n", __func__, ep, credits); - - if (credits == 0) { - pr_debug("%s 0 credit ack ep %p state %u\n", - __func__, ep, state_read(&ep->com)); - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - BUG_ON(credits != 1); - dst_confirm(ep->dst); - if (!ep->mpa_skb) { - pr_debug("%s rdma_init wr_ack ep %p state %u\n", - __func__, ep, ep->com.state); - if (ep->mpa_attr.initiator) { - pr_debug("%s initiator ep %p state %u\n", - __func__, ep, ep->com.state); - if (peer2peer && ep->com.state == FPDU_MODE) - post_zb = 1; - } else { - pr_debug("%s responder ep %p state %u\n", - __func__, ep, ep->com.state); - if (ep->com.state == MPA_REQ_RCVD) { - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - } - } - } else { - pr_debug("%s lsm ack ep %p state %u freeing skb\n", - __func__, ep, ep->com.state); - kfree_skb(ep->mpa_skb); - ep->mpa_skb = NULL; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (post_zb) - iwch_post_zb_read(ep); - return CPL_RET_BUF_DONE; -} - -static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - unsigned long flags; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(!ep); - - /* - * We get 2 abort replies from the HW. The first one must - * be ignored except for scribbling that we need one more. - */ - if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) { - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case ABORTING: - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - break; - default: - pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state); - break; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * Return whether a failed active open has allocated a TID - */ -static inline int act_open_has_tid(int status) -{ - return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && - status != CPL_ERR_ARP_MISS; -} - -static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_act_open_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p status %u errno %d\n", __func__, ep, rpl->status, - status2errno(rpl->status)); - connect_reply_upcall(ep, status2errno(rpl->status)); - state_set(&ep->com, DEAD); - if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status)) - release_tid(ep->com.tdev, GET_TID(rpl), NULL); - cxgb3_free_atid(ep->com.tdev, ep->atid); - dst_release(ep->dst); - l2t_release(ep->com.tdev, ep->l2t); - put_ep(&ep->com); - return CPL_RET_BUF_DONE; -} - -static int listen_start(struct iwch_listen_ep *ep) -{ - struct sk_buff *skb; - struct cpl_pass_open_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("t3c_listen_start failed to alloc skb!\n"); - return -ENOMEM; - } - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid)); - req->local_port = ep->com.local_addr.sin_port; - req->local_ip = ep->com.local_addr.sin_addr.s_addr; - req->peer_port = 0; - req->peer_ip = 0; - req->peer_netmask = 0; - 
req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS); - req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10)); - req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK)); - - skb->priority = 1; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_listen_ep *ep = ctx; - struct cpl_pass_open_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p status %d error %d\n", __func__, ep, - rpl->status, status2errno(rpl->status)); - ep->com.rpl_err = status2errno(rpl->status); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - - return CPL_RET_BUF_DONE; -} - -static int listen_stop(struct iwch_listen_ep *ep) -{ - struct sk_buff *skb; - struct cpl_close_listserv_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->cpu_idx = 0; - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid)); - skb->priority = 1; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb, - void *ctx) -{ - struct iwch_listen_ep *ep = ctx; - struct cpl_close_listserv_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p\n", __func__, ep); - ep->com.rpl_err = status2errno(rpl->status); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - return CPL_RET_BUF_DONE; -} - -static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) -{ - struct cpl_pass_accept_rpl *rpl; - unsigned int mtu_idx; - u32 opt0h, opt0l, opt2; - int wscale; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(skb_cloned(skb)); - skb_trim(skb, sizeof(*rpl)); - skb_get(skb); - mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst)); - wscale = compute_wscale(rcv_win); - opt0h = V_NAGLE(0) | - V_NO_CONG(nocong) | - V_KEEP_ALIVE(1) | - F_TCAM_BYPASS | - V_WND_SCALE(wscale) | - V_MSS_IDX(mtu_idx) | - V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); - opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | - V_CONG_CONTROL_FLAVOR(cong_flavor); - - rpl = cplhdr(skb); - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid)); - rpl->peer_ip = peer_ip; - rpl->opt0h = htonl(opt0h); - rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT); - rpl->opt2 = htonl(opt2); - rpl->rsvd = rpl->opt2; /* workaround for HW bug */ - skb->priority = CPL_PRIORITY_SETUP; - iwch_l2t_send(ep->com.tdev, skb, ep->l2t); - - return; -} - -static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip, - struct sk_buff *skb) -{ - pr_debug("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid, - peer_ip); - BUG_ON(skb_cloned(skb)); - skb_trim(skb, sizeof(struct cpl_tid_release)); - skb_get(skb); - - if (tdev->type != T3A) - release_tid(tdev, hwtid, skb); - else { - struct cpl_pass_accept_rpl *rpl; - - rpl = cplhdr(skb); - skb->priority = CPL_PRIORITY_SETUP; - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, - hwtid)); - rpl->peer_ip = peer_ip; - rpl->opt0h = htonl(F_TCAM_BYPASS); - rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); - rpl->opt2 = 0; - rpl->rsvd = rpl->opt2; - iwch_cxgb3_ofld_send(tdev, skb); - } -} - -static int pass_accept_req(struct t3cdev 
*tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *child_ep, *parent_ep = ctx; - struct cpl_pass_accept_req *req = cplhdr(skb); - unsigned int hwtid = GET_TID(req); - struct dst_entry *dst; - struct l2t_entry *l2t; - struct rtable *rt; - struct iff_mac tim; - - pr_debug("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); - - if (state_read(&parent_ep->com) != LISTEN) { - pr_err("%s - listening ep not in LISTEN\n", __func__); - goto reject; - } - - /* - * Find the netdev for this connection request. - */ - tim.mac_addr = req->dst_mac; - tim.vlan_tag = ntohs(req->vlan_tag); - if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) { - pr_err("%s bad dst mac %pM\n", __func__, req->dst_mac); - goto reject; - } - - /* Find output route */ - rt = find_route(tdev, - req->local_ip, - req->peer_ip, - req->local_port, - req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid))); - if (!rt) { - pr_err("%s - failed to find dst entry!\n", __func__); - goto reject; - } - dst = &rt->dst; - l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip); - if (!l2t) { - pr_err("%s - failed to allocate l2t entry!\n", __func__); - dst_release(dst); - goto reject; - } - child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); - if (!child_ep) { - pr_err("%s - failed to allocate ep entry!\n", __func__); - l2t_release(tdev, l2t); - dst_release(dst); - goto reject; - } - state_set(&child_ep->com, CONNECTING); - child_ep->com.tdev = tdev; - child_ep->com.cm_id = NULL; - child_ep->com.local_addr.sin_family = AF_INET; - child_ep->com.local_addr.sin_port = req->local_port; - child_ep->com.local_addr.sin_addr.s_addr = req->local_ip; - child_ep->com.remote_addr.sin_family = AF_INET; - child_ep->com.remote_addr.sin_port = req->peer_port; - child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip; - get_ep(&parent_ep->com); - child_ep->parent_ep = parent_ep; - child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid)); - child_ep->l2t = l2t; - child_ep->dst = dst; - child_ep->hwtid = hwtid; - timer_setup(&child_ep->timer, ep_timeout, 0); - cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid); - accept_cr(child_ep, req->peer_ip, skb); - goto out; -reject: - reject_cr(tdev, hwtid, req->peer_ip, skb); -out: - return CPL_RET_BUF_DONE; -} - -static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_pass_establish *req = cplhdr(skb); - - pr_debug("%s ep %p\n", __func__, ep); - ep->snd_seq = ntohl(req->snd_isn); - ep->rcv_seq = ntohl(req->rcv_isn); - - set_emss(ep, ntohs(req->tcp_opt)); - - dst_confirm(ep->dst); - state_set(&ep->com, MPA_REQ_WAIT); - start_ep_timer(ep); - - return CPL_RET_BUF_DONE; -} - -static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct iwch_qp_attributes attrs; - unsigned long flags; - int disconnect = 1; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - dst_confirm(ep->dst); - - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case MPA_REQ_WAIT: - __state_set(&ep->com, CLOSING); - break; - case MPA_REQ_SENT: - __state_set(&ep->com, CLOSING); - connect_reply_upcall(ep, -ECONNRESET); - break; - case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. Also wake up anyone waiting - * in rdma connection migration (see iwch_accept_cr()). 
- */ - __state_set(&ep->com, CLOSING); - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MPA_REP_SENT: - __state_set(&ep->com, CLOSING); - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case FPDU_MODE: - start_ep_timer(ep); - __state_set(&ep->com, CLOSING); - attrs.next_state = IWCH_QP_STATE_CLOSING; - iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); - peer_close_upcall(ep); - break; - case ABORTING: - disconnect = 0; - break; - case CLOSING: - __state_set(&ep->com, MORIBUND); - disconnect = 0; - break; - case MORIBUND: - stop_ep_timer(ep); - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_IDLE; - iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); - } - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - disconnect = 0; - break; - case DEAD: - disconnect = 0; - break; - default: - BUG_ON(1); - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (disconnect) - iwch_ep_disconnect(ep, 0, GFP_KERNEL); - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * Returns whether an ABORT_REQ_RSS message is a negative advice. - */ -static int is_neg_adv_abort(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE; -} - -static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_abort_req_rss *req = cplhdr(skb); - struct iwch_ep *ep = ctx; - struct cpl_abort_rpl *rpl; - struct sk_buff *rpl_skb; - struct iwch_qp_attributes attrs; - int ret; - int release = 0; - unsigned long flags; - - if (is_neg_adv_abort(req->status)) { - pr_debug("%s neg_adv_abort ep %p tid %d\n", __func__, ep, - ep->hwtid); - t3_l2t_send_event(ep->com.tdev, ep->l2t); - return CPL_RET_BUF_DONE; - } - - /* - * We get 2 peer aborts from the HW. The first one must - * be ignored except for scribbling that we need one more. - */ - if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) { - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - pr_debug("%s ep %p state %u\n", __func__, ep, ep->com.state); - switch (ep->com.state) { - case CONNECTING: - break; - case MPA_REQ_WAIT: - stop_ep_timer(ep); - break; - case MPA_REQ_SENT: - stop_ep_timer(ep); - connect_reply_upcall(ep, -ECONNRESET); - break; - case MPA_REP_SENT: - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. Also wake up anyone waiting - * in rdma connection migration (see iwch_accept_cr()). 
- */ - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MORIBUND: - case CLOSING: - stop_ep_timer(ep); - /*FALLTHROUGH*/ - case FPDU_MODE: - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_ERROR; - ret = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - if (ret) - pr_err("%s - qp <- error failed!\n", __func__); - } - peer_abort_upcall(ep); - break; - case ABORTING: - break; - case DEAD: - pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); - spin_unlock_irqrestore(&ep->com.lock, flags); - return CPL_RET_BUF_DONE; - default: - BUG_ON(1); - break; - } - dst_confirm(ep->dst); - if (ep->com.state != ABORTING) { - __state_set(&ep->com, DEAD); - release = 1; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - - rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); - if (!rpl_skb) { - pr_err("%s - cannot allocate skb!\n", __func__); - release = 1; - goto out; - } - rpl_skb->priority = CPL_PRIORITY_DATA; - rpl = skb_put(rpl_skb, sizeof(*rpl)); - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); - rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); - rpl->cmd = CPL_ABORT_NO_RST; - iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb); -out: - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct iwch_qp_attributes attrs; - unsigned long flags; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(!ep); - - /* The cm_id may be null if we failed to connect */ - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case CLOSING: - __state_set(&ep->com, MORIBUND); - break; - case MORIBUND: - stop_ep_timer(ep); - if ((ep->com.cm_id) && (ep->com.qp)) { - attrs.next_state = IWCH_QP_STATE_IDLE; - iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - } - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - break; - case ABORTING: - case DEAD: - break; - default: - BUG_ON(1); - break; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * T3A does 3 things when a TERM is received: - * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet - * 2) generate an async event on the QP with the TERMINATE opcode - * 3) post a TERMINATE opcode cqe into the associated CQ. - * - * For (1), we save the message in the qp for later consumer consumption. - * For (2), we move the QP into TERMINATE, post a QP event and disconnect. - * For (3), we toss the CQE in cxio_poll_cq(). - * - * terminate() handles case (1)... 
- */ -static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - - if (state_read(&ep->com) != FPDU_MODE) - return CPL_RET_BUF_DONE; - - pr_debug("%s ep %p\n", __func__, ep); - skb_pull(skb, sizeof(struct cpl_rdma_terminate)); - pr_debug("%s saving %d bytes of term msg\n", __func__, skb->len); - skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer, - skb->len); - ep->com.qp->attr.terminate_msg_len = skb->len; - ep->com.qp->attr.is_terminate_local = 0; - return CPL_RET_BUF_DONE; -} - -static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_rdma_ec_status *rep = cplhdr(skb); - struct iwch_ep *ep = ctx; - - pr_debug("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, - rep->status); - if (rep->status) { - struct iwch_qp_attributes attrs; - - pr_err("%s BAD CLOSE - Aborting tid %u\n", - __func__, ep->hwtid); - stop_ep_timer(ep); - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - abort_connection(ep, NULL, GFP_KERNEL); - } - return CPL_RET_BUF_DONE; -} - -static void ep_timeout(struct timer_list *t) -{ - struct iwch_ep *ep = from_timer(ep, t, timer); - struct iwch_qp_attributes attrs; - unsigned long flags; - int abort = 1; - - spin_lock_irqsave(&ep->com.lock, flags); - pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, - ep->com.state); - switch (ep->com.state) { - case MPA_REQ_SENT: - __state_set(&ep->com, ABORTING); - connect_reply_upcall(ep, -ETIMEDOUT); - break; - case MPA_REQ_WAIT: - __state_set(&ep->com, ABORTING); - break; - case CLOSING: - case MORIBUND: - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - } - __state_set(&ep->com, ABORTING); - break; - default: - WARN(1, "%s unexpected state ep %p state %u\n", - __func__, ep, ep->com.state); - abort = 0; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (abort) - abort_connection(ep, NULL, GFP_ATOMIC); - put_ep(&ep->com); -} - -int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) -{ - struct iwch_ep *ep = to_ep(cm_id); - - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - - if (state_read(&ep->com) == DEAD) { - put_ep(&ep->com); - return -ECONNRESET; - } - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); - if (mpa_rev == 0) - abort_connection(ep, NULL, GFP_KERNEL); - else { - send_mpa_reject(ep, pdata, pdata_len); - iwch_ep_disconnect(ep, 0, GFP_KERNEL); - } - put_ep(&ep->com); - return 0; -} - -int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) -{ - int err; - struct iwch_qp_attributes attrs; - enum iwch_qp_attr_mask mask; - struct iwch_ep *ep = to_ep(cm_id); - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_qp *qp = get_qhp(h, conn_param->qpn); - - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { - err = -ECONNRESET; - goto err; - } - - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); - BUG_ON(!qp); - - if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || - (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { - abort_connection(ep, NULL, GFP_KERNEL); - err = -EINVAL; - goto err; - } - - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->com.qp = qp; - - ep->ird = conn_param->ird; - ep->ord = conn_param->ord; - - if (peer2peer && ep->ird == 0) - ep->ird = 1; - - pr_debug("%s %d ird %d ord %d\n", 
__func__, __LINE__, ep->ird, ep->ord); - - /* bind QP to EP and move to RTS */ - attrs.mpa_attr = ep->mpa_attr; - attrs.max_ird = ep->ird; - attrs.max_ord = ep->ord; - attrs.llp_stream_handle = ep; - attrs.next_state = IWCH_QP_STATE_RTS; - - /* bind QP and TID with INIT_WR */ - mask = IWCH_QP_ATTR_NEXT_STATE | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | - IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_MAX_IRD | - IWCH_QP_ATTR_MAX_ORD; - - err = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, mask, &attrs, 1); - if (err) - goto err1; - - /* if needed, wait for wr_ack */ - if (iwch_rqes_posted(qp)) { - wait_event(ep->com.waitq, ep->com.rpl_done); - err = ep->com.rpl_err; - if (err) - goto err1; - } - - err = send_mpa_reply(ep, conn_param->private_data, - conn_param->private_data_len); - if (err) - goto err1; - - - state_set(&ep->com, FPDU_MODE); - established_upcall(ep); - put_ep(&ep->com); - return 0; -err1: - ep->com.cm_id = NULL; - ep->com.qp = NULL; - cm_id->rem_ref(cm_id); -err: - put_ep(&ep->com); - return err; -} - -static int is_loopback_dst(struct iw_cm_id *cm_id) -{ - struct net_device *dev; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; - - dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr); - if (!dev) - return 0; - dev_put(dev); - return 1; -} - -int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) -{ - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_ep *ep; - struct rtable *rt; - int err = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; - - if (cm_id->m_remote_addr.ss_family != PF_INET) { - err = -ENOSYS; - goto out; - } - - if (is_loopback_dst(cm_id)) { - err = -ENOSYS; - goto out; - } - - ep = alloc_ep(sizeof(*ep), GFP_KERNEL); - if (!ep) { - pr_err("%s - cannot alloc ep\n", __func__); - err = -ENOMEM; - goto out; - } - timer_setup(&ep->timer, ep_timeout, 0); - ep->plen = conn_param->private_data_len; - if (ep->plen) - memcpy(ep->mpa_pkt + sizeof(struct mpa_message), - conn_param->private_data, ep->plen); - ep->ird = conn_param->ird; - ep->ord = conn_param->ord; - - if (peer2peer && ep->ord == 0) - ep->ord = 1; - - ep->com.tdev = h->rdev.t3cdev_p; - - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->com.qp = get_qhp(h, conn_param->qpn); - BUG_ON(!ep->com.qp); - pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, - ep->com.qp, cm_id); - - /* - * Allocate an active TID to initiate a TCP connection. 
- */ - ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep); - if (ep->atid == -1) { - pr_err("%s - cannot alloc atid\n", __func__); - err = -ENOMEM; - goto fail2; - } - - /* find a route */ - rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, IPTOS_LOWDELAY); - if (!rt) { - pr_err("%s - cannot find route\n", __func__); - err = -EHOSTUNREACH; - goto fail3; - } - ep->dst = &rt->dst; - ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL, - &raddr->sin_addr.s_addr); - if (!ep->l2t) { - pr_err("%s - cannot alloc l2e\n", __func__); - err = -ENOMEM; - goto fail4; - } - - state_set(&ep->com, CONNECTING); - ep->tos = IPTOS_LOWDELAY; - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr, - sizeof(ep->com.remote_addr)); - - /* send connect request to rnic */ - err = send_connect(ep); - if (!err) - goto out; - - l2t_release(h->rdev.t3cdev_p, ep->l2t); -fail4: - dst_release(ep->dst); -fail3: - cxgb3_free_atid(ep->com.tdev, ep->atid); -fail2: - cm_id->rem_ref(cm_id); - put_ep(&ep->com); -out: - return err; -} - -int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) -{ - int err = 0; - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_listen_ep *ep; - - - might_sleep(); - - if (cm_id->m_local_addr.ss_family != PF_INET) { - err = -ENOSYS; - goto fail1; - } - - ep = alloc_ep(sizeof(*ep), GFP_KERNEL); - if (!ep) { - pr_err("%s - cannot alloc ep\n", __func__); - err = -ENOMEM; - goto fail1; - } - pr_debug("%s ep %p\n", __func__, ep); - ep->com.tdev = h->rdev.t3cdev_p; - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->backlog = backlog; - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - - /* - * Allocate a server TID. 
- */ - ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep); - if (ep->stid == -1) { - pr_err("%s - cannot alloc atid\n", __func__); - err = -ENOMEM; - goto fail2; - } - - state_set(&ep->com, LISTEN); - err = listen_start(ep); - if (err) - goto fail3; - - /* wait for pass_open_rpl */ - wait_event(ep->com.waitq, ep->com.rpl_done); - err = ep->com.rpl_err; - if (!err) { - cm_id->provider_data = ep; - goto out; - } -fail3: - cxgb3_free_stid(ep->com.tdev, ep->stid); -fail2: - cm_id->rem_ref(cm_id); - put_ep(&ep->com); -fail1: -out: - return err; -} - -int iwch_destroy_listen(struct iw_cm_id *cm_id) -{ - int err; - struct iwch_listen_ep *ep = to_listen_ep(cm_id); - - pr_debug("%s ep %p\n", __func__, ep); - - might_sleep(); - state_set(&ep->com, DEAD); - ep->com.rpl_done = 0; - ep->com.rpl_err = 0; - err = listen_stop(ep); - if (err) - goto done; - wait_event(ep->com.waitq, ep->com.rpl_done); - cxgb3_free_stid(ep->com.tdev, ep->stid); -done: - err = ep->com.rpl_err; - cm_id->rem_ref(cm_id); - put_ep(&ep->com); - return err; -} - -int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) -{ - int ret=0; - unsigned long flags; - int close = 0; - int fatal = 0; - struct t3cdev *tdev; - struct cxio_rdev *rdev; - - spin_lock_irqsave(&ep->com.lock, flags); - - pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep, - states[ep->com.state], abrupt); - - tdev = (struct t3cdev *)ep->com.tdev; - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - fatal = 1; - close_complete_upcall(ep); - ep->com.state = DEAD; - } - switch (ep->com.state) { - case MPA_REQ_WAIT: - case MPA_REQ_SENT: - case MPA_REQ_RCVD: - case MPA_REP_SENT: - case FPDU_MODE: - close = 1; - if (abrupt) - ep->com.state = ABORTING; - else { - ep->com.state = CLOSING; - start_ep_timer(ep); - } - set_bit(CLOSE_SENT, &ep->com.flags); - break; - case CLOSING: - if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { - close = 1; - if (abrupt) { - stop_ep_timer(ep); - ep->com.state = ABORTING; - } else - ep->com.state = MORIBUND; - } - break; - case MORIBUND: - case ABORTING: - case DEAD: - pr_debug("%s ignoring disconnect ep %p state %u\n", - __func__, ep, ep->com.state); - break; - default: - BUG(); - break; - } - - spin_unlock_irqrestore(&ep->com.lock, flags); - if (close) { - if (abrupt) - ret = send_abort(ep, NULL, gfp); - else - ret = send_halfclose(ep, gfp); - if (ret) - fatal = 1; - } - if (fatal) - release_ep_resources(ep); - return ret; -} - -int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, - struct l2t_entry *l2t) -{ - struct iwch_ep *ep = ctx; - - if (ep->dst != old) - return 0; - - pr_debug("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new, - l2t); - dst_hold(new); - l2t_release(ep->com.tdev, ep->l2t); - ep->l2t = l2t; - dst_release(old); - ep->dst = new; - return 1; -} - -/* - * All the CM events are handled on a work queue to have a safe context. - * These are the real handlers that are called from the work queue. 
- */ -static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = { - [CPL_ACT_ESTABLISH] = act_establish, - [CPL_ACT_OPEN_RPL] = act_open_rpl, - [CPL_RX_DATA] = rx_data, - [CPL_TX_DMA_ACK] = tx_ack, - [CPL_ABORT_RPL_RSS] = abort_rpl, - [CPL_ABORT_RPL] = abort_rpl, - [CPL_PASS_OPEN_RPL] = pass_open_rpl, - [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl, - [CPL_PASS_ACCEPT_REQ] = pass_accept_req, - [CPL_PASS_ESTABLISH] = pass_establish, - [CPL_PEER_CLOSE] = peer_close, - [CPL_ABORT_REQ_RSS] = peer_abort, - [CPL_CLOSE_CON_RPL] = close_con_rpl, - [CPL_RDMA_TERMINATE] = terminate, - [CPL_RDMA_EC_STATUS] = ec_status, -}; - -static void process_work(struct work_struct *work) -{ - struct sk_buff *skb = NULL; - void *ep; - struct t3cdev *tdev; - int ret; - - while ((skb = skb_dequeue(&rxq))) { - ep = *((void **) (skb->cb)); - tdev = *((struct t3cdev **) (skb->cb + sizeof(void *))); - ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep); - if (ret & CPL_RET_BUF_DONE) - kfree_skb(skb); - - /* - * ep was referenced in sched(), and is freed here. - */ - put_ep((struct iwch_ep_common *)ep); - } -} - -static DECLARE_WORK(skb_work, process_work); - -static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep_common *epc = ctx; - - get_ep(epc); - - /* - * Save ctx and tdev in the skb->cb area. - */ - *((void **) skb->cb) = ctx; - *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev; - - /* - * Queue the skb and schedule the worker thread. - */ - skb_queue_tail(&rxq, skb); - queue_work(workq, &skb_work); - return 0; -} - -static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_set_tcb_rpl *rpl = cplhdr(skb); - - if (rpl->status != CPL_ERR_NONE) { - pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n", - rpl->status, GET_TID(rpl)); - } - return CPL_RET_BUF_DONE; -} - -/* - * All upcalls from the T3 Core go to sched() to schedule the - * processing on a work queue. - */ -cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = { - [CPL_ACT_ESTABLISH] = sched, - [CPL_ACT_OPEN_RPL] = sched, - [CPL_RX_DATA] = sched, - [CPL_TX_DMA_ACK] = sched, - [CPL_ABORT_RPL_RSS] = sched, - [CPL_ABORT_RPL] = sched, - [CPL_PASS_OPEN_RPL] = sched, - [CPL_CLOSE_LISTSRV_RPL] = sched, - [CPL_PASS_ACCEPT_REQ] = sched, - [CPL_PASS_ESTABLISH] = sched, - [CPL_PEER_CLOSE] = sched, - [CPL_CLOSE_CON_RPL] = sched, - [CPL_ABORT_REQ_RSS] = sched, - [CPL_RDMA_TERMINATE] = sched, - [CPL_RDMA_EC_STATUS] = sched, - [CPL_SET_TCB_RPL] = set_tcb_rpl, -}; - -int __init iwch_cm_init(void) -{ - skb_queue_head_init(&rxq); - - workq = alloc_ordered_workqueue("iw_cxgb3", WQ_MEM_RECLAIM); - if (!workq) - return -ENOMEM; - - return 0; -} - -void __exit iwch_cm_term(void) -{ - flush_workqueue(workq); - destroy_workqueue(workq); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h deleted file mode 100644 index cc7fe644d260..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _IWCH_CM_H_ -#define _IWCH_CM_H_ - -#include -#include -#include -#include - -#include -#include - -#include "cxgb3_offload.h" -#include "iwch_provider.h" - -#define MPA_KEY_REQ "MPA ID Req Frame" -#define MPA_KEY_REP "MPA ID Rep Frame" - -#define MPA_MAX_PRIVATE_DATA 256 -#define MPA_REV 0 /* XXX - amso1100 uses rev 0 ! */ -#define MPA_REJECT 0x20 -#define MPA_CRC 0x40 -#define MPA_MARKERS 0x80 -#define MPA_FLAGS_MASK 0xE0 - -#define put_ep(ep) { \ - pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n", \ - __func__, __LINE__, ep, kref_read(&((ep)->kref))); \ - WARN_ON(kref_read(&((ep)->kref)) < 1); \ - kref_put(&((ep)->kref), __free_ep); \ -} - -#define get_ep(ep) { \ - pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n", \ - __func__, __LINE__, ep, kref_read(&((ep)->kref))); \ - kref_get(&((ep)->kref)); \ -} - -struct mpa_message { - u8 key[16]; - u8 flags; - u8 revision; - __be16 private_data_size; - u8 private_data[0]; -}; - -struct terminate_message { - u8 layer_etype; - u8 ecode; - __be16 hdrct_rsvd; - u8 len_hdrs[0]; -}; - -#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28) - -enum iwch_layers_types { - LAYER_RDMAP = 0x00, - LAYER_DDP = 0x10, - LAYER_MPA = 0x20, - RDMAP_LOCAL_CATA = 0x00, - RDMAP_REMOTE_PROT = 0x01, - RDMAP_REMOTE_OP = 0x02, - DDP_LOCAL_CATA = 0x00, - DDP_TAGGED_ERR = 0x01, - DDP_UNTAGGED_ERR = 0x02, - DDP_LLP = 0x03 -}; - -enum iwch_rdma_ecodes { - RDMAP_INV_STAG = 0x00, - RDMAP_BASE_BOUNDS = 0x01, - RDMAP_ACC_VIOL = 0x02, - RDMAP_STAG_NOT_ASSOC = 0x03, - RDMAP_TO_WRAP = 0x04, - RDMAP_INV_VERS = 0x05, - RDMAP_INV_OPCODE = 0x06, - RDMAP_STREAM_CATA = 0x07, - RDMAP_GLOBAL_CATA = 0x08, - RDMAP_CANT_INV_STAG = 0x09, - RDMAP_UNSPECIFIED = 0xff -}; - -enum iwch_ddp_ecodes { - DDPT_INV_STAG = 0x00, - DDPT_BASE_BOUNDS = 0x01, - DDPT_STAG_NOT_ASSOC = 0x02, - DDPT_TO_WRAP = 0x03, - DDPT_INV_VERS = 0x04, - DDPU_INV_QN = 0x01, - DDPU_INV_MSN_NOBUF = 0x02, - DDPU_INV_MSN_RANGE = 0x03, - DDPU_INV_MO = 0x04, - DDPU_MSG_TOOBIG = 0x05, - DDPU_INV_VERS = 0x06 -}; - -enum iwch_mpa_ecodes { - MPA_CRC_ERR = 0x02, - MPA_MARKER_ERR = 0x03 -}; - -enum iwch_ep_state { - IDLE = 0, - LISTEN, - CONNECTING, - MPA_REQ_WAIT, - MPA_REQ_SENT, - MPA_REQ_RCVD, - MPA_REP_SENT, - FPDU_MODE, - ABORTING, - CLOSING, - MORIBUND, - DEAD, -}; - -enum 
iwch_ep_flags { - PEER_ABORT_IN_PROGRESS = 0, - ABORT_REQ_IN_PROGRESS = 1, - RELEASE_RESOURCES = 2, - CLOSE_SENT = 3, -}; - -struct iwch_ep_common { - struct iw_cm_id *cm_id; - struct iwch_qp *qp; - struct t3cdev *tdev; - enum iwch_ep_state state; - struct kref kref; - spinlock_t lock; - struct sockaddr_in local_addr; - struct sockaddr_in remote_addr; - wait_queue_head_t waitq; - int rpl_done; - int rpl_err; - unsigned long flags; -}; - -struct iwch_listen_ep { - struct iwch_ep_common com; - unsigned int stid; - int backlog; -}; - -struct iwch_ep { - struct iwch_ep_common com; - struct iwch_ep *parent_ep; - struct timer_list timer; - unsigned int atid; - u32 hwtid; - u32 snd_seq; - u32 rcv_seq; - struct l2t_entry *l2t; - struct dst_entry *dst; - struct sk_buff *mpa_skb; - struct iwch_mpa_attributes mpa_attr; - unsigned int mpa_pkt_len; - u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA]; - u8 tos; - u16 emss; - u16 plen; - u32 ird; - u32 ord; -}; - -static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id) -{ - return cm_id->provider_data; -} - -static inline struct iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) -{ - return cm_id->provider_data; -} - -static inline int compute_wscale(int win) -{ - int wscale = 0; - - while (wscale < 14 && (65535<wq : NULL; - struct t3_cqe cqe; - u32 credit = 0; - u8 cqe_flushed; - u64 cookie; - int ret = 1; - - ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, - &credit); - if (t3a_device(chp->rhp) && credit) { - pr_debug("%s updating %d cq credits on id %d\n", __func__, - credit, chp->cq.cqid); - cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit); - } - - if (ret) { - ret = -EAGAIN; - goto out; - } - ret = 1; - - wc->wr_id = cookie; - wc->qp = qhp ? &qhp->ibqp : NULL; - wc->vendor_err = CQE_STATUS(cqe); - wc->wc_flags = 0; - - pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x lo 0x%x cookie 0x%llx\n", - __func__, - CQE_QPID(cqe), CQE_TYPE(cqe), - CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe), - CQE_WRID_LOW(cqe), (unsigned long long)cookie); - - if (CQE_TYPE(cqe) == 0) { - if (!CQE_STATUS(cqe)) - wc->byte_len = CQE_LEN(cqe); - else - wc->byte_len = 0; - wc->opcode = IB_WC_RECV; - if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV || - CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) { - wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe); - wc->wc_flags |= IB_WC_WITH_INVALIDATE; - } - } else { - switch (CQE_OPCODE(cqe)) { - case T3_RDMA_WRITE: - wc->opcode = IB_WC_RDMA_WRITE; - break; - case T3_READ_REQ: - wc->opcode = IB_WC_RDMA_READ; - wc->byte_len = CQE_LEN(cqe); - break; - case T3_SEND: - case T3_SEND_WITH_SE: - case T3_SEND_WITH_INV: - case T3_SEND_WITH_SE_INV: - wc->opcode = IB_WC_SEND; - break; - case T3_LOCAL_INV: - wc->opcode = IB_WC_LOCAL_INV; - break; - case T3_FAST_REGISTER: - wc->opcode = IB_WC_REG_MR; - break; - default: - pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", - CQE_OPCODE(cqe), CQE_QPID(cqe)); - ret = -EINVAL; - goto out; - } - } - - if (cqe_flushed) - wc->status = IB_WC_WR_FLUSH_ERR; - else { - - switch (CQE_STATUS(cqe)) { - case TPT_ERR_SUCCESS: - wc->status = IB_WC_SUCCESS; - break; - case TPT_ERR_STAG: - wc->status = IB_WC_LOC_ACCESS_ERR; - break; - case TPT_ERR_PDID: - wc->status = IB_WC_LOC_PROT_ERR; - break; - case TPT_ERR_QPID: - case TPT_ERR_ACCESS: - wc->status = IB_WC_LOC_ACCESS_ERR; - break; - case TPT_ERR_WRAP: - wc->status = IB_WC_GENERAL_ERR; - break; - case TPT_ERR_BOUND: - wc->status = IB_WC_LOC_LEN_ERR; - break; - case TPT_ERR_INVALIDATE_SHARED_MR: - case 
TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - wc->status = IB_WC_MW_BIND_ERR; - break; - case TPT_ERR_CRC: - case TPT_ERR_MARKER: - case TPT_ERR_PDU_LEN_ERR: - case TPT_ERR_OUT_OF_RQE: - case TPT_ERR_DDP_VERSION: - case TPT_ERR_RDMA_VERSION: - case TPT_ERR_DDP_QUEUE_NUM: - case TPT_ERR_MSN: - case TPT_ERR_TBIT: - case TPT_ERR_MO: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_IRD_OVERFLOW: - case TPT_ERR_OPCODE: - wc->status = IB_WC_FATAL_ERR; - break; - case TPT_ERR_SWFLUSH: - wc->status = IB_WC_WR_FLUSH_ERR; - break; - default: - pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n", - CQE_STATUS(cqe), CQE_QPID(cqe)); - ret = -EINVAL; - } - } -out: - return ret; -} - -/* - * Get one cq entry from cxio and map it to openib. - * - * Returns: - * 0 EMPTY; - * 1 cqe returned - * -EAGAIN caller must try again - * any other -errno fatal error - */ -static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, - struct ib_wc *wc) -{ - struct iwch_qp *qhp; - struct t3_cqe *rd_cqe; - int ret; - - rd_cqe = cxio_next_cqe(&chp->cq); - - if (!rd_cqe) - return 0; - - qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); - if (qhp) { - spin_lock(&qhp->lock); - ret = __iwch_poll_cq_one(rhp, chp, qhp, wc); - spin_unlock(&qhp->lock); - } else { - ret = __iwch_poll_cq_one(rhp, chp, NULL, wc); - } - return ret; -} - -int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) -{ - struct iwch_dev *rhp; - struct iwch_cq *chp; - unsigned long flags; - int npolled; - int err = 0; - - chp = to_iwch_cq(ibcq); - rhp = chp->rhp; - - spin_lock_irqsave(&chp->lock, flags); - for (npolled = 0; npolled < num_entries; ++npolled) { - - /* - * Because T3 can post CQEs that are _not_ associated - * with a WR, we might have to poll again after removing - * one of these. - */ - do { - err = iwch_poll_cq_one(rhp, chp, wc + npolled); - } while (err == -EAGAIN); - if (err <= 0) - break; - } - spin_unlock_irqrestore(&chp->lock, flags); - - if (err < 0) - return err; - else { - return npolled; - } -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c deleted file mode 100644 index 9d356c1301c7..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include -#include -#include -#include "iwch_provider.h" -#include "iwch.h" -#include "iwch_cm.h" -#include "cxio_hal.h" -#include "cxio_wr.h" - -static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, - struct respQ_msg_t *rsp_msg, - enum ib_event_type ib_event, - int send_term) -{ - struct ib_event event; - struct iwch_qp_attributes attrs; - struct iwch_qp *qhp; - unsigned long flag; - - xa_lock(&rnicp->qps); - qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe)); - - if (!qhp) { - pr_err("%s unaffiliated error 0x%x qpid 0x%x\n", - __func__, CQE_STATUS(rsp_msg->cqe), - CQE_QPID(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - return; - } - - if ((qhp->attr.state == IWCH_QP_STATE_ERROR) || - (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) { - pr_debug("%s AE received after RTS - qp state %d qpid 0x%x status 0x%x\n", - __func__, - qhp->attr.state, qhp->wq.qpid, - CQE_STATUS(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - return; - } - - pr_err("%s - AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", - __func__, - CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), - CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), - CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - - atomic_inc(&qhp->refcnt); - xa_unlock(&rnicp->qps); - - if (qhp->attr.state == IWCH_QP_STATE_RTS) { - attrs.next_state = IWCH_QP_STATE_TERMINATE; - iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - if (send_term) - iwch_post_terminate(qhp, rsp_msg); - } - - event.event = ib_event; - event.device = chp->ibcq.device; - if (ib_event == IB_EVENT_CQ_ERR) - event.element.cq = &chp->ibcq; - else - event.element.qp = &qhp->ibqp; - - if (qhp->ibqp.event_handler) - (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); - - spin_lock_irqsave(&chp->comp_handler_lock, flag); - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); - spin_unlock_irqrestore(&chp->comp_handler_lock, flag); - - if (atomic_dec_and_test(&qhp->refcnt)) - wake_up(&qhp->wait); -} - -void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) -{ - struct iwch_dev *rnicp; - struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data; - struct iwch_cq *chp; - struct iwch_qp *qhp; - u32 cqid = RSPQ_CQID(rsp_msg); - unsigned long flag; - - rnicp = (struct iwch_dev *) rdev_p->ulp; - xa_lock(&rnicp->qps); - chp = get_chp(rnicp, cqid); - qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe)); - if (!chp || !qhp) { - pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", - cqid, CQE_QPID(rsp_msg->cqe), - CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), - CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), - CQE_WRID_LOW(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - goto out; - } - iwch_qp_add_ref(&qhp->ibqp); - atomic_inc(&chp->refcnt); - xa_unlock(&rnicp->qps); - - /* - * 1) completion of our sending a TERMINATE. - * 2) incoming TERMINATE message. 
- */ - if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) && - (CQE_STATUS(rsp_msg->cqe) == 0)) { - if (SQ_TYPE(rsp_msg->cqe)) { - pr_debug("%s QPID 0x%x ep %p disconnecting\n", - __func__, qhp->wq.qpid, qhp->ep); - iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC); - } else { - pr_debug("%s post REQ_ERR AE QPID 0x%x\n", __func__, - qhp->wq.qpid); - post_qp_event(rnicp, chp, rsp_msg, - IB_EVENT_QP_REQ_ERR, 0); - iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC); - } - goto done; - } - - /* Bad incoming Read request */ - if (SQ_TYPE(rsp_msg->cqe) && - (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) { - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); - goto done; - } - - /* Bad incoming write */ - if (RQ_TYPE(rsp_msg->cqe) && - (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) { - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); - goto done; - } - - switch (CQE_STATUS(rsp_msg->cqe)) { - - /* Completion Events */ - case TPT_ERR_SUCCESS: - - /* - * Confirm the destination entry if this is a RECV completion. - */ - if (qhp->ep && SQ_TYPE(rsp_msg->cqe)) - dst_confirm(qhp->ep->dst); - spin_lock_irqsave(&chp->comp_handler_lock, flag); - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); - spin_unlock_irqrestore(&chp->comp_handler_lock, flag); - break; - - case TPT_ERR_STAG: - case TPT_ERR_PDID: - case TPT_ERR_QPID: - case TPT_ERR_ACCESS: - case TPT_ERR_WRAP: - case TPT_ERR_BOUND: - case TPT_ERR_INVALIDATE_SHARED_MR: - case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1); - break; - - /* Device Fatal Errors */ - case TPT_ERR_ECC: - case TPT_ERR_ECC_PSTAG: - case TPT_ERR_INTERNAL_ERR: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1); - break; - - /* QP Fatal Errors */ - case TPT_ERR_OUT_OF_RQE: - case TPT_ERR_PBL_ADDR_BOUND: - case TPT_ERR_CRC: - case TPT_ERR_MARKER: - case TPT_ERR_PDU_LEN_ERR: - case TPT_ERR_DDP_VERSION: - case TPT_ERR_RDMA_VERSION: - case TPT_ERR_OPCODE: - case TPT_ERR_DDP_QUEUE_NUM: - case TPT_ERR_MSN: - case TPT_ERR_TBIT: - case TPT_ERR_MO: - case TPT_ERR_MSN_GAP: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_RQE_ADDR_BOUND: - case TPT_ERR_IRD_OVERFLOW: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); - break; - - default: - pr_err("Unknown T3 status 0x%x QPID 0x%x\n", - CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid); - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); - break; - } -done: - if (atomic_dec_and_test(&chp->refcnt)) - wake_up(&chp->wait); - iwch_qp_rem_ref(&qhp->ibqp); -out: - dev_kfree_skb_irq(skb); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c deleted file mode 100644 index ce0f2741821d..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include -#include - -#include -#include - -#include "cxio_hal.h" -#include "cxio_resource.h" -#include "iwch.h" -#include "iwch_provider.h" - -static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) -{ - u32 mmid; - - mhp->attr.state = 1; - mhp->attr.stag = stag; - mmid = stag >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); - return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); -} - -int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, int shift) -{ - u32 stag; - int ret; - - if (cxio_register_phys_mem(&rhp->rdev, - &stag, mhp->attr.pdid, - mhp->attr.perms, - mhp->attr.zbva, - mhp->attr.va_fbo, - mhp->attr.len, - shift - 12, - mhp->attr.pbl_size, mhp->attr.pbl_addr)) - return -ENOMEM; - - ret = iwch_finish_mem_reg(mhp, stag); - if (ret) - cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); - return ret; -} - -int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) -{ - mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev, - npages << 3); - - if (!mhp->attr.pbl_addr) - return -ENOMEM; - - mhp->attr.pbl_size = npages; - - return 0; -} - -void iwch_free_pbl(struct iwch_mr *mhp) -{ - cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, - mhp->attr.pbl_size << 3); -} - -int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset) -{ - return cxio_write_pbl(&mhp->rhp->rdev, pages, - mhp->attr.pbl_addr + (offset << 3), npages); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c deleted file mode 100644 index dcf02ec02810..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ /dev/null @@ -1,1321 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "cxio_hal.h" -#include "iwch.h" -#include "iwch_provider.h" -#include "iwch_cm.h" -#include -#include "common.h" - -static void iwch_dealloc_ucontext(struct ib_ucontext *context) -{ - struct iwch_dev *rhp = to_iwch_dev(context->device); - struct iwch_ucontext *ucontext = to_iwch_ucontext(context); - struct iwch_mm_entry *mm, *tmp; - - pr_debug("%s context %p\n", __func__, context); - list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) - kfree(mm); - cxio_release_ucontext(&rhp->rdev, &ucontext->uctx); -} - -static int iwch_alloc_ucontext(struct ib_ucontext *ucontext, - struct ib_udata *udata) -{ - struct ib_device *ibdev = ucontext->device; - struct iwch_ucontext *context = to_iwch_ucontext(ucontext); - struct iwch_dev *rhp = to_iwch_dev(ibdev); - - pr_debug("%s ibdev %p\n", __func__, ibdev); - cxio_init_ucontext(&rhp->rdev, &context->uctx); - INIT_LIST_HEAD(&context->mmaps); - spin_lock_init(&context->mmap_lock); - return 0; -} - -static void iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) -{ - struct iwch_cq *chp; - - pr_debug("%s ib_cq %p\n", __func__, ib_cq); - chp = to_iwch_cq(ib_cq); - - xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid); - atomic_dec(&chp->refcnt); - wait_event(chp->wait, !atomic_read(&chp->refcnt)); - - cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); -} - -static int iwch_create_cq(struct ib_cq *ibcq, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) -{ - struct ib_device *ibdev = ibcq->device; - int entries = attr->cqe; - struct iwch_dev *rhp = to_iwch_dev(ibcq->device); - struct iwch_cq *chp = to_iwch_cq(ibcq); - struct iwch_create_cq_resp uresp; - struct iwch_create_cq_req ureq; - static int warned; - size_t resplen; - - pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries); - if (attr->flags) - return -EINVAL; - - if (udata) { - if (!t3a_device(rhp)) { - if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) - return -EFAULT; - - chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr; - } - } - - if (t3a_device(rhp)) { - - /* - * T3A: Add some fluff to handle extra CQEs inserted - * for various errors. - * Additional CQE possibilities: - * TERMINATE, - * incoming RDMA WRITE Failures - * incoming RDMA READ REQUEST FAILUREs - * NOTE: We cannot ensure the CQ won't overflow. 
- */ - entries += 16; - } - entries = roundup_pow_of_two(entries); - chp->cq.size_log2 = ilog2(entries); - - if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata)) - return -ENOMEM; - - chp->rhp = rhp; - chp->ibcq.cqe = 1 << chp->cq.size_log2; - spin_lock_init(&chp->lock); - spin_lock_init(&chp->comp_handler_lock); - atomic_set(&chp->refcnt, 1); - init_waitqueue_head(&chp->wait); - if (xa_store_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL)) { - cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); - return -ENOMEM; - } - - if (udata) { - struct iwch_mm_entry *mm; - struct iwch_ucontext *ucontext = rdma_udata_to_drv_context( - udata, struct iwch_ucontext, ibucontext); - - mm = kmalloc(sizeof(*mm), GFP_KERNEL); - if (!mm) { - iwch_destroy_cq(&chp->ibcq, udata); - return -ENOMEM; - } - uresp.cqid = chp->cq.cqid; - uresp.size_log2 = chp->cq.size_log2; - spin_lock(&ucontext->mmap_lock); - uresp.key = ucontext->key; - ucontext->key += PAGE_SIZE; - spin_unlock(&ucontext->mmap_lock); - mm->key = uresp.key; - mm->addr = virt_to_phys(chp->cq.queue); - if (udata->outlen < sizeof(uresp)) { - if (!warned++) - pr_warn("Warning - downlevel libcxgb3 (non-fatal)\n"); - mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * - sizeof(struct t3_cqe)); - resplen = sizeof(struct iwch_create_cq_resp_v0); - } else { - mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) * - sizeof(struct t3_cqe)); - uresp.memsize = mm->len; - uresp.reserved = 0; - resplen = sizeof(uresp); - } - if (ib_copy_to_udata(udata, &uresp, resplen)) { - kfree(mm); - iwch_destroy_cq(&chp->ibcq, udata); - return -EFAULT; - } - insert_mmap(ucontext, mm); - } - pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr %pad\n", - chp->cq.cqid, chp, (1 << chp->cq.size_log2), - &chp->cq.dma_addr); - return 0; -} - -static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) -{ - struct iwch_dev *rhp; - struct iwch_cq *chp; - enum t3_cq_opcode cq_op; - int err; - unsigned long flag; - u32 rptr; - - chp = to_iwch_cq(ibcq); - rhp = chp->rhp; - if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) - cq_op = CQ_ARM_SE; - else - cq_op = CQ_ARM_AN; - if (chp->user_rptr_addr) { - if (get_user(rptr, chp->user_rptr_addr)) - return -EFAULT; - spin_lock_irqsave(&chp->lock, flag); - chp->cq.rptr = rptr; - } else - spin_lock_irqsave(&chp->lock, flag); - pr_debug("%s rptr 0x%x\n", __func__, chp->cq.rptr); - err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0); - spin_unlock_irqrestore(&chp->lock, flag); - if (err < 0) - pr_err("Error %d rearming CQID 0x%x\n", err, chp->cq.cqid); - if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) - err = 0; - return err; -} - -static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - int len = vma->vm_end - vma->vm_start; - u32 key = vma->vm_pgoff << PAGE_SHIFT; - struct cxio_rdev *rdev_p; - int ret = 0; - struct iwch_mm_entry *mm; - struct iwch_ucontext *ucontext; - u64 addr; - - pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff, - key, len); - - if (vma->vm_start & (PAGE_SIZE-1)) { - return -EINVAL; - } - - rdev_p = &(to_iwch_dev(context->device)->rdev); - ucontext = to_iwch_ucontext(context); - - mm = remove_mmap(ucontext, key, len); - if (!mm) - return -EINVAL; - addr = mm->addr; - kfree(mm); - - if ((addr >= rdev_p->rnic_info.udbell_physbase) && - (addr < (rdev_p->rnic_info.udbell_physbase + - rdev_p->rnic_info.udbell_len))) { - - /* - * Map T3 DB register. 
- */ - if (vma->vm_flags & VM_READ) { - return -EPERM; - } - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - vma->vm_flags &= ~VM_MAYREAD; - ret = io_remap_pfn_range(vma, vma->vm_start, - addr >> PAGE_SHIFT, - len, vma->vm_page_prot); - } else { - - /* - * Map WQ or CQ contig dma memory... - */ - ret = remap_pfn_range(vma, vma->vm_start, - addr >> PAGE_SHIFT, - len, vma->vm_page_prot); - } - - return ret; -} - -static void iwch_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - - php = to_iwch_pd(pd); - rhp = php->rhp; - pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); - cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid); -} - -static int iwch_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) -{ - struct iwch_pd *php = to_iwch_pd(pd); - struct ib_device *ibdev = pd->device; - u32 pdid; - struct iwch_dev *rhp; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - rhp = (struct iwch_dev *) ibdev; - pdid = cxio_hal_get_pdid(rhp->rdev.rscp); - if (!pdid) - return -EINVAL; - - php->pdid = pdid; - php->rhp = rhp; - if (udata) { - struct iwch_alloc_pd_resp resp = {.pdid = php->pdid}; - - if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { - iwch_deallocate_pd(&php->ibpd, udata); - return -EFAULT; - } - } - pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); - return 0; -} - -static int iwch_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_mr *mhp; - u32 mmid; - - pr_debug("%s ib_mr %p\n", __func__, ib_mr); - - mhp = to_iwch_mr(ib_mr); - kfree(mhp->pages); - rhp = mhp->rhp; - mmid = mhp->attr.stag >> 8; - cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); - iwch_free_pbl(mhp); - xa_erase_irq(&rhp->mrs, mmid); - if (mhp->kva) - kfree((void *) (unsigned long) mhp->kva); - ib_umem_release(mhp->umem); - pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp); - kfree(mhp); - return 0; -} - -static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) -{ - const u64 total_size = 0xffffffff; - const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK; - struct iwch_pd *php = to_iwch_pd(pd); - struct iwch_dev *rhp = php->rhp; - struct iwch_mr *mhp; - __be64 *page_list; - int shift = 26, npages, ret, i; - - pr_debug("%s ib_pd %p\n", __func__, pd); - - /* - * T3 only supports 32 bits of size. 
- */ - if (sizeof(phys_addr_t) > 4) { - pr_warn_once("Cannot support dma_mrs on this platform\n"); - return ERR_PTR(-ENOTSUPP); - } - - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - - mhp->rhp = rhp; - - npages = (total_size + (1ULL << shift) - 1) >> shift; - if (!npages) { - ret = -EINVAL; - goto err; - } - - page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL); - if (!page_list) { - ret = -ENOMEM; - goto err; - } - - for (i = 0; i < npages; i++) - page_list[i] = cpu_to_be64((u64)i << shift); - - pr_debug("%s mask 0x%llx shift %d len %lld pbl_size %d\n", - __func__, mask, shift, total_size, npages); - - ret = iwch_alloc_pbl(mhp, npages); - if (ret) { - kfree(page_list); - goto err_pbl; - } - - ret = iwch_write_pbl(mhp, page_list, npages, 0); - kfree(page_list); - if (ret) - goto err_pbl; - - mhp->attr.pdid = php->pdid; - mhp->attr.zbva = 0; - - mhp->attr.perms = iwch_ib_to_tpt_access(acc); - mhp->attr.va_fbo = 0; - mhp->attr.page_size = shift - 12; - - mhp->attr.len = (u32) total_size; - mhp->attr.pbl_size = npages; - ret = iwch_register_mem(rhp, php, mhp, shift); - if (ret) - goto err_pbl; - - return &mhp->ibmr; - -err_pbl: - iwch_free_pbl(mhp); - -err: - kfree(mhp); - return ERR_PTR(ret); -} - -static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int acc, struct ib_udata *udata) -{ - __be64 *pages; - int shift, n, i; - int err = 0; - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mr *mhp; - struct iwch_reg_user_mr_resp uresp; - struct sg_dma_page_iter sg_iter; - pr_debug("%s ib_pd %p\n", __func__, pd); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - - mhp->rhp = rhp; - - mhp->umem = ib_umem_get(udata, start, length, acc, 0); - if (IS_ERR(mhp->umem)) { - err = PTR_ERR(mhp->umem); - kfree(mhp); - return ERR_PTR(err); - } - - shift = PAGE_SHIFT; - - n = ib_umem_num_pages(mhp->umem); - - err = iwch_alloc_pbl(mhp, n); - if (err) - goto err; - - pages = (__be64 *) __get_free_page(GFP_KERNEL); - if (!pages) { - err = -ENOMEM; - goto err_pbl; - } - - i = n = 0; - - for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) { - pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter)); - if (i == PAGE_SIZE / sizeof(*pages)) { - err = iwch_write_pbl(mhp, pages, i, n); - if (err) - goto pbl_done; - n += i; - i = 0; - } - } - - if (i) - err = iwch_write_pbl(mhp, pages, i, n); - -pbl_done: - free_page((unsigned long) pages); - if (err) - goto err_pbl; - - mhp->attr.pdid = php->pdid; - mhp->attr.zbva = 0; - mhp->attr.perms = iwch_ib_to_tpt_access(acc); - mhp->attr.va_fbo = virt; - mhp->attr.page_size = shift - 12; - mhp->attr.len = (u32) length; - - err = iwch_register_mem(rhp, php, mhp, shift); - if (err) - goto err_pbl; - - if (udata && !t3a_device(rhp)) { - uresp.pbl_addr = (mhp->attr.pbl_addr - - rhp->rdev.rnic_info.pbl_base) >> 3; - pr_debug("%s user resp pbl_addr 0x%x\n", __func__, - uresp.pbl_addr); - - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { - iwch_dereg_mr(&mhp->ibmr, udata); - err = -EFAULT; - goto err; - } - } - - return &mhp->ibmr; - -err_pbl: - iwch_free_pbl(mhp); - -err: - ib_umem_release(mhp->umem); - kfree(mhp); - return ERR_PTR(err); -} - -static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mw *mhp; - u32 mmid; - u32 stag = 0; - int ret; - - if (type != IB_MW_TYPE_1) - 
return ERR_PTR(-EINVAL); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid); - if (ret) { - kfree(mhp); - return ERR_PTR(ret); - } - mhp->rhp = rhp; - mhp->attr.pdid = php->pdid; - mhp->attr.type = TPT_MW; - mhp->attr.stag = stag; - mmid = (stag) >> 8; - mhp->ibmw.rkey = stag; - if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { - cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); - kfree(mhp); - return ERR_PTR(-ENOMEM); - } - pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); - return &(mhp->ibmw); -} - -static int iwch_dealloc_mw(struct ib_mw *mw) -{ - struct iwch_dev *rhp; - struct iwch_mw *mhp; - u32 mmid; - - mhp = to_iwch_mw(mw); - rhp = mhp->rhp; - mmid = (mw->rkey) >> 8; - cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); - xa_erase_irq(&rhp->mrs, mmid); - pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); - kfree(mhp); - return 0; -} - -static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mr *mhp; - u32 mmid; - u32 stag = 0; - int ret = -ENOMEM; - - if (mr_type != IB_MR_TYPE_MEM_REG || - max_num_sg > T3_MAX_FASTREG_DEPTH) - return ERR_PTR(-EINVAL); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - goto err; - - mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); - if (!mhp->pages) - goto pl_err; - - mhp->rhp = rhp; - ret = iwch_alloc_pbl(mhp, max_num_sg); - if (ret) - goto err1; - mhp->attr.pbl_size = max_num_sg; - ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid, - mhp->attr.pbl_size, mhp->attr.pbl_addr); - if (ret) - goto err2; - mhp->attr.pdid = php->pdid; - mhp->attr.type = TPT_NON_SHARED_MR; - mhp->attr.stag = stag; - mhp->attr.state = 1; - mmid = (stag) >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - ret = xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL); - if (ret) - goto err3; - - pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); - return &(mhp->ibmr); -err3: - cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); -err2: - iwch_free_pbl(mhp); -err1: - kfree(mhp->pages); -pl_err: - kfree(mhp); -err: - return ERR_PTR(ret); -} - -static int iwch_set_page(struct ib_mr *ibmr, u64 addr) -{ - struct iwch_mr *mhp = to_iwch_mr(ibmr); - - if (unlikely(mhp->npages == mhp->attr.pbl_size)) - return -ENOMEM; - - mhp->pages[mhp->npages++] = addr; - - return 0; -} - -static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, - int sg_nents, unsigned int *sg_offset) -{ - struct iwch_mr *mhp = to_iwch_mr(ibmr); - - mhp->npages = 0; - - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page); -} - -static int iwch_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - struct iwch_qp_attributes attrs; - struct iwch_ucontext *ucontext; - - qhp = to_iwch_qp(ib_qp); - rhp = qhp->rhp; - - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0); - wait_event(qhp->wait, !qhp->ep); - - xa_erase_irq(&rhp->qps, qhp->wq.qpid); - - atomic_dec(&qhp->refcnt); - wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); - - ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext, - ibucontext); - cxio_destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); - - pr_debug("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__, - ib_qp, qhp->wq.qpid, qhp); - kfree(qhp); - return 0; -} - -static struct ib_qp *iwch_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *attrs, - struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - struct iwch_pd *php; - struct iwch_cq *schp; - struct iwch_cq *rchp; - struct iwch_create_qp_resp uresp; - int wqsize, sqsize, rqsize; - struct iwch_ucontext *ucontext; - - pr_debug("%s ib_pd %p\n", __func__, pd); - if (attrs->qp_type != IB_QPT_RC) - return ERR_PTR(-EINVAL); - php = to_iwch_pd(pd); - rhp = php->rhp; - schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid); - rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid); - if (!schp || !rchp) - return ERR_PTR(-EINVAL); - - /* The RQT size must be # of entries + 1 rounded up to a power of two */ - rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr); - if (rqsize == attrs->cap.max_recv_wr) - rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1); - - /* T3 doesn't support RQT depth < 16 */ - if (rqsize < 16) - rqsize = 16; - - if (rqsize > T3_MAX_RQ_SIZE) - return ERR_PTR(-EINVAL); - - if (attrs->cap.max_inline_data > T3_MAX_INLINE) - return ERR_PTR(-EINVAL); - - /* - * NOTE: The SQ and total WQ sizes don't need to be - * a power of two. However, all the code assumes - * they are. EG: Q_FREECNT() and friends. - */ - sqsize = roundup_pow_of_two(attrs->cap.max_send_wr); - wqsize = roundup_pow_of_two(rqsize + sqsize); - - /* - * Kernel users need more wq space for fastreg WRs which can take - * 2 WR fragments. - */ - ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext, - ibucontext); - if (!ucontext && wqsize < (rqsize + (2 * sqsize))) - wqsize = roundup_pow_of_two(rqsize + - roundup_pow_of_two(attrs->cap.max_send_wr * 2)); - pr_debug("%s wqsize %d sqsize %d rqsize %d\n", __func__, - wqsize, sqsize, rqsize); - qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); - if (!qhp) - return ERR_PTR(-ENOMEM); - qhp->wq.size_log2 = ilog2(wqsize); - qhp->wq.rq_size_log2 = ilog2(rqsize); - qhp->wq.sq_size_log2 = ilog2(sqsize); - if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) { - kfree(qhp); - return ERR_PTR(-ENOMEM); - } - - attrs->cap.max_recv_wr = rqsize - 1; - attrs->cap.max_send_wr = sqsize; - attrs->cap.max_inline_data = T3_MAX_INLINE; - - qhp->rhp = rhp; - qhp->attr.pd = php->pdid; - qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid; - qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid; - qhp->attr.sq_num_entries = attrs->cap.max_send_wr; - qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; - qhp->attr.sq_max_sges = attrs->cap.max_send_sge; - qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; - qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; - qhp->attr.state = IWCH_QP_STATE_IDLE; - qhp->attr.next_state = IWCH_QP_STATE_IDLE; - - /* - * XXX - These don't get passed in from the openib user - * at create time. The CM sets them via a QP modify. - * Need to fix... I think the CM should - */ - qhp->attr.enable_rdma_read = 1; - qhp->attr.enable_rdma_write = 1; - qhp->attr.enable_bind = 1; - qhp->attr.max_ord = 1; - qhp->attr.max_ird = 1; - - spin_lock_init(&qhp->lock); - init_waitqueue_head(&qhp->wait); - atomic_set(&qhp->refcnt, 1); - - if (xa_store_irq(&rhp->qps, qhp->wq.qpid, qhp, GFP_KERNEL)) { - cxio_destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); - kfree(qhp); - return ERR_PTR(-ENOMEM); - } - - if (udata) { - - struct iwch_mm_entry *mm1, *mm2; - - mm1 = kmalloc(sizeof(*mm1), GFP_KERNEL); - if (!mm1) { - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-ENOMEM); - } - - mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL); - if (!mm2) { - kfree(mm1); - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-ENOMEM); - } - - uresp.qpid = qhp->wq.qpid; - uresp.size_log2 = qhp->wq.size_log2; - uresp.sq_size_log2 = qhp->wq.sq_size_log2; - uresp.rq_size_log2 = qhp->wq.rq_size_log2; - spin_lock(&ucontext->mmap_lock); - uresp.key = ucontext->key; - ucontext->key += PAGE_SIZE; - uresp.db_key = ucontext->key; - ucontext->key += PAGE_SIZE; - spin_unlock(&ucontext->mmap_lock); - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { - kfree(mm1); - kfree(mm2); - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-EFAULT); - } - mm1->key = uresp.key; - mm1->addr = virt_to_phys(qhp->wq.queue); - mm1->len = PAGE_ALIGN(wqsize * sizeof(union t3_wr)); - insert_mmap(ucontext, mm1); - mm2->key = uresp.db_key; - mm2->addr = qhp->wq.udb & PAGE_MASK; - mm2->len = PAGE_SIZE; - insert_mmap(ucontext, mm2); - } - qhp->ibqp.qp_num = qhp->wq.qpid; - pr_debug( - "%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr %pad size %d rq_addr 0x%x\n", - __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, - qhp->wq.qpid, qhp, &qhp->wq.dma_addr, 1 << qhp->wq.size_log2, - qhp->wq.rq_addr); - return &qhp->ibqp; -} - -static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - enum iwch_qp_attr_mask mask = 0; - struct iwch_qp_attributes attrs = {}; - - pr_debug("%s ib_qp %p\n", __func__, ibqp); - - /* iwarp does not support the RTR state */ - if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) - attr_mask &= ~IB_QP_STATE; - - /* Make sure we still have something left to do */ - if (!attr_mask) - return 0; - - qhp = to_iwch_qp(ibqp); - rhp = qhp->rhp; - - attrs.next_state = iwch_convert_state(attr->qp_state); - attrs.enable_rdma_read = (attr->qp_access_flags & - IB_ACCESS_REMOTE_READ) ? 1 : 0; - attrs.enable_rdma_write = (attr->qp_access_flags & - IB_ACCESS_REMOTE_WRITE) ? 1 : 0; - attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0; - - - mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0; - mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ? 
- (IWCH_QP_ATTR_ENABLE_RDMA_READ | - IWCH_QP_ATTR_ENABLE_RDMA_WRITE | - IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0; - - return iwch_modify_qp(rhp, qhp, mask, &attrs, 0); -} - -void iwch_qp_add_ref(struct ib_qp *qp) -{ - pr_debug("%s ib_qp %p\n", __func__, qp); - atomic_inc(&(to_iwch_qp(qp)->refcnt)); -} - -void iwch_qp_rem_ref(struct ib_qp *qp) -{ - pr_debug("%s ib_qp %p\n", __func__, qp); - if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt))) - wake_up(&(to_iwch_qp(qp)->wait)); -} - -static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn) -{ - pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn); - return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn); -} - - -static int iwch_query_pkey(struct ib_device *ibdev, - u8 port, u16 index, u16 * pkey) -{ - pr_debug("%s ibdev %p\n", __func__, ibdev); - *pkey = 0; - return 0; -} - -static int iwch_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) -{ - struct iwch_dev *dev; - - pr_debug("%s ibdev %p, port %d, index %d, gid %p\n", - __func__, ibdev, port, index, gid); - dev = to_iwch_dev(ibdev); - BUG_ON(port == 0 || port > 2); - memset(&(gid->raw[0]), 0, sizeof(gid->raw)); - memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6); - return 0; -} - -static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev) -{ - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - char *cp, *next; - unsigned fw_maj, fw_min, fw_mic; - - lldev->ethtool_ops->get_drvinfo(lldev, &info); - - next = info.fw_version + 1; - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_maj); - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_min); - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_mic); - - return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) | - (fw_mic & 0xffff); -} - -static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props, - struct ib_udata *uhw) -{ - - struct iwch_dev *dev; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - - if (uhw->inlen || uhw->outlen) - return -EINVAL; - - dev = to_iwch_dev(ibdev); - memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); - props->hw_ver = dev->rdev.t3cdev_p->type; - props->fw_ver = fw_vers_string_to_u64(dev); - props->device_cap_flags = dev->device_cap_flags; - props->page_size_cap = dev->attr.mem_pgsizes_bitmask; - props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; - props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; - props->max_mr_size = dev->attr.max_mr_size; - props->max_qp = dev->attr.max_qps; - props->max_qp_wr = dev->attr.max_wrs; - props->max_send_sge = dev->attr.max_sge_per_wr; - props->max_recv_sge = dev->attr.max_sge_per_wr; - props->max_sge_rd = 1; - props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; - props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; - props->max_cq = dev->attr.max_cqs; - props->max_cqe = dev->attr.max_cqes_per_cq; - props->max_mr = dev->attr.max_mem_regs; - props->max_pd = dev->attr.max_pds; - props->local_ca_ack_delay = 0; - props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH; - - return 0; -} - -static int iwch_query_port(struct ib_device *ibdev, - u8 port, struct ib_port_attr *props) -{ - pr_debug("%s ibdev %p\n", __func__, ibdev); - - props->port_cap_flags = - IB_PORT_CM_SUP | - IB_PORT_SNMP_TUNNEL_SUP | - IB_PORT_REINIT_SUP | - IB_PORT_DEVICE_MGMT_SUP | - IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; - props->gid_tbl_len = 1; - props->pkey_tbl_len = 1; - props->active_width = 2; - props->active_speed = 
IB_SPEED_DDR; - props->max_msg_sz = -1; - - return 0; -} - -static ssize_t hw_rev_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - - pr_debug("%s dev 0x%p\n", __func__, dev); - return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); -} -static DEVICE_ATTR_RO(hw_rev); - -static ssize_t hca_type_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - - pr_debug("%s dev 0x%p\n", __func__, dev); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - return sprintf(buf, "%s\n", info.driver); -} -static DEVICE_ATTR_RO(hca_type); - -static ssize_t board_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - - pr_debug("%s dev 0x%p\n", __func__, dev); - return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor, - iwch_dev->rdev.rnic_info.pdev->device); -} -static DEVICE_ATTR_RO(board_id); - -enum counters { - IPINRECEIVES, - IPINHDRERRORS, - IPINADDRERRORS, - IPINUNKNOWNPROTOS, - IPINDISCARDS, - IPINDELIVERS, - IPOUTREQUESTS, - IPOUTDISCARDS, - IPOUTNOROUTES, - IPREASMTIMEOUT, - IPREASMREQDS, - IPREASMOKS, - IPREASMFAILS, - TCPACTIVEOPENS, - TCPPASSIVEOPENS, - TCPATTEMPTFAILS, - TCPESTABRESETS, - TCPCURRESTAB, - TCPINSEGS, - TCPOUTSEGS, - TCPRETRANSSEGS, - TCPINERRS, - TCPOUTRSTS, - TCPRTOMIN, - TCPRTOMAX, - NR_COUNTERS -}; - -static const char * const names[] = { - [IPINRECEIVES] = "ipInReceives", - [IPINHDRERRORS] = "ipInHdrErrors", - [IPINADDRERRORS] = "ipInAddrErrors", - [IPINUNKNOWNPROTOS] = "ipInUnknownProtos", - [IPINDISCARDS] = "ipInDiscards", - [IPINDELIVERS] = "ipInDelivers", - [IPOUTREQUESTS] = "ipOutRequests", - [IPOUTDISCARDS] = "ipOutDiscards", - [IPOUTNOROUTES] = "ipOutNoRoutes", - [IPREASMTIMEOUT] = "ipReasmTimeout", - [IPREASMREQDS] = "ipReasmReqds", - [IPREASMOKS] = "ipReasmOKs", - [IPREASMFAILS] = "ipReasmFails", - [TCPACTIVEOPENS] = "tcpActiveOpens", - [TCPPASSIVEOPENS] = "tcpPassiveOpens", - [TCPATTEMPTFAILS] = "tcpAttemptFails", - [TCPESTABRESETS] = "tcpEstabResets", - [TCPCURRESTAB] = "tcpCurrEstab", - [TCPINSEGS] = "tcpInSegs", - [TCPOUTSEGS] = "tcpOutSegs", - [TCPRETRANSSEGS] = "tcpRetransSegs", - [TCPINERRS] = "tcpInErrs", - [TCPOUTRSTS] = "tcpOutRsts", - [TCPRTOMIN] = "tcpRtoMin", - [TCPRTOMAX] = "tcpRtoMax", -}; - -static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev, - u8 port_num) -{ - BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS); - - /* Our driver only supports device level stats */ - if (port_num != 0) - return NULL; - - return rdma_alloc_hw_stats_struct(names, NR_COUNTERS, - RDMA_HW_STATS_DEFAULT_LIFESPAN); -} - -static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats, - u8 port, int index) -{ - struct iwch_dev *dev; - struct tp_mib_stats m; - int ret; - - if (port != 0 || !stats) - return -ENOSYS; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - dev = to_iwch_dev(ibdev); - ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m); - if (ret) - return -ENOSYS; - - stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) + m.ipInReceive_lo; - stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo; - stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) 
+ m.ipInAddrErrors_lo; - stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo; - stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo; - stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo; - stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + m.ipOutRequests_lo; - stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo; - stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo; - stats->value[IPREASMTIMEOUT] = m.ipReasmTimeout; - stats->value[IPREASMREQDS] = m.ipReasmReqds; - stats->value[IPREASMOKS] = m.ipReasmOKs; - stats->value[IPREASMFAILS] = m.ipReasmFails; - stats->value[TCPACTIVEOPENS] = m.tcpActiveOpens; - stats->value[TCPPASSIVEOPENS] = m.tcpPassiveOpens; - stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails; - stats->value[TCPESTABRESETS] = m.tcpEstabResets; - stats->value[TCPCURRESTAB] = m.tcpOutRsts; - stats->value[TCPINSEGS] = m.tcpCurrEstab; - stats->value[TCPOUTSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo; - stats->value[TCPRETRANSSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo; - stats->value[TCPINERRS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo, - stats->value[TCPOUTRSTS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo; - stats->value[TCPRTOMIN] = m.tcpRtoMin; - stats->value[TCPRTOMAX] = m.tcpRtoMax; - - return stats->num_counters; -} - -static struct attribute *iwch_class_attributes[] = { - &dev_attr_hw_rev.attr, - &dev_attr_hca_type.attr, - &dev_attr_board_id.attr, - NULL -}; - -static const struct attribute_group iwch_attr_group = { - .attrs = iwch_class_attributes, -}; - -static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) -{ - struct ib_port_attr attr; - int err; - - immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; - - err = ib_query_port(ibdev, port_num, &attr); - if (err) - return err; - - immutable->pkey_tbl_len = attr.pkey_tbl_len; - immutable->gid_tbl_len = attr.gid_tbl_len; - - return 0; -} - -static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str) -{ - struct iwch_dev *iwch_dev = to_iwch_dev(ibdev); - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - - pr_debug("%s dev 0x%p\n", __func__, iwch_dev); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); -} - -static const struct ib_device_ops iwch_dev_ops = { - .owner = THIS_MODULE, - .driver_id = RDMA_DRIVER_CXGB3, - .uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION, - .uverbs_no_driver_id_binding = 1, - - .alloc_hw_stats = iwch_alloc_stats, - .alloc_mr = iwch_alloc_mr, - .alloc_mw = iwch_alloc_mw, - .alloc_pd = iwch_allocate_pd, - .alloc_ucontext = iwch_alloc_ucontext, - .create_cq = iwch_create_cq, - .create_qp = iwch_create_qp, - .dealloc_mw = iwch_dealloc_mw, - .dealloc_pd = iwch_deallocate_pd, - .dealloc_ucontext = iwch_dealloc_ucontext, - .dereg_mr = iwch_dereg_mr, - .destroy_cq = iwch_destroy_cq, - .destroy_qp = iwch_destroy_qp, - .get_dev_fw_str = get_dev_fw_ver_str, - .get_dma_mr = iwch_get_dma_mr, - .get_hw_stats = iwch_get_mib, - .get_port_immutable = iwch_port_immutable, - .iw_accept = iwch_accept_cr, - .iw_add_ref = iwch_qp_add_ref, - .iw_connect = iwch_connect, - .iw_create_listen = iwch_create_listen, - .iw_destroy_listen = iwch_destroy_listen, - .iw_get_qp = iwch_get_qp, - .iw_reject = iwch_reject_cr, - .iw_rem_ref = iwch_qp_rem_ref, - .map_mr_sg = 
iwch_map_mr_sg, - .mmap = iwch_mmap, - .modify_qp = iwch_ib_modify_qp, - .poll_cq = iwch_poll_cq, - .post_recv = iwch_post_receive, - .post_send = iwch_post_send, - .query_device = iwch_query_device, - .query_gid = iwch_query_gid, - .query_pkey = iwch_query_pkey, - .query_port = iwch_query_port, - .reg_user_mr = iwch_reg_user_mr, - .req_notify_cq = iwch_arm_cq, - INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd), - INIT_RDMA_OBJ_SIZE(ib_cq, iwch_cq, ibcq), - INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext), -}; - -static int set_netdevs(struct ib_device *ib_dev, struct cxio_rdev *rdev) -{ - int ret; - int i; - - for (i = 0; i < rdev->port_info.nports; i++) { - ret = ib_device_set_netdev(ib_dev, rdev->port_info.lldevs[i], - i + 1); - if (ret) - return ret; - } - return 0; -} - -int iwch_register_device(struct iwch_dev *dev) -{ - int err; - - pr_debug("%s iwch_dev %p\n", __func__, dev); - memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); - memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); - dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | - IB_DEVICE_MEM_WINDOW | - IB_DEVICE_MEM_MGT_EXTENSIONS; - - /* cxgb3 supports STag 0. */ - dev->ibdev.local_dma_lkey = 0; - - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV); - dev->ibdev.node_type = RDMA_NODE_RNIC; - BUILD_BUG_ON(sizeof(IWCH_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); - memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); - dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; - dev->ibdev.num_comp_vectors = 1; - dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev; - - memcpy(dev->ibdev.iw_ifname, dev->rdev.t3cdev_p->lldev->name, - sizeof(dev->ibdev.iw_ifname)); - - rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); - ib_set_device_ops(&dev->ibdev, &iwch_dev_ops); - err = set_netdevs(&dev->ibdev, &dev->rdev); - if (err) - return err; - - return ib_register_device(&dev->ibdev, "cxgb3_%d"); -} - -void iwch_unregister_device(struct iwch_dev *dev) -{ - pr_debug("%s iwch_dev %p\n", __func__, dev); - ib_unregister_device(&dev->ibdev); - return; -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h deleted file mode 100644 index 8adbe9658935..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __IWCH_PROVIDER_H__ -#define __IWCH_PROVIDER_H__ - -#include -#include -#include -#include -#include "t3cdev.h" -#include "iwch.h" -#include "cxio_wr.h" -#include "cxio_hal.h" - -struct iwch_pd { - struct ib_pd ibpd; - u32 pdid; - struct iwch_dev *rhp; -}; - -static inline struct iwch_pd *to_iwch_pd(struct ib_pd *ibpd) -{ - return container_of(ibpd, struct iwch_pd, ibpd); -} - -struct tpt_attributes { - u32 stag; - u32 state:1; - u32 type:2; - u32 rsvd:1; - enum tpt_mem_perm perms; - u32 remote_invaliate_disable:1; - u32 zbva:1; - u32 mw_bind_enable:1; - u32 page_size:5; - - u32 pdid; - u32 qpid; - u32 pbl_addr; - u32 len; - u64 va_fbo; - u32 pbl_size; -}; - -struct iwch_mr { - struct ib_mr ibmr; - struct ib_umem *umem; - struct iwch_dev *rhp; - u64 kva; - struct tpt_attributes attr; - u64 *pages; - u32 npages; -}; - -typedef struct iwch_mw iwch_mw_handle; - -static inline struct iwch_mr *to_iwch_mr(struct ib_mr *ibmr) -{ - return container_of(ibmr, struct iwch_mr, ibmr); -} - -struct iwch_mw { - struct ib_mw ibmw; - struct iwch_dev *rhp; - u64 kva; - struct tpt_attributes attr; -}; - -static inline struct iwch_mw *to_iwch_mw(struct ib_mw *ibmw) -{ - return container_of(ibmw, struct iwch_mw, ibmw); -} - -struct iwch_cq { - struct ib_cq ibcq; - struct iwch_dev *rhp; - struct t3_cq cq; - spinlock_t lock; - spinlock_t comp_handler_lock; - atomic_t refcnt; - wait_queue_head_t wait; - u32 __user *user_rptr_addr; -}; - -static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq) -{ - return container_of(ibcq, struct iwch_cq, ibcq); -} - -enum IWCH_QP_FLAGS { - QP_QUIESCED = 0x01 -}; - -struct iwch_mpa_attributes { - u8 initiator; - u8 recv_marker_enabled; - u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */ - u8 crc_enabled; - u8 version; /* 0 or 1 */ -}; - -struct iwch_qp_attributes { - u32 scq; - u32 rcq; - u32 sq_num_entries; - u32 rq_num_entries; - u32 sq_max_sges; - u32 sq_max_sges_rdma_write; - u32 rq_max_sges; - u32 state; - u8 enable_rdma_read; - u8 enable_rdma_write; /* enable inbound Read Resp. */ - u8 enable_bind; - u8 enable_mmid0_fastreg; /* Enable STAG0 + Fast-register */ - /* - * Next QP state. If specify the current state, only the - * QP attributes will be modified. 
- */ - u32 max_ord; - u32 max_ird; - u32 pd; /* IN */ - u32 next_state; - char terminate_buffer[52]; - u32 terminate_msg_len; - u8 is_terminate_local; - struct iwch_mpa_attributes mpa_attr; /* IN-OUT */ - struct iwch_ep *llp_stream_handle; - char *stream_msg_buf; /* Last stream msg. before Idle -> RTS */ - u32 stream_msg_buf_len; /* Only on Idle -> RTS */ -}; - -struct iwch_qp { - struct ib_qp ibqp; - struct iwch_dev *rhp; - struct iwch_ep *ep; - struct iwch_qp_attributes attr; - struct t3_wq wq; - spinlock_t lock; - atomic_t refcnt; - wait_queue_head_t wait; - enum IWCH_QP_FLAGS flags; -}; - -static inline int qp_quiesced(struct iwch_qp *qhp) -{ - return qhp->flags & QP_QUIESCED; -} - -static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp) -{ - return container_of(ibqp, struct iwch_qp, ibqp); -} - -void iwch_qp_add_ref(struct ib_qp *qp); -void iwch_qp_rem_ref(struct ib_qp *qp); - -struct iwch_ucontext { - struct ib_ucontext ibucontext; - struct cxio_ucontext uctx; - u32 key; - spinlock_t mmap_lock; - struct list_head mmaps; -}; - -static inline struct iwch_ucontext *to_iwch_ucontext(struct ib_ucontext *c) -{ - return container_of(c, struct iwch_ucontext, ibucontext); -} - -struct iwch_mm_entry { - struct list_head entry; - u64 addr; - u32 key; - unsigned len; -}; - -static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext, - u32 key, unsigned len) -{ - struct list_head *pos, *nxt; - struct iwch_mm_entry *mm; - - spin_lock(&ucontext->mmap_lock); - list_for_each_safe(pos, nxt, &ucontext->mmaps) { - - mm = list_entry(pos, struct iwch_mm_entry, entry); - if (mm->key == key && mm->len == len) { - list_del_init(&mm->entry); - spin_unlock(&ucontext->mmap_lock); - pr_debug("%s key 0x%x addr 0x%llx len %d\n", - __func__, key, - (unsigned long long)mm->addr, mm->len); - return mm; - } - } - spin_unlock(&ucontext->mmap_lock); - return NULL; -} - -static inline void insert_mmap(struct iwch_ucontext *ucontext, - struct iwch_mm_entry *mm) -{ - spin_lock(&ucontext->mmap_lock); - pr_debug("%s key 0x%x addr 0x%llx len %d\n", - __func__, mm->key, (unsigned long long)mm->addr, mm->len); - list_add_tail(&mm->entry, &ucontext->mmaps); - spin_unlock(&ucontext->mmap_lock); -} - -enum iwch_qp_attr_mask { - IWCH_QP_ATTR_NEXT_STATE = 1 << 0, - IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, - IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, - IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, - IWCH_QP_ATTR_MAX_ORD = 1 << 11, - IWCH_QP_ATTR_MAX_IRD = 1 << 12, - IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22, - IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23, - IWCH_QP_ATTR_MPA_ATTR = 1 << 24, - IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25, - IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ | - IWCH_QP_ATTR_ENABLE_RDMA_WRITE | - IWCH_QP_ATTR_MAX_ORD | - IWCH_QP_ATTR_MAX_IRD | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | - IWCH_QP_ATTR_STREAM_MSG_BUFFER | - IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE) -}; - -int iwch_modify_qp(struct iwch_dev *rhp, - struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs, - int internal); - -enum iwch_qp_state { - IWCH_QP_STATE_IDLE, - IWCH_QP_STATE_RTS, - IWCH_QP_STATE_ERROR, - IWCH_QP_STATE_TERMINATE, - IWCH_QP_STATE_CLOSING, - IWCH_QP_STATE_TOT -}; - -static inline int iwch_convert_state(enum ib_qp_state ib_state) -{ - switch (ib_state) { - case IB_QPS_RESET: - case IB_QPS_INIT: - return IWCH_QP_STATE_IDLE; - case IB_QPS_RTS: - return IWCH_QP_STATE_RTS; - case IB_QPS_SQD: - return IWCH_QP_STATE_CLOSING; - case IB_QPS_SQE: - return 
IWCH_QP_STATE_TERMINATE; - case IB_QPS_ERR: - return IWCH_QP_STATE_ERROR; - default: - return -1; - } -} - -static inline u32 iwch_ib_to_tpt_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) | - (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) | - TPT_LOCAL_READ; -} - -static inline u32 iwch_ib_to_tpt_bind_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0); -} - -enum iwch_mmid_state { - IWCH_STAG_STATE_VALID, - IWCH_STAG_STATE_INVALID -}; - -enum iwch_qp_query_flags { - IWCH_QP_QUERY_CONTEXT_NONE = 0x0, /* No ctx; Only attrs */ - IWCH_QP_QUERY_CONTEXT_GET = 0x1, /* Get ctx + attrs */ - IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2, /* Not Supported */ - - /* - * Quiesce QP context; Consumer - * will NOT replay outstanding WR - */ - IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4, - IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8, - IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */ -}; - -u16 iwch_rqes_posted(struct iwch_qp *qhp); -int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr); -int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr); -int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); -int iwch_post_zb_read(struct iwch_ep *ep); -int iwch_register_device(struct iwch_dev *dev); -void iwch_unregister_device(struct iwch_dev *dev); -void stop_read_rep_timer(struct iwch_qp *qhp); -int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, int shift); -int iwch_alloc_pbl(struct iwch_mr *mhp, int npages); -void iwch_free_pbl(struct iwch_mr *mhp); -int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset); - -#define IWCH_NODE_DESC "cxgb3 Chelsio Communications" - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c deleted file mode 100644 index c649faad63f9..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ /dev/null @@ -1,1082 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include -#include -#include "iwch_provider.h" -#include "iwch.h" -#include "iwch_cm.h" -#include "cxio_hal.h" -#include "cxio_resource.h" - -#define NO_SUPPORT -1 - -static int build_rdma_send(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - int i; - u32 plen; - - switch (wr->opcode) { - case IB_WR_SEND: - if (wr->send_flags & IB_SEND_SOLICITED) - wqe->send.rdmaop = T3_SEND_WITH_SE; - else - wqe->send.rdmaop = T3_SEND; - wqe->send.rem_stag = 0; - break; - case IB_WR_SEND_WITH_INV: - if (wr->send_flags & IB_SEND_SOLICITED) - wqe->send.rdmaop = T3_SEND_WITH_SE_INV; - else - wqe->send.rdmaop = T3_SEND_WITH_INV; - wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey); - break; - default: - return -EINVAL; - } - if (wr->num_sge > T3_MAX_SGE) - return -EINVAL; - wqe->send.reserved[0] = 0; - wqe->send.reserved[1] = 0; - wqe->send.reserved[2] = 0; - plen = 0; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) - return -EMSGSIZE; - - plen += wr->sg_list[i].length; - wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); - wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); - } - wqe->send.num_sgle = cpu_to_be32(wr->num_sge); - *flit_cnt = 4 + ((wr->num_sge) << 1); - wqe->send.plen = cpu_to_be32(plen); - return 0; -} - -static int build_rdma_write(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - int i; - u32 plen; - if (wr->num_sge > T3_MAX_SGE) - return -EINVAL; - wqe->write.rdmaop = T3_RDMA_WRITE; - wqe->write.reserved[0] = 0; - wqe->write.reserved[1] = 0; - wqe->write.reserved[2] = 0; - wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); - wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); - - if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { - plen = 4; - wqe->write.sgl[0].stag = wr->ex.imm_data; - wqe->write.sgl[0].len = cpu_to_be32(0); - wqe->write.num_sgle = cpu_to_be32(0); - *flit_cnt = 6; - } else { - plen = 0; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) { - return -EMSGSIZE; - } - plen += wr->sg_list[i].length; - wqe->write.sgl[i].stag = - cpu_to_be32(wr->sg_list[i].lkey); - wqe->write.sgl[i].len = - cpu_to_be32(wr->sg_list[i].length); - wqe->write.sgl[i].to = - cpu_to_be64(wr->sg_list[i].addr); - } - wqe->write.num_sgle = cpu_to_be32(wr->num_sge); - *flit_cnt = 5 + ((wr->num_sge) << 1); - } - wqe->write.plen = cpu_to_be32(plen); - return 0; -} - -static int build_rdma_read(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - if (wr->num_sge > 1) - return -EINVAL; - wqe->read.rdmaop = T3_READ_REQ; - if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) - wqe->read.local_inv = 1; - else - wqe->read.local_inv = 0; - wqe->read.reserved[0] = 0; - wqe->read.reserved[1] = 0; - wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey); - wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr); - wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); - wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length); - wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr); - *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; - return 0; -} - -static int build_memreg(union t3_wr *wqe, const struct ib_reg_wr *wr, - u8 *flit_cnt, int *wr_cnt, 
struct t3_wq *wq) -{ - struct iwch_mr *mhp = to_iwch_mr(wr->mr); - int i; - __be64 *p; - - if (mhp->npages > T3_MAX_FASTREG_DEPTH) - return -EINVAL; - *wr_cnt = 1; - wqe->fastreg.stag = cpu_to_be32(wr->key); - wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length); - wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32); - wqe->fastreg.va_base_lo_fbo = - cpu_to_be32(mhp->ibmr.iova & 0xffffffff); - wqe->fastreg.page_type_perms = cpu_to_be32( - V_FR_PAGE_COUNT(mhp->npages) | - V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) | - V_FR_TYPE(TPT_VATO) | - V_FR_PERMS(iwch_ib_to_tpt_access(wr->access))); - p = &wqe->fastreg.pbl_addrs[0]; - for (i = 0; i < mhp->npages; i++, p++) { - - /* If we need a 2nd WR, then set it up */ - if (i == T3_MAX_FASTREG_FRAG) { - *wr_cnt = 2; - wqe = (union t3_wr *)(wq->queue + - Q_PTR2IDX((wq->wptr+1), wq->size_log2)); - build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, - Q_GENBIT(wq->wptr + 1, wq->size_log2), - 0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG, - T3_EOP); - - p = &wqe->pbl_frag.pbl_addrs[0]; - } - *p = cpu_to_be64((u64)mhp->pages[i]); - } - *flit_cnt = 5 + mhp->npages; - if (*flit_cnt > 15) - *flit_cnt = 15; - return 0; -} - -static int build_inv_stag(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); - wqe->local_inv.reserved = 0; - *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3; - return 0; -} - -static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, - u32 num_sgle, u32 * pbl_addr, u8 * page_size) -{ - int i; - struct iwch_mr *mhp; - u64 offset; - for (i = 0; i < num_sgle; i++) { - - mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8); - if (!mhp) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - if (!mhp->attr.state) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - if (mhp->attr.zbva) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - - if (sg_list[i].addr < mhp->attr.va_fbo) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - if (sg_list[i].addr + ((u64) sg_list[i].length) < - sg_list[i].addr) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - if (sg_list[i].addr + ((u64) sg_list[i].length) > - mhp->attr.va_fbo + ((u64) mhp->attr.len)) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - offset = sg_list[i].addr - mhp->attr.va_fbo; - offset += mhp->attr.va_fbo & - ((1UL << (12 + mhp->attr.page_size)) - 1); - pbl_addr[i] = ((mhp->attr.pbl_addr - - rhp->rdev.rnic_info.pbl_base) >> 3) + - (offset >> (12 + mhp->attr.page_size)); - page_size[i] = mhp->attr.page_size; - } - return 0; -} - -static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, - const struct ib_recv_wr *wr) -{ - int i, err = 0; - u32 pbl_addr[T3_MAX_SGE]; - u8 page_size[T3_MAX_SGE]; - - err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr, - page_size); - if (err) - return err; - wqe->recv.pagesz[0] = page_size[0]; - wqe->recv.pagesz[1] = page_size[1]; - wqe->recv.pagesz[2] = page_size[2]; - wqe->recv.pagesz[3] = page_size[3]; - wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); - for (i = 0; i < wr->num_sge; i++) { - wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); - wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - - /* to in the WQE == the offset into the page */ - wqe->recv.sgl[i].to = cpu_to_be64(((u32)wr->sg_list[i].addr) & - ((1UL << (12 + page_size[i])) - 1)); - - /* pbl_addr is the adapters address in the PBL */ - wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); - 
} - for (; i < T3_MAX_SGE; i++) { - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = 0; - wqe->recv.sgl[i].to = 0; - wqe->recv.pbl_addr[i] = 0; - } - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].wr_id = wr->wr_id; - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].pbl_addr = 0; - return 0; -} - -static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, - const struct ib_recv_wr *wr) -{ - int i; - u32 pbl_addr; - u32 pbl_offset; - - - /* - * The T3 HW requires the PBL in the HW recv descriptor to reference - * a PBL entry. So we allocate the max needed PBL memory here and pass - * it to the uP in the recv WR. The uP will build the PBL and setup - * the HW recv descriptor. - */ - pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE); - if (!pbl_addr) - return -ENOMEM; - - /* - * Compute the 8B aligned offset. - */ - pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3; - - wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); - - for (i = 0; i < wr->num_sge; i++) { - - /* - * Use a 128MB page size. This and an imposed 128MB - * sge length limit allows us to require only a 2-entry HW - * PBL for each SGE. This restriction is acceptable since - * since it is not possible to allocate 128MB of contiguous - * DMA coherent memory! - */ - if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN) - return -EINVAL; - wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT; - - /* - * T3 restricts a recv to all zero-stag or all non-zero-stag. - */ - if (wr->sg_list[i].lkey != 0) - return -EINVAL; - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); - wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset); - pbl_offset += 2; - } - for (; i < T3_MAX_SGE; i++) { - wqe->recv.pagesz[i] = 0; - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = 0; - wqe->recv.sgl[i].to = 0; - wqe->recv.pbl_addr[i] = 0; - } - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].wr_id = wr->wr_id; - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].pbl_addr = pbl_addr; - return 0; -} - -int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr) -{ - int err = 0; - u8 uninitialized_var(t3_wr_flit_cnt); - enum t3_wr_opcode t3_wr_opcode = 0; - enum t3_wr_flags t3_wr_flags; - struct iwch_qp *qhp; - u32 idx; - union t3_wr *wqe; - u32 num_wrs; - unsigned long flag; - struct t3_swsq *sqp; - int wr_cnt = 1; - - qhp = to_iwch_qp(ibqp); - spin_lock_irqsave(&qhp->lock, flag); - if (qhp->attr.state > IWCH_QP_STATE_RTS) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -EINVAL; - goto out; - } - num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, - qhp->wq.sq_size_log2); - if (num_wrs == 0) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -ENOMEM; - goto out; - } - while (wr) { - if (num_wrs == 0) { - err = -ENOMEM; - break; - } - idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); - wqe = (union t3_wr *) (qhp->wq.queue + idx); - t3_wr_flags = 0; - if (wr->send_flags & IB_SEND_SOLICITED) - t3_wr_flags |= T3_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_SIGNALED) - t3_wr_flags |= T3_COMPLETION_FLAG; - sqp = qhp->wq.sq + - Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); - switch (wr->opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_INV: - if (wr->send_flags & IB_SEND_FENCE) - t3_wr_flags |= T3_READ_FENCE_FLAG; - t3_wr_opcode = T3_WR_SEND; - err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt); - break; - case 
IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - t3_wr_opcode = T3_WR_WRITE; - err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt); - break; - case IB_WR_RDMA_READ: - case IB_WR_RDMA_READ_WITH_INV: - t3_wr_opcode = T3_WR_READ; - t3_wr_flags = 0; /* T3 reads are always signaled */ - err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt); - if (err) - break; - sqp->read_len = wqe->read.local_len; - if (!qhp->wq.oldest_read) - qhp->wq.oldest_read = sqp; - break; - case IB_WR_REG_MR: - t3_wr_opcode = T3_WR_FASTREG; - err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt, - &wr_cnt, &qhp->wq); - break; - case IB_WR_LOCAL_INV: - if (wr->send_flags & IB_SEND_FENCE) - t3_wr_flags |= T3_LOCAL_FENCE_FLAG; - t3_wr_opcode = T3_WR_INV_STAG; - err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt); - break; - default: - pr_debug("%s post of type=%d TBD!\n", __func__, - wr->opcode); - err = -EINVAL; - } - if (err) - break; - wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; - sqp->wr_id = wr->wr_id; - sqp->opcode = wr2opcode(t3_wr_opcode); - sqp->sq_wptr = qhp->wq.sq_wptr; - sqp->complete = 0; - sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED); - - build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags, - Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, t3_wr_flit_cnt, - (wr_cnt == 1) ? T3_SOPEOP : T3_SOP); - pr_debug("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n", - __func__, (unsigned long long)wr->wr_id, idx, - Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2), - sqp->opcode); - wr = wr->next; - num_wrs--; - qhp->wq.wptr += wr_cnt; - ++(qhp->wq.sq_wptr); - } - spin_unlock_irqrestore(&qhp->lock, flag); - if (cxio_wq_db_enabled(&qhp->wq)) - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); - -out: - if (err) - *bad_wr = wr; - return err; -} - -int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr) -{ - int err = 0; - struct iwch_qp *qhp; - u32 idx; - union t3_wr *wqe; - u32 num_wrs; - unsigned long flag; - - qhp = to_iwch_qp(ibqp); - spin_lock_irqsave(&qhp->lock, flag); - if (qhp->attr.state > IWCH_QP_STATE_RTS) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -EINVAL; - goto out; - } - num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr, - qhp->wq.rq_size_log2) - 1; - if (!wr) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -ENOMEM; - goto out; - } - while (wr) { - if (wr->num_sge > T3_MAX_SGE) { - err = -EINVAL; - break; - } - idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); - wqe = (union t3_wr *) (qhp->wq.queue + idx); - if (num_wrs) - if (wr->sg_list[0].lkey) - err = build_rdma_recv(qhp, wqe, wr); - else - err = build_zero_stag_recv(qhp, wqe, wr); - else - err = -ENOMEM; - - if (err) - break; - - build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, - Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); - pr_debug("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x wqe %p\n", - __func__, (unsigned long long)wr->wr_id, - idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe); - ++(qhp->wq.rq_wptr); - ++(qhp->wq.wptr); - wr = wr->next; - num_wrs--; - } - spin_unlock_irqrestore(&qhp->lock, flag); - if (cxio_wq_db_enabled(&qhp->wq)) - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); - -out: - if (err) - *bad_wr = wr; - return err; -} - -static inline void build_term_codes(struct respQ_msg_t *rsp_msg, - u8 *layer_type, u8 *ecode) -{ - int status = TPT_ERR_INTERNAL_ERR; - int tagged = 0; - int opcode = -1; - int rqtype = 0; - int send_inv = 0; - - if (rsp_msg) { - status = CQE_STATUS(rsp_msg->cqe); - opcode = 
CQE_OPCODE(rsp_msg->cqe); - rqtype = RQ_TYPE(rsp_msg->cqe); - send_inv = (opcode == T3_SEND_WITH_INV) || - (opcode == T3_SEND_WITH_SE_INV); - tagged = (opcode == T3_RDMA_WRITE) || - (rqtype && (opcode == T3_READ_RESP)); - } - - switch (status) { - case TPT_ERR_STAG: - if (send_inv) { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_CANT_INV_STAG; - } else { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_INV_STAG; - } - break; - case TPT_ERR_PDID: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - if ((opcode == T3_SEND_WITH_INV) || - (opcode == T3_SEND_WITH_SE_INV)) - *ecode = RDMAP_CANT_INV_STAG; - else - *ecode = RDMAP_STAG_NOT_ASSOC; - break; - case TPT_ERR_QPID: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_STAG_NOT_ASSOC; - break; - case TPT_ERR_ACCESS: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_ACC_VIOL; - break; - case TPT_ERR_WRAP: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_TO_WRAP; - break; - case TPT_ERR_BOUND: - if (tagged) { - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_BASE_BOUNDS; - } else { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_BASE_BOUNDS; - } - break; - case TPT_ERR_INVALIDATE_SHARED_MR: - case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_CANT_INV_STAG; - break; - case TPT_ERR_ECC: - case TPT_ERR_ECC_PSTAG: - case TPT_ERR_INTERNAL_ERR: - *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA; - *ecode = 0; - break; - case TPT_ERR_OUT_OF_RQE: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MSN_NOBUF; - break; - case TPT_ERR_PBL_ADDR_BOUND: - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_BASE_BOUNDS; - break; - case TPT_ERR_CRC: - *layer_type = LAYER_MPA|DDP_LLP; - *ecode = MPA_CRC_ERR; - break; - case TPT_ERR_MARKER: - *layer_type = LAYER_MPA|DDP_LLP; - *ecode = MPA_MARKER_ERR; - break; - case TPT_ERR_PDU_LEN_ERR: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_MSG_TOOBIG; - break; - case TPT_ERR_DDP_VERSION: - if (tagged) { - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_INV_VERS; - } else { - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_VERS; - } - break; - case TPT_ERR_RDMA_VERSION: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_INV_VERS; - break; - case TPT_ERR_OPCODE: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_INV_OPCODE; - break; - case TPT_ERR_DDP_QUEUE_NUM: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_QN; - break; - case TPT_ERR_MSN: - case TPT_ERR_MSN_GAP: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_IRD_OVERFLOW: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MSN_RANGE; - break; - case TPT_ERR_TBIT: - *layer_type = LAYER_DDP|DDP_LOCAL_CATA; - *ecode = 0; - break; - case TPT_ERR_MO: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MO; - break; - default: - *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; - *ecode = 0; - break; - } -} - -int iwch_post_zb_read(struct iwch_ep *ep) -{ - union t3_wr *wqe; - struct sk_buff *skb; - u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; - - pr_debug("%s enter\n", __func__); - skb = alloc_skb(40, GFP_KERNEL); - if (!skb) { - pr_err("%s cannot send zb_read!!\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, sizeof(struct t3_rdma_read_wr)); - wqe->read.rdmaop = T3_READ_REQ; - wqe->read.reserved[0] = 0; - wqe->read.reserved[1] = 0; - wqe->read.rem_stag = cpu_to_be32(1); - wqe->read.rem_to = cpu_to_be64(1); - 
wqe->read.local_stag = cpu_to_be32(1); - wqe->read.local_len = cpu_to_be32(0); - wqe->read.local_to = cpu_to_be64(1); - wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ)); - wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)| - V_FW_RIWR_LEN(flit_cnt)); - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb); -} - -/* - * This posts a TERMINATE with layer=RDMA, type=catastrophic. - */ -int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) -{ - union t3_wr *wqe; - struct terminate_message *term; - struct sk_buff *skb; - - pr_debug("%s %d\n", __func__, __LINE__); - skb = alloc_skb(40, GFP_ATOMIC); - if (!skb) { - pr_err("%s cannot send TERMINATE!\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, 40); - wqe->send.rdmaop = T3_TERMINATE; - - /* immediate data length */ - wqe->send.plen = htonl(4); - - /* immediate data starts here. */ - term = (struct terminate_message *)wqe->send.sgl; - build_term_codes(rsp_msg, &term->layer_etype, &term->ecode); - wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) | - V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG)); - wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)); - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); -} - -/* - * Assumes qhp lock is held. - */ -static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, - struct iwch_cq *schp) - __releases(&qhp->lock) - __acquires(&qhp->lock) -{ - int count; - int flushed; - - lockdep_assert_held(&qhp->lock); - - pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); - /* take a ref on the qhp since we must release the lock */ - atomic_inc(&qhp->refcnt); - spin_unlock(&qhp->lock); - - /* locking hierarchy: cq lock first, then qp lock. */ - spin_lock(&rchp->lock); - spin_lock(&qhp->lock); - cxio_flush_hw_cq(&rchp->cq); - cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); - flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); - spin_unlock(&qhp->lock); - spin_unlock(&rchp->lock); - if (flushed) { - spin_lock(&rchp->comp_handler_lock); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock(&rchp->comp_handler_lock); - } - - /* locking hierarchy: cq lock first, then qp lock. 
*/ - spin_lock(&schp->lock); - spin_lock(&qhp->lock); - cxio_flush_hw_cq(&schp->cq); - cxio_count_scqes(&schp->cq, &qhp->wq, &count); - flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); - spin_unlock(&qhp->lock); - spin_unlock(&schp->lock); - if (flushed) { - spin_lock(&schp->comp_handler_lock); - (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); - spin_unlock(&schp->comp_handler_lock); - } - - /* deref */ - if (atomic_dec_and_test(&qhp->refcnt)) - wake_up(&qhp->wait); - - spin_lock(&qhp->lock); -} - -static void flush_qp(struct iwch_qp *qhp) -{ - struct iwch_cq *rchp, *schp; - - rchp = get_chp(qhp->rhp, qhp->attr.rcq); - schp = get_chp(qhp->rhp, qhp->attr.scq); - - if (qhp->ibqp.uobject) { - cxio_set_wq_in_error(&qhp->wq); - cxio_set_cq_in_error(&rchp->cq); - spin_lock(&rchp->comp_handler_lock); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock(&rchp->comp_handler_lock); - if (schp != rchp) { - cxio_set_cq_in_error(&schp->cq); - spin_lock(&schp->comp_handler_lock); - (*schp->ibcq.comp_handler)(&schp->ibcq, - schp->ibcq.cq_context); - spin_unlock(&schp->comp_handler_lock); - } - return; - } - __flush_qp(qhp, rchp, schp); -} - - -/* - * Return count of RECV WRs posted - */ -u16 iwch_rqes_posted(struct iwch_qp *qhp) -{ - union t3_wr *wqe = qhp->wq.queue; - u16 count = 0; - - while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { - count++; - wqe++; - } - pr_debug("%s qhp %p count %u\n", __func__, qhp, count); - return count; -} - -static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs) -{ - struct t3_rdma_init_attr init_attr; - int ret; - - init_attr.tid = qhp->ep->hwtid; - init_attr.qpid = qhp->wq.qpid; - init_attr.pdid = qhp->attr.pd; - init_attr.scqid = qhp->attr.scq; - init_attr.rcqid = qhp->attr.rcq; - init_attr.rq_addr = qhp->wq.rq_addr; - init_attr.rq_size = 1 << qhp->wq.rq_size_log2; - init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE | - qhp->attr.mpa_attr.recv_marker_enabled | - (qhp->attr.mpa_attr.xmit_marker_enabled << 1) | - (qhp->attr.mpa_attr.crc_enabled << 2); - - init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE | - uP_RI_QP_RDMA_WRITE_ENABLE | - uP_RI_QP_BIND_ENABLE; - if (!qhp->ibqp.uobject) - init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE | - uP_RI_QP_FAST_REGISTER_ENABLE; - - init_attr.tcp_emss = qhp->ep->emss; - init_attr.ord = qhp->attr.max_ord; - init_attr.ird = qhp->attr.max_ird; - init_attr.qp_dma_addr = qhp->wq.dma_addr; - init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); - init_attr.rqe_count = iwch_rqes_posted(qhp); - init_attr.flags = qhp->attr.mpa_attr.initiator ? 
MPA_INITIATOR : 0; - init_attr.chan = qhp->ep->l2t->smt_idx; - if (peer2peer) { - init_attr.rtr_type = RTR_READ; - if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) - init_attr.ord = 1; - if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator) - init_attr.ird = 1; - } else - init_attr.rtr_type = 0; - init_attr.irs = qhp->ep->rcv_seq; - pr_debug("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d flags 0x%x qpcaps 0x%x\n", - __func__, - init_attr.rq_addr, init_attr.rq_size, - init_attr.flags, init_attr.qpcaps); - ret = cxio_rdma_init(&rhp->rdev, &init_attr); - pr_debug("%s ret %d\n", __func__, ret); - return ret; -} - -int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs, - int internal) -{ - int ret = 0; - struct iwch_qp_attributes newattr = qhp->attr; - unsigned long flag; - int disconnect = 0; - int terminate = 0; - int abort = 0; - int free = 0; - struct iwch_ep *ep = NULL; - - pr_debug("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__, - qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state, - (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1); - - spin_lock_irqsave(&qhp->lock, flag); - - /* Process attr changes if in IDLE */ - if (mask & IWCH_QP_ATTR_VALID_MODIFY) { - if (qhp->attr.state != IWCH_QP_STATE_IDLE) { - ret = -EIO; - goto out; - } - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ) - newattr.enable_rdma_read = attrs->enable_rdma_read; - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE) - newattr.enable_rdma_write = attrs->enable_rdma_write; - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND) - newattr.enable_bind = attrs->enable_bind; - if (mask & IWCH_QP_ATTR_MAX_ORD) { - if (attrs->max_ord > - rhp->attr.max_rdma_read_qp_depth) { - ret = -EINVAL; - goto out; - } - newattr.max_ord = attrs->max_ord; - } - if (mask & IWCH_QP_ATTR_MAX_IRD) { - if (attrs->max_ird > - rhp->attr.max_rdma_reads_per_qp) { - ret = -EINVAL; - goto out; - } - newattr.max_ird = attrs->max_ird; - } - qhp->attr = newattr; - } - - if (!(mask & IWCH_QP_ATTR_NEXT_STATE)) - goto out; - if (qhp->attr.state == attrs->next_state) - goto out; - - switch (qhp->attr.state) { - case IWCH_QP_STATE_IDLE: - switch (attrs->next_state) { - case IWCH_QP_STATE_RTS: - if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) { - ret = -EINVAL; - goto out; - } - if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) { - ret = -EINVAL; - goto out; - } - qhp->attr.mpa_attr = attrs->mpa_attr; - qhp->attr.llp_stream_handle = attrs->llp_stream_handle; - qhp->ep = qhp->attr.llp_stream_handle; - qhp->attr.state = IWCH_QP_STATE_RTS; - - /* - * Ref the endpoint here and deref when we - * disassociate the endpoint from the QP. This - * happens in CLOSING->IDLE transition or *->ERROR - * transition. 
- */ - get_ep(&qhp->ep->com); - spin_unlock_irqrestore(&qhp->lock, flag); - ret = rdma_init(rhp, qhp, mask, attrs); - spin_lock_irqsave(&qhp->lock, flag); - if (ret) - goto err; - break; - case IWCH_QP_STATE_ERROR: - qhp->attr.state = IWCH_QP_STATE_ERROR; - flush_qp(qhp); - break; - default: - ret = -EINVAL; - goto out; - } - break; - case IWCH_QP_STATE_RTS: - switch (attrs->next_state) { - case IWCH_QP_STATE_CLOSING: - BUG_ON(kref_read(&qhp->ep->com.kref) < 2); - qhp->attr.state = IWCH_QP_STATE_CLOSING; - if (!internal) { - abort=0; - disconnect = 1; - ep = qhp->ep; - get_ep(&ep->com); - } - break; - case IWCH_QP_STATE_TERMINATE: - qhp->attr.state = IWCH_QP_STATE_TERMINATE; - if (qhp->ibqp.uobject) - cxio_set_wq_in_error(&qhp->wq); - if (!internal) - terminate = 1; - break; - case IWCH_QP_STATE_ERROR: - qhp->attr.state = IWCH_QP_STATE_ERROR; - if (!internal) { - abort=1; - disconnect = 1; - ep = qhp->ep; - get_ep(&ep->com); - } - goto err; - break; - default: - ret = -EINVAL; - goto out; - } - break; - case IWCH_QP_STATE_CLOSING: - if (!internal) { - ret = -EINVAL; - goto out; - } - switch (attrs->next_state) { - case IWCH_QP_STATE_IDLE: - flush_qp(qhp); - qhp->attr.state = IWCH_QP_STATE_IDLE; - qhp->attr.llp_stream_handle = NULL; - put_ep(&qhp->ep->com); - qhp->ep = NULL; - wake_up(&qhp->wait); - break; - case IWCH_QP_STATE_ERROR: - goto err; - default: - ret = -EINVAL; - goto err; - } - break; - case IWCH_QP_STATE_ERROR: - if (attrs->next_state != IWCH_QP_STATE_IDLE) { - ret = -EINVAL; - goto out; - } - - if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) || - !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) { - ret = -EINVAL; - goto out; - } - qhp->attr.state = IWCH_QP_STATE_IDLE; - break; - case IWCH_QP_STATE_TERMINATE: - if (!internal) { - ret = -EINVAL; - goto out; - } - goto err; - break; - default: - pr_err("%s in a bad state %d\n", __func__, qhp->attr.state); - ret = -EINVAL; - goto err; - break; - } - goto out; -err: - pr_debug("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep, - qhp->wq.qpid); - - /* disassociate the LLP connection */ - qhp->attr.llp_stream_handle = NULL; - ep = qhp->ep; - qhp->ep = NULL; - qhp->attr.state = IWCH_QP_STATE_ERROR; - free=1; - wake_up(&qhp->wait); - BUG_ON(!ep); - flush_qp(qhp); -out: - spin_unlock_irqrestore(&qhp->lock, flag); - - if (terminate) - iwch_post_terminate(qhp, NULL); - - /* - * If disconnect is 1, then we need to initiate a disconnect - * on the EP. This can be a normal close (RTS->CLOSING) or - * an abnormal close (RTS/CLOSING->ERROR). - */ - if (disconnect) { - iwch_ep_disconnect(ep, abort, GFP_KERNEL); - put_ep(&ep->com); - } - - /* - * If free is 1, then we've disassociated the EP from the QP - * and we need to dereference the EP. - */ - if (free) - put_ep(&ep->com); - - pr_debug("%s exit state %d\n", __func__, qhp->attr.state); - return ret; -} diff --git a/drivers/infiniband/hw/cxgb3/tcb.h b/drivers/infiniband/hw/cxgb3/tcb.h deleted file mode 100644 index c702dc199e18..000000000000 --- a/drivers/infiniband/hw/cxgb3/tcb.h +++ /dev/null @@ -1,632 +0,0 @@ -/* - * Copyright (c) 2007 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _TCB_DEFS_H -#define _TCB_DEFS_H - -#define W_TCB_T_STATE 0 -#define S_TCB_T_STATE 0 -#define M_TCB_T_STATE 0xfULL -#define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE) - -#define W_TCB_TIMER 0 -#define S_TCB_TIMER 4 -#define M_TCB_TIMER 0x1ULL -#define V_TCB_TIMER(x) ((x) << S_TCB_TIMER) - -#define W_TCB_DACK_TIMER 0 -#define S_TCB_DACK_TIMER 5 -#define M_TCB_DACK_TIMER 0x1ULL -#define V_TCB_DACK_TIMER(x) ((x) << S_TCB_DACK_TIMER) - -#define W_TCB_DEL_FLAG 0 -#define S_TCB_DEL_FLAG 6 -#define M_TCB_DEL_FLAG 0x1ULL -#define V_TCB_DEL_FLAG(x) ((x) << S_TCB_DEL_FLAG) - -#define W_TCB_L2T_IX 0 -#define S_TCB_L2T_IX 7 -#define M_TCB_L2T_IX 0x7ffULL -#define V_TCB_L2T_IX(x) ((x) << S_TCB_L2T_IX) - -#define W_TCB_SMAC_SEL 0 -#define S_TCB_SMAC_SEL 18 -#define M_TCB_SMAC_SEL 0x3ULL -#define V_TCB_SMAC_SEL(x) ((x) << S_TCB_SMAC_SEL) - -#define W_TCB_TOS 0 -#define S_TCB_TOS 20 -#define M_TCB_TOS 0x3fULL -#define V_TCB_TOS(x) ((x) << S_TCB_TOS) - -#define W_TCB_MAX_RT 0 -#define S_TCB_MAX_RT 26 -#define M_TCB_MAX_RT 0xfULL -#define V_TCB_MAX_RT(x) ((x) << S_TCB_MAX_RT) - -#define W_TCB_T_RXTSHIFT 0 -#define S_TCB_T_RXTSHIFT 30 -#define M_TCB_T_RXTSHIFT 0xfULL -#define V_TCB_T_RXTSHIFT(x) ((x) << S_TCB_T_RXTSHIFT) - -#define W_TCB_T_DUPACKS 1 -#define S_TCB_T_DUPACKS 2 -#define M_TCB_T_DUPACKS 0xfULL -#define V_TCB_T_DUPACKS(x) ((x) << S_TCB_T_DUPACKS) - -#define W_TCB_T_MAXSEG 1 -#define S_TCB_T_MAXSEG 6 -#define M_TCB_T_MAXSEG 0xfULL -#define V_TCB_T_MAXSEG(x) ((x) << S_TCB_T_MAXSEG) - -#define W_TCB_T_FLAGS1 1 -#define S_TCB_T_FLAGS1 10 -#define M_TCB_T_FLAGS1 0xffffffffULL -#define V_TCB_T_FLAGS1(x) ((x) << S_TCB_T_FLAGS1) - -#define W_TCB_T_MIGRATION 1 -#define S_TCB_T_MIGRATION 20 -#define M_TCB_T_MIGRATION 0x1ULL -#define V_TCB_T_MIGRATION(x) ((x) << S_TCB_T_MIGRATION) - -#define W_TCB_T_FLAGS2 2 -#define S_TCB_T_FLAGS2 10 -#define M_TCB_T_FLAGS2 0x7fULL -#define V_TCB_T_FLAGS2(x) ((x) << S_TCB_T_FLAGS2) - -#define W_TCB_SND_SCALE 2 -#define S_TCB_SND_SCALE 17 -#define M_TCB_SND_SCALE 0xfULL -#define V_TCB_SND_SCALE(x) ((x) << S_TCB_SND_SCALE) - -#define W_TCB_RCV_SCALE 2 -#define S_TCB_RCV_SCALE 21 -#define M_TCB_RCV_SCALE 0xfULL -#define V_TCB_RCV_SCALE(x) ((x) << S_TCB_RCV_SCALE) - -#define W_TCB_SND_UNA_RAW 2 -#define S_TCB_SND_UNA_RAW 25 -#define 
M_TCB_SND_UNA_RAW 0x7ffffffULL -#define V_TCB_SND_UNA_RAW(x) ((x) << S_TCB_SND_UNA_RAW) - -#define W_TCB_SND_NXT_RAW 3 -#define S_TCB_SND_NXT_RAW 20 -#define M_TCB_SND_NXT_RAW 0x7ffffffULL -#define V_TCB_SND_NXT_RAW(x) ((x) << S_TCB_SND_NXT_RAW) - -#define W_TCB_RCV_NXT 4 -#define S_TCB_RCV_NXT 15 -#define M_TCB_RCV_NXT 0xffffffffULL -#define V_TCB_RCV_NXT(x) ((x) << S_TCB_RCV_NXT) - -#define W_TCB_RCV_ADV 5 -#define S_TCB_RCV_ADV 15 -#define M_TCB_RCV_ADV 0xffffULL -#define V_TCB_RCV_ADV(x) ((x) << S_TCB_RCV_ADV) - -#define W_TCB_SND_MAX_RAW 5 -#define S_TCB_SND_MAX_RAW 31 -#define M_TCB_SND_MAX_RAW 0x7ffffffULL -#define V_TCB_SND_MAX_RAW(x) ((x) << S_TCB_SND_MAX_RAW) - -#define W_TCB_SND_CWND 6 -#define S_TCB_SND_CWND 26 -#define M_TCB_SND_CWND 0x7ffffffULL -#define V_TCB_SND_CWND(x) ((x) << S_TCB_SND_CWND) - -#define W_TCB_SND_SSTHRESH 7 -#define S_TCB_SND_SSTHRESH 21 -#define M_TCB_SND_SSTHRESH 0x7ffffffULL -#define V_TCB_SND_SSTHRESH(x) ((x) << S_TCB_SND_SSTHRESH) - -#define W_TCB_T_RTT_TS_RECENT_AGE 8 -#define S_TCB_T_RTT_TS_RECENT_AGE 16 -#define M_TCB_T_RTT_TS_RECENT_AGE 0xffffffffULL -#define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE) - -#define W_TCB_T_RTSEQ_RECENT 9 -#define S_TCB_T_RTSEQ_RECENT 16 -#define M_TCB_T_RTSEQ_RECENT 0xffffffffULL -#define V_TCB_T_RTSEQ_RECENT(x) ((x) << S_TCB_T_RTSEQ_RECENT) - -#define W_TCB_T_SRTT 10 -#define S_TCB_T_SRTT 16 -#define M_TCB_T_SRTT 0xffffULL -#define V_TCB_T_SRTT(x) ((x) << S_TCB_T_SRTT) - -#define W_TCB_T_RTTVAR 11 -#define S_TCB_T_RTTVAR 0 -#define M_TCB_T_RTTVAR 0xffffULL -#define V_TCB_T_RTTVAR(x) ((x) << S_TCB_T_RTTVAR) - -#define W_TCB_TS_LAST_ACK_SENT_RAW 11 -#define S_TCB_TS_LAST_ACK_SENT_RAW 16 -#define M_TCB_TS_LAST_ACK_SENT_RAW 0x7ffffffULL -#define V_TCB_TS_LAST_ACK_SENT_RAW(x) ((x) << S_TCB_TS_LAST_ACK_SENT_RAW) - -#define W_TCB_DIP 12 -#define S_TCB_DIP 11 -#define M_TCB_DIP 0xffffffffULL -#define V_TCB_DIP(x) ((x) << S_TCB_DIP) - -#define W_TCB_SIP 13 -#define S_TCB_SIP 11 -#define M_TCB_SIP 0xffffffffULL -#define V_TCB_SIP(x) ((x) << S_TCB_SIP) - -#define W_TCB_DP 14 -#define S_TCB_DP 11 -#define M_TCB_DP 0xffffULL -#define V_TCB_DP(x) ((x) << S_TCB_DP) - -#define W_TCB_SP 14 -#define S_TCB_SP 27 -#define M_TCB_SP 0xffffULL -#define V_TCB_SP(x) ((x) << S_TCB_SP) - -#define W_TCB_TIMESTAMP 15 -#define S_TCB_TIMESTAMP 11 -#define M_TCB_TIMESTAMP 0xffffffffULL -#define V_TCB_TIMESTAMP(x) ((x) << S_TCB_TIMESTAMP) - -#define W_TCB_TIMESTAMP_OFFSET 16 -#define S_TCB_TIMESTAMP_OFFSET 11 -#define M_TCB_TIMESTAMP_OFFSET 0xfULL -#define V_TCB_TIMESTAMP_OFFSET(x) ((x) << S_TCB_TIMESTAMP_OFFSET) - -#define W_TCB_TX_MAX 16 -#define S_TCB_TX_MAX 15 -#define M_TCB_TX_MAX 0xffffffffULL -#define V_TCB_TX_MAX(x) ((x) << S_TCB_TX_MAX) - -#define W_TCB_TX_HDR_PTR_RAW 17 -#define S_TCB_TX_HDR_PTR_RAW 15 -#define M_TCB_TX_HDR_PTR_RAW 0x1ffffULL -#define V_TCB_TX_HDR_PTR_RAW(x) ((x) << S_TCB_TX_HDR_PTR_RAW) - -#define W_TCB_TX_LAST_PTR_RAW 18 -#define S_TCB_TX_LAST_PTR_RAW 0 -#define M_TCB_TX_LAST_PTR_RAW 0x1ffffULL -#define V_TCB_TX_LAST_PTR_RAW(x) ((x) << S_TCB_TX_LAST_PTR_RAW) - -#define W_TCB_TX_COMPACT 18 -#define S_TCB_TX_COMPACT 17 -#define M_TCB_TX_COMPACT 0x1ULL -#define V_TCB_TX_COMPACT(x) ((x) << S_TCB_TX_COMPACT) - -#define W_TCB_RX_COMPACT 18 -#define S_TCB_RX_COMPACT 18 -#define M_TCB_RX_COMPACT 0x1ULL -#define V_TCB_RX_COMPACT(x) ((x) << S_TCB_RX_COMPACT) - -#define W_TCB_RCV_WND 18 -#define S_TCB_RCV_WND 19 -#define M_TCB_RCV_WND 0x7ffffffULL -#define V_TCB_RCV_WND(x) ((x) << S_TCB_RCV_WND) - -#define 
W_TCB_RX_HDR_OFFSET 19 -#define S_TCB_RX_HDR_OFFSET 14 -#define M_TCB_RX_HDR_OFFSET 0x7ffffffULL -#define V_TCB_RX_HDR_OFFSET(x) ((x) << S_TCB_RX_HDR_OFFSET) - -#define W_TCB_RX_FRAG0_START_IDX_RAW 20 -#define S_TCB_RX_FRAG0_START_IDX_RAW 9 -#define M_TCB_RX_FRAG0_START_IDX_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG0_START_IDX_RAW(x) ((x) << S_TCB_RX_FRAG0_START_IDX_RAW) - -#define W_TCB_RX_FRAG1_START_IDX_OFFSET 21 -#define S_TCB_RX_FRAG1_START_IDX_OFFSET 4 -#define M_TCB_RX_FRAG1_START_IDX_OFFSET 0x7ffffffULL -#define V_TCB_RX_FRAG1_START_IDX_OFFSET(x) ((x) << S_TCB_RX_FRAG1_START_IDX_OFFSET) - -#define W_TCB_RX_FRAG0_LEN 21 -#define S_TCB_RX_FRAG0_LEN 31 -#define M_TCB_RX_FRAG0_LEN 0x7ffffffULL -#define V_TCB_RX_FRAG0_LEN(x) ((x) << S_TCB_RX_FRAG0_LEN) - -#define W_TCB_RX_FRAG1_LEN 22 -#define S_TCB_RX_FRAG1_LEN 26 -#define M_TCB_RX_FRAG1_LEN 0x7ffffffULL -#define V_TCB_RX_FRAG1_LEN(x) ((x) << S_TCB_RX_FRAG1_LEN) - -#define W_TCB_NEWRENO_RECOVER 23 -#define S_TCB_NEWRENO_RECOVER 21 -#define M_TCB_NEWRENO_RECOVER 0x7ffffffULL -#define V_TCB_NEWRENO_RECOVER(x) ((x) << S_TCB_NEWRENO_RECOVER) - -#define W_TCB_PDU_HAVE_LEN 24 -#define S_TCB_PDU_HAVE_LEN 16 -#define M_TCB_PDU_HAVE_LEN 0x1ULL -#define V_TCB_PDU_HAVE_LEN(x) ((x) << S_TCB_PDU_HAVE_LEN) - -#define W_TCB_PDU_LEN 24 -#define S_TCB_PDU_LEN 17 -#define M_TCB_PDU_LEN 0xffffULL -#define V_TCB_PDU_LEN(x) ((x) << S_TCB_PDU_LEN) - -#define W_TCB_RX_QUIESCE 25 -#define S_TCB_RX_QUIESCE 1 -#define M_TCB_RX_QUIESCE 0x1ULL -#define V_TCB_RX_QUIESCE(x) ((x) << S_TCB_RX_QUIESCE) - -#define W_TCB_RX_PTR_RAW 25 -#define S_TCB_RX_PTR_RAW 2 -#define M_TCB_RX_PTR_RAW 0x1ffffULL -#define V_TCB_RX_PTR_RAW(x) ((x) << S_TCB_RX_PTR_RAW) - -#define W_TCB_CPU_NO 25 -#define S_TCB_CPU_NO 19 -#define M_TCB_CPU_NO 0x7fULL -#define V_TCB_CPU_NO(x) ((x) << S_TCB_CPU_NO) - -#define W_TCB_ULP_TYPE 25 -#define S_TCB_ULP_TYPE 26 -#define M_TCB_ULP_TYPE 0xfULL -#define V_TCB_ULP_TYPE(x) ((x) << S_TCB_ULP_TYPE) - -#define W_TCB_RX_FRAG1_PTR_RAW 25 -#define S_TCB_RX_FRAG1_PTR_RAW 30 -#define M_TCB_RX_FRAG1_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG1_PTR_RAW(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW) - -#define W_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 26 -#define S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 15 -#define M_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG2_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW) - -#define W_TCB_RX_FRAG2_PTR_RAW 27 -#define S_TCB_RX_FRAG2_PTR_RAW 10 -#define M_TCB_RX_FRAG2_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG2_PTR_RAW(x) ((x) << S_TCB_RX_FRAG2_PTR_RAW) - -#define W_TCB_RX_FRAG2_LEN_RAW 27 -#define S_TCB_RX_FRAG2_LEN_RAW 27 -#define M_TCB_RX_FRAG2_LEN_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG2_LEN_RAW(x) ((x) << S_TCB_RX_FRAG2_LEN_RAW) - -#define W_TCB_RX_FRAG3_PTR_RAW 28 -#define S_TCB_RX_FRAG3_PTR_RAW 22 -#define M_TCB_RX_FRAG3_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG3_PTR_RAW(x) ((x) << S_TCB_RX_FRAG3_PTR_RAW) - -#define W_TCB_RX_FRAG3_LEN_RAW 29 -#define S_TCB_RX_FRAG3_LEN_RAW 7 -#define M_TCB_RX_FRAG3_LEN_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG3_LEN_RAW(x) ((x) << S_TCB_RX_FRAG3_LEN_RAW) - -#define W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 30 -#define S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 2 -#define M_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG3_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW) - -#define W_TCB_PDU_HDR_LEN 30 -#define S_TCB_PDU_HDR_LEN 29 -#define M_TCB_PDU_HDR_LEN 0xffULL -#define V_TCB_PDU_HDR_LEN(x) ((x) << S_TCB_PDU_HDR_LEN) - -#define W_TCB_SLUSH1 
31 -#define S_TCB_SLUSH1 5 -#define M_TCB_SLUSH1 0x7ffffULL -#define V_TCB_SLUSH1(x) ((x) << S_TCB_SLUSH1) - -#define W_TCB_ULP_RAW 31 -#define S_TCB_ULP_RAW 24 -#define M_TCB_ULP_RAW 0xffULL -#define V_TCB_ULP_RAW(x) ((x) << S_TCB_ULP_RAW) - -#define W_TCB_DDP_RDMAP_VERSION 25 -#define S_TCB_DDP_RDMAP_VERSION 30 -#define M_TCB_DDP_RDMAP_VERSION 0x1ULL -#define V_TCB_DDP_RDMAP_VERSION(x) ((x) << S_TCB_DDP_RDMAP_VERSION) - -#define W_TCB_MARKER_ENABLE_RX 25 -#define S_TCB_MARKER_ENABLE_RX 31 -#define M_TCB_MARKER_ENABLE_RX 0x1ULL -#define V_TCB_MARKER_ENABLE_RX(x) ((x) << S_TCB_MARKER_ENABLE_RX) - -#define W_TCB_MARKER_ENABLE_TX 26 -#define S_TCB_MARKER_ENABLE_TX 0 -#define M_TCB_MARKER_ENABLE_TX 0x1ULL -#define V_TCB_MARKER_ENABLE_TX(x) ((x) << S_TCB_MARKER_ENABLE_TX) - -#define W_TCB_CRC_ENABLE 26 -#define S_TCB_CRC_ENABLE 1 -#define M_TCB_CRC_ENABLE 0x1ULL -#define V_TCB_CRC_ENABLE(x) ((x) << S_TCB_CRC_ENABLE) - -#define W_TCB_IRS_ULP 26 -#define S_TCB_IRS_ULP 2 -#define M_TCB_IRS_ULP 0x1ffULL -#define V_TCB_IRS_ULP(x) ((x) << S_TCB_IRS_ULP) - -#define W_TCB_ISS_ULP 26 -#define S_TCB_ISS_ULP 11 -#define M_TCB_ISS_ULP 0x1ffULL -#define V_TCB_ISS_ULP(x) ((x) << S_TCB_ISS_ULP) - -#define W_TCB_TX_PDU_LEN 26 -#define S_TCB_TX_PDU_LEN 20 -#define M_TCB_TX_PDU_LEN 0x3fffULL -#define V_TCB_TX_PDU_LEN(x) ((x) << S_TCB_TX_PDU_LEN) - -#define W_TCB_TX_PDU_OUT 27 -#define S_TCB_TX_PDU_OUT 2 -#define M_TCB_TX_PDU_OUT 0x1ULL -#define V_TCB_TX_PDU_OUT(x) ((x) << S_TCB_TX_PDU_OUT) - -#define W_TCB_CQ_IDX_SQ 27 -#define S_TCB_CQ_IDX_SQ 3 -#define M_TCB_CQ_IDX_SQ 0xffffULL -#define V_TCB_CQ_IDX_SQ(x) ((x) << S_TCB_CQ_IDX_SQ) - -#define W_TCB_CQ_IDX_RQ 27 -#define S_TCB_CQ_IDX_RQ 19 -#define M_TCB_CQ_IDX_RQ 0xffffULL -#define V_TCB_CQ_IDX_RQ(x) ((x) << S_TCB_CQ_IDX_RQ) - -#define W_TCB_QP_ID 28 -#define S_TCB_QP_ID 3 -#define M_TCB_QP_ID 0xffffULL -#define V_TCB_QP_ID(x) ((x) << S_TCB_QP_ID) - -#define W_TCB_PD_ID 28 -#define S_TCB_PD_ID 19 -#define M_TCB_PD_ID 0xffffULL -#define V_TCB_PD_ID(x) ((x) << S_TCB_PD_ID) - -#define W_TCB_STAG 29 -#define S_TCB_STAG 3 -#define M_TCB_STAG 0xffffffffULL -#define V_TCB_STAG(x) ((x) << S_TCB_STAG) - -#define W_TCB_RQ_START 30 -#define S_TCB_RQ_START 3 -#define M_TCB_RQ_START 0x3ffffffULL -#define V_TCB_RQ_START(x) ((x) << S_TCB_RQ_START) - -#define W_TCB_RQ_MSN 30 -#define S_TCB_RQ_MSN 29 -#define M_TCB_RQ_MSN 0x3ffULL -#define V_TCB_RQ_MSN(x) ((x) << S_TCB_RQ_MSN) - -#define W_TCB_RQ_MAX_OFFSET 31 -#define S_TCB_RQ_MAX_OFFSET 7 -#define M_TCB_RQ_MAX_OFFSET 0xfULL -#define V_TCB_RQ_MAX_OFFSET(x) ((x) << S_TCB_RQ_MAX_OFFSET) - -#define W_TCB_RQ_WRITE_PTR 31 -#define S_TCB_RQ_WRITE_PTR 11 -#define M_TCB_RQ_WRITE_PTR 0x3ffULL -#define V_TCB_RQ_WRITE_PTR(x) ((x) << S_TCB_RQ_WRITE_PTR) - -#define W_TCB_INB_WRITE_PERM 31 -#define S_TCB_INB_WRITE_PERM 21 -#define M_TCB_INB_WRITE_PERM 0x1ULL -#define V_TCB_INB_WRITE_PERM(x) ((x) << S_TCB_INB_WRITE_PERM) - -#define W_TCB_INB_READ_PERM 31 -#define S_TCB_INB_READ_PERM 22 -#define M_TCB_INB_READ_PERM 0x1ULL -#define V_TCB_INB_READ_PERM(x) ((x) << S_TCB_INB_READ_PERM) - -#define W_TCB_ORD_L_BIT_VLD 31 -#define S_TCB_ORD_L_BIT_VLD 23 -#define M_TCB_ORD_L_BIT_VLD 0x1ULL -#define V_TCB_ORD_L_BIT_VLD(x) ((x) << S_TCB_ORD_L_BIT_VLD) - -#define W_TCB_RDMAP_OPCODE 31 -#define S_TCB_RDMAP_OPCODE 24 -#define M_TCB_RDMAP_OPCODE 0xfULL -#define V_TCB_RDMAP_OPCODE(x) ((x) << S_TCB_RDMAP_OPCODE) - -#define W_TCB_TX_FLUSH 31 -#define S_TCB_TX_FLUSH 28 -#define M_TCB_TX_FLUSH 0x1ULL -#define V_TCB_TX_FLUSH(x) ((x) << S_TCB_TX_FLUSH) - -#define 
W_TCB_TX_OOS_RXMT 31 -#define S_TCB_TX_OOS_RXMT 29 -#define M_TCB_TX_OOS_RXMT 0x1ULL -#define V_TCB_TX_OOS_RXMT(x) ((x) << S_TCB_TX_OOS_RXMT) - -#define W_TCB_TX_OOS_TXMT 31 -#define S_TCB_TX_OOS_TXMT 30 -#define M_TCB_TX_OOS_TXMT 0x1ULL -#define V_TCB_TX_OOS_TXMT(x) ((x) << S_TCB_TX_OOS_TXMT) - -#define W_TCB_SLUSH_AUX2 31 -#define S_TCB_SLUSH_AUX2 31 -#define M_TCB_SLUSH_AUX2 0x1ULL -#define V_TCB_SLUSH_AUX2(x) ((x) << S_TCB_SLUSH_AUX2) - -#define W_TCB_RX_FRAG1_PTR_RAW2 25 -#define S_TCB_RX_FRAG1_PTR_RAW2 30 -#define M_TCB_RX_FRAG1_PTR_RAW2 0x1ffffULL -#define V_TCB_RX_FRAG1_PTR_RAW2(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW2) - -#define W_TCB_RX_DDP_FLAGS 26 -#define S_TCB_RX_DDP_FLAGS 15 -#define M_TCB_RX_DDP_FLAGS 0x3ffULL -#define V_TCB_RX_DDP_FLAGS(x) ((x) << S_TCB_RX_DDP_FLAGS) - -#define W_TCB_SLUSH_AUX3 26 -#define S_TCB_SLUSH_AUX3 31 -#define M_TCB_SLUSH_AUX3 0x1ffULL -#define V_TCB_SLUSH_AUX3(x) ((x) << S_TCB_SLUSH_AUX3) - -#define W_TCB_RX_DDP_BUF0_OFFSET 27 -#define S_TCB_RX_DDP_BUF0_OFFSET 8 -#define M_TCB_RX_DDP_BUF0_OFFSET 0x3fffffULL -#define V_TCB_RX_DDP_BUF0_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF0_OFFSET) - -#define W_TCB_RX_DDP_BUF0_LEN 27 -#define S_TCB_RX_DDP_BUF0_LEN 30 -#define M_TCB_RX_DDP_BUF0_LEN 0x3fffffULL -#define V_TCB_RX_DDP_BUF0_LEN(x) ((x) << S_TCB_RX_DDP_BUF0_LEN) - -#define W_TCB_RX_DDP_BUF1_OFFSET 28 -#define S_TCB_RX_DDP_BUF1_OFFSET 20 -#define M_TCB_RX_DDP_BUF1_OFFSET 0x3fffffULL -#define V_TCB_RX_DDP_BUF1_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF1_OFFSET) - -#define W_TCB_RX_DDP_BUF1_LEN 29 -#define S_TCB_RX_DDP_BUF1_LEN 10 -#define M_TCB_RX_DDP_BUF1_LEN 0x3fffffULL -#define V_TCB_RX_DDP_BUF1_LEN(x) ((x) << S_TCB_RX_DDP_BUF1_LEN) - -#define W_TCB_RX_DDP_BUF0_TAG 30 -#define S_TCB_RX_DDP_BUF0_TAG 0 -#define M_TCB_RX_DDP_BUF0_TAG 0xffffffffULL -#define V_TCB_RX_DDP_BUF0_TAG(x) ((x) << S_TCB_RX_DDP_BUF0_TAG) - -#define W_TCB_RX_DDP_BUF1_TAG 31 -#define S_TCB_RX_DDP_BUF1_TAG 0 -#define M_TCB_RX_DDP_BUF1_TAG 0xffffffffULL -#define V_TCB_RX_DDP_BUF1_TAG(x) ((x) << S_TCB_RX_DDP_BUF1_TAG) - -#define S_TF_DACK 10 -#define V_TF_DACK(x) ((x) << S_TF_DACK) - -#define S_TF_NAGLE 11 -#define V_TF_NAGLE(x) ((x) << S_TF_NAGLE) - -#define S_TF_RECV_SCALE 12 -#define V_TF_RECV_SCALE(x) ((x) << S_TF_RECV_SCALE) - -#define S_TF_RECV_TSTMP 13 -#define V_TF_RECV_TSTMP(x) ((x) << S_TF_RECV_TSTMP) - -#define S_TF_RECV_SACK 14 -#define V_TF_RECV_SACK(x) ((x) << S_TF_RECV_SACK) - -#define S_TF_TURBO 15 -#define V_TF_TURBO(x) ((x) << S_TF_TURBO) - -#define S_TF_KEEPALIVE 16 -#define V_TF_KEEPALIVE(x) ((x) << S_TF_KEEPALIVE) - -#define S_TF_TCAM_BYPASS 17 -#define V_TF_TCAM_BYPASS(x) ((x) << S_TF_TCAM_BYPASS) - -#define S_TF_CORE_FIN 18 -#define V_TF_CORE_FIN(x) ((x) << S_TF_CORE_FIN) - -#define S_TF_CORE_MORE 19 -#define V_TF_CORE_MORE(x) ((x) << S_TF_CORE_MORE) - -#define S_TF_MIGRATING 20 -#define V_TF_MIGRATING(x) ((x) << S_TF_MIGRATING) - -#define S_TF_ACTIVE_OPEN 21 -#define V_TF_ACTIVE_OPEN(x) ((x) << S_TF_ACTIVE_OPEN) - -#define S_TF_ASK_MODE 22 -#define V_TF_ASK_MODE(x) ((x) << S_TF_ASK_MODE) - -#define S_TF_NON_OFFLOAD 23 -#define V_TF_NON_OFFLOAD(x) ((x) << S_TF_NON_OFFLOAD) - -#define S_TF_MOD_SCHD 24 -#define V_TF_MOD_SCHD(x) ((x) << S_TF_MOD_SCHD) - -#define S_TF_MOD_SCHD_REASON0 25 -#define V_TF_MOD_SCHD_REASON0(x) ((x) << S_TF_MOD_SCHD_REASON0) - -#define S_TF_MOD_SCHD_REASON1 26 -#define V_TF_MOD_SCHD_REASON1(x) ((x) << S_TF_MOD_SCHD_REASON1) - -#define S_TF_MOD_SCHD_RX 27 -#define V_TF_MOD_SCHD_RX(x) ((x) << S_TF_MOD_SCHD_RX) - -#define S_TF_CORE_PUSH 28 -#define 
V_TF_CORE_PUSH(x) ((x) << S_TF_CORE_PUSH) - -#define S_TF_RCV_COALESCE_ENABLE 29 -#define V_TF_RCV_COALESCE_ENABLE(x) ((x) << S_TF_RCV_COALESCE_ENABLE) - -#define S_TF_RCV_COALESCE_PUSH 30 -#define V_TF_RCV_COALESCE_PUSH(x) ((x) << S_TF_RCV_COALESCE_PUSH) - -#define S_TF_RCV_COALESCE_LAST_PSH 31 -#define V_TF_RCV_COALESCE_LAST_PSH(x) ((x) << S_TF_RCV_COALESCE_LAST_PSH) - -#define S_TF_RCV_COALESCE_HEARTBEAT 32 -#define V_TF_RCV_COALESCE_HEARTBEAT(x) ((x) << S_TF_RCV_COALESCE_HEARTBEAT) - -#define S_TF_HALF_CLOSE 33 -#define V_TF_HALF_CLOSE(x) ((x) << S_TF_HALF_CLOSE) - -#define S_TF_DACK_MSS 34 -#define V_TF_DACK_MSS(x) ((x) << S_TF_DACK_MSS) - -#define S_TF_CCTRL_SEL0 35 -#define V_TF_CCTRL_SEL0(x) ((x) << S_TF_CCTRL_SEL0) - -#define S_TF_CCTRL_SEL1 36 -#define V_TF_CCTRL_SEL1(x) ((x) << S_TF_CCTRL_SEL1) - -#define S_TF_TCP_NEWRENO_FAST_RECOVERY 37 -#define V_TF_TCP_NEWRENO_FAST_RECOVERY(x) ((x) << S_TF_TCP_NEWRENO_FAST_RECOVERY) - -#define S_TF_TX_PACE_AUTO 38 -#define V_TF_TX_PACE_AUTO(x) ((x) << S_TF_TX_PACE_AUTO) - -#define S_TF_PEER_FIN_HELD 39 -#define V_TF_PEER_FIN_HELD(x) ((x) << S_TF_PEER_FIN_HELD) - -#define S_TF_CORE_URG 40 -#define V_TF_CORE_URG(x) ((x) << S_TF_CORE_URG) - -#define S_TF_RDMA_ERROR 41 -#define V_TF_RDMA_ERROR(x) ((x) << S_TF_RDMA_ERROR) - -#define S_TF_SSWS_DISABLED 42 -#define V_TF_SSWS_DISABLED(x) ((x) << S_TF_SSWS_DISABLED) - -#define S_TF_DUPACK_COUNT_ODD 43 -#define V_TF_DUPACK_COUNT_ODD(x) ((x) << S_TF_DUPACK_COUNT_ODD) - -#define S_TF_TX_CHANNEL 44 -#define V_TF_TX_CHANNEL(x) ((x) << S_TF_TX_CHANNEL) - -#define S_TF_RX_CHANNEL 45 -#define V_TF_RX_CHANNEL(x) ((x) << S_TF_RX_CHANNEL) - -#define S_TF_TX_PACE_FIXED 46 -#define V_TF_TX_PACE_FIXED(x) ((x) << S_TF_TX_PACE_FIXED) - -#define S_TF_RDMA_FLM_ERROR 47 -#define V_TF_RDMA_FLM_ERROR(x) ((x) << S_TF_RDMA_FLM_ERROR) - -#define S_TF_RX_FLOW_CONTROL_DISABLE 48 -#define V_TF_RX_FLOW_CONTROL_DISABLE(x) ((x) << S_TF_RX_FLOW_CONTROL_DISABLE) - -#endif /* _TCB_DEFS_H */ diff --git a/include/uapi/rdma/cxgb3-abi.h b/include/uapi/rdma/cxgb3-abi.h deleted file mode 100644 index 85aed672f43e..000000000000 --- a/include/uapi/rdma/cxgb3-abi.h +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef CXGB3_ABI_USER_H -#define CXGB3_ABI_USER_H - -#include - -#define IWCH_UVERBS_ABI_VERSION 1 - -/* - * Make sure that all structs defined in this file remain laid out so - * that they pack the same way on 32-bit and 64-bit architectures (to - * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * In particular do not use pointer types -- pass pointers in __aligned_u64 - * instead. - */ -struct iwch_create_cq_req { - __aligned_u64 user_rptr_addr; -}; - -struct iwch_create_cq_resp_v0 { - __aligned_u64 key; - __u32 cqid; - __u32 size_log2; -}; - -struct iwch_create_cq_resp { - __aligned_u64 key; - __u32 cqid; - __u32 size_log2; - __u32 memsize; - __u32 reserved; -}; - -struct iwch_create_qp_resp { - __aligned_u64 key; - __aligned_u64 db_key; - __u32 qpid; - __u32 size_log2; - __u32 sq_size_log2; - __u32 rq_size_log2; -}; - -struct iwch_reg_user_mr_resp { - __u32 pbl_addr; -}; - -struct iwch_alloc_pd_resp { - __u32 pdid; -}; - -#endif /* CXGB3_ABI_USER_H */ diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h index b8bb285f6b2a..b2680051047a 100644 --- a/include/uapi/rdma/rdma_user_ioctl_cmds.h +++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h @@ -88,7 +88,6 @@ enum rdma_driver_id { RDMA_DRIVER_UNKNOWN, RDMA_DRIVER_MLX5, RDMA_DRIVER_MLX4, - RDMA_DRIVER_CXGB3, RDMA_DRIVER_CXGB4, RDMA_DRIVER_MTHCA, RDMA_DRIVER_BNXT_RE, -- cgit v1.2.3-59-g8ed1b From 934f05b05d73d6c9ad68da66e2fd68abf72d1770 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:16:57 -0700 Subject: RDMA/siw: Make node GUIDs valid EUI-64 identifiers >From the IBTA: "GUID (Global Unique Identifier): A globally unique EUI-64 compliant identifier." Make sure that siw GUIDs are valid EUI-64 identifiers. 
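For reference, addrconf_addr_eui48() derives the 64-bit identifier from
the 48-bit MAC roughly as sketched below: the first three MAC octets,
the 0xff/0xfe filler octets used for EUI-64 style interface IDs, the
remaining MAC octets, and the universal/local bit inverted. This is an
illustrative sketch only, not the exact kernel helper.

	/* Sketch: expand a MAC-48 address into an EUI-64 style identifier. */
	static void mac48_to_eui64(const unsigned char mac[6], unsigned char eui[8])
	{
		eui[0] = mac[0] ^ 0x02;	/* invert the universal/local bit */
		eui[1] = mac[1];
		eui[2] = mac[2];
		eui[3] = 0xff;		/* filler octets */
		eui[4] = 0xfe;
		eui[5] = mac[3];
		eui[6] = mac[4];
		eui[7] = mac[5];
	}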
Link: https://lore.kernel.org/r/20190930231707.48259-6-bvanassche@acm.org Cc: Bernard Metzler Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_main.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 05a92f997f60..d1a1b7aa7d83 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -350,15 +351,19 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sdev->netdev = netdev; if (netdev->type != ARPHRD_LOOPBACK) { - memcpy(&base_dev->node_guid, netdev->dev_addr, 6); + addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, + netdev->dev_addr); } else { /* * The loopback device does not have a HW address, * but connection mangagement lib expects gid != 0 */ - size_t gidlen = min_t(size_t, strlen(base_dev->name), 6); + size_t len = min_t(size_t, strlen(base_dev->name), 6); + char addr[6] = { }; - memcpy(&base_dev->node_guid, base_dev->name, gidlen); + memcpy(addr, base_dev->name, len); + addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, + addr); } base_dev->uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | -- cgit v1.2.3-59-g8ed1b From 14673778d06ee551a55695947548b52d503e7273 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:16:58 -0700 Subject: RDMA/srp: Remove two casts This patch does not change any functionality. Link: https://lore.kernel.org/r/20190930231707.48259-7-bvanassche@acm.org Cc: Honggang LI Cc: Laurence Oberman Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srp/ib_srp.c | 4 ++-- drivers/infiniband/ulp/srp/ib_srp.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index b5960351bec0..f015dc4ce22c 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -352,8 +352,8 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) init_completion(&ch->done); ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ? - (struct sockaddr *)&target->rdma_cm.src : NULL, - (struct sockaddr *)&target->rdma_cm.dst, + &target->rdma_cm.src.sa : NULL, + &target->rdma_cm.dst.sa, SRP_PATH_REC_TIMEOUT_MS); if (ret) { pr_err("No route available from %pIS to %pIS (%d)\n", diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index b2861cd2087a..af9922550ae1 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -245,11 +245,13 @@ struct srp_target_port { union { struct sockaddr_in ip4; struct sockaddr_in6 ip6; + struct sockaddr sa; struct sockaddr_storage ss; } src; union { struct sockaddr_in ip4; struct sockaddr_in6 ip6; + struct sockaddr sa; struct sockaddr_storage ss; } dst; bool src_specified; -- cgit v1.2.3-59-g8ed1b From bf583470617e31c836afc4fa2039dabc43350533 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:16:59 -0700 Subject: RDMA/srp: Honor the max_send_sge device attribute Instead of assuming that max_send_sge >= 3, restrict the number of scatter gather elements to what is supported by the RDMA adapter. 
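The two hunks below combine into the following clamping (field names as
in the driver; the 255 cap matches the new u8 max_imm_sge member, and
the -1 presumably leaves one SGE for the SRP IU header):

	init_attr->cap.max_send_sge = min(SRP_MAX_SGE, attr->max_send_sge);
	ch->max_imm_sge = min(init_attr->cap.max_send_sge - 1U, 255U);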
Link: https://lore.kernel.org/r/20190930231707.48259-8-bvanassche@acm.org Cc: Honggang LI Cc: Laurence Oberman Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srp/ib_srp.c | 7 +++++-- drivers/infiniband/ulp/srp/ib_srp.h | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index f015dc4ce22c..d77e7dd3e745 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -552,6 +552,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; + const struct ib_device_attr *attr = &dev->dev->attrs; struct ib_qp_init_attr *init_attr; struct ib_cq *recv_cq, *send_cq; struct ib_qp *qp; @@ -583,12 +584,14 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) init_attr->cap.max_send_wr = m * target->queue_size; init_attr->cap.max_recv_wr = target->queue_size + 1; init_attr->cap.max_recv_sge = 1; - init_attr->cap.max_send_sge = SRP_MAX_SGE; + init_attr->cap.max_send_sge = min(SRP_MAX_SGE, attr->max_send_sge); init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; init_attr->qp_type = IB_QPT_RC; init_attr->send_cq = send_cq; init_attr->recv_cq = recv_cq; + ch->max_imm_sge = min(init_attr->cap.max_send_sge - 1U, 255U); + if (target->using_rdma_cm) { ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr); qp = ch->rdma_cm.cm_id->qp; @@ -1838,7 +1841,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, return -EIO; if (ch->use_imm_data && - count <= SRP_MAX_IMM_SGE && + count <= ch->max_imm_sge && SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len && scmnd->sc_data_direction == DMA_TO_DEVICE) { struct srp_imm_buf *buf; diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index af9922550ae1..f38fbb00d0e8 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -161,6 +161,7 @@ struct srp_rdma_ch { }; uint32_t max_it_iu_len; uint32_t max_ti_iu_len; + u8 max_imm_sge; bool use_imm_data; /* Everything above this point is used in the hot path of -- cgit v1.2.3-59-g8ed1b From fdbcf5c026d17f5babe85d57ac6e053f0605d4b8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:17:00 -0700 Subject: RDMA/srp: Make route resolving error messages more informative The IPv6 scope ID is essential when setting up an iWARP connection between IPv6 link-local addresses. Report the scope ID in error messages. 
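For context on the format change: %pIS prints only the socket address,
while the flags added in %pISpsc also print the port ('p') and scope ID
('s') and use compressed IPv6 notation ('c'). A hypothetical example for
a link-local peer; the exact output below is illustrative:

	/* dst: a struct sockaddr_storage holding an IPv6 link-local peer */
	pr_err("dst %pIS\n", &dst);	/* e.g. fe80::1 */
	pr_err("dst %pISpsc\n", &dst);	/* e.g. [fe80::1%2]:5555 */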
Link: https://lore.kernel.org/r/20190930231707.48259-9-bvanassche@acm.org Cc: Honggang LI Cc: Laurence Oberman Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srp/ib_srp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d77e7dd3e745..6fddd14b6bd9 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -356,7 +356,7 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) &target->rdma_cm.dst.sa, SRP_PATH_REC_TIMEOUT_MS); if (ret) { - pr_err("No route available from %pIS to %pIS (%d)\n", + pr_err("No route available from %pISpsc to %pISpsc (%d)\n", &target->rdma_cm.src, &target->rdma_cm.dst, ret); goto out; } @@ -366,7 +366,7 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) ret = ch->status; if (ret) { - pr_err("Resolving address %pIS failed (%d)\n", + pr_err("Resolving address %pISpsc failed (%d)\n", &target->rdma_cm.dst, ret); goto out; } -- cgit v1.2.3-59-g8ed1b From 09f8a1486dcaf69753961a6df6cffdaafc5ccbcb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:17:01 -0700 Subject: RDMA/srpt: Fix handling of SR-IOV and iWARP ports Management datagrams (MADs) are not supported by SR-IOV VFs nor by iWARP ports. Support SR-IOV VFs and iWARP ports by only logging an error message if MAD handler registration fails. Link: https://lore.kernel.org/r/20190930231707.48259-10-bvanassche@acm.org Cc: Honggang LI Cc: Laurence Oberman Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 41 ++++++++++++++++------------------- 1 file changed, 19 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index e25c70a56be6..4f99a5e040c3 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -556,24 +556,16 @@ static int srpt_refresh_port(struct srpt_port *sport) struct ib_port_attr port_attr; int ret; - memset(&port_modify, 0, sizeof(port_modify)); - port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; - port_modify.clr_port_cap_mask = 0; - - ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); - if (ret) - goto err_mod_port; - ret = ib_query_port(sport->sdev->device, sport->port, &port_attr); if (ret) - goto err_query_port; + return ret; sport->sm_lid = port_attr.sm_lid; sport->lid = port_attr.lid; ret = rdma_query_gid(sport->sdev->device, sport->port, 0, &sport->gid); if (ret) - goto err_query_port; + return ret; sport->port_guid_wwn.priv = sport; srpt_format_guid(sport->port_guid, sizeof(sport->port_guid), @@ -584,6 +576,20 @@ static int srpt_refresh_port(struct srpt_port *sport) be64_to_cpu(sport->gid.global.subnet_prefix), be64_to_cpu(sport->gid.global.interface_id)); + if (rdma_protocol_iwarp(sport->sdev->device, sport->port)) + return 0; + + memset(&port_modify, 0, sizeof(port_modify)); + port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; + port_modify.clr_port_cap_mask = 0; + + ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); + if (ret) { + pr_warn("%s-%d: enabling device management failed (%d). 
Note: this is expected if SR-IOV is enabled.\n", + dev_name(&sport->sdev->device->dev), sport->port, ret); + return 0; + } + if (!sport->mad_agent) { memset(®_req, 0, sizeof(reg_req)); reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; @@ -599,23 +605,14 @@ static int srpt_refresh_port(struct srpt_port *sport) srpt_mad_recv_handler, sport, 0); if (IS_ERR(sport->mad_agent)) { - ret = PTR_ERR(sport->mad_agent); + pr_err("%s-%d: MAD agent registration failed (%ld). Note: this is expected if SR-IOV is enabled.\n", + dev_name(&sport->sdev->device->dev), sport->port, + PTR_ERR(sport->mad_agent)); sport->mad_agent = NULL; - goto err_query_port; } } return 0; - -err_query_port: - - port_modify.set_port_cap_mask = 0; - port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; - ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); - -err_mod_port: - - return ret; } /** -- cgit v1.2.3-59-g8ed1b From cbca2442a096f7fd8b68e6350d35c3a70182ca20 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:17:02 -0700 Subject: RDMA/srpt: Fix handling of iWARP logins The path_rec pointer is NULL set for IB and RoCE logins but not for iWARP logins. Hence check the path_rec pointer before dereferencing it. Link: https://lore.kernel.org/r/20190930231707.48259-11-bvanassche@acm.org Cc: Honggang LI Cc: Laurence Oberman Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 4f99a5e040c3..fbfadeedc195 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2502,6 +2502,7 @@ static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id, struct srpt_device *sdev; struct srp_login_req req; const struct srp_login_req_rdma *req_rdma; + struct sa_path_rec *path_rec = cm_id->route.path_rec; char src_addr[40]; sdev = ib_get_client_data(cm_id->device, &srpt_client); @@ -2527,7 +2528,7 @@ static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id, &cm_id->route.addr.src_addr); return srpt_cm_req_recv(sdev, NULL, cm_id, cm_id->port_num, - cm_id->route.path_rec->pkey, &req, src_addr); + path_rec ? path_rec->pkey : 0, &req, src_addr); } static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch, -- cgit v1.2.3-59-g8ed1b From b5948cfddecd3af0faeddba9c77f22015959df89 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:17:03 -0700 Subject: RDMA/srpt: Improve a debug message The ib_srpt driver uses two different identifiers while registering a session with the LIO core. Report both identifiers if the modified pr_debug() statement is enabled. 
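As a usage note: when the kernel is built with CONFIG_DYNAMIC_DEBUG, pr_debug() statements such as this one are compiled in but silent by default and can be enabled at runtime, for example by writing 'module ib_srpt +p' to /sys/kernel/debug/dynamic_debug/control; without that option, pr_debug() only produces output if DEBUG is defined when the file is compiled.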
Link: https://lore.kernel.org/r/20190930231707.48259-12-bvanassche@acm.org Cc: Honggang LI Cc: Laurence Oberman Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index fbfadeedc195..dabaea301328 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2293,7 +2293,8 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, be64_to_cpu(*(__be64 *)nexus->i_port_id), be64_to_cpu(*(__be64 *)(nexus->i_port_id + 8))); - pr_debug("registering session %s\n", ch->sess_name); + pr_debug("registering src addr %s or i_port_id %s\n", ch->sess_name, + i_port_id); tag_num = ch->rq_size; tag_size = 1; /* ib_srpt does not use se_sess->sess_cmd_map */ -- cgit v1.2.3-59-g8ed1b From 6eaed91c673ad5baa0af8a05dc756f3a457468f3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:17:04 -0700 Subject: RDMA/srpt: Rework the approach for closing an RDMA channel Instead of relying on a waitqueue, report when the identity of an RDMA channel can be reused through a completion. Link: https://lore.kernel.org/r/20190930231707.48259-13-bvanassche@acm.org Cc: Honggang LI Cc: Laurence Oberman Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 30 +++++++----------------------- drivers/infiniband/ulp/srpt/ib_srpt.h | 3 +++ 2 files changed, 10 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index dabaea301328..88e11a250c96 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1928,41 +1928,22 @@ static int srpt_disconnect_ch(struct srpt_rdma_ch *ch) return ret; } -static bool srpt_ch_closed(struct srpt_port *sport, struct srpt_rdma_ch *ch) -{ - struct srpt_nexus *nexus; - struct srpt_rdma_ch *ch2; - bool res = true; - - rcu_read_lock(); - list_for_each_entry(nexus, &sport->nexus_list, entry) { - list_for_each_entry(ch2, &nexus->ch_list, list) { - if (ch2 == ch) { - res = false; - goto done; - } - } - } -done: - rcu_read_unlock(); - - return res; -} - /* Send DREQ and wait for DREP. */ static void srpt_disconnect_ch_sync(struct srpt_rdma_ch *ch) { + DECLARE_COMPLETION_ONSTACK(closed); struct srpt_port *sport = ch->sport; pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, ch->state); + ch->closed = &closed; + mutex_lock(&sport->mutex); srpt_disconnect_ch(ch); mutex_unlock(&sport->mutex); - while (wait_event_timeout(sport->ch_releaseQ, srpt_ch_closed(sport, ch), - 5 * HZ) == 0) + while (wait_for_completion_timeout(&closed, 5 * HZ) == 0) pr_info("%s(%s-%d state %d): still waiting ...\n", __func__, ch->sess_name, ch->qp->qp_num, ch->state); @@ -2089,6 +2070,9 @@ static void srpt_release_channel_work(struct work_struct *w) list_del_rcu(&ch->list); mutex_unlock(&sport->mutex); + if (ch->closed) + complete(ch->closed); + srpt_destroy_ch_ib(ch); srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index ee9f20e9177a..d0955bc0343d 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -264,6 +264,8 @@ enum rdma_ch_state { * @zw_cqe: Zero-length write CQE. * @rcu: RCU head. * @kref: kref for this channel. 
+ * @closed: Completion object that will be signaled as soon as a new + * channel object with the same identity can be created. * @rq_size: IB receive queue size. * @max_rsp_size: Maximum size of an RSP response message in bytes. * @sq_wr_avail: number of work requests available in the send queue. @@ -306,6 +308,7 @@ struct srpt_rdma_ch { struct ib_cqe zw_cqe; struct rcu_head rcu; struct kref kref; + struct completion *closed; int rq_size; u32 max_rsp_size; atomic_t sq_wr_avail; -- cgit v1.2.3-59-g8ed1b From be408e65f55ea751ca6e8d08094951c50ce9d3a3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:17:05 -0700 Subject: RDMA/srpt: Rework the code that waits until an RDMA port is no longer in use The current implementation does not wait until srpt_release_channel() has finished and hence can trigger a use-after-free. Rework srpt_release_sport() such that it waits until srpt_release_channel() has finished. This patch fixes the following KASAN complaint: ================================================================== BUG: KASAN: use-after-free in srpt_free_ioctx.part.23+0x42/0x100 [ib_srpt] Read of size 8 at addr ffff888115c71100 by task kworker/4:3/807 CPU: 4 PID: 807 Comm: kworker/4:3 Not tainted 5.3.0-dbg+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Workqueue: events srpt_release_channel_work [ib_srpt] Call Trace: dump_stack+0x86/0xca print_address_description+0x74/0x32d __kasan_report.cold.6+0x1b/0x36 kasan_report+0x12/0x17 __asan_load8+0x54/0x90 srpt_free_ioctx.part.23+0x42/0x100 [ib_srpt] srpt_free_ioctx_ring.part.24+0x50/0x80 [ib_srpt] srpt_release_channel_work+0x2ad/0x390 [ib_srpt] process_one_work+0x51a/0xa60 worker_thread+0x67/0x5b0 kthread+0x1dc/0x200 ret_from_fork+0x24/0x30 Allocated by task 984: save_stack+0x21/0x90 __kasan_kmalloc.constprop.9+0xc7/0xd0 kasan_kmalloc+0x9/0x10 __kmalloc+0x153/0x370 srpt_add_one+0x4f/0x570 [ib_srpt] add_client_context+0x251/0x290 [ib_core] ib_register_client+0x1da/0x220 [ib_core] iblock_get_alignment_offset_lbas+0x6b/0x100 [target_core_iblock] do_one_initcall+0xcd/0x43a do_init_module+0x103/0x380 load_module+0x3b77/0x3eb0 __do_sys_finit_module+0x12d/0x1b0 __x64_sys_finit_module+0x43/0x50 do_syscall_64+0x6b/0x2d0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 1128: save_stack+0x21/0x90 __kasan_slab_free+0x139/0x190 kasan_slab_free+0xe/0x10 slab_free_freelist_hook+0x67/0x1e0 kfree+0xcb/0x2a0 srpt_remove_one+0x569/0x5b0 [ib_srpt] remove_client_context+0x9a/0xe0 [ib_core] disable_device+0x106/0x1b0 [ib_core] __ib_unregister_device+0x5f/0xf0 [ib_core] ib_unregister_device_and_put+0x48/0x60 [ib_core] nldev_dellink+0x120/0x180 [ib_core] rdma_nl_rcv+0x287/0x480 [ib_core] netlink_unicast+0x2cc/0x370 netlink_sendmsg+0x3b1/0x630 __sys_sendto+0x1db/0x290 __x64_sys_sendto+0x80/0xa0 do_syscall_64+0x6b/0x2d0 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff888115c71100 which belongs to the cache kmalloc-4k of size 4096 The buggy address is located 0 bytes inside of 4096-byte region [ffff888115c71100, ffff888115c72100) The buggy address belongs to the page: page:ffffea0004571c00 refcount:1 mapcount:0 mapping:ffff88811ac0de00 index:0xffff888115c70000 compound_mapcount: 0 flags: 0x2fff000000010200(slab|head) raw: 2fff000000010200 ffffea00045ac408 ffffea0004593208 ffff88811ac0de00 raw: ffff888115c70000 0000000000070002 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888115c71000: fc 
fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888115c71080: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff888115c71100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888115c71180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888115c71200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Link: https://lore.kernel.org/r/20190930231707.48259-14-bvanassche@acm.org Cc: Honggang LI Cc: Laurence Oberman Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 42 +++++++++++++++++------------------ drivers/infiniband/ulp/srpt/ib_srpt.h | 6 +++-- 2 files changed, 25 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 88e11a250c96..5e402f7b9692 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2023,10 +2023,17 @@ static void srpt_set_enabled(struct srpt_port *sport, bool enabled) __srpt_close_all_ch(sport); } +static void srpt_drop_sport_ref(struct srpt_port *sport) +{ + if (atomic_dec_return(&sport->refcount) == 0 && sport->freed_channels) + complete(sport->freed_channels); +} + static void srpt_free_ch(struct kref *kref) { struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref); + srpt_drop_sport_ref(ch->sport); kfree_rcu(ch, rcu); } @@ -2087,8 +2094,6 @@ static void srpt_release_channel_work(struct work_struct *w) kmem_cache_destroy(ch->req_buf_cache); - wake_up(&sport->ch_releaseQ); - kref_put(&ch->kref, srpt_free_ch); } @@ -2307,6 +2312,12 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, goto destroy_ib; } + /* + * Once a session has been created destruction of srpt_rdma_ch objects + * will decrement sport->refcount. Hence increment sport->refcount now. + */ + atomic_inc(&sport->refcount); + mutex_lock(&sport->mutex); if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) { @@ -2889,39 +2900,29 @@ static void srpt_refresh_port_work(struct work_struct *work) srpt_refresh_port(sport); } -static bool srpt_ch_list_empty(struct srpt_port *sport) -{ - struct srpt_nexus *nexus; - bool res = true; - - rcu_read_lock(); - list_for_each_entry(nexus, &sport->nexus_list, entry) - if (!list_empty(&nexus->ch_list)) - res = false; - rcu_read_unlock(); - - return res; -} - /** * srpt_release_sport - disable login and wait for associated channels * @sport: SRPT HCA port. 
*/ static int srpt_release_sport(struct srpt_port *sport) { + DECLARE_COMPLETION_ONSTACK(c); struct srpt_nexus *nexus, *next_n; struct srpt_rdma_ch *ch; WARN_ON_ONCE(irqs_disabled()); + sport->freed_channels = &c; + mutex_lock(&sport->mutex); srpt_set_enabled(sport, false); mutex_unlock(&sport->mutex); - while (wait_event_timeout(sport->ch_releaseQ, - srpt_ch_list_empty(sport), 5 * HZ) <= 0) { - pr_info("%s_%d: waiting for session unregistration ...\n", - dev_name(&sport->sdev->device->dev), sport->port); + while (atomic_read(&sport->refcount) > 0 && + wait_for_completion_timeout(&c, 5 * HZ) <= 0) { + pr_info("%s_%d: waiting for unregistration of %d sessions ...\n", + dev_name(&sport->sdev->device->dev), sport->port, + atomic_read(&sport->refcount)); rcu_read_lock(); list_for_each_entry(nexus, &sport->nexus_list, entry) { list_for_each_entry(ch, &nexus->ch_list, list) { @@ -3130,7 +3131,6 @@ static void srpt_add_one(struct ib_device *device) for (i = 1; i <= sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i - 1]; INIT_LIST_HEAD(&sport->nexus_list); - init_waitqueue_head(&sport->ch_releaseQ); mutex_init(&sport->mutex); sport->sdev = sdev; sport->port = i; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index d0955bc0343d..f3df791dd877 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -381,7 +381,8 @@ struct srpt_port_attrib { * @port_gid_tpg: TPG associated with target port GID. * @port_gid_wwn: WWN associated with target port GID. * @port_attrib: Port attributes that can be accessed through configfs. - * @ch_releaseQ: Enables waiting for removal from nexus_list. + * @refcount: Number of objects associated with this port. + * @freed_channels: Completion that will be signaled once @refcount becomes 0. * @mutex: Protects nexus_list. * @nexus_list: Nexus list. See also srpt_nexus.entry. */ @@ -401,7 +402,8 @@ struct srpt_port { struct se_portal_group port_gid_tpg; struct se_wwn port_gid_wwn; struct srpt_port_attrib port_attrib; - wait_queue_head_t ch_releaseQ; + atomic_t refcount; + struct completion *freed_channels; struct mutex mutex; struct list_head nexus_list; }; -- cgit v1.2.3-59-g8ed1b From 3236fd61ee92487322982726f63cbe8355f6636e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:17:06 -0700 Subject: RDMA/srpt: Make the code for handling port identities more systematic Introduce a new data structure for the information about an RDMA port name. This patch does not change any functionality. 
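A side note on the structure introduced by this patch: because the se_portal_group and se_wwn are embedded in struct srpt_port_id, the reverse lookups could in principle also be written with container_of() instead of comparing pointers. A sketch of that alternative (not what the patch does, which keeps explicit comparisons plus a WARN_ON_ONCE() to catch unexpected pointers):

    static struct srpt_port_id *tpg_to_port_id(struct se_portal_group *tpg)
    {
            /* Only valid when @tpg is known to be embedded in a srpt_port_id. */
            return container_of(tpg, struct srpt_port_id, tpg);
    }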
Link: https://lore.kernel.org/r/20190930231707.48259-15-bvanassche@acm.org Cc: Honggang LI Cc: Laurence Oberman Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 64 ++++++++++++++++++++++------------- drivers/infiniband/ulp/srpt/ib_srpt.h | 25 +++++++++----- 2 files changed, 57 insertions(+), 32 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 5e402f7b9692..0582b3d4ec4d 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -567,11 +567,12 @@ static int srpt_refresh_port(struct srpt_port *sport) if (ret) return ret; - sport->port_guid_wwn.priv = sport; - srpt_format_guid(sport->port_guid, sizeof(sport->port_guid), + sport->port_guid_id.wwn.priv = sport; + srpt_format_guid(sport->port_guid_id.name, + sizeof(sport->port_guid_id.name), &sport->gid.global.interface_id); - sport->port_gid_wwn.priv = sport; - snprintf(sport->port_gid, sizeof(sport->port_gid), + sport->port_gid_id.wwn.priv = sport; + snprintf(sport->port_gid_id.name, sizeof(sport->port_gid_id.name), "0x%016llx%016llx", be64_to_cpu(sport->gid.global.subnet_prefix), be64_to_cpu(sport->gid.global.interface_id)); @@ -2287,17 +2288,17 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, tag_num = ch->rq_size; tag_size = 1; /* ib_srpt does not use se_sess->sess_cmd_map */ - if (sport->port_guid_tpg.se_tpg_wwn) - ch->sess = target_setup_session(&sport->port_guid_tpg, tag_num, + if (sport->port_guid_id.tpg.se_tpg_wwn) + ch->sess = target_setup_session(&sport->port_guid_id.tpg, tag_num, tag_size, TARGET_PROT_NORMAL, ch->sess_name, ch, NULL); - if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess)) - ch->sess = target_setup_session(&sport->port_gid_tpg, tag_num, + if (sport->port_gid_id.tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess)) + ch->sess = target_setup_session(&sport->port_gid_id.tpg, tag_num, tag_size, TARGET_PROT_NORMAL, i_port_id, ch, NULL); /* Retry without leading "0x" */ - if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess)) - ch->sess = target_setup_session(&sport->port_gid_tpg, tag_num, + if (sport->port_gid_id.tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess)) + ch->sess = target_setup_session(&sport->port_gid_id.tpg, tag_num, tag_size, TARGET_PROT_NORMAL, i_port_id + 2, ch, NULL); if (IS_ERR_OR_NULL(ch->sess)) { @@ -2959,10 +2960,10 @@ static struct se_wwn *__srpt_lookup_wwn(const char *name) for (i = 0; i < dev->phys_port_cnt; i++) { sport = &sdev->port[i]; - if (strcmp(sport->port_guid, name) == 0) - return &sport->port_guid_wwn; - if (strcmp(sport->port_gid, name) == 0) - return &sport->port_gid_wwn; + if (strcmp(sport->port_guid_id.name, name) == 0) + return &sport->port_guid_id.wwn; + if (strcmp(sport->port_gid_id.name, name) == 0) + return &sport->port_gid_id.wwn; } } @@ -3241,14 +3242,33 @@ static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg) return tpg->se_tpg_wwn->priv; } -static char *srpt_get_fabric_wwn(struct se_portal_group *tpg) +static struct srpt_port_id *srpt_tpg_to_sport_id(struct se_portal_group *tpg) { struct srpt_port *sport = srpt_tpg_to_sport(tpg); - WARN_ON_ONCE(tpg != &sport->port_guid_tpg && - tpg != &sport->port_gid_tpg); - return tpg == &sport->port_guid_tpg ? 
sport->port_guid : - sport->port_gid; + if (tpg == &sport->port_guid_id.tpg) + return &sport->port_guid_id; + if (tpg == &sport->port_gid_id.tpg) + return &sport->port_gid_id; + WARN_ON_ONCE(true); + return NULL; +} + +static struct srpt_port_id *srpt_wwn_to_sport_id(struct se_wwn *wwn) +{ + struct srpt_port *sport = wwn->priv; + + if (wwn == &sport->port_guid_id.wwn) + return &sport->port_guid_id; + if (wwn == &sport->port_gid_id.wwn) + return &sport->port_gid_id; + WARN_ON_ONCE(true); + return NULL; +} + +static char *srpt_get_fabric_wwn(struct se_portal_group *tpg) +{ + return srpt_tpg_to_sport_id(tpg)->name; } static u16 srpt_get_tag(struct se_portal_group *tpg) @@ -3705,13 +3725,9 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, const char *name) { struct srpt_port *sport = wwn->priv; - struct se_portal_group *tpg; + struct se_portal_group *tpg = &srpt_wwn_to_sport_id(wwn)->tpg; int res; - WARN_ON_ONCE(wwn != &sport->port_guid_wwn && - wwn != &sport->port_gid_wwn); - tpg = wwn == &sport->port_guid_wwn ? &sport->port_guid_tpg : - &sport->port_gid_tpg; res = core_tpg_register(wwn, tpg, SCSI_PROTOCOL_SRP); if (res) return ERR_PTR(res); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index f3df791dd877..f8bd95302ac0 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -363,13 +363,26 @@ struct srpt_port_attrib { bool use_srq; }; +/** + * struct srpt_port_id - information about an RDMA port name + * @tpg: TPG associated with the RDMA port. + * @wwn: WWN associated with the RDMA port. + * @name: ASCII representation of the port name. + * + * Multiple sysfs directories can be associated with a single RDMA port. This + * data structure represents a single (port, name) pair. + */ +struct srpt_port_id { + struct se_portal_group tpg; + struct se_wwn wwn; + char name[64]; +}; + /** * struct srpt_port - information associated by SRPT with a single IB port * @sdev: backpointer to the HCA information. * @mad_agent: per-port management datagram processing information. * @enabled: Whether or not this target port is enabled. - * @port_guid: ASCII representation of Port GUID - * @port_gid: ASCII representation of Port GID * @port: one-based port number. * @sm_lid: cached value of the port's sm_lid. * @lid: cached value of the port's lid. @@ -390,17 +403,13 @@ struct srpt_port { struct srpt_device *sdev; struct ib_mad_agent *mad_agent; bool enabled; - u8 port_guid[24]; - u8 port_gid[64]; u8 port; u32 sm_lid; u32 lid; union ib_gid gid; struct work_struct work; - struct se_portal_group port_guid_tpg; - struct se_wwn port_guid_wwn; - struct se_portal_group port_gid_tpg; - struct se_wwn port_gid_wwn; + struct srpt_port_id port_guid_id; + struct srpt_port_id port_gid_id; struct srpt_port_attrib port_attrib; atomic_t refcount; struct completion *freed_channels; -- cgit v1.2.3-59-g8ed1b From 9b64f7d0bb0a8b5987f265756a563384765c7378 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:17:07 -0700 Subject: RDMA/srpt: Postpone HCA removal until after configfs directory removal A shortcoming of the SCSI target core is that it does not have an API for removing tpg or wwn objects. Wait until these directories have been removed before allowing HCA removal to finish. See also Bart Van Assche, "Re: Why using configfs as the only interface is wrong for a storage target", 2011-02-07 (https://www.spinics.net/lists/linux-scsi/msg50248.html). 
This patch fixes the following kernel crash: ================================================================== BUG: KASAN: use-after-free in __configfs_open_file.isra.4+0x1a8/0x400 Read of size 8 at addr ffff88811880b690 by task restart-lio-srp/1215 CPU: 1 PID: 1215 Comm: restart-lio-srp Not tainted 5.3.0-dbg+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Call Trace: dump_stack+0x86/0xca print_address_description+0x74/0x32d __kasan_report.cold.6+0x1b/0x36 kasan_report+0x12/0x17 __asan_load8+0x54/0x90 __configfs_open_file.isra.4+0x1a8/0x400 configfs_open_file+0x13/0x20 do_dentry_open+0x2b1/0x770 vfs_open+0x58/0x60 path_openat+0x5fa/0x14b0 do_filp_open+0x115/0x180 do_sys_open+0x1d4/0x2a0 __x64_sys_openat+0x59/0x70 do_syscall_64+0x6b/0x2d0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f2f2bd3fcce Code: 25 00 00 41 00 3d 00 00 41 00 74 48 48 8d 05 19 d7 0d 00 8b 00 85 c0 75 69 89 f2 b8 01 01 00 00 48 89 fe bf 9c ff ff ff 0f 05 <48> 3d 00 f0 ff ff 0f 87 a6 00 00 00 48 8b 4c 24 28 64 48 33 0c 25 RSP: 002b:00007ffd155f7850 EFLAGS: 00000246 ORIG_RAX: 0000000000000101 RAX: ffffffffffffffda RBX: 0000564609ba88e0 RCX: 00007f2f2bd3fcce RDX: 0000000000000241 RSI: 0000564609ba8cf0 RDI: 00000000ffffff9c RBP: 00007ffd155f7950 R08: 0000000000000000 R09: 0000000000000020 R10: 00000000000001b6 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000003 R14: 0000000000000001 R15: 0000564609ba8cf0 Allocated by task 995: save_stack+0x21/0x90 __kasan_kmalloc.constprop.9+0xc7/0xd0 kasan_kmalloc+0x9/0x10 __kmalloc+0x153/0x370 srpt_add_one+0x4f/0x561 [ib_srpt] add_client_context+0x251/0x290 [ib_core] ib_register_client+0x1da/0x220 [ib_core] iblock_get_alignment_offset_lbas+0x6b/0x100 [target_core_iblock] do_one_initcall+0xcd/0x43a do_init_module+0x103/0x380 load_module+0x3b77/0x3eb0 __do_sys_finit_module+0x12d/0x1b0 __x64_sys_finit_module+0x43/0x50 do_syscall_64+0x6b/0x2d0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 1221: save_stack+0x21/0x90 __kasan_slab_free+0x139/0x190 kasan_slab_free+0xe/0x10 slab_free_freelist_hook+0x67/0x1e0 kfree+0xcb/0x2a0 srpt_remove_one+0x596/0x670 [ib_srpt] remove_client_context+0x9a/0xe0 [ib_core] disable_device+0x106/0x1b0 [ib_core] __ib_unregister_device+0x5f/0xf0 [ib_core] ib_unregister_driver+0x11a/0x170 [ib_core] 0xffffffffa087f666 __x64_sys_delete_module+0x1f8/0x2c0 do_syscall_64+0x6b/0x2d0 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff88811880b300 which belongs to the cache kmalloc-4k of size 4096 The buggy address is located 912 bytes inside of 4096-byte region [ffff88811880b300, ffff88811880c300) The buggy address belongs to the page: page:ffffea0004620200 refcount:1 mapcount:0 mapping:ffff88811ac0de00 index:0x0 compound_mapcount: 0 flags: 0x2fff000000010200(slab|head) raw: 2fff000000010200 dead000000000100 dead000000000122 ffff88811ac0de00 raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88811880b580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88811880b600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88811880b680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88811880b700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88811880b780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Link: https://lore.kernel.org/r/20190930231707.48259-16-bvanassche@acm.org 
Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 0582b3d4ec4d..daf811abf40a 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2921,7 +2921,7 @@ static int srpt_release_sport(struct srpt_port *sport) while (atomic_read(&sport->refcount) > 0 && wait_for_completion_timeout(&c, 5 * HZ) <= 0) { - pr_info("%s_%d: waiting for unregistration of %d sessions ...\n", + pr_info("%s_%d: waiting for unregistration of %d sessions and configfs directories ...\n", dev_name(&sport->sdev->device->dev), sport->port, atomic_read(&sport->refcount)); rcu_read_lock(); @@ -3732,6 +3732,8 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, if (res) return ERR_PTR(res); + atomic_inc(&sport->refcount); + return tpg; } @@ -3745,6 +3747,7 @@ static void srpt_drop_tpg(struct se_portal_group *tpg) sport->enabled = false; core_tpg_deregister(tpg); + srpt_drop_sport_ref(sport); } /** -- cgit v1.2.3-59-g8ed1b From 4b2a67362e7814641730fa4c561a9ef609fc7a03 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 2 Oct 2019 15:25:14 +0300 Subject: RDMA/mlx5: Group boolean parameters to take less space Clean the code to store all boolean parameters inside one variable. Link: https://lore.kernel.org/r/20191002122517.17721-2-leon@kernel.org Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2ceaef3ea3fb..bf30d53d94dc 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -958,7 +958,10 @@ struct mlx5_ib_dev { /* serialize update of capability mask */ struct mutex cap_mask_mutex; - bool ib_active; + u8 ib_active:1; + u8 fill_delay:1; + u8 is_rep:1; + u8 lag_active:1; struct umr_common umrc; /* sync used page count stats */ @@ -967,7 +970,6 @@ struct mlx5_ib_dev { struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ struct mutex slow_path_mutex; - int fill_delay; struct ib_odp_caps odp_caps; u64 odp_max_size; struct mlx5_ib_pf_eq odp_pf_eq; @@ -988,8 +990,6 @@ struct mlx5_ib_dev { struct mlx5_sq_bfreg fp_bfreg; struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; - bool is_rep; - int lag_active; struct mlx5_ib_lb_state lb; u8 umr_fence; -- cgit v1.2.3-59-g8ed1b From 2d67c0798821c453d2f165e986567de50f33cbed Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Wed, 2 Oct 2019 15:25:15 +0300 Subject: IB/mlx5: Remove unnecessary return statement There is no reason to call return at the end of function which returns void. Remove this unnecessary statement. 
Link: https://lore.kernel.org/r/20191002122517.17721-3-leon@kernel.org Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 2e9b43061797..95cf0249b015 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -359,8 +359,6 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled)) caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT; - - return; } static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, -- cgit v1.2.3-59-g8ed1b From 6f26b2ac699cc53f7da9598be6151818461af2a1 Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Wed, 2 Oct 2019 15:25:16 +0300 Subject: IB/mlx5: Remove unnecessary else statement 'else' is not generally useful after a break or return. Remove this unnecessary statement. Link: https://lore.kernel.org/r/20191002122517.17721-4-leon@kernel.org Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 831539419c30..b95c2b05f682 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -844,8 +844,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); if (uhw->outlen && uhw->outlen < resp_len) return -EINVAL; - else - resp.response_length = resp_len; + + resp.response_length = resp_len; if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) return -EINVAL; -- cgit v1.2.3-59-g8ed1b From 909624d8db5bbd369690749eb4c4392766f39f94 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 2 Oct 2019 15:25:17 +0300 Subject: IB/cm: Use container_of() instead of typecast Use container_of() macro to get to timewait info structure instead of typecasting. Link: https://lore.kernel.org/r/20191002122517.17721-5-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index da10e6ccb43c..c0aa3a4b4cfd 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -246,7 +246,7 @@ struct cm_work { }; struct cm_timewait_info { - struct cm_work work; /* Must be first. 
*/ + struct cm_work work; struct list_head list; struct rb_node remote_qp_node; struct rb_node remote_id_node; @@ -3434,7 +3434,7 @@ static int cm_timewait_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; int ret; - timewait_info = (struct cm_timewait_info *)work; + timewait_info = container_of(work, struct cm_timewait_info, work); spin_lock_irq(&cm.lock); list_del(&timewait_info->list); spin_unlock_irq(&cm.lock); -- cgit v1.2.3-59-g8ed1b From 547ed331bbe89575ff91939e1ac9a023fe526b74 Mon Sep 17 00:00:00 2001 From: Honggang Li Date: Sat, 28 Sep 2019 01:43:51 +0800 Subject: RDMA/srp: Add parse function for maximum initiator to target IU size According to SRP specifications 'srp-r16a' and 'srp2r06', IOControllerProfile attributes for SRP target port include the maximum initiator to target IU size. SRP connection daemons, such as srp_daemon, can get the value from the subnet manager. The SRP connection daemon can pass this value to kernel. This patch adds a parse function for it. Upstream commit [1] enables the kernel parameter, 'use_imm_data', by default. [1] also use (8 * 1024) as the default value for kernel parameter 'max_imm_data'. With those default values, the maximum initiator to target IU size will be 8260. In case the SRPT modules, which include the in-tree 'ib_srpt.ko' module, do not support SRP-2 'immediate data' feature, the default maximum initiator to target IU size is significantly smaller than 8260. For 'ib_srpt.ko' module, which built from source before [2], the default maximum initiator to target IU is 2116. [1] introduces a regression issue for old srp targets with default kernel parameters, as the connection will be rejected because of a too large maximum initiator to target IU size. [1] commit 882981f4a411 ("RDMA/srp: Add support for immediate data") [2] commit 5dabcd0456d7 ("RDMA/srpt: Add support for immediate data") Link: https://lore.kernel.org/r/20190927174352.7800-1-honli@redhat.com Reviewed-by: Bart Van Assche Signed-off-by: Honggang Li Signed-off-by: Jason Gunthorpe --- Documentation/ABI/stable/sysfs-driver-ib_srp | 2 ++ drivers/infiniband/ulp/srp/ib_srp.c | 10 ++++++++++ drivers/infiniband/ulp/srp/ib_srp.h | 1 + 3 files changed, 13 insertions(+) (limited to 'drivers') diff --git a/Documentation/ABI/stable/sysfs-driver-ib_srp b/Documentation/ABI/stable/sysfs-driver-ib_srp index 7049a2b50359..84972a57caae 100644 --- a/Documentation/ABI/stable/sysfs-driver-ib_srp +++ b/Documentation/ABI/stable/sysfs-driver-ib_srp @@ -67,6 +67,8 @@ Description: Interface for making ib_srp connect to a new target. initiator is allowed to queue per SCSI host. The default value for this parameter is 62. The lowest supported value is 2. + * max_it_iu_size, a decimal number specifying the maximum + initiator to target information unit length. 
What: /sys/class/infiniband_srp/srp--/ibdev Date: January 2, 2006 diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 6fddd14b6bd9..c46d2799d192 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3414,6 +3414,7 @@ enum { SRP_OPT_IP_SRC = 1 << 15, SRP_OPT_IP_DEST = 1 << 16, SRP_OPT_TARGET_CAN_QUEUE= 1 << 17, + SRP_OPT_MAX_IT_IU_SIZE = 1 << 18, }; static unsigned int srp_opt_mandatory[] = { @@ -3446,6 +3447,7 @@ static const match_table_t srp_opt_tokens = { { SRP_OPT_QUEUE_SIZE, "queue_size=%d" }, { SRP_OPT_IP_SRC, "src=%s" }, { SRP_OPT_IP_DEST, "dest=%s" }, + { SRP_OPT_MAX_IT_IU_SIZE, "max_it_iu_size=%d" }, { SRP_OPT_ERR, NULL } }; @@ -3739,6 +3741,14 @@ static int srp_parse_options(struct net *net, const char *buf, target->tl_retry_count = token; break; + case SRP_OPT_MAX_IT_IU_SIZE: + if (match_int(args, &token) || token < 0) { + pr_warn("bad maximum initiator to target IU size '%s'\n", p); + goto out; + } + target->max_it_iu_size = token; + break; + default: pr_warn("unknown parameter or missing value '%s' in target creation request\n", p); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index f38fbb00d0e8..5359ece561ca 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -210,6 +210,7 @@ struct srp_target_port { u32 ch_count; u32 lkey; enum srp_target_state state; + uint32_t max_it_iu_size; unsigned int cmd_sg_cnt; unsigned int indirect_size; bool allow_ext_sg; -- cgit v1.2.3-59-g8ed1b From b2e872f451992e5ac3d6b79a62b66f1721e29521 Mon Sep 17 00:00:00 2001 From: Honggang Li Date: Sat, 28 Sep 2019 01:43:52 +0800 Subject: RDMA/srp: Calculate max_it_iu_size if remote max_it_iu length available The default maximum immediate size is too big for old srp clients, which do not support immediate data. According to the SRP and SRP-2 specifications, the IOControllerProfile attributes for SRP target ports contains the maximum initiator to target iu length. The maximum initiator to target iu length can be obtained by sending MAD packets to query subnet manager port and SRP target ports. We should calculate the max_it_iu_size instead of the default value, when remote maximum initiator to target iu length available. Link: https://lore.kernel.org/r/20190927174352.7800-2-honli@redhat.com Reviewed-by: Bart Van Assche Signed-off-by: Honggang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srp/ib_srp.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index c46d2799d192..0bd4c66ae331 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1365,7 +1365,8 @@ static void srp_terminate_io(struct srp_rport *rport) } /* Calculate maximum initiator to target information unit length. 
*/ -static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data) +static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data, + uint32_t max_it_iu_size) { uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN + sizeof(struct srp_indirect_buf) + @@ -1375,6 +1376,11 @@ static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data) max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET + srp_max_imm_data); + if (max_it_iu_size) + max_iu_len = min(max_iu_len, max_it_iu_size); + + pr_debug("max_iu_len = %d\n", max_iu_len); + return max_iu_len; } @@ -1392,7 +1398,8 @@ static int srp_rport_reconnect(struct srp_rport *rport) struct srp_target_port *target = rport->lld_data; struct srp_rdma_ch *ch; uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, - srp_use_imm_data); + srp_use_imm_data, + target->max_it_iu_size); int i, j, ret = 0; bool multich = false; @@ -2541,7 +2548,8 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id, ch->req_lim = be32_to_cpu(lrsp->req_lim_delta); ch->use_imm_data = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP; ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, - ch->use_imm_data); + ch->use_imm_data, + target->max_it_iu_size); WARN_ON_ONCE(ch->max_it_iu_len > be32_to_cpu(lrsp->max_it_iu_len)); @@ -3900,7 +3908,9 @@ static ssize_t srp_create_target(struct device *dev, target->mr_per_cmd = mr_per_cmd; target->indirect_size = target->sg_tablesize * sizeof (struct srp_direct_buf); - max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, srp_use_imm_data); + max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, + srp_use_imm_data, + target->max_it_iu_size); INIT_WORK(&target->tl_err_work, srp_tl_err_work); INIT_WORK(&target->remove_work, srp_remove_work); -- cgit v1.2.3-59-g8ed1b From 39c48c514601d76f8750d1739928c9577b1785d9 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 3 Oct 2019 01:48:35 -0400 Subject: RDMA/bnxt_re: Enable SRIOV VF support on Broadcom's 57500 adapter series Broadcom's 575xx adapter series has support for SRIOV VFs. Making changes to enable SRIOV VF support. There are two major area where changes are done: - Added new DB location for control-path and data-path DB ring - New devices do not need to issue the sriov-config slow-path command thus, skipping to call that firmware command. For now enabling support for 64 RoCE VFs. Link: https://lore.kernel.org/r/1570081715-14301-1-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 1 + drivers/infiniband/hw/bnxt_re/main.c | 133 +++++++++++++++++------------ drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 5 +- 3 files changed, 82 insertions(+), 57 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index e55a1666c0cd..725b2350e349 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -108,6 +108,7 @@ struct bnxt_re_sqp_entries { #define BNXT_RE_MAX_MSIX 9 #define BNXT_RE_AEQ_IDX 0 #define BNXT_RE_NQ_IDX 1 +#define BNXT_RE_GEN_P5_MAX_VF 64 struct bnxt_re_dev { struct ib_device ibdev; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 7b914bdd7c13..d6785b8339d2 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -119,61 +119,76 @@ static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev) * reserved for the function. 
The driver may choose to allocate fewer * resources than the firmware maximum. */ -static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) +static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) { - u32 vf_qps = 0, vf_srqs = 0, vf_cqs = 0, vf_mrws = 0, vf_gids = 0; - u32 i; - u32 vf_pct; - u32 num_vfs; - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *attr; + struct bnxt_qplib_ctx *ctx; + int i; - rdev->qplib_ctx.qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, - dev_attr->max_qp); + attr = &rdev->dev_attr; + ctx = &rdev->qplib_ctx; - rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT_256K; + ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, + attr->max_qp); + ctx->mrw_count = BNXT_RE_MAX_MRW_COUNT_256K; /* Use max_mr from fw since max_mrw does not get set */ - rdev->qplib_ctx.mrw_count = min_t(u32, rdev->qplib_ctx.mrw_count, - dev_attr->max_mr); - rdev->qplib_ctx.srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, - dev_attr->max_srq); - rdev->qplib_ctx.cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, - dev_attr->max_cq); - - for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) - rdev->qplib_ctx.tqm_count[i] = - rdev->dev_attr.tqm_alloc_reqs[i]; - - if (rdev->num_vfs) { - /* - * Reserve a set of resources for the PF. Divide the remaining - * resources among the VFs - */ - vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF; - num_vfs = 100 * rdev->num_vfs; - vf_qps = (rdev->qplib_ctx.qpc_count * vf_pct) / num_vfs; - vf_srqs = (rdev->qplib_ctx.srqc_count * vf_pct) / num_vfs; - vf_cqs = (rdev->qplib_ctx.cq_count * vf_pct) / num_vfs; - /* - * The driver allows many more MRs than other resources. If the - * firmware does also, then reserve a fixed amount for the PF - * and divide the rest among VFs. VFs may use many MRs for NFS - * mounts, ISER, NVME applications, etc. If the firmware - * severely restricts the number of MRs, then let PF have - * half and divide the rest among VFs, as for the other - * resource types. - */ - if (rdev->qplib_ctx.mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) - vf_mrws = rdev->qplib_ctx.mrw_count * vf_pct / num_vfs; - else - vf_mrws = (rdev->qplib_ctx.mrw_count - - BNXT_RE_RESVD_MR_FOR_PF) / rdev->num_vfs; - vf_gids = BNXT_RE_MAX_GID_PER_VF; + ctx->mrw_count = min_t(u32, ctx->mrw_count, attr->max_mr); + ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, + attr->max_srq); + ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq); + if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) + for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) + rdev->qplib_ctx.tqm_count[i] = + rdev->dev_attr.tqm_alloc_reqs[i]; +} + +static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf) +{ + struct bnxt_qplib_vf_res *vf_res; + u32 mrws = 0; + u32 vf_pct; + u32 nvfs; + + vf_res = &qplib_ctx->vf_res; + /* + * Reserve a set of resources for the PF. Divide the remaining + * resources among the VFs + */ + vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF; + nvfs = num_vf; + num_vf = 100 * num_vf; + vf_res->max_qp_per_vf = (qplib_ctx->qpc_count * vf_pct) / num_vf; + vf_res->max_srq_per_vf = (qplib_ctx->srqc_count * vf_pct) / num_vf; + vf_res->max_cq_per_vf = (qplib_ctx->cq_count * vf_pct) / num_vf; + /* + * The driver allows many more MRs than other resources. If the + * firmware does also, then reserve a fixed amount for the PF and + * divide the rest among VFs. VFs may use many MRs for NFS + * mounts, ISER, NVME applications, etc. If the firmware severely + * restricts the number of MRs, then let PF have half and divide + * the rest among VFs, as for the other resource types. 
+ */ + if (qplib_ctx->mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) { + mrws = qplib_ctx->mrw_count * vf_pct; + nvfs = num_vf; + } else { + mrws = qplib_ctx->mrw_count - BNXT_RE_RESVD_MR_FOR_PF; } - rdev->qplib_ctx.vf_res.max_mrw_per_vf = vf_mrws; - rdev->qplib_ctx.vf_res.max_gid_per_vf = vf_gids; - rdev->qplib_ctx.vf_res.max_qp_per_vf = vf_qps; - rdev->qplib_ctx.vf_res.max_srq_per_vf = vf_srqs; - rdev->qplib_ctx.vf_res.max_cq_per_vf = vf_cqs; + vf_res->max_mrw_per_vf = (mrws / nvfs); + vf_res->max_gid_per_vf = BNXT_RE_MAX_GID_PER_VF; +} + +static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) +{ + u32 num_vfs; + + memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res)); + bnxt_re_limit_pf_res(rdev); + + num_vfs = bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? + BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs; + if (num_vfs) + bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs); } /* for handling bnxt_en callbacks later */ @@ -193,9 +208,11 @@ static void bnxt_re_sriov_config(void *p, int num_vfs) return; rdev->num_vfs = num_vfs; - bnxt_re_set_resource_limits(rdev); - bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, - &rdev->qplib_ctx); + if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) { + bnxt_re_set_resource_limits(rdev); + bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, + &rdev->qplib_ctx); + } } static void bnxt_re_shutdown(void *p) @@ -894,10 +911,14 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, return 0; } +#define BNXT_RE_GEN_P5_PF_NQ_DB 0x10000 +#define BNXT_RE_GEN_P5_VF_NQ_DB 0x4000 static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx) { return bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? - 0x10000 : rdev->msix_entries[indx].db_offset; + (rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB : + BNXT_RE_GEN_P5_PF_NQ_DB) : + rdev->msix_entries[indx].db_offset; } static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) @@ -1407,8 +1428,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) rdev->is_virtfn); if (rc) goto disable_rcfw; - if (!rdev->is_virtfn) - bnxt_re_set_resource_limits(rdev); + + bnxt_re_set_resource_limits(rdev); rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0, bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 60c8f76aab33..5cdfa84faf85 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -494,8 +494,10 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, * shall setup this area for VF. 
Skipping the * HW programming */ - if (is_virtfn || bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + if (is_virtfn) goto skip_ctx_setup; + if (bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + goto config_vf_res; level = ctx->qpc_tbl.level; req.qpc_pg_size_qpc_lvl = (level << CMDQ_INITIALIZE_FW_QPC_LVL_SFT) | @@ -540,6 +542,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements); req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements); +config_vf_res: req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf); req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf); req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf); -- cgit v1.2.3-59-g8ed1b From 3466c060ef6adffefc8aaa45d1fe01b64cd6007d Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 6 Oct 2019 15:56:20 +0300 Subject: RDMA/iser: Use iser_err instead of pr_err for logging Make sure all the debug prints in ib_iser module use the common driver logger. Link: https://lore.kernel.org/r/1570366580-24097-1-git-send-email-maxg@mellanox.com Signed-off-by: Max Gurtovoy Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iser_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 17f8e914b531..9a8f9048add6 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1081,7 +1081,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, ret = ib_check_mr_status(desc->rsc.sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); if (ret) { - pr_err("ib_check_mr_status failed, ret %d\n", ret); + iser_err("ib_check_mr_status failed, ret %d\n", ret); /* Not a lot we can do, return ambiguous guard error */ *sector = 0; return 0x1; @@ -1093,7 +1093,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, sector_div(sector_off, sector_size + 8); *sector = scsi_get_lba(iser_task->sc) + sector_off; - pr_err("PI error found type %d at sector %llx " + iser_err("PI error found type %d at sector %llx " "expected %x vs actual %x\n", mr_status.sig_err.err_type, (unsigned long long)*sector, -- cgit v1.2.3-59-g8ed1b From 03232cc43cff18ae44a30ceb455a036586ee8244 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 6 Oct 2019 18:54:43 +0300 Subject: IB/mlx5: Introduce and use mkey context setting helper routine Introduce and use set_mkc_access_pd_addr_fields() which sets mkey context's access rights, PD, address fields. Thereby avoid the code duplication. 
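The motivation is the usual one for this kind of refactoring: the same group of field assignments existed at several call sites, so a future change (for example mapping one more access flag) would otherwise have to be made in each copy. A generic sketch of the pattern with made-up names, since the real helper programs the mlx5 mkey context through MLX5_SET():

    /* Shared initialization factored out; only the varying inputs are parameters. */
    static void set_common_mkey_fields(struct hypothetical_mkey *mk, int access,
                                       u64 start_addr, u32 pdn)
    {
            mk->remote_write = !!(access & IB_ACCESS_REMOTE_WRITE);
            mk->remote_read  = !!(access & IB_ACCESS_REMOTE_READ);
            mk->local_write  = !!(access & IB_ACCESS_LOCAL_WRITE);
            mk->local_read   = 1;
            mk->pdn          = pdn;
            mk->start_addr   = start_addr;
    }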
Link: https://lore.kernel.org/r/20191006155443.31068-1-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1eff031ef048..2b57f0e085d6 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -705,6 +705,20 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) return 0; } +static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, + struct ib_pd *pd) +{ + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, start_addr); +} + struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -728,16 +742,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); - MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET(mkc, mkc, length64, 1); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, 0); + set_mkc_access_pd_addr_fields(mkc, acc, 0, pd); err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) @@ -1195,16 +1201,8 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET64(mkc, mkc, len, length); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, start_addr); + set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) -- cgit v1.2.3-59-g8ed1b From 9f7d7064009c37cb26eee4a83302cf077fe180d6 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 4 Sep 2019 11:14:41 +0800 Subject: RDMA/hns: remove a redundant le16_to_cpu Type of ah->av.vlan is u16, there will be a problem using le16_to_cpu on it. 
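The underlying rule is worth spelling out: le16_to_cpu() is only meaningful for values declared __le16, i.e. data stored in little-endian wire or hardware format. Applying it to a plain u16 that is already in CPU byte order is a no-op on little-endian machines but silently byte-swaps the value on big-endian kernels, and sparse flags the type mismatch. A small illustrative sketch, not taken from the driver:

    #include <linux/types.h>
    #include <asm/byteorder.h>

    static void vlan_endianness_example(void)
    {
            __le16 wire_vlan = cpu_to_le16(0x0123);	/* explicitly little-endian storage */
            u16 host_vlan = le16_to_cpu(wire_vlan);	/* correct: convert exactly once */
            u16 vlan_id = 0x0123;			/* already in CPU byte order */

            /* vlan_id is used as-is; wrapping it in le16_to_cpu() would corrupt it
             * on big-endian systems, which is the mistake this patch removes. */
            (void)host_vlan;
            (void)vlan_id;
    }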
Fixes: 82e620d9c3a0 ("RDMA/hns: Modify the data structure of hns_roce_av") Signed-off-by: Weihang Li Link: https://lore.kernel.org/r/1567566885-23088-2-git-send-email-liweihang@hisilicon.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7a89d669f8bf..c5ab8ca4d32e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -389,7 +389,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, V2_UD_SEND_WQE_BYTE_36_VLAN_S, - le16_to_cpu(ah->av.vlan)); + ah->av.vlan); roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, -- cgit v1.2.3-59-g8ed1b From e8a07de57ea4ca7c2d604871c52826e66899fc70 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 4 Sep 2019 11:14:42 +0800 Subject: RDMA/hns: Fix wrong parameters when initial mtt of srq->idx_que The parameters npages used to initial mtt of srq->idx_que shouldn't be same with srq's. And page_shift should be calculated from idx_buf_pg_sz. This patch fixes above issues and use field named npage and page_shift in hns_roce_buf instead of two temporary variables to let us use them anywhere. Fixes: 18df508c7970 ("RDMA/hns: Remove if-else judgment statements for creating srq") Signed-off-by: Weihang Li Link: https://lore.kernel.org/r/1567566885-23088-3-git-send-email-liweihang@hisilicon.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_srq.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 9591457eb768..d96041d806f6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -180,8 +180,7 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, { struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); struct hns_roce_ib_create_srq ucmd; - u32 page_shift; - u32 npages; + struct hns_roce_buf *buf; int ret; if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) @@ -191,11 +190,13 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); - npages = (ib_umem_page_count(srq->umem) + - (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / - (1 << hr_dev->caps.srqwqe_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, npages, page_shift, &srq->mtt); + buf = &srq->buf; + buf->npages = (ib_umem_page_count(srq->umem) + + (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / + (1 << hr_dev->caps.srqwqe_buf_pg_sz); + buf->page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, + &srq->mtt); if (ret) goto err_user_buf; @@ -212,9 +213,12 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, goto err_user_srq_mtt; } - ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(srq->idx_que.umem), - PAGE_SHIFT, &srq->idx_que.mtt); - + buf = &srq->idx_que.idx_buf; + buf->npages = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem), + 1 << hr_dev->caps.idx_buf_pg_sz); + buf->page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, buf->npages, 
buf->page_shift, + &srq->idx_que.mtt); if (ret) { dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n"); goto err_user_idx_mtt; -- cgit v1.2.3-59-g8ed1b From 32883228b980ed0a3ea1d13230f90622f0047908 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 4 Sep 2019 11:14:43 +0800 Subject: RDMA/hns: Modify variable/field name from vlan to vlan_id The name of vlan and vlan_tag is not clear enough, it's actually means vlan id. Signed-off-by: Weihang Li Link: https://lore.kernel.org/r/1567566885-23088-4-git-send-email-liweihang@hisilicon.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_ah.c | 14 +++++++------- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 10 +++++----- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 90e08c0c332d..8a522e14ef62 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -46,32 +46,32 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; struct hns_roce_ah *ah = to_hr_ah(ibah); - u16 vlan_tag = 0xffff; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); + u16 vlan_id = 0xffff; bool vlan_en = false; int ret; gid_attr = ah_attr->grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan_tag, NULL); + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); if (ret) return ret; /* Get mac address */ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); - if (vlan_tag < VLAN_CFI_MASK) { + if (vlan_id < VLAN_N_VID) { vlan_en = true; - vlan_tag |= (rdma_ah_get_sl(ah_attr) & + vlan_id |= (rdma_ah_get_sl(ah_attr) & HNS_ROCE_VLAN_SL_BIT_MASK) << HNS_ROCE_VLAN_SL_SHIFT; } ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; - ah->av.vlan = vlan_tag; + ah->av.vlan_id = vlan_id; ah->av.vlan_en = vlan_en; - dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index, - ah->av.vlan); + dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index, + ah->av.vlan_id); if (rdma_ah_get_static_rate(ah_attr)) ah->av.stat_rate = IB_RATE_10_GBPS; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 96d1302abde1..c810898e0676 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -582,7 +582,7 @@ struct hns_roce_av { u8 tclass; u8 dgid[HNS_ROCE_GID_SIZE]; u8 mac[ETH_ALEN]; - u16 vlan; + u16 vlan_id; bool vlan_en; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c5ab8ca4d32e..f7c83564b431 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -389,7 +389,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, V2_UD_SEND_WQE_BYTE_36_VLAN_S, - ah->av.vlan); + ah->av.vlan_id); roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, @@ -4061,8 +4061,8 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); const struct ib_gid_attr *gid_attr = NULL; int is_roce_protocol; + u16 vlan_id = 0xffff; bool is_udp = false; - u16 vlan = 0xffff; u8 ib_port; u8 hr_port; int ret; @@ -4074,7 +4074,7 @@ static int hns_roce_v2_set_path(struct ib_qp 
*ibqp, if (is_roce_protocol) { gid_attr = attr->ah_attr.grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL); + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); if (ret) return ret; @@ -4083,7 +4083,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, IB_GID_TYPE_ROCE_UDP_ENCAP); } - if (vlan < VLAN_CFI_MASK) { + if (vlan_id < VLAN_N_VID) { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); roce_set_bit(qpc_mask->byte_76_srqn_op_en, @@ -4095,7 +4095,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, } roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, - V2_QPC_BYTE_24_VLAN_ID_S, vlan); + V2_QPC_BYTE_24_VLAN_ID_S, vlan_id); roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, V2_QPC_BYTE_24_VLAN_ID_S, 0); -- cgit v1.2.3-59-g8ed1b From cfd82da4e741c16d71a12123bf0cb585af2b8796 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Wed, 4 Sep 2019 11:14:44 +0800 Subject: RDMA/hns: Modify return value of restrack functions The restrack function return EINVAL instead of EMSGSIZE when the driver operation fails. Fixes: 4b42d05d0b2c ("RDMA/hns: Remove unnecessary kzalloc") Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Link: https://lore.kernel.org/r/1567566885-23088-5-git-send-email-liweihang@hisilicon.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_restrack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 0a31d0a3d657..a0d608ec81c1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -95,7 +95,7 @@ static int hns_roce_fill_res_cq_entry(struct sk_buff *msg, ret = hr_dev->dfx->query_cqc_info(hr_dev, hr_cq->cqn, (int *)context); if (ret) - goto err; + return -EINVAL; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); if (!table_attr) -- cgit v1.2.3-59-g8ed1b From 3dcad1f8421f5684c64ef3079c32dfb884a3b910 Mon Sep 17 00:00:00 2001 From: Yixing Liu Date: Wed, 4 Sep 2019 11:14:45 +0800 Subject: RDMA/hns: Fix a spelling mistake in a macro HNS_ROCE_ALOGN_UP should be HNS_ROCE_ALIGN_UP, this patch fix it. 
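For reference (an illustrative aside, not part of the patch), the macro being renamed rounds its first argument up to the next multiple of the second, for example:

	/* HNS_ROCE_ALIGN_UP(a, b) == ((((a) + (b) - 1) / (b)) * (b)) */
	/* HNS_ROCE_ALIGN_UP(100, 64) -> ((100 + 63) / 64) * 64 = 128 */
	/* HNS_ROCE_ALIGN_UP(128, 64) -> ((128 + 63) / 64) * 64 = 128 */
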
Signed-off-by: Yixing Liu Signed-off-by: Weihang Li Link: https://lore.kernel.org/r/1567566885-23088-6-git-send-email-liweihang@hisilicon.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c810898e0676..cbd75e466417 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -45,7 +45,7 @@ #define HNS_ROCE_MAX_MSG_LEN 0x80000000 -#define HNS_ROCE_ALOGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b)) +#define HNS_ROCE_ALIGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b)) #define HNS_ROCE_IB_MIN_SQ_STRIDE 6 diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index bd78ff90d998..bec48f2593f4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -391,37 +391,37 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, /* Get buf size, SQ and RQ are aligned to page_szie */ if (hr_dev->caps.max_sq_sg <= 2) { - hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), PAGE_SIZE) + - HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); hr_qp->sq.offset = 0; - hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->rq.offset = HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); } else { page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); hr_qp->sge.sge_cnt = ex_sge_num ? 
max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0; - hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size) + - HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift), page_size) + - HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); hr_qp->sq.offset = 0; if (ex_sge_num) { - hr_qp->sge.offset = HNS_ROCE_ALOGN_UP( + hr_qp->sge.offset = HNS_ROCE_ALIGN_UP( (hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); hr_qp->rq.offset = hr_qp->sge.offset + - HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift), page_size); } else { - hr_qp->rq.offset = HNS_ROCE_ALOGN_UP( + hr_qp->rq.offset = HNS_ROCE_ALIGN_UP( (hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); @@ -591,19 +591,19 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); hr_qp->sq.offset = 0; - size = HNS_ROCE_ALOGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, + size = HNS_ROCE_ALIGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size); if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) { hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift), (u32)hr_qp->sge.sge_cnt); hr_qp->sge.offset = size; - size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt << + size += HNS_ROCE_ALIGN_UP(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift, page_size); } hr_qp->rq.offset = size; - size += HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), + size += HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size); hr_qp->buff_size = size; -- cgit v1.2.3-59-g8ed1b From d302c6e3a6895608a5856bc708c47bda1770b24d Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Wed, 9 Oct 2019 09:21:50 +0800 Subject: RDMA/hns: Release qp resources when failed to destroy qp Even if no response from hardware, we should make sure that qp related resources are released to avoid memory leaks. 
Fixes: 926a01dc000d ("RDMA/hns: Add QP operations support for hip08 SoC") Signed-off-by: Yangyang Li Signed-off-by: Weihang Li Link: https://lore.kernel.org/r/1570584110-3659-1-git-send-email-liweihang@hisilicon.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f7c83564b431..14e24b4ef6ae 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4650,16 +4650,14 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, { struct hns_roce_cq *send_cq, *recv_cq; struct ib_device *ibdev = &hr_dev->ib_dev; - int ret; + int ret = 0; if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) { /* Modify qp to reset before destroying qp */ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); - if (ret) { + if (ret) ibdev_err(ibdev, "modify QP to Reset failed.\n"); - return ret; - } } send_cq = to_hr_cq(hr_qp->ibqp.send_cq); @@ -4715,7 +4713,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, kfree(hr_qp->rq_inl_buf.wqe_list); } - return 0; + return ret; } static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) @@ -4725,11 +4723,9 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) int ret; ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); - if (ret) { + if (ret) ibdev_err(&hr_dev->ib_dev, "Destroy qp 0x%06lx failed(%d)\n", hr_qp->qpn, ret); - return ret; - } if (hr_qp->ibqp.qp_type == IB_QPT_GSI) kfree(hr_to_hr_sqp(hr_qp)); -- cgit v1.2.3-59-g8ed1b From 5a0d523781075529e9c2ff3ba7312ddf4e32609a Mon Sep 17 00:00:00 2001 From: Donald Dutile Date: Wed, 9 Oct 2019 12:49:37 -0400 Subject: ib/srp: Add missing new line after displaying fast_io_fail_tmo param Long-time missing new-line in sysfs output. Simply add it. Signed-off-by: Donald Dutile Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20191009164937.21989-1-ddutile@redhat.com Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 0bd4c66ae331..b7f7a5f7bd98 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -174,9 +174,9 @@ static int srp_tmo_get(char *buffer, const struct kernel_param *kp) int tmo = *(int *)kp->arg; if (tmo >= 0) - return sprintf(buffer, "%d", tmo); + return sprintf(buffer, "%d\n", tmo); else - return sprintf(buffer, "off"); + return sprintf(buffer, "off\n"); } static int srp_tmo_set(const char *val, const struct kernel_param *kp) -- cgit v1.2.3-59-g8ed1b From cf049bb31f7101d9672eaf97ade4fdd5171ddf26 Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Fri, 4 Oct 2019 14:53:56 +0200 Subject: RDMA/siw: Fix SQ/RQ drain logic Storage ULPs (e.g. iSER & NVMeOF) use ib_drain_qp() to drain QP/CQ. Current SIW's own drain routines do not properly wait until all SQ/RQ elements are completed and reaped from the CQ. This may cause touch after free issues. New logic relies on generic __ib_drain_sq()/__ib_drain_rq() posting a final work request, which SIW immediately flushes to CQ. 
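For context, kernel ULPs reach this code through the generic drain helpers; a minimal sketch of such a teardown path (assuming a ULP-owned QP and CQ; the caller shown is hypothetical) looks like:

	#include <rdma/ib_verbs.h>

	static void ulp_teardown(struct ib_qp *qp, struct ib_cq *cq)
	{
		/*
		 * ib_drain_qp() moves the QP toward the error state, posts final
		 * marker work requests on the SQ and RQ, and waits until their
		 * flush completions have been reaped from the CQ.
		 */
		ib_drain_qp(qp);

		ib_destroy_qp(qp);
		ib_free_cq(cq);
	}
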
Fixes: 303ae1cdfdf7 ("rdma/siw: application interface") Link: https://lore.kernel.org/r/20191004125356.20673-1-bmt@zurich.ibm.com Signed-off-by: Krishnamraju Eraparaju Signed-off-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_main.c | 20 ----- drivers/infiniband/sw/siw/siw_verbs.c | 144 ++++++++++++++++++++++++++++------ 2 files changed, 122 insertions(+), 42 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index d1a1b7aa7d83..b8fe43074ad6 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -249,24 +249,6 @@ static struct ib_qp *siw_get_base_qp(struct ib_device *base_dev, int id) return NULL; } -static void siw_verbs_sq_flush(struct ib_qp *base_qp) -{ - struct siw_qp *qp = to_siw_qp(base_qp); - - down_write(&qp->state_lock); - siw_sq_flush(qp); - up_write(&qp->state_lock); -} - -static void siw_verbs_rq_flush(struct ib_qp *base_qp) -{ - struct siw_qp *qp = to_siw_qp(base_qp); - - down_write(&qp->state_lock); - siw_rq_flush(qp); - up_write(&qp->state_lock); -} - static const struct ib_device_ops siw_device_ops = { .owner = THIS_MODULE, .uverbs_abi_ver = SIW_ABI_VERSION, @@ -285,8 +267,6 @@ static const struct ib_device_ops siw_device_ops = { .destroy_cq = siw_destroy_cq, .destroy_qp = siw_destroy_qp, .destroy_srq = siw_destroy_srq, - .drain_rq = siw_verbs_rq_flush, - .drain_sq = siw_verbs_sq_flush, .get_dma_mr = siw_get_dma_mr, .get_port_immutable = siw_get_port_immutable, .iw_accept = siw_accept, diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 869e02b69a01..c0574ddc98fa 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -687,6 +687,47 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, return bytes; } +/* Complete SQ WR's without processing */ +static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + struct siw_sqe sqe = {}; + int rv = 0; + + while (wr) { + sqe.id = wr->wr_id; + sqe.opcode = wr->opcode; + rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR); + if (rv) { + if (bad_wr) + *bad_wr = wr; + break; + } + wr = wr->next; + } + return rv; +} + +/* Complete RQ WR's without processing */ +static int siw_rq_flush_wr(struct siw_qp *qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct siw_rqe rqe = {}; + int rv = 0; + + while (wr) { + rqe.id = wr->wr_id; + rv = siw_rqe_complete(qp, &rqe, 0, 0, SIW_WC_WR_FLUSH_ERR); + if (rv) { + if (bad_wr) + *bad_wr = wr; + break; + } + wr = wr->next; + } + return rv; +} + /* * siw_post_send() * @@ -705,26 +746,54 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, unsigned long flags; int rv = 0; + if (wr && !qp->kernel_verbs) { + siw_dbg_qp(qp, "wr must be empty for user mapped sq\n"); + *bad_wr = wr; + return -EINVAL; + } + /* * Try to acquire QP state lock. Must be non-blocking * to accommodate kernel clients needs. */ if (!down_read_trylock(&qp->state_lock)) { - *bad_wr = wr; - siw_dbg_qp(qp, "QP locked, state %d\n", qp->attrs.state); - return -ENOTCONN; + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * ERROR state is final, so we can be sure + * this state will not change as long as the QP + * exists. + * + * This handles an ib_drain_sq() call with + * a concurrent request to set the QP state + * to ERROR. 
+ */ + rv = siw_sq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP locked, state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } + return rv; } if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) { + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * Immediately flush this WR to CQ, if QP + * is in ERROR state. SQ is guaranteed to + * be empty, so WR complets in-order. + * + * Typically triggered by ib_drain_sq(). + */ + rv = siw_sq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP out of state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } up_read(&qp->state_lock); - *bad_wr = wr; - siw_dbg_qp(qp, "QP out of state %d\n", qp->attrs.state); - return -ENOTCONN; - } - if (wr && !qp->kernel_verbs) { - siw_dbg_qp(qp, "wr must be empty for user mapped sq\n"); - up_read(&qp->state_lock); - *bad_wr = wr; - return -EINVAL; + return rv; } spin_lock_irqsave(&qp->sq_lock, flags); @@ -919,24 +988,55 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, *bad_wr = wr; return -EOPNOTSUPP; /* what else from errno.h? */ } - /* - * Try to acquire QP state lock. Must be non-blocking - * to accommodate kernel clients needs. - */ - if (!down_read_trylock(&qp->state_lock)) { - *bad_wr = wr; - return -ENOTCONN; - } if (!qp->kernel_verbs) { siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n"); up_read(&qp->state_lock); *bad_wr = wr; return -EINVAL; } + + /* + * Try to acquire QP state lock. Must be non-blocking + * to accommodate kernel clients needs. + */ + if (!down_read_trylock(&qp->state_lock)) { + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * ERROR state is final, so we can be sure + * this state will not change as long as the QP + * exists. + * + * This handles an ib_drain_rq() call with + * a concurrent request to set the QP state + * to ERROR. + */ + rv = siw_rq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP locked, state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } + return rv; + } if (qp->attrs.state > SIW_QP_STATE_RTS) { + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * Immediately flush this WR to CQ, if QP + * is in ERROR state. RQ is guaranteed to + * be empty, so WR complets in-order. + * + * Typically triggered by ib_drain_rq(). + */ + rv = siw_rq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP out of state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } up_read(&qp->state_lock); - *bad_wr = wr; - return -EINVAL; + return rv; } /* * Serialize potentially multiple producers. -- cgit v1.2.3-59-g8ed1b From 00bd1439f464cfac3c60f6eabfe209b8a52e8194 Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Mon, 7 Oct 2019 16:59:32 +0300 Subject: RDMA/rw: Support threshold for registration vs scattering to local pages If there are more scatter entries than the recommended limit provided by the ib device, UMR registration is used. This will provide optimal performance when performing large RDMA READs over devices that advertise the threshold capability. 
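To illustrate where the new threshold takes effect (a sketch only; nothing changes for callers of the rw API, and the wrapper function below is hypothetical), a ULP still just initializes an R/W context and the core decides internally whether to register an MR or scatter to multiple SGEs:

	#include <rdma/rw.h>

	static int ulp_rdma_read(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
				 struct scatterlist *sgl, u32 sg_cnt,
				 u64 remote_addr, u32 rkey)
	{
		int nr_wrs;

		/* MR vs. SGE choice is made inside rdma_rw_ctx_init(), now also
		 * considering dev->attrs.max_sgl_rd for DMA_FROM_DEVICE. */
		nr_wrs = rdma_rw_ctx_init(ctx, qp, port_num, sgl, sg_cnt, 0,
					  remote_addr, rkey, DMA_FROM_DEVICE);
		return nr_wrs < 0 ? nr_wrs : 0;
	}
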
With ConnectX-5 running NVMeoF RDMA with FIO single QP 128KB writes: Without use of cap: 70Gb/sec With use of cap: 84Gb/sec Link: https://lore.kernel.org/r/20191007135933.12483-3-leon@kernel.org Signed-off-by: Yamin Friedman Reviewed-by: Or Gerlitz Signed-off-by: Leon Romanovsky Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rw.c | 25 +++++++++++++++---------- include/rdma/ib_verbs.h | 2 ++ 2 files changed, 17 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 5337393d4dfe..4fad732f9b3c 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -20,14 +20,17 @@ module_param_named(force_mr, rdma_rw_force_mr, bool, 0); MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations"); /* - * Check if the device might use memory registration. This is currently only - * true for iWarp devices. In the future we can hopefully fine tune this based - * on HCA driver input. + * Report whether memory registration should be used. Memory registration must + * be used for iWarp devices because of iWARP-specific limitations. Memory + * registration is also enabled if registering memory might yield better + * performance than using multiple SGE entries, see rdma_rw_io_needs_mr() */ static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num) { if (rdma_protocol_iwarp(dev, port_num)) return true; + if (dev->attrs.max_sgl_rd) + return true; if (unlikely(rdma_rw_force_mr)) return true; return false; @@ -35,17 +38,19 @@ static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num) /* * Check if the device will use memory registration for this RW operation. - * We currently always use memory registrations for iWarp RDMA READs, and - * have a debug option to force usage of MRs. - * - * XXX: In the future we can hopefully fine tune this based on HCA driver - * input. + * For RDMA READs we must use MRs on iWarp and can optionally use them as an + * optimization otherwise. Additionally we have a debug option to force usage + * of MRs to help testing this code path. */ static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num, enum dma_data_direction dir, int dma_nents) { - if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE) - return true; + if (dir == DMA_FROM_DEVICE) { + if (rdma_protocol_iwarp(dev, port_num)) + return true; + if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd) + return true; + } if (unlikely(rdma_rw_force_mr)) return true; return false; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6a47ba85c54c..a465fcae992b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -445,6 +445,8 @@ struct ib_device_attr { struct ib_tm_caps tm_caps; struct ib_cq_caps cq_caps; u64 max_dm_size; + /* Max entries for sgl for optimized performance per READ */ + u32 max_sgl_rd; }; enum ib_mtu { -- cgit v1.2.3-59-g8ed1b From 366090564b812453f531051129b82b63dad8f38b Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Mon, 7 Oct 2019 16:59:33 +0300 Subject: RDMA/mlx5: Add capability for max sge to get optimized performance Allows the IB device to provide a value of maximum scatter gather entries per RDMA READ. In certain cases it may be preferable for a device to perform UMR memory registration rather than have many scatter entries in a single RDMA READ. 
This provides a significant performance increase in devices capable of using different memory registration schemes based on the number of scatter gather entries. This general capability allows each device vendor to fine tune when it is better to use memory registration. Link: https://lore.kernel.org/r/20191007135933.12483-4-leon@kernel.org Signed-off-by: Yamin Friedman Reviewed-by: Or Gerlitz Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 831539419c30..bb3a4e82ee5a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1011,6 +1011,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); props->max_pi_fast_reg_page_list_len = props->max_fast_reg_page_list_len / 2; + props->max_sgl_rd = + MLX5_CAP_GEN(mdev, max_sgl_for_optimized_performance); get_atomic_caps_qp(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); -- cgit v1.2.3-59-g8ed1b From df130f878ebddebd17c8616c306d037550be05a7 Mon Sep 17 00:00:00 2001 From: "rd.dunlab@gmail.com" Date: Wed, 9 Oct 2019 20:52:40 -0700 Subject: infiniband: fix ulp/iser/iscsi_iser.[hc] kernel-doc notation Fix struct name in kernel-doc notation to match the struct name below it. Fix one typo (spello). Fix formatting as expected for kernel-doc notation. Fix parameter name to match the function's parameter name to eliminate a kernel-doc warning. ../drivers/infiniband/ulp/iser/iscsi_iser.c:815: warning: Function parameter or member 'non_blocking' not described in 'iscsi_iser_ep_connect' Link: https://lore.kernel.org/r/20191010035239.623888112@gmail.com Signed-off-by: Randy Dunlap Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 +- drivers/infiniband/ulp/iser/iscsi_iser.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 55f45edeb5f9..3690e28cc7ea 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -786,7 +786,7 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, * iscsi_iser_ep_connect() - Initiate iSER connection establishment * @shost: scsi_host * @dst_addr: destination address - * @non-blocking: indicate if routine can block + * @non_blocking: indicate if routine can block * * Allocate an iscsi endpoint, an iser_conn structure and bind them. * After that start RDMA connection establishment via rdma_cm. 
We diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 2500c0df2670..3a54c3853a8a 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -311,7 +311,7 @@ struct iser_comp { }; /** - * struct iser_device - Memory registration operations + * struct iser_reg_ops - Memory registration operations * per-device registration schemes * * @alloc_reg_res: Allocate registration resources @@ -365,7 +365,7 @@ struct iser_device { }; /** - * struct iser_reg_resources - Fast registration recources + * struct iser_reg_resources - Fast registration resources * * @mr: memory region * @fmr_pool: pool of fmrs @@ -398,7 +398,7 @@ struct iser_fr_desc { }; /** - * struct iser_fr_pool: connection fast registration pool + * struct iser_fr_pool - connection fast registration pool * * @list: list of fastreg descriptors * @lock: protects fmr/fastreg pool @@ -521,7 +521,7 @@ struct iser_page_vec { }; /** - * struct iser_global: iSER global context + * struct iser_global - iSER global context * * @device_list_mutex: protects device_list * @device_list: iser devices global list -- cgit v1.2.3-59-g8ed1b From 526f2c50637a9eac4107ca4db1ae70e329825aab Mon Sep 17 00:00:00 2001 From: "rd.dunlab@gmail.com" Date: Wed, 9 Oct 2019 20:52:41 -0700 Subject: infiniband: fix core/ipwm_util.h kernel-doc warnings Fix kernel-doc warnings and expected formatting. ../drivers/infiniband/core/iwpm_util.h:219: warning: Function parameter or member 'a_sockaddr' not described in 'iwpm_compare_sockaddr' ../drivers/infiniband/core/iwpm_util.h:219: warning: Function parameter or member 'b_sockaddr' not described in 'iwpm_compare_sockaddr' ../drivers/infiniband/core/iwpm_util.h:280: warning: Function parameter or member 'iwpm_pid' not described in 'iwpm_send_hello' Link: https://lore.kernel.org/r/20191010035239.695604406@gmail.com Signed-off-by: Randy Dunlap Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/iwpm_util.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 7e2bcc72f66c..1bf87d9fd0bd 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -210,8 +210,10 @@ int iwpm_mapinfo_available(void); /** * iwpm_compare_sockaddr - Compare two sockaddr storage structs + * @a_sockaddr: first sockaddr to compare + * @b_sockaddr: second sockaddr to compare * - * Returns 0 if they are holding the same ip/tcp address info, + * Return: 0 if they are holding the same ip/tcp address info, * otherwise returns 1 */ int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, @@ -272,6 +274,7 @@ void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg); * iwpm_send_hello - Send hello response to iwpmd * * @nl_client: The index of the netlink client + * @iwpm_pid: The pid of the user space port mapper * @abi_version: The kernel's abi_version * * Returns 0 on success or a negative error code -- cgit v1.2.3-59-g8ed1b From 28f2a6aeed8db42b7069ef0315ec71af2ba9961e Mon Sep 17 00:00:00 2001 From: "rd.dunlab@gmail.com" Date: Wed, 9 Oct 2019 20:52:42 -0700 Subject: infiniband: fix ulp/iser/iscsi_iser.h kernel-doc warnings Fix kernel-doc warnings and typos/spellos. 
../drivers/infiniband/ulp/iser/iscsi_iser.h:254: warning: Function parameter or member 'dma_addr' not described in 'iser_tx_desc' ../drivers/infiniband/ulp/iser/iscsi_iser.h:254: warning: Function parameter or member 'cqe' not described in 'iser_tx_desc' ../drivers/infiniband/ulp/iser/iscsi_iser.h:254: warning: Function parameter or member 'reg_wr' not described in 'iser_tx_desc' ../drivers/infiniband/ulp/iser/iscsi_iser.h:254: warning: Function parameter or member 'send_wr' not described in 'iser_tx_desc' ../drivers/infiniband/ulp/iser/iscsi_iser.h:254: warning: Function parameter or member 'inv_wr' not described in 'iser_tx_desc' ../drivers/infiniband/ulp/iser/iscsi_iser.h:277: warning: Function parameter or member 'cqe' not described in 'iser_rx_desc' ../drivers/infiniband/ulp/iser/iscsi_iser.h:296: warning: Function parameter or member 'rsp' not described in 'iser_login_desc' ../drivers/infiniband/ulp/iser/iscsi_iser.h:339: warning: Function parameter or member 'reg_mem' not described in 'iser_reg_ops' ../drivers/infiniband/ulp/iser/iscsi_iser.h:399: warning: Function parameter or member 'all_list' not described in 'iser_fr_desc' ../drivers/infiniband/ulp/iser/iscsi_iser.h:413: warning: Function parameter or member 'all_list' not described in 'iser_fr_pool' ../drivers/infiniband/ulp/iser/iscsi_iser.h:439: warning: Function parameter or member 'reg_cqe' not described in 'ib_conn' ../drivers/infiniband/ulp/iser/iscsi_iser.h:491: warning: Function parameter or member 'snd_w_inv' not described in 'iser_conn' This leaves 2 "member not described" warnings that I don't know how to fix: ../drivers/infiniband/ulp/iser/iscsi_iser.h:401: warning: Function parameter or member 'all_list' not described in 'iser_fr_desc' ../drivers/infiniband/ulp/iser/iscsi_iser.h:415: warning: Function parameter or member 'all_list' not described in 'iser_fr_pool' Link: https://lore.kernel.org/r/20191010035239.756365352@gmail.com Signed-off-by: Randy Dunlap Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iscsi_iser.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 3a54c3853a8a..029c00163442 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -228,15 +228,16 @@ enum iser_desc_type { * @iser_header: iser header * @iscsi_header: iscsi header * @type: command/control/dataout - * @dam_addr: header buffer dma_address + * @dma_addr: header buffer dma_address * @tx_sg: sg[0] points to iser/iscsi headers * sg[1] optionally points to either of immediate data * unsolicited data-out or control * @num_sge: number sges used on this TX task + * @cqe: completion handler * @mapped: Is the task header mapped - * reg_wr: registration WR - * send_wr: send WR - * inv_wr: invalidate WR + * @reg_wr: registration WR + * @send_wr: send WR + * @inv_wr: invalidate WR */ struct iser_tx_desc { struct iser_ctrl iser_header; @@ -263,6 +264,7 @@ struct iser_tx_desc { * @data: received data segment * @dma_addr: receive buffer dma address * @rx_sg: ib_sge of receive buffer + * @cqe: completion handler * @pad: for sense data TODO: Modify to maximum sense length supported */ struct iser_rx_desc { @@ -279,9 +281,9 @@ struct iser_rx_desc { * struct iser_login_desc - iSER login descriptor * * @req: pointer to login request buffer - * @resp: pointer to login response buffer + * @rsp: pointer to login response 
buffer * @req_dma: DMA address of login request buffer - * @rsp_dma: DMA address of login response buffer + * @rsp_dma: DMA address of login response buffer * @sge: IB sge for login post recv * @cqe: completion handler */ @@ -316,7 +318,7 @@ struct iser_comp { * * @alloc_reg_res: Allocate registration resources * @free_reg_res: Free registration resources - * @fast_reg_mem: Register memory buffers + * @reg_mem: Register memory buffers * @unreg_mem: Un-register memory buffers * @reg_desc_get: Get a registration descriptor for pool * @reg_desc_put: Get a registration descriptor to pool @@ -423,6 +425,7 @@ struct iser_fr_pool { * @comp: iser completion context * @fr_pool: connection fast registration poool * @pi_support: Indicate device T10-PI support + * @reg_cqe: completion handler */ struct ib_conn { struct rdma_cm_id *cma_id; @@ -463,6 +466,7 @@ struct ib_conn { * @num_rx_descs: number of rx descriptors * @scsi_sg_tablesize: scsi host sg_tablesize * @pages_per_mr: maximum pages available for registration + * @snd_w_inv: connection uses remote invalidation */ struct iser_conn { struct ib_conn ib_conn; -- cgit v1.2.3-59-g8ed1b From dfa4344da392252edaf7371d6dd21c5415cc6602 Mon Sep 17 00:00:00 2001 From: "rd.dunlab@gmail.com" Date: Wed, 9 Oct 2019 20:52:43 -0700 Subject: infiniband: fix ulp/opa_vnic/opa_vnic_internal.h kernel-doc notation Remove kernel-doc notation on 4 structs since they are internal and none of the struct fields/members are described. This removes 45 kernel-doc warnings. Link: https://lore.kernel.org/r/20191010035239.818405496@gmail.com Signed-off-by: Randy Dunlap Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h index 43ac61ffef4a..6dbc08e1a6a6 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h @@ -70,7 +70,7 @@ struct opa_vnic_adapter; -/** +/* * struct __opa_vesw_info - OPA vnic virtual switch info * * Same as opa_vesw_info without bitwise attribute. @@ -96,7 +96,7 @@ struct __opa_vesw_info { u8 rsvd4[2]; } __packed; -/** +/* * struct __opa_per_veswport_info - OPA vnic per port info * * Same as opa_per_veswport_info without bitwise attribute. @@ -136,7 +136,7 @@ struct __opa_per_veswport_info { u8 rsvd3[8]; } __packed; -/** +/* * struct __opa_veswport_info - OPA vnic port info * * Same as opa_veswport_info without bitwise attribute. @@ -146,7 +146,7 @@ struct __opa_veswport_info { struct __opa_per_veswport_info vport; }; -/** +/* * struct __opa_veswport_trap - OPA vnic trap info * * Same as opa_veswport_trap without bitwise attribute. 
-- cgit v1.2.3-59-g8ed1b From 96f4b0b68da4fb234b99717b5d186133e0d49cc6 Mon Sep 17 00:00:00 2001 From: "rd.dunlab@gmail.com" Date: Wed, 9 Oct 2019 20:52:45 -0700 Subject: infiniband: fix ulp/srpt/ib_srpt.h kernel-doc notation Fix kernel-doc warnings (typos or renames) in ib_srpt.h: ../drivers/infiniband/ulp/srpt/ib_srpt.h:419: warning: Function parameter or member 'port_guid_id' not described in 'srpt_port' ../drivers/infiniband/ulp/srpt/ib_srpt.h:419: warning: Function parameter or member 'port_gid_id' not described in 'srpt_port' Link: https://lore.kernel.org/r/20191010035239.950150496@gmail.com Signed-off-by: Randy Dunlap Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index f8bd95302ac0..54b03ab4ab1a 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -387,12 +387,9 @@ struct srpt_port_id { * @sm_lid: cached value of the port's sm_lid. * @lid: cached value of the port's lid. * @gid: cached value of the port's gid. - * @port_acl_lock spinlock for port_acl_list: * @work: work structure for refreshing the aforementioned cached values. - * @port_guid_tpg: TPG associated with target port GUID. - * @port_guid_wwn: WWN associated with target port GUID. - * @port_gid_tpg: TPG associated with target port GID. - * @port_gid_wwn: WWN associated with target port GID. + * @port_guid_id: target port GUID + * @port_gid_id: target port GID * @port_attrib: Port attributes that can be accessed through configfs. * @refcount: Number of objects associated with this port. * @freed_channels: Completion that will be signaled once @refcount becomes 0. -- cgit v1.2.3-59-g8ed1b From 094c88f3c5e8aa5e03a9a2772886a174485b431b Mon Sep 17 00:00:00 2001 From: "rd.dunlab@gmail.com" Date: Wed, 9 Oct 2019 20:52:46 -0700 Subject: infiniband: fix core/verbs.c kernel-doc notation Add missing function parameter descriptions: ../drivers/infiniband/core/verbs.c:257: warning: Function parameter or member 'flags' not described in '__ib_alloc_pd' ../drivers/infiniband/core/verbs.c:257: warning: Function parameter or member 'caller' not described in '__ib_alloc_pd' Link: https://lore.kernel.org/r/20191010035240.011497492@gmail.com Signed-off-by: Randy Dunlap Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f974b6854224..d357ac077bd8 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -244,6 +244,8 @@ EXPORT_SYMBOL(rdma_port_get_link_layer); /** * ib_alloc_pd - Allocates an unused protection domain. * @device: The device on which to allocate the protection domain. + * @flags: protection domain flags + * @caller: caller's build-time module name * * A protection domain object provides an association between QPs, shared * receive queues, address handles, memory regions, and memory windows. 
-- cgit v1.2.3-59-g8ed1b From 134a42a66b3aa474cbe3cea6a96956ed9817962d Mon Sep 17 00:00:00 2001 From: "rd.dunlab@gmail.com" Date: Wed, 9 Oct 2019 20:52:47 -0700 Subject: infiniband: fix ulp/iser/iser_verbs.c kernel-doc notation Various kernel-doc fixes: - fix typos - don't use /** for internal structs or functions - fix Return: kernel-doc formatting - add kernel-doc notation for missing function parameters ../drivers/infiniband/ulp/iser/iser_verbs.c:159: warning: Function parameter or member 'ib_conn' not described in 'iser_alloc_fmr_pool' ../drivers/infiniband/ulp/iser/iser_verbs.c:159: warning: Function parameter or member 'cmds_max' not described in 'iser_alloc_fmr_pool' ../drivers/infiniband/ulp/iser/iser_verbs.c:159: warning: Function parameter or member 'size' not described in 'iser_alloc_fmr_pool' ../drivers/infiniband/ulp/iser/iser_verbs.c:221: warning: Function parameter or member 'ib_conn' not described in 'iser_free_fmr_pool' ../drivers/infiniband/ulp/iser/iser_verbs.c:304: warning: Function parameter or member 'ib_conn' not described in 'iser_alloc_fastreg_pool' ../drivers/infiniband/ulp/iser/iser_verbs.c:304: warning: Function parameter or member 'cmds_max' not described in 'iser_alloc_fastreg_pool' ../drivers/infiniband/ulp/iser/iser_verbs.c:304: warning: Function parameter or member 'size' not described in 'iser_alloc_fastreg_pool' ../drivers/infiniband/ulp/iser/iser_verbs.c:338: warning: Function parameter or member 'ib_conn' not described in 'iser_free_fastreg_pool' ../drivers/infiniband/ulp/iser/iser_verbs.c:568: warning: Function parameter or member 'iser_conn' not described in 'iser_conn_release' ../drivers/infiniband/ulp/iser/iser_verbs.c:603: warning: Function parameter or member 'iser_conn' not described in 'iser_conn_terminate' ../drivers/infiniband/ulp/iser/iser_verbs.c:1040: warning: Function parameter or member 'signal' not described in 'iser_post_send' ../drivers/infiniband/ulp/iser/iser_verbs.c:1040: warning: Function parameter or member 'ib_conn' not described in 'iser_post_send' ../drivers/infiniband/ulp/iser/iser_verbs.c:1040: warning: Function parameter or member 'tx_desc' not described in 'iser_post_send' Link: https://lore.kernel.org/r/20191010035240.070520193@gmail.com Signed-off-by: Randy Dunlap Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iser_verbs.c | 60 ++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 9a8f9048add6..1f4a37a3c2b3 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -58,12 +58,12 @@ static void iser_event_handler(struct ib_event_handler *handler, dev_name(&event->device->dev), event->element.port_num); } -/** +/* * iser_create_device_ib_res - creates Protection Domain (PD), Completion * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with - * the adapator. + * the adaptor. * - * returns 0 on success, -1 on failure + * Return: 0 on success, -1 on failure */ static int iser_create_device_ib_res(struct iser_device *device) { @@ -124,9 +124,9 @@ comps_err: return -1; } -/** +/* * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR, - * CQ and PD created with the device associated with the adapator. + * CQ and PD created with the device associated with the adaptor. 
*/ static void iser_free_device_ib_res(struct iser_device *device) { @@ -149,8 +149,11 @@ static void iser_free_device_ib_res(struct iser_device *device) /** * iser_alloc_fmr_pool - Creates FMR pool and page_vector + * @ib_conn: connection RDMA resources + * @cmds_max: max number of SCSI commands for this connection + * @size: max number of pages per map request * - * returns 0 on success, or errno code on failure + * Return: 0 on success, or errno code on failure */ int iser_alloc_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max, @@ -215,6 +218,7 @@ err_frpl: /** * iser_free_fmr_pool - releases the FMR pool and page vec + * @ib_conn: connection RDMA resources */ void iser_free_fmr_pool(struct ib_conn *ib_conn) { @@ -295,7 +299,11 @@ static void iser_destroy_fastreg_desc(struct iser_fr_desc *desc) /** * iser_alloc_fastreg_pool - Creates pool of fast_reg descriptors * for fast registration work requests. - * returns 0 on success, or errno code on failure + * @ib_conn: connection RDMA resources + * @cmds_max: max number of SCSI commands for this connection + * @size: max number of pages per map request + * + * Return: 0 on success, or errno code on failure */ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max, @@ -332,6 +340,7 @@ err: /** * iser_free_fastreg_pool - releases the pool of fast_reg descriptors + * @ib_conn: connection RDMA resources */ void iser_free_fastreg_pool(struct ib_conn *ib_conn) { @@ -355,10 +364,10 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn) fr_pool->size - i); } -/** +/* * iser_create_ib_conn_res - Queue-Pair (QP) * - * returns 0 on success, -1 on failure + * Return: 0 on success, -1 on failure */ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) { @@ -436,7 +445,7 @@ out_err: return ret; } -/** +/* * based on the resolved device node GUID see if there already allocated * device for this device. If there's no such, create one. 
*/ @@ -487,9 +496,9 @@ static void iser_device_try_release(struct iser_device *device) mutex_unlock(&ig.device_list_mutex); } -/** +/* * Called with state mutex held - **/ + */ static int iser_conn_state_comp_exch(struct iser_conn *iser_conn, enum iser_conn_state comp, enum iser_conn_state exch) @@ -561,7 +570,8 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn, } /** - * Frees all conn objects and deallocs conn descriptor + * iser_conn_release - Frees all conn objects and deallocs conn descriptor + * @iser_conn: iSER connection context */ void iser_conn_release(struct iser_conn *iser_conn) { @@ -595,7 +605,10 @@ void iser_conn_release(struct iser_conn *iser_conn) } /** - * triggers start of the disconnect procedures and wait for them to be done + * iser_conn_terminate - triggers start of the disconnect procedures and + * waits for them to be done + * @iser_conn: iSER connection context + * * Called with state mutex held */ int iser_conn_terminate(struct iser_conn *iser_conn) @@ -632,9 +645,9 @@ int iser_conn_terminate(struct iser_conn *iser_conn) return 1; } -/** +/* * Called with state mutex held - **/ + */ static void iser_connect_error(struct rdma_cm_id *cma_id) { struct iser_conn *iser_conn; @@ -684,9 +697,9 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, iser_conn->scsi_sg_tablesize + reserved_mr_pages; } -/** +/* * Called with state mutex held - **/ + */ static void iser_addr_handler(struct rdma_cm_id *cma_id) { struct iser_device *device; @@ -732,9 +745,9 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) } } -/** +/* * Called with state mutex held - **/ + */ static void iser_route_handler(struct rdma_cm_id *cma_id) { struct rdma_conn_param conn_param; @@ -1030,9 +1043,12 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) /** - * iser_start_send - Initiate a Send DTO operation + * iser_post_send - Initiate a Send DTO operation + * @ib_conn: connection RDMA resources + * @tx_desc: iSER TX descriptor + * @signal: true to send work request as SIGNALED * - * returns 0 on success, -1 on failure + * Return: 0 on success, -1 on failure */ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, bool signal) -- cgit v1.2.3-59-g8ed1b From b24da1a0d43d959b0dafe2dd95a04ee4f9bb7204 Mon Sep 17 00:00:00 2001 From: "rd.dunlab@gmail.com" Date: Wed, 9 Oct 2019 20:52:48 -0700 Subject: infiniband: fix ulp/iser/iser_initiator.c kernel-doc warnings Add kernel-doc notation for missing function parameters: ../drivers/infiniband/ulp/iser/iser_initiator.c:365: warning: Function parameter or member 'conn' not described in 'iser_send_command' ../drivers/infiniband/ulp/iser/iser_initiator.c:365: warning: Function parameter or member 'task' not described in 'iser_send_command' ../drivers/infiniband/ulp/iser/iser_initiator.c:437: warning: Function parameter or member 'conn' not described in 'iser_send_data_out' ../drivers/infiniband/ulp/iser/iser_initiator.c:437: warning: Function parameter or member 'task' not described in 'iser_send_data_out' ../drivers/infiniband/ulp/iser/iser_initiator.c:437: warning: Function parameter or member 'hdr' not described in 'iser_send_data_out' Link: https://lore.kernel.org/r/20191010035240.132033937@gmail.com Signed-off-by: Randy Dunlap Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iser_initiator.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 
5cbb4b3a0566..4a7045bb0831 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -358,6 +358,8 @@ static inline bool iser_signal_comp(u8 sig_count) /** * iser_send_command - send command PDU + * @conn: link to matching iscsi connection + * @task: SCSI command task */ int iser_send_command(struct iscsi_conn *conn, struct iscsi_task *task) @@ -429,6 +431,9 @@ send_command_error: /** * iser_send_data_out - send data out PDU + * @conn: link to matching iscsi connection + * @task: SCSI command task + * @hdr: pointer to the LLD's iSCSI message header */ int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_task *task, -- cgit v1.2.3-59-g8ed1b From d6537c1a9c970962b6dc4423e813019b69f920fc Mon Sep 17 00:00:00 2001 From: "rd.dunlab@gmail.com" Date: Wed, 9 Oct 2019 20:52:49 -0700 Subject: infiniband: fix core/ kernel-doc notation Correct function parameter names (typos or renames). Add kernel-doc notation for missing function parameters. ../drivers/infiniband/core/sa_query.c:1263: warning: Function parameter or member 'gid_attr' not described in 'ib_init_ah_attr_from_path' ../drivers/infiniband/core/sa_query.c:1263: warning: Excess function parameter 'sgid_attr' description in 'ib_init_ah_attr_from_path' ../drivers/infiniband/core/device.c:145: warning: Function parameter or member 'dev' not described in 'rdma_dev_access_netns' ../drivers/infiniband/core/device.c:145: warning: Excess function parameter 'device' description in 'rdma_dev_access_netns' ../drivers/infiniband/core/device.c:1333: warning: Function parameter or member 'name' not described in 'ib_register_device' ../drivers/infiniband/core/device.c:1461: warning: Function parameter or member 'ib_dev' not described in 'ib_unregister_device' ../drivers/infiniband/core/device.c:1461: warning: Excess function parameter 'device' description in 'ib_unregister_device' ../drivers/infiniband/core/device.c:1483: warning: Function parameter or member 'ib_dev' not described in 'ib_unregister_device_and_put' ../drivers/infiniband/core/device.c:1550: warning: Function parameter or member 'ib_dev' not described in 'ib_unregister_device_queued' Link: https://lore.kernel.org/r/20191010035240.191542461@gmail.com Signed-off-by: Randy Dunlap Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 23 +++++++++++------------ drivers/infiniband/core/sa_query.c | 2 +- 2 files changed, 12 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index a667636f74bf..74941bc39c98 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -128,17 +128,14 @@ module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444); MODULE_PARM_DESC(netns_mode, "Share device among net namespaces; default=1 (shared)"); /** - * rdma_dev_access_netns() - Return whether a rdma device can be accessed + * rdma_dev_access_netns() - Return whether an rdma device can be accessed * from a specified net namespace or not. - * @device: Pointer to rdma device which needs to be checked + * @dev: Pointer to rdma device which needs to be checked * @net: Pointer to net namesapce for which access to be checked * - * rdma_dev_access_netns() - Return whether a rdma device can be accessed - * from a specified net namespace or not. When - * rdma device is in shared mode, it ignores the - * net namespace. 
When rdma device is exclusive - * to a net namespace, rdma device net namespace is - * checked against the specified one. + * When the rdma device is in shared mode, it ignores the net namespace. + * When the rdma device is exclusive to a net namespace, rdma device net + * namespace is checked against the specified one. */ bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net) { @@ -1317,7 +1314,9 @@ out: /** * ib_register_device - Register an IB device with IB core - * @device:Device to register + * @device: Device to register + * @name: unique string device name. This may include a '%' which will + * cause a unique index to be added to the passed device name. * * Low-level drivers use ib_register_device() to register their * devices with the IB core. All registered clients will receive a @@ -1444,7 +1443,7 @@ out: /** * ib_unregister_device - Unregister an IB device - * @device: The device to unregister + * @ib_dev: The device to unregister * * Unregister an IB device. All clients will receive a remove callback. * @@ -1466,7 +1465,7 @@ EXPORT_SYMBOL(ib_unregister_device); /** * ib_unregister_device_and_put - Unregister a device while holding a 'get' - * device: The device to unregister + * @ib_dev: The device to unregister * * This is the same as ib_unregister_device(), except it includes an internal * ib_device_put() that should match a 'get' obtained by the caller. @@ -1536,7 +1535,7 @@ static void ib_unregister_work(struct work_struct *work) /** * ib_unregister_device_queued - Unregister a device using a work queue - * device: The device to unregister + * @ib_dev: The device to unregister * * This schedules an asynchronous unregistration using a WQ for the device. A * driver should use this to avoid holding locks while doing unregistration, diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 17fc2936c077..8917125ea16d 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1246,7 +1246,7 @@ static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, * @port_num: Port on the specified device. * @rec: path record entry to use for ah attributes initialization. * @ah_attr: address handle attributes to initialization from path record. - * @sgid_attr: SGID attribute to consider during initialization. + * @gid_attr: SGID attribute to consider during initialization. * * When ib_init_ah_attr_from_path() returns success, * (a) for IB link layer it optionally contains a reference to SGID attribute -- cgit v1.2.3-59-g8ed1b From 7c21072dde16ea95c60faff7be2bd64b1638c2f5 Mon Sep 17 00:00:00 2001 From: "rd.dunlab@gmail.com" Date: Wed, 9 Oct 2019 20:52:50 -0700 Subject: infiniband: fix sw/rdmavt/ kernel-doc notation Add kernel-doc for missing function parameters. Remove excess kernel-doc descriptions. Fix expected kernel-doc formatting (use ':' instead of '-' after @funcarg). 
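For readers unfamiliar with the convention these warnings enforce, the canonical kernel-doc shape is roughly the following (a generic illustration, not taken from the patch):

	/**
	 * example_func - one-line summary of the function
	 * @first_arg: description of the first parameter (note the ':' separator)
	 * @second_arg: description of the second parameter
	 *
	 * Optional longer description of behaviour and context.
	 *
	 * Return: 0 on success, or a negative errno on failure
	 */
	int example_func(int first_arg, int second_arg);
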
../drivers/infiniband/sw/rdmavt/ah.c:138: warning: Excess function parameter 'udata' description in 'rvt_destroy_ah' ../drivers/infiniband/sw/rdmavt/vt.c:698: warning: Function parameter or member 'pkey_table' not described in 'rvt_init_port' ../drivers/infiniband/sw/rdmavt/cq.c:561: warning: Excess function parameter 'rdi' description in 'rvt_driver_cq_init' ../drivers/infiniband/sw/rdmavt/cq.c:575: warning: Excess function parameter 'rdi' description in 'rvt_cq_exit' ../drivers/infiniband/sw/rdmavt/qp.c:2573: warning: Function parameter or member 'qp' not described in 'rvt_add_rnr_timer' ../drivers/infiniband/sw/rdmavt/qp.c:2573: warning: Function parameter or member 'aeth' not described in 'rvt_add_rnr_timer' ../drivers/infiniband/sw/rdmavt/qp.c:2591: warning: Function parameter or member 'qp' not described in 'rvt_stop_rc_timers' ../drivers/infiniband/sw/rdmavt/qp.c:2624: warning: Function parameter or member 'qp' not described in 'rvt_del_timers_sync' ../drivers/infiniband/sw/rdmavt/qp.c:2697: warning: Function parameter or member 'cb' not described in 'rvt_qp_iter_init' ../drivers/infiniband/sw/rdmavt/qp.c:2728: warning: Function parameter or member 'iter' not described in 'rvt_qp_iter_next' ../drivers/infiniband/sw/rdmavt/qp.c:2796: warning: Function parameter or member 'rdi' not described in 'rvt_qp_iter' ../drivers/infiniband/sw/rdmavt/qp.c:2796: warning: Function parameter or member 'v' not described in 'rvt_qp_iter' ../drivers/infiniband/sw/rdmavt/qp.c:2796: warning: Function parameter or member 'cb' not described in 'rvt_qp_iter' Link: https://lore.kernel.org/r/20191010035240.251184229@gmail.com Signed-off-by: Randy Dunlap Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/ah.c | 1 - drivers/infiniband/sw/rdmavt/cq.c | 2 -- drivers/infiniband/sw/rdmavt/qp.c | 30 +++++++++++++++--------------- drivers/infiniband/sw/rdmavt/vt.c | 3 ++- 4 files changed, 17 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index fe99da0ff060..ee02c6176007 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -129,7 +129,6 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, * rvt_destory_ah - Destory an address handle * @ibah: address handle * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) - * @udata: user data or NULL for kernel object * * Return: 0 on success */ diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index a85571a4cf57..13d7f66eadab 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -552,7 +552,6 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) /** * rvt_driver_cq_init - Init cq resources on behalf of driver - * @rdi: rvt dev structure * * Return: 0 on success */ @@ -568,7 +567,6 @@ int rvt_driver_cq_init(void) /** * rvt_cq_exit - tear down cq reources - * @rdi: rvt dev structure */ void rvt_cq_exit(void) { diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 0b0a241c57ff..3cdf75d0c7a4 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -2563,10 +2563,9 @@ void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift) EXPORT_SYMBOL(rvt_add_retry_timer_ext); /** - * rvt_add_rnr_timer - add/start an rnr timer - * @qp - the QP - * @aeth - aeth of RNR timeout, simulated aeth for loopback - * add an rnr timer on 
the QP + * rvt_add_rnr_timer - add/start an rnr timer on the QP + * @qp: the QP + * @aeth: aeth of RNR timeout, simulated aeth for loopback */ void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth) { @@ -2583,7 +2582,7 @@ EXPORT_SYMBOL(rvt_add_rnr_timer); /** * rvt_stop_rc_timers - stop all timers - * @qp - the QP + * @qp: the QP * stop any pending timers */ void rvt_stop_rc_timers(struct rvt_qp *qp) @@ -2617,7 +2616,7 @@ static void rvt_stop_rnr_timer(struct rvt_qp *qp) /** * rvt_del_timers_sync - wait for any timeout routines to exit - * @qp - the QP + * @qp: the QP */ void rvt_del_timers_sync(struct rvt_qp *qp) { @@ -2626,7 +2625,7 @@ void rvt_del_timers_sync(struct rvt_qp *qp) } EXPORT_SYMBOL(rvt_del_timers_sync); -/** +/* * This is called from s_timer for missing responses. */ static void rvt_rc_timeout(struct timer_list *t) @@ -2676,12 +2675,13 @@ EXPORT_SYMBOL(rvt_rc_rnr_retry); * rvt_qp_iter_init - initial for QP iteration * @rdi: rvt devinfo * @v: u64 value + * @cb: user-defined callback * * This returns an iterator suitable for iterating QPs * in the system. * - * The @cb is a user defined callback and @v is a 64 - * bit value passed to and relevant for processing in the + * The @cb is a user-defined callback and @v is a 64-bit + * value passed to and relevant for processing in the * @cb. An example use case would be to alter QP processing * based on criteria not part of the rvt_qp. * @@ -2712,7 +2712,7 @@ EXPORT_SYMBOL(rvt_qp_iter_init); /** * rvt_qp_iter_next - return the next QP in iter - * @iter - the iterator + * @iter: the iterator * * Fine grained QP iterator suitable for use * with debugfs seq_file mechanisms. @@ -2775,14 +2775,14 @@ EXPORT_SYMBOL(rvt_qp_iter_next); /** * rvt_qp_iter - iterate all QPs - * @rdi - rvt devinfo - * @v - a 64 bit value - * @cb - a callback + * @rdi: rvt devinfo + * @v: a 64-bit value + * @cb: a callback * * This provides a way for iterating all QPs. * - * The @cb is a user defined callback and @v is a 64 - * bit value passed to and relevant for processing in the + * The @cb is a user-defined callback and @v is a 64-bit + * value passed to and relevant for processing in the * cb. An example use case would be to alter QP processing * based on criteria not part of the rvt_qp. * diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 18da1e1ea979..986265ad6e79 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -683,9 +683,10 @@ EXPORT_SYMBOL(rvt_unregister_device); /** * rvt_init_port - init internal data for driver port - * @rdi: rvt dev strut + * @rdi: rvt_dev_info struct * @port: rvt port * @port_index: 0 based index of ports, different from IB core port num + * @pkey_table: pkey_table for @port * * Keep track of a list of ports. No need to have a detach port. * They persist until the driver goes away. -- cgit v1.2.3-59-g8ed1b From a3de94e3d61ec6e6c57ee066ec4d28ebc260dafa Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Wed, 16 Oct 2019 09:23:05 +0300 Subject: IB/mlx5: Introduce ODP diagnostic counters Introduce ODP diagnostic counters and count the following per MR within IB/mlx5 driver: 1) Page faults: Total number of faulted pages. 2) Page invalidations: Total number of pages invalidated by the OS during all invalidation events. The translations can be no longer valid due to either non-present pages or mapping changes. 
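For orientation, the counting scheme introduced here reduces to two per-MR atomic64 counters and a small update helper; the sketch below condenses what the diff that follows adds (reading a counter back is just atomic64_read() on the field):

#include <linux/atomic.h>

struct ib_odp_counters {
	atomic64_t faults;		/* total pages faulted in */
	atomic64_t invalidations;	/* total pages invalidated */
};

#define mlx5_update_odp_stats(mr, counter_name, value) \
	atomic64_add(value, &((mr)->odp_stats.counter_name))

/* e.g. from the page-fault path, after 'ret' pages were resolved:
 *	mlx5_update_odp_stats(mr, faults, ret);
 */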
Link: https://lore.kernel.org/r/20191016062308.11886-2-leon@kernel.org Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 ++++ drivers/infiniband/hw/mlx5/odp.c | 15 +++++++++++++++ include/rdma/ib_verbs.h | 5 +++++ 3 files changed, 24 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index bf30d53d94dc..5aae05ebf64b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -585,6 +585,9 @@ struct mlx5_ib_dm { IB_ACCESS_REMOTE_READ |\ IB_ZERO_BASED) +#define mlx5_update_odp_stats(mr, counter_name, value) \ + atomic64_add(value, &((mr)->odp_stats.counter_name)) + struct mlx5_ib_mr { struct ib_mr ibmr; void *descs; @@ -622,6 +625,7 @@ struct mlx5_ib_mr { wait_queue_head_t q_leaf_free; struct mlx5_async_work cb_work; atomic_t num_pending_prefetch; + struct ib_odp_counters odp_stats; }; static inline bool is_odp_mr(struct mlx5_ib_mr *mr) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 95cf0249b015..3601c6ad96f9 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -224,6 +224,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct mlx5_mtt)) - 1; u64 idx = 0, blk_start_idx = 0; + u64 invalidations = 0; int in_block = 0; u64 addr; @@ -261,6 +262,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, blk_start_idx = idx; in_block = 1; } + + /* Count page invalidations */ + invalidations += idx - blk_start_idx + 1; } else { u64 umr_offset = idx & umr_block_mask; @@ -279,6 +283,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&umem_odp->umem_mutex); + + mlx5_update_odp_stats(mr, invalidations, invalidations); + /* * We are now sure that the device will not access the * memory. We can safely unmap it, and mark it as dirty if @@ -287,6 +294,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, ib_umem_odp_unmap_dma_pages(umem_odp, start, end); + if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { WRITE_ONCE(umem_odp->dying, 1); @@ -801,6 +809,13 @@ next_mr: if (ret < 0) goto srcu_unlock; + /* + * When prefetching a page, page fault is generated + * in order to bring the page to the main memory. + * In the current flow, page faults are being counted. + */ + mlx5_update_odp_stats(mr, faults, ret); + npages += ret; ret = 0; break; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a465fcae992b..1ef31b27a41c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2220,6 +2220,11 @@ struct rdma_netdev_alloc_params { struct net_device *netdev, void *param); }; +struct ib_odp_counters { + atomic64_t faults; + atomic64_t invalidations; +}; + struct ib_counters { struct ib_device *device; struct ib_uobject *uobject; -- cgit v1.2.3-59-g8ed1b From fb91069088faf30c6e8aeeed900326c040623e2d Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Wed, 16 Oct 2019 09:23:06 +0300 Subject: RDMA/nldev: Allow different fill function per resource So far res_get_common_{dumpit, doit} was using the default resource fill function which was defined as part of the nldev_fill_res_entry fill_entries. 
Add a fill function pointer as an argument allows us to use different fill function in case we want to dump different values then 'rdma resource' flow do, but still use the same existing general resources dumping flow. Link: https://lore.kernel.org/r/20191016062308.11886-3-leon@kernel.org Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 44 +++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 7a7474000100..462275480276 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -42,6 +42,9 @@ #include "cma_priv.h" #include "restrack.h" +typedef int (*res_fill_func_t)(struct sk_buff*, bool, + struct rdma_restrack_entry*, uint32_t); + /* * Sort array elements by the netlink attribute name */ @@ -1117,8 +1120,6 @@ static int nldev_res_get_dumpit(struct sk_buff *skb, } struct nldev_fill_res_entry { - int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin, - struct rdma_restrack_entry *res, u32 port); enum rdma_nldev_attr nldev_attr; enum rdma_nldev_command nldev_cmd; u8 flags; @@ -1132,21 +1133,18 @@ enum nldev_res_flags { static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { [RDMA_RESTRACK_QP] = { - .fill_res_func = fill_res_qp_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_QP, .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY, .id = RDMA_NLDEV_ATTR_RES_LQPN, }, [RDMA_RESTRACK_CM_ID] = { - .fill_res_func = fill_res_cm_id_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID, .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, .id = RDMA_NLDEV_ATTR_RES_CM_IDN, }, [RDMA_RESTRACK_CQ] = { - .fill_res_func = fill_res_cq_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ, .flags = NLDEV_PER_DEV, @@ -1154,7 +1152,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { .id = RDMA_NLDEV_ATTR_RES_CQN, }, [RDMA_RESTRACK_MR] = { - .fill_res_func = fill_res_mr_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_MR, .flags = NLDEV_PER_DEV, @@ -1162,7 +1159,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { .id = RDMA_NLDEV_ATTR_RES_MRN, }, [RDMA_RESTRACK_PD] = { - .fill_res_func = fill_res_pd_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_PD, .flags = NLDEV_PER_DEV, @@ -1170,7 +1166,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { .id = RDMA_NLDEV_ATTR_RES_PDN, }, [RDMA_RESTRACK_COUNTER] = { - .fill_res_func = fill_res_counter_entry, .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET, .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER, .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, @@ -1180,7 +1175,8 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, - enum rdma_restrack_type res_type) + enum rdma_restrack_type res_type, + res_fill_func_t fill_func) { const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; @@ -1243,7 +1239,9 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, } has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN); - ret = fe->fill_res_func(msg, has_cap_net_admin, res, 
port); + + ret = fill_func(msg, has_cap_net_admin, res, port); + rdma_restrack_put(res); if (ret) goto err_free; @@ -1263,7 +1261,8 @@ err: static int res_get_common_dumpit(struct sk_buff *skb, struct netlink_callback *cb, - enum rdma_restrack_type res_type) + enum rdma_restrack_type res_type, + res_fill_func_t fill_func) { const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; @@ -1351,7 +1350,8 @@ static int res_get_common_dumpit(struct sk_buff *skb, goto msg_full; } - ret = fe->fill_res_func(skb, has_cap_net_admin, res, port); + ret = fill_func(skb, has_cap_net_admin, res, port); + rdma_restrack_put(res); if (ret) { @@ -1394,17 +1394,19 @@ err_index: return ret; } -#define RES_GET_FUNCS(name, type) \ - static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ +#define RES_GET_FUNCS(name, type) \ + static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ struct netlink_callback *cb) \ - { \ - return res_get_common_dumpit(skb, cb, type); \ - } \ - static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ - struct nlmsghdr *nlh, \ + { \ + return res_get_common_dumpit(skb, cb, type, \ + fill_res_##name##_entry); \ + } \ + static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ + struct nlmsghdr *nlh, \ struct netlink_ext_ack *extack) \ - { \ - return res_get_common_doit(skb, nlh, extack, type); \ + { \ + return res_get_common_doit(skb, nlh, extack, type, \ + fill_res_##name##_entry); \ } RES_GET_FUNCS(qp, RDMA_RESTRACK_QP); -- cgit v1.2.3-59-g8ed1b From e1b95ae0b0ea4987afca73d1dc71dfc0b8ad4e49 Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Wed, 16 Oct 2019 09:23:07 +0300 Subject: RDMA/mlx5: Return ODP type per MR Provide an ODP explicit/implicit type as part of 'rdma -dd resource show mr' dump. For example: $ rdma -dd resource show mr dev mlx5_0 mrn 1 rkey 0xa99a lkey 0xa99a mrlen 50000000 pdn 9 pid 7372 comm ibv_rc_pingpong drv_odp explicit For non-ODP MRs, we won't print "drv_odp ..." at all. 
Link: https://lore.kernel.org/r/20191016062308.11886-4-leon@kernel.org Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 13 ++++++++++ drivers/infiniband/hw/mlx5/Makefile | 2 +- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ drivers/infiniband/hw/mlx5/odp.c | 2 ++ drivers/infiniband/hw/mlx5/restrack.c | 48 +++++++++++++++++++++++++++++++++++ include/rdma/restrack.h | 3 +++ 7 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/hw/mlx5/restrack.c (limited to 'drivers') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 462275480276..a7f5add714d4 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -183,6 +183,19 @@ static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, return 0; } +int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, + const char *str) +{ + if (put_driver_name_print_type(msg, name, + RDMA_NLDEV_PRINT_TYPE_UNSPEC)) + return -EMSGSIZE; + if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str)) + return -EMSGSIZE; + + return 0; +} +EXPORT_SYMBOL(rdma_nl_put_driver_string); + int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value) { return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 9924be8384d8..d0a043ccbe58 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \ srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \ - cong.o + cong.o restrack.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1106a2238b14..4a731cafbf7d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6271,6 +6271,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .disassociate_ucontext = mlx5_ib_disassociate_ucontext, .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, + .fill_res_entry = mlx5_ib_fill_res_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5aae05ebf64b..a0ca1ef16e4e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -626,6 +626,7 @@ struct mlx5_ib_mr { struct mlx5_async_work cb_work; atomic_t num_pending_prefetch; struct ib_odp_counters odp_stats; + bool is_odp_implicit; }; static inline bool is_odp_mr(struct mlx5_ib_mr *mr) @@ -1339,6 +1340,8 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, u8 *native_port_num); void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); +int mlx5_ib_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 3601c6ad96f9..2ab6e44aeaae 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ 
b/drivers/infiniband/hw/mlx5/odp.c @@ -543,6 +543,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); + imr->is_odp_implicit = true; + return imr; } diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c new file mode 100644 index 000000000000..065049f52b83 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. + */ + +#include +#include +#include +#include "mlx5_ib.h" + +static int fill_res_mr_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_mr *ibmr = container_of(res, struct ib_mr, res); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct nlattr *table_attr; + + if (!(mr->access_flags & IB_ACCESS_ON_DEMAND)) + return 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + if (mr->is_odp_implicit) { + if (rdma_nl_put_driver_string(msg, "odp", "implicit")) + goto err; + } else { + if (rdma_nl_put_driver_string(msg, "odp", "explicit")) + goto err; + } + + nla_nest_end(msg, table_attr); + return 0; + +err: + nla_nest_cancel(msg, table_attr); + return -EMSGSIZE; +} + +int mlx5_ib_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (res->type == RDMA_RESTRACK_MR) + return fill_res_mr_entry(msg, res); + + return 0; +} diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 83df1ec6664e..fe9b3c507a9c 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -156,6 +156,9 @@ int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name, int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value); int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value); +int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, + const char *str); + struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, u32 id); -- cgit v1.2.3-59-g8ed1b From 4061ff7aa379fa770a82da0ed7ec4f9163034518 Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Wed, 16 Oct 2019 09:23:08 +0300 Subject: RDMA/nldev: Provide MR statistics Add RDMA nldev netlink interface for dumping MR statistics information. 
Output example: $ ./ibv_rc_pingpong -o -P -s 500000000 local address: LID 0x0001, QPN 0x00008a, PSN 0xf81096, GID :: $ rdma stat show mr dev mlx5_0 mrn 2 page_faults 122071 page_invalidations 0 Link: https://lore.kernel.org/r/20191016062308.11886-5-leon@kernel.org Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/nldev.c | 45 ++++++++++++++++++++++++++++++----- drivers/infiniband/hw/mlx5/main.c | 2 ++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 ++ drivers/infiniband/hw/mlx5/restrack.c | 42 ++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 7 ++++++ include/rdma/restrack.h | 2 ++ 7 files changed, 95 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 74941bc39c98..eb35b663a742 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2605,6 +2605,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, drain_sq); SET_DEVICE_OP(dev_ops, enable_driver); SET_DEVICE_OP(dev_ops, fill_res_entry); + SET_DEVICE_OP(dev_ops, fill_stat_entry); SET_DEVICE_OP(dev_ops, get_dev_fw_str); SET_DEVICE_OP(dev_ops, get_dma_mr); SET_DEVICE_OP(dev_ops, get_hw_stats); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index a7f5add714d4..3bb208557c45 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -439,6 +439,14 @@ static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, return dev->ops.fill_res_entry(msg, res); } +static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (!dev->ops.fill_stat_entry) + return false; + return dev->ops.fill_stat_entry(msg, res); +} + static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { @@ -739,8 +747,8 @@ err: return ret; } -static int fill_stat_hwcounter_entry(struct sk_buff *msg, - const char *name, u64 value) +int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, + u64 value) { struct nlattr *entry_attr; @@ -762,6 +770,25 @@ err: nla_nest_cancel(msg, entry_attr); return -EMSGSIZE; } +EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry); + +static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_mr *mr = container_of(res, struct ib_mr, res); + struct ib_device *dev = mr->pd->device; + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) + goto err; + + if (fill_stat_entry(dev, msg, res)) + goto err; + + return 0; + +err: + return -EMSGSIZE; +} static int fill_stat_counter_hwcounters(struct sk_buff *msg, struct rdma_counter *counter) @@ -775,7 +802,7 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg, return -EMSGSIZE; for (i = 0; i < st->num_counters; i++) - if (fill_stat_hwcounter_entry(msg, st->names[i], st->value[i])) + if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i])) goto err; nla_nest_end(msg, table_attr); @@ -1897,7 +1924,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, for (i = 0; i < num_cnts; i++) { v = stats->value[i] + rdma_counter_get_hwstat_value(device, port, i); - if (fill_stat_hwcounter_entry(msg, stats->names[i], v)) { + if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) { ret = -EMSGSIZE; goto err_table; } 
@@ -2006,7 +2033,10 @@ static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, case RDMA_NLDEV_ATTR_RES_QP: ret = stat_get_doit_qp(skb, nlh, extack, tb); break; - + case RDMA_NLDEV_ATTR_RES_MR: + ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR, + fill_stat_mr_entry); + break; default: ret = -EINVAL; break; @@ -2030,7 +2060,10 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb, case RDMA_NLDEV_ATTR_RES_QP: ret = nldev_res_get_counter_dumpit(skb, cb); break; - + case RDMA_NLDEV_ATTR_RES_MR: + ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR, + fill_stat_mr_entry); + break; default: ret = -EINVAL; break; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4a731cafbf7d..39d54e285ae9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -67,6 +67,7 @@ #include #include #include +#include #define UVERBS_MODULE_NAME mlx5_ib #include @@ -6272,6 +6273,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, .fill_res_entry = mlx5_ib_fill_res_entry, + .fill_stat_entry = mlx5_ib_fill_stat_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a0ca1ef16e4e..e9bdb48cf1d3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1342,6 +1342,8 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); int mlx5_ib_fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res); +int mlx5_ib_fill_stat_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 065049f52b83..8f6c04f12531 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -8,6 +8,39 @@ #include #include "mlx5_ib.h" +static int fill_stat_mr_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_mr *ibmr = container_of(res, struct ib_mr, res); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct nlattr *table_attr; + + if (!(mr->access_flags & IB_ACCESS_ON_DEMAND)) + return 0; + + table_attr = nla_nest_start(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); + + if (!table_attr) + goto err; + + if (rdma_nl_stat_hwcounter_entry(msg, "page_faults", + atomic64_read(&mr->odp_stats.faults))) + goto err_table; + if (rdma_nl_stat_hwcounter_entry( + msg, "page_invalidations", + atomic64_read(&mr->odp_stats.invalidations))) + goto err_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} + static int fill_res_mr_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) { @@ -46,3 +79,12 @@ int mlx5_ib_fill_res_entry(struct sk_buff *msg, return 0; } + +int mlx5_ib_fill_stat_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (res->type == RDMA_RESTRACK_MR) + return fill_stat_mr_entry(msg, res); + + return 0; +} diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1ef31b27a41c..cca9985b4cbc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2570,6 +2570,13 @@ struct ib_device_ops { */ int (*counter_update_stats)(struct rdma_counter *counter); + /** + * Allows rdma drivers 
to add their own restrack attributes + * dumped via 'rdma stat' iproute2 command. + */ + int (*fill_stat_entry)(struct sk_buff *msg, + struct rdma_restrack_entry *entry); + DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_cq); DECLARE_RDMA_OBJ_SIZE(ib_pd); diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index fe9b3c507a9c..7682d1bcf789 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -158,6 +158,8 @@ int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value); int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, const char *str); +int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, + u64 value); struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, -- cgit v1.2.3-59-g8ed1b From d3bd93967015d974bb95d47cc14edd5adbea814f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 15 Oct 2019 10:20:58 +0300 Subject: IB/cma: Honor traffic class from lower netdevice for RoCE When a macvlan netdevice is used for RoCE, consider the tos->prio->tc mapping as SL using its lower netdevice. 1. If the lower netdevice is a VLAN netdevice, consider the VLAN netdevice and it's parent netdevice for mapping 2. If the lower netdevice is not a VLAN netdevice, consider tc mapping directly from the lower netdevice Link: https://lore.kernel.org/r/20191015072058.17347-1-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 61 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0e3cf3461999..c8566a423719 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2827,22 +2827,65 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv) return 0; } -static int iboe_tos_to_sl(struct net_device *ndev, int tos) +static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio) { - int prio; struct net_device *dev; - prio = rt_tos2priority(tos); - dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev; + dev = vlan_dev_real_dev(vlan_ndev); if (dev->num_tc) return netdev_get_prio_tc_map(dev, prio); -#if IS_ENABLED(CONFIG_VLAN_8021Q) + return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) & + VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; +} + +struct iboe_prio_tc_map { + int input_prio; + int output_tc; + bool found; +}; + +static int get_lower_vlan_dev_tc(struct net_device *dev, void *data) +{ + struct iboe_prio_tc_map *map = data; + + if (is_vlan_dev(dev)) + map->output_tc = get_vlan_ndev_tc(dev, map->input_prio); + else if (dev->num_tc) + map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio); + else + map->output_tc = 0; + /* We are interested only in first level VLAN device, so always + * return 1 to stop iterating over next level devices. 
+ */ + map->found = true; + return 1; +} + +static int iboe_tos_to_sl(struct net_device *ndev, int tos) +{ + struct iboe_prio_tc_map prio_tc_map = {}; + int prio = rt_tos2priority(tos); + + /* If VLAN device, get it directly from the VLAN netdev */ if (is_vlan_dev(ndev)) - return (vlan_dev_get_egress_qos_mask(ndev, prio) & - VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; -#endif - return 0; + return get_vlan_ndev_tc(ndev, prio); + + prio_tc_map.input_prio = prio; + rcu_read_lock(); + netdev_walk_all_lower_dev_rcu(ndev, + get_lower_vlan_dev_tc, + &prio_tc_map); + rcu_read_unlock(); + /* If map is found from lower device, use it; Otherwise + * continue with the current netdevice to get priority to tc map. + */ + if (prio_tc_map.found) + return prio_tc_map.output_tc; + else if (ndev->num_tc) + return netdev_get_prio_tc_map(ndev, prio); + else + return 0; } static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) -- cgit v1.2.3-59-g8ed1b From a29e1012c1bf52c5364cad5ffbf5b4be8fe9c91b Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Fri, 18 Oct 2019 16:15:34 +0800 Subject: RDMA/uverbs: Add a check for uverbs_attr_get to uverbs_copy_to_struct_or_zero All current callers for uverbs_copy_to_struct_or_zero() already check that the attribute exists, but it make sense to verify the result like the other functions do. Link: https://lore.kernel.org/r/20191018081533.8544-1-hslester96@gmail.com Signed-off-by: Chuhong Yuan Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_ioctl.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 61758201d9b2..269938f59d3f 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -795,6 +795,9 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, { const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + if (IS_ERR(attr)) + return PTR_ERR(attr); + if (size < attr->ptr_attr.len) { if (clear_user(u64_to_user_ptr(attr->ptr_attr.data) + size, attr->ptr_attr.len - size)) -- cgit v1.2.3-59-g8ed1b From 68abaa765e410dc1583de1fa285ec7b0c58c6252 Mon Sep 17 00:00:00 2001 From: Ran Rozenstein Date: Sun, 20 Oct 2019 09:44:54 +0300 Subject: IB/mlx5: Remove dead code mlx5_ib_dc_atomic_is_supported function is not used anywhere. Remove the dead code. 
Fixes: a60109dc9a95 ("IB/mlx5: Add support for extended atomic operations") Link: https://lore.kernel.org/r/20191020064454.8551-1-leon@kernel.org Signed-off-by: Ran Rozenstein Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 15 --------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - 2 files changed, 16 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 39d54e285ae9..20fabb368f61 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -694,21 +694,6 @@ static void get_atomic_caps_qp(struct mlx5_ib_dev *dev, get_atomic_caps(dev, atomic_size_qp, props); } -static void get_atomic_caps_dc(struct mlx5_ib_dev *dev, - struct ib_device_attr *props) -{ - u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc); - - get_atomic_caps(dev, atomic_size_qp, props); -} - -bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev) -{ - struct ib_device_attr props = {}; - - get_atomic_caps_dc(dev, &props); - return (props.atomic_cap == IB_ATOMIC_HCA) ? true : false; -} static int mlx5_query_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid) { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e9bdb48cf1d3..64f56926bf6b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1241,7 +1241,6 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, struct ib_rwq_ind_table_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); -bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev); struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_dm_alloc_attr *attr, -- cgit v1.2.3-59-g8ed1b From 3f89b01f4bbab2dcd1a6e7e31e64faacb448e3be Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Sun, 20 Oct 2019 09:43:59 +0300 Subject: IB/mlx5: Align usage of QP1 create flags with rest of mlx5 defines There is little value in keeping separate function for one flag, provide it directly like any other mlx5 define. 
Link: https://lore.kernel.org/r/20191020064400.8344-2-leon@kernel.org Signed-off-by: Michael Guralnik Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/gsi.c | 2 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 +------ drivers/infiniband/hw/mlx5/qp.c | 8 ++++---- 3 files changed, 6 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 4950df3f71b6..ac4d8d1b9a07 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -263,7 +263,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) }, .sq_sig_type = gsi->sq_sig_type, .qp_type = IB_QPT_UD, - .create_flags = mlx5_ib_create_qp_sqpn_qp1(), + .create_flags = MLX5_IB_QP_CREATE_SQPN_QP1, }; return ib_create_qp(pd, &init_attr); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 64f56926bf6b..d53739e43a93 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -247,12 +247,7 @@ struct mlx5_ib_flow_db { * These flags are intended for internal use by the mlx5_ib driver, and they * rely on the range reserved for that use in the ib_qp_create_flags enum. */ - -/* Create a UD QP whose source QP number is 1 */ -static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void) -{ - return IB_QP_CREATE_RESERVED_START; -} +#define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START struct wr_list { u16 opcode; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8937d72ddcf6..bb3f432e2fb6 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1041,7 +1041,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_IPOIB_UD_LSO | IB_QP_CREATE_NETIF_QP | - mlx5_ib_create_qp_sqpn_qp1())) + MLX5_IB_QP_CREATE_SQPN_QP1)) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) @@ -1104,7 +1104,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, MLX5_SET(qpc, qpc, fre, 1); MLX5_SET(qpc, qpc, rlky, 1); - if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) { + if (init_attr->create_flags & MLX5_IB_QP_CREATE_SQPN_QP1) { MLX5_SET(qpc, qpc, deth_sqpn, 1); qp->flags |= MLX5_IB_QP_SQPN_QP1; } @@ -2140,7 +2140,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EINVAL; } if (init_attr->create_flags & - mlx5_ib_create_qp_sqpn_qp1()) { + MLX5_IB_QP_CREATE_SQPN_QP1) { mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n"); return -EINVAL; } @@ -5823,7 +5823,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, if (qp->flags & MLX5_IB_QP_MANAGED_RECV) qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; if (qp->flags & MLX5_IB_QP_SQPN_QP1) - qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1(); + qp_init_attr->create_flags |= MLX5_IB_QP_CREATE_SQPN_QP1; qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; -- cgit v1.2.3-59-g8ed1b From c4c8aff5a9ddb061a6246fb34eabdb9244b4d8f6 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 20 Oct 2019 09:54:27 +0300 Subject: IB/core: Do not notify GID change event of an unregistered device When IB device is undergoing unregistration, the GID cache is always cleaned up after all clients are unregistered with the below flow. 
__ib_unregister_device() disable_device() ib_cache_cleanup_one() gid_table_cleanup_one() cleanup_gid_table_port() There is no use in generating a GID change event at this stage, where there is no active client of the device and the device is nearly unregistered. Link: https://lore.kernel.org/r/20191020065427.8772-4-leon@kernel.org Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Reviewed-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 00fb3eacda19..d535995711c3 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -819,22 +819,16 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table) { int i; - bool deleted = false; if (!table) return; mutex_lock(&table->lock); for (i = 0; i < table->sz; ++i) { - if (is_gid_entry_valid(table->data_vec[i])) { + if (is_gid_entry_valid(table->data_vec[i])) del_gid(ib_dev, port, table, i); - deleted = true; - } } mutex_unlock(&table->lock); - - if (deleted) - dispatch_gid_change_event(ib_dev, port); } void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, -- cgit v1.2.3-59-g8ed1b From cf7e93c12fbc0f18cbea0571406e302d6904a7ac Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 10 Oct 2019 10:11:04 +0300 Subject: RDMA/restrack: Remove PID namespace support IB resources are bound to an IB device and to file descriptors; both entities are unaware of PID namespaces and of task lifetime. This difference in model caused unpredictable behavior for the following scenario: 1. Create an FD and context 2. Share it with an ephemeral child 3. Create any object and exit that child The end result of this flow is that the newly created objects are still tracked by restrack, but they are not visible to users because the task_struct associated with them has already exited. The right thing is to rely only on the net namespace for any filtering purposes and to drop PID namespace support.
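For illustration only (not part of the patch; error handling trimmed), the scenario above can be reproduced from user space with a short libibverbs program along these lines:

#include <infiniband/verbs.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int n;
	struct ibv_device **devs = ibv_get_device_list(&n);
	struct ibv_context *ctx;

	if (!devs || !n)
		return 1;
	ctx = ibv_open_device(devs[0]);	/* 1. create the FD and context */
	if (!ctx)
		return 1;

	if (fork() == 0) {		/* 2. ephemeral child */
		ibv_alloc_pd(ctx);	/* 3. create an object ... */
		_exit(0);		/*    ... and exit the child */
	}
	wait(NULL);

	/*
	 * The PD is still tracked by restrack, but its creating task has
	 * exited; with the old PID-namespace filtering it became invisible
	 * to 'rdma resource show' even though the context FD is still open.
	 */
	pause();
	return 0;
}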
Link: https://lore.kernel.org/r/20191010071105.25538-2-leon@kernel.org Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 14 -------------- drivers/infiniband/core/nldev.c | 13 +------------ drivers/infiniband/core/restrack.c | 20 +------------------- drivers/infiniband/core/restrack.h | 1 - 4 files changed, 2 insertions(+), 46 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 736ab760025d..12ba2685abcf 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -149,9 +149,6 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, struct auto_mode_param *param = &counter->mode.param; bool match = true; - if (!rdma_is_visible_in_pid_ns(&qp->res)) - return false; - /* Ensure that counter belongs to the right PID */ if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task)) return false; @@ -229,9 +226,6 @@ static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, rt = &dev->res[RDMA_RESTRACK_COUNTER]; xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { - if (!rdma_is_visible_in_pid_ns(res)) - continue; - counter = container_of(res, struct rdma_counter, res); if ((counter->device != qp->device) || (counter->port != port)) goto next; @@ -412,9 +406,6 @@ static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) if (IS_ERR(res)) return NULL; - if (!rdma_is_visible_in_pid_ns(res)) - goto err; - qp = container_of(res, struct ib_qp, res); if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) goto err; @@ -445,11 +436,6 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, if (IS_ERR(res)) return NULL; - if (!rdma_is_visible_in_pid_ns(res)) { - rdma_restrack_put(res); - return NULL; - } - counter = container_of(res, struct rdma_counter, res); kref_get(&counter->kref); rdma_restrack_put(res); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 3bb208557c45..2f052c23c8c7 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -722,9 +722,6 @@ static int fill_stat_counter_qps(struct sk_buff *msg, rt = &counter->device->res[RDMA_RESTRACK_QP]; xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { - if (!rdma_is_visible_in_pid_ns(res)) - continue; - qp = container_of(res, struct ib_qp, res); if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) continue; @@ -1258,15 +1255,10 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err; } - if (!rdma_is_visible_in_pid_ns(res)) { - ret = -ENOENT; - goto err_get; - } - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto err; + goto err_get; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, @@ -1373,9 +1365,6 @@ static int res_get_common_dumpit(struct sk_buff *skb, * objects. 
*/ xa_for_each(&rt->xa, id, res) { - if (!rdma_is_visible_in_pid_ns(res)) - continue; - if (idx < start || !rdma_restrack_get(res)) goto next; diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index a07665f7ef8c..62fbb0ae9cb4 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -116,11 +116,8 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type) u32 cnt = 0; xa_lock(&rt->xa); - xas_for_each(&xas, e, U32_MAX) { - if (!rdma_is_visible_in_pid_ns(e)) - continue; + xas_for_each(&xas, e, U32_MAX) cnt++; - } xa_unlock(&rt->xa); return cnt; } @@ -346,18 +343,3 @@ out: } } EXPORT_SYMBOL(rdma_restrack_del); - -bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res) -{ - /* - * 1. Kern resources should be visible in init - * namespace only - * 2. Present only resources visible in the current - * namespace - */ - if (rdma_is_kernel_res(res)) - return task_active_pid_ns(current) == &init_pid_ns; - - /* PID 0 means that resource is not found in current namespace */ - return task_pid_vnr(res->task); -} diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h index 7bd177cc0a61..d084e5f89849 100644 --- a/drivers/infiniband/core/restrack.h +++ b/drivers/infiniband/core/restrack.h @@ -27,5 +27,4 @@ int rdma_restrack_init(struct ib_device *dev); void rdma_restrack_clean(struct ib_device *dev); void rdma_restrack_attach_task(struct rdma_restrack_entry *res, struct task_struct *task); -bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res); #endif /* _RDMA_CORE_RESTRACK_H_ */ -- cgit v1.2.3-59-g8ed1b From ac71ffcfb457a98f0386d682de107ef746bcb60e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 10 Oct 2019 10:11:05 +0300 Subject: RDMA/core: Check that process is still alive before sending it to the users The PID information can disappear asynchronously because the task can be killed and moved to zombie state. In this case, PID will be zero in similar way to the kernel tasks. Recognize such situation where we are asking to return orphaned object and simply skip filling PID attribute. As part of this change, document the same scenario in counter.c code. Link: https://lore.kernel.org/r/20191010071105.25538-3-leon@kernel.org Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/counters.c | 14 ++++++++++++-- drivers/infiniband/core/nldev.c | 28 +++++++++++++++++++++------- 2 files changed, 33 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 12ba2685abcf..8434ec082c3a 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -149,8 +149,18 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, struct auto_mode_param *param = &counter->mode.param; bool match = true; - /* Ensure that counter belongs to the right PID */ - if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task)) + /* + * Ensure that counter belongs to the right PID. This operation can + * race with user space which kills the process and leaves QP and + * counters orphans. + * + * It is not a big deal because exitted task will leave both QP and + * counter in the same bucket of zombie process. Just ensure that + * process is still alive before procedding. 
+ * + */ + if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task) || + !task_pid_nr(qp->res.task)) return false; if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 2f052c23c8c7..b2328550d24c 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -415,20 +415,34 @@ err: static int fill_res_name_pid(struct sk_buff *msg, struct rdma_restrack_entry *res) { + int err = 0; + /* * For user resources, user is should read /proc/PID/comm to get the * name of the task file. */ if (rdma_is_kernel_res(res)) { - if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, - res->kern_name)) - return -EMSGSIZE; + err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, + res->kern_name); } else { - if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, - task_pid_vnr(res->task))) - return -EMSGSIZE; + pid_t pid; + + pid = task_pid_vnr(res->task); + /* + * Task is dead and in zombie state. + * There is no need to print PID anymore. + */ + if (pid) + /* + * This part is racy, task can be killed and PID will + * be zero right here but it is ok, next query won't + * return PID. We don't promise real-time reflection + * of SW objects. + */ + err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid); } - return 0; + + return err ? -EMSGSIZE : 0; } static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, -- cgit v1.2.3-59-g8ed1b From dc2f7edcc01219fbbb517c1245dac46d30edaf93 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 20 Oct 2019 08:57:24 +0300 Subject: RDMA/rxe: Remove useless rxe_init_device_param assignments IB devices are allocated with kzalloc and don't need explicit zero assignments for their parameters. It can be removed safely. Link: https://lore.kernel.org/r/20191020055724.7410-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe.c | 13 ------------- drivers/infiniband/sw/rxe/rxe_param.h | 13 ------------- 2 files changed, 26 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index a8c11b5e1e94..0946a301a5c5 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -77,12 +77,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe) { rxe->max_inline_data = RXE_MAX_INLINE_DATA; - rxe->attr.fw_ver = RXE_FW_VER; rxe->attr.max_mr_size = RXE_MAX_MR_SIZE; rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP; - rxe->attr.vendor_id = RXE_VENDOR_ID; - rxe->attr.vendor_part_id = RXE_VENDOR_PART_ID; - rxe->attr.hw_ver = RXE_HW_VER; rxe->attr.max_qp = RXE_MAX_QP; rxe->attr.max_qp_wr = RXE_MAX_QP_WR; rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; @@ -94,22 +90,13 @@ static void rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.max_mr = RXE_MAX_MR; rxe->attr.max_pd = RXE_MAX_PD; rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM; - rxe->attr.max_ee_rd_atom = RXE_MAX_EE_RD_ATOM; rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM; rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM; - rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM; rxe->attr.atomic_cap = IB_ATOMIC_HCA; - rxe->attr.max_ee = RXE_MAX_EE; - rxe->attr.max_rdd = RXE_MAX_RDD; - rxe->attr.max_mw = RXE_MAX_MW; - rxe->attr.max_raw_ipv6_qp = RXE_MAX_RAW_IPV6_QP; - rxe->attr.max_raw_ethy_qp = RXE_MAX_RAW_ETHY_QP; rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP; rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH; rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH; 
rxe->attr.max_ah = RXE_MAX_AH; - rxe->attr.max_fmr = RXE_MAX_FMR; - rxe->attr.max_map_per_fmr = RXE_MAX_MAP_PER_FMR; rxe->attr.max_srq = RXE_MAX_SRQ; rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR; rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE; diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index fe5207386700..353c6668249e 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -60,12 +60,8 @@ static inline enum ib_mtu eth_mtu_int_to_enum(int mtu) /* default/initial rxe device parameter settings */ enum rxe_device_param { - RXE_FW_VER = 0, RXE_MAX_MR_SIZE = -1ull, RXE_PAGE_SIZE_CAP = 0xfffff000, - RXE_VENDOR_ID = 0, - RXE_VENDOR_PART_ID = 0, - RXE_HW_VER = 0, RXE_MAX_QP = 0x10000, RXE_MAX_QP_WR = 0x4000, RXE_MAX_INLINE_DATA = 400, @@ -87,21 +83,12 @@ enum rxe_device_param { RXE_MAX_MR = 256 * 1024, RXE_MAX_PD = 0x7ffc, RXE_MAX_QP_RD_ATOM = 128, - RXE_MAX_EE_RD_ATOM = 0, RXE_MAX_RES_RD_ATOM = 0x3f000, RXE_MAX_QP_INIT_RD_ATOM = 128, - RXE_MAX_EE_INIT_RD_ATOM = 0, - RXE_MAX_EE = 0, - RXE_MAX_RDD = 0, - RXE_MAX_MW = 0, - RXE_MAX_RAW_IPV6_QP = 0, - RXE_MAX_RAW_ETHY_QP = 0, RXE_MAX_MCAST_GRP = 8192, RXE_MAX_MCAST_QP_ATTACH = 56, RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000, RXE_MAX_AH = 100, - RXE_MAX_FMR = 0, - RXE_MAX_MAP_PER_FMR = 0, RXE_MAX_SRQ = 960, RXE_MAX_SRQ_WR = 0x4000, RXE_MIN_SRQ_WR = 1, -- cgit v1.2.3-59-g8ed1b From 8d625101a74087a29b7c7760959e4036b27ea735 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 20 Oct 2019 10:15:54 +0300 Subject: RDMA/cm: Delete unused cm_is_active_peer function Function cm_is_active_peer is not used, delete it. Link: https://lore.kernel.org/r/20191020071559.9743-2-leon@kernel.org Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c0aa3a4b4cfd..bc4abb05dbd4 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1525,14 +1525,6 @@ static int cm_issue_rej(struct cm_port *port, return ret; } -static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid, - __be32 local_qpn, __be32 remote_qpn) -{ - return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) || - ((local_ca_guid == remote_ca_guid) && - (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn)))); -} - static bool cm_req_has_alt_path(struct cm_req_msg *req_msg) { return ((req_msg->alt_local_lid) || -- cgit v1.2.3-59-g8ed1b From a916051191a3080b3124e77199126a032c327303 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 20 Oct 2019 10:15:55 +0300 Subject: RDMA/cm: Use specific keyword to check define There is a specific define keyword to check if define exists or not, let's use it instead of open-coded variant. Link: https://lore.kernel.org/r/20191020071559.9743-3-leon@kernel.org Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm_msgs.h | 2 +- include/rdma/ib_cm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 3d16d614aff6..0a9e2d3fb9df 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -31,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS THE * SOFTWARE. 
*/ -#if !defined(CM_MSGS_H) +#ifndef CM_MSGS_H #define CM_MSGS_H #include diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 49f4f75499b3..5dc4ec986527 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -32,7 +32,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if !defined(IB_CM_H) +#ifndef IB_CM_H #define IB_CM_H #include -- cgit v1.2.3-59-g8ed1b From 24f52149230454249ae628b922f741036e83b84c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 20 Oct 2019 10:15:56 +0300 Subject: RDMA/cm: Update copyright together with SPDX tag Add Mellanox to lust of copyright holders and replace copyright boilerplate with relevant SPDX tag. Link: https://lore.kernel.org/r/20191020071559.9743-4-leon@kernel.org Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 30 ++---------------------------- drivers/infiniband/core/cm_msgs.h | 30 ++---------------------------- drivers/infiniband/core/cma.c | 31 ++----------------------------- include/rdma/ib_cm.h | 30 ++---------------------------- 4 files changed, 8 insertions(+), 113 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index bc4abb05dbd4..7ffa16ea5fe3 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1,36 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2004-2007 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. */ #include diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 0a9e2d3fb9df..92d7260ac913 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -1,35 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004, 2011 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. 
* Copyright (c) 2004 Voltaire Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING the madirectory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use source and binary forms, with or - * withmodification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retathe above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHWARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS THE - * SOFTWARE. + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. */ #ifndef CM_MSGS_H #define CM_MSGS_H diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c8566a423719..8f318928f29d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1,36 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. - * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
*/ #include diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 5dc4ec986527..b01a8a8d4de9 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -1,36 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. */ #ifndef IB_CM_H #define IB_CM_H -- cgit v1.2.3-59-g8ed1b From 515f60004ed985d2b2f03659365752e0b6142986 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 8 Jun 2019 12:25:14 +0300 Subject: RDMA/hns: Prevent undefined behavior in hns_roce_set_user_sq_size() The "ucmd->log_sq_bb_count" variable is a user-controlled variable in the 0-255 range. If we shift more than the number of bits in an int then it's undefined behavior (the shift wraps), and potentially the int could become negative.
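The hazard is easy to demonstrate outside the kernel. The snippet below is only a minimal userspace sketch (set_wqe_cnt and the constants are invented for illustration; it is not the hns driver code): a user-supplied shift count has to be validated against the operand width before the shift is performed, which is essentially what the check_shl_overflow() helper does in the fix that follows.

#include <stdint.h>
#include <stdio.h>

/* Reject the shift count before shifting, mirroring in spirit what
 * check_shl_overflow() does for the real driver. */
static int set_wqe_cnt(uint8_t log_sq_bb_count, uint32_t max_wqes,
		       uint32_t *wqe_cnt)
{
	/* Shifting a 32-bit value by 32 or more bits is undefined behavior. */
	if (log_sq_bb_count >= 32)
		return -1;

	*wqe_cnt = UINT32_C(1) << log_sq_bb_count;
	if (*wqe_cnt > max_wqes)
		return -1;
	return 0;
}

int main(void)
{
	uint32_t cnt;

	/* 200 stands in for a bogus value coming straight from userspace. */
	if (set_wqe_cnt(200, 32768, &cnt))
		printf("rejected out-of-range log_sq_bb_count\n");

	if (!set_wqe_cnt(10, 32768, &cnt))
		printf("wqe_cnt = %u\n", cnt);
	return 0;
}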
Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Link: https://lore.kernel.org/r/20190608092514.GC28890@mwanda Reported-by: Dan Carpenter Signed-off-by: Jason Gunthorpe Reviewed-by: Dan Carpenter --- drivers/infiniband/hw/hns/hns_roce_qp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index bec48f2593f4..6aa27d6ea3a6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -332,9 +332,8 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, u8 max_sq_stride = ilog2(roundup_sq_stride); /* Sanity check SQ size before proceeding */ - if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes || - ucmd->log_sq_stride > max_sq_stride || - ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { + if (ucmd->log_sq_stride > max_sq_stride || + ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { ibdev_err(&hr_dev->ib_dev, "check SQ size error!\n"); return -EINVAL; } @@ -358,13 +357,16 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, u32 max_cnt; int ret; + if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) || + hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) + return -EINVAL; + ret = check_sq_size_with_integrity(hr_dev, cap, ucmd); if (ret) { ibdev_err(&hr_dev->ib_dev, "Sanity check sq size failed\n"); return ret; } - hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; hr_qp->sq.wqe_shift = ucmd->log_sq_stride; max_cnt = max(1U, cap->max_send_sge); -- cgit v1.2.3-59-g8ed1b From f9e66db143162a72bb41369fadddcd65fef8664f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 23 Oct 2019 08:42:39 +0300 Subject: RDMA/hns: Delete BITS_PER_BYTE redefinition HNS redefined available in bits.h define and didn't use it, we can safely delete it. Link: https://lore.kernel.org/r/20191023054239.31648-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index cbd75e466417..940761310430 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -53,8 +53,6 @@ #define BA_BYTE_LEN 8 -#define BITS_PER_BYTE 8 - /* Hardware specification only for v1 engine */ #define HNS_ROCE_MIN_CQE_NUM 0x40 #define HNS_ROCE_MIN_WQE_NUM 0x20 -- cgit v1.2.3-59-g8ed1b From 79d81ef42c9a8feee2f1df5dffa6ac628b71141d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 23 Oct 2019 13:41:06 -0700 Subject: RDMA/srpt: Fix TPG creation Unlike the iSCSI target driver, for the SRP target driver it is sufficient if a single TPG can be associated with each RDMA port name. However, users started associating multiple TPGs with RDMA port names. Support this by converting the single TPG in struct srpt_port_id into a list. This patch fixes the following list corruption issue: list_add corruption. prev->next should be next (ffffffffc0a080c0), but was ffffa08a994ce6f0. (prev=ffffa08a994ce6f0). 
WARNING: CPU: 2 PID: 2597 at lib/list_debug.c:28 __list_add_valid+0x6a/0x70 CPU: 2 PID: 2597 Comm: targetcli Not tainted 5.4.0-rc1.3bfa3c9602a7 #1 RIP: 0010:__list_add_valid+0x6a/0x70 Call Trace: core_tpg_register+0x116/0x200 [target_core_mod] srpt_make_tpg+0x3f/0x60 [ib_srpt] target_fabric_make_tpg+0x41/0x290 [target_core_mod] configfs_mkdir+0x158/0x3e0 vfs_mkdir+0x108/0x1a0 do_mkdirat+0x77/0xe0 do_syscall_64+0x55/0x1d0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Link: https://lore.kernel.org/r/20191023204106.23326-1-bvanassche@acm.org Reported-by: Honggang LI Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: Bart Van Assche Acked-by: Honggang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 77 +++++++++++++++++++++++------------ drivers/infiniband/ulp/srpt/ib_srpt.h | 25 ++++++++++-- 2 files changed, 73 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index daf811abf40a..a278e76b9e02 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2131,6 +2131,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, char i_port_id[36]; u32 it_iu_len; int i, tag_num, tag_size, ret; + struct srpt_tpg *stpg; WARN_ON_ONCE(irqs_disabled()); @@ -2288,19 +2289,33 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, tag_num = ch->rq_size; tag_size = 1; /* ib_srpt does not use se_sess->sess_cmd_map */ - if (sport->port_guid_id.tpg.se_tpg_wwn) - ch->sess = target_setup_session(&sport->port_guid_id.tpg, tag_num, + + mutex_lock(&sport->port_guid_id.mutex); + list_for_each_entry(stpg, &sport->port_guid_id.tpg_list, entry) { + if (!IS_ERR_OR_NULL(ch->sess)) + break; + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, ch->sess_name, ch, NULL); - if (sport->port_gid_id.tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess)) - ch->sess = target_setup_session(&sport->port_gid_id.tpg, tag_num, + } + mutex_unlock(&sport->port_guid_id.mutex); + + mutex_lock(&sport->port_gid_id.mutex); + list_for_each_entry(stpg, &sport->port_gid_id.tpg_list, entry) { + if (!IS_ERR_OR_NULL(ch->sess)) + break; + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, i_port_id, ch, NULL); - /* Retry without leading "0x" */ - if (sport->port_gid_id.tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess)) - ch->sess = target_setup_session(&sport->port_gid_id.tpg, tag_num, + if (!IS_ERR_OR_NULL(ch->sess)) + break; + /* Retry without leading "0x" */ + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, i_port_id + 2, ch, NULL); + } + mutex_unlock(&sport->port_gid_id.mutex); + if (IS_ERR_OR_NULL(ch->sess)) { WARN_ON_ONCE(ch->sess == NULL); ret = PTR_ERR(ch->sess); @@ -3140,6 +3155,10 @@ static void srpt_add_one(struct ib_device *device) sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE; sport->port_attrib.use_srq = false; INIT_WORK(&sport->work, srpt_refresh_port_work); + mutex_init(&sport->port_guid_id.mutex); + INIT_LIST_HEAD(&sport->port_guid_id.tpg_list); + mutex_init(&sport->port_gid_id.mutex); + INIT_LIST_HEAD(&sport->port_gid_id.tpg_list); if (srpt_refresh_port(sport)) { pr_err("MAD registration failed for %s-%d.\n", @@ -3242,18 +3261,6 @@ static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg) return tpg->se_tpg_wwn->priv; } -static struct srpt_port_id *srpt_tpg_to_sport_id(struct se_portal_group *tpg) -{ - struct srpt_port *sport = 
srpt_tpg_to_sport(tpg); - - if (tpg == &sport->port_guid_id.tpg) - return &sport->port_guid_id; - if (tpg == &sport->port_gid_id.tpg) - return &sport->port_gid_id; - WARN_ON_ONCE(true); - return NULL; -} - static struct srpt_port_id *srpt_wwn_to_sport_id(struct se_wwn *wwn) { struct srpt_port *sport = wwn->priv; @@ -3268,7 +3275,9 @@ static struct srpt_port_id *srpt_wwn_to_sport_id(struct se_wwn *wwn) static char *srpt_get_fabric_wwn(struct se_portal_group *tpg) { - return srpt_tpg_to_sport_id(tpg)->name; + struct srpt_tpg *stpg = container_of(tpg, typeof(*stpg), tpg); + + return stpg->sport_id->name; } static u16 srpt_get_tag(struct se_portal_group *tpg) @@ -3725,16 +3734,27 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, const char *name) { struct srpt_port *sport = wwn->priv; - struct se_portal_group *tpg = &srpt_wwn_to_sport_id(wwn)->tpg; - int res; + struct srpt_port_id *sport_id = srpt_wwn_to_sport_id(wwn); + struct srpt_tpg *stpg; + int res = -ENOMEM; - res = core_tpg_register(wwn, tpg, SCSI_PROTOCOL_SRP); - if (res) + stpg = kzalloc(sizeof(*stpg), GFP_KERNEL); + if (!stpg) + return ERR_PTR(res); + stpg->sport_id = sport_id; + res = core_tpg_register(wwn, &stpg->tpg, SCSI_PROTOCOL_SRP); + if (res) { + kfree(stpg); return ERR_PTR(res); + } + + mutex_lock(&sport_id->mutex); + list_add_tail(&stpg->entry, &sport_id->tpg_list); + mutex_unlock(&sport_id->mutex); atomic_inc(&sport->refcount); - return tpg; + return &stpg->tpg; } /** @@ -3743,10 +3763,17 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, */ static void srpt_drop_tpg(struct se_portal_group *tpg) { + struct srpt_tpg *stpg = container_of(tpg, typeof(*stpg), tpg); + struct srpt_port_id *sport_id = stpg->sport_id; struct srpt_port *sport = srpt_tpg_to_sport(tpg); + mutex_lock(&sport_id->mutex); + list_del(&stpg->entry); + mutex_unlock(&sport_id->mutex); + sport->enabled = false; core_tpg_deregister(tpg); + kfree(stpg); srpt_drop_sport_ref(sport); } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 54b03ab4ab1a..2e1a69840857 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -363,17 +363,34 @@ struct srpt_port_attrib { bool use_srq; }; +/** + * struct srpt_tpg - information about a single "target portal group" + * @entry: Entry in @sport_id->tpg_list. + * @sport_id: Port name this TPG is associated with. + * @tpg: LIO TPG data structure. + * + * Zero or more target portal groups are associated with each port name + * (srpt_port_id). With each TPG an ACL list is associated. + */ +struct srpt_tpg { + struct list_head entry; + struct srpt_port_id *sport_id; + struct se_portal_group tpg; +}; + /** * struct srpt_port_id - information about an RDMA port name - * @tpg: TPG associated with the RDMA port. - * @wwn: WWN associated with the RDMA port. - * @name: ASCII representation of the port name. + * @mutex: Protects @tpg_list changes. + * @tpg_list: TPGs associated with the RDMA port name. + * @wwn: WWN associated with the RDMA port name. + * @name: ASCII representation of the port name. * * Multiple sysfs directories can be associated with a single RDMA port. This * data structure represents a single (port, name) pair. 
*/ struct srpt_port_id { - struct se_portal_group tpg; + struct mutex mutex; + struct list_head tpg_list; struct se_wwn wwn; char name[64]; }; -- cgit v1.2.3-59-g8ed1b From 5c7e76fb7cb5071be800c938ebf2c475e140d3f0 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Thu, 24 Oct 2019 17:21:56 +0800 Subject: RDMA/hns: Fix to support 64K page for srq SRQ's page size configuration of BA and buffer should depend on current PAGE_SHIFT, or it can't work in scenario of 64K page. Fixes: c7bcb13442e1 ("RDMA/hns: Add SRQ support for hip08 kernel mode") Link: https://lore.kernel.org/r/1571908917-16220-2-git-send-email-liweihang@hisilicon.com Signed-off-by: Lijun Ou Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 14e24b4ef6ae..7218f6d4101a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6088,11 +6088,11 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, - hr_dev->caps.idx_ba_pg_sz); + hr_dev->caps.idx_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, - hr_dev->caps.idx_buf_pg_sz); + hr_dev->caps.idx_buf_pg_sz + PG_SHIFT_OFFSET); srq_context->idx_nxt_blk_addr = cpu_to_le32(mtts_idx[1] >> PAGE_ADDR_SHIFT); -- cgit v1.2.3-59-g8ed1b From 887803db866a7a4e1817a3cb8a3eee4e9879fed2 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Thu, 24 Oct 2019 17:21:57 +0800 Subject: RDMA/hns: Bugfix for qpc/cqc timer configuration qpc/cqc timer entry size needs one page, but currently they are fixedly configured to 4096, which is not appropriate in 64K page scenarios. So they should be modified to PAGE_SIZE. Fixes: 0e40dc2f70cd ("RDMA/hns: Add timer allocation support for hip08") Link: https://lore.kernel.org/r/1571908917-16220-3-git-send-email-liweihang@hisilicon.com Signed-off-by: Yangyang Li Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 43219d2f7de0..76a14db7028d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -87,8 +87,8 @@ #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 #define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 #define HNS_ROCE_V2_SCCC_ENTRY_SZ 32 -#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ 4096 -#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ 4096 +#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE +#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 #define HNS_ROCE_INVALID_LKEY 0x100 -- cgit v1.2.3-59-g8ed1b From 994195e1537074f56df216a9309f6e366cb35b67 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 24 Oct 2019 14:10:34 +0100 Subject: RDMA/hns: Fix memory leak on 'context' on error return path Currently, the error return path when the call to function dev->dfx->query_cqc_info fails will leak object 'context'. 
Fix this by making the error return path via 'err' return return codes rather than -EMSGSIZE, set ret appropriately for all error return paths and for the memory leak now return via 'err' rather than just returning without freeing context. Link: https://lore.kernel.org/r/20191024131034.19989-1-colin.king@canonical.com Addresses-Coverity: ("Resource leak") Fixes: e1c9a0dc2939 ("RDMA/hns: Dump detailed driver-specific CQ") Signed-off-by: Colin Ian King Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_restrack.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index a0d608ec81c1..06871731ac43 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -95,14 +95,18 @@ static int hns_roce_fill_res_cq_entry(struct sk_buff *msg, ret = hr_dev->dfx->query_cqc_info(hr_dev, hr_cq->cqn, (int *)context); if (ret) - return -EINVAL; + goto err; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); - if (!table_attr) + if (!table_attr) { + ret = -EMSGSIZE; goto err; + } - if (hns_roce_fill_cq(msg, context)) + if (hns_roce_fill_cq(msg, context)) { + ret = -EMSGSIZE; goto err_cancel_table; + } nla_nest_end(msg, table_attr); kfree(context); @@ -113,7 +117,7 @@ err_cancel_table: nla_nest_cancel(msg, table_attr); err: kfree(context); - return -EMSGSIZE; + return ret; } int hns_roce_fill_res_entry(struct sk_buff *msg, -- cgit v1.2.3-59-g8ed1b From 73ab512f720298aabe23b34110e3f6a8545b0ba5 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 27 Oct 2019 22:04:48 +0200 Subject: RDMA/qedr: Fix srqs xarray initialization There was a missing initialization for the srqs xarray. SRQs xarray can also be called from irq context when searching for an element and uses the xa_XXX_irq apis, therefore should be initialized with IRQ flags. Fixes: 9fd15987ed27 ("qedr: Convert srqidr to XArray") Link: https://lore.kernel.org/r/20191027200451.28187-2-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 5136b835e1ba..aa0bda428690 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -357,6 +357,7 @@ static int qedr_alloc_resources(struct qedr_dev *dev) return -ENOMEM; spin_lock_init(&dev->sgid_lock); + xa_init_flags(&dev->srqs, XA_FLAGS_LOCK_IRQ); if (IS_IWARP(dev)) { xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ); -- cgit v1.2.3-59-g8ed1b From 5fdff18b4dc64e2d1e912ad2b90495cd487f791b Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 27 Oct 2019 22:04:49 +0200 Subject: RDMA/qedr: Fix qpids xarray api used The qpids xarray isn't accessed from irq context and therefore there is no need to use the xa_XXX_irq version of the apis. Remove the _irq. 
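The rule of thumb behind the xarray change above is that the locking flags should match the hardest context that ever touches the array. The following is a hedged sketch of a throwaway kernel module (demo_qps, demo_srqs and the module itself are made up for illustration, not qedr code): the array touched only from process context is initialized plainly and used with xa_insert()/xa_erase(), while the array that is also reached from hard-irq context is created with XA_FLAGS_LOCK_IRQ and used through the matching xa_*_irq() accessors.

#include <linux/module.h>
#include <linux/xarray.h>

/* Only touched from process context: the default internal locking is enough. */
static DEFINE_XARRAY(demo_qps);

/* Also looked up from hard-irq context: the internal lock must be taken
 * irq-safe, so the array is created with XA_FLAGS_LOCK_IRQ. */
static DEFINE_XARRAY_FLAGS(demo_srqs, XA_FLAGS_LOCK_IRQ);

static int demo_entry;

static int __init xa_demo_init(void)
{
	int ret;

	ret = xa_insert(&demo_qps, 1, &demo_entry, GFP_KERNEL);
	if (ret)
		return ret;

	ret = xa_insert_irq(&demo_srqs, 1, &demo_entry, GFP_KERNEL);
	if (ret) {
		xa_erase(&demo_qps, 1);
		return ret;
	}

	pr_info("xa_demo: qp=%p srq=%p\n",
		xa_load(&demo_qps, 1), xa_load(&demo_srqs, 1));
	return 0;
}

static void __exit xa_demo_exit(void)
{
	xa_erase(&demo_qps, 1);
	xa_erase_irq(&demo_srqs, 1);
}

module_init(xa_demo_init);
module_exit(xa_demo_exit);
MODULE_LICENSE("GPL");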
Fixes: b6014f9e5f39 ("qedr: Convert qpidr to XArray") Link: https://lore.kernel.org/r/20191027200451.28187-3-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 2 +- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 2 +- drivers/infiniband/hw/qedr/verbs.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index aa0bda428690..1ff5407270d2 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -360,7 +360,7 @@ static int qedr_alloc_resources(struct qedr_dev *dev) xa_init_flags(&dev->srqs, XA_FLAGS_LOCK_IRQ); if (IS_IWARP(dev)) { - xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ); + xa_init(&dev->qps); dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); } diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 22881d4442b9..7fea74739c1f 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -739,7 +739,7 @@ void qedr_iw_qp_rem_ref(struct ib_qp *ibqp) struct qedr_qp *qp = get_qedr_qp(ibqp); if (atomic_dec_and_test(&qp->refcnt)) { - xa_erase_irq(&qp->dev->qps, qp->qp_id); + xa_erase(&qp->dev->qps, qp->qp_id); kfree(qp); } } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 6f3ce86019b7..84b666c67cff 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1918,7 +1918,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, qp->ibqp.qp_num = qp->qp_id; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { - rc = xa_insert_irq(&dev->qps, qp->qp_id, qp, GFP_KERNEL); + rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL); if (rc) goto err; } @@ -2492,7 +2492,7 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (atomic_dec_and_test(&qp->refcnt) && rdma_protocol_iwarp(&dev->ibdev, 1)) { - xa_erase_irq(&dev->qps, qp->qp_id); + xa_erase(&dev->qps, qp->qp_id); kfree(qp); } return 0; -- cgit v1.2.3-59-g8ed1b From 82af6d19d8d9227c22a53ff00b40fb2a4f9fce69 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 27 Oct 2019 22:04:50 +0200 Subject: RDMA/qedr: Fix synchronization methods and memory leaks in qedr Re-design of the iWARP CM related objects reference counting and synchronization methods, to ensure operations are synchronized correctly and that memory allocated for "ep" is properly released. Also makes sure QP memory is not released before ep is finished accessing it. Where as the QP object is created/destroyed by external operations, the ep is created/destroyed by internal operations and represents the tcp connection associated with the QP. QP destruction flow: - needs to wait for ep establishment to complete (either successfully or with error) - needs to wait for ep disconnect to be fully posted to avoid a race condition of disconnect being called after reset. - both the operations above don't always happen, so we use atomic flags to indicate whether the qp destruction flow needs to wait for these completions or not, if the destroy is called before these operations began, the flows will check the flags and not execute them ( connect / disconnect). We use completion structure for waiting for the completions mentioned above. The QP refcnt was modified to kref object. The EP has a kref added to it to handle additional worker thread accessing it. 
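The flag-plus-completion handshake described above can be modelled with ordinary userspace primitives. This is only a rough analogue under assumed names (fake_qp, connect_started and so on), not the qedr implementation: whichever path sets the flag second knows the other path is already active, so destroy either proceeds immediately or waits for the connect side to signal completion.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct fake_qp {
	atomic_flag connect_started;	/* stands in for QEDR_IWARP_CM_WAIT_FOR_CONNECT */
	pthread_mutex_t lock;		/* lock + cond + completed model the completion */
	pthread_cond_t done;
	int completed;
};

static void signal_done(struct fake_qp *qp)
{
	pthread_mutex_lock(&qp->lock);
	qp->completed = 1;
	pthread_cond_signal(&qp->done);
	pthread_mutex_unlock(&qp->lock);
}

static void wait_done(struct fake_qp *qp)
{
	pthread_mutex_lock(&qp->lock);
	while (!qp->completed)
		pthread_cond_wait(&qp->done, &qp->lock);
	pthread_mutex_unlock(&qp->lock);
}

static void *connect_thread(void *arg)
{
	struct fake_qp *qp = arg;

	/* Connect path: claim the flag; if destroy claimed it first, bail out. */
	if (atomic_flag_test_and_set(&qp->connect_started))
		return NULL;
	/* ...connection establishment would run here... */
	signal_done(qp);	/* wake a destroy that may already be waiting */
	return NULL;
}

static void destroy_qp(struct fake_qp *qp)
{
	/* Destroy path: if connect already started, wait for it to finish. */
	if (atomic_flag_test_and_set(&qp->connect_started))
		wait_done(qp);
	printf("qp torn down safely\n");
}

int main(void)
{
	struct fake_qp qp = {
		.connect_started = ATOMIC_FLAG_INIT,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.done = PTHREAD_COND_INITIALIZER,
	};
	pthread_t t;

	pthread_create(&t, NULL, connect_thread, &qp);
	destroy_qp(&qp);
	pthread_join(&t, NULL);
	return 0;
}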
Memory Leaks - https://www.spinics.net/lists/linux-rdma/msg83762.html Concurrency not managed correctly - https://www.spinics.net/lists/linux-rdma/msg67949.html Fixes: de0089e692a9 ("RDMA/qedr: Add iWARP connection management qp related callbacks") Link: https://lore.kernel.org/r/20191027200451.28187-4-michal.kalderon@marvell.com Reported-by: Chuck Lever Reported-by: Jason Gunthorpe Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr.h | 23 ++++- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 148 +++++++++++++++++++++----------- drivers/infiniband/hw/qedr/verbs.c | 62 ++++++++----- 3 files changed, 158 insertions(+), 75 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 0cfd849b13d6..8e927f6c1520 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -40,6 +40,7 @@ #include #include #include +#include #include "qedr_hsi_rdma.h" #define QEDR_NODE_DESC "QLogic 579xx RoCE HCA" @@ -377,10 +378,20 @@ enum qedr_qp_err_bitmap { QEDR_QP_ERR_RQ_PBL_FULL = 32, }; +enum qedr_qp_create_type { + QEDR_QP_CREATE_NONE, + QEDR_QP_CREATE_USER, + QEDR_QP_CREATE_KERNEL, +}; + +enum qedr_iwarp_cm_flags { + QEDR_IWARP_CM_WAIT_FOR_CONNECT = BIT(0), + QEDR_IWARP_CM_WAIT_FOR_DISCONNECT = BIT(1), +}; + struct qedr_qp { struct ib_qp ibqp; /* must be first */ struct qedr_dev *dev; - struct qedr_iw_ep *ep; struct qedr_qp_hwq_info sq; struct qedr_qp_hwq_info rq; @@ -395,6 +406,7 @@ struct qedr_qp { u32 id; struct qedr_pd *pd; enum ib_qp_type qp_type; + enum qedr_qp_create_type create_type; struct qed_rdma_qp *qed_qp; u32 qp_id; u16 icid; @@ -437,8 +449,11 @@ struct qedr_qp { /* Relevant to qps created from user space only (applications) */ struct qedr_userq usq; struct qedr_userq urq; - atomic_t refcnt; - bool destroyed; + + /* synchronization objects used with iwarp ep */ + struct kref refcnt; + struct completion iwarp_cm_comp; + unsigned long iwarp_cm_flags; /* enum iwarp_cm_flags */ }; struct qedr_ah { @@ -531,7 +546,7 @@ struct qedr_iw_ep { struct iw_cm_id *cm_id; struct qedr_qp *qp; void *qed_context; - u8 during_connect; + struct kref refcnt; }; static inline diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 7fea74739c1f..5e9732990be5 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -79,6 +79,27 @@ qedr_fill_sockaddr6(const struct qed_iwarp_cm_info *cm_info, } } +static void qedr_iw_free_qp(struct kref *ref) +{ + struct qedr_qp *qp = container_of(ref, struct qedr_qp, refcnt); + + kfree(qp); +} + +static void +qedr_iw_free_ep(struct kref *ref) +{ + struct qedr_iw_ep *ep = container_of(ref, struct qedr_iw_ep, refcnt); + + if (ep->qp) + kref_put(&ep->qp->refcnt, qedr_iw_free_qp); + + if (ep->cm_id) + ep->cm_id->rem_ref(ep->cm_id); + + kfree(ep); +} + static void qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) { @@ -93,6 +114,7 @@ qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) ep->dev = dev; ep->qed_context = params->ep_context; + kref_init(&ep->refcnt); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REQUEST; @@ -141,12 +163,10 @@ qedr_iw_close_event(void *context, struct qed_iwarp_cm_event_params *params) { struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; - if (ep->cm_id) { + if (ep->cm_id) qedr_iw_issue_event(context, params, IW_CM_EVENT_CLOSE); - 
ep->cm_id->rem_ref(ep->cm_id); - ep->cm_id = NULL; - } + kref_put(&ep->refcnt, qedr_iw_free_ep); } static void @@ -186,11 +206,13 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) struct qedr_qp *qp = ep->qp; struct iw_cm_event event; - if (qp->destroyed) { - kfree(dwork); - qedr_iw_qp_rem_ref(&qp->ibqp); - return; - } + /* The qp won't be released until we release the ep. + * the ep's refcnt was increased before calling this + * function, therefore it is safe to access qp + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT, + &qp->iwarp_cm_flags)) + goto out; memset(&event, 0, sizeof(event)); event.status = dwork->status; @@ -204,7 +226,6 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) else qp_params.new_state = QED_ROCE_QP_STATE_SQD; - kfree(dwork); if (ep->cm_id) ep->cm_id->event_handler(ep->cm_id, &event); @@ -214,7 +235,10 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) dev->ops->rdma_modify_qp(dev->rdma_ctx, qp->qed_qp, &qp_params); - qedr_iw_qp_rem_ref(&qp->ibqp); + complete(&ep->qp->iwarp_cm_comp); +out: + kfree(dwork); + kref_put(&ep->refcnt, qedr_iw_free_ep); } static void @@ -224,13 +248,17 @@ qedr_iw_disconnect_event(void *context, struct qedr_discon_work *work; struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; struct qedr_dev *dev = ep->dev; - struct qedr_qp *qp = ep->qp; work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return; - qedr_iw_qp_add_ref(&qp->ibqp); + /* We can't get a close event before disconnect, but since + * we're scheduling a work queue we need to make sure close + * won't delete the ep, so we increase the refcnt + */ + kref_get(&ep->refcnt); + work->ep = ep; work->event = params->event; work->status = params->status; @@ -252,16 +280,30 @@ qedr_iw_passive_complete(void *context, if ((params->status == -ECONNREFUSED) && (!ep->qp)) { DP_DEBUG(dev, QEDR_MSG_IWARP, "PASSIVE connection refused releasing ep...\n"); - kfree(ep); + kref_put(&ep->refcnt, qedr_iw_free_ep); return; } + complete(&ep->qp->iwarp_cm_comp); qedr_iw_issue_event(context, params, IW_CM_EVENT_ESTABLISHED); if (params->status < 0) qedr_iw_close_event(context, params); } +static void +qedr_iw_active_complete(void *context, + struct qed_iwarp_cm_event_params *params) +{ + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; + + complete(&ep->qp->iwarp_cm_comp); + qedr_iw_issue_event(context, params, IW_CM_EVENT_CONNECT_REPLY); + + if (params->status < 0) + kref_put(&ep->refcnt, qedr_iw_free_ep); +} + static int qedr_iw_mpa_reply(void *context, struct qed_iwarp_cm_event_params *params) { @@ -288,27 +330,15 @@ qedr_iw_event_handler(void *context, struct qed_iwarp_cm_event_params *params) qedr_iw_mpa_reply(context, params); break; case QED_IWARP_EVENT_PASSIVE_COMPLETE: - ep->during_connect = 0; qedr_iw_passive_complete(context, params); break; - case QED_IWARP_EVENT_ACTIVE_COMPLETE: - ep->during_connect = 0; - qedr_iw_issue_event(context, - params, - IW_CM_EVENT_CONNECT_REPLY); - if (params->status < 0) { - struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; - - ep->cm_id->rem_ref(ep->cm_id); - ep->cm_id = NULL; - } + qedr_iw_active_complete(context, params); break; case QED_IWARP_EVENT_DISCONNECT: qedr_iw_disconnect_event(context, params); break; case QED_IWARP_EVENT_CLOSE: - ep->during_connect = 0; qedr_iw_close_event(context, params); break; case QED_IWARP_EVENT_RQ_EMPTY: @@ -476,6 +506,19 @@ qedr_addr6_resolve(struct qedr_dev *dev, return rc; } +struct qedr_qp *qedr_iw_load_qp(struct qedr_dev *dev, u32 qpn) +{ + struct 
qedr_qp *qp; + + xa_lock(&dev->qps); + qp = xa_load(&dev->qps, qpn); + if (qp) + kref_get(&qp->refcnt); + xa_unlock(&dev->qps); + + return qp; +} + int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { struct qedr_dev *dev = get_qedr_dev(cm_id->device); @@ -491,10 +534,6 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int rc = 0; int i; - qp = xa_load(&dev->qps, conn_param->qpn); - if (unlikely(!qp)) - return -EINVAL; - laddr = (struct sockaddr_in *)&cm_id->m_local_addr; raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; @@ -516,8 +555,15 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -ENOMEM; ep->dev = dev; + kref_init(&ep->refcnt); + + qp = qedr_iw_load_qp(dev, conn_param->qpn); + if (!qp) { + rc = -EINVAL; + goto err; + } + ep->qp = qp; - qp->ep = ep; cm_id->add_ref(cm_id); ep->cm_id = cm_id; @@ -580,16 +626,20 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) in_params.qp = qp->qed_qp; memcpy(in_params.local_mac_addr, dev->ndev->dev_addr, ETH_ALEN); - ep->during_connect = 1; + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + goto err; /* QP already being destroyed */ + rc = dev->ops->iwarp_connect(dev->rdma_ctx, &in_params, &out_params); - if (rc) + if (rc) { + complete(&qp->iwarp_cm_comp); goto err; + } return rc; err: - cm_id->rem_ref(cm_id); - kfree(ep); + kref_put(&ep->refcnt, qedr_iw_free_ep); return rc; } @@ -677,18 +727,17 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct qedr_dev *dev = ep->dev; struct qedr_qp *qp; struct qed_iwarp_accept_in params; - int rc; + int rc = 0; DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); - qp = xa_load(&dev->qps, conn_param->qpn); + qp = qedr_iw_load_qp(dev, conn_param->qpn); if (!qp) { DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn); return -EINVAL; } ep->qp = qp; - qp->ep = ep; cm_id->add_ref(cm_id); ep->cm_id = cm_id; @@ -700,15 +749,21 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) params.ird = conn_param->ird; params.ord = conn_param->ord; - ep->during_connect = 1; + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + goto err; /* QP already destroyed */ + rc = dev->ops->iwarp_accept(dev->rdma_ctx, ¶ms); - if (rc) + if (rc) { + complete(&qp->iwarp_cm_comp); goto err; + } return rc; + err: - ep->during_connect = 0; - cm_id->rem_ref(cm_id); + kref_put(&ep->refcnt, qedr_iw_free_ep); + return rc; } @@ -731,17 +786,14 @@ void qedr_iw_qp_add_ref(struct ib_qp *ibqp) { struct qedr_qp *qp = get_qedr_qp(ibqp); - atomic_inc(&qp->refcnt); + kref_get(&qp->refcnt); } void qedr_iw_qp_rem_ref(struct ib_qp *ibqp) { struct qedr_qp *qp = get_qedr_qp(ibqp); - if (atomic_dec_and_test(&qp->refcnt)) { - xa_erase(&qp->dev->qps, qp->qp_id); - kfree(qp); - } + kref_put(&qp->refcnt, qedr_iw_free_qp); } struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 84b666c67cff..a17b388ee3b3 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -51,6 +51,7 @@ #include "verbs.h" #include #include "qedr_roce_cm.h" +#include "qedr_iw_cm.h" #define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm) #define RDMA_MAX_SGE_PER_SRQ (4) @@ -1193,7 +1194,10 @@ static void 
qedr_set_common_qp_params(struct qedr_dev *dev, struct ib_qp_init_attr *attrs) { spin_lock_init(&qp->q_lock); - atomic_set(&qp->refcnt, 1); + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + kref_init(&qp->refcnt); + init_completion(&qp->iwarp_cm_comp); + } qp->pd = pd; qp->qp_type = attrs->qp_type; qp->max_inline_data = attrs->cap.max_inline_data; @@ -1592,6 +1596,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev, int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; + qp->create_type = QEDR_QP_CREATE_USER; memset(&ureq, 0, sizeof(ureq)); rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); if (rc) { @@ -1805,6 +1810,7 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev, u32 n_sq_entries; memset(&in_params, 0, sizeof(in_params)); + qp->create_type = QEDR_QP_CREATE_KERNEL; /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in * the ring. The ring should allow at least a single WR, even if the @@ -2437,7 +2443,7 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, return rc; } - if (udata) + if (qp->create_type == QEDR_QP_CREATE_USER) qedr_cleanup_user(dev, qp); else qedr_cleanup_kernel(dev, qp); @@ -2467,34 +2473,44 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) qedr_modify_qp(ibqp, &attr, attr_mask, NULL); } } else { - /* Wait for the connect/accept to complete */ - if (qp->ep) { - int wait_count = 1; - - while (qp->ep->during_connect) { - DP_DEBUG(dev, QEDR_MSG_QP, - "Still in during connect/accept\n"); - - msleep(100); - if (wait_count++ > 200) { - DP_NOTICE(dev, - "during connect timeout\n"); - break; - } - } - } + /* If connection establishment started the WAIT_FOR_CONNECT + * bit will be on and we need to Wait for the establishment + * to complete before destroying the qp. + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + wait_for_completion(&qp->iwarp_cm_comp); + + /* If graceful disconnect started, the WAIT_FOR_DISCONNECT + * bit will be on, and we need to wait for the disconnect to + * complete before continuing. We can use the same completion, + * iwarp_cm_comp, since this is the only place that waits for + * this completion and it is sequential. In addition, + * disconnect can't occur before the connection is fully + * established, therefore if WAIT_FOR_DISCONNECT is on it + * means WAIT_FOR_CONNECT is also on and the completion for + * CONNECT already occurred. + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT, + &qp->iwarp_cm_flags)) + wait_for_completion(&qp->iwarp_cm_comp); } if (qp->qp_type == IB_QPT_GSI) qedr_destroy_gsi_qp(dev); + /* We need to remove the entry from the xarray before we release the + * qp_id to avoid a race of the qp_id being reallocated and failing + * on xa_insert + */ + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + xa_erase(&dev->qps, qp->qp_id); + qedr_free_qp_resources(dev, qp, udata); - if (atomic_dec_and_test(&qp->refcnt) && - rdma_protocol_iwarp(&dev->ibdev, 1)) { - xa_erase(&dev->qps, qp->qp_id); - kfree(qp); - } + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_iw_qp_rem_ref(&qp->ibqp); + return 0; } -- cgit v1.2.3-59-g8ed1b From 24e412c1e00ebfe73619e6b88cbc26c2c7d41b85 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 27 Oct 2019 22:04:51 +0200 Subject: RDMA/qedr: Fix memory leak in user qp and mr User QPs pbl's weren't freed properly. MR pbls weren't freed properly. 
Fixes: e0290cce6ac0 ("qedr: Add support for memory registeration verbs") Link: https://lore.kernel.org/r/20191027200451.28187-5-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/verbs.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index a17b388ee3b3..8b4240c1cc76 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1581,6 +1581,14 @@ static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) ib_umem_release(qp->urq.umem); qp->urq.umem = NULL; + + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl); + qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + kfree(qp->urq.pbl_tbl); + } } static int qedr_create_user_qp(struct qedr_dev *dev, @@ -2689,8 +2697,8 @@ int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); - if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR)) - qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); + if (mr->type != QEDR_MR_DMA) + free_mr_info(dev, &mr->info); /* it could be user registered memory. */ ib_umem_release(mr->umem); -- cgit v1.2.3-59-g8ed1b From 5212c3fda2225af66a6a83afd9eb0a6f0c80b99c Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Fri, 25 Oct 2019 16:27:02 +0530 Subject: RDMA/iw_cxgb4: Report correct port speed/width Query speed/width from corresponding netdev. Link: https://lore.kernel.org/r/1572001022-4533-1-git-send-email-bharat@chelsio.com Signed-off-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/provider.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index d373ac0fe2cb..ba83d942997c 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -305,7 +305,10 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro static int c4iw_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { + int ret = 0; pr_debug("ibdev %p\n", ibdev); + ret = ib_get_eth_speed(ibdev, port, &props->active_speed, + &props->active_width); props->port_cap_flags = IB_PORT_CM_SUP | @@ -315,11 +318,9 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port, IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; props->pkey_tbl_len = 1; - props->active_width = 2; - props->active_speed = IB_SPEED_DDR; props->max_msg_sz = -1; - return 0; + return ret; } static ssize_t hw_rev_show(struct device *dev, -- cgit v1.2.3-59-g8ed1b From 0edefddbae396e50eb7887d279d0c4bb4d7a6384 Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Fri, 25 Oct 2019 16:29:03 +0200 Subject: RDMA/siw: Fix post_recv QP state locking Do not release qp state lock if not previously acquired. 
Fixes: cf049bb31f71 ("RDMA/siw: Fix SQ/RQ drain logic") Link: https://lore.kernel.org/r/20191025142903.20625-1-bmt@zurich.ibm.com Reported-by: Dan Carpenter Signed-off-by: Bernard Metzler Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_verbs.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index c0574ddc98fa..726a5924ea13 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -990,7 +990,6 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, } if (!qp->kernel_verbs) { siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n"); - up_read(&qp->state_lock); *bad_wr = wr; return -EINVAL; } -- cgit v1.2.3-59-g8ed1b From 97458fd510917f142c417245fa5701a892ce69d7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 25 Oct 2019 15:58:28 -0700 Subject: RDMA/rxe: Increase DMA max_segment_size parameter Increase the DMA max_segment_size parameter from 64 KB to 2 GB. Link: https://lore.kernel.org/r/20191025225830.257535-3-bvanassche@acm.org Signed-off-by: Bart Van Assche Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 3 +++ drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + 2 files changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index fa47bdcc7f54..9dd4bd7aea92 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1175,6 +1175,9 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); dev->dev.dma_ops = &dma_virt_ops; + dev->dev.dma_parms = &rxe->dma_parms; + rxe->dma_parms = (struct device_dma_parameters) + { .max_segment_size = SZ_2G }; dma_coerce_mask_and_coherent(&dev->dev, dma_get_required_mask(&dev->dev)); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 5c4b2239129c..95834206c80c 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -384,6 +384,7 @@ struct rxe_port { struct rxe_dev { struct ib_device ib_dev; struct ib_device_attr attr; + struct device_dma_parameters dma_parms; int max_ucontext; int max_inline_data; struct mutex usdev_lock; -- cgit v1.2.3-59-g8ed1b From a401fb819cd6586c2c983dc199be4a9b44c30661 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 25 Oct 2019 15:58:29 -0700 Subject: RDMA/siw: Increase DMA max_segment_size parameter Increase the DMA max_segment_size parameter from 64 KB to 2 GB. 
Link: https://lore.kernel.org/r/20191025225830.257535-4-bvanassche@acm.org Signed-off-by: Bart Van Assche Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw.h | 1 + drivers/infiniband/sw/siw/siw_main.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index dba4535494ab..1ea3ed249e7b 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -70,6 +70,7 @@ struct siw_pd { struct siw_device { struct ib_device base_dev; + struct device_dma_parameters dma_parms; struct net_device *netdev; struct siw_dev_cap attrs; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index b8fe43074ad6..48e45a852b51 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -382,6 +382,9 @@ static struct siw_device *siw_device_create(struct net_device *netdev) base_dev->phys_port_cnt = 1; base_dev->dev.parent = parent; base_dev->dev.dma_ops = &dma_virt_ops; + base_dev->dev.dma_parms = &sdev->dma_parms; + sdev->dma_parms = (struct device_dma_parameters) + { .max_segment_size = SZ_2G }; base_dev->num_comp_vectors = num_possible_cpus(); ib_set_device_ops(base_dev, &siw_device_ops); -- cgit v1.2.3-59-g8ed1b From c9121262d57b8a3be4f08073546436ba0128ca6a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 25 Oct 2019 15:58:30 -0700 Subject: RDMA/core: Set DMA parameters correctly The dma_set_max_seg_size() call in setup_dma_device() does not have any effect since device->dev.dma_parms is NULL. Fix this by initializing device->dev.dma_parms first. Link: https://lore.kernel.org/r/20191025225830.257535-5-bvanassche@acm.org Fixes: d10bcf947a3e ("RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs") Signed-off-by: Bart Van Assche Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index eb35b663a742..a93c23867fb5 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1196,9 +1196,21 @@ static void setup_dma_device(struct ib_device *device) WARN_ON_ONCE(!parent); device->dma_device = parent; } - /* Setup default max segment size for all IB devices */ - dma_set_max_seg_size(device->dma_device, SZ_2G); + if (!device->dev.dma_parms) { + if (parent) { + /* + * The caller did not provide DMA parameters, so + * 'parent' probably represents a PCI device. The PCI + * core sets the maximum segment size to 64 + * KB. Increase this parameter to 2 GB. + */ + device->dev.dma_parms = parent->dma_parms; + dma_set_max_seg_size(device->dma_device, SZ_2G); + } else { + WARN_ON_ONCE(true); + } + } } /* -- cgit v1.2.3-59-g8ed1b From a52dc3a100958f4bf5e921067ba626c2caf8e55f Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Mon, 28 Oct 2019 18:14:52 +0000 Subject: RDMA/vmw_pvrdma: Use resource ids from physical device if available This change allows the RDMA stack to use physical resource numbers if they are passed up from the device. This is accomplished by separating the concept of the QP number from the QP handle. Previously, the two were the same, as the QP number was exposed to the guest and also used to reference a virtual QP in the device backend. 
With physical resource numbers exposed, the QP number given to the guest is the number assigned from the physical HCA's QP, while the QP handle is still the internal handle used to reference a virtual QP. Regardless of whether the device is exposing physical ids, the driver will still try to pick up the QP handle from the backend if possible. The MR keys exposed to the guest will also be the MR keys created by the physical HCA, instead of virtual MR keys. The distinction between handle and keys is already present for MRs so there is no need to do anything special here. A new version of the create QP response has been added to the device API to pass up the QP number and handle. The driver will also report these to userspace in the udata response if userspace supports it or not create the queuepair if not. I also had to do a refactor of the destroy qp code to reuse it if we fail to copy to userspace. Link: https://lore.kernel.org/r/20191028181444.19448-1-aditr@vmware.com Reviewed-by: Jorgen Hansen Signed-off-by: Adit Ranadive Signed-off-by: Bryan Tan Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 15 ++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 115 ++++++++++++++++------ include/uapi/rdma/vmw_pvrdma-abi.h | 5 + 3 files changed, 106 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index 8f9749d54688..86a6c054ea26 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -58,7 +58,8 @@ #define PVRDMA_ROCEV1_VERSION 17 #define PVRDMA_ROCEV2_VERSION 18 #define PVRDMA_PPN64_VERSION 19 -#define PVRDMA_VERSION PVRDMA_PPN64_VERSION +#define PVRDMA_QPHANDLE_VERSION 20 +#define PVRDMA_VERSION PVRDMA_QPHANDLE_VERSION #define PVRDMA_BOARD_ID 1 #define PVRDMA_REV_ID 1 @@ -581,6 +582,17 @@ struct pvrdma_cmd_create_qp_resp { u32 max_inline_data; }; +struct pvrdma_cmd_create_qp_resp_v2 { + struct pvrdma_cmd_resp_hdr hdr; + u32 qpn; + u32 qp_handle; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + struct pvrdma_cmd_modify_qp { struct pvrdma_cmd_hdr hdr; u32 qp_handle; @@ -663,6 +675,7 @@ union pvrdma_cmd_resp { struct pvrdma_cmd_create_cq_resp create_cq_resp; struct pvrdma_cmd_resize_cq_resp resize_cq_resp; struct pvrdma_cmd_create_qp_resp create_qp_resp; + struct pvrdma_cmd_create_qp_resp_v2 create_qp_resp_v2; struct pvrdma_cmd_query_qp_resp query_qp_resp; struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp; struct pvrdma_cmd_create_srq_resp create_srq_resp; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index bca6a58a442e..22daf2389d95 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -52,6 +52,9 @@ #include "pvrdma.h" +static void __pvrdma_destroy_qp(struct pvrdma_dev *dev, + struct pvrdma_qp *qp); + static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq, struct pvrdma_cq **recv_cq) { @@ -195,7 +198,9 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_qp *cmd = &req.create_qp; struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp; + struct pvrdma_cmd_create_qp_resp_v2 *resp_v2 = &rsp.create_qp_resp_v2; struct pvrdma_create_qp ucmd; + struct pvrdma_create_qp_resp qp_resp = {}; unsigned long flags; int ret; bool is_srq = 
!!init_attr->srq; @@ -260,6 +265,15 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, goto err_qp; } + /* Userspace supports qpn and qp handles? */ + if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION && + udata->outlen < sizeof(qp_resp)) { + dev_warn(&dev->pdev->dev, + "create queuepair not supported\n"); + ret = -EOPNOTSUPP; + goto err_qp; + } + if (!is_srq) { /* set qp->sq.wqe_cnt, shift, buf_size.. */ qp->rumem = ib_umem_get(udata, ucmd.rbuf_addr, @@ -379,13 +393,33 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, } /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */ - qp->qp_handle = resp->qpn; qp->port = init_attr->port_num; - qp->ibqp.qp_num = resp->qpn; + + if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION) { + qp->ibqp.qp_num = resp_v2->qpn; + qp->qp_handle = resp_v2->qp_handle; + } else { + qp->ibqp.qp_num = resp->qpn; + qp->qp_handle = resp->qpn; + } + spin_lock_irqsave(&dev->qp_tbl_lock, flags); dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp; spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); + if (udata) { + qp_resp.qpn = qp->ibqp.qp_num; + qp_resp.qp_handle = qp->qp_handle; + + if (ib_copy_to_udata(udata, &qp_resp, + min(udata->outlen, sizeof(qp_resp)))) { + dev_warn(&dev->pdev->dev, + "failed to copy back udata\n"); + __pvrdma_destroy_qp(dev, qp); + return ERR_PTR(-EINVAL); + } + } + return &qp->ibqp; err_pdir: @@ -400,27 +434,15 @@ err_qp: return ERR_PTR(ret); } -static void pvrdma_free_qp(struct pvrdma_qp *qp) +static void _pvrdma_free_qp(struct pvrdma_qp *qp) { + unsigned long flags; struct pvrdma_dev *dev = to_vdev(qp->ibqp.device); - struct pvrdma_cq *scq; - struct pvrdma_cq *rcq; - unsigned long flags, scq_flags, rcq_flags; - - /* In case cq is polling */ - get_cqs(qp, &scq, &rcq); - pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); - - _pvrdma_flush_cqe(qp, scq); - if (scq != rcq) - _pvrdma_flush_cqe(qp, rcq); spin_lock_irqsave(&dev->qp_tbl_lock, flags); dev->qp_tbl[qp->qp_handle] = NULL; spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); - pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); - if (refcount_dec_and_test(&qp->refcnt)) complete(&qp->free); wait_for_completion(&qp->free); @@ -435,34 +457,71 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp) atomic_dec(&dev->num_qps); } -/** - * pvrdma_destroy_qp - destroy a queue pair - * @qp: the queue pair to destroy - * @udata: user data or null for kernel object - * - * @return: 0 on success. - */ -int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) +static void pvrdma_free_qp(struct pvrdma_qp *qp) +{ + struct pvrdma_cq *scq; + struct pvrdma_cq *rcq; + unsigned long scq_flags, rcq_flags; + + /* In case cq is polling */ + get_cqs(qp, &scq, &rcq); + pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + _pvrdma_flush_cqe(qp, scq); + if (scq != rcq) + _pvrdma_flush_cqe(qp, rcq); + + /* + * We're now unlocking the CQs before clearing out the qp handle this + * should still be safe. We have destroyed the backend QP and flushed + * the CQEs so there should be no other completions for this QP. 
+ */ + pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + _pvrdma_free_qp(qp); +} + +static inline void _pvrdma_destroy_qp_work(struct pvrdma_dev *dev, + u32 qp_handle) { - struct pvrdma_qp *vqp = to_vqp(qp); union pvrdma_cmd_req req; struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp; int ret; memset(cmd, 0, sizeof(*cmd)); cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP; - cmd->qp_handle = vqp->qp_handle; + cmd->qp_handle = qp_handle; - ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0); + ret = pvrdma_cmd_post(dev, &req, NULL, 0); if (ret < 0) - dev_warn(&to_vdev(qp->device)->pdev->dev, + dev_warn(&dev->pdev->dev, "destroy queuepair failed, error: %d\n", ret); +} +/** + * pvrdma_destroy_qp - destroy a queue pair + * @qp: the queue pair to destroy + * @udata: user data or null for kernel object + * + * @return: always 0. + */ +int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) +{ + struct pvrdma_qp *vqp = to_vqp(qp); + + _pvrdma_destroy_qp_work(to_vdev(qp->device), vqp->qp_handle); pvrdma_free_qp(vqp); return 0; } +static void __pvrdma_destroy_qp(struct pvrdma_dev *dev, + struct pvrdma_qp *qp) +{ + _pvrdma_destroy_qp_work(dev, qp->qp_handle); + _pvrdma_free_qp(qp); +} + /** * pvrdma_modify_qp - modify queue pair attributes * @ibqp: the queue pair diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h index 6e73f0274e41..f8b638c73371 100644 --- a/include/uapi/rdma/vmw_pvrdma-abi.h +++ b/include/uapi/rdma/vmw_pvrdma-abi.h @@ -179,6 +179,11 @@ struct pvrdma_create_qp { __aligned_u64 qp_addr; }; +struct pvrdma_create_qp_resp { + __u32 qpn; + __u32 qp_handle; +}; + /* PVRDMA masked atomic compare and swap */ struct pvrdma_ex_cmp_swap { __aligned_u64 swap_val; -- cgit v1.2.3-59-g8ed1b From fb985e278a30224183fdf3d56e2f69cfdef88d4e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:21 -0300 Subject: RDMA/mlx5: Use SRCU properly in ODP prefetch When working with SRCU protected xarrays the xarray itself should be the SRCU 'update' point. Instead prefetch is using live as the SRCU update point and this prevents switching the locking design to use the xarray instead. To solve this the prefetch must only read from the xarray once, and hold on to the actual MR pointer for the duration of the async operation. Incrementing num_pending_prefetch delays destruction of the MR, so it is suitable. Prefetch calls directly to the pagefault_mr using the MR pointer and only does a single xarray lookup. All the testing if a MR is prefetchable or not is now done only in the prefetch code and removed from the pagefault critical path. 
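The lookup-once-and-hold-a-reference idea is independent of SRCU and can be shown with a plain reference count. Below is a hedged userspace analogue (toy_mr, mr_get/mr_put and the fixed-size table are invented for illustration; this is not the mlx5 code): the prefetch-like path finds the object exactly once, takes a reference before the lookup lock is dropped, and keeps using the pointer afterwards; the object is freed only by the final put, which is the role incrementing num_pending_prefetch plays for the MR.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_mr {
	atomic_int refs;
	int key;
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static struct toy_mr *table[16];	/* stands in for the mkey xarray */

/* Single lookup: take a reference while still under the lookup lock. */
static struct toy_mr *mr_get(int key)
{
	struct toy_mr *mr;

	pthread_mutex_lock(&table_lock);
	mr = table[key & 15];
	if (mr && mr->key == key)
		atomic_fetch_add(&mr->refs, 1);	/* like num_pending_prefetch++ */
	else
		mr = NULL;
	pthread_mutex_unlock(&table_lock);
	return mr;	/* safe to use after the lock is dropped */
}

static void mr_put(struct toy_mr *mr)
{
	if (atomic_fetch_sub(&mr->refs, 1) == 1)
		free(mr);	/* destruction waits for the last user */
}

int main(void)
{
	struct toy_mr *mr = malloc(sizeof(*mr));
	struct toy_mr *work;

	atomic_init(&mr->refs, 1);	/* the table owns one reference */
	mr->key = 5;
	table[5] = mr;

	work = mr_get(5);	/* async work now holds its own reference */

	table[5] = NULL;	/* "destroy": unpublish and drop the table's ref */
	mr_put(mr);

	if (work) {
		printf("async work still owns key %d\n", work->key);
		mr_put(work);	/* last put actually frees the object */
	}
	return 0;
}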
Link: https://lore.kernel.org/r/20191009160934.3143-2-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 262 ++++++++++++++++++--------------------- 1 file changed, 121 insertions(+), 141 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 3f9478d19376..09dac97e4ca4 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -606,16 +606,13 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); } -#define MLX5_PF_FLAGS_PREFETCH BIT(0) #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) -static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - u64 io_virt, size_t bcnt, u32 *bytes_mapped, - u32 flags) +static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, + u32 *bytes_mapped, u32 flags) { int npages = 0, current_seq, page_shift, ret, np; struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; - bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; u64 access_mask; u64 start_idx, page_mask; struct ib_umem_odp *odp; @@ -639,14 +636,6 @@ next_mr: start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; access_mask = ODP_READ_ALLOWED_BIT; - if (prefetch && !downgrade && !odp->umem.writable) { - /* prefetch with write-access must - * be supported by the MR - */ - ret = -EINVAL; - goto out; - } - if (odp->umem.writable && !downgrade) access_mask |= ODP_WRITE_ALLOWED_BIT; @@ -681,7 +670,8 @@ next_mr: if (ret < 0) { if (ret != -EAGAIN) - mlx5_ib_err(dev, "Failed to update mkey page tables\n"); + mlx5_ib_err(mr->dev, + "Failed to update mkey page tables\n"); goto out; } @@ -700,8 +690,10 @@ next_mr: io_virt += size; next = odp_next(odp); if (unlikely(!next || ib_umem_start(next) != io_virt)) { - mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n", - io_virt, next); + mlx5_ib_dbg( + mr->dev, + "next implicit leaf removed at 0x%llx. got %p\n", + io_virt, next); return -EAGAIN; } odp = next; @@ -718,7 +710,7 @@ out: if (!wait_for_completion_timeout(&odp->notifier_completion, timeout)) { mlx5_ib_warn( - dev, + mr->dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n", current_seq, odp->notifiers_seq, odp->notifiers_count); @@ -775,10 +767,9 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 key, u64 io_virt, size_t bcnt, u32 *bytes_committed, - u32 *bytes_mapped, u32 flags) + u32 *bytes_mapped) { int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0; - bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; @@ -800,12 +791,6 @@ next_mr: goto srcu_unlock; } - if (prefetch && mmkey->type != MLX5_MKEY_MR) { - mlx5_ib_dbg(dev, "prefetch is allowed only for MR\n"); - ret = -EINVAL; - goto srcu_unlock; - } - switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); @@ -815,17 +800,6 @@ next_mr: goto srcu_unlock; } - if (prefetch) { - if (!is_odp_mr(mr) || - mr->ibmr.pd != pd) { - mlx5_ib_dbg(dev, "Invalid prefetch request: %s\n", - is_odp_mr(mr) ? 
"MR is not ODP" : - "PD is not of the MR"); - ret = -EINVAL; - goto srcu_unlock; - } - } - if (!is_odp_mr(mr)) { mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", key); @@ -835,7 +809,7 @@ next_mr: goto srcu_unlock; } - ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped, flags); + ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0); if (ret < 0) goto srcu_unlock; @@ -1009,7 +983,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, NULL, key, io_virt, bcnt, &pfault->bytes_committed, - bytes_mapped, 0); + bytes_mapped); if (ret < 0) break; npages += ret; @@ -1292,8 +1266,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, } ret = pagefault_single_data_segment(dev, NULL, rkey, address, length, - &pfault->bytes_committed, NULL, - 0); + &pfault->bytes_committed, NULL); if (ret == -EAGAIN) { /* We're racing with an invalidation, don't prefetch */ prefetch_activated = 0; @@ -1320,8 +1293,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, NULL, rkey, address, prefetch_len, - &bytes_committed, NULL, - 0); + &bytes_committed, NULL); if (ret < 0 && ret != -EAGAIN) { mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n", ret, pfault->token, address, prefetch_len); @@ -1624,114 +1596,138 @@ int mlx5_ib_odp_init(void) struct prefetch_mr_work { struct work_struct work; - struct ib_pd *pd; u32 pf_flags; u32 num_sge; - struct ib_sge sg_list[0]; + struct { + u64 io_virt; + struct mlx5_ib_mr *mr; + size_t length; + } frags[]; }; -static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev, - struct ib_sge *sg_list, u32 num_sge, - u32 from) +static void destroy_prefetch_work(struct prefetch_mr_work *work) { u32 i; - int srcu_key; - - srcu_key = srcu_read_lock(&dev->mr_srcu); - for (i = from; i < num_sge; ++i) { - struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mr *mr; - - mmkey = xa_load(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(sg_list[i].lkey)); - mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - atomic_dec(&mr->num_pending_prefetch); - } - - srcu_read_unlock(&dev->mr_srcu, srcu_key); + for (i = 0; i < work->num_sge; ++i) + atomic_dec(&work->frags[i].mr->num_pending_prefetch); + kvfree(work); } -static bool num_pending_prefetch_inc(struct ib_pd *pd, - struct ib_sge *sg_list, u32 num_sge) +static struct mlx5_ib_mr * +get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, + u32 lkey) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - bool ret = true; - u32 i; + struct mlx5_core_mkey *mmkey; + struct ib_umem_odp *odp; + struct mlx5_ib_mr *mr; - for (i = 0; i < num_sge; ++i) { - struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mr *mr; + lockdep_assert_held(&dev->mr_srcu); - mmkey = xa_load(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(sg_list[i].lkey)); - if (!mmkey || mmkey->key != sg_list[i].lkey) { - ret = false; - break; - } + mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(lkey)); + if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) + return NULL; - if (mmkey->type != MLX5_MKEY_MR) { - ret = false; - break; - } + mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); + if (!smp_load_acquire(&mr->live)) + return NULL; - if (!smp_load_acquire(&mr->live)) { - ret = false; - break; - } + if (mr->ibmr.pd != pd || !is_odp_mr(mr)) + return NULL; - if (mr->ibmr.pd != pd) { - ret = false; - break; - } 
+ /* + * Implicit child MRs are internal and userspace should not refer to + * them. + */ + if (mr->parent) + return NULL; - atomic_inc(&mr->num_pending_prefetch); - } + odp = to_ib_umem_odp(mr->umem); - if (!ret) - num_pending_prefetch_dec(dev, sg_list, i, 0); + /* prefetch with write-access must be supported by the MR */ + if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && + !odp->umem.writable) + return NULL; - return ret; + return mr; } -static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, u32 pf_flags, - struct ib_sge *sg_list, u32 num_sge) +static void mlx5_ib_prefetch_mr_work(struct work_struct *w) { + struct prefetch_mr_work *work = + container_of(w, struct prefetch_mr_work, work); + u32 bytes_mapped = 0; u32 i; - int ret = 0; - struct mlx5_ib_dev *dev = to_mdev(pd->device); + + for (i = 0; i < work->num_sge; ++i) + pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, + work->frags[i].length, &bytes_mapped, + work->pf_flags); + + destroy_prefetch_work(work); +} + +static bool init_prefetch_work(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 pf_flags, struct prefetch_mr_work *work, + struct ib_sge *sg_list, u32 num_sge) +{ + u32 i; + + INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work); + work->pf_flags = pf_flags; for (i = 0; i < num_sge; ++i) { - struct ib_sge *sg = &sg_list[i]; - int bytes_committed = 0; + work->frags[i].io_virt = sg_list[i].addr; + work->frags[i].length = sg_list[i].length; + work->frags[i].mr = + get_prefetchable_mr(pd, advice, sg_list[i].lkey); + if (!work->frags[i].mr) { + work->num_sge = i - 1; + if (i) + destroy_prefetch_work(work); + return false; + } - ret = pagefault_single_data_segment(dev, pd, sg->lkey, sg->addr, - sg->length, - &bytes_committed, NULL, - pf_flags); - if (ret < 0) - break; + /* Keep the MR pointer will valid outside the SRCU */ + atomic_inc(&work->frags[i].mr->num_pending_prefetch); } - - return ret < 0 ? 
ret : 0; + work->num_sge = num_sge; + return true; } -static void mlx5_ib_prefetch_mr_work(struct work_struct *work) +static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 pf_flags, struct ib_sge *sg_list, + u32 num_sge) { - struct prefetch_mr_work *w = - container_of(work, struct prefetch_mr_work, work); + struct mlx5_ib_dev *dev = to_mdev(pd->device); + u32 bytes_mapped = 0; + int srcu_key; + int ret = 0; + u32 i; - if (ib_device_try_get(w->pd->device)) { - mlx5_ib_prefetch_sg_list(w->pd, w->pf_flags, w->sg_list, - w->num_sge); - ib_device_put(w->pd->device); + srcu_key = srcu_read_lock(&dev->mr_srcu); + for (i = 0; i < num_sge; ++i) { + struct mlx5_ib_mr *mr; + + mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); + if (!mr) { + ret = -ENOENT; + goto out; + } + ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, + &bytes_mapped, pf_flags); + if (ret < 0) + goto out; } + ret = 0; - num_pending_prefetch_dec(to_mdev(w->pd->device), w->sg_list, - w->num_sge, 0); - kvfree(w); +out: + srcu_read_unlock(&dev->mr_srcu, srcu_key); + return ret; } int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, @@ -1739,43 +1735,27 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, u32 flags, struct ib_sge *sg_list, u32 num_sge) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - u32 pf_flags = MLX5_PF_FLAGS_PREFETCH; + u32 pf_flags = 0; struct prefetch_mr_work *work; - bool valid_req; int srcu_key; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH) - return mlx5_ib_prefetch_sg_list(pd, pf_flags, sg_list, + return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list, num_sge); - work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL); + work = kvzalloc(struct_size(work, frags, num_sge), GFP_KERNEL); if (!work) return -ENOMEM; - memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge)); - - /* It is guaranteed that the pd when work is executed is the pd when - * work was queued since pd can't be destroyed while it holds MRs and - * destroying a MR leads to flushing the workquque - */ - work->pd = pd; - work->pf_flags = pf_flags; - work->num_sge = num_sge; - - INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work); - srcu_key = srcu_read_lock(&dev->mr_srcu); - - valid_req = num_pending_prefetch_inc(pd, sg_list, num_sge); - if (valid_req) - queue_work(system_unbound_wq, &work->work); - else - kvfree(work); - + if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { + srcu_read_unlock(&dev->mr_srcu, srcu_key); + return -EINVAL; + } + queue_work(system_unbound_wq, &work->work); srcu_read_unlock(&dev->mr_srcu, srcu_key); - - return valid_req ? 0 : -EINVAL; + return 0; } -- cgit v1.2.3-59-g8ed1b From 50211ec9443ff2e16db43f691dfcc0ef435cf45d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:22 -0300 Subject: RDMA/mlx5: Split sig_err MR data into its own xarray The locking model for signature is completely different than ODP, do not share the same xarray that relies on SRCU locking to support ODP. Simply store the active mlx5_core_sig_ctx's in an xarray when signature MRs are created and rely on trivial xarray locking to serialize everything. The overhead of storing only a handful of SIG related MRs is going to be much less than an xarray full of every mkey. 
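The resulting scheme is small enough to sketch; the two touch points below are simplified (error handling and surrounding context omitted), with dev->sig_mrs being the xarray introduced by this patch:

  /* MR creation: publish the signature context under its base mkey */
  err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
                        mr->sig, GFP_KERNEL));

  /* CQ poll on MLX5_CQE_SIG_ERR: plain xarray locking, no SRCU */
  xa_lock(&dev->sig_mrs);
  sig = xa_load(&dev->sig_mrs,
                mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
  sig->sig_err_exists = true;
  xa_unlock(&dev->sig_mrs);

The matching xa_erase() happens in clean_mr() when the signature MR is destroyed, so the xarray only ever holds live contexts.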
Link: https://lore.kernel.org/r/20191009160934.3143-3-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cq.c | 33 ++++++++++++++++----------------- drivers/infiniband/hw/mlx5/main.c | 2 ++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 ++ drivers/infiniband/hw/mlx5/mr.c | 8 ++++++++ 4 files changed, 28 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 45f48cde6b9d..cc938d27f913 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -423,9 +423,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_cqe64 *cqe64; struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; - struct mlx5_sig_err_cqe *sig_err_cqe; - struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mr *mr; uint8_t opcode; uint32_t qpn; u16 wqe_ctr; @@ -519,27 +516,29 @@ repoll: } } break; - case MLX5_CQE_SIG_ERR: - sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; + case MLX5_CQE_SIG_ERR: { + struct mlx5_sig_err_cqe *sig_err_cqe = + (struct mlx5_sig_err_cqe *)cqe64; + struct mlx5_core_sig_ctx *sig; - xa_lock(&dev->mdev->priv.mkey_table); - mmkey = xa_load(&dev->mdev->priv.mkey_table, + xa_lock(&dev->sig_mrs); + sig = xa_load(&dev->sig_mrs, mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); - mr = to_mibmr(mmkey); - get_sig_err_item(sig_err_cqe, &mr->sig->err_item); - mr->sig->sig_err_exists = true; - mr->sig->sigerr_count++; + get_sig_err_item(sig_err_cqe, &sig->err_item); + sig->sig_err_exists = true; + sig->sigerr_count++; mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n", - cq->mcq.cqn, mr->sig->err_item.key, - mr->sig->err_item.err_type, - mr->sig->err_item.sig_err_offset, - mr->sig->err_item.expected, - mr->sig->err_item.actual); + cq->mcq.cqn, sig->err_item.key, + sig->err_item.err_type, + sig->err_item.sig_err_offset, + sig->err_item.expected, + sig->err_item.actual); - xa_unlock(&dev->mdev->priv.mkey_table); + xa_unlock(&dev->sig_mrs); goto repoll; } + } return 0; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 831539419c30..b7eea724beaa 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6150,6 +6150,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) cleanup_srcu_struct(&dev->mr_srcu); } + WARN_ON(!xa_empty(&dev->sig_mrs)); WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); } @@ -6201,6 +6202,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); + xa_init(&dev->sig_mrs); spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 1a98ee2e01c4..f4ce58354544 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -999,6 +999,8 @@ struct mlx5_ib_dev { struct mlx5_srq_table srq_table; struct mlx5_async_ctx async_ctx; struct mlx5_devx_event_table devx_event_table; + + struct xarray sig_mrs; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 630599311586..fd24640606c1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1560,6 +1560,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, 
struct mlx5_ib_mr *mr) mr->sig->psv_wire.psv_idx)) mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", mr->sig->psv_wire.psv_idx); + xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key)); kfree(mr->sig); mr->sig = NULL; } @@ -1797,8 +1798,15 @@ static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, if (err) goto err_free_mtt_mr; + err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), + mr->sig, GFP_KERNEL)); + if (err) + goto err_free_descs; return 0; +err_free_descs: + destroy_mkey(dev, mr); + mlx5_free_priv_descs(mr); err_free_mtt_mr: dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr); mr->mtt_mr = NULL; -- cgit v1.2.3-59-g8ed1b From 806b101b2bfa800a9c779336b750bee39c7fb3b4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:23 -0300 Subject: RDMA/mlx5: Use a dedicated mkey xarray for ODP There is a per device xarray storing mkeys that is used to store every mkey in the system. However, this xarray is now only read by ODP for certain ODP designated MRs (ODP, implicit ODP, MW, DEVX_INDIRECT). Create an xarray only for use by ODP, that only contains ODP related MKeys. This xarray is protected by SRCU and all erases are protected by a synchronize. This improves performance: - All MRs in the odp_mkeys xarray are ODP MRs, so some tests for is_odp() can be deleted. The xarray will also consume fewer nodes. - normal MR's are never mixed with ODP MRs in a SRCU data structure so performance sucking synchronize_srcu() on every MR destruction is not needed. - No smp_load_acquire(live) and xa_load() double barrier on read Due to the SRCU locking scheme care must be taken with the placement of the xa_store(). Once it completes the MR is immediately visible to other threads and only through a xa_erase() & synchronize_srcu() cycle could it be destroyed. Link: https://lore.kernel.org/r/20191009160934.3143-4-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 8 ++-- drivers/infiniband/hw/mlx5/main.c | 17 ++++---- drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 ++- drivers/infiniband/hw/mlx5/mr.c | 43 ++++++++++--------- drivers/infiniband/hw/mlx5/odp.c | 83 +++++++++++++++++++----------------- 5 files changed, 83 insertions(+), 73 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index d609f4659afb..6b1fca91d7d3 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1265,8 +1265,8 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, mkey->pd = MLX5_GET(mkc, mkc, pd); devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); - return xa_err(xa_store(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL)); + return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey, + GFP_KERNEL)); } static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, @@ -1345,9 +1345,9 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, * the mmkey, we must wait for that to stop before freeing the * mkey, as another allocation could get the same mkey #. 
*/ - xa_erase(&obj->ib_dev->mdev->priv.mkey_table, + xa_erase(&obj->ib_dev->odp_mkeys, mlx5_base_mkey(obj->devx_mr.mmkey.key)); - synchronize_srcu(&dev->mr_srcu); + synchronize_srcu(&dev->odp_srcu); } if (obj->flags & DEVX_OBJ_FLAGS_DCT) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b7eea724beaa..4692c37b057c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6145,10 +6145,10 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - srcu_barrier(&dev->mr_srcu); - cleanup_srcu_struct(&dev->mr_srcu); - } + WARN_ON(!xa_empty(&dev->odp_mkeys)); + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + srcu_barrier(&dev->odp_srcu); + cleanup_srcu_struct(&dev->odp_srcu); WARN_ON(!xa_empty(&dev->sig_mrs)); WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); @@ -6202,16 +6202,15 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); + xa_init(&dev->odp_mkeys); xa_init(&dev->sig_mrs); spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - err = init_srcu_struct(&dev->mr_srcu); - if (err) - goto err_mp; - } + err = init_srcu_struct(&dev->odp_srcu); + if (err) + goto err_mp; return 0; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f4ce58354544..2cf91db6a36f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -606,7 +606,6 @@ struct mlx5_ib_mr { struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; - unsigned int live; void *descs_alloc; int access_flags; /* Needed for rereg MR */ @@ -975,7 +974,9 @@ struct mlx5_ib_dev { * Sleepable RCU that prevents destruction of MRs while they are still * being used by a page fault handler. */ - struct srcu_struct mr_srcu; + struct srcu_struct odp_srcu; + struct xarray odp_mkeys; + u32 null_mkey; struct mlx5_ib_flow_db *flow_db; /* protect resources needed as part of reset flow */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index fd24640606c1..60b12dc53004 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -59,13 +59,9 @@ static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); + WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - /* Wait until all page fault handlers using the mr complete. 
*/ - synchronize_srcu(&dev->mr_srcu); - - return err; + return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } static int order2idx(struct mlx5_ib_dev *dev, int order) @@ -218,9 +214,6 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - synchronize_srcu(&dev->mr_srcu); - list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { list_del(&mr->list); kfree(mr); @@ -555,10 +548,6 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); -#endif - list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { list_del(&mr->list); kfree(mr); @@ -1338,9 +1327,14 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (is_odp_mr(mr)) { to_ib_umem_odp(mr->umem)->private = mr; atomic_set(&mr->num_pending_prefetch, 0); + err = xa_err(xa_store(&dev->odp_mkeys, + mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, + GFP_KERNEL)); + if (err) { + dereg_mr(dev, mr); + return ERR_PTR(err); + } } - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - smp_store_release(&mr->live, 1); return &mr->ibmr; error: @@ -1582,10 +1576,10 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Prevent new page faults and * prefetch requests from succeeding */ - WRITE_ONCE(mr->live, 0); + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&dev->mr_srcu); + synchronize_srcu(&dev->odp_srcu); /* dequeue pending prefetch requests for the mr */ if (atomic_read(&mr->num_pending_prefetch)) @@ -1959,9 +1953,19 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, } } + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + err = xa_err(xa_store(&dev->odp_mkeys, + mlx5_base_mkey(mw->mmkey.key), &mw->mmkey, + GFP_KERNEL)); + if (err) + goto free_mkey; + } + kfree(in); return &mw->ibmw; +free_mkey: + mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); free: kfree(mw); kfree(in); @@ -1975,13 +1979,12 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw) int err; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - xa_erase(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(mmw->mmkey.key)); + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)); /* * pagefault_single_data_segment() may be accessing mmw under * SRCU if the user bound an ODP MR to this MW. */ - synchronize_srcu(&dev->mr_srcu); + synchronize_srcu(&dev->odp_srcu); } err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 09dac97e4ca4..1f06433bcfc8 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -199,7 +199,7 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, * locking around the children list. 
*/ lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex); - lockdep_assert_held(&mr->dev->mr_srcu); + lockdep_assert_held(&mr->dev->odp_srcu); odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, nentries * MLX5_IMR_MTT_SIZE, mr); @@ -229,16 +229,16 @@ static void mr_leaf_free_action(struct work_struct *work) int srcu_key; mr->parent = NULL; - synchronize_srcu(&mr->dev->mr_srcu); + synchronize_srcu(&mr->dev->odp_srcu); - if (smp_load_acquire(&imr->live)) { - srcu_key = srcu_read_lock(&mr->dev->mr_srcu); + if (xa_load(&mr->dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key))) { + srcu_key = srcu_read_lock(&mr->dev->odp_srcu); mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(imr, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); - srcu_read_unlock(&mr->dev->mr_srcu, srcu_key); + srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); } ib_umem_odp_release(odp); mlx5_mr_cache_free(mr->dev, mr); @@ -318,7 +318,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { - WRITE_ONCE(mr->live, 0); + xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); umem_odp->dying = 1; atomic_inc(&mr->parent->num_leaf_free); schedule_work(&umem_odp->work); @@ -430,6 +430,11 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, if (IS_ERR(mr)) return mr; + err = xa_reserve(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + GFP_KERNEL); + if (err) + goto out_mr; + mr->ibmr.pd = pd; mr->dev = dev; @@ -455,7 +460,7 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, } if (err) - goto fail; + goto out_release; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; @@ -465,7 +470,9 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, return mr; -fail: +out_release: + xa_release(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); +out_mr: mlx5_ib_err(dev, "Failed to register MKEY %d\n", err); mlx5_mr_cache_free(dev, mr); @@ -513,7 +520,8 @@ next_mr: mtt->parent = mr; INIT_WORK(&odp->work, mr_leaf_free_action); - smp_store_release(&mtt->live, 1); + xa_store(&dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key), + &mtt->mmkey, GFP_ATOMIC); if (!nentries) start_idx = addr >> MLX5_IMR_MTT_SHIFT; @@ -567,7 +575,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); - smp_store_release(&imr->live, 1); + xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key), + &imr->mmkey, GFP_ATOMIC); return imr; } @@ -778,13 +787,28 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, size_t offset; int ndescs; - srcu_key = srcu_read_lock(&dev->mr_srcu); + srcu_key = srcu_read_lock(&dev->odp_srcu); io_virt += *bytes_committed; bcnt -= *bytes_committed; next_mr: - mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key)); + mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key)); + if (!mmkey) { + mlx5_ib_dbg( + dev, + "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", + key); + if (bytes_mapped) + *bytes_mapped += bcnt; + /* + * The user could specify a SGL with multiple lkeys and only + * some of them are ODP. Treat the non-ODP ones as fully + * faulted. 
+ */ + ret = 0; + goto srcu_unlock; + } if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; @@ -794,20 +818,6 @@ next_mr: switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!smp_load_acquire(&mr->live) || !mr->ibmr.pd) { - mlx5_ib_dbg(dev, "got dead MR\n"); - ret = -EFAULT; - goto srcu_unlock; - } - - if (!is_odp_mr(mr)) { - mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", - key); - if (bytes_mapped) - *bytes_mapped += bcnt; - ret = 0; - goto srcu_unlock; - } ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0); if (ret < 0) @@ -902,7 +912,7 @@ srcu_unlock: } kfree(out); - srcu_read_unlock(&dev->mr_srcu, srcu_key); + srcu_read_unlock(&dev->odp_srcu, srcu_key); *bytes_committed = 0; return ret ? ret : npages; } @@ -1623,18 +1633,15 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, struct ib_umem_odp *odp; struct mlx5_ib_mr *mr; - lockdep_assert_held(&dev->mr_srcu); + lockdep_assert_held(&dev->odp_srcu); - mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(lkey)); + mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) return NULL; mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!smp_load_acquire(&mr->live)) - return NULL; - - if (mr->ibmr.pd != pd || !is_odp_mr(mr)) + if (mr->ibmr.pd != pd) return NULL; /* @@ -1709,7 +1716,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, int ret = 0; u32 i; - srcu_key = srcu_read_lock(&dev->mr_srcu); + srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < num_sge; ++i) { struct mlx5_ib_mr *mr; @@ -1726,7 +1733,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, ret = 0; out: - srcu_read_unlock(&dev->mr_srcu, srcu_key); + srcu_read_unlock(&dev->odp_srcu, srcu_key); return ret; } @@ -1750,12 +1757,12 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (!work) return -ENOMEM; - srcu_key = srcu_read_lock(&dev->mr_srcu); + srcu_key = srcu_read_lock(&dev->odp_srcu); if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { - srcu_read_unlock(&dev->mr_srcu, srcu_key); + srcu_read_unlock(&dev->odp_srcu, srcu_key); return -EINVAL; } queue_work(system_unbound_wq, &work->work); - srcu_read_unlock(&dev->mr_srcu, srcu_key); + srcu_read_unlock(&dev->odp_srcu, srcu_key); return 0; } -- cgit v1.2.3-59-g8ed1b From 74bddb3682f60df16ba24be335c94de348ba1b07 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:24 -0300 Subject: RDMA/mlx5: Delete struct mlx5_priv->mkey_table No users are left, delete it. 
Link: https://lore.kernel.org/r/20191009160934.3143-5-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 9 --------- drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ---- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 28 +------------------------- include/linux/mlx5/driver.h | 4 ---- 4 files changed, 1 insertion(+), 44 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 60b12dc53004..fd94838a8845 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -90,8 +90,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) struct mlx5_cache_ent *ent = &cache->ent[c]; u8 key; unsigned long flags; - struct xarray *mkeys = &dev->mdev->priv.mkey_table; - int err; spin_lock_irqsave(&ent->lock, flags); ent->pending--; @@ -118,13 +116,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) ent->size++; spin_unlock_irqrestore(&ent->lock, flags); - xa_lock_irqsave(mkeys, flags); - err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_ATOMIC)); - xa_unlock_irqrestore(mkeys, flags); - if (err) - pr_err("Error inserting to mkey tree. 0x%x\n", -err); - if (!completion_done(&ent->compl)) complete(&ent->compl); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e47dd7c1b909..d10051f39b9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -837,8 +837,6 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) mlx5_init_qp_table(dev); - mlx5_init_mkey_table(dev); - mlx5_init_reserved_gids(dev); mlx5_init_clock(dev); @@ -896,7 +894,6 @@ err_rl_cleanup: err_tables_cleanup: mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); - mlx5_cleanup_mkey_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); mlx5_events_cleanup(dev); @@ -924,7 +921,6 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); - mlx5_cleanup_mkey_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); mlx5_events_cleanup(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index c501bf2a0252..42cc3c7ac5b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -36,16 +36,6 @@ #include #include "mlx5_core.h" -void mlx5_init_mkey_table(struct mlx5_core_dev *dev) -{ - xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ); -} - -void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) -{ - WARN_ON(!xa_empty(&dev->priv.mkey_table)); -} - int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, struct mlx5_async_ctx *async_ctx, u32 *in, @@ -54,7 +44,6 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_async_work *context) { u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; - struct xarray *mkeys = &dev->priv.mkey_table; u32 mkey_index; void *mkc; int err; @@ -84,16 +73,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", mkey_index, key, mkey->key); - - err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey, - GFP_KERNEL)); - if (err) { - mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n", - mlx5_base_mkey(mkey->key), err); - 
mlx5_core_destroy_mkey(dev, mkey); - } - - return err; + return 0; } EXPORT_SYMBOL(mlx5_core_create_mkey_cb); @@ -111,12 +91,6 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, { u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; - struct xarray *mkeys = &dev->priv.mkey_table; - unsigned long flags; - - xa_lock_irqsave(mkeys, flags); - __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); - xa_unlock_irqrestore(mkeys, flags); MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3e80f03a387f..8288b62b8f37 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -556,8 +556,6 @@ struct mlx5_priv { struct dentry *cmdif_debugfs; /* end: qp staff */ - struct xarray mkey_table; - /* start: alloc staff */ /* protect buffer alocation according to numa node */ struct mutex alloc_mutex; @@ -942,8 +940,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *head); -void mlx5_init_mkey_table(struct mlx5_core_dev *dev); -void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, struct mlx5_async_ctx *async_ctx, u32 *in, -- cgit v1.2.3-59-g8ed1b From 3d5f3c54e7bc82a279c80c18087462c0ce00ba44 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:25 -0300 Subject: RDMA/mlx5: Rework implicit_mr_get_data This function is intended to loop across each MTT chunk in the implicit parent that intersects the range [io_virt, io_virt+bnct). But it is has a confusing construction, so: - Consistently use imr and odp_imr to refer to the implicit parent to avoid confusion with the normal mr and odp of the child - Directly compute the inclusive start/end indexes by shifting. 
This is clearer to understand the intent and avoids any errors from unaligned values of addr - Iterate directly over the range of MTT indexes, do not make a loop out of goto - Follow 'success oriented flow', with goto error unwind - Directly calculate the range of idx's that need update_xlt - Ensure that any leaf MR added to the interval tree always results in an update to the XLT Link: https://lore.kernel.org/r/20191009160934.3143-6-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 123 ++++++++++++++++++++++----------------- 1 file changed, 69 insertions(+), 54 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 1f06433bcfc8..78e31dc694d9 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -479,78 +479,93 @@ out_mr: return ERR_PTR(err); } -static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr, - u64 io_virt, size_t bcnt) +static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, + unsigned long idx) { - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device); - struct ib_umem_odp *odp, *result = NULL; - struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); - u64 addr = io_virt & MLX5_IMR_MTT_MASK; - int nentries = 0, start_idx = 0, ret; + struct ib_umem_odp *odp; struct mlx5_ib_mr *mtt; - mutex_lock(&odp_mr->umem_mutex); - odp = odp_lookup(addr, 1, mr); + odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem), + idx * MLX5_IMR_MTT_SIZE, + MLX5_IMR_MTT_SIZE); + if (IS_ERR(odp)) + return ERR_CAST(odp); - mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n", - io_virt, bcnt, addr, odp); + mtt = implicit_mr_alloc(imr->ibmr.pd, odp, 0, imr->access_flags); + if (IS_ERR(mtt)) { + ib_umem_odp_release(odp); + return mtt; + } -next_mr: - if (likely(odp)) { - if (nentries) - nentries++; - } else { - odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE); - if (IS_ERR(odp)) { - mutex_unlock(&odp_mr->umem_mutex); - return ERR_CAST(odp); - } + odp->private = mtt; + mtt->umem = &odp->umem; + mtt->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; + mtt->parent = imr; + INIT_WORK(&odp->work, mr_leaf_free_action); - mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0, - mr->access_flags); - if (IS_ERR(mtt)) { - mutex_unlock(&odp_mr->umem_mutex); - ib_umem_odp_release(odp); - return ERR_CAST(mtt); - } + xa_store(&mtt->dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key), + &mtt->mmkey, GFP_ATOMIC); + return mtt; +} - odp->private = mtt; - mtt->umem = &odp->umem; - mtt->mmkey.iova = addr; - mtt->parent = mr; - INIT_WORK(&odp->work, mr_leaf_free_action); +static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *imr, + u64 io_virt, size_t bcnt) +{ + struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + unsigned long end_idx = (io_virt + bcnt - 1) >> MLX5_IMR_MTT_SHIFT; + unsigned long idx = io_virt >> MLX5_IMR_MTT_SHIFT; + unsigned long inv_start_idx = end_idx + 1; + unsigned long inv_len = 0; + struct ib_umem_odp *result = NULL; + struct ib_umem_odp *odp; + int ret; - xa_store(&dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key), - &mtt->mmkey, GFP_ATOMIC); + mutex_lock(&odp_imr->umem_mutex); + odp = odp_lookup(idx * MLX5_IMR_MTT_SIZE, 1, imr); + for (idx = idx; idx <= end_idx; idx++) { + if (unlikely(!odp)) { + struct mlx5_ib_mr *mtt; - if (!nentries) - start_idx = addr >> MLX5_IMR_MTT_SHIFT; - nentries++; - } + mtt = implicit_get_child_mr(imr, idx); + if (IS_ERR(mtt)) { + result = ERR_CAST(mtt); + goto out; + } 
+ odp = to_ib_umem_odp(mtt->umem); + inv_start_idx = min(inv_start_idx, idx); + inv_len = idx - inv_start_idx + 1; + } - /* Return first odp if region not covered by single one */ - if (likely(!result)) - result = odp; + /* Return first odp if region not covered by single one */ + if (likely(!result)) + result = odp; - addr += MLX5_IMR_MTT_SIZE; - if (unlikely(addr < io_virt + bcnt)) { odp = odp_next(odp); - if (odp && ib_umem_start(odp) != addr) + if (odp && ib_umem_start(odp) != idx * MLX5_IMR_MTT_SIZE) odp = NULL; - goto next_mr; } - if (unlikely(nentries)) { - ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0, - MLX5_IB_UPD_XLT_INDIRECT | + /* + * Any time the children in the interval tree are changed we must + * perform an update of the xlt before exiting to ensure the HW and + * the tree remains synchronized. + */ +out: + if (likely(!inv_len)) + goto out_unlock; + + ret = mlx5_ib_update_xlt(imr, inv_start_idx, inv_len, 0, + MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); - if (ret) { - mlx5_ib_err(dev, "Failed to update PAS\n"); - result = ERR_PTR(ret); - } + if (ret) { + mlx5_ib_err(to_mdev(imr->ibmr.pd->device), + "Failed to update PAS\n"); + result = ERR_PTR(ret); + goto out_unlock; } - mutex_unlock(&odp_mr->umem_mutex); +out_unlock: + mutex_unlock(&odp_imr->umem_mutex); return result; } -- cgit v1.2.3-59-g8ed1b From c2edcd69351f681594a30b17b7fbc5259a038fb0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:26 -0300 Subject: RDMA/mlx5: Lift implicit_mr_alloc() into the two routines that call it This makes the routines easier to understand, particularly with respect the locking requirements of the entire sequence. The implicit_mr_alloc() had a lot of ifs specializing it to each of the callers, and only a very small amount of code was actually shared. Following patches will cause the flow in the two functions to diverge further. Link: https://lore.kernel.org/r/20191009160934.3143-7-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 151 +++++++++++++++++++-------------------- 1 file changed, 74 insertions(+), 77 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 78e31dc694d9..dfaa39fc4d3f 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -416,96 +416,66 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, wq_num, err); } -static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, - struct ib_umem_odp *umem_odp, - bool ksm, int access_flags) +static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, + unsigned long idx) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem_odp *odp; struct mlx5_ib_mr *mr; + struct mlx5_ib_mr *ret; int err; - mr = mlx5_mr_cache_alloc(dev, ksm ? 
MLX5_IMR_KSM_CACHE_ENTRY : - MLX5_IMR_MTT_CACHE_ENTRY); + odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem), + idx * MLX5_IMR_MTT_SIZE, + MLX5_IMR_MTT_SIZE); + if (IS_ERR(odp)) + return ERR_CAST(odp); + ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY); if (IS_ERR(mr)) - return mr; + goto out_umem; - err = xa_reserve(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + err = xa_reserve(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), GFP_KERNEL); - if (err) + if (err) { + ret = ERR_PTR(err); goto out_mr; - - mr->ibmr.pd = pd; - - mr->dev = dev; - mr->access_flags = access_flags; - mr->mmkey.iova = 0; - mr->umem = &umem_odp->umem; - - if (ksm) { - err = mlx5_ib_update_xlt(mr, 0, - mlx5_imr_ksm_entries, - MLX5_KSM_PAGE_SHIFT, - MLX5_IB_UPD_XLT_INDIRECT | - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE); - - } else { - err = mlx5_ib_update_xlt(mr, 0, - MLX5_IMR_MTT_ENTRIES, - PAGE_SHIFT, - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE | - MLX5_IB_UPD_XLT_ATOMIC); } - if (err) - goto out_release; - + mr->ibmr.pd = imr->ibmr.pd; + mr->access_flags = imr->access_flags; + mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; + mr->mmkey.iova = 0; + mr->parent = imr; + odp->private = mr; + INIT_WORK(&odp->work, mr_leaf_free_action); + + err = mlx5_ib_update_xlt(mr, 0, + MLX5_IMR_MTT_ENTRIES, + PAGE_SHIFT, + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ENABLE | + MLX5_IB_UPD_XLT_ATOMIC); + if (err) { + ret = ERR_PTR(err); + goto out_release; + } - mlx5_ib_dbg(dev, "key %x dev %p mr %p\n", - mr->mmkey.key, dev->mdev, mr); + mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; + xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_ATOMIC); + mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); return mr; out_release: - xa_release(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + xa_release(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); out_mr: - mlx5_ib_err(dev, "Failed to register MKEY %d\n", err); - mlx5_mr_cache_free(dev, mr); - - return ERR_PTR(err); -} - -static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, - unsigned long idx) -{ - struct ib_umem_odp *odp; - struct mlx5_ib_mr *mtt; - - odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem), - idx * MLX5_IMR_MTT_SIZE, - MLX5_IMR_MTT_SIZE); - if (IS_ERR(odp)) - return ERR_CAST(odp); - - mtt = implicit_mr_alloc(imr->ibmr.pd, odp, 0, imr->access_flags); - if (IS_ERR(mtt)) { - ib_umem_odp_release(odp); - return mtt; - } - - odp->private = mtt; - mtt->umem = &odp->umem; - mtt->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; - mtt->parent = imr; - INIT_WORK(&odp->work, mr_leaf_free_action); - - xa_store(&mtt->dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key), - &mtt->mmkey, GFP_ATOMIC); - return mtt; + mlx5_mr_cache_free(imr->dev, mr); +out_umem: + ib_umem_odp_release(odp); + return ret; } static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *imr, @@ -573,27 +543,54 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags) { - struct mlx5_ib_mr *imr; + struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); struct ib_umem_odp *umem_odp; + struct mlx5_ib_mr *imr; + int err; umem_odp = ib_umem_odp_alloc_implicit(udata, access_flags); if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); - imr = implicit_mr_alloc(&pd->ibpd, umem_odp, 1, access_flags); + imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY); if (IS_ERR(imr)) { - ib_umem_odp_release(umem_odp); - return ERR_CAST(imr); + err = PTR_ERR(imr); + 
goto out_umem; } + imr->ibmr.pd = &pd->ibpd; + imr->access_flags = access_flags; + imr->mmkey.iova = 0; + imr->umem = &umem_odp->umem; + imr->ibmr.lkey = imr->mmkey.key; + imr->ibmr.rkey = imr->mmkey.key; imr->umem = &umem_odp->umem; init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); - xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key), - &imr->mmkey, GFP_ATOMIC); + err = mlx5_ib_update_xlt(imr, 0, + mlx5_imr_ksm_entries, + MLX5_KSM_PAGE_SHIFT, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ENABLE); + if (err) + goto out_mr; + + err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key), + &imr->mmkey, GFP_KERNEL)); + if (err) + goto out_mr; + + mlx5_ib_dbg(dev, "key %x mr %p\n", imr->mmkey.key, imr); return imr; +out_mr: + mlx5_ib_err(dev, "Failed to register MKEY %d\n", err); + mlx5_mr_cache_free(dev, imr); +out_umem: + ib_umem_odp_release(umem_odp); + return ERR_PTR(err); } void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) -- cgit v1.2.3-59-g8ed1b From 9162420dde49c9a8f4819f28bf2d5c675fb12552 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:27 -0300 Subject: RDMA/mlx5: Set the HW IOVA of the child MRs to their place in the tree Instead of rewriting all the IOVA's to 0 as things progress down the tree make the IOVA of the children equal to placement in the tree. This makes things easier to understand by keeping mmkey.iova == HW configuration. Link: https://lore.kernel.org/r/20191009160934.3143-8-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index dfaa39fc4d3f..b25cf7836544 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -211,9 +211,11 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, struct mlx5_ib_mr *mtt = odp->private; pklm->key = cpu_to_be32(mtt->ibmr.lkey); + pklm->va = cpu_to_be64(va); odp = odp_next(odp); } else { pklm->key = cpu_to_be32(dev->null_mkey); + pklm->va = 0; } mlx5_ib_dbg(dev, "[%d] va %lx key %x\n", i, va, be32_to_cpu(pklm->key)); @@ -446,7 +448,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; - mr->mmkey.iova = 0; + mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; mr->parent = imr; odp->private = mr; INIT_WORK(&odp->work, mr_leaf_free_action); @@ -462,7 +464,6 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_release; } - mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, GFP_ATOMIC); -- cgit v1.2.3-59-g8ed1b From 54375e7382952daded7002d1618eadaae859cecb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:28 -0300 Subject: RDMA/mlx5: Split implicit handling from pagefault_mr The single routine has a very confusing scheme to advance to the next child MR when working on an implicit parent. This scheme can only be used when working with an implicit parent and must not be triggered when working on a normal MR. Re-arrange things by directly putting all the single-MR stuff into one function and calling it in a loop for the implicit case. Simplify some of the error handling in the new pagefault_real_mr() to remove unneeded gotos. 
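Compressed to its essentials, the implicit branch becomes a loop over the children; implicit_child_for() below is only a stand-in for the real child lookup, and the bounds checks are left out:

  static int pagefault_implicit_sketch(struct mlx5_ib_mr *imr, u64 io_virt,
                                       size_t bcnt, u32 *bytes_mapped, u32 flags)
  {
          int npages = 0;

          while (bcnt) {
                  struct mlx5_ib_mr *child = implicit_child_for(imr, io_virt);
                  struct ib_umem_odp *odp = to_ib_umem_odp(child->umem);
                  u64 len = min_t(u64, bcnt, ib_umem_end(odp) - io_virt);
                  int ret;

                  ret = pagefault_real_mr(child, odp, io_virt, len,
                                          bytes_mapped, flags);
                  if (ret < 0)
                          return ret;
                  io_virt += len;
                  bcnt -= len;
                  npages += ret;
          }
          return npages;
  }

pagefault_real_mr() itself keeps the single-MR semantics and never has to know whether its caller is iterating.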
Link: https://lore.kernel.org/r/20191009160934.3143-9-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 125 ++++++++++++++++++++++++--------------- 1 file changed, 76 insertions(+), 49 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index b25cf7836544..8f43af6580ce 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -629,33 +629,18 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) } #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) -static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, - u32 *bytes_mapped, u32 flags) +static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, + u64 user_va, size_t bcnt, u32 *bytes_mapped, + u32 flags) { - int npages = 0, current_seq, page_shift, ret, np; - struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); + int current_seq, page_shift, ret, np; bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; u64 access_mask; u64 start_idx, page_mask; - struct ib_umem_odp *odp; - size_t size; - - if (odp_mr->is_implicit_odp) { - odp = implicit_mr_get_data(mr, io_virt, bcnt); - - if (IS_ERR(odp)) - return PTR_ERR(odp); - mr = odp->private; - } else { - odp = odp_mr; - } - -next_mr: - size = min_t(size_t, bcnt, ib_umem_end(odp) - io_virt); page_shift = odp->page_shift; page_mask = ~(BIT(page_shift) - 1); - start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; + start_idx = (user_va - (mr->mmkey.iova & page_mask)) >> page_shift; access_mask = ODP_READ_ALLOWED_BIT; if (odp->umem.writable && !downgrade) @@ -668,13 +653,10 @@ next_mr: */ smp_rmb(); - ret = ib_umem_odp_map_dma_pages(odp, io_virt, size, access_mask, - current_seq); - - if (ret < 0) - goto out; - - np = ret; + np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask, + current_seq); + if (np < 0) + return np; mutex_lock(&odp->umem_mutex); if (!ib_umem_mmu_notifier_retry(odp, current_seq)) { @@ -699,31 +681,12 @@ next_mr: if (bytes_mapped) { u32 new_mappings = (np << page_shift) - - (io_virt - round_down(io_virt, 1 << page_shift)); - *bytes_mapped += min_t(u32, new_mappings, size); - } - - npages += np << (page_shift - PAGE_SHIFT); - bcnt -= size; + (user_va - round_down(user_va, 1 << page_shift)); - if (unlikely(bcnt)) { - struct ib_umem_odp *next; - - io_virt += size; - next = odp_next(odp); - if (unlikely(!next || ib_umem_start(next) != io_virt)) { - mlx5_ib_dbg( - mr->dev, - "next implicit leaf removed at 0x%llx. got %p\n", - io_virt, next); - return -EAGAIN; - } - odp = next; - mr = odp->private; - goto next_mr; + *bytes_mapped += min_t(u32, new_mappings, bcnt); } - return npages; + return np << (page_shift - PAGE_SHIFT); out: if (ret == -EAGAIN) { @@ -742,6 +705,70 @@ out: return ret; } +/* + * Returns: + * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are + * not accessible, or the MR is no longer valid. 
+ * -EAGAIN/-ENOMEM: The operation should be retried + * + * -EINVAL/others: General internal malfunction + * >0: Number of pages mapped + */ +static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, + u32 *bytes_mapped, u32 flags) +{ + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + struct ib_umem_odp *child; + int npages = 0; + + if (!odp->is_implicit_odp) { + if (unlikely(io_virt < ib_umem_start(odp) || + ib_umem_end(odp) - io_virt < bcnt)) + return -EFAULT; + return pagefault_real_mr(mr, odp, io_virt, bcnt, bytes_mapped, + flags); + } + + if (unlikely(io_virt >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE || + mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - io_virt < bcnt)) + return -EFAULT; + + child = implicit_mr_get_data(mr, io_virt, bcnt); + if (IS_ERR(child)) + return PTR_ERR(child); + + /* Fault each child mr that intersects with our interval. */ + while (bcnt) { + u64 end = min_t(u64, io_virt + bcnt, ib_umem_end(child)); + u64 len = end - io_virt; + int ret; + + ret = pagefault_real_mr(child->private, child, io_virt, len, + bytes_mapped, flags); + if (ret < 0) + return ret; + io_virt += len; + bcnt -= len; + npages += ret; + + if (unlikely(bcnt)) { + child = odp_next(child); + /* + * implicit_mr_get_data sets up all the leaves, this + * means they got invalidated before we got to them. + */ + if (!child || ib_umem_start(child) != io_virt) { + mlx5_ib_dbg( + mr->dev, + "next implicit leaf removed at 0x%llx.\n", + io_virt); + return -EAGAIN; + } + } + } + return npages; +} + struct pf_frame { struct pf_frame *next; u32 key; -- cgit v1.2.3-59-g8ed1b From 423f52d65005e8f5067d94bd4f41d8a7d8388135 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:29 -0300 Subject: RDMA/mlx5: Use an xarray for the children of an implicit ODP Currently the child leaves are stored in the shared interval tree and every lookup for a child must be done under the interval tree rwsem. This is further complicated by dropping the rwsem during iteration (ie the odp_lookup(), odp_next() pattern), which requires a very tricky an difficult to understand locking scheme with SRCU. Instead reserve the interval tree for the exclusive use of the mmu notifier related code in umem_odp.c and give each implicit MR a xarray containing all the child MRs. Since the size of each child is 1GB of VA, a 1 level xarray will index 64G of VA, and a 2 level will index 2TB, making xarray a much better data structure choice than an interval tree. The locking properties of xarray will be used in the next patches to rework the implicit ODP locking scheme into something simpler. At this point, the xarray is locked by the implicit MR's umem_mutex, and read can also be locked by the odp_srcu. 
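Because every child covers one fixed MLX5_IMR_MTT_SIZE slice of VA, translating a faulting address into its child is a single shift plus xa_load(); the helper name below is illustrative only:

  static struct mlx5_ib_mr *child_for_va(struct mlx5_ib_mr *imr, u64 io_virt)
  {
          /* readers hold the parent's umem_mutex or dev->odp_srcu */
          unsigned long idx = io_virt >> MLX5_IMR_MTT_SHIFT;

          return xa_load(&imr->implicit_children, idx);
  }

Writers pair the xa_store() with a locked mlx5_ib_update_xlt() under umem_mutex, which is what keeps the HW KSM table and the xarray in sync.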
Link: https://lore.kernel.org/r/20191009160934.3143-10-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +- drivers/infiniband/hw/mlx5/odp.c | 195 +++++++++++------------------------ include/rdma/ib_umem_odp.h | 16 --- 3 files changed, 67 insertions(+), 149 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2cf91db6a36f..88769fcffb5a 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -617,10 +617,13 @@ struct mlx5_ib_mr { u64 data_iova; u64 pi_iova; + /* For ODP and implicit */ atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; - struct mlx5_async_work cb_work; atomic_t num_pending_prefetch; + struct xarray implicit_children; + + struct mlx5_async_work cb_work; }; static inline bool is_odp_mr(struct mlx5_ib_mr *mr) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 8f43af6580ce..6f7eea175c72 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -93,132 +93,54 @@ struct mlx5_pagefault { static u64 mlx5_imr_ksm_entries; -static int check_parent(struct ib_umem_odp *odp, - struct mlx5_ib_mr *parent) +void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, + struct mlx5_ib_mr *imr, int flags) { - struct mlx5_ib_mr *mr = odp->private; - - return mr && mr->parent == parent && !odp->dying; -} - -static struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr) -{ - if (WARN_ON(!mr || !is_odp_mr(mr))) - return NULL; - - return to_ib_umem_odp(mr->umem)->per_mm; -} - -static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp) -{ - struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent; - struct ib_ucontext_per_mm *per_mm = odp->per_mm; - struct rb_node *rb; - - down_read(&per_mm->umem_rwsem); - while (1) { - rb = rb_next(&odp->interval_tree.rb); - if (!rb) - goto not_found; - odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); - if (check_parent(odp, parent)) - goto end; - } -not_found: - odp = NULL; -end: - up_read(&per_mm->umem_rwsem); - return odp; -} - -static struct ib_umem_odp *odp_lookup(u64 start, u64 length, - struct mlx5_ib_mr *parent) -{ - struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent); - struct ib_umem_odp *odp; - struct rb_node *rb; - - down_read(&per_mm->umem_rwsem); - odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length); - if (!odp) - goto end; - - while (1) { - if (check_parent(odp, parent)) - goto end; - rb = rb_next(&odp->interval_tree.rb); - if (!rb) - goto not_found; - odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); - if (ib_umem_start(odp) > start + length) - goto not_found; - } -not_found: - odp = NULL; -end: - up_read(&per_mm->umem_rwsem); - return odp; -} - -void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, - size_t nentries, struct mlx5_ib_mr *mr, int flags) -{ - struct ib_pd *pd = mr->ibmr.pd; - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct ib_umem_odp *odp; - unsigned long va; - int i; + struct mlx5_klm *end = pklm + nentries; if (flags & MLX5_IB_UPD_XLT_ZAP) { - for (i = 0; i < nentries; i++, pklm++) { + for (; pklm != end; pklm++, idx++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - pklm->key = cpu_to_be32(dev->null_mkey); + pklm->key = cpu_to_be32(imr->dev->null_mkey); pklm->va = 0; } return; } /* - * The locking here is pretty subtle. 
Ideally the implicit children - * list would be protected by the umem_mutex, however that is not + * The locking here is pretty subtle. Ideally the implicit_children + * xarray would be protected by the umem_mutex, however that is not * possible. Instead this uses a weaker update-then-lock pattern: * * srcu_read_lock() - * + * xa_store() * mutex_lock(umem_mutex) * mlx5_ib_update_xlt() * mutex_unlock(umem_mutex) * destroy lkey * - * ie any change the children list must be followed by the locked - * update_xlt before destroying. + * ie any change the xarray must be followed by the locked update_xlt + * before destroying. * * The umem_mutex provides the acquire/release semantic needed to make - * the children list visible to a racing thread. While SRCU is not + * the xa_store() visible to a racing thread. While SRCU is not * technically required, using it gives consistent use of the SRCU - * locking around the children list. + * locking around the xarray. */ - lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex); - lockdep_assert_held(&mr->dev->odp_srcu); + lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); + lockdep_assert_held(&imr->dev->odp_srcu); - odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, - nentries * MLX5_IMR_MTT_SIZE, mr); + for (; pklm != end; pklm++, idx++) { + struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); - for (i = 0; i < nentries; i++, pklm++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - va = (offset + i) * MLX5_IMR_MTT_SIZE; - if (odp && ib_umem_start(odp) == va) { - struct mlx5_ib_mr *mtt = odp->private; - + if (mtt) { pklm->key = cpu_to_be32(mtt->ibmr.lkey); - pklm->va = cpu_to_be64(va); - odp = odp_next(odp); + pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE); } else { - pklm->key = cpu_to_be32(dev->null_mkey); + pklm->key = cpu_to_be32(imr->dev->null_mkey); pklm->va = 0; } - mlx5_ib_dbg(dev, "[%d] va %lx key %x\n", - i, va, be32_to_cpu(pklm->key)); } } @@ -320,6 +242,8 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { + xa_erase(&mr->parent->implicit_children, + ib_umem_start(umem_odp) >> MLX5_IMR_MTT_SHIFT); xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); umem_odp->dying = 1; atomic_inc(&mr->parent->num_leaf_free); @@ -464,6 +388,16 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_release; } + /* + * Once the store to either xarray completes any error unwind has to + * use synchronize_srcu(). 
Avoid this with xa_reserve() + */ + err = xa_err(xa_store(&imr->implicit_children, idx, mr, GFP_KERNEL)); + if (err) { + ret = ERR_PTR(err); + goto out_release; + } + xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, GFP_ATOMIC); @@ -479,7 +413,7 @@ out_umem: return ret; } -static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *imr, +static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr, u64 io_virt, size_t bcnt) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); @@ -487,39 +421,32 @@ static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *imr, unsigned long idx = io_virt >> MLX5_IMR_MTT_SHIFT; unsigned long inv_start_idx = end_idx + 1; unsigned long inv_len = 0; - struct ib_umem_odp *result = NULL; - struct ib_umem_odp *odp; + struct mlx5_ib_mr *result = NULL; int ret; mutex_lock(&odp_imr->umem_mutex); - odp = odp_lookup(idx * MLX5_IMR_MTT_SIZE, 1, imr); for (idx = idx; idx <= end_idx; idx++) { - if (unlikely(!odp)) { - struct mlx5_ib_mr *mtt; + struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); + if (unlikely(!mtt)) { mtt = implicit_get_child_mr(imr, idx); if (IS_ERR(mtt)) { - result = ERR_CAST(mtt); + result = mtt; goto out; } - odp = to_ib_umem_odp(mtt->umem); inv_start_idx = min(inv_start_idx, idx); inv_len = idx - inv_start_idx + 1; } /* Return first odp if region not covered by single one */ if (likely(!result)) - result = odp; - - odp = odp_next(odp); - if (odp && ib_umem_start(odp) != idx * MLX5_IMR_MTT_SIZE) - odp = NULL; + result = mtt; } /* - * Any time the children in the interval tree are changed we must - * perform an update of the xlt before exiting to ensure the HW and - * the tree remains synchronized. + * Any time the implicit_children are changed we must perform an + * update of the xlt before exiting to ensure the HW and the + * implicit_children remains synchronized. 
*/ out: if (likely(!inv_len)) @@ -569,6 +496,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); + xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, mlx5_imr_ksm_entries, @@ -596,18 +524,15 @@ out_umem: void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { - struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr); - struct rb_node *node; + struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + struct mlx5_ib_mr *mtt; + unsigned long idx; - down_read(&per_mm->umem_rwsem); - for (node = rb_first_cached(&per_mm->umem_tree); node; - node = rb_next(node)) { - struct ib_umem_odp *umem_odp = - rb_entry(node, struct ib_umem_odp, interval_tree.rb); - struct mlx5_ib_mr *mr = umem_odp->private; + mutex_lock(&odp_imr->umem_mutex); + xa_for_each (&imr->implicit_children, idx, mtt) { + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mtt->umem); - if (mr->parent != imr) - continue; + xa_erase(&imr->implicit_children, idx); mutex_lock(&umem_odp->umem_mutex); ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), @@ -623,9 +548,12 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) schedule_work(&umem_odp->work); mutex_unlock(&umem_odp->umem_mutex); } - up_read(&per_mm->umem_rwsem); + mutex_unlock(&odp_imr->umem_mutex); wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); + WARN_ON(!xa_empty(&imr->implicit_children)); + /* Remove any left over reserved elements */ + xa_destroy(&imr->implicit_children); } #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) @@ -718,7 +646,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, u32 *bytes_mapped, u32 flags) { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - struct ib_umem_odp *child; + struct mlx5_ib_mr *mtt; int npages = 0; if (!odp->is_implicit_odp) { @@ -733,17 +661,18 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - io_virt < bcnt)) return -EFAULT; - child = implicit_mr_get_data(mr, io_virt, bcnt); - if (IS_ERR(child)) - return PTR_ERR(child); + mtt = implicit_mr_get_data(mr, io_virt, bcnt); + if (IS_ERR(mtt)) + return PTR_ERR(mtt); /* Fault each child mr that intersects with our interval. */ while (bcnt) { - u64 end = min_t(u64, io_virt + bcnt, ib_umem_end(child)); + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mtt->umem); + u64 end = min_t(u64, io_virt + bcnt, ib_umem_end(umem_odp)); u64 len = end - io_virt; int ret; - ret = pagefault_real_mr(child->private, child, io_virt, len, + ret = pagefault_real_mr(mtt, umem_odp, io_virt, len, bytes_mapped, flags); if (ret < 0) return ret; @@ -752,12 +681,14 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, npages += ret; if (unlikely(bcnt)) { - child = odp_next(child); + mtt = xa_load(&mr->implicit_children, + io_virt >> MLX5_IMR_MTT_SHIFT); + /* * implicit_mr_get_data sets up all the leaves, this * means they got invalidated before we got to them. 
*/ - if (!child || ib_umem_start(child) != io_virt) { + if (!mtt) { mlx5_ib_dbg( mr->dev, "next implicit leaf removed at 0x%llx.\n", diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 253df1a1fa54..28078efc3833 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -156,22 +156,6 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, umem_call_back cb, bool blockable, void *cookie); -/* - * Find first region intersecting with address range. - * Return NULL if not found - */ -static inline struct ib_umem_odp * -rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length) -{ - struct interval_tree_node *node; - - node = interval_tree_iter_first(root, addr, addr + length - 1); - if (!node) - return NULL; - return container_of(node, struct ib_umem_odp, interval_tree); - -} - static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp, unsigned long mmu_seq) { -- cgit v1.2.3-59-g8ed1b From 3389baa831b6a09e3c96e2a6283a1b952be2f0cd Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:30 -0300 Subject: RDMA/mlx5: Reduce locking in implicit_mr_get_data() Now that the child MRs are stored in an xarray we can rely on the SRCU lock to protect the xa_load and use xa_cmpxchg on the slow allocation path to resolve races with concurrent page fault. This reduces the scope of the critical section of umem_mutex for implicit MRs to only cover mlx5_ib_update_xlt, and avoids taking a lock at all if the child MR is already in the xarray. This makes it consistent with the normal ODP MR critical section for umem_lock, and the locking approach used for destroying an unusued implicit child MR. The MLX5_IB_UPD_XLT_ATOMIC is no longer needed in implicit_get_child_mr() since it is no longer called with any locks. Link: https://lore.kernel.org/r/20191009160934.3143-11-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 6f7eea175c72..00e14b6acd98 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -381,8 +381,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, MLX5_IMR_MTT_ENTRIES, PAGE_SHIFT, MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE | - MLX5_IB_UPD_XLT_ATOMIC); + MLX5_IB_UPD_XLT_ENABLE); if (err) { ret = ERR_PTR(err); goto out_release; @@ -392,9 +391,16 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, * Once the store to either xarray completes any error unwind has to * use synchronize_srcu(). Avoid this with xa_reserve() */ - err = xa_err(xa_store(&imr->implicit_children, idx, mr, GFP_KERNEL)); - if (err) { - ret = ERR_PTR(err); + ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, GFP_KERNEL); + if (unlikely(ret)) { + if (xa_is_err(ret)) { + ret = ERR_PTR(xa_err(ret)); + goto out_release; + } + /* + * Another thread beat us to creating the child mr, use + * theirs. 
+ */ goto out_release; } @@ -424,7 +430,8 @@ static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr, struct mlx5_ib_mr *result = NULL; int ret; - mutex_lock(&odp_imr->umem_mutex); + lockdep_assert_held(&imr->dev->odp_srcu); + for (idx = idx; idx <= end_idx; idx++) { struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); @@ -450,20 +457,27 @@ static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr, */ out: if (likely(!inv_len)) - goto out_unlock; + return result; + /* + * Notice this is not strictly ordered right, the KSM is updated after + * the implicit_leaves is updated, so a parallel page fault could see + * a MR that is not yet visible in the KSM. This is similar to a + * parallel page fault seeing a MR that is being concurrently removed + * from the KSM. Both of these improbable situations are resolved + * safely by resuming the HW and then taking another page fault. The + * next pagefault handler will see the new information. + */ + mutex_lock(&odp_imr->umem_mutex); ret = mlx5_ib_update_xlt(imr, inv_start_idx, inv_len, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); + mutex_unlock(&odp_imr->umem_mutex); if (ret) { mlx5_ib_err(to_mdev(imr->ibmr.pd->device), "Failed to update PAS\n"); - result = ERR_PTR(ret); - goto out_unlock; + return ERR_PTR(ret); } - -out_unlock: - mutex_unlock(&odp_imr->umem_mutex); return result; } -- cgit v1.2.3-59-g8ed1b From b70d785d237c0d3e4235c511f38f8ce64620f945 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:31 -0300 Subject: RDMA/mlx5: Avoid double lookups on the pagefault path Now that the locking is simplified combine pagefault_implicit_mr() with implicit_mr_get_data() so that we sweep over the idx range only once, and do the single xlt update at the end, after the child umems are setup. This avoids double iteration/xa_loads plus the sketchy failure path if the xa_load() fails. Link: https://lore.kernel.org/r/20191009160934.3143-12-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 186 +++++++++++++++++---------------------- 1 file changed, 80 insertions(+), 106 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 00e14b6acd98..8bd30db87c21 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -419,68 +419,6 @@ out_umem: return ret; } -static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr, - u64 io_virt, size_t bcnt) -{ - struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); - unsigned long end_idx = (io_virt + bcnt - 1) >> MLX5_IMR_MTT_SHIFT; - unsigned long idx = io_virt >> MLX5_IMR_MTT_SHIFT; - unsigned long inv_start_idx = end_idx + 1; - unsigned long inv_len = 0; - struct mlx5_ib_mr *result = NULL; - int ret; - - lockdep_assert_held(&imr->dev->odp_srcu); - - for (idx = idx; idx <= end_idx; idx++) { - struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); - - if (unlikely(!mtt)) { - mtt = implicit_get_child_mr(imr, idx); - if (IS_ERR(mtt)) { - result = mtt; - goto out; - } - inv_start_idx = min(inv_start_idx, idx); - inv_len = idx - inv_start_idx + 1; - } - - /* Return first odp if region not covered by single one */ - if (likely(!result)) - result = mtt; - } - - /* - * Any time the implicit_children are changed we must perform an - * update of the xlt before exiting to ensure the HW and the - * implicit_children remains synchronized. 
- */ -out: - if (likely(!inv_len)) - return result; - - /* - * Notice this is not strictly ordered right, the KSM is updated after - * the implicit_leaves is updated, so a parallel page fault could see - * a MR that is not yet visible in the KSM. This is similar to a - * parallel page fault seeing a MR that is being concurrently removed - * from the KSM. Both of these improbable situations are resolved - * safely by resuming the HW and then taking another page fault. The - * next pagefault handler will see the new information. - */ - mutex_lock(&odp_imr->umem_mutex); - ret = mlx5_ib_update_xlt(imr, inv_start_idx, inv_len, 0, - MLX5_IB_UPD_XLT_INDIRECT | - MLX5_IB_UPD_XLT_ATOMIC); - mutex_unlock(&odp_imr->umem_mutex); - if (ret) { - mlx5_ib_err(to_mdev(imr->ibmr.pd->device), - "Failed to update PAS\n"); - return ERR_PTR(ret); - } - return result; -} - struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags) @@ -647,6 +585,84 @@ out: return ret; } +static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, + struct ib_umem_odp *odp_imr, u64 user_va, + size_t bcnt, u32 *bytes_mapped, u32 flags) +{ + unsigned long end_idx = (user_va + bcnt - 1) >> MLX5_IMR_MTT_SHIFT; + unsigned long upd_start_idx = end_idx + 1; + unsigned long upd_len = 0; + unsigned long npages = 0; + int err; + int ret; + + if (unlikely(user_va >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE || + mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - user_va < bcnt)) + return -EFAULT; + + /* Fault each child mr that intersects with our interval. */ + while (bcnt) { + unsigned long idx = user_va >> MLX5_IMR_MTT_SHIFT; + struct ib_umem_odp *umem_odp; + struct mlx5_ib_mr *mtt; + u64 len; + + mtt = xa_load(&imr->implicit_children, idx); + if (unlikely(!mtt)) { + mtt = implicit_get_child_mr(imr, idx); + if (IS_ERR(mtt)) { + ret = PTR_ERR(mtt); + goto out; + } + upd_start_idx = min(upd_start_idx, idx); + upd_len = idx - upd_start_idx + 1; + } + + umem_odp = to_ib_umem_odp(mtt->umem); + len = min_t(u64, user_va + bcnt, ib_umem_end(umem_odp)) - + user_va; + + ret = pagefault_real_mr(mtt, umem_odp, user_va, len, + bytes_mapped, flags); + if (ret < 0) + goto out; + user_va += len; + bcnt -= len; + npages += ret; + } + + ret = npages; + + /* + * Any time the implicit_children are changed we must perform an + * update of the xlt before exiting to ensure the HW and the + * implicit_children remains synchronized. + */ +out: + if (likely(!upd_len)) + return ret; + + /* + * Notice this is not strictly ordered right, the KSM is updated after + * the implicit_children is updated, so a parallel page fault could + * see a MR that is not yet visible in the KSM. This is similar to a + * parallel page fault seeing a MR that is being concurrently removed + * from the KSM. Both of these improbable situations are resolved + * safely by resuming the HW and then taking another page fault. The + * next pagefault handler will see the new information. 
+ */ + mutex_lock(&odp_imr->umem_mutex); + err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ATOMIC); + mutex_unlock(&odp_imr->umem_mutex); + if (err) { + mlx5_ib_err(imr->dev, "Failed to update PAS\n"); + return err; + } + return ret; +} + /* * Returns: * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are @@ -660,8 +676,6 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, u32 *bytes_mapped, u32 flags) { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - struct mlx5_ib_mr *mtt; - int npages = 0; if (!odp->is_implicit_odp) { if (unlikely(io_virt < ib_umem_start(odp) || @@ -670,48 +684,8 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, return pagefault_real_mr(mr, odp, io_virt, bcnt, bytes_mapped, flags); } - - if (unlikely(io_virt >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE || - mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - io_virt < bcnt)) - return -EFAULT; - - mtt = implicit_mr_get_data(mr, io_virt, bcnt); - if (IS_ERR(mtt)) - return PTR_ERR(mtt); - - /* Fault each child mr that intersects with our interval. */ - while (bcnt) { - struct ib_umem_odp *umem_odp = to_ib_umem_odp(mtt->umem); - u64 end = min_t(u64, io_virt + bcnt, ib_umem_end(umem_odp)); - u64 len = end - io_virt; - int ret; - - ret = pagefault_real_mr(mtt, umem_odp, io_virt, len, - bytes_mapped, flags); - if (ret < 0) - return ret; - io_virt += len; - bcnt -= len; - npages += ret; - - if (unlikely(bcnt)) { - mtt = xa_load(&mr->implicit_children, - io_virt >> MLX5_IMR_MTT_SHIFT); - - /* - * implicit_mr_get_data sets up all the leaves, this - * means they got invalidated before we got to them. - */ - if (!mtt) { - mlx5_ib_dbg( - mr->dev, - "next implicit leaf removed at 0x%llx.\n", - io_virt); - return -EAGAIN; - } - } - } - return npages; + return pagefault_implicit_mr(mr, odp, io_virt, bcnt, bytes_mapped, + flags); } struct pf_frame { -- cgit v1.2.3-59-g8ed1b From 5256edcb98a14b11409a2d323f56a70a8b366363 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:32 -0300 Subject: RDMA/mlx5: Rework implicit ODP destroy Use SRCU in a sensible way by removing all MRs in the implicit tree from the two xarrays (the update operation), then a synchronize, followed by a normal single threaded teardown. This is only a little unusual from the normal pattern as there can still be some work pending in the unbound wq that may also require a workqueue flush. This is tracked with a single atomic, consolidating the redundant existing atomics and wait queue. For understand-ability the entire ODP implicit create/destroy flow now largely exists in a single pair of functions within odp.c, with a few support functions for tearing down an unused child. 
Link: https://lore.kernel.org/r/20191009160934.3143-13-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 - drivers/infiniband/hw/mlx5/mlx5_ib.h | 9 ++- drivers/infiniband/hw/mlx5/mr.c | 21 ++--- drivers/infiniband/hw/mlx5/odp.c | 152 +++++++++++++++++++++++------------ include/rdma/ib_umem_odp.h | 2 - 5 files changed, 120 insertions(+), 66 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4692c37b057c..add24b628900 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6146,8 +6146,6 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); WARN_ON(!xa_empty(&dev->odp_mkeys)); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - srcu_barrier(&dev->odp_srcu); cleanup_srcu_struct(&dev->odp_srcu); WARN_ON(!xa_empty(&dev->sig_mrs)); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 88769fcffb5a..b8c958f62628 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -618,10 +618,13 @@ struct mlx5_ib_mr { u64 pi_iova; /* For ODP and implicit */ - atomic_t num_leaf_free; - wait_queue_head_t q_leaf_free; - atomic_t num_pending_prefetch; + atomic_t num_deferred_work; struct xarray implicit_children; + union { + struct rcu_head rcu; + struct list_head elm; + struct work_struct work; + } odp_destroy; struct mlx5_async_work cb_work; }; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index fd94838a8845..1e91f61efa8a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1317,7 +1317,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (is_odp_mr(mr)) { to_ib_umem_odp(mr->umem)->private = mr; - atomic_set(&mr->num_pending_prefetch, 0); + atomic_set(&mr->num_deferred_work, 0); err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, GFP_KERNEL)); @@ -1573,17 +1573,15 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) synchronize_srcu(&dev->odp_srcu); /* dequeue pending prefetch requests for the mr */ - if (atomic_read(&mr->num_pending_prefetch)) + if (atomic_read(&mr->num_deferred_work)) { flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&mr->num_pending_prefetch)); + WARN_ON(atomic_read(&mr->num_deferred_work)); + } /* Destroy all page mappings */ - if (!umem_odp->is_implicit_odp) - mlx5_ib_invalidate_range(umem_odp, - ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); - else - mlx5_ib_free_implicit_mr(mr); + mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); + /* * We kill the umem before the MR for ODP, * so that there will not be any invalidations in @@ -1620,6 +1618,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr); } + if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) { + mlx5_ib_free_implicit_mr(mmr); + return 0; + } + dereg_mr(to_mdev(ibmr->device), mmr); return 0; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 8bd30db87c21..5cf93d8129a9 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -144,31 +144,79 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, } } -static void mr_leaf_free_action(struct work_struct 
*work) +/* + * This must be called after the mr has been removed from implicit_children + * and odp_mkeys and the SRCU synchronized. NOTE: The MR does not necessarily + * have to be empty here, parallel page faults could have raced with the free + * process and added pages to it. + */ +static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) { - struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); - int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; - struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; + struct mlx5_ib_mr *imr = mr->parent; struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; int srcu_key; - mr->parent = NULL; - synchronize_srcu(&mr->dev->odp_srcu); + /* implicit_child_mr's are not allowed to have deferred work */ + WARN_ON(atomic_read(&mr->num_deferred_work)); - if (xa_load(&mr->dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key))) { + if (need_imr_xlt) { srcu_key = srcu_read_lock(&mr->dev->odp_srcu); mutex_lock(&odp_imr->umem_mutex); - mlx5_ib_update_xlt(imr, idx, 1, 0, + mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); } - ib_umem_odp_release(odp); + + mr->parent = NULL; mlx5_mr_cache_free(mr->dev, mr); + ib_umem_odp_release(odp); + atomic_dec(&imr->num_deferred_work); +} + +static void free_implicit_child_mr_work(struct work_struct *work) +{ + struct mlx5_ib_mr *mr = + container_of(work, struct mlx5_ib_mr, odp_destroy.work); + + free_implicit_child_mr(mr, true); +} + +static void free_implicit_child_mr_rcu(struct rcu_head *head) +{ + struct mlx5_ib_mr *mr = + container_of(head, struct mlx5_ib_mr, odp_destroy.rcu); + + /* Freeing a MR is a sleeping operation, so bounce to a work queue */ + INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); + queue_work(system_unbound_wq, &mr->odp_destroy.work); +} + +static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) +{ + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; + struct mlx5_ib_mr *imr = mr->parent; - if (atomic_dec_and_test(&imr->num_leaf_free)) - wake_up(&imr->q_leaf_free); + xa_lock(&imr->implicit_children); + /* + * This can race with mlx5_ib_free_implicit_mr(), the first one to + * reach the xa lock wins the race and destroys the MR. 
+ */ + if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) != + mr) + goto out_unlock; + + __xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + atomic_inc(&imr->num_deferred_work); + call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu, + free_implicit_child_mr_rcu); + +out_unlock: + xa_unlock(&imr->implicit_children); } void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, @@ -240,15 +288,8 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, ib_umem_odp_unmap_dma_pages(umem_odp, start, end); - if (unlikely(!umem_odp->npages && mr->parent && - !umem_odp->dying)) { - xa_erase(&mr->parent->implicit_children, - ib_umem_start(umem_odp) >> MLX5_IMR_MTT_SHIFT); - xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); - umem_odp->dying = 1; - atomic_inc(&mr->parent->num_leaf_free); - schedule_work(&umem_odp->work); - } + if (unlikely(!umem_odp->npages && mr->parent)) + destroy_unused_implicit_child_mr(mr); mutex_unlock(&umem_odp->umem_mutex); } @@ -375,7 +416,6 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; mr->parent = imr; odp->private = mr; - INIT_WORK(&odp->work, mr_leaf_free_action); err = mlx5_ib_update_xlt(mr, 0, MLX5_IMR_MTT_ENTRIES, @@ -391,7 +431,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, * Once the store to either xarray completes any error unwind has to * use synchronize_srcu(). Avoid this with xa_reserve() */ - ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, GFP_KERNEL); + ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, + GFP_KERNEL); + if (likely(!ret)) + xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_ATOMIC); if (unlikely(ret)) { if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); @@ -404,9 +448,6 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_release; } - xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_ATOMIC); - mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); return mr; @@ -445,9 +486,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->ibmr.lkey = imr->mmkey.key; imr->ibmr.rkey = imr->mmkey.key; imr->umem = &umem_odp->umem; - init_waitqueue_head(&imr->q_leaf_free); - atomic_set(&imr->num_leaf_free, 0); - atomic_set(&imr->num_pending_prefetch, 0); + atomic_set(&imr->num_deferred_work, 0); xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, @@ -477,35 +516,48 @@ out_umem: void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + struct mlx5_ib_dev *dev = imr->dev; + struct list_head destroy_list; struct mlx5_ib_mr *mtt; + struct mlx5_ib_mr *tmp; unsigned long idx; - mutex_lock(&odp_imr->umem_mutex); - xa_for_each (&imr->implicit_children, idx, mtt) { - struct ib_umem_odp *umem_odp = to_ib_umem_odp(mtt->umem); + INIT_LIST_HEAD(&destroy_list); - xa_erase(&imr->implicit_children, idx); + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key)); + /* + * This stops the SRCU protected page fault path from touching either + * the imr or any children. The page fault path can only reach the + * children xarray via the imr. 
+ */ + synchronize_srcu(&dev->odp_srcu); - mutex_lock(&umem_odp->umem_mutex); - ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); + xa_lock(&imr->implicit_children); + xa_for_each (&imr->implicit_children, idx, mtt) { + __xa_erase(&imr->implicit_children, idx); + __xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key)); + list_add(&mtt->odp_destroy.elm, &destroy_list); + } + xa_unlock(&imr->implicit_children); - if (umem_odp->dying) { - mutex_unlock(&umem_odp->umem_mutex); - continue; - } + /* Fence access to the child pointers via the pagefault thread */ + synchronize_srcu(&dev->odp_srcu); - umem_odp->dying = 1; - atomic_inc(&imr->num_leaf_free); - schedule_work(&umem_odp->work); - mutex_unlock(&umem_odp->umem_mutex); + /* + * num_deferred_work can only be incremented inside the odp_srcu, or + * under xa_lock while the child is in the xarray. Thus at this point + * it is only decreasing, and all work holding it is now on the wq. + */ + if (atomic_read(&imr->num_deferred_work)) { + flush_workqueue(system_unbound_wq); + WARN_ON(atomic_read(&imr->num_deferred_work)); } - mutex_unlock(&odp_imr->umem_mutex); - wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); - WARN_ON(!xa_empty(&imr->implicit_children)); - /* Remove any left over reserved elements */ - xa_destroy(&imr->implicit_children); + list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm) + free_implicit_child_mr(mtt, false); + + mlx5_mr_cache_free(dev, imr); + ib_umem_odp_release(odp_imr); } #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) @@ -1579,7 +1631,7 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work) u32 i; for (i = 0; i < work->num_sge; ++i) - atomic_dec(&work->frags[i].mr->num_pending_prefetch); + atomic_dec(&work->frags[i].mr->num_deferred_work); kvfree(work); } @@ -1658,7 +1710,7 @@ static bool init_prefetch_work(struct ib_pd *pd, } /* Keep the MR pointer will valid outside the SRCU */ - atomic_inc(&work->frags[i].mr->num_pending_prefetch); + atomic_inc(&work->frags[i].mr->num_deferred_work); } work->num_sge = num_sge; return true; diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 28078efc3833..09b0e4494986 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -78,9 +78,7 @@ struct ib_umem_odp { bool is_implicit_odp; struct completion notifier_completion; - int dying; unsigned int page_shift; - struct work_struct work; }; static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) -- cgit v1.2.3-59-g8ed1b From d561987f34f263dd176ccd8fb782cb153d72f441 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:33 -0300 Subject: RDMA/mlx5: Do not store implicit children in the odp_mkeys xarray These mkeys are entirely internal and are never used by the HW for page fault. They should also never be used by userspace for prefetch. Simplify & optimize things by not including them in the xarray. Since the prefetch path can now never see a child mkey there is no need for the second synchronize_srcu() during imr destroy. 
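For illustration, with the children gone from odp_mkeys the prefetch-side lookup reduces to roughly the following (hypothetical helper name; the real code is get_prefetchable_mr(), which additionally checks the PD and the requested access):

  static struct mlx5_ib_mr *example_prefetch_lookup(struct mlx5_ib_dev *dev,
                                                    u32 lkey)
  {
        struct mlx5_core_mkey *mmkey;

        lockdep_assert_held(&dev->odp_srcu);

        mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey));
        if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR)
                return NULL;

        /*
         * Implicit children are never published in odp_mkeys any more, so no
         * separate "is this a child?" (mr->parent) filter is needed here.
         */
        return container_of(mmkey, struct mlx5_ib_mr, mmkey);
  }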
Link: https://lore.kernel.org/r/20191009160934.3143-14-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 36 ++++++------------------------------ 1 file changed, 6 insertions(+), 30 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 5cf93d8129a9..06e24d5e7609 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -146,9 +146,9 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, /* * This must be called after the mr has been removed from implicit_children - * and odp_mkeys and the SRCU synchronized. NOTE: The MR does not necessarily - * have to be empty here, parallel page faults could have raced with the free - * process and added pages to it. + * and the SRCU synchronized. NOTE: The MR does not necessarily have to be + * empty here, parallel page faults could have raced with the free process and + * added pages to it. */ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) { @@ -210,7 +210,6 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) mr) goto out_unlock; - __xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); atomic_inc(&imr->num_deferred_work); call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu, free_implicit_child_mr_rcu); @@ -401,13 +400,6 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(mr)) goto out_umem; - err = xa_reserve(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - GFP_KERNEL); - if (err) { - ret = ERR_PTR(err); - goto out_mr; - } - mr->ibmr.pd = imr->ibmr.pd; mr->access_flags = imr->access_flags; mr->umem = &odp->umem; @@ -424,7 +416,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, MLX5_IB_UPD_XLT_ENABLE); if (err) { ret = ERR_PTR(err); - goto out_release; + goto out_mr; } /* @@ -433,26 +425,21 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, */ ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, GFP_KERNEL); - if (likely(!ret)) - xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_ATOMIC); if (unlikely(ret)) { if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); - goto out_release; + goto out_mr; } /* * Another thread beat us to creating the child mr, use * theirs. */ - goto out_release; + goto out_mr; } mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); return mr; -out_release: - xa_release(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); out_mr: mlx5_mr_cache_free(imr->dev, mr); out_umem: @@ -535,14 +522,10 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) xa_lock(&imr->implicit_children); xa_for_each (&imr->implicit_children, idx, mtt) { __xa_erase(&imr->implicit_children, idx); - __xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key)); list_add(&mtt->odp_destroy.elm, &destroy_list); } xa_unlock(&imr->implicit_children); - /* Fence access to the child pointers via the pagefault thread */ - synchronize_srcu(&dev->odp_srcu); - /* * num_deferred_work can only be incremented inside the odp_srcu, or * under xa_lock while the child is in the xarray. Thus at this point @@ -1655,13 +1638,6 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, if (mr->ibmr.pd != pd) return NULL; - /* - * Implicit child MRs are internal and userspace should not refer to - * them. 
- */ - if (mr->parent) - return NULL; - odp = to_ib_umem_odp(mr->umem); /* prefetch with write-access must be supported by the MR */ -- cgit v1.2.3-59-g8ed1b From 09689703d29a3b75c510c198c3aca85d7d8b50c7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:34 -0300 Subject: RDMA/mlx5: Do not race with mlx5_ib_invalidate_range during create and destroy For creation, as soon as the umem_odp is created the notifier can be called, however the underlying MR may not have been setup yet. This would cause problems if mlx5_ib_invalidate_range() runs. There is some confusing/ulocked/racy code that might by trying to solve this, but without locks it isn't going to work right. Instead trivially solve the problem by short-circuiting the invalidation if there are not yet any DMA mapped pages. By definition there is nothing to invalidate in this case. The create code will have the umem fully setup before anything is DMA mapped, and npages is fully locked by the umem_mutex. For destroy, invalidate the entire MR at the HW to stop DMA then DMA unmap the pages before destroying the MR. This drives npages to zero and prevents similar racing with invalidate while the MR is undergoing destruction. Arguably it would be better if the umem was created after the MR and destroyed before, but that would require a big rework of the MR code. Fixes: 6aec21f6a832 ("IB/mlx5: Page faults handling infrastructure") Link: https://lore.kernel.org/r/20191009160934.3143-15-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++ drivers/infiniband/hw/mlx5/mr.c | 74 ++++++++++++------------------------ drivers/infiniband/hw/mlx5/odp.c | 70 +++++++++++++++++++++++++++++----- 3 files changed, 88 insertions(+), 59 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b8c958f62628..f61d4005c6c3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1171,6 +1171,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); +void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); @@ -1232,6 +1233,8 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry); void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); +int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr); + int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1e91f61efa8a..199f7959aaa5 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -50,7 +50,6 @@ enum { static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int mr_cache_max_order(struct mlx5_ib_dev *dev); -static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) { @@ -495,7 +494,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) c = order2idx(dev, mr->order); WARN_ON(c < 0 || c >= 
MAX_MR_CACHE_ENTRIES); - if (unreg_umr(dev, mr)) { + if (mlx5_mr_cache_invalidate(mr)) { mr->allocated_from_cache = false; destroy_mkey(dev, mr); ent = &cache->ent[c]; @@ -1333,22 +1332,29 @@ error: return ERR_PTR(err); } -static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +/** + * mlx5_mr_cache_invalidate - Fence all DMA on the MR + * @mr: The MR to fence + * + * Upon return the NIC will not be doing any DMA to the pages under the MR, + * and any DMA inprogress will be completed. Failure of this function + * indicates the HW has failed catastrophically. + */ +int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr) { - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_umr_wr umrwr = {}; - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) return 0; umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.pd = dev->umrc.pd; + umrwr.pd = mr->dev->umrc.pd; umrwr.mkey = mr->mmkey.key; umrwr.ignore_free_state = 1; - return mlx5_ib_post_send_wait(dev, &umrwr); + return mlx5_ib_post_send_wait(mr->dev, &umrwr); } static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, @@ -1432,7 +1438,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, * UMR can't be used - MKey needs to be replaced. */ if (mr->allocated_from_cache) - err = unreg_umr(dev, mr); + err = mlx5_mr_cache_invalidate(mr); else err = destroy_mkey(dev, mr); if (err) @@ -1561,52 +1567,20 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) int npages = mr->npages; struct ib_umem *umem = mr->umem; - if (is_odp_mr(mr)) { - struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem); - - /* Prevent new page faults and - * prefetch requests from succeeding - */ - xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); - - /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&dev->odp_srcu); - - /* dequeue pending prefetch requests for the mr */ - if (atomic_read(&mr->num_deferred_work)) { - flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&mr->num_deferred_work)); - } - - /* Destroy all page mappings */ - mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); - - /* - * We kill the umem before the MR for ODP, - * so that there will not be any invalidations in - * flight, looking at the *mr struct. - */ - ib_umem_odp_release(umem_odp); - atomic_sub(npages, &dev->mdev->priv.reg_pages); - - /* Avoid double-freeing the umem. */ - umem = NULL; - } + /* Stop all DMA */ + if (is_odp_mr(mr)) + mlx5_ib_fence_odp_mr(mr); + else + clean_mr(dev, mr); - clean_mr(dev, mr); + if (mr->allocated_from_cache) + mlx5_mr_cache_free(dev, mr); + else + kfree(mr); - /* - * We should unregister the DMA address from the HCA before - * remove the DMA mapping. 
- */ - mlx5_mr_cache_free(dev, mr); ib_umem_release(umem); - if (umem) - atomic_sub(npages, &dev->mdev->priv.reg_pages); + atomic_sub(npages, &dev->mdev->priv.reg_pages); - if (!mr->allocated_from_cache) - kfree(mr); } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 06e24d5e7609..bcfc09846697 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -144,6 +144,27 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, } } +static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) +{ + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + + /* Ensure mlx5_ib_invalidate_range() will not touch the MR any more */ + mutex_lock(&odp->umem_mutex); + if (odp->npages) { + mlx5_mr_cache_invalidate(mr); + ib_umem_odp_unmap_dma_pages(odp, ib_umem_start(odp), + ib_umem_end(odp)); + WARN_ON(odp->npages); + } + odp->private = NULL; + mutex_unlock(&odp->umem_mutex); + + if (!mr->allocated_from_cache) { + mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey); + WARN_ON(mr->descs); + } +} + /* * This must be called after the mr has been removed from implicit_children * and the SRCU synchronized. NOTE: The MR does not necessarily have to be @@ -171,6 +192,8 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); } + dma_fence_odp_mr(mr); + mr->parent = NULL; mlx5_mr_cache_free(mr->dev, mr); ib_umem_odp_release(odp); @@ -228,16 +251,15 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, int in_block = 0; u64 addr; - if (!umem_odp) { - pr_err("invalidation called on NULL umem or non-ODP umem\n"); - return; - } - + mutex_lock(&umem_odp->umem_mutex); + /* + * If npages is zero then umem_odp->private may not be setup yet. This + * does not complete until after the first page is mapped for DMA. + */ + if (!umem_odp->npages) + goto out; mr = umem_odp->private; - if (!mr || !mr->ibmr.pd) - return; - start = max_t(u64, ib_umem_start(umem_odp), start); end = min_t(u64, ib_umem_end(umem_odp), end); @@ -247,7 +269,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, * overwrite the same MTTs. Concurent invalidations might race us, * but they will write 0s as well, so no difference in the end result. */ - mutex_lock(&umem_odp->umem_mutex); for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) { idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; /* @@ -289,6 +310,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, if (unlikely(!umem_odp->npages && mr->parent)) destroy_unused_implicit_child_mr(mr); +out: mutex_unlock(&umem_odp->umem_mutex); } @@ -536,6 +558,13 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) WARN_ON(atomic_read(&imr->num_deferred_work)); } + /* + * Fence the imr before we destroy the children. This allows us to + * skip updating the XLT of the imr during destroy of the child mkey + * the imr points to. + */ + mlx5_mr_cache_invalidate(imr); + list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm) free_implicit_child_mr(mtt, false); @@ -543,6 +572,29 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) ib_umem_odp_release(odp_imr); } +/** + * mlx5_ib_fence_odp_mr - Stop all access to the ODP MR + * @mr: to fence + * + * On return no parallel threads will be touching this MR and no DMA will be + * active. 
+ */ +void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) +{ + /* Prevent new page faults and prefetch requests from succeeding */ + xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + + /* Wait for all running page-fault handlers to finish. */ + synchronize_srcu(&mr->dev->odp_srcu); + + if (atomic_read(&mr->num_deferred_work)) { + flush_workqueue(system_unbound_wq); + WARN_ON(atomic_read(&mr->num_deferred_work)); + } + + dma_fence_odp_mr(mr); +} + #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, u64 user_va, size_t bcnt, u32 *bytes_mapped, -- cgit v1.2.3-59-g8ed1b From 46870b2391d5163e84180e051fdabadd502d8b44 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:35 -0300 Subject: RDMA/odp: Remove broken debugging call to invalidate_range invalidate_range() also obtains the umem_mutex which is being held at this point, so if this path were was ever called it would deadlock. Thus conclude the debugging never triggers and rework it into a simple WARN_ON and leave things as they are. While here add a note to explain how we could possibly get inconsistent page pointers. Link: https://lore.kernel.org/r/20191009160934.3143-16-jgg@ziepe.ca Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem_odp.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 163ff7ba92b7..d7d5fadf0899 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -508,7 +508,6 @@ static int ib_umem_odp_map_dma_single_page( { struct ib_device *dev = umem_odp->umem.ibdev; dma_addr_t dma_addr; - int remove_existing_mapping = 0; int ret = 0; /* @@ -534,28 +533,29 @@ static int ib_umem_odp_map_dma_single_page( } else if (umem_odp->page_list[page_index] == page) { umem_odp->dma_list[page_index] |= access_mask; } else { - pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n", - umem_odp->page_list[page_index], page); - /* Better remove the mapping now, to prevent any further - * damage. */ - remove_existing_mapping = 1; + /* + * This is a race here where we could have done: + * + * CPU0 CPU1 + * get_user_pages() + * invalidate() + * page_fault() + * mutex_lock(umem_mutex) + * page from GUP != page in ODP + * + * It should be prevented by the retry test above as reading + * the seq number should be reliable under the + * umem_mutex. Thus something is really not working right if + * things get here. + */ + WARN(true, + "Got different pages in IB device and from get_user_pages. 
IB device page: %p, gup page: %p\n", + umem_odp->page_list[page_index], page); + ret = -EAGAIN; } out: put_user_page(page); - - if (remove_existing_mapping) { - ib_umem_notifier_start_account(umem_odp); - dev->ops.invalidate_range( - umem_odp, - ib_umem_start(umem_odp) + - (page_index << umem_odp->page_shift), - ib_umem_start(umem_odp) + - ((page_index + 1) << umem_odp->page_shift)); - ib_umem_notifier_end_account(umem_odp); - ret = -EAGAIN; - } - return ret; } -- cgit v1.2.3-59-g8ed1b From d5b60e26e86a463ca83bb5ec502dda6ea685159e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 7 Oct 2019 23:18:08 +0200 Subject: RDMA/hns: Fix build error again This is not the first attempt to fix building random configurations, unfortunately the attempt in commit a07fc0bb483e ("RDMA/hns: Fix build error") caused a new problem when CONFIG_INFINIBAND_HNS_HIP06=m and CONFIG_INFINIBAND_HNS_HIP08=y: drivers/infiniband/hw/hns/hns_roce_main.o:(.rodata+0xe60): undefined reference to `__this_module' Revert commits a07fc0bb483e ("RDMA/hns: Fix build error") and a3e2d4c7e766 ("RDMA/hns: remove obsolete Kconfig comment") to get back to the previous state, then fix the issues described there differently, by adding more specific dependencies: INFINIBAND_HNS can now only be built-in if at least one of HNS or HNS3 are built-in, and the individual back-ends are only available if that code is reachable from the main driver. Fixes: a07fc0bb483e ("RDMA/hns: Fix build error") Fixes: a3e2d4c7e766 ("RDMA/hns: remove obsolete Kconfig comment") Fixes: dd74282df573 ("RDMA/hns: Initialize the PCI device for hip08 RoCE") Fixes: 08805fdbeb2d ("RDMA/hns: Split hw v1 driver from hns roce driver") Link: https://lore.kernel.org/r/20191007211826.3361202-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/Kconfig | 17 ++++++++++++++--- drivers/infiniband/hw/hns/Makefile | 8 ++++++-- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index d602b698b57e..4921c1e40ccd 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -1,23 +1,34 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_HNS - bool "HNS RoCE Driver" + tristate "HNS RoCE Driver" depends on NET_VENDOR_HISILICON depends on ARM64 || (COMPILE_TEST && 64BIT) + depends on (HNS_DSAF && HNS_ENET) || HNS3 ---help--- This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine is used in Hisilicon Hip06 and more further ICT SoC based on platform device. + To compile HIP06 or HIP08 driver as module, choose M here. + config INFINIBAND_HNS_HIP06 - tristate "Hisilicon Hip06 Family RoCE support" + bool "Hisilicon Hip06 Family RoCE support" depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET + depends on INFINIBAND_HNS=m || (HNS_DSAF=y && HNS_ENET=y) ---help--- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and Hip07 SoC. These RoCE engines are platform devices. + To compile this driver, choose Y here: if INFINIBAND_HNS is m, this + module will be called hns-roce-hw-v1 + config INFINIBAND_HNS_HIP08 - tristate "Hisilicon Hip08 Family RoCE support" + bool "Hisilicon Hip08 Family RoCE support" depends on INFINIBAND_HNS && PCI && HNS3 + depends on INFINIBAND_HNS=m || HNS3=y ---help--- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC. The RoCE engine is a PCI device. 
+ + To compile this driver, choose Y here: if INFINIBAND_HNS is m, this + module will be called hns-roce-hw-v2. diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index 449a2d81319d..e105945b94a1 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -9,8 +9,12 @@ hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o +ifdef CONFIG_INFINIBAND_HNS_HIP06 hns-roce-hw-v1-objs := hns_roce_hw_v1.o $(hns-roce-objs) -obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o +obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v1.o +endif +ifdef CONFIG_INFINIBAND_HNS_HIP08 hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o $(hns-roce-objs) -obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o +obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o +endif -- cgit v1.2.3-59-g8ed1b From 546d30099ed204792083f043cd7e016de86016a3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 07:57:21 +0200 Subject: RDMA/mlx5: Return proper error value Returned value from mlx5_mr_cache_alloc() is checked to be error or real pointer. Return proper error code instead of NULL which is not checked later. Fixes: 81713d3788d2 ("IB/mlx5: Add implicit MR support") Link: https://lore.kernel.org/r/20191029055721.7192-1-leon@kernel.org Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index f583476f5c8b..077ca10d9ed9 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -411,7 +411,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) { mlx5_ib_err(dev, "cache entry %d is out of range\n", entry); - return NULL; + return ERR_PTR(-EINVAL); } ent = &cache->ent[entry]; -- cgit v1.2.3-59-g8ed1b From 11f552e21755cb6f804572243a1502b6bbd008dd Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Mon, 10 Jun 2019 15:21:24 +0300 Subject: IB/mlx5: Test write combining support Linux can run in all sorts of physical machines and VMs where write combining may or may not be supported. Currently there is no way to reliably tell if the system supports WC, or not. The driver uses WC to optimize posting work to the HCA, and getting this wrong in either direction can cause a significant performance loss. Add a test in mlx5_ib initialization process to test whether write-combining is supported on the machine. The test will run as part of the enable_driver callback to ensure that the test runs after the device is setup and can create and modify the QP needed, but runs before the device is exposed to the users. The test opens UD QP and posts NOP WQEs, the WQE written to the BlueFlame is different from the WQE in memory, requesting CQE only on the BlueFlame WQE. By checking whether we received a completion on one of these WQEs we can know if BlueFlame succeeded and this write-combining must be supported. Change reporting of BlueFlame support to be dependent on write-combining support instead of the FW's guess as to what the machine can do. 
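The decision itself boils down to something like the sketch below (simplified: WR_ID_BF is the constant added in mem.c by this patch, but the real test bounds the polling with a jiffies timeout instead of spinning forever):

  static bool example_wc_detected(struct ib_cq *cq)
  {
        struct ib_wc wc = {};
        int ret;

        /*
         * Only the copy of the NOP WQEs pushed through the BlueFlame
         * (write-combining) doorbell requests a CQE, so a completion carrying
         * WR_ID_BF can only appear if the WC write actually reached the HCA.
         */
        do {
                ret = ib_poll_cq(cq, 1, &wc);
        } while (ret == 0);     /* the real test gives up after ~100ms */

        return ret == 1 && wc.status == IB_WC_SUCCESS && wc.wr_id == WR_ID_BF;
  }

If the first completion seen is instead the signalled end-of-test WQE, the BlueFlame writes never made it to the device and wc_support is left off.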
Link: https://lore.kernel.org/r/20191027062234.10993-1-leon@kernel.org Signed-off-by: Michael Guralnik Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 15 ++- drivers/infiniband/hw/mlx5/mem.c | 199 +++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 ++ drivers/infiniband/hw/mlx5/qp.c | 6 +- 4 files changed, 223 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0a8a1a129f17..22db87278c40 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1796,7 +1796,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, return -EINVAL; resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); - if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) + if (dev->wc_support) resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); resp.cache_line_size = cache_line_size(); resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); @@ -6256,6 +6256,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .disassociate_ucontext = mlx5_ib_disassociate_ucontext, .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, + .enable_driver = mlx5_ib_enable_driver, .fill_res_entry = mlx5_ib_fill_res_entry, .fill_stat_entry = mlx5_ib_fill_stat_entry, .get_dev_fw_str = get_dev_fw_str, @@ -6699,6 +6700,18 @@ static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) } } +int mlx5_ib_enable_driver(struct ib_device *dev) +{ + struct mlx5_ib_dev *mdev = to_mdev(dev); + int ret; + + ret = mlx5_ib_test_wc(mdev); + mlx5_ib_dbg(mdev, "Write-Combining %s", + mdev->wc_support ? "supported" : "not supported"); + + return ret; +} + void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index b5aece786b36..048f4e974a61 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -34,6 +34,7 @@ #include #include #include "mlx5_ib.h" +#include /* @umem: umem object to scan * @addr: ib virtual address requested by the user @@ -216,3 +217,201 @@ int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) *offset = buf_off >> ilog2(off_size); return 0; } + +#define WR_ID_BF 0xBF +#define WR_ID_END 0xBAD +#define TEST_WC_NUM_WQES 255 +#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100) +static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id, + bool signaled) +{ + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_bf *bf = &qp->bf; + __be32 mmio_wqe[16] = {}; + unsigned long flags; + unsigned int idx; + int i; + + if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) + return -EIO; + + spin_lock_irqsave(&qp->sq.lock, flags); + + idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); + + memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg)); + ctrl->fm_ce_se = signaled ? 
MLX5_WQE_CTRL_CQ_UPDATE : 0; + ctrl->opmod_idx_opcode = + cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP); + ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) | + (qp->trans_qp.base.mqp.qpn << 8)); + + qp->sq.wrid[idx] = wr_id; + qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP; + qp->sq.wqe_head[idx] = qp->sq.head + 1; + qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg), + MLX5_SEND_WQE_BB); + qp->sq.w_list[idx].next = qp->sq.cur_post; + qp->sq.head++; + + memcpy(mmio_wqe, ctrl, sizeof(*ctrl)); + ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |= + MLX5_WQE_CTRL_CQ_UPDATE; + + /* Make sure that descriptors are written before + * updating doorbell record and ringing the doorbell + */ + wmb(); + + qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); + + /* Make sure doorbell record is visible to the HCA before + * we hit doorbell + */ + wmb(); + for (i = 0; i < 8; i++) + mlx5_write64(&mmio_wqe[i * 2], + bf->bfreg->map + bf->offset + i * 8); + + bf->offset ^= bf->buf_size; + + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return 0; +} + +static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq) +{ + int ret; + struct ib_wc wc = {}; + unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES; + + do { + ret = ib_poll_cq(cq, 1, &wc); + if (ret < 0 || wc.status) + return ret < 0 ? ret : -EINVAL; + if (ret) + break; + } while (!time_after(jiffies, end)); + + if (!ret) + return -ETIMEDOUT; + + if (wc.wr_id != WR_ID_BF) + ret = 0; + + return ret; +} + +static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp) +{ + int err, i; + + for (i = 0; i < TEST_WC_NUM_WQES; i++) { + err = post_send_nop(dev, qp, WR_ID_BF, false); + if (err) + return err; + } + + return post_send_nop(dev, qp, WR_ID_END, true); +} + +int mlx5_ib_test_wc(struct mlx5_ib_dev *dev) +{ + struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 }; + int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type); + struct ib_qp_init_attr qp_init_attr = { + .cap = { .max_send_wr = TEST_WC_NUM_WQES }, + .qp_type = IB_QPT_UD, + .sq_sig_type = IB_SIGNAL_REQ_WR, + .create_flags = MLX5_IB_QP_CREATE_WC_TEST, + }; + struct ib_qp_attr qp_attr = { .port_num = 1 }; + struct ib_device *ibdev = &dev->ib_dev; + struct ib_qp *qp; + struct ib_cq *cq; + struct ib_pd *pd; + int ret; + + if (!MLX5_CAP_GEN(dev->mdev, bf)) + return 0; + + if (!dev->mdev->roce.roce_en && + port_type_cap == MLX5_CAP_PORT_TYPE_ETH) { + if (mlx5_core_is_pf(dev->mdev)) + dev->wc_support = true; + return 0; + } + + ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false); + if (ret) + goto print_err; + + if (!dev->wc_bfreg.wc) + goto out1; + + pd = ib_alloc_pd(ibdev, 0); + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); + goto out1; + } + + cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + goto out2; + } + + qp_init_attr.recv_cq = cq; + qp_init_attr.send_cq = cq; + qp = ib_create_qp(pd, &qp_init_attr); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto out3; + } + + qp_attr.qp_state = IB_QPS_INIT; + ret = ib_modify_qp(qp, &qp_attr, + IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX | + IB_QP_QKEY); + if (ret) + goto out4; + + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); + if (ret) + goto out4; + + qp_attr.qp_state = IB_QPS_RTS; + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) + goto out4; + + ret = test_wc_do_send(dev, qp); + if (ret < 0) + goto out4; + + ret = test_wc_poll_cq_result(dev, cq); + if 
(ret > 0) { + dev->wc_support = true; + ret = 0; + } + +out4: + ib_destroy_qp(qp); +out3: + ib_destroy_cq(cq); +out2: + ib_dealloc_pd(pd); +out1: + mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg); +print_err: + if (ret) + mlx5_ib_err( + dev, + "Error %d while trying to test write-combining support\n", + ret); + return ret; +} diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5b4c5751a98f..0bdb8b45ea15 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -248,6 +248,7 @@ struct mlx5_ib_flow_db { * rely on the range reserved for that use in the ib_qp_create_flags enum. */ #define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START +#define MLX5_IB_QP_CREATE_WC_TEST (IB_QP_CREATE_RESERVED_START << 1) struct wr_list { u16 opcode; @@ -966,6 +967,7 @@ struct mlx5_ib_dev { u8 fill_delay:1; u8 is_rep:1; u8 lag_active:1; + u8 wc_support:1; struct umr_common umrc; /* sync used page count stats */ @@ -993,6 +995,7 @@ struct mlx5_ib_dev { /* Array with num_ports elements */ struct mlx5_ib_port *port; struct mlx5_sq_bfreg bfreg; + struct mlx5_sq_bfreg wc_bfreg; struct mlx5_sq_bfreg fp_bfreg; struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; @@ -1506,4 +1509,7 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, return true; } + +int mlx5_ib_enable_driver(struct ib_device *dev); +int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index bb3f432e2fb6..c831b455ffa8 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1041,11 +1041,14 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_IPOIB_UD_LSO | IB_QP_CREATE_NETIF_QP | - MLX5_IB_QP_CREATE_SQPN_QP1)) + MLX5_IB_QP_CREATE_SQPN_QP1 | + MLX5_IB_QP_CREATE_WC_TEST)) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) qp->bf.bfreg = &dev->fp_bfreg; + else if (init_attr->create_flags & MLX5_IB_QP_CREATE_WC_TEST) + qp->bf.bfreg = &dev->wc_bfreg; else qp->bf.bfreg = &dev->bfreg; @@ -5328,7 +5331,6 @@ out: * we hit doorbell */ wmb(); - /* currently we support only regular doorbells */ mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); /* Make sure doorbells don't leak out of SQ spinlock * and reach the HCA out of order. -- cgit v1.2.3-59-g8ed1b From b86deba977a91aaa5a4b725d7b42970e6de28d2c Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:10 +0200 Subject: RDMA/core: Move core content from ib_uverbs to ib_core Move functionality that is called by the driver, which is related to umap, to a new file that will be linked in ib_core. This is a first step in later enabling ib_uverbs to be optional. vm_ops is now initialized in ib_uverbs_mmap instead of priv_init to avoid having to move all the rdma_umap functions as well. 
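For orientation, a minimal sketch of the caller side (the foo_* names and the doorbell field are hypothetical): after this move a driver's ->mmap handler keeps calling rdma_user_mmap_io() exactly as before; the helper is simply exported by ib_core now, and vm_ops is installed by ib_uverbs_mmap() before the driver op runs.

static int foo_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
{
	struct foo_ucontext *ctx = to_foo_ucontext(uctx);	/* hypothetical */

	/* Map a single doorbell page of BAR space as non-cached IO memory */
	return rdma_user_mmap_io(uctx, vma, ctx->db_phys_addr >> PAGE_SHIFT,
				 PAGE_SIZE,
				 pgprot_noncached(vma->vm_page_prot));
}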
Link: https://lore.kernel.org/r/20191030094417.16866-2-michal.kalderon@marvell.com Suggested-by: Jason Gunthorpe Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/core_priv.h | 9 ++++ drivers/infiniband/core/ib_core_uverbs.c | 73 +++++++++++++++++++++++++++++++ drivers/infiniband/core/uverbs_main.c | 74 ++------------------------------ 4 files changed, 86 insertions(+), 72 deletions(-) create mode 100644 drivers/infiniband/core/ib_core_uverbs.c (limited to 'drivers') diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 09881bd5f12d..9a8871e21545 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -11,7 +11,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ - nldev.o restrack.o counters.o + nldev.o restrack.o counters.o ib_core_uverbs.o ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 3a8b0911c3bc..0252da9560f4 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -387,4 +387,13 @@ int ib_device_set_netns_put(struct sk_buff *skb, int rdma_nl_net_init(struct rdma_dev_net *rnet); void rdma_nl_net_exit(struct rdma_dev_net *rnet); + +struct rdma_umap_priv { + struct vm_area_struct *vma; + struct list_head list; +}; + +void rdma_umap_priv_init(struct rdma_umap_priv *priv, + struct vm_area_struct *vma); + #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c new file mode 100644 index 000000000000..b74d2a2fb342 --- /dev/null +++ b/drivers/infiniband/core/ib_core_uverbs.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2019 Marvell. All rights reserved. + */ +#include +#include "uverbs.h" +#include "core_priv.h" + +/* + * Each time we map IO memory into user space this keeps track of the mapping. + * When the device is hot-unplugged we 'zap' the mmaps in user space to point + * to the zero page and allow the hot unplug to proceed. + * + * This is necessary for cases like PCI physical hot unplug as the actual BAR + * memory may vanish after this and access to it from userspace could MCE. + * + * RDMA drivers supporting disassociation must have their user space designed + * to cope in some way with their IO pages going to the zero page. + */ +void rdma_umap_priv_init(struct rdma_umap_priv *priv, + struct vm_area_struct *vma) +{ + struct ib_uverbs_file *ufile = vma->vm_file->private_data; + + priv->vma = vma; + vma->vm_private_data = priv; + /* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */ + + mutex_lock(&ufile->umap_lock); + list_add(&priv->list, &ufile->umaps); + mutex_unlock(&ufile->umap_lock); +} +EXPORT_SYMBOL(rdma_umap_priv_init); + +/* + * Map IO memory into a process. This is to be called by drivers as part of + * their mmap() functions if they wish to send something like PCI-E BAR memory + * to userspace. 
+ */ +int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + struct ib_uverbs_file *ufile = ucontext->ufile; + struct rdma_umap_priv *priv; + + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + if (vma->vm_end - vma->vm_start != size) + return -EINVAL; + + /* Driver is using this wrong, must be called by ib_uverbs_mmap */ + if (WARN_ON(!vma->vm_file || + vma->vm_file->private_data != ufile)) + return -EINVAL; + lockdep_assert_held(&ufile->device->disassociate_srcu); + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + vma->vm_page_prot = prot; + if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { + kfree(priv); + return -EAGAIN; + } + + rdma_umap_priv_init(priv, vma); + return 0; +} +EXPORT_SYMBOL(rdma_user_mmap_io); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index db98111b47f4..b1f5334ff907 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -772,6 +772,8 @@ out_unlock: return (ret) ? : count; } +static const struct vm_operations_struct rdma_umap_ops; + static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) { struct ib_uverbs_file *file = filp->private_data; @@ -785,45 +787,13 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) ret = PTR_ERR(ucontext); goto out; } - + vma->vm_ops = &rdma_umap_ops; ret = ucontext->device->ops.mmap(ucontext, vma); out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return ret; } -/* - * Each time we map IO memory into user space this keeps track of the mapping. - * When the device is hot-unplugged we 'zap' the mmaps in user space to point - * to the zero page and allow the hot unplug to proceed. - * - * This is necessary for cases like PCI physical hot unplug as the actual BAR - * memory may vanish after this and access to it from userspace could MCE. - * - * RDMA drivers supporting disassociation must have their user space designed - * to cope in some way with their IO pages going to the zero page. - */ -struct rdma_umap_priv { - struct vm_area_struct *vma; - struct list_head list; -}; - -static const struct vm_operations_struct rdma_umap_ops; - -static void rdma_umap_priv_init(struct rdma_umap_priv *priv, - struct vm_area_struct *vma) -{ - struct ib_uverbs_file *ufile = vma->vm_file->private_data; - - priv->vma = vma; - vma->vm_private_data = priv; - vma->vm_ops = &rdma_umap_ops; - - mutex_lock(&ufile->umap_lock); - list_add(&priv->list, &ufile->umaps); - mutex_unlock(&ufile->umap_lock); -} - /* * The VMA has been dup'd, initialize the vm_private_data with a new tracking * struct @@ -931,44 +901,6 @@ static const struct vm_operations_struct rdma_umap_ops = { .fault = rdma_umap_fault, }; -/* - * Map IO memory into a process. This is to be called by drivers as part of - * their mmap() functions if they wish to send something like PCI-E BAR memory - * to userspace. 
- */ -int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, pgprot_t prot) -{ - struct ib_uverbs_file *ufile = ucontext->ufile; - struct rdma_umap_priv *priv; - - if (!(vma->vm_flags & VM_SHARED)) - return -EINVAL; - - if (vma->vm_end - vma->vm_start != size) - return -EINVAL; - - /* Driver is using this wrong, must be called by ib_uverbs_mmap */ - if (WARN_ON(!vma->vm_file || - vma->vm_file->private_data != ufile)) - return -EINVAL; - lockdep_assert_held(&ufile->device->disassociate_srcu); - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - vma->vm_page_prot = prot; - if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { - kfree(priv); - return -EAGAIN; - } - - rdma_umap_priv_init(priv, vma); - return 0; -} -EXPORT_SYMBOL(rdma_user_mmap_io); - void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) { struct rdma_umap_priv *priv, *next_priv; -- cgit v1.2.3-59-g8ed1b From 3411f9f01b76bd88aa6e0e013847ab6479cb4f24 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:11 +0200 Subject: RDMA/core: Create mmap database and cookie helper functions Create some common API's for adding entries to a xa_mmap. Searching for an entry and freeing one. The general approach is copied from the EFA driver and improved to be more general and do more to help the drivers. Integration with the core allows a reference counted scheme with a free function so that the driver can know when its mmaps are all gone. This significant new functionality will be helpful for drivers to have the correct lifetime model for mmap objects. Link: https://lore.kernel.org/r/20191030094417.16866-3-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/ib_core_uverbs.c | 236 +++++++++++++++++++++++++++++++ drivers/infiniband/core/rdma_core.c | 1 + drivers/infiniband/core/uverbs_cmd.c | 2 + include/rdma/ib_verbs.h | 35 +++++ 5 files changed, 275 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f8d383ceae05..e785bebaf16e 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2642,6 +2642,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, map_mr_sg_pi); SET_DEVICE_OP(dev_ops, map_phys_fmr); SET_DEVICE_OP(dev_ops, mmap); + SET_DEVICE_OP(dev_ops, mmap_free); SET_DEVICE_OP(dev_ops, modify_ah); SET_DEVICE_OP(dev_ops, modify_cq); SET_DEVICE_OP(dev_ops, modify_device); diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c index b74d2a2fb342..aacd84a45de6 100644 --- a/drivers/infiniband/core/ib_core_uverbs.c +++ b/drivers/infiniband/core/ib_core_uverbs.c @@ -71,3 +71,239 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, return 0; } EXPORT_SYMBOL(rdma_user_mmap_io); + +/** + * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa + * + * @ucontext: associated user context + * @pgoff: The mmap offset >> PAGE_SHIFT + * + * This function is called when a user tries to mmap with an offset (returned + * by rdma_user_mmap_get_offset()) it initially received from the driver. The + * rdma_user_mmap_entry was created by the function + * rdma_user_mmap_entry_insert(). 
This function increases the refcnt of the + * entry so that it won't be deleted from the xarray in the meantime. + * + * Return an reference to an entry if exists or NULL if there is no + * match. rdma_user_mmap_entry_put() must be called to put the reference. + */ +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, + unsigned long pgoff) +{ + struct rdma_user_mmap_entry *entry; + + if (pgoff > U32_MAX) + return NULL; + + xa_lock(&ucontext->mmap_xa); + + entry = xa_load(&ucontext->mmap_xa, pgoff); + + /* + * If refcount is zero, entry is already being deleted, driver_removed + * indicates that the no further mmaps are possible and we waiting for + * the active VMAs to be closed. + */ + if (!entry || entry->start_pgoff != pgoff || entry->driver_removed || + !kref_get_unless_zero(&entry->ref)) + goto err; + + xa_unlock(&ucontext->mmap_xa); + + ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n", + pgoff, entry->npages); + + return entry; + +err: + xa_unlock(&ucontext->mmap_xa); + return NULL; +} +EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff); + +/** + * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa + * + * @ucontext: associated user context + * @vma: the vma being mmap'd into + * + * This function is like rdma_user_mmap_entry_get_pgoff() except that it also + * checks that the VMA is correct. + */ +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, + struct vm_area_struct *vma) +{ + struct rdma_user_mmap_entry *entry; + + if (!(vma->vm_flags & VM_SHARED)) + return NULL; + entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff); + if (!entry) + return NULL; + if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) { + rdma_user_mmap_entry_put(entry); + return NULL; + } + return entry; +} +EXPORT_SYMBOL(rdma_user_mmap_entry_get); + +static void rdma_user_mmap_entry_free(struct kref *kref) +{ + struct rdma_user_mmap_entry *entry = + container_of(kref, struct rdma_user_mmap_entry, ref); + struct ib_ucontext *ucontext = entry->ucontext; + unsigned long i; + + /* + * Erase all entries occupied by this single entry, this is deferred + * until all VMA are closed so that the mmap offsets remain unique. + */ + xa_lock(&ucontext->mmap_xa); + for (i = 0; i < entry->npages; i++) + __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i); + xa_unlock(&ucontext->mmap_xa); + + ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n", + entry->start_pgoff, entry->npages); + + if (ucontext->device->ops.mmap_free) + ucontext->device->ops.mmap_free(entry); +} + +/** + * rdma_user_mmap_entry_put() - Drop reference to the mmap entry + * + * @entry: an entry in the mmap_xa + * + * This function is called when the mapping is closed if it was + * an io mapping or when the driver is done with the entry for + * some other reason. + * Should be called after rdma_user_mmap_entry_get was called + * and entry is no longer needed. This function will erase the + * entry and free it if its refcnt reaches zero. 
+ */ +void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry) +{ + kref_put(&entry->ref, rdma_user_mmap_entry_free); +} +EXPORT_SYMBOL(rdma_user_mmap_entry_put); + +/** + * rdma_user_mmap_entry_remove() - Drop reference to entry and + * mark it as unmmapable + * + * @entry: the entry to insert into the mmap_xa + * + * Drivers can call this to prevent userspace from creating more mappings for + * entry, however existing mmaps continue to exist and ops->mmap_free() will + * not be called until all user mmaps are destroyed. + */ +void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry) +{ + if (!entry) + return; + + entry->driver_removed = true; + kref_put(&entry->ref, rdma_user_mmap_entry_free); +} +EXPORT_SYMBOL(rdma_user_mmap_entry_remove); + +/** + * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa + * + * @ucontext: associated user context. + * @entry: the entry to insert into the mmap_xa + * @length: length of the address that will be mmapped + * + * This function should be called by drivers that use the rdma_user_mmap + * interface for implementing their mmap syscall A database of mmap offsets is + * handled in the core and helper functions are provided to insert entries + * into the database and extract entries when the user calls mmap with the + * given offset. The function allocates a unique page offset that should be + * provided to user, the user will use the offset to retrieve information such + * as address to be mapped and how. + * + * Return: 0 on success and -ENOMEM on failure + */ +int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length) +{ + struct ib_uverbs_file *ufile = ucontext->ufile; + XA_STATE(xas, &ucontext->mmap_xa, 0); + u32 xa_first, xa_last, npages; + int err; + u32 i; + + if (!entry) + return -EINVAL; + + kref_init(&entry->ref); + entry->ucontext = ucontext; + + /* + * We want the whole allocation to be done without interruption from a + * different thread. The allocation requires finding a free range and + * storing. During the xa_insert the lock could be released, possibly + * allowing another thread to choose the same range. + */ + mutex_lock(&ufile->umap_lock); + + xa_lock(&ucontext->mmap_xa); + + /* We want to find an empty range */ + npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE); + entry->npages = npages; + while (true) { + /* First find an empty index */ + xas_find_marked(&xas, U32_MAX, XA_FREE_MARK); + if (xas.xa_node == XAS_RESTART) + goto err_unlock; + + xa_first = xas.xa_index; + + /* Is there enough room to have the range? */ + if (check_add_overflow(xa_first, npages, &xa_last)) + goto err_unlock; + + /* + * Now look for the next present entry. If an entry doesn't + * exist, we found an empty range and can proceed. + */ + xas_next_entry(&xas, xa_last - 1); + if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last) + break; + } + + for (i = xa_first; i < xa_last; i++) { + err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL); + if (err) + goto err_undo; + } + + /* + * Internally the kernel uses a page offset, in libc this is a byte + * offset. Drivers should not return pgoff to userspace. 
+ */ + entry->start_pgoff = xa_first; + xa_unlock(&ucontext->mmap_xa); + mutex_unlock(&ufile->umap_lock); + + ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n", + entry->start_pgoff, npages); + + return 0; + +err_undo: + for (; i > xa_first; i--) + __xa_erase(&ucontext->mmap_xa, i - 1); + +err_unlock: + xa_unlock(&ucontext->mmap_xa); + mutex_unlock(&ufile->umap_lock); + return -ENOMEM; +} +EXPORT_SYMBOL(rdma_user_mmap_entry_insert); diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index ccf4d069c25c..6c72773faf29 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -817,6 +817,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, rdma_restrack_del(&ucontext->res); ib_dev->ops.dealloc_ucontext(ucontext); + WARN_ON(!xa_empty(&ucontext->mmap_xa)); kfree(ucontext); ufile->ucontext = NULL; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 14a80fd9f464..06ed32c8662f 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -252,6 +252,8 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) ucontext->closing = false; ucontext->cleanup_retryable = false; + xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC); + ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) goto err_free; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0626b62ed107..8865ec28180a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1473,6 +1473,7 @@ struct ib_ucontext { * Implementation details of the RDMA core, don't use in drivers: */ struct rdma_restrack_entry res; + struct xarray mmap_xa; }; struct ib_uobject { @@ -2258,6 +2259,21 @@ struct iw_cm_conn_param; #define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct +struct rdma_user_mmap_entry { + struct kref ref; + struct ib_ucontext *ucontext; + unsigned long start_pgoff; + size_t npages; + bool driver_removed; +}; + +/* Return the offset (in bytes) the user should pass to libc's mmap() */ +static inline u64 +rdma_user_mmap_get_offset(const struct rdma_user_mmap_entry *entry) +{ + return (u64)entry->start_pgoff << PAGE_SHIFT; +} + /** * struct ib_device_ops - InfiniBand device operations * This structure defines all the InfiniBand device operations, providers will @@ -2370,6 +2386,13 @@ struct ib_device_ops { struct ib_udata *udata); void (*dealloc_ucontext)(struct ib_ucontext *context); int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); + /** + * This will be called once refcount of an entry in mmap_xa reaches + * zero. The type of the memory that was mapped may differ between + * entries and is opaque to the rdma_user_mmap interface. + * Therefore needs to be implemented by the driver in mmap_free. 
+ */ + void (*mmap_free)(struct rdma_user_mmap_entry *entry); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata); void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); @@ -2815,6 +2838,18 @@ static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext, return -EINVAL; } #endif +int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length); +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, + unsigned long pgoff); +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, + struct vm_area_struct *vma); +void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry); + +void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry); static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) { -- cgit v1.2.3-59-g8ed1b From c043ff2cfb7f6fdd9a1cb1a7ba3800f19b70bf65 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:12 +0200 Subject: RDMA: Connect between the mmap entry and the umap_priv structure The rdma_user_mmap_io interface created a common interface for drivers to correctly map hw resources and zap them once the ucontext is destroyed enabling the drivers to safely free the hw resources. However, this meant the drivers need to delay freeing the resource to the ucontext destroy phase to ensure they were no longer mapped. The new mechanism for a common way of handling user/driver address mapping enabled notifying the driver if all umap_priv mappings were removed, and enabled freeing the hw resources when they are done with and not delay it until ucontext destroy. Since not all drivers use the mechanism, NULL can be sent to the rdma_user_mmap_io interface to continue working as before. Drivers that use the mmap_xa interface can pass the entry being mapped to the rdma_user_mmap_io function to be linked together. 
Link: https://lore.kernel.org/r/20191030094417.16866-4-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 4 ++- drivers/infiniband/core/ib_core_uverbs.c | 48 ++++++++++++++++++++++++------- drivers/infiniband/core/uverbs_main.c | 10 ++++++- drivers/infiniband/hw/efa/efa_verbs.c | 6 ++-- drivers/infiniband/hw/hns/hns_roce_main.c | 6 ++-- drivers/infiniband/hw/mlx4/main.c | 9 ++++-- drivers/infiniband/hw/mlx5/main.c | 8 ++++-- include/rdma/ib_verbs.h | 13 ++------- 8 files changed, 70 insertions(+), 34 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 0252da9560f4..6be180eb8cb3 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -391,9 +391,11 @@ void rdma_nl_net_exit(struct rdma_dev_net *rnet); struct rdma_umap_priv { struct vm_area_struct *vma; struct list_head list; + struct rdma_user_mmap_entry *entry; }; void rdma_umap_priv_init(struct rdma_umap_priv *priv, - struct vm_area_struct *vma); + struct vm_area_struct *vma, + struct rdma_user_mmap_entry *entry); #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c index aacd84a45de6..f509c478b469 100644 --- a/drivers/infiniband/core/ib_core_uverbs.c +++ b/drivers/infiniband/core/ib_core_uverbs.c @@ -8,23 +8,36 @@ #include "uverbs.h" #include "core_priv.h" -/* - * Each time we map IO memory into user space this keeps track of the mapping. - * When the device is hot-unplugged we 'zap' the mmaps in user space to point - * to the zero page and allow the hot unplug to proceed. +/** + * rdma_umap_priv_init() - Initialize the private data of a vma + * + * @priv: The already allocated private data + * @vma: The vm area struct that needs private data + * @entry: entry into the mmap_xa that needs to be linked with + * this vma + * + * Each time we map IO memory into user space this keeps track of the + * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space + * to point to the zero page and allow the hot unplug to proceed. * * This is necessary for cases like PCI physical hot unplug as the actual BAR * memory may vanish after this and access to it from userspace could MCE. * * RDMA drivers supporting disassociation must have their user space designed * to cope in some way with their IO pages going to the zero page. + * */ void rdma_umap_priv_init(struct rdma_umap_priv *priv, - struct vm_area_struct *vma) + struct vm_area_struct *vma, + struct rdma_user_mmap_entry *entry) { struct ib_uverbs_file *ufile = vma->vm_file->private_data; priv->vma = vma; + if (entry) { + kref_get(&entry->ref); + priv->entry = entry; + } vma->vm_private_data = priv; /* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */ @@ -34,13 +47,26 @@ void rdma_umap_priv_init(struct rdma_umap_priv *priv, } EXPORT_SYMBOL(rdma_umap_priv_init); -/* - * Map IO memory into a process. This is to be called by drivers as part of - * their mmap() functions if they wish to send something like PCI-E BAR memory - * to userspace. 
+/** + * rdma_user_mmap_io() - Map IO memory into a process + * + * @ucontext: associated user context + * @vma: the vma related to the current mmap call + * @pfn: pfn to map + * @size: size to map + * @prot: pgprot to use in remap call + * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL + * if mmap_entry is not used by the driver + * + * This is to be called by drivers as part of their mmap() functions if they + * wish to send something like PCI-E BAR memory to userspace. + * + * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on + * success. */ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, pgprot_t prot) + unsigned long pfn, unsigned long size, pgprot_t prot, + struct rdma_user_mmap_entry *entry) { struct ib_uverbs_file *ufile = ucontext->ufile; struct rdma_umap_priv *priv; @@ -67,7 +93,7 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, return -EAGAIN; } - rdma_umap_priv_init(priv, vma); + rdma_umap_priv_init(priv, vma, entry); return 0; } EXPORT_SYMBOL(rdma_user_mmap_io); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index b1f5334ff907..9aa7ffc1d12a 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -819,7 +819,7 @@ static void rdma_umap_open(struct vm_area_struct *vma) priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) goto out_unlock; - rdma_umap_priv_init(priv, vma); + rdma_umap_priv_init(priv, vma, opriv->entry); up_read(&ufile->hw_destroy_rwsem); return; @@ -850,6 +850,9 @@ static void rdma_umap_close(struct vm_area_struct *vma) * this point. */ mutex_lock(&ufile->umap_lock); + if (priv->entry) + rdma_user_mmap_entry_put(priv->entry); + list_del(&priv->list); mutex_unlock(&ufile->umap_lock); kfree(priv); @@ -950,6 +953,11 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); + + if (priv->entry) { + rdma_user_mmap_entry_put(priv->entry); + priv->entry = NULL; + } } mutex_unlock(&ufile->umap_lock); skip_mm: diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 4edae89e8e3c..37e3d62bbb51 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1612,11 +1612,13 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, switch (entry->mmap_flag) { case EFA_MMAP_IO_NC: err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); break; case EFA_MMAP_IO_WC: err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, - pgprot_writecombine(vma->vm_page_prot)); + pgprot_writecombine(vma->vm_page_prot), + NULL); break; case EFA_MMAP_DMA_PAGE: for (va = vma->vm_start; va < vma->vm_end; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index b5d196c119ee..803dc6f4b496 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -359,7 +359,8 @@ static int hns_roce_mmap(struct ib_ucontext *context, return rdma_user_mmap_io(context, vma, to_hr_ucontext(context)->uar.pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); /* vm_pgoff: 1 -- TPTR */ case 1: @@ -372,7 +373,8 @@ static int hns_roce_mmap(struct ib_ucontext *context, return 
rdma_user_mmap_io(context, vma, hr_dev->tptr_dma_addr >> PAGE_SHIFT, hr_dev->tptr_size, - vma->vm_page_prot); + vma->vm_page_prot, + NULL); default: return -EINVAL; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8d2f1e38b891..f89b129b7e3a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1146,7 +1146,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return rdma_user_mmap_io(context, vma, to_mucontext(context)->uar.pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); case 1: if (dev->dev->caps.bf_reg_size == 0) @@ -1155,7 +1156,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) context, vma, to_mucontext(context)->uar.pfn + dev->dev->caps.num_uars, - PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot)); + PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot), + NULL); case 3: { struct mlx4_clock_params params; @@ -1171,7 +1173,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) params.bar) + params.offset) >> PAGE_SHIFT, - PAGE_SIZE, pgprot_noncached(vma->vm_page_prot)); + PAGE_SIZE, pgprot_noncached(vma->vm_page_prot), + NULL); } default: diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 22db87278c40..2a510de4ae0f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2156,7 +2156,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, - prot); + prot, NULL); if (err) { mlx5_ib_err(dev, "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n", @@ -2198,7 +2198,8 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) PAGE_SHIFT) + page_idx; return rdma_user_mmap_io(context, vma, pfn, map_size, - pgprot_writecombine(vma->vm_page_prot)); + pgprot_writecombine(vma->vm_page_prot), + NULL); } static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) @@ -2236,7 +2237,8 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm PAGE_SHIFT; return rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); case MLX5_IB_MMAP_CLOCK_INFO: return mlx5_ib_mmap_clock_info_page(dev, vma, context); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8865ec28180a..416e72ea80d9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2826,18 +2826,9 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client, void ib_set_device_ops(struct ib_device *device, const struct ib_device_ops *ops); -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, pgprot_t prot); -#else -static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, - pgprot_t prot) -{ - return -EINVAL; -} -#endif + unsigned long pfn, unsigned long size, pgprot_t prot, + struct rdma_user_mmap_entry *entry); int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, struct rdma_user_mmap_entry *entry, size_t length); -- cgit v1.2.3-59-g8ed1b From e84d3c184e7967559d511e4569c111a02d64031e Mon Sep 17 00:00:00 
2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:13 +0200 Subject: RDMA/efa: Use the common mmap_xa helpers Remove the functions related to managing the mmap_xa database. This code was replaced with common code in ib_core. Link: https://lore.kernel.org/r/20191030094417.16866-5-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa.h | 11 +- drivers/infiniband/hw/efa/efa_main.c | 1 + drivers/infiniband/hw/efa/efa_verbs.c | 335 +++++++++++++++------------------- 3 files changed, 153 insertions(+), 194 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 2283e432693e..2bda07078b97 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -71,8 +71,6 @@ struct efa_dev { struct efa_ucontext { struct ib_ucontext ibucontext; - struct xarray mmap_xa; - u32 mmap_xa_page; u16 uarn; }; @@ -91,6 +89,7 @@ struct efa_cq { struct efa_ucontext *ucontext; dma_addr_t dma_addr; void *cpu_addr; + struct rdma_user_mmap_entry *mmap_entry; size_t size; u16 cq_idx; }; @@ -101,6 +100,13 @@ struct efa_qp { void *rq_cpu_addr; size_t rq_size; enum ib_qp_state state; + + /* Used for saving mmap_xa entries */ + struct rdma_user_mmap_entry *sq_db_mmap_entry; + struct rdma_user_mmap_entry *llq_desc_mmap_entry; + struct rdma_user_mmap_entry *rq_db_mmap_entry; + struct rdma_user_mmap_entry *rq_mmap_entry; + u32 qp_handle; u32 max_send_wr; u32 max_recv_wr; @@ -147,6 +153,7 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata); void efa_dealloc_ucontext(struct ib_ucontext *ibucontext); int efa_mmap(struct ib_ucontext *ibucontext, struct vm_area_struct *vma); +void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int efa_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, u32 flags, diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 83858f7e83d0..0e3050d01b75 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -217,6 +217,7 @@ static const struct ib_device_ops efa_dev_ops = { .get_link_layer = efa_port_link_layer, .get_port_immutable = efa_get_port_immutable, .mmap = efa_mmap, + .mmap_free = efa_mmap_free, .modify_qp = efa_modify_qp, .query_device = efa_query_device, .query_gid = efa_query_gid, diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 37e3d62bbb51..b242ea7a3bc8 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -13,10 +13,6 @@ #include "efa.h" -#define EFA_MMAP_FLAG_SHIFT 56 -#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0) -#define EFA_MMAP_INVALID U64_MAX - enum { EFA_MMAP_DMA_PAGE = 0, EFA_MMAP_IO_WC, @@ -27,20 +23,12 @@ enum { (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) -struct efa_mmap_entry { - void *obj; +struct efa_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; u64 address; - u64 length; - u32 mmap_page; u8 mmap_flag; }; -static inline u64 get_mmap_key(const struct efa_mmap_entry *efa) -{ - return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) | - ((u64)efa->mmap_page << PAGE_SHIFT); -} - #define EFA_DEFINE_STATS(op) \ op(EFA_TX_BYTES, "tx_bytes") \ op(EFA_TX_PKTS, "tx_pkts") \ @@ -147,6 +135,12 @@ static inline struct efa_ah *to_eah(struct ib_ah *ibah) return container_of(ibah, struct efa_ah, ibah); 
} +static inline struct efa_user_mmap_entry * +to_emmap(struct rdma_user_mmap_entry *rdma_entry) +{ + return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry); +} + #define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \ FIELD_SIZEOF(typeof(x), fld) <= (sz)) @@ -172,106 +166,6 @@ static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr, return addr; } -/* - * This is only called when the ucontext is destroyed and there can be no - * concurrent query via mmap or allocate on the xarray, thus we can be sure no - * other thread is using the entry pointer. We also know that all the BAR - * pages have either been zap'd or munmaped at this point. Normal pages are - * refcounted and will be freed at the proper time. - */ -static void mmap_entries_remove_free(struct efa_dev *dev, - struct efa_ucontext *ucontext) -{ - struct efa_mmap_entry *entry; - unsigned long mmap_page; - - xa_for_each(&ucontext->mmap_xa, mmap_page, entry) { - xa_erase(&ucontext->mmap_xa, mmap_page); - - ibdev_dbg( - &dev->ibdev, - "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n", - entry->obj, get_mmap_key(entry), entry->address, - entry->length); - if (entry->mmap_flag == EFA_MMAP_DMA_PAGE) - /* DMA mapping is already gone, now free the pages */ - free_pages_exact(phys_to_virt(entry->address), - entry->length); - kfree(entry); - } -} - -static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev, - struct efa_ucontext *ucontext, - u64 key, u64 len) -{ - struct efa_mmap_entry *entry; - u64 mmap_page; - - mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT; - if (mmap_page > U32_MAX) - return NULL; - - entry = xa_load(&ucontext->mmap_xa, mmap_page); - if (!entry || get_mmap_key(entry) != key || entry->length != len) - return NULL; - - ibdev_dbg(&dev->ibdev, - "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n", - entry->obj, key, entry->address, entry->length); - - return entry; -} - -/* - * Note this locking scheme cannot support removal of entries, except during - * ucontext destruction when the core code guarentees no concurrency. 
- */ -static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext, - void *obj, u64 address, u64 length, u8 mmap_flag) -{ - struct efa_mmap_entry *entry; - u32 next_mmap_page; - int err; - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return EFA_MMAP_INVALID; - - entry->obj = obj; - entry->address = address; - entry->length = length; - entry->mmap_flag = mmap_flag; - - xa_lock(&ucontext->mmap_xa); - if (check_add_overflow(ucontext->mmap_xa_page, - (u32)(length >> PAGE_SHIFT), - &next_mmap_page)) - goto err_unlock; - - entry->mmap_page = ucontext->mmap_xa_page; - ucontext->mmap_xa_page = next_mmap_page; - err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry, - GFP_KERNEL); - if (err) - goto err_unlock; - - xa_unlock(&ucontext->mmap_xa); - - ibdev_dbg( - &dev->ibdev, - "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n", - entry->obj, entry->address, entry->length, get_mmap_key(entry)); - - return get_mmap_key(entry); - -err_unlock: - xa_unlock(&ucontext->mmap_xa); - kfree(entry); - return EFA_MMAP_INVALID; - -} - int efa_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *udata) @@ -485,8 +379,19 @@ static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle) return efa_com_destroy_qp(&dev->edev, ¶ms); } +static void efa_qp_user_mmap_entries_remove(struct efa_ucontext *uctx, + struct efa_qp *qp) +{ + rdma_user_mmap_entry_remove(qp->rq_mmap_entry); + rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry); + rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry); + rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry); +} + int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { + struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata, + struct efa_ucontext, ibucontext); struct efa_dev *dev = to_edev(ibqp->pd->device); struct efa_qp *qp = to_eqp(ibqp); int err; @@ -505,61 +410,101 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) DMA_TO_DEVICE); } + efa_qp_user_mmap_entries_remove(ucontext, qp); kfree(qp); return 0; } +static struct rdma_user_mmap_entry* +efa_user_mmap_entry_insert(struct ib_ucontext *ucontext, + u64 address, size_t length, + u8 mmap_flag, u64 *offset) +{ + struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + int err; + + if (!entry) + return NULL; + + entry->address = address; + entry->mmap_flag = mmap_flag; + + err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry, + length); + if (err) { + kfree(entry); + return NULL; + } + *offset = rdma_user_mmap_get_offset(&entry->rdma_entry); + + return &entry->rdma_entry; +} + static int qp_mmap_entries_setup(struct efa_qp *qp, struct efa_dev *dev, struct efa_ucontext *ucontext, struct efa_com_create_qp_params *params, struct efa_ibv_create_qp_resp *resp) { - /* - * Once an entry is inserted it might be mmapped, hence cannot be - * cleaned up until dealloc_ucontext. 
- */ - resp->sq_db_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->db_bar_addr + resp->sq_db_offset, - PAGE_SIZE, EFA_MMAP_IO_NC); - if (resp->sq_db_mmap_key == EFA_MMAP_INVALID) + size_t length; + u64 address; + + address = dev->db_bar_addr + resp->sq_db_offset; + qp->sq_db_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, + PAGE_SIZE, EFA_MMAP_IO_NC, + &resp->sq_db_mmap_key); + if (!qp->sq_db_mmap_entry) return -ENOMEM; resp->sq_db_offset &= ~PAGE_MASK; - resp->llq_desc_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->mem_bar_addr + resp->llq_desc_offset, - PAGE_ALIGN(params->sq_ring_size_in_bytes + - (resp->llq_desc_offset & ~PAGE_MASK)), - EFA_MMAP_IO_WC); - if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = dev->mem_bar_addr + resp->llq_desc_offset; + length = PAGE_ALIGN(params->sq_ring_size_in_bytes + + (resp->llq_desc_offset & ~PAGE_MASK)); + + qp->llq_desc_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, length, + EFA_MMAP_IO_WC, + &resp->llq_desc_mmap_key); + if (!qp->llq_desc_mmap_entry) + goto err_remove_mmap; resp->llq_desc_offset &= ~PAGE_MASK; if (qp->rq_size) { - resp->rq_db_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->db_bar_addr + resp->rq_db_offset, - PAGE_SIZE, EFA_MMAP_IO_NC); - if (resp->rq_db_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = dev->db_bar_addr + resp->rq_db_offset; + + qp->rq_db_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, PAGE_SIZE, + EFA_MMAP_IO_NC, + &resp->rq_db_mmap_key); + if (!qp->rq_db_mmap_entry) + goto err_remove_mmap; resp->rq_db_offset &= ~PAGE_MASK; - resp->rq_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - virt_to_phys(qp->rq_cpu_addr), - qp->rq_size, EFA_MMAP_DMA_PAGE); - if (resp->rq_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = virt_to_phys(qp->rq_cpu_addr); + qp->rq_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, qp->rq_size, + EFA_MMAP_DMA_PAGE, + &resp->rq_mmap_key); + if (!qp->rq_mmap_entry) + goto err_remove_mmap; resp->rq_mmap_size = qp->rq_size; } return 0; + +err_remove_mmap: + efa_qp_user_mmap_entries_remove(ucontext, qp); + + return -ENOMEM; } static int efa_qp_validate_cap(struct efa_dev *dev, @@ -634,7 +579,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, struct efa_dev *dev = to_edev(ibpd->device); struct efa_ibv_create_qp_resp resp = {}; struct efa_ibv_create_qp cmd = {}; - bool rq_entry_inserted = false; struct efa_ucontext *ucontext; struct efa_qp *qp; int err; @@ -742,7 +686,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, if (err) goto err_destroy_qp; - rq_entry_inserted = true; qp->qp_handle = create_qp_resp.qp_handle; qp->ibqp.qp_num = create_qp_resp.qp_num; qp->ibqp.qp_type = init_attr->qp_type; @@ -759,7 +702,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, ibdev_dbg(&dev->ibdev, "Failed to copy udata for qp[%u]\n", create_qp_resp.qp_num); - goto err_destroy_qp; + goto err_remove_mmap_entries; } } @@ -767,13 +710,16 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, return &qp->ibqp; +err_remove_mmap_entries: + efa_qp_user_mmap_entries_remove(ucontext, qp); err_destroy_qp: efa_destroy_qp_handle(dev, create_qp_resp.qp_handle); err_free_mapped: if (qp->rq_size) { dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size, DMA_TO_DEVICE); - if (!rq_entry_inserted) + + if (!qp->rq_mmap_entry) free_pages_exact(qp->rq_cpu_addr, qp->rq_size); } err_free_qp: @@ -897,16 +843,18 @@ void efa_destroy_cq(struct ib_cq 
*ibcq, struct ib_udata *udata) efa_destroy_cq_idx(dev, cq->cq_idx); dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, DMA_FROM_DEVICE); + rdma_user_mmap_entry_remove(cq->mmap_entry); } static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, struct efa_ibv_create_cq_resp *resp) { resp->q_mmap_size = cq->size; - resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq, - virt_to_phys(cq->cpu_addr), - cq->size, EFA_MMAP_DMA_PAGE); - if (resp->q_mmap_key == EFA_MMAP_INVALID) + cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, + virt_to_phys(cq->cpu_addr), + cq->size, EFA_MMAP_DMA_PAGE, + &resp->q_mmap_key); + if (!cq->mmap_entry) return -ENOMEM; return 0; @@ -924,7 +872,6 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct efa_dev *dev = to_edev(ibdev); struct efa_ibv_create_cq cmd = {}; struct efa_cq *cq = to_ecq(ibcq); - bool cq_entry_inserted = false; int entries = attr->cqe; int err; @@ -1013,15 +960,13 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_destroy_cq; } - cq_entry_inserted = true; - if (udata->outlen) { err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { ibdev_dbg(ibdev, "Failed to copy udata for create_cq\n"); - goto err_destroy_cq; + goto err_remove_mmap; } } @@ -1030,13 +975,16 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return 0; +err_remove_mmap: + rdma_user_mmap_entry_remove(cq->mmap_entry); err_destroy_cq: efa_destroy_cq_idx(dev, cq->cq_idx); err_free_mapped: dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, DMA_FROM_DEVICE); - if (!cq_entry_inserted) + if (!cq->mmap_entry) free_pages_exact(cq->cpu_addr, cq->size); + err_out: atomic64_inc(&dev->stats.sw_stats.create_cq_err); return err; @@ -1556,7 +1504,6 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) goto err_out; ucontext->uarn = result.uarn; - xa_init(&ucontext->mmap_xa); resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE; resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH; @@ -1585,40 +1532,56 @@ void efa_dealloc_ucontext(struct ib_ucontext *ibucontext) struct efa_ucontext *ucontext = to_eucontext(ibucontext); struct efa_dev *dev = to_edev(ibucontext->device); - mmap_entries_remove_free(dev, ucontext); efa_dealloc_uar(dev, ucontext->uarn); } +void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry) +{ + struct efa_user_mmap_entry *entry = to_emmap(rdma_entry); + + /* DMA mapping is already gone, now free the pages */ + if (entry->mmap_flag == EFA_MMAP_DMA_PAGE) + free_pages_exact(phys_to_virt(entry->address), + entry->rdma_entry.npages * PAGE_SIZE); + kfree(entry); +} + static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, - struct vm_area_struct *vma, u64 key, u64 length) + struct vm_area_struct *vma) { - struct efa_mmap_entry *entry; + struct rdma_user_mmap_entry *rdma_entry; + struct efa_user_mmap_entry *entry; unsigned long va; + int err = 0; u64 pfn; - int err; - entry = mmap_entry_get(dev, ucontext, key, length); - if (!entry) { - ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n", - key); + rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma); + if (!rdma_entry) { + ibdev_dbg(&dev->ibdev, + "pgoff[%#lx] does not have valid entry\n", + vma->vm_pgoff); return -EINVAL; } + entry = to_emmap(rdma_entry); ibdev_dbg(&dev->ibdev, - "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n", - entry->address, length, 
entry->mmap_flag); + "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n", + entry->address, rdma_entry->npages * PAGE_SIZE, + entry->mmap_flag); pfn = entry->address >> PAGE_SHIFT; switch (entry->mmap_flag) { case EFA_MMAP_IO_NC: - err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, + err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, + entry->rdma_entry.npages * PAGE_SIZE, pgprot_noncached(vma->vm_page_prot), - NULL); + rdma_entry); break; case EFA_MMAP_IO_WC: - err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, + err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, + entry->rdma_entry.npages * PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot), - NULL); + rdma_entry); break; case EFA_MMAP_DMA_PAGE: for (va = vma->vm_start; va < vma->vm_end; @@ -1635,12 +1598,13 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, if (err) { ibdev_dbg( &dev->ibdev, - "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n", - entry->address, length, entry->mmap_flag, err); - return err; + "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", + entry->address, rdma_entry->npages * PAGE_SIZE, + entry->mmap_flag, err); } - return 0; + rdma_user_mmap_entry_put(rdma_entry); + return err; } int efa_mmap(struct ib_ucontext *ibucontext, @@ -1648,26 +1612,13 @@ int efa_mmap(struct ib_ucontext *ibucontext, { struct efa_ucontext *ucontext = to_eucontext(ibucontext); struct efa_dev *dev = to_edev(ibucontext->device); - u64 length = vma->vm_end - vma->vm_start; - u64 key = vma->vm_pgoff << PAGE_SHIFT; + size_t length = vma->vm_end - vma->vm_start; ibdev_dbg(&dev->ibdev, - "start %#lx, end %#lx, length = %#llx, key = %#llx\n", - vma->vm_start, vma->vm_end, length, key); - - if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) { - ibdev_dbg(&dev->ibdev, - "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n", - length, PAGE_SIZE, vma->vm_flags); - return -EINVAL; - } - - if (vma->vm_flags & VM_EXEC) { - ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n"); - return -EPERM; - } + "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n", + vma->vm_start, vma->vm_end, length, vma->vm_pgoff); - return __efa_mmap(dev, ucontext, vma, key, length); + return __efa_mmap(dev, ucontext, vma); } static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah) -- cgit v1.2.3-59-g8ed1b From 11f1a75567c43e1f9e01b37ab524b770a0a0ca78 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:14 +0200 Subject: RDMA/siw: Use the common mmap_xa helpers Remove the functions related to managing the mmap_xa database. This code is now common in ib_core. 
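A sketch of what a converted driver is left with once its private xarray is gone (foo_* names hypothetical, reusing the wrapper from the sketch above): it only wires up an ->mmap_free callback and frees its wrapper there, since ib_core now erases the xarray entries and invokes this hook when the last reference on an entry is dropped.

static void foo_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	struct foo_user_mmap_entry *entry =
		container_of(rdma_entry, struct foo_user_mmap_entry, rdma_entry);

	/* Called by ib_core once the entry's refcount reaches zero */
	kfree(entry);
}

static const struct ib_device_ops foo_dev_ops = {
	.mmap	   = foo_mmap,		/* as sketched earlier */
	.mmap_free = foo_mmap_free,
};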
Link: https://lore.kernel.org/r/20191030094417.16866-6-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Bernard Metzler Reviewed-by: Bernard Metzler Tested-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw.h | 19 +++- drivers/infiniband/sw/siw/siw_main.c | 1 + drivers/infiniband/sw/siw/siw_verbs.c | 195 +++++++++++++++++----------------- drivers/infiniband/sw/siw/siw_verbs.h | 1 + 4 files changed, 114 insertions(+), 102 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 1ea3ed249e7b..f851afb5632e 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -221,7 +221,7 @@ struct siw_cq { u32 cq_get; u32 num_cqe; bool kernel_verbs; - u32 xa_cq_index; /* mmap information for CQE array */ + struct rdma_user_mmap_entry *cq_entry; /* mmap info for CQE array */ u32 id; /* For debugging only */ }; @@ -264,7 +264,7 @@ struct siw_srq { u32 rq_put; u32 rq_get; u32 num_rqe; /* max # of wqe's allowed */ - u32 xa_srq_index; /* mmap information for SRQ array */ + struct rdma_user_mmap_entry *srq_entry; /* mmap info for SRQ array */ char armed; /* inform user if limit hit */ char kernel_verbs; /* '1' if kernel client */ }; @@ -478,8 +478,8 @@ struct siw_qp { u8 layer : 4, etype : 4; u8 ecode; } term_info; - u32 xa_sq_index; /* mmap information for SQE array */ - u32 xa_rq_index; /* mmap information for RQE array */ + struct rdma_user_mmap_entry *sq_entry; /* mmap info for SQE array */ + struct rdma_user_mmap_entry *rq_entry; /* mmap info for RQE array */ struct rcu_head rcu; }; @@ -504,6 +504,11 @@ struct iwarp_msg_info { int (*rx_data)(struct siw_qp *qp); }; +struct siw_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; + void *address; +}; + /* Global siw parameters. 
Currently set in siw_main.c */ extern const bool zcopy_tx; extern const bool try_gso; @@ -608,6 +613,12 @@ static inline struct siw_mr *to_siw_mr(struct ib_mr *base_mr) return container_of(base_mr, struct siw_mr, base_mr); } +static inline struct siw_user_mmap_entry * +to_siw_mmap_entry(struct rdma_user_mmap_entry *rdma_mmap) +{ + return container_of(rdma_mmap, struct siw_user_mmap_entry, rdma_entry); +} + static inline struct siw_qp *siw_qp_id2obj(struct siw_device *sdev, int id) { struct siw_qp *qp; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 48e45a852b51..c147f0613d95 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -279,6 +279,7 @@ static const struct ib_device_ops siw_device_ops = { .iw_rem_ref = siw_qp_put_ref, .map_mr_sg = siw_map_mr_sg, .mmap = siw_mmap, + .mmap_free = siw_mmap_free, .modify_qp = siw_verbs_modify_qp, .modify_srq = siw_modify_srq, .poll_cq = siw_poll_cq, diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 726a5924ea13..725985ed8af3 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -34,44 +34,19 @@ static char ib_qp_state_to_string[IB_QPS_ERR + 1][sizeof("RESET")] = { [IB_QPS_ERR] = "ERR" }; -static u32 siw_create_uobj(struct siw_ucontext *uctx, void *vaddr, u32 size) +void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry) { - struct siw_uobj *uobj; - struct xa_limit limit = XA_LIMIT(0, SIW_UOBJ_MAX_KEY); - u32 key; + struct siw_user_mmap_entry *entry = to_siw_mmap_entry(rdma_entry); - uobj = kzalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) - return SIW_INVAL_UOBJ_KEY; - - if (xa_alloc_cyclic(&uctx->xa, &key, uobj, limit, &uctx->uobj_nextkey, - GFP_KERNEL) < 0) { - kfree(uobj); - return SIW_INVAL_UOBJ_KEY; - } - uobj->size = PAGE_ALIGN(size); - uobj->addr = vaddr; - - return key; -} - -static struct siw_uobj *siw_get_uobj(struct siw_ucontext *uctx, - unsigned long off, u32 size) -{ - struct siw_uobj *uobj = xa_load(&uctx->xa, off); - - if (uobj && uobj->size == size) - return uobj; - - return NULL; + kfree(entry); } int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) { struct siw_ucontext *uctx = to_siw_ctx(ctx); - struct siw_uobj *uobj; - unsigned long off = vma->vm_pgoff; - int size = vma->vm_end - vma->vm_start; + size_t size = vma->vm_end - vma->vm_start; + struct rdma_user_mmap_entry *rdma_entry; + struct siw_user_mmap_entry *entry; int rv = -EINVAL; /* @@ -79,18 +54,25 @@ int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) */ if (vma->vm_start & (PAGE_SIZE - 1)) { pr_warn("siw: mmap not page aligned\n"); - goto out; + return -EINVAL; } - uobj = siw_get_uobj(uctx, off, size); - if (!uobj) { - siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, %u\n", - off, size); + rdma_entry = rdma_user_mmap_entry_get(&uctx->base_ucontext, vma); + if (!rdma_entry) { + siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, %#zx\n", + vma->vm_pgoff, size); + return -EINVAL; + } + entry = to_siw_mmap_entry(rdma_entry); + + rv = remap_vmalloc_range(vma, entry->address, 0); + if (rv) { + pr_warn("remap_vmalloc_range failed: %lu, %zu\n", vma->vm_pgoff, + size); goto out; } - rv = remap_vmalloc_range(vma, uobj->addr, 0); - if (rv) - pr_warn("remap_vmalloc_range failed: %lu, %u\n", off, size); out: + rdma_user_mmap_entry_put(rdma_entry); + return rv; } @@ -105,7 +87,7 @@ int siw_alloc_ucontext(struct ib_ucontext *base_ctx, struct ib_udata *udata) rv = 
-ENOMEM; goto err_out; } - xa_init_flags(&ctx->xa, XA_FLAGS_ALLOC); + ctx->uobj_nextkey = 0; ctx->sdev = sdev; @@ -135,19 +117,7 @@ err_out: void siw_dealloc_ucontext(struct ib_ucontext *base_ctx) { struct siw_ucontext *uctx = to_siw_ctx(base_ctx); - void *entry; - unsigned long index; - /* - * Make sure all user mmap objects are gone. Since QP, CQ - * and SRQ destroy routines destroy related objects, nothing - * should be found here. - */ - xa_for_each(&uctx->xa, index, entry) { - kfree(xa_erase(&uctx->xa, index)); - pr_warn("siw: dropping orphaned uobj at %lu\n", index); - } - xa_destroy(&uctx->xa); atomic_dec(&uctx->sdev->num_ctx); } @@ -293,6 +263,33 @@ void siw_qp_put_ref(struct ib_qp *base_qp) siw_qp_put(to_siw_qp(base_qp)); } +static struct rdma_user_mmap_entry * +siw_mmap_entry_insert(struct siw_ucontext *uctx, + void *address, size_t length, + u64 *offset) +{ + struct siw_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + int rv; + + *offset = SIW_INVAL_UOBJ_KEY; + if (!entry) + return NULL; + + entry->address = address; + + rv = rdma_user_mmap_entry_insert(&uctx->base_ucontext, + &entry->rdma_entry, + length); + if (rv) { + kfree(entry); + return NULL; + } + + *offset = rdma_user_mmap_get_offset(&entry->rdma_entry); + + return &entry->rdma_entry; +} + /* * siw_create_qp() * @@ -317,6 +314,7 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, struct siw_cq *scq = NULL, *rcq = NULL; unsigned long flags; int num_sqe, num_rqe, rv = 0; + size_t length; siw_dbg(base_dev, "create new QP\n"); @@ -380,8 +378,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, spin_lock_init(&qp->orq_lock); qp->kernel_verbs = !udata; - qp->xa_sq_index = SIW_INVAL_UOBJ_KEY; - qp->xa_rq_index = SIW_INVAL_UOBJ_KEY; rv = siw_qp_add(sdev, qp); if (rv) @@ -458,22 +454,27 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, uresp.qp_id = qp_id(qp); if (qp->sendq) { - qp->xa_sq_index = - siw_create_uobj(uctx, qp->sendq, - num_sqe * sizeof(struct siw_sqe)); + length = num_sqe * sizeof(struct siw_sqe); + qp->sq_entry = + siw_mmap_entry_insert(uctx, qp->sendq, + length, &uresp.sq_key); + if (!qp->sq_entry) { + rv = -ENOMEM; + goto err_out_xa; + } } + if (qp->recvq) { - qp->xa_rq_index = - siw_create_uobj(uctx, qp->recvq, - num_rqe * sizeof(struct siw_rqe)); - } - if (qp->xa_sq_index == SIW_INVAL_UOBJ_KEY || - qp->xa_rq_index == SIW_INVAL_UOBJ_KEY) { - rv = -ENOMEM; - goto err_out_xa; + length = num_rqe * sizeof(struct siw_rqe); + qp->rq_entry = + siw_mmap_entry_insert(uctx, qp->recvq, + length, &uresp.rq_key); + if (!qp->rq_entry) { + uresp.sq_key = SIW_INVAL_UOBJ_KEY; + rv = -ENOMEM; + goto err_out_xa; + } } - uresp.sq_key = qp->xa_sq_index << PAGE_SHIFT; - uresp.rq_key = qp->xa_rq_index << PAGE_SHIFT; if (udata->outlen < sizeof(uresp)) { rv = -EINVAL; @@ -501,11 +502,10 @@ err_out: kfree(siw_base_qp); if (qp) { - if (qp->xa_sq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&uctx->xa, qp->xa_sq_index)); - if (qp->xa_rq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&uctx->xa, qp->xa_rq_index)); - + if (uctx) { + rdma_user_mmap_entry_remove(qp->sq_entry); + rdma_user_mmap_entry_remove(qp->rq_entry); + } vfree(qp->sendq); vfree(qp->recvq); kfree(qp); @@ -619,10 +619,10 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) qp->attrs.flags |= SIW_QP_IN_DESTROY; qp->rx_stream.rx_suspend = 1; - if (uctx && qp->xa_sq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&uctx->xa, qp->xa_sq_index)); - if (uctx && qp->xa_rq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&uctx->xa, qp->xa_rq_index)); + if (uctx) { + 
rdma_user_mmap_entry_remove(qp->sq_entry); + rdma_user_mmap_entry_remove(qp->rq_entry); + } down_write(&qp->state_lock); @@ -1092,8 +1092,8 @@ void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) siw_cq_flush(cq); - if (ctx && cq->xa_cq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&ctx->xa, cq->xa_cq_index)); + if (ctx) + rdma_user_mmap_entry_remove(cq->cq_entry); atomic_dec(&sdev->num_cq); @@ -1130,7 +1130,6 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, size = roundup_pow_of_two(size); cq->base_cq.cqe = size; cq->num_cqe = size; - cq->xa_cq_index = SIW_INVAL_UOBJ_KEY; if (!udata) { cq->kernel_verbs = 1; @@ -1156,16 +1155,17 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); + size_t length = size * sizeof(struct siw_cqe) + + sizeof(struct siw_cq_ctrl); - cq->xa_cq_index = - siw_create_uobj(ctx, cq->queue, - size * sizeof(struct siw_cqe) + - sizeof(struct siw_cq_ctrl)); - if (cq->xa_cq_index == SIW_INVAL_UOBJ_KEY) { + cq->cq_entry = + siw_mmap_entry_insert(ctx, cq->queue, + length, &uresp.cq_key); + if (!cq->cq_entry) { rv = -ENOMEM; goto err_out; } - uresp.cq_key = cq->xa_cq_index << PAGE_SHIFT; + uresp.cq_id = cq->id; uresp.num_cqe = size; @@ -1186,8 +1186,8 @@ err_out: struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); - if (cq->xa_cq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&ctx->xa, cq->xa_cq_index)); + if (ctx) + rdma_user_mmap_entry_remove(cq->cq_entry); vfree(cq->queue); } atomic_dec(&sdev->num_cq); @@ -1591,7 +1591,6 @@ int siw_create_srq(struct ib_srq *base_srq, } srq->max_sge = attrs->max_sge; srq->num_rqe = roundup_pow_of_two(attrs->max_wr); - srq->xa_srq_index = SIW_INVAL_UOBJ_KEY; srq->limit = attrs->srq_limit; if (srq->limit) srq->armed = 1; @@ -1610,15 +1609,16 @@ int siw_create_srq(struct ib_srq *base_srq, } if (udata) { struct siw_uresp_create_srq uresp = {}; + size_t length = srq->num_rqe * sizeof(struct siw_rqe); - srq->xa_srq_index = siw_create_uobj( - ctx, srq->recvq, srq->num_rqe * sizeof(struct siw_rqe)); - - if (srq->xa_srq_index == SIW_INVAL_UOBJ_KEY) { + srq->srq_entry = + siw_mmap_entry_insert(ctx, srq->recvq, + length, &uresp.srq_key); + if (!srq->srq_entry) { rv = -ENOMEM; goto err_out; } - uresp.srq_key = srq->xa_srq_index; + uresp.num_rqe = srq->num_rqe; if (udata->outlen < sizeof(uresp)) { @@ -1637,8 +1637,8 @@ int siw_create_srq(struct ib_srq *base_srq, err_out: if (srq->recvq) { - if (ctx && srq->xa_srq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&ctx->xa, srq->xa_srq_index)); + if (ctx) + rdma_user_mmap_entry_remove(srq->srq_entry); vfree(srq->recvq); } atomic_dec(&sdev->num_srq); @@ -1724,9 +1724,8 @@ void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata) rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); - if (ctx && srq->xa_srq_index != SIW_INVAL_UOBJ_KEY) - kfree(xa_erase(&ctx->xa, srq->xa_srq_index)); - + if (ctx) + rdma_user_mmap_entry_remove(srq->srq_entry); vfree(srq->recvq); atomic_dec(&sdev->num_srq); } diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h index 1910869281cb..1a731989fad6 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.h +++ b/drivers/infiniband/sw/siw/siw_verbs.h @@ -83,6 +83,7 @@ void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata); int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, const 
struct ib_recv_wr **bad_wr); int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma); +void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry); void siw_qp_event(struct siw_qp *qp, enum ib_event_type type); void siw_cq_event(struct siw_cq *cq, enum ib_event_type type); void siw_srq_event(struct siw_srq *srq, enum ib_event_type type); -- cgit v1.2.3-59-g8ed1b From 4c6bb02d598003e525e83532280ae895213aab20 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:15 +0200 Subject: RDMA/qedr: Use the common mmap API Remove all functions related to mmap from qedr and use the common API. Link: https://lore.kernel.org/r/20191030094417.16866-7-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 1 + drivers/infiniband/hw/qedr/qedr.h | 30 +++--- drivers/infiniband/hw/qedr/verbs.c | 185 ++++++++++++++++--------------------- drivers/infiniband/hw/qedr/verbs.h | 3 +- 4 files changed, 98 insertions(+), 121 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 1ff5407270d2..03fd168cccc0 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -212,6 +212,7 @@ static const struct ib_device_ops qedr_dev_ops = { .get_link_layer = qedr_link_layer, .map_mr_sg = qedr_map_mr_sg, .mmap = qedr_mmap, + .mmap_free = qedr_mmap_free, .modify_port = qedr_modify_port, .modify_qp = qedr_modify_qp, .modify_srq = qedr_modify_srq, diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 8e927f6c1520..6ef39dccba21 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -231,14 +231,10 @@ struct qedr_ucontext { struct qedr_dev *dev; struct qedr_pd *pd; void __iomem *dpi_addr; + struct rdma_user_mmap_entry *db_mmap_entry; u64 dpi_phys_addr; u32 dpi_size; u16 dpi; - - struct list_head mm_head; - - /* Lock to protect mm list */ - struct mutex mm_list_lock; }; union db_prod64 { @@ -301,14 +297,6 @@ struct qedr_pd { struct qedr_ucontext *uctx; }; -struct qedr_mm { - struct { - u64 phy_addr; - unsigned long len; - } key; - struct list_head entry; -}; - union db_prod32 { struct rdma_pwm_val16_data data; u32 raw; @@ -491,6 +479,15 @@ struct qedr_mr { u32 npages; }; +struct qedr_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; + struct qedr_dev *dev; + u64 io_address; + size_t length; + u16 dpi; + u8 mmap_flag; +}; + #define SET_FIELD2(value, name, flag) ((value) |= ((flag) << (name ## _SHIFT))) #define QEDR_RESP_IMM (RDMA_CQE_RESPONDER_IMM_FLG_MASK << \ @@ -589,4 +586,11 @@ static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq) { return container_of(ibsrq, struct qedr_srq, ibsrq); } + +static inline struct qedr_user_mmap_entry * +get_qedr_mmap_entry(struct rdma_user_mmap_entry *rdma_entry) +{ + return container_of(rdma_entry, struct qedr_user_mmap_entry, + rdma_entry); +} #endif diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 8b4240c1cc76..903cd934eb53 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -59,6 +59,10 @@ #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) +enum { + QEDR_USER_MMAP_IO_WC = 0, +}; + static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) { @@ -257,60 +261,6 @@ int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask, return 0; } -static int 
qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr, - unsigned long len) -{ - struct qedr_mm *mm; - - mm = kzalloc(sizeof(*mm), GFP_KERNEL); - if (!mm) - return -ENOMEM; - - mm->key.phy_addr = phy_addr; - /* This function might be called with a length which is not a multiple - * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel - * forces this granularity by increasing the requested size if needed. - * When qedr_mmap is called, it will search the list with the updated - * length as a key. To prevent search failures, the length is rounded up - * in advance to PAGE_SIZE. - */ - mm->key.len = roundup(len, PAGE_SIZE); - INIT_LIST_HEAD(&mm->entry); - - mutex_lock(&uctx->mm_list_lock); - list_add(&mm->entry, &uctx->mm_head); - mutex_unlock(&uctx->mm_list_lock); - - DP_DEBUG(uctx->dev, QEDR_MSG_MISC, - "added (addr=0x%llx,len=0x%lx) for ctx=%p\n", - (unsigned long long)mm->key.phy_addr, - (unsigned long)mm->key.len, uctx); - - return 0; -} - -static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr, - unsigned long len) -{ - bool found = false; - struct qedr_mm *mm; - - mutex_lock(&uctx->mm_list_lock); - list_for_each_entry(mm, &uctx->mm_head, entry) { - if (len != mm->key.len || phy_addr != mm->key.phy_addr) - continue; - - found = true; - break; - } - mutex_unlock(&uctx->mm_list_lock); - DP_DEBUG(uctx->dev, QEDR_MSG_MISC, - "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n", - mm->key.phy_addr, mm->key.len, uctx, found); - - return found; -} - int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { struct ib_device *ibdev = uctx->device; @@ -319,6 +269,7 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) struct qedr_alloc_ucontext_resp uresp = {}; struct qedr_dev *dev = get_qedr_dev(ibdev); struct qed_rdma_add_user_out_params oparams; + struct qedr_user_mmap_entry *entry; if (!udata) return -EFAULT; @@ -335,13 +286,29 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) ctx->dpi_addr = oparams.dpi_addr; ctx->dpi_phys_addr = oparams.dpi_phys_addr; ctx->dpi_size = oparams.dpi_size; - INIT_LIST_HEAD(&ctx->mm_head); - mutex_init(&ctx->mm_list_lock); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + rc = -ENOMEM; + goto err; + } + + entry->io_address = ctx->dpi_phys_addr; + entry->length = ctx->dpi_size; + entry->mmap_flag = QEDR_USER_MMAP_IO_WC; + entry->dpi = ctx->dpi; + entry->dev = dev; + rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry, + ctx->dpi_size); + if (rc) { + kfree(entry); + goto err; + } + ctx->db_mmap_entry = &entry->rdma_entry; uresp.dpm_enabled = dev->user_dpm_enabled; uresp.wids_enabled = 1; uresp.wid_count = oparams.wid_count; - uresp.db_pa = ctx->dpi_phys_addr; + uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry); uresp.db_size = ctx->dpi_size; uresp.max_send_wr = dev->attr.max_sqe; uresp.max_recv_wr = dev->attr.max_rqe; @@ -353,82 +320,86 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) - return rc; + goto err; ctx->dev = dev; - rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size); - if (rc) - return rc; - DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n", &ctx->ibucontext); return 0; + +err: + if (!ctx->db_mmap_entry) + dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi); + else + rdma_user_mmap_entry_remove(ctx->db_mmap_entry); + + return rc; } void qedr_dealloc_ucontext(struct ib_ucontext *ibctx) { struct qedr_ucontext 
*uctx = get_qedr_ucontext(ibctx); - struct qedr_mm *mm, *tmp; DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n", uctx); - uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi); - - list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { - DP_DEBUG(uctx->dev, QEDR_MSG_MISC, - "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n", - mm->key.phy_addr, mm->key.len, uctx); - list_del(&mm->entry); - kfree(mm); - } + + rdma_user_mmap_entry_remove(uctx->db_mmap_entry); } -int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry) { - struct qedr_ucontext *ucontext = get_qedr_ucontext(context); - struct qedr_dev *dev = get_qedr_dev(context->device); - unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT; - unsigned long len = (vma->vm_end - vma->vm_start); - unsigned long dpi_start; + struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry); + struct qedr_dev *dev = entry->dev; - dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size); + if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC) + dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi); - DP_DEBUG(dev, QEDR_MSG_INIT, - "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n", - (void *)vma->vm_start, (void *)vma->vm_end, - (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size); + kfree(entry); +} - if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) { - DP_ERR(dev, - "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n", - (void *)vma->vm_start, (void *)vma->vm_end); - return -EINVAL; - } +int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma) +{ + struct ib_device *dev = ucontext->device; + size_t length = vma->vm_end - vma->vm_start; + struct rdma_user_mmap_entry *rdma_entry; + struct qedr_user_mmap_entry *entry; + int rc = 0; + u64 pfn; - if (!qedr_search_mmap(ucontext, phys_addr, len)) { - DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n", - vma->vm_pgoff); - return -EINVAL; - } + ibdev_dbg(dev, + "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n", + vma->vm_start, vma->vm_end, length, vma->vm_pgoff); - if (phys_addr < dpi_start || - ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) { - DP_ERR(dev, - "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n", - (void *)phys_addr, (void *)dpi_start, - ucontext->dpi_size); + rdma_entry = rdma_user_mmap_entry_get(ucontext, vma); + if (!rdma_entry) { + ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n", + vma->vm_pgoff); return -EINVAL; } - - if (vma->vm_flags & VM_READ) { - DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n"); - return -EINVAL; + entry = get_qedr_mmap_entry(rdma_entry); + ibdev_dbg(dev, + "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n", + entry->io_address, length, entry->mmap_flag); + + switch (entry->mmap_flag) { + case QEDR_USER_MMAP_IO_WC: + pfn = entry->io_address >> PAGE_SHIFT; + rc = rdma_user_mmap_io(ucontext, vma, pfn, length, + pgprot_writecombine(vma->vm_page_prot), + rdma_entry); + break; + default: + rc = -EINVAL; } - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, - vma->vm_page_prot); + if (rc) + ibdev_dbg(dev, + "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", + entry->io_address, length, entry->mmap_flag, rc); + + rdma_user_mmap_entry_put(rdma_entry); + return rc; } 
int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 9aaa90283d6e..3606e97e95da 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -46,7 +46,8 @@ int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey); int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); void qedr_dealloc_ucontext(struct ib_ucontext *uctx); -int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); +int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma); +void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); -- cgit v1.2.3-59-g8ed1b From 97f612509294aadabb8b431782794544df10cd13 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:16 +0200 Subject: RDMA/qedr: Add doorbell overflow recovery support Use the doorbell recovery mechanism to register rdma related doorbells that will be restored in case there is a doorbell overflow attention. Link: https://lore.kernel.org/r/20191030094417.16866-8-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr.h | 11 +- drivers/infiniband/hw/qedr/verbs.c | 314 +++++++++++++++++++++++++++++++------ include/uapi/rdma/qedr-abi.h | 25 +++ 3 files changed, 300 insertions(+), 50 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 6ef39dccba21..639d521ed319 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -235,6 +235,7 @@ struct qedr_ucontext { u64 dpi_phys_addr; u32 dpi_size; u16 dpi; + bool db_rec; }; union db_prod64 { @@ -262,6 +263,11 @@ struct qedr_userq { struct qedr_pbl *pbl_tbl; u64 buf_addr; size_t buf_len; + + /* doorbell recovery */ + void __iomem *db_addr; + struct qedr_user_db_rec *db_rec_data; + struct rdma_user_mmap_entry *db_mmap_entry; }; struct qedr_cq { @@ -482,7 +488,10 @@ struct qedr_mr { struct qedr_user_mmap_entry { struct rdma_user_mmap_entry rdma_entry; struct qedr_dev *dev; - u64 io_address; + union { + u64 io_address; + void *address; + }; size_t length; u16 dpi; u8 mmap_flag; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 903cd934eb53..bc4a35b5581c 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -61,6 +61,7 @@ enum { QEDR_USER_MMAP_IO_WC = 0, + QEDR_USER_MMAP_PHYS_PAGE, }; static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, @@ -267,6 +268,7 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) int rc; struct qedr_ucontext *ctx = get_qedr_ucontext(uctx); struct qedr_alloc_ucontext_resp uresp = {}; + struct qedr_alloc_ucontext_req ureq = {}; struct qedr_dev *dev = get_qedr_dev(ibdev); struct qed_rdma_add_user_out_params oparams; struct qedr_user_mmap_entry *entry; @@ -274,6 +276,17 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) if (!udata) return -EFAULT; + if (udata->inlen) { + rc = ib_copy_from_udata(&ureq, udata, + min(sizeof(ureq), udata->inlen)); + if (rc) { + DP_ERR(dev, "Problem copying data from user space\n"); + return -EFAULT; + } + + ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC); + } + rc = 
dev->ops->rdma_add_user(dev->rdma_ctx, &oparams); if (rc) { DP_ERR(dev, @@ -352,7 +365,9 @@ void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry) struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry); struct qedr_dev *dev = entry->dev; - if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC) + if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE) + free_page((unsigned long)entry->address); + else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC) dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi); kfree(entry); @@ -389,6 +404,10 @@ int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma) pgprot_writecombine(vma->vm_page_prot), rdma_entry); break; + case QEDR_USER_MMAP_PHYS_PAGE: + rc = vm_insert_page(vma, vma->vm_start, + virt_to_page(entry->address)); + break; default: rc = -EINVAL; } @@ -629,16 +648,48 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, } } +static int qedr_db_recovery_add(struct qedr_dev *dev, + void __iomem *db_addr, + void *db_data, + enum qed_db_rec_width db_width, + enum qed_db_rec_space db_space) +{ + if (!db_data) { + DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n"); + return 0; + } + + return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data, + db_width, db_space); +} + +static void qedr_db_recovery_del(struct qedr_dev *dev, + void __iomem *db_addr, + void *db_data) +{ + if (!db_data) { + DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n"); + return; + } + + /* Ignore return code as there is not much we can do about it. Error + * log will be printed inside. + */ + dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data); +} + static int qedr_copy_cq_uresp(struct qedr_dev *dev, - struct qedr_cq *cq, struct ib_udata *udata) + struct qedr_cq *cq, struct ib_udata *udata, + u32 db_offset) { struct qedr_create_cq_uresp uresp; int rc; memset(&uresp, 0, sizeof(uresp)); - uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); + uresp.db_offset = db_offset; uresp.icid = cq->icid; + uresp.db_rec_addr = rdma_user_mmap_get_offset(cq->q.db_mmap_entry); rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) @@ -666,10 +717,58 @@ static inline int qedr_align_cq_entries(int entries) return aligned_size / QEDR_CQE_SIZE; } +static int qedr_init_user_db_rec(struct ib_udata *udata, + struct qedr_dev *dev, struct qedr_userq *q, + bool requires_db_rec) +{ + struct qedr_ucontext *uctx = + rdma_udata_to_drv_context(udata, struct qedr_ucontext, + ibucontext); + struct qedr_user_mmap_entry *entry; + int rc; + + /* Aborting for non doorbell userqueue (SRQ) or non-supporting lib */ + if (requires_db_rec == 0 || !uctx->db_rec) + return 0; + + /* Allocate a page for doorbell recovery, add to mmap */ + q->db_rec_data = (void *)get_zeroed_page(GFP_USER); + if (!q->db_rec_data) { + DP_ERR(dev, "get_zeroed_page failed\n"); + return -ENOMEM; + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto err_free_db_data; + + entry->address = q->db_rec_data; + entry->length = PAGE_SIZE; + entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE; + rc = rdma_user_mmap_entry_insert(&uctx->ibucontext, + &entry->rdma_entry, + PAGE_SIZE); + if (rc) + goto err_free_entry; + + q->db_mmap_entry = &entry->rdma_entry; + + return 0; + +err_free_entry: + kfree(entry); + +err_free_db_data: + free_page((unsigned long)q->db_rec_data); + q->db_rec_data = NULL; + return -ENOMEM; +} + static inline int qedr_init_user_queue(struct ib_udata *udata, struct qedr_dev *dev, struct qedr_userq *q, u64 
buf_addr, - size_t buf_len, int access, int dmasync, + size_t buf_len, bool requires_db_rec, + int access, int dmasync, int alloc_and_init) { u32 fw_pages; @@ -707,7 +806,8 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, } } - return 0; + /* mmap the user address used to store doorbell data for recovery */ + return qedr_init_user_db_rec(udata, dev, q, requires_db_rec); err0: ib_umem_release(q->umem); @@ -793,6 +893,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, int entries = attr->cqe; struct qedr_cq *cq = get_qedr_cq(ibcq); int chain_entries; + u32 db_offset; int page_cnt; u64 pbl_ptr; u16 icid; @@ -812,8 +913,12 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, chain_entries = qedr_align_cq_entries(entries); chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES); + /* calc db offset. user will add DPI base, kernel will add db addr */ + db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); + if (udata) { - if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { + if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), + udata->inlen))) { DP_ERR(dev, "create cq: problem copying data from user space\n"); goto err0; @@ -828,8 +933,9 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->cq_type = QEDR_CQ_TYPE_USER; rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr, - ureq.len, IB_ACCESS_LOCAL_WRITE, 1, - 1); + ureq.len, true, + IB_ACCESS_LOCAL_WRITE, + 1, 1); if (rc) goto err0; @@ -837,6 +943,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, page_cnt = cq->q.pbl_info.num_pbes; cq->ibcq.cqe = chain_entries; + cq->q.db_addr = ctx->dpi_addr + db_offset; } else { cq->cq_type = QEDR_CQ_TYPE_KERNEL; @@ -848,7 +955,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, sizeof(union rdma_cqe), &cq->pbl, NULL); if (rc) - goto err1; + goto err0; page_cnt = qed_chain_get_page_cnt(&cq->pbl); pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl); @@ -860,21 +967,28 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, rc = dev->ops->rdma_create_cq(dev->rdma_ctx, ¶ms, &icid); if (rc) - goto err2; + goto err1; cq->icid = icid; cq->sig = QEDR_CQ_MAGIC_NUMBER; spin_lock_init(&cq->cq_lock); if (udata) { - rc = qedr_copy_cq_uresp(dev, cq, udata); + rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset); if (rc) - goto err3; + goto err2; + + rc = qedr_db_recovery_add(dev, cq->q.db_addr, + &cq->q.db_rec_data->db_data, + DB_REC_WIDTH_64B, + DB_REC_USER); + if (rc) + goto err2; + } else { /* Generate doorbell address. 
*/ - cq->db_addr = dev->db_addr + - DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); cq->db.data.icid = cq->icid; + cq->db_addr = dev->db_addr + db_offset; cq->db.data.params = DB_AGG_CMD_SET << RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT; @@ -884,6 +998,11 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->latest_cqe = NULL; consume_cqe(cq); cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl); + + rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data, + DB_REC_WIDTH_64B, DB_REC_KERNEL); + if (rc) + goto err2; } DP_DEBUG(dev, QEDR_MSG_CQ, @@ -892,18 +1011,19 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return 0; -err3: +err2: destroy_iparams.icid = cq->icid; dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams, &destroy_oparams); -err2: - if (udata) - qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); - else - dev->ops->common->chain_free(dev->cdev, &cq->pbl); err1: - if (udata) + if (udata) { + qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); ib_umem_release(cq->q.umem); + if (ctx) + rdma_user_mmap_entry_remove(cq->q.db_mmap_entry); + } else { + dev->ops->common->chain_free(dev->cdev, &cq->pbl); + } err0: return -EINVAL; } @@ -934,8 +1054,10 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) cq->destroyed = 1; /* GSIs CQs are handled by driver, so they don't exist in the FW */ - if (cq->cq_type == QEDR_CQ_TYPE_GSI) + if (cq->cq_type == QEDR_CQ_TYPE_GSI) { + qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data); return; + } iparams.icid = cq->icid; dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); @@ -944,6 +1066,14 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) if (udata) { qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); ib_umem_release(cq->q.umem); + + if (cq->q.db_rec_data) { + qedr_db_recovery_del(dev, cq->q.db_addr, + &cq->q.db_rec_data->db_data); + rdma_user_mmap_entry_remove(cq->q.db_mmap_entry); + } + } else { + qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data); } /* We don't want the IRQ handler to handle a non-existing CQ so we @@ -1108,8 +1238,8 @@ static int qedr_copy_srq_uresp(struct qedr_dev *dev, } static void qedr_copy_rq_uresp(struct qedr_dev *dev, - struct qedr_create_qp_uresp *uresp, - struct qedr_qp *qp) + struct qedr_create_qp_uresp *uresp, + struct qedr_qp *qp) { /* iWARP requires two doorbells per RQ. 
*/ if (rdma_protocol_iwarp(&dev->ibdev, 1)) { @@ -1122,6 +1252,7 @@ static void qedr_copy_rq_uresp(struct qedr_dev *dev, } uresp->rq_icid = qp->icid; + uresp->rq_db_rec_addr = rdma_user_mmap_get_offset(qp->urq.db_mmap_entry); } static void qedr_copy_sq_uresp(struct qedr_dev *dev, @@ -1135,22 +1266,25 @@ static void qedr_copy_sq_uresp(struct qedr_dev *dev, uresp->sq_icid = qp->icid; else uresp->sq_icid = qp->icid + 1; + + uresp->sq_db_rec_addr = + rdma_user_mmap_get_offset(qp->usq.db_mmap_entry); } static int qedr_copy_qp_uresp(struct qedr_dev *dev, - struct qedr_qp *qp, struct ib_udata *udata) + struct qedr_qp *qp, struct ib_udata *udata, + struct qedr_create_qp_uresp *uresp) { - struct qedr_create_qp_uresp uresp; int rc; - memset(&uresp, 0, sizeof(uresp)); - qedr_copy_sq_uresp(dev, &uresp, qp); - qedr_copy_rq_uresp(dev, &uresp, qp); + memset(uresp, 0, sizeof(*uresp)); + qedr_copy_sq_uresp(dev, uresp, qp); + qedr_copy_rq_uresp(dev, uresp, qp); - uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; - uresp.qp_id = qp->qp_id; + uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; + uresp->qp_id = qp->qp_id; - rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp)); if (rc) DP_ERR(dev, "create qp: failed a copy to user space with qp icid=0x%x.\n", @@ -1197,16 +1331,35 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, qp->sq.max_sges, qp->sq_cq->icid); } -static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) +static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) { + int rc; + qp->sq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); qp->sq.db_data.data.icid = qp->icid + 1; + rc = qedr_db_recovery_add(dev, qp->sq.db, + &qp->sq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + if (rc) + return rc; + if (!qp->srq) { qp->rq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); qp->rq.db_data.data.icid = qp->icid; + + rc = qedr_db_recovery_add(dev, qp->rq.db, + &qp->rq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + if (rc) + qedr_db_recovery_del(dev, qp->sq.db, + &qp->sq.db_data); } + + return rc; } static int qedr_check_srq_params(struct qedr_dev *dev, @@ -1260,7 +1413,7 @@ static int qedr_init_srq_user_params(struct ib_udata *udata, int rc; rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr, - ureq->srq_len, access, dmasync, 1); + ureq->srq_len, false, access, dmasync, 1); if (rc) return rc; @@ -1356,7 +1509,8 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, hw_srq->max_sges = init_attr->attr.max_sge; if (udata) { - if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { + if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), + udata->inlen))) { DP_ERR(dev, "create srq: problem copying data from user space\n"); goto err0; @@ -1545,7 +1699,9 @@ qedr_iwarp_populate_user_qp(struct qedr_dev *dev, &qp->urq.pbl_info, FW_PAGE_SHIFT); } -static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) +static void qedr_cleanup_user(struct qedr_dev *dev, + struct qedr_ucontext *ctx, + struct qedr_qp *qp) { ib_umem_release(qp->usq.umem); qp->usq.umem = NULL; @@ -1560,6 +1716,18 @@ static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) kfree(qp->usq.pbl_tbl); kfree(qp->urq.pbl_tbl); } + + if (qp->usq.db_rec_data) { + qedr_db_recovery_del(dev, qp->usq.db_addr, + &qp->usq.db_rec_data->db_data); + rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry); + } + + if 
(qp->urq.db_rec_data) { + qedr_db_recovery_del(dev, qp->urq.db_addr, + &qp->urq.db_rec_data->db_data); + rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry); + } } static int qedr_create_user_qp(struct qedr_dev *dev, @@ -1571,13 +1739,15 @@ static int qedr_create_user_qp(struct qedr_dev *dev, struct qed_rdma_create_qp_in_params in_params; struct qed_rdma_create_qp_out_params out_params; struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_create_qp_uresp uresp; + struct qedr_ucontext *ctx = NULL; struct qedr_create_qp_ureq ureq; int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; qp->create_type = QEDR_QP_CREATE_USER; memset(&ureq, 0, sizeof(ureq)); - rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); + rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen)); if (rc) { DP_ERR(dev, "Problem copying data from user space\n"); return rc; @@ -1585,14 +1755,16 @@ static int qedr_create_user_qp(struct qedr_dev *dev, /* SQ - read access only (0), dma sync not required (0) */ rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr, - ureq.sq_len, 0, 0, alloc_and_init); + ureq.sq_len, true, 0, 0, + alloc_and_init); if (rc) return rc; if (!qp->srq) { /* RQ - read access only (0), dma sync not required (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, - ureq.rq_len, 0, 0, alloc_and_init); + ureq.rq_len, true, + 0, 0, alloc_and_init); if (rc) return rc; } @@ -1622,29 +1794,57 @@ static int qedr_create_user_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - rc = qedr_copy_qp_uresp(dev, qp, udata); + rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp); + if (rc) + goto err; + + /* db offset was calculated in copy_qp_uresp, now set in the user q */ + ctx = pd->uctx; + qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset; + qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset; + + rc = qedr_db_recovery_add(dev, qp->usq.db_addr, + &qp->usq.db_rec_data->db_data, + DB_REC_WIDTH_32B, + DB_REC_USER); + if (rc) + goto err; + + rc = qedr_db_recovery_add(dev, qp->urq.db_addr, + &qp->urq.db_rec_data->db_data, + DB_REC_WIDTH_32B, + DB_REC_USER); if (rc) goto err; qedr_qp_user_print(dev, qp); - return 0; + return rc; err: rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); if (rc) DP_ERR(dev, "create qp: fatal fault. 
rc=%d", rc); err1: - qedr_cleanup_user(dev, qp); + qedr_cleanup_user(dev, ctx, qp); return rc; } -static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) +static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) { + int rc; + qp->sq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); qp->sq.db_data.data.icid = qp->icid; + rc = qedr_db_recovery_add(dev, qp->sq.db, + &qp->sq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + if (rc) + return rc; + qp->rq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD); qp->rq.db_data.data.icid = qp->icid; @@ -1652,6 +1852,12 @@ static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS); qp->rq.iwarp_db2_data.data.icid = qp->icid; qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD; + + rc = qedr_db_recovery_add(dev, qp->rq.db, + &qp->rq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + return rc; } static int @@ -1699,8 +1905,7 @@ qedr_roce_create_kernel_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - qedr_set_roce_db_info(dev, qp); - return rc; + return qedr_set_roce_db_info(dev, qp); } static int @@ -1758,8 +1963,7 @@ qedr_iwarp_create_kernel_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - qedr_set_iwarp_db_info(dev, qp); - return rc; + return qedr_set_iwarp_db_info(dev, qp); err: dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); @@ -1774,6 +1978,15 @@ static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp) dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl); kfree(qp->rqe_wr_id); + + /* GSI qp is not registered to db mechanism so no need to delete */ + if (qp->qp_type == IB_QPT_GSI) + return; + + qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data); + + if (!qp->srq) + qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data); } static int qedr_create_kernel_qp(struct qedr_dev *dev, @@ -2414,7 +2627,10 @@ err: static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, struct ib_udata *udata) { - int rc = 0; + struct qedr_ucontext *ctx = + rdma_udata_to_drv_context(udata, struct qedr_ucontext, + ibucontext); + int rc; if (qp->qp_type != IB_QPT_GSI) { rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); @@ -2423,7 +2639,7 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, } if (qp->create_type == QEDR_QP_CREATE_USER) - qedr_cleanup_user(dev, qp); + qedr_cleanup_user(dev, ctx, qp); else qedr_cleanup_kernel(dev, qp); diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h index 7a10b3a325fa..c022ee26089b 100644 --- a/include/uapi/rdma/qedr-abi.h +++ b/include/uapi/rdma/qedr-abi.h @@ -38,6 +38,15 @@ #define QEDR_ABI_VERSION (8) /* user kernel communication data structures. 
*/ +enum qedr_alloc_ucontext_flags { + QEDR_ALLOC_UCTX_RESERVED = 1 << 0, + QEDR_ALLOC_UCTX_DB_REC = 1 << 1 +}; + +struct qedr_alloc_ucontext_req { + __u32 context_flags; + __u32 reserved; +}; struct qedr_alloc_ucontext_resp { __aligned_u64 db_pa; @@ -74,6 +83,7 @@ struct qedr_create_cq_uresp { __u32 db_offset; __u16 icid; __u16 reserved; + __aligned_u64 db_rec_addr; }; struct qedr_create_qp_ureq { @@ -109,6 +119,13 @@ struct qedr_create_qp_uresp { __u32 rq_db2_offset; __u32 reserved; + + /* address of SQ doorbell recovery user entry */ + __aligned_u64 sq_db_rec_addr; + + /* address of RQ doorbell recovery user entry */ + __aligned_u64 rq_db_rec_addr; + }; struct qedr_create_srq_ureq { @@ -128,4 +145,12 @@ struct qedr_create_srq_uresp { __u32 reserved1; }; +/* doorbell recovery entry allocated and populated by userspace doorbelling + * entities and mapped to kernel. Kernel uses this to register doorbell + * information with doorbell drop recovery mechanism. + */ +struct qedr_user_db_rec { + __aligned_u64 db_data; /* doorbell data */ +}; + #endif /* __QEDR_USER_H__ */ -- cgit v1.2.3-59-g8ed1b From b4bc76609722f175a257184cc17ad73226d4b716 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:17 +0200 Subject: RDMA/qedr: Add iWARP doorbell recovery support This patch adds the iWARP specific doorbells to the doorbell recovery mechanism. Link: https://lore.kernel.org/r/20191030094417.16866-9-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr.h | 12 +++++++----- drivers/infiniband/hw/qedr/verbs.c | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 639d521ed319..5488dbd59d3c 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -238,6 +238,11 @@ struct qedr_ucontext { bool db_rec; }; +union db_prod32 { + struct rdma_pwm_val16_data data; + u32 raw; +}; + union db_prod64 { struct rdma_pwm_val32_data data; u64 raw; @@ -268,6 +273,8 @@ struct qedr_userq { void __iomem *db_addr; struct qedr_user_db_rec *db_rec_data; struct rdma_user_mmap_entry *db_mmap_entry; + void __iomem *db_rec_db2_addr; + union db_prod32 db_rec_db2_data; }; struct qedr_cq { @@ -303,11 +310,6 @@ struct qedr_pd { struct qedr_ucontext *uctx; }; -union db_prod32 { - struct rdma_pwm_val16_data data; - u32 raw; -}; - struct qedr_qp_hwq_info { /* WQE Elements */ struct qed_chain pbl; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index bc4a35b5581c..9a674e65c4f7 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1728,6 +1728,10 @@ static void qedr_cleanup_user(struct qedr_dev *dev, &qp->urq.db_rec_data->db_data); rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry); } + + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr, + &qp->urq.db_rec_db2_data); } static int qedr_create_user_qp(struct qedr_dev *dev, @@ -1803,6 +1807,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev, qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset; qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset; + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset; + + /* calculate the db_rec_db2 data since it is constant so no + * need to reflect from user + */ + 
qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid); + qp->urq.db_rec_db2_data.data.value = + cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD); + } + rc = qedr_db_recovery_add(dev, qp->usq.db_addr, &qp->usq.db_rec_data->db_data, DB_REC_WIDTH_32B, @@ -1817,6 +1832,14 @@ static int qedr_create_user_qp(struct qedr_dev *dev, if (rc) goto err; + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr, + &qp->urq.db_rec_db2_data, + DB_REC_WIDTH_32B, + DB_REC_USER); + if (rc) + goto err; + } qedr_qp_user_print(dev, qp); return rc; @@ -1857,6 +1880,13 @@ static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) &qp->rq.db_data, DB_REC_WIDTH_32B, DB_REC_KERNEL); + if (rc) + return rc; + + rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2, + &qp->rq.iwarp_db2_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); return rc; } @@ -1985,8 +2015,13 @@ static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp) qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data); - if (!qp->srq) + if (!qp->srq) { qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data); + + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_db_recovery_del(dev, qp->rq.iwarp_db2, + &qp->rq.iwarp_db2_data); + } } static int qedr_create_kernel_qp(struct qedr_dev *dev, -- cgit v1.2.3-59-g8ed1b From 55bfe905fa97633438c13fb029aed85371d85480 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 28 Oct 2019 17:59:28 +0200 Subject: RDMA/core: Fix return code when modify_port isn't supported Improve return code from ib_modify_port() by doing the following: - Use "-EOPNOTSUPP" instead "-ENOSYS" which is the proper return code - Allow only fake IB_PORT_CM_SUP manipulation for RoCE providers that didn't implement the modify_port callback, otherwise return "-EOPNOTSUPP" Fixes: 61e0962d5221 ("IB: Avoid ib_modify_port() failure for RoCE devices") Link: https://lore.kernel.org/r/20191028155931.1114-2-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index e785bebaf16e..1ad39b209252 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2408,8 +2408,12 @@ int ib_modify_port(struct ib_device *device, rc = device->ops.modify_port(device, port_num, port_modify_mask, port_modify); + else if (rdma_protocol_roce(device, port_num) && + ((port_modify->set_port_cap_mask & ~IB_PORT_CM_SUP) == 0 || + (port_modify->clr_port_cap_mask & ~IB_PORT_CM_SUP) == 0)) + rc = 0; else - rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS; + rc = -EOPNOTSUPP; return rc; } EXPORT_SYMBOL(ib_modify_port); -- cgit v1.2.3-59-g8ed1b From 25f3b49b9288a873b8c2ed94d7f66fad74e82296 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 28 Oct 2019 17:59:29 +0200 Subject: RDMA/hns: Remove unsupported modify_port callback There is no need to return always zero for function which is not supported. 
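With ib_modify_port() now returning -EOPNOTSUPP (and only faking success for an
IB_PORT_CM_SUP-only change on RoCE providers that lack the callback), stub
modify_port implementations like this one can simply be dropped. As an
illustration only (not part of this patch; the helper name is made up), a
caller that merely toggles the fake IB_PORT_CM_SUP capability keeps working
against such a provider, while any other request now fails with -EOPNOTSUPP
instead of -ENOSYS:

static int example_toggle_cm_sup(struct ib_device *device, u8 port_num,
				 bool enable)
{
	struct ib_port_modify pm = {};
	int ret;

	if (enable)
		pm.set_port_cap_mask = IB_PORT_CM_SUP;
	else
		pm.clr_port_cap_mask = IB_PORT_CM_SUP;

	ret = ib_modify_port(device, port_num, 0, &pm);
	if (ret == -EOPNOTSUPP)
		ret = 0;	/* treat "not supported" as non-fatal here */
	return ret;
}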
Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Link: https://lore.kernel.org/r/20191028155931.1114-3-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_main.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 803dc6f4b496..a389f9996689 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -301,12 +301,6 @@ static int hns_roce_modify_device(struct ib_device *ib_dev, int mask, return 0; } -static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask, - struct ib_port_modify *props) -{ - return 0; -} - static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { @@ -440,7 +434,6 @@ static const struct ib_device_ops hns_roce_dev_ops = { .get_port_immutable = hns_roce_port_immutable, .mmap = hns_roce_mmap, .modify_device = hns_roce_modify_device, - .modify_port = hns_roce_modify_port, .modify_qp = hns_roce_modify_qp, .query_ah = hns_roce_query_ah, .query_device = hns_roce_query_device, -- cgit v1.2.3-59-g8ed1b From 6135b71159de3bc66964ad9157f1a62a77590006 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 28 Oct 2019 17:59:30 +0200 Subject: RDMA/ocrdma: Remove unsupported modify_port callback There is no need to return always zero for function which is not supported. Fixes: fe2caefcdf58 ("RDMA/ocrdma: Add driver for Emulex OneConnect IBoE RDMA adapter") Link: https://lore.kernel.org/r/20191028155931.1114-4-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 - drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 6 ------ drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 2 -- 3 files changed, 9 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index c15cfc6cef81..d8c47d24d6d6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -166,7 +166,6 @@ static const struct ib_device_ops ocrdma_dev_ops = { .get_port_immutable = ocrdma_port_immutable, .map_mr_sg = ocrdma_map_mr_sg, .mmap = ocrdma_mmap, - .modify_port = ocrdma_modify_port, .modify_qp = ocrdma_modify_qp, .poll_cq = ocrdma_poll_cq, .post_recv = ocrdma_post_recv, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e8267e590772..e72050de5734 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -190,12 +190,6 @@ int ocrdma_query_port(struct ib_device *ibdev, return 0; } -int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, - struct ib_port_modify *props) -{ - return 0; -} - static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, unsigned long len) { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 32488da1b752..3a5010881be5 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -54,8 +54,6 @@ int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags); int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props, struct ib_udata *uhw); int ocrdma_query_port(struct ib_device *, u8 port, struct ib_port_attr *props); 
-int ocrdma_modify_port(struct ib_device *, u8 port, int mask, - struct ib_port_modify *props); enum rdma_protocol_type ocrdma_query_protocol(struct ib_device *device, u8 port_num); -- cgit v1.2.3-59-g8ed1b From ad0593ec8930d2e3fa4252cb7ad666bdfe4ed4f1 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 28 Oct 2019 17:59:31 +0200 Subject: RDMA/qedr: Remove unsupported modify_port callback There is no need to return always zero for function which is not supported. Fixes: ac1b36e55a51 ("qedr: Add support for user context verbs") Link: https://lore.kernel.org/r/20191028155931.1114-5-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 1 - drivers/infiniband/hw/qedr/verbs.c | 6 ------ drivers/infiniband/hw/qedr/verbs.h | 2 -- 3 files changed, 9 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 03fd168cccc0..ee47f6ce0c12 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -213,7 +213,6 @@ static const struct ib_device_ops qedr_dev_ops = { .map_mr_sg = qedr_map_mr_sg, .mmap = qedr_mmap, .mmap_free = qedr_mmap_free, - .modify_port = qedr_modify_port, .modify_qp = qedr_modify_qp, .modify_srq = qedr_modify_srq, .poll_cq = qedr_poll_cq, diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 9a674e65c4f7..8096b8fcab4e 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -256,12 +256,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) return 0; } -int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask, - struct ib_port_modify *props) -{ - return 0; -} - int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { struct ib_device *ibdev = uctx->device; diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 3606e97e95da..620472116c6d 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -35,8 +35,6 @@ int qedr_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, struct ib_udata *udata); int qedr_query_port(struct ib_device *, u8 port, struct ib_port_attr *props); -int qedr_modify_port(struct ib_device *, u8 port, int mask, - struct ib_port_modify *props); int qedr_iw_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); -- cgit v1.2.3-59-g8ed1b From 77cf98d4ec90e8c48592c6537cfc2281c58f7ac3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 1 Nov 2019 13:47:56 -0700 Subject: Revert "RDMA/srpt: Postpone HCA removal until after configfs directory removal" Although the mentioned patch fixes a use-after-free bug, it introduces a hang during shutdown. Since the latter is worse, revert this patch. 
Link: https://lore.kernel.org/r/20191101204756.182162-1-bvanassche@acm.org Reported-by: Honggang Li Fixes: 9b64f7d0bb0a ("RDMA/srpt: Postpone HCA removal until after configfs directory removal") Signed-off-by: Bart Van Assche Acked-by: Honggang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index a278e76b9e02..653de1583cf9 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2936,7 +2936,7 @@ static int srpt_release_sport(struct srpt_port *sport) while (atomic_read(&sport->refcount) > 0 && wait_for_completion_timeout(&c, 5 * HZ) <= 0) { - pr_info("%s_%d: waiting for unregistration of %d sessions and configfs directories ...\n", + pr_info("%s_%d: waiting for unregistration of %d sessions ...\n", dev_name(&sport->sdev->device->dev), sport->port, atomic_read(&sport->refcount)); rcu_read_lock(); @@ -3733,7 +3733,6 @@ static struct configfs_attribute *srpt_tpg_attrs[] = { static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, const char *name) { - struct srpt_port *sport = wwn->priv; struct srpt_port_id *sport_id = srpt_wwn_to_sport_id(wwn); struct srpt_tpg *stpg; int res = -ENOMEM; @@ -3752,8 +3751,6 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, list_add_tail(&stpg->entry, &sport_id->tpg_list); mutex_unlock(&sport_id->mutex); - atomic_inc(&sport->refcount); - return &stpg->tpg; } @@ -3774,7 +3771,6 @@ static void srpt_drop_tpg(struct se_portal_group *tpg) sport->enabled = false; core_tpg_deregister(tpg); kfree(stpg); - srpt_drop_sport_ref(sport); } /** -- cgit v1.2.3-59-g8ed1b From 8a80cf93106045cd9e36330e153d0e606428619e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:30 +0200 Subject: RDMA/mad: Delete never implemented functions Delete never implemented and used MAD functions. 
Link: https://lore.kernel.org/r/20191029062745.7932-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 19 ------------------- include/rdma/ib_mad.h | 40 ---------------------------------------- 2 files changed, 59 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 9947d16edef2..8adb487eb314 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1397,25 +1397,6 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) } EXPORT_SYMBOL(ib_free_recv_mad); -struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, - u8 rmpp_version, - ib_mad_send_handler send_handler, - ib_mad_recv_handler recv_handler, - void *context) -{ - return ERR_PTR(-EINVAL); /* XXX: for now */ -} -EXPORT_SYMBOL(ib_redirect_mad_qp); - -int ib_process_mad_wc(struct ib_mad_agent *mad_agent, - struct ib_wc *wc) -{ - dev_err(&mad_agent->device->dev, - "ib_process_mad_wc() not implemented yet\n"); - return 0; -} -EXPORT_SYMBOL(ib_process_mad_wc); - static int method_in_use(struct ib_mad_mgmt_method_table **method, struct ib_mad_reg_req *mad_reg_req) { diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index eea946fcc819..4e62650e2127 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -814,46 +814,6 @@ void ib_cancel_mad(struct ib_mad_agent *mad_agent, int ib_modify_mad(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, u32 timeout_ms); -/** - * ib_redirect_mad_qp - Registers a QP for MAD services. - * @qp: Reference to a QP that requires MAD services. - * @rmpp_version: If set, indicates that the client will send - * and receive MADs that contain the RMPP header for the given version. - * If set to 0, indicates that RMPP is not used by this client. - * @send_handler: The completion callback routine invoked after a send - * request has completed. - * @recv_handler: The completion callback routine invoked for a received - * MAD. - * @context: User specified context associated with the registration. - * - * Use of this call allows clients to use MAD services, such as RMPP, - * on user-owned QPs. After calling this routine, users may send - * MADs on the specified QP by calling ib_mad_post_send. - */ -struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, - u8 rmpp_version, - ib_mad_send_handler send_handler, - ib_mad_recv_handler recv_handler, - void *context); - -/** - * ib_process_mad_wc - Processes a work completion associated with a - * MAD sent or received on a redirected QP. - * @mad_agent: Specifies the registered MAD service using the redirected QP. - * @wc: References a work completion associated with a sent or received - * MAD segment. - * - * This routine is used to complete or continue processing on a MAD request. - * If the work completion is associated with a send operation, calling - * this routine is required to continue an RMPP transfer or to wait for a - * corresponding response, if it is a request. If the work completion is - * associated with a receive operation, calling this routine is required to - * process an inbound or outbound RMPP transfer, or to match a response MAD - * with its corresponding request. - */ -int ib_process_mad_wc(struct ib_mad_agent *mad_agent, - struct ib_wc *wc); - /** * ib_create_send_mad - Allocate and initialize a data buffer and work request * for sending a MAD. 
-- cgit v1.2.3-59-g8ed1b From 688eec9d3dca2a3db3824e66463f086c70fa1b9f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:43 +0200 Subject: RDMA/qib: Delete extra line Trivial cleanup to fix the following warning: drivers/infiniband/hw/qib/qib_iba6120.c:1420: warning: bad line: Fixes: f931551bafe1 ("IB/qib: Add new qib driver for QLogic PCIe InfiniBand adapters") Link: https://lore.kernel.org/r/20191029062745.7932-15-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_iba6120.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 531d8a1db2c3..ca5ea734e3d0 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1417,7 +1417,6 @@ static void qib_6120_quiet_serdes(struct qib_pportdata *ppd) * * The exact combo of LEDs if on is true is determined by looking * at the ibcstatus. - * These LEDs indicate the physical and logical state of IB link. * For this chip (at least with recommended board pinouts), LED1 * is Yellow (logical state) and LED2 is Green (physical state), -- cgit v1.2.3-59-g8ed1b From 874e476ba949deb475378e10df7527921e4ff6c1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:44 +0200 Subject: RDMA/qib: Delete empty check_cc_key function Function always returns zero, just delete it. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_mad.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index f92faf5ec369..818ec37a3215 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2296,18 +2296,11 @@ bail: return reply_failure((struct ib_smp *) ccp); } -static int check_cc_key(struct qib_ibport *ibp, - struct ib_cc_mad *ccp, int mad_flags) -{ - return 0; -} - static int process_cc(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; - struct qib_ibport *ibp = to_iport(ibdev, port); int ret; *out_mad = *in_mad; @@ -2318,10 +2311,6 @@ static int process_cc(struct ib_device *ibdev, int mad_flags, goto bail; } - ret = check_cc_key(ibp, ccp, mad_flags); - if (ret) - goto bail; - switch (ccp->method) { case IB_MGMT_METHOD_GET: switch (ccp->attr_id) { -- cgit v1.2.3-59-g8ed1b From be4a8d46732a45e78aa27c34b58431a917cd644d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:31 +0200 Subject: RDMA/mad: Allocate zeroed MAD buffer Ensure that MAD output buffer is zero-based allocated in all the callers of process_mad and remove the various memset()'s from the drivers. 
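A minimal sketch of the allocation pattern this change standardizes on (identifiers simplified, not the exact mad.c call sites): zero-allocating the reply MAD in the core makes each driver's memset() of out_mad->data redundant.

        /* before: the core hands drivers an uninitialized reply buffer,
         * so every driver clears out_mad->data before filling it */
        out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
        ...
        memset(out_mad->data, 0, sizeof(out_mad->data));

        /* after: the reply buffer arrives already zeroed */
        out_mad = kzalloc(sizeof(*out_mad), GFP_KERNEL);
        if (!out_mad)
                return -ENOMEM;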
Link: https://lore.kernel.org/r/20191029062745.7932-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sysfs.c | 4 ++-- drivers/infiniband/hw/mlx4/mad.c | 1 - drivers/infiniband/hw/mlx5/mad.c | 2 -- drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 1 - drivers/infiniband/hw/qib/qib_mad.c | 8 -------- 5 files changed, 2 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 92c932c067cb..62c756ea5668 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -481,8 +481,8 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr, if (!dev->ops.process_mad) return -ENOSYS; - in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); - out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kzalloc(sizeof(*out_mad), GFP_KERNEL); if (!in_mad || !out_mad) { ret = -ENOMEM; goto out; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 57079110af9b..985cced5d509 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -966,7 +966,6 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } mutex_unlock(&dev->counters_table[port_num - 1].mutex); if (stats_avail) { - memset(out_mad->data, 0, sizeof out_mad->data); switch (counter_stats.counter_mode & 0xf) { case 0: edit_counter(&counter_stats, diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 348c1df69cdc..0a5eb6e1798c 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -284,8 +284,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, *out_mad_size != sizeof(*out_mad))) return IB_MAD_RESULT_FAILURE; - memset(out_mad->data, 0, sizeof(out_mad->data)); - if (MLX5_CAP_GEN(dev->mdev, vport_counters) && in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index a902942adb5d..dda9b61f53b6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -675,7 +675,6 @@ int ocrdma_pma_counters(struct ocrdma_dev *dev, { struct ib_pma_portcounters *pma_cnt; - memset(out_mad->data, 0, sizeof out_mad->data); pma_cnt = (void *)(out_mad->data + 40); ocrdma_update_stats(dev); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 818ec37a3215..6fbba0d54269 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2098,8 +2098,6 @@ static int cc_get_classportinfo(struct ib_cc_mad *ccp, struct ib_cc_classportinfo_attr *p = (struct ib_cc_classportinfo_attr *)ccp->mgmt_data; - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - p->base_version = 1; p->class_version = 1; p->cap_mask = 0; @@ -2120,8 +2118,6 @@ static int cc_get_congestion_info(struct ib_cc_mad *ccp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - p->congestion_info = 0; p->control_table_cap = ppd->cc_max_table_entries; @@ -2138,8 +2134,6 @@ static int cc_get_congestion_setting(struct ib_cc_mad *ccp, struct qib_pportdata *ppd = ppd_from_ibp(ibp); struct ib_cc_congestion_entry_shadow *entries; - memset(ccp->mgmt_data, 0, 
sizeof(ccp->mgmt_data)); - spin_lock(&ppd->cc_shadow_lock); entries = ppd->congestion_entries_shadow->entries; @@ -2176,8 +2170,6 @@ static int cc_get_congestion_control_table(struct ib_cc_mad *ccp, if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1) goto bail; - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - spin_lock(&ppd->cc_shadow_lock); max_cct_block = -- cgit v1.2.3-59-g8ed1b From 6a42265c9116a2ff8b94b52525c95f177cb15db2 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:34 +0200 Subject: RDMA/ocrdma: Make ocrdma_pma_counters() return void This function always returns 0, so just use void and remove the bogus checking at the only call site. Link: https://lore.kernel.org/r/20191029062745.7932-6-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 6 ++---- drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 4 +--- drivers/infiniband/hw/ocrdma/ocrdma_stats.h | 3 +-- 3 files changed, 4 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 8d3e36d548aa..f8ebdf7086a1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -268,10 +268,8 @@ int ocrdma_process_mad(struct ib_device *ibdev, switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_PERF_MGMT: dev = get_ocrdma_dev(ibdev); - if (!ocrdma_pma_counters(dev, out_mad)) - status = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; - else - status = IB_MAD_RESULT_SUCCESS; + ocrdma_pma_counters(dev, out_mad); + status = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; break; default: status = IB_MAD_RESULT_SUCCESS; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index dda9b61f53b6..95e8c1560cc2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -670,8 +670,7 @@ err: return -EFAULT; } -int ocrdma_pma_counters(struct ocrdma_dev *dev, - struct ib_mad *out_mad) +void ocrdma_pma_counters(struct ocrdma_dev *dev, struct ib_mad *out_mad) { struct ib_pma_portcounters *pma_cnt; @@ -682,7 +681,6 @@ int ocrdma_pma_counters(struct ocrdma_dev *dev, pma_cnt->port_rcv_data = cpu_to_be32(ocrdma_sysfs_rcv_data(dev)); pma_cnt->port_xmit_packets = cpu_to_be32(ocrdma_sysfs_xmit_pkts(dev)); pma_cnt->port_rcv_packets = cpu_to_be32(ocrdma_sysfs_rcv_pkts(dev)); - return 0; } static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h index bba1fec4f11f..98feca26ac55 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h @@ -69,7 +69,6 @@ bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev); void ocrdma_release_stats_resources(struct ocrdma_dev *dev); void ocrdma_rem_port_stats(struct ocrdma_dev *dev); void ocrdma_add_port_stats(struct ocrdma_dev *dev); -int ocrdma_pma_counters(struct ocrdma_dev *dev, - struct ib_mad *out_mad); +void ocrdma_pma_counters(struct ocrdma_dev *dev, struct ib_mad *out_mad); #endif /* __OCRDMA_STATS_H__ */ -- cgit v1.2.3-59-g8ed1b From dd0b0159f7b31439679879fcf2574d7ad744b6f1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:37 +0200 Subject: RDMA/mad: Do not check MAD sizes in roce and ib drivers All callers for process_mad allocate MAD structures with proper sizes, there is no need to recheck 
it. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/mad.c | 4 ---- drivers/infiniband/hw/mlx5/mad.c | 4 ---- drivers/infiniband/hw/mthca/mthca_mad.c | 4 ---- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 4 ---- drivers/infiniband/hw/qib/qib_mad.c | 4 ---- 5 files changed, 20 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 985cced5d509..c6ea4c50da1e 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -992,10 +992,6 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_mad *out_mad = (struct ib_mad *)out; enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num); - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - /* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA * queries, should be called only by VFs and for that specific purpose */ diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 0a5eb6e1798c..f49d9c70246e 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -280,10 +280,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_mad *out_mad = (struct ib_mad *)out; int ret; - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - if (MLX5_CAP_GEN(dev->mdev, vport_counters) && in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 7ad517da4917..0893604d2a62 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -212,10 +212,6 @@ int mthca_process_mad(struct ib_device *ibdev, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - /* Forward locally generated traps to the SM */ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index f8ebdf7086a1..0dc74ef42f8c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -261,10 +261,6 @@ int ocrdma_process_mad(struct ib_device *ibdev, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_PERF_MGMT: dev = get_ocrdma_dev(ibdev); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 6fbba0d54269..b259aaf85d4a 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2396,10 +2396,6 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: case 
IB_MGMT_CLASS_SUBN_LID_ROUTED: -- cgit v1.2.3-59-g8ed1b From 84b56d57cf3cc9640b69cbb3a5d023d010ccb62a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:40 +0200 Subject: RDMA/ocrdma: Simplify process_mad function Change the switch with one case into a simple if statement so the code is less confusing. Link: https://lore.kernel.org/r/20191029062745.7932-12-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 0dc74ef42f8c..4098508b9240 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -256,20 +256,16 @@ int ocrdma_process_mad(struct ib_device *ibdev, struct ib_mad_hdr *out, size_t *out_mad_size, u16 *out_mad_pkey_index) { - int status; + int status = IB_MAD_RESULT_SUCCESS; struct ocrdma_dev *dev; const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - switch (in_mad->mad_hdr.mgmt_class) { - case IB_MGMT_CLASS_PERF_MGMT: + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { dev = get_ocrdma_dev(ibdev); ocrdma_pma_counters(dev, out_mad); - status = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; - break; - default: - status = IB_MAD_RESULT_SUCCESS; - break; + status |= IB_MAD_RESULT_REPLY; } + return status; } -- cgit v1.2.3-59-g8ed1b From ffa2fd1323e32093abc23e7d71ed92b85e566ffe Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:42 +0200 Subject: RDMA/mlx5: Rewrite MAD processing logic to be readable Multiple "if"s and "||" make extension of process_mad() function as a tedious task, rewrite that function to be more readable. Link: https://lore.kernel.org/r/20191029062745.7932-14-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mad.c | 116 +++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 61 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index f49d9c70246e..1c87412a162f 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -74,58 +74,6 @@ static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, port); } -static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad *in_mad, struct ib_mad *out_mad) -{ - u16 slid; - int err; - - slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); - - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) - return IB_MAD_RESULT_SUCCESS; - - /* Don't process SMInfo queries -- the SMA can't handle them. 
- */ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) - return IB_MAD_RESULT_SUCCESS; - } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || - in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1 || - in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2 || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) - return IB_MAD_RESULT_SUCCESS; - } else { - return IB_MAD_RESULT_SUCCESS; - } - - err = mlx5_MAD_IFC(to_mdev(ibdev), - mad_flags & IB_MAD_IGNORE_MKEY, - mad_flags & IB_MAD_IGNORE_BKEY, - port_num, in_wc, in_grh, in_mad, out_mad); - if (err) - return IB_MAD_RESULT_FAILURE; - - /* set return bit in status of directed route responses */ - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); - - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) - /* no response for trap repress */ - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; -} - static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext, void *out) { @@ -278,17 +226,63 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - int ret; + u8 mgmt_class = in_mad->mad_hdr.mgmt_class; + u8 method = in_mad->mad_hdr.method; + u16 slid; + int err; - if (MLX5_CAP_GEN(dev->mdev, vport_counters) && - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && - in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { - ret = process_pma_cmd(dev, port_num, in_mad, out_mad); - } else { - ret = process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, - in_mad, out_mad); + slid = in_wc ? ib_lid_cpu16(in_wc->slid) : + be16_to_cpu(IB_LID_PERMISSIVE); + + if (method == IB_MGMT_METHOD_TRAP && !slid) + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + switch (mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: { + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET && + method != IB_MGMT_METHOD_TRAP_REPRESS) + return IB_MAD_RESULT_SUCCESS; + + /* Don't process SMInfo queries -- the SMA can't handle them. 
+ */ + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) + return IB_MAD_RESULT_SUCCESS; + } break; + case IB_MGMT_CLASS_PERF_MGMT: + if (MLX5_CAP_GEN(dev->mdev, vport_counters) && + method == IB_MGMT_METHOD_GET) + return process_pma_cmd(dev, port_num, in_mad, out_mad); + /* fallthrough */ + case MLX5_IB_VENDOR_CLASS1: + /* fallthrough */ + case MLX5_IB_VENDOR_CLASS2: + case IB_MGMT_CLASS_CONG_MGMT: { + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET) + return IB_MAD_RESULT_SUCCESS; + } break; + default: + return IB_MAD_RESULT_SUCCESS; } - return ret; + + err = mlx5_MAD_IFC(to_mdev(ibdev), + mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, + port_num, in_wc, in_grh, in_mad, out_mad); + if (err) + return IB_MAD_RESULT_FAILURE; + + /* set return bit in status of directed route responses */ + if (mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + + if (method == IB_MGMT_METHOD_TRAP_REPRESS) + /* no response for trap repress */ + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) -- cgit v1.2.3-59-g8ed1b From ec6adad0a1e3ef3064c12146b00c2bd1e6835b0c Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Tue, 5 Nov 2019 19:07:54 +0800 Subject: RDMA/hns: Delete unnecessary variable max_post There is no need to define max_post in hns_roce_wq, as it does same thing as wqe_cnt. Link: https://lore.kernel.org/r/1572952082-6681-2-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 - drivers/infiniband/hw/hns/hns_roce_qp.c | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 940761310430..f8f6f052bab5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -424,7 +424,6 @@ struct hns_roce_wq { u64 *wrid; /* Work request ID */ spinlock_t lock; int wqe_cnt; /* WQE num */ - u32 max_post; int max_gs; int offset; int wqe_shift; /* WQE size */ diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6aa27d6ea3a6..308d15b98ee0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -318,7 +318,7 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, * hr_qp->rq.max_gs); } - cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt; + cap->max_recv_wr = hr_qp->rq.wqe_cnt; cap->max_recv_sge = hr_qp->rq.max_gs; return 0; @@ -610,7 +610,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, hr_qp->buff_size = size; /* Get wr and sge number which send */ - cap->max_send_wr = hr_qp->sq.max_post = hr_qp->sq.wqe_cnt; + cap->max_send_wr = hr_qp->sq.wqe_cnt; cap->max_send_sge = hr_qp->sq.max_gs; /* We don't support inline sends for kernel QPs (yet) */ @@ -1291,7 +1291,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, u32 cur; cur = hr_wq->head - hr_wq->tail; - if (likely(cur + nreq < hr_wq->max_post)) + if (likely(cur + nreq < hr_wq->wqe_cnt)) return false; hr_cq = to_hr_cq(ib_cq); @@ -1299,7 +1299,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, cur = hr_wq->head - hr_wq->tail; spin_unlock(&hr_cq->lock); - return cur + nreq >= hr_wq->max_post; + return 
cur + nreq >= hr_wq->wqe_cnt; } int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) -- cgit v1.2.3-59-g8ed1b From 16a11e0bffcab729bd2e8f315e1252e7ef3ddf33 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Tue, 5 Nov 2019 19:07:55 +0800 Subject: RDMA/hns: Remove unnecessary structure hns_roce_sqp Special QP have no differences with normal qp in data structure, so definition of struct hns_roce_sqp should be removed and replaced by struct hns_roce_qp. Link: https://lore.kernel.org/r/1572952082-6681-3-git-send-email-liweihang@hisilicon.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 9 --------- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 5 +---- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 5 +---- drivers/infiniband/hw/hns/hns_roce_qp.c | 8 +++----- 4 files changed, 5 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f8f6f052bab5..409c626708b7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -692,10 +692,6 @@ struct hns_roce_qp { struct hns_roce_rinl_buf rq_inl_buf; }; -struct hns_roce_sqp { - struct hns_roce_qp hr_qp; -}; - struct hns_roce_ib_iboe { spinlock_t lock; struct net_device *netdevs[HNS_ROCE_MAX_PORTS]; @@ -1089,11 +1085,6 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq) return container_of(ibsrq, struct hns_roce_srq, ibsrq); } -static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp) -{ - return container_of(hr_qp, struct hns_roce_sqp, hr_qp); -} - static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) { __raw_writeq(*(u64 *) val, dest); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 5f74bf55f471..bfe9cee8c969 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3644,10 +3644,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); } - if (hr_qp->ibqp.qp_type == IB_QPT_RC) - kfree(hr_qp); - else - kfree(hr_to_hr_sqp(hr_qp)); + kfree(hr_qp); return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7218f6d4101a..0883fce0e400 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4727,10 +4727,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) ibdev_err(&hr_dev->ib_dev, "Destroy qp 0x%06lx failed(%d)\n", hr_qp->qpn, ret); - if (hr_qp->ibqp.qp_type == IB_QPT_GSI) - kfree(hr_to_hr_sqp(hr_qp)); - else - kfree(hr_qp); + kfree(hr_qp); return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 308d15b98ee0..6fac4666ae8a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1019,7 +1019,6 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_sqp *hr_sqp; struct hns_roce_qp *hr_qp; int ret; @@ -1049,11 +1048,10 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } - hr_sqp = kzalloc(sizeof(*hr_sqp), GFP_KERNEL); - if (!hr_sqp) + hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); + if (!hr_qp) return 
ERR_PTR(-ENOMEM); - hr_qp = &hr_sqp->hr_qp; hr_qp->port = init_attr->port_num - 1; hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; @@ -1068,7 +1066,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, hr_qp->ibqp.qp_num, hr_qp); if (ret) { ibdev_err(ibdev, "Create GSI QP failed!\n"); - kfree(hr_sqp); + kfree(hr_qp); return ERR_PTR(ret); } -- cgit v1.2.3-59-g8ed1b From 03ccba5c2cf7ec5149c59039a5644233a2f60ca9 Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Tue, 5 Nov 2019 19:07:56 +0800 Subject: RDMA/hns: Delete unnecessary uar from hns_roce_cq The uar information is already recorded in priv_uar of hns_roce_dev, there is no need to record it in hns_roce_cq again. Link: https://lore.kernel.org/r/1572952082-6681-4-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 4 +--- drivers/infiniband/hw/hns/hns_roce_device.h | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 22541d19cd09..d1d773958c27 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -347,7 +347,6 @@ static int create_kernel_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, int cq_entries) { struct device *dev = hr_dev->dev; - struct hns_roce_uar *uar; int ret; if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { @@ -367,9 +366,8 @@ static int create_kernel_cq(struct hns_roce_dev *hr_dev, goto err_db; } - uar = &hr_dev->priv_uar; hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset + - DB_REG_OFFSET * uar->index; + DB_REG_OFFSET * hr_dev->priv_uar.index; return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 409c626708b7..c461cffa9721 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -494,7 +494,6 @@ struct hns_roce_cq { void (*comp)(struct hns_roce_cq *cq); void (*event)(struct hns_roce_cq *cq, enum hns_roce_event event_type); - struct hns_roce_uar *uar; u32 cq_depth; u32 cons_index; u32 *set_ci_db; -- cgit v1.2.3-59-g8ed1b From d938d7856f4253f81906af0b9ca67c6a6e51cbaf Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Tue, 5 Nov 2019 19:07:57 +0800 Subject: RDMA/hns: Modify fields of struct hns_roce_srq Use wqe_cnt instead of max which means the queue size of srq, and remove wqe_ctr which is not used. 
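For context, a short sketch (names simplified) of why the renamed field keeps working as a ring mask: the SRQ depth is rounded up to a power of two, so ring indices wrap with a bitwise AND instead of a modulo, which is why the rename touches every "& (srq->max - 1)" site.

        srq->wqe_cnt = roundup_pow_of_two(attr->max_wr + 1);   /* power of two */
        idx = srq->head & (srq->wqe_cnt - 1);                  /* cheap wrap, no division */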
Link: https://lore.kernel.org/r/1572952082-6681-5-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 5 ++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 12 ++++++------ drivers/infiniband/hw/hns/hns_roce_srq.c | 26 +++++++++++++------------- 3 files changed, 21 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c461cffa9721..ce777ce06ea2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -517,9 +517,8 @@ struct hns_roce_idx_que { struct hns_roce_srq { struct ib_srq ibsrq; - void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); unsigned long srqn; - int max; + u32 wqe_cnt; int max_gs; int wqe_shift; void __iomem *db_reg_l; @@ -535,8 +534,8 @@ struct hns_roce_srq { spinlock_t lock; int head; int tail; - u16 wqe_ctr; struct mutex mutex; + void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); }; struct hns_roce_uar_table { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0883fce0e400..4700bac1c6e2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6040,7 +6040,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, hr_dev->caps.srqwqe_hop_num)); roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, - ilog2(srq->max)); + ilog2(srq->wqe_cnt)); roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, SRQC_BYTE_4_SRQN_S, srq->srqn); @@ -6126,7 +6126,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, int ret; if (srq_attr_mask & IB_SRQ_LIMIT) { - if (srq_attr->srq_limit >= srq->max) + if (srq_attr->srq_limit >= srq->wqe_cnt) return -EINVAL; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -6186,7 +6186,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) SRQC_BYTE_8_SRQ_LIMIT_WL_S); attr->srq_limit = limit_wl; - attr->max_wr = srq->max - 1; + attr->max_wr = srq->wqe_cnt - 1; attr->max_sge = srq->max_gs; memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); @@ -6239,7 +6239,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, spin_lock_irqsave(&srq->lock, flags); - ind = srq->head & (srq->max - 1); + ind = srq->head & (srq->wqe_cnt - 1); for (nreq = 0; wr; ++nreq, wr = wr->next) { if (unlikely(wr->num_sge > srq->max_gs)) { @@ -6254,7 +6254,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, break; } - wqe_idx = find_empty_entry(&srq->idx_que, srq->max); + wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt); if (wqe_idx < 0) { ret = -ENOMEM; *bad_wr = wr; @@ -6278,7 +6278,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, } srq->wrid[wqe_idx] = wr->wr_id; - ind = (ind + 1) & (srq->max - 1); + ind = (ind + 1) & (srq->wqe_cnt - 1); } if (likely(nreq)) { diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index d96041d806f6..6f9d1d250e4a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -255,7 +255,7 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct hns_roce_idx_que *idx_que = &srq->idx_que; - idx_que->bitmap = bitmap_zalloc(srq->max, 
GFP_KERNEL); + idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); if (!idx_que->bitmap) return -ENOMEM; @@ -281,7 +281,7 @@ static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size) return -ENOMEM; srq->head = 0; - srq->tail = srq->max - 1; + srq->tail = srq->wqe_cnt - 1; ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift, &srq->mtt); @@ -312,7 +312,7 @@ static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size) if (ret) goto err_kernel_idx_buf; - srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL); + srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); if (!srq->wrid) { ret = -ENOMEM; goto err_kernel_idx_buf; @@ -358,7 +358,7 @@ static void destroy_kernel_srq(struct hns_roce_dev *hr_dev, } int hns_roce_create_srq(struct ib_srq *ib_srq, - struct ib_srq_init_attr *srq_init_attr, + struct ib_srq_init_attr *init_attr, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); @@ -370,24 +370,24 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, u32 cqn; /* Check the actual SRQ wqe and SRQ sge num */ - if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || - srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) + if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || + init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) return -EINVAL; mutex_init(&srq->mutex); spin_lock_init(&srq->lock); - srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); - srq->max_gs = srq_init_attr->attr.max_sge; + srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); + srq->max_gs = init_attr->attr.max_sge; srq_desc_size = max(16, 16 * srq->max_gs); srq->wqe_shift = ilog2(srq_desc_size); - srq_buf_size = srq->max * srq_desc_size; + srq_buf_size = srq->wqe_cnt * srq_desc_size; srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ; - srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz; + srq->idx_que.buf_size = srq->wqe_cnt * srq->idx_que.entry_sz; srq->mtt.mtt_type = MTT_TYPE_SRQWQE; srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX; @@ -405,8 +405,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, } } - cqn = ib_srq_has_cq(srq_init_attr->srq_type) ? - to_hr_cq(srq_init_attr->ext.cq)->cqn : 0; + cqn = ib_srq_has_cq(init_attr->srq_type) ? + to_hr_cq(init_attr->ext.cq)->cqn : 0; srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; @@ -453,7 +453,7 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); } else { kvfree(srq->wrid); - hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift, + hns_roce_buf_free(hr_dev, srq->wqe_cnt << srq->wqe_shift, &srq->buf); } ib_umem_release(srq->idx_que.umem); -- cgit v1.2.3-59-g8ed1b From 6eef524201deaaaf980bd21b80aac1b052cd56a7 Mon Sep 17 00:00:00 2001 From: Yixing Liu Date: Tue, 5 Nov 2019 19:07:58 +0800 Subject: RDMA/hns: Replace not intuitive function/macro names Replace "sw2hw" and "hw2sw" which is hard to understand with "create" and "destroy". 
Link: https://lore.kernel.org/r/1572952082-6681-6-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixing Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cmd.h | 14 ++++---- drivers/infiniband/hw/hns/hns_roce_cq.c | 23 ++++++------- drivers/infiniband/hw/hns/hns_roce_device.h | 6 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 7 ++-- drivers/infiniband/hw/hns/hns_roce_mr.c | 50 +++++++++++++++-------------- drivers/infiniband/hw/hns/hns_roce_srq.c | 22 ++++++------- 6 files changed, 63 insertions(+), 59 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 2b6ac646ca9a..cd3ed2b462bd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -115,12 +115,12 @@ enum { enum { /* TPT commands */ - HNS_ROCE_CMD_SW2HW_MPT = 0xd, - HNS_ROCE_CMD_HW2SW_MPT = 0xf, + HNS_ROCE_CMD_CREATE_MPT = 0xd, + HNS_ROCE_CMD_DESTROY_MPT = 0xf, /* CQ commands */ - HNS_ROCE_CMD_SW2HW_CQ = 0x16, - HNS_ROCE_CMD_HW2SW_CQ = 0x17, + HNS_ROCE_CMD_CREATE_CQ = 0x16, + HNS_ROCE_CMD_DESTROY_CQ = 0x17, /* QP/EE commands */ HNS_ROCE_CMD_RST2INIT_QP = 0x19, @@ -129,14 +129,14 @@ enum { HNS_ROCE_CMD_RTS2RTS_QP = 0x1c, HNS_ROCE_CMD_2ERR_QP = 0x1e, HNS_ROCE_CMD_RTS2SQD_QP = 0x1f, - HNS_ROCE_CMD_SQD2SQD_QP = 0x38, HNS_ROCE_CMD_SQD2RTS_QP = 0x20, HNS_ROCE_CMD_2RST_QP = 0x21, HNS_ROCE_CMD_QUERY_QP = 0x22, - HNS_ROCE_CMD_SW2HW_SRQ = 0x70, + HNS_ROCE_CMD_SQD2SQD_QP = 0x38, + HNS_ROCE_CMD_CREATE_SRQ = 0x70, HNS_ROCE_CMD_MODIFY_SRQC = 0x72, HNS_ROCE_CMD_QUERY_SRQC = 0x73, - HNS_ROCE_CMD_HW2SW_SRQ = 0x74, + HNS_ROCE_CMD_DESTROY_SRQ = 0x74, }; int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index d1d773958c27..713df1f78388 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -73,12 +73,13 @@ static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq, } } -static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long cq_num) +static int hns_roce_hw_create_cq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long cq_num) { return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0, - HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS); + HNS_ROCE_CMD_CREATE_CQ, + HNS_ROCE_CMD_TIMEOUT_MSECS); } static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, @@ -144,7 +145,7 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, nent, vector); /* Send mailbox to hw */ - ret = hns_roce_sw2hw_cq(hr_dev, mailbox, hr_cq->cqn); + ret = hns_roce_hw_create_cq(hr_dev, mailbox, hr_cq->cqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n"); @@ -170,12 +171,12 @@ err_out: return ret; } -static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long cq_num) +static int hns_roce_hw_destroy_cq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long cq_num) { return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num, - mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ, + mailbox ? 
0 : 1, HNS_ROCE_CMD_DESTROY_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS); } @@ -185,9 +186,9 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) struct device *dev = hr_dev->dev; int ret; - ret = hns_roce_hw2sw_cq(hr_dev, NULL, hr_cq->cqn); + ret = hns_roce_hw_destroy_cq(hr_dev, NULL, hr_cq->cqn); if (ret) - dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret, + dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); xa_erase(&cq_table->array, hr_cq->cqn); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ce777ce06ea2..a1b712e9d454 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1184,9 +1184,9 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); -int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index); +int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index); unsigned long key_to_hw_index(u32 key); struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index bfe9cee8c969..89a4c3afdda1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1114,9 +1114,10 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, free_mr = &priv->free_mr; if (mr->enabled) { - if (hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key) - & (hr_dev->caps.num_mtpts - 1))) - dev_warn(dev, "HW2SW_MPT failed!\n"); + if (hns_roce_hw_destroy_mpt(hr_dev, NULL, + key_to_hw_index(mr->key) & + (hr_dev->caps.num_mtpts - 1))) + dev_warn(dev, "DESTROY_MPT failed!\n"); } mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 5f8416ba09a9..577946bdd574 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -48,21 +48,21 @@ unsigned long key_to_hw_index(u32 key) return (key << 24) | (key >> 8); } -static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index) +static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index) { return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0, - HNS_ROCE_CMD_SW2HW_MPT, + HNS_ROCE_CMD_CREATE_MPT, HNS_ROCE_CMD_TIMEOUT_MSECS); } -int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index) +int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index) { return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? 
mailbox->dma : 0, - mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT, + mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT, HNS_ROCE_CMD_TIMEOUT_MSECS); } @@ -707,10 +707,11 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, int ret; if (mr->enabled) { - ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key) - & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, + key_to_hw_index(mr->key) & + (hr_dev->caps.num_mtpts - 1)); if (ret) - dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret); + dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); } if (mr->size != ~0ULL) { @@ -763,10 +764,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, goto err_page; } - ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, - mtpt_idx & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_create_mpt(hr_dev, mailbox, + mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { - dev_err(dev, "SW2HW_MPT failed (%d)\n", ret); + dev_err(dev, "CREATE_MPT failed (%d)\n", ret); goto err_page; } @@ -1308,9 +1309,9 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, if (ret) goto free_cmd_mbox; - ret = hns_roce_hw2sw_mpt(hr_dev, NULL, mtpt_idx); + ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx); if (ret) - dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret); + dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); mr->enabled = 0; @@ -1332,9 +1333,9 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, goto free_cmd_mbox; } - ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx); + ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx); if (ret) { - dev_err(dev, "SW2HW_MPT failed (%d)\n", ret); + dev_err(dev, "CREATE_MPT failed (%d)\n", ret); ib_umem_release(mr->umem); goto free_cmd_mbox; } @@ -1448,10 +1449,11 @@ static void hns_roce_mw_free(struct hns_roce_dev *hr_dev, int ret; if (mw->enabled) { - ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey) - & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, + key_to_hw_index(mw->rkey) & + (hr_dev->caps.num_mtpts - 1)); if (ret) - dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret); + dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret); hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, key_to_hw_index(mw->rkey)); @@ -1487,10 +1489,10 @@ static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev, goto err_page; } - ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, - mtpt_idx & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_create_mpt(hr_dev, mailbox, + mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { - dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret); + dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret); goto err_page; } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 6f9d1d250e4a..d275818554a6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -59,21 +59,21 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq, } } -static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long srq_num) +static int hns_roce_hw_create_srq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long srq_num) { return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0, - HNS_ROCE_CMD_SW2HW_SRQ, + HNS_ROCE_CMD_CREATE_SRQ, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long srq_num) +static int 
hns_roce_hw_destroy_srq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long srq_num) { return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num, - mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ, + mailbox ? 0 : 1, HNS_ROCE_CMD_DESTROY_SRQ, HNS_ROCE_CMD_TIMEOUT_MSECS); } @@ -134,7 +134,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, mtts_wqe, mtts_idx, dma_handle_wqe, dma_handle_idx); - ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn); + ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) goto err_xa; @@ -160,9 +160,9 @@ static void hns_roce_srq_free(struct hns_roce_dev *hr_dev, struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; int ret; - ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn); + ret = hns_roce_hw_destroy_srq(hr_dev, NULL, srq->srqn); if (ret) - dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for CQN %06lx\n", + dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", ret, srq->srqn); xa_erase(&srq_table->xa, srq->srqn); -- cgit v1.2.3-59-g8ed1b From 880f133c6026bee250220b773f556cc27bb62457 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Tue, 5 Nov 2019 19:07:59 +0800 Subject: RDMA/hns: Simplify doorbell initialization code If a variable needs to be set to 0 before use, it can be directly initialized to 0. Link: https://lore.kernel.org/r/1572952082-6681-7-git-send-email-liweihang@hisilicon.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4700bac1c6e2..a8ce3716a63e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4944,10 +4944,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, static void set_eq_cons_index_v2(struct hns_roce_eq *eq) { struct hns_roce_dev *hr_dev = eq->hr_dev; - __le32 doorbell[2]; - - doorbell[0] = 0; - doorbell[1] = 0; + __le32 doorbell[2] = {}; if (eq->type_flag == HNS_ROCE_AEQ) { roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_CMD_M, -- cgit v1.2.3-59-g8ed1b From 301cc7eb2cd9b0292658b9bfd45e5226cd1fef27 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Tue, 5 Nov 2019 19:08:00 +0800 Subject: RDMA/hns: Modify hns_roce_hw_v2_get_cfg to simplify the code Merge base configuration of hr_dev into hns_roce_hw_v2_get_cfg(). In addition, there is no need to return 0 at last, so we change return type of it to void. 
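In other words, a simplified sketch of the call-site effect: a helper that cannot fail should return void so its caller does not carry dead error handling.

        /* before: an always-zero return forces a pointless check */
        ret = hns_roce_hw_v2_get_cfg(hr_dev, handle);
        if (ret)
                goto error_failed_get_cfg;

        /* after: infallible helper, plain call */
        hns_roce_hw_v2_get_cfg(hr_dev, handle);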
Link: https://lore.kernel.org/r/1572952082-6681-8-git-send-email-liweihang@hisilicon.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a8ce3716a63e..907c95149cb9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6370,12 +6370,14 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl); -static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, +static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, struct hnae3_handle *handle) { struct hns_roce_v2_priv *priv = hr_dev->priv; int i; + hr_dev->pci_dev = handle->pdev; + hr_dev->dev = &handle->pdev->dev; hr_dev->hw = &hns_roce_hw_v2; hr_dev->dfx = &hns_roce_dfx_hw_v2; hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG; @@ -6400,8 +6402,6 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, hr_dev->reset_cnt = handle->ae_algo->ops->ae_dev_reset_cnt(handle); priv->handle = handle; - - return 0; } static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) @@ -6419,14 +6419,7 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto error_failed_kzalloc; } - hr_dev->pci_dev = handle->pdev; - hr_dev->dev = &handle->pdev->dev; - - ret = hns_roce_hw_v2_get_cfg(hr_dev, handle); - if (ret) { - dev_err(hr_dev->dev, "Get Configuration failed!\n"); - goto error_failed_get_cfg; - } + hns_roce_hw_v2_get_cfg(hr_dev, handle); ret = hns_roce_init(hr_dev); if (ret) { -- cgit v1.2.3-59-g8ed1b From 1ceb0b11a8a2363db3bf44f3e0ae4615134733e9 Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Tue, 5 Nov 2019 19:08:01 +0800 Subject: RDMA/hns: Fix non-standard error codes It is better to return a linux error code than define a private constant. 
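A hedged sketch of the convention being adopted (condition and flow simplified): returning a standard errno lets callers test the result with a plain "if (ret)" and propagate the code unchanged, instead of comparing against a driver-private -1.

        /* before: private failure value that every caller must know */
        if (no_free_index)
                return -1;
        ...
        if (hns_roce_bitmap_alloc(bitmap, &index) == -1)
                return -ENOMEM;

        /* after: standard errno, propagated as-is */
        if (no_free_index)
                return -EINVAL;
        ...
        ret = hns_roce_bitmap_alloc(bitmap, &index);
        if (ret)
                return ret;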
Link: https://lore.kernel.org/r/1572952082-6681-9-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_cq.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_mr.c | 15 ++++++++------- drivers/infiniband/hw/hns/hns_roce_pd.c | 2 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 2 +- 5 files changed, 14 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 8c063c598d2a..da574c26e063 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -55,7 +55,7 @@ int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj) bitmap->last = 0; *obj |= bitmap->top; } else { - ret = -1; + ret = -EINVAL; } spin_unlock(&bitmap->lock); @@ -100,7 +100,7 @@ int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, } *obj |= bitmap->top; } else { - ret = -1; + ret = -EINVAL; } spin_unlock(&bitmap->lock); diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 713df1f78388..699c987f8da4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -116,9 +116,9 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, hr_cq->vector = vector; ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); - if (ret == -1) { + if (ret) { dev_err(dev, "CQ alloc.Failed to alloc index.\n"); - return -ENOMEM; + return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 577946bdd574..6589e283cc77 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -83,7 +83,7 @@ static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order, } } spin_unlock(&buddy->lock); - return -1; + return -EINVAL; found: clear_bit(*seg, buddy->bits[o]); @@ -206,13 +206,14 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, } ret = hns_roce_buddy_alloc(buddy, order, seg); - if (ret == -1) - return -1; + if (ret) + return ret; - if (hns_roce_table_get_range(hr_dev, table, *seg, - *seg + (1 << order) - 1)) { + ret = hns_roce_table_get_range(hr_dev, table, *seg, + *seg + (1 << order) - 1); + if (ret) { hns_roce_buddy_free(buddy, *seg, order); - return -1; + return ret; } return 0; @@ -578,7 +579,7 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, /* Allocate a key for mr from mr_table */ ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); - if (ret == -1) + if (ret) return -ENOMEM; mr->iova = iova; /* MR va starting addr */ diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 912b89b4da34..780c780fdb22 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -96,7 +96,7 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) /* Using bitmap to manager UAR index */ ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->logic_idx); - if (ret == -1) + if (ret) return -ENOMEM; if (uar->logic_idx > 0 && hr_dev->caps.phy_num_uars > 1) diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 
d275818554a6..96ff782e0fdf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -111,7 +111,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, } ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); - if (ret == -1) { + if (ret) { dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n"); return -ENOMEM; } -- cgit v1.2.3-59-g8ed1b From d11769fdc1bbf6664083c0caabd9f2f864dd72ec Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Tue, 5 Nov 2019 19:08:02 +0800 Subject: RDMA/hns: Modify appropriate printings Modify some printings that is not in uniformed style, non-standard or with spelling errors. Link: https://lore.kernel.org/r/1572952082-6681-10-git-send-email-liweihang@hisilicon.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 20 ++++++++++++-------- drivers/infiniband/hw/hns/hns_roce_main.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 8 ++++---- 4 files changed, 19 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 699c987f8da4..e25fa19c249c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -105,32 +105,34 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, mtts = hns_roce_table_find(hr_dev, mtt_table, hr_mtt->first_seg, &dma_handle); if (!mtts) { - dev_err(dev, "CQ alloc.Failed to find cq buf addr.\n"); + dev_err(dev, "Failed to find mtt for CQ buf.\n"); return -EINVAL; } if (vector >= hr_dev->caps.num_comp_vectors) { - dev_err(dev, "CQ alloc.Invalid vector.\n"); + dev_err(dev, "Invalid vector(0x%x) for CQ alloc.\n", vector); return -EINVAL; } hr_cq->vector = vector; ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); if (ret) { - dev_err(dev, "CQ alloc.Failed to alloc index.\n"); + dev_err(dev, "Num of CQ out of range.\n"); return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { - dev_err(dev, "CQ alloc.Failed to get context mem.\n"); + dev_err(dev, + "Get context mem failed(%d) when CQ(0x%lx) alloc.\n", + ret, hr_cq->cqn); goto err_out; } ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { - dev_err(dev, "CQ alloc failed xa_store.\n"); + dev_err(dev, "Failed to xa_store CQ.\n"); goto err_put; } @@ -148,7 +150,9 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, ret = hns_roce_hw_create_cq(hr_dev, mailbox, hr_cq->cqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { - dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n"); + dev_err(dev, + "Send cmd mailbox failed(%d) when CQ(0x%lx) alloc.\n", + ret, hr_cq->cqn); goto err_xa; } @@ -418,7 +422,7 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, int ret; if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { - dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n", + dev_err(dev, "Create CQ failed. 
entries=%d, max=%d\n", cq_entries, hr_dev->caps.max_cqes); return -EINVAL; } @@ -448,7 +452,7 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, hr_cq, vector); if (ret) { - dev_err(dev, "Creat CQ .Failed to cq_alloc.\n"); + dev_err(dev, "Alloc CQ failed(%d).\n", ret); goto err_dbmap; } diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index a389f9996689..066f01c45b5a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -111,7 +111,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, netdev = hr_dev->iboe.netdevs[port]; if (!netdev) { - dev_err(dev, "port(%d) can't find netdev\n", port); + dev_err(dev, "Can't find netdev on port(%u)!\n", port); return -ENODEV; } @@ -253,7 +253,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, net_dev = hr_dev->iboe.netdevs[port]; if (!net_dev) { spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - dev_err(dev, "find netdev %d failed!\r\n", port); + dev_err(dev, "Find netdev %u failed!\n", port); return -EINVAL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6fac4666ae8a..9442f017db02 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1031,7 +1031,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0, hr_qp); if (ret) { - ibdev_err(ibdev, "Create RC QP 0x%06lx failed(%d)\n", + ibdev_err(ibdev, "Create QP 0x%06lx failed(%d)\n", hr_qp->qpn, ret); kfree(hr_qp); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 96ff782e0fdf..a1bfa51c67fe 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -95,8 +95,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, srq->mtt.first_seg, &dma_handle_wqe); if (!mtts_wqe) { - dev_err(hr_dev->dev, - "SRQ alloc.Failed to find srq buf addr.\n"); + dev_err(hr_dev->dev, "Failed to find mtt for srq buf.\n"); return -EINVAL; } @@ -106,13 +105,14 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, &dma_handle_idx); if (!mtts_idx) { dev_err(hr_dev->dev, - "SRQ alloc.Failed to find idx que buf addr.\n"); + "Failed to find mtt for srq idx queue buf.\n"); return -EINVAL; } ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); if (ret) { - dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n"); + dev_err(hr_dev->dev, + "Failed to alloc a bit from srq bitmap.\n"); return -ENOMEM; } -- cgit v1.2.3-59-g8ed1b From 333ee7e2d0b6590cde2bea3e17f30262e0dc0706 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:36 +0200 Subject: RDMA/hfi1: Delete unreachable code All callers allocate MAD structures with proper sizes, there is no need to recheck it. 
Link: https://lore.kernel.org/r/20191029062745.7932-8-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/mad.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index d8ff063a5419..a54746f4a0ae 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -4921,10 +4921,6 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, { switch (in_mad->base_version) { case OPA_MGMT_BASE_VERSION: - if (unlikely(in_mad_size != sizeof(struct opa_mad))) { - dev_err(ibdev->dev.parent, "invalid in_mad_size\n"); - return IB_MAD_RESULT_FAILURE; - } return hfi1_process_opa_mad(ibdev, mad_flags, port, in_wc, in_grh, (struct opa_mad *)in_mad, -- cgit v1.2.3-59-g8ed1b From e26e7b88f6b7482cbff633c6fc9eaee3ecbd41b1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:45 +0200 Subject: RDMA: Change MAD processing function to remove extra casting and parameter All users of process_mad() converts input pointers from ib_mad_hdr to be ib_mad, update the function declaration to use ib_mad directly. Also remove not used input MAD size parameter. Link: https://lore.kernel.org/r/20191029062745.7932-17-leon@kernel.org Signed-off-by: Leon Romanovsky Tested-By: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 12 +++--- drivers/infiniband/core/sysfs.c | 6 +-- drivers/infiniband/hw/hfi1/mad.c | 13 +++--- drivers/infiniband/hw/hfi1/verbs.h | 5 +-- drivers/infiniband/hw/mlx4/mad.c | 25 +++++------- drivers/infiniband/hw/mlx4/mlx4_ib.h | 7 ++-- drivers/infiniband/hw/mlx5/mad.c | 24 +++++------ drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +-- drivers/infiniband/hw/mthca/mthca_dev.h | 12 ++---- drivers/infiniband/hw/mthca/mthca_mad.c | 70 ++++++++++++++------------------ drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 17 +++----- drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 11 ++--- drivers/infiniband/hw/qedr/verbs.c | 17 ++------ drivers/infiniband/hw/qedr/verbs.h | 7 ++-- drivers/infiniband/hw/qib/qib_mad.c | 15 +++---- drivers/infiniband/hw/qib/qib_verbs.h | 5 +-- include/rdma/ib_verbs.h | 7 ++-- 17 files changed, 104 insertions(+), 154 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 8adb487eb314..c54db13fa9b0 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -913,9 +913,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, /* No GRH for DR SMP */ ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL, - (const struct ib_mad_hdr *)smp, mad_size, - (struct ib_mad_hdr *)mad_priv->mad, - &mad_size, &out_mad_pkey_index); + (const struct ib_mad *)smp, + (struct ib_mad *)mad_priv->mad, &mad_size, + &out_mad_pkey_index); switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: @@ -2321,9 +2321,9 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) if (port_priv->device->ops.process_mad) { ret = port_priv->device->ops.process_mad( port_priv->device, 0, port_priv->port_num, wc, - &recv->grh, (const struct ib_mad_hdr *)recv->mad, - recv->mad_size, (struct ib_mad_hdr *)response->mad, - &mad_size, &resp_mad_pkey_index); + &recv->grh, (const struct ib_mad *)recv->mad, + (struct ib_mad *)response->mad, &mad_size, + &resp_mad_pkey_index); if (opa) wc->pkey_index = resp_mad_pkey_index; diff --git a/drivers/infiniband/core/sysfs.c 
b/drivers/infiniband/core/sysfs.c index 62c756ea5668..087682e6969e 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -497,10 +497,8 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr, if (attr != IB_PMA_CLASS_PORT_INFO) in_mad->data[41] = port_num; /* PortSelect field */ - if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, - port_num, NULL, NULL, - (const struct ib_mad_hdr *)in_mad, mad_size, - (struct ib_mad_hdr *)out_mad, &mad_size, + if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, port_num, NULL, NULL, + in_mad, out_mad, &mad_size, &out_mad_pkey_index) & (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) != (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) { diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index a54746f4a0ae..a51bcd2b4391 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -4915,11 +4915,10 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port, */ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index) { - switch (in_mad->base_version) { + switch (in_mad->mad_hdr.base_version) { case OPA_MGMT_BASE_VERSION: return hfi1_process_opa_mad(ibdev, mad_flags, port, in_wc, in_grh, @@ -4928,10 +4927,8 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, out_mad_size, out_mad_pkey_index); case IB_MGMT_BASE_VERSION: - return hfi1_process_ib_mad(ibdev, mad_flags, port, - in_wc, in_grh, - (const struct ib_mad *)in_mad, - (struct ib_mad *)out_mad); + return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc, + in_grh, in_mad, out_mad); default: break; } diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index ae9582ddbc8f..b0e9bf7cd150 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -330,9 +330,8 @@ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp); void hfi1_node_desc_chg(struct hfi1_ibport *ibp); int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index); /* * The PSN_MASK and PSN_SHIFT allow for diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index c6ea4c50da1e..abe68708d6d6 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -983,13 +983,10 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { struct mlx4_ib_dev *dev = to_mdev(ibdev); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; enum rdma_link_layer link = 
rdma_port_get_link_layer(ibdev, port_num); /* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA @@ -997,20 +994,20 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, */ if (link == IB_LINK_LAYER_INFINIBAND) { if (mlx4_is_slave(dev->dev) && - (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && - (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS || - in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT || - in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO))) - return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && + (in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS || + in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT || + in->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO))) + return iboe_process_mad(ibdev, mad_flags, port_num, + in_wc, in_grh, in, out); - return ib_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + return ib_process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, + in, out); } if (link == IB_LINK_LAYER_ETHERNET) return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + in_grh, in, out); return -EINVAL; } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index eb53bb4c0c91..0d846fea8fdc 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -786,11 +786,10 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); -int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, +int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 1c87412a162f..14e0c17de6a9 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -219,15 +219,12 @@ done: int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - u8 mgmt_class = in_mad->mad_hdr.mgmt_class; - u8 method = in_mad->mad_hdr.method; + u8 mgmt_class = in->mad_hdr.mgmt_class; + u8 method = in->mad_hdr.method; u16 slid; int err; @@ -247,13 +244,13 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, /* Don't process SMInfo queries -- the SMA can't handle them. 
*/ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) + if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) return IB_MAD_RESULT_SUCCESS; } break; case IB_MGMT_CLASS_PERF_MGMT: if (MLX5_CAP_GEN(dev->mdev, vport_counters) && method == IB_MGMT_METHOD_GET) - return process_pma_cmd(dev, port_num, in_mad, out_mad); + return process_pma_cmd(dev, port_num, in, out); /* fallthrough */ case MLX5_IB_VENDOR_CLASS1: /* fallthrough */ @@ -267,16 +264,15 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS; } - err = mlx5_MAD_IFC(to_mdev(ibdev), - mad_flags & IB_MAD_IGNORE_MKEY, - mad_flags & IB_MAD_IGNORE_BKEY, - port_num, in_wc, in_grh, in_mad, out_mad); + err = mlx5_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc, + in_grh, in, out); if (err) return IB_MAD_RESULT_FAILURE; /* set return bit in status of directed route responses */ if (mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + out->mad_hdr.status |= cpu_to_be16(1 << 15); if (method == IB_MGMT_METHOD_TRAP_REPRESS) /* no response for trap repress */ diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 0bdb8b45ea15..933a2059886b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1192,9 +1192,8 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, unsigned int *meta_sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index bfd4eebc1182..599794c5a78f 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -576,14 +576,10 @@ enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port); int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); -int mthca_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); +int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); int mthca_create_agents(struct mthca_dev *dev); void mthca_free_agents(struct mthca_dev *dev); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 0893604d2a62..99aa8183a7f2 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -196,26 +196,19 @@ static void forward_trap(struct mthca_dev *dev, } } -int mthca_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const 
struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) +int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { int err; u16 slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); u16 prev_lid = 0; struct ib_port_attr pattr; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; /* Forward locally generated traps to the SM */ - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && - slid == 0) { - forward_trap(to_mdev(ibdev), port_num, in_mad); + if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP && !slid) { + forward_trap(to_mdev(ibdev), port_num, in); return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; } @@ -225,40 +218,39 @@ int mthca_process_mad(struct ib_device *ibdev, * Only handle PMA and Mellanox vendor-specific class gets and * sets for other classes. */ - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (in->mad_hdr.method != IB_MGMT_METHOD_GET && + in->mad_hdr.method != IB_MGMT_METHOD_SET && + in->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) return IB_MAD_RESULT_SUCCESS; /* * Don't process SMInfo queries or vendor-specific * MADs -- the SMA can't handle them. 
*/ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || - ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == + if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || + ((in->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == IB_SMP_ATTR_VENDOR_MASK)) return IB_MAD_RESULT_SUCCESS; - } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || - in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 || - in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) + } else if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || + in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 || + in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) { + if (in->mad_hdr.method != IB_MGMT_METHOD_GET && + in->mad_hdr.method != IB_MGMT_METHOD_SET) return IB_MAD_RESULT_SUCCESS; } else return IB_MAD_RESULT_SUCCESS; - if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && - in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && - in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && + if ((in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + in->mad_hdr.method == IB_MGMT_METHOD_SET && + in->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) prev_lid = ib_lid_cpu16(pattr.lid); - err = mthca_MAD_IFC(to_mdev(ibdev), - mad_flags & IB_MAD_IGNORE_MKEY, - mad_flags & IB_MAD_IGNORE_BKEY, - port_num, in_wc, in_grh, in_mad, out_mad); + err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc, + in_grh, in, out); if (err == -EBADMSG) return IB_MAD_RESULT_SUCCESS; else if (err) { @@ -266,16 +258,16 @@ int mthca_process_mad(struct ib_device *ibdev, return IB_MAD_RESULT_FAILURE; } - if (!out_mad->mad_hdr.status) { - smp_snoop(ibdev, port_num, in_mad, prev_lid); - node_desc_override(ibdev, out_mad); + if (!out->mad_hdr.status) { + smp_snoop(ibdev, port_num, in, prev_lid); + node_desc_override(ibdev, out); } /* set return bit in status of directed route responses */ - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out->mad_hdr.status |= cpu_to_be16(1 << 15); - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) + if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) /* no response for trap repress */ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 4098508b9240..2b7f00ac41b0 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -247,23 +247,18 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -int ocrdma_process_mad(struct ib_device *ibdev, - int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, +int ocrdma_process_mad(struct ib_device *ibdev, int process_mad_flags, + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index) { int status = IB_MAD_RESULT_SUCCESS; struct ocrdma_dev *dev; - const struct ib_mad *in_mad = (const 
struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { dev = get_ocrdma_dev(ibdev); - ocrdma_pma_counters(dev, out_mad); + ocrdma_pma_counters(dev, out); status |= IB_MAD_RESULT_REPLY; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 64cb82c08664..9780afcde780 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -56,12 +56,9 @@ int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int ocrdma_process_mad(struct ib_device *, - int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, +int ocrdma_process_mad(struct ib_device *dev, int process_mad_flags, + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index); #endif /* __OCRDMA_AH_H__ */ diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 8096b8fcab4e..fea95faa3b21 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -4346,19 +4346,10 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) } int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *mad_hdr, - size_t in_mad_size, struct ib_mad_hdr *out_mad, - size_t *out_mad_size, u16 *out_mad_pkey_index) + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad *out_mad, size_t *out_mad_size, + u16 *out_mad_pkey_index) { - struct qedr_dev *dev = get_qedr_dev(ibdev); - - DP_DEBUG(dev, QEDR_MSG_GSI, - "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n", - mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod, - mad_hdr->class_specific, mad_hdr->class_version, - mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status); return IB_MAD_RESULT_SUCCESS; } diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 620472116c6d..18027844eb87 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -92,10 +92,9 @@ int qedr_post_recv(struct ib_qp *, const struct ib_recv_wr *, const struct ib_recv_wr **bad_wr); int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, u8 port_num, const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, - size_t in_mad_size, struct ib_mad_hdr *out_mad, - size_t *out_mad_size, u16 *out_mad_pkey_index); + const struct ib_grh *in_grh, const struct ib_mad *in_mad, + struct ib_mad *out_mad, size_t *out_mad_size, + u16 *out_mad_pkey_index); int qedr_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index b259aaf85d4a..79bb83222e8d 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2386,24 +2386,21 @@ bail: */ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, 
const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { int ret; struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - switch (in_mad->mad_hdr.mgmt_class) { + switch (in->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: case IB_MGMT_CLASS_SUBN_LID_ROUTED: - ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad); + ret = process_subn(ibdev, mad_flags, port, in, out); goto bail; case IB_MGMT_CLASS_PERF_MGMT: - ret = process_perf(ibdev, port, in_mad, out_mad); + ret = process_perf(ibdev, port, in, out); goto bail; case IB_MGMT_CLASS_CONG_MGMT: @@ -2412,7 +2409,7 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, ret = IB_MAD_RESULT_SUCCESS; goto bail; } - ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad); + ret = process_cc(ibdev, mad_flags, port, in, out); goto bail; default: diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 17bdf8acee2f..8bf414b47b96 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -245,9 +245,8 @@ void qib_sys_guid_chg(struct qib_ibport *ibp); void qib_node_desc_chg(struct qib_ibport *ibp); int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx); void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 416e72ea80d9..663fa16caa76 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2123,7 +2123,7 @@ struct ib_flow_action { atomic_t usecnt; }; -struct ib_mad_hdr; +struct ib_mad; struct ib_grh; enum ib_process_mad_flags { @@ -2301,9 +2301,8 @@ struct ib_device_ops { int (*process_mad)(struct ib_device *device, int process_mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index); int (*query_device)(struct ib_device *device, struct ib_device_attr *device_attr, struct ib_udata *udata); -- cgit v1.2.3-59-g8ed1b From 208d70f562e563226df178ff8f969364972e9e99 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 3 Nov 2019 16:07:23 +0200 Subject: IB/mlx5: Support flow counters offset for bulk counters Add support for flow steering counters action with a non-base counter ID (offset) for bulk counters. When creating a flow counter object, save the bulk value. This value is used when a flow action with a non-base counter ID is requested - to validate that the required offset is in the range of the allocated bulk. 
Link: https://lore.kernel.org/r/20191103140723.77411-1-leon@kernel.org Signed-off-by: Yevgeny Kliteynik Signed-off-by: Leon Romanovsky Reviewed-by: Mark Bloch Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 15 ++++++++++++++- drivers/infiniband/hw/mlx5/flow.c | 29 +++++++++++++++++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 1 + 4 files changed, 43 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 6b1fca91d7d3..2a5812a4c3bb 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -100,6 +100,7 @@ struct devx_obj { struct mlx5_ib_devx_mr devx_mr; struct mlx5_core_dct core_dct; struct mlx5_core_cq core_cq; + u32 flow_counter_bulk_size; }; struct list_head event_sub; /* holds devx_event_subscription entries */ }; @@ -192,15 +193,20 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) } } -bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id) +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id) { struct devx_obj *devx_obj = obj; u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { + + if (offset && offset >= devx_obj->flow_counter_bulk_size) + return false; + *counter_id = MLX5_GET(dealloc_flow_counter_in, devx_obj->dinbox, flow_counter_id); + *counter_id += offset; return true; } @@ -1463,6 +1469,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( if (err) goto obj_free; + if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) { + u8 bulk = MLX5_GET(alloc_flow_counter_in, + cmd_in, + flow_counter_bulk); + obj->flow_counter_bulk_size = 128UL * bulk; + } + uobj->object = obj; INIT_LIST_HEAD(&obj->event_sub); obj->ib_dev = dev; diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index b198ff10cde9..dbee17d22d50 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -85,6 +85,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); int len, ret, i; u32 counter_id = 0; + u32 *offset_attr; + u32 offset = 0; if (!capable(CAP_NET_RAW)) return -EPERM; @@ -151,8 +153,27 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( if (len) { devx_obj = arr_flow_actions[0]->object; - if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id)) + if (uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) { + + int num_offsets = uverbs_attr_ptr_get_array_size( + attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + sizeof(u32)); + + if (num_offsets != 1) + return -EINVAL; + + offset_attr = uverbs_attr_get_alloced_ptr( + attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET); + offset = *offset_attr; + } + + if (!mlx5_ib_devx_is_flow_counter(devx_obj, offset, + &counter_id)) return -EINVAL; + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; } @@ -598,7 +619,11 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_READ, 1, 1, - UA_OPTIONAL)); + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + UVERBS_ATTR_MIN_SIZE(sizeof(u32)), + UA_OPTIONAL, + UA_ALLOC_AND_COPY)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_DESTROY_FLOW, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 933a2059886b..d14fe43837e7 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1366,7 +1366,7 @@ struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_flow_act *flow_act, u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); -bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id); +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id); int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); #else diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index d0da070cf0ab..20d88307f75f 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -198,6 +198,7 @@ enum mlx5_ib_create_flow_attrs { MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, MLX5_IB_ATTR_CREATE_FLOW_TAG, MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, }; enum mlx5_ib_destoy_flow_attrs { -- cgit v1.2.3-59-g8ed1b From e88982ad1bb12db699de96fbc07096359ef6176c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 5 Nov 2019 13:46:32 -0800 Subject: RDMA/srpt: Report the SCSI residual to the initiator The code added by this patch is similar to the code that already exists in ibmvscsis_determine_resid(). This patch has been tested by running the following command: strace sg_raw -r 1k /dev/sdb 12 00 00 00 60 00 -o inquiry.bin |& grep resid= Link: https://lore.kernel.org/r/20191105214632.183302-1-bvanassche@acm.org Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: Bart Van Assche Acked-by: Honggang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 653de1583cf9..23c782e3d49a 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1362,9 +1362,11 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx, u64 tag, int status) { + struct se_cmd *cmd = &ioctx->cmd; struct srp_rsp *srp_rsp; const u8 *sense_data; int sense_data_len, max_sense_len; + u32 resid = cmd->residual_count; /* * The lowest bit of all SAM-3 status codes is zero (see also @@ -1386,6 +1388,28 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, srp_rsp->tag = tag; srp_rsp->status = status; + if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { + if (cmd->data_direction == DMA_TO_DEVICE) { + /* residual data from an underflow write */ + srp_rsp->flags = SRP_RSP_FLAG_DOUNDER; + srp_rsp->data_out_res_cnt = cpu_to_be32(resid); + } else if (cmd->data_direction == DMA_FROM_DEVICE) { + /* residual data from an underflow read */ + srp_rsp->flags = SRP_RSP_FLAG_DIUNDER; + srp_rsp->data_in_res_cnt = cpu_to_be32(resid); + } + } else if (cmd->se_cmd_flags & SCF_OVERFLOW_BIT) { + if (cmd->data_direction == DMA_TO_DEVICE) { + /* residual data from an overflow write */ + srp_rsp->flags = SRP_RSP_FLAG_DOOVER; + srp_rsp->data_out_res_cnt = cpu_to_be32(resid); + } else if (cmd->data_direction == DMA_FROM_DEVICE) { + /* residual data from an overflow read */ + srp_rsp->flags = SRP_RSP_FLAG_DIOVER; + srp_rsp->data_in_res_cnt = cpu_to_be32(resid); 
+ } + } + if (sense_data_len) { BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp)); max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp); -- cgit v1.2.3-59-g8ed1b From 960657b732e1ce21b07be5ab48a7ad3913d72ba4 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 6 Nov 2019 14:23:54 +0800 Subject: RDMA/qedr: Fix potential use after free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the release operation after error log to avoid possible use after free. Link: https://lore.kernel.org/r/1573021434-18768-1-git-send-email-bianpan2016@163.com Signed-off-by: Pan Bian Acked-by: Michal Kalderon  Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 5e9732990be5..d3c13f4b136e 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -481,10 +481,10 @@ qedr_addr6_resolve(struct qedr_dev *dev, if ((!dst) || dst->error) { if (dst) { - dst_release(dst); DP_ERR(dev, "ip6_route_output returned dst->error = %d\n", dst->error); + dst_release(dst); } return -EINVAL; } -- cgit v1.2.3-59-g8ed1b From da046d5f895fca18d63b15ac8faebd5bf784e23a Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 6 Nov 2019 14:44:11 +0800 Subject: RDMA/i40iw: Fix potential use after free Release variable dst after logging dst->error to avoid possible use after free. Link: https://lore.kernel.org/r/1573022651-37171-1-git-send-email-bianpan2016@163.com Signed-off-by: Pan Bian Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 2d6a378e8560..bb78d3280acc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2079,9 +2079,9 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, dst = i40iw_get_dst_ipv6(&src_addr, &dst_addr); if (!dst || dst->error) { if (dst) { - dst_release(dst); i40iw_pr_err("ip6_route_output returned dst->error = %d\n", dst->error); + dst_release(dst); } return rc; } -- cgit v1.2.3-59-g8ed1b From 7ee23491b39259ae83899dd93b2a29ef0f22f0a7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 7 Nov 2019 08:50:25 +0530 Subject: RDMA/qib: Validate ->show()/store() callbacks before calling them The permissions of the read-only or write-only sysfs files can be changed (as root) and the user can then try to read a write-only file or write to a read-only file which will lead to kernel crash here. Protect against that by always validating the show/store callbacks. 
Link: https://lore.kernel.org/r/d45cc26361a174ae12dbb86c994ef334d257924b.1573096807.git.viresh.kumar@linaro.org Signed-off-by: Viresh Kumar Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_sysfs.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 3926be78036e..568b21eb6ea1 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -301,6 +301,9 @@ static ssize_t qib_portattr_show(struct kobject *kobj, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, pport_kobj); + if (!pattr->show) + return -EIO; + return pattr->show(ppd, buf); } @@ -312,6 +315,9 @@ static ssize_t qib_portattr_store(struct kobject *kobj, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, pport_kobj); + if (!pattr->store) + return -EIO; + return pattr->store(ppd, buf, len); } -- cgit v1.2.3-59-g8ed1b From 6296665cee88a76a875ddcd7031bb1633b4adac8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Nov 2019 22:48:55 +0000 Subject: RDMA/ocrdma: Fix spelling mistake in variable name There is a spelling mistake in the variable nak_invalid_requst_errors, rename it to nak_invalid_request_errors. Link: https://lore.kernel.org/r/20191107224855.417647-1-colin.king@canonical.com Signed-off-by: Colin Ian King Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 6ef89c226ad8..c2e0d0fa44be 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -2034,7 +2034,7 @@ struct ocrdma_rx_stats { }; struct ocrdma_rx_qp_err_stats { - u32 nak_invalid_requst_errors; + u32 nak_invalid_request_errors; u32 nak_remote_operation_errors; u32 nak_count_remote_access_errors; u32 local_length_errors; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 95e8c1560cc2..5f831e3bdbad 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -423,8 +423,8 @@ static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev) memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); pcur = stats; - pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_requst_errors", - (u64)rx_qp_err_stats->nak_invalid_requst_errors); + pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_request_errors", + (u64)rx_qp_err_stats->nak_invalid_request_errors); pcur += ocrdma_add_stat(stats, pcur, "nak_remote_operation_errors", (u64)rx_qp_err_stats->nak_remote_operation_errors); pcur += ocrdma_add_stat(stats, pcur, "nak_count_remote_access_errors", -- cgit v1.2.3-59-g8ed1b From 9a5407d74c22821f7944e2be4209bdfc5faf8143 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Sun, 10 Nov 2019 13:36:45 +0200 Subject: RDMA/qedr: Make qedr_iw_load_qp() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function qedr_iw_load_qp() is only used in qedr_iw_cm.c Fixes: 82af6d19d8d9 ("RDMA/qedr: Fix synchronization methods and memory leaks in qedr") Link: https://lore.kernel.org/r/20191110113645.20058-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Acked-by: Michal Kalderon  Signed-off-by: Jason Gunthorpe 
--- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index d3c13f4b136e..792eecd206b6 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -506,7 +506,7 @@ qedr_addr6_resolve(struct qedr_dev *dev, return rc; } -struct qedr_qp *qedr_iw_load_qp(struct qedr_dev *dev, u32 qpn) +static struct qedr_qp *qedr_iw_load_qp(struct qedr_dev *dev, u32 qpn) { struct qedr_qp *qp; -- cgit v1.2.3-59-g8ed1b From 289b20b2a5f900dd759300ffe765a2d078296432 Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Wed, 13 Nov 2019 16:34:04 +0100 Subject: RDMA/siw: Cleanup unused mmap structures. Removes obsolete driver specific mmap information after generalization of RDMA driver mmap service. Also removes useless forward declaration of struct siw_mr. Fixes: 11f1a75567c4 ("RDMA/siw: Use the common mmap_xa helpers") Link: https://lore.kernel.org/r/20191113153404.7402-1-bmt@zurich.ibm.com Signed-off-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw.h | 11 ----------- drivers/infiniband/sw/siw/siw_verbs.c | 2 -- 2 files changed, 13 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index f851afb5632e..b939f489cd46 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -99,18 +99,9 @@ struct siw_device { struct work_struct netdev_down; }; -struct siw_uobj { - void *addr; - u32 size; -}; - struct siw_ucontext { struct ib_ucontext base_ucontext; struct siw_device *sdev; - - /* xarray of user mappable objects */ - struct xarray xa; - u32 uobj_nextkey; }; /* @@ -150,8 +141,6 @@ struct siw_pbl { struct siw_pble pbe[1]; }; -struct siw_mr; - /* * Generic memory representation for registered siw memory. * Memory lookup always via higher 24 bit of STag (STag index). diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 725985ed8af3..c992dd7299d9 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -87,8 +87,6 @@ int siw_alloc_ucontext(struct ib_ucontext *base_ctx, struct ib_udata *udata) rv = -ENOMEM; goto err_out; } - - ctx->uobj_nextkey = 0; ctx->sdev = sdev; uresp.dev_id = sdev->vendor_part_id; -- cgit v1.2.3-59-g8ed1b From 64c264872b8879e2ab9017eefe9514d4c045c60e Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 12 Nov 2019 11:26:08 +0200 Subject: RDMA/efa: Clear the admin command buffer prior to its submission We cannot rely on the entry memcpy as we only copy the actual size of the command, the rest of the bytes must be memset to zero. Currently providing non-zero memory will not have any user visible impact. However, since admin commands are extendable (in a backwards compatible way) everything beyond the size of the command must be cleared to prevent issues in the future. 
Fixes: 0420e542569b ("RDMA/efa: Implement functions that submit and complete admin commands") Link: https://lore.kernel.org/r/20191112092608.46964-1-galpress@amazon.com Reviewed-by: Daniel Kranzdorf Reviewed-by: Firas JahJah Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_com.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 3c412bc5b94f..0778f4f7dccd 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -317,6 +317,7 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu struct efa_admin_acq_entry *comp, size_t comp_size_in_bytes) { + struct efa_admin_aq_entry *aqe; struct efa_comp_ctx *comp_ctx; u16 queue_size_mask; u16 cmd_id; @@ -350,7 +351,9 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu reinit_completion(&comp_ctx->wait_event); - memcpy(&aq->sq.entries[pi], cmd, cmd_size_in_bytes); + aqe = &aq->sq.entries[pi]; + memset(aqe, 0, sizeof(*aqe)); + memcpy(aqe, cmd, cmd_size_in_bytes); aq->sq.pc++; atomic64_inc(&aq->stats.submitted_cmd); -- cgit v1.2.3-59-g8ed1b From 7283fff8b524b2f27438429aca458b232f5c5c8a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Nov 2019 08:32:13 +0100 Subject: dma-mapping: remove the DMA_ATTR_WRITE_BARRIER flag This flag is not implemented by any backend and only set by the ib_umem module in a single instance. Link: https://lore.kernel.org/r/20191113073214.9514-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- Documentation/DMA-attributes.txt | 18 ------------------ drivers/infiniband/core/umem.c | 9 ++------- include/linux/dma-mapping.h | 5 +---- 3 files changed, 3 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt index 8f8d97f65d73..29dcbe8826e8 100644 --- a/Documentation/DMA-attributes.txt +++ b/Documentation/DMA-attributes.txt @@ -5,24 +5,6 @@ DMA attributes This document describes the semantics of the DMA attributes that are defined in linux/dma-mapping.h. -DMA_ATTR_WRITE_BARRIER ----------------------- - -DMA_ATTR_WRITE_BARRIER is a (write) barrier attribute for DMA. DMA -to a memory region with the DMA_ATTR_WRITE_BARRIER attribute forces -all pending DMA writes to complete, and thus provides a mechanism to -strictly order DMA from a device across all intervening busses and -bridges. This barrier is not specific to a particular type of -interconnect, it applies to the system as a whole, and so its -implementation must account for the idiosyncrasies of the system all -the way from the DMA device to memory. - -As an example of a situation where DMA_ATTR_WRITE_BARRIER would be -useful, suppose that a device does a DMA write to indicate that data is -ready and available in memory. The DMA of the "completion indication" -could race with data DMA. Mapping the memory used for completion -indications with DMA_ATTR_WRITE_BARRIER would prevent the race. 
- DMA_ATTR_WEAK_ORDERING ---------------------- diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 24244a2f68cc..66148739b00f 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -199,7 +199,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, struct mm_struct *mm; unsigned long npages; int ret; - unsigned long dma_attrs = 0; struct scatterlist *sg; unsigned int gup_flags = FOLL_WRITE; @@ -211,9 +210,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, if (!context) return ERR_PTR(-EIO); - if (dmasync) - dma_attrs |= DMA_ATTR_WRITE_BARRIER; - /* * If the combination of the addr and size requested for this memory * region causes an integer overflow, return error. @@ -294,11 +290,10 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, sg_mark_end(sg); - umem->nmap = ib_dma_map_sg_attrs(context->device, + umem->nmap = ib_dma_map_sg(context->device, umem->sg_head.sgl, umem->sg_nents, - DMA_BIDIRECTIONAL, - dma_attrs); + DMA_BIDIRECTIONAL); if (!umem->nmap) { ret = -ENOMEM; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4a1c4fca475a..8023071d6903 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -15,11 +15,8 @@ /** * List of possible attributes associated with a DMA mapping. The semantics * of each attribute should be defined in Documentation/DMA-attributes.txt. - * - * DMA_ATTR_WRITE_BARRIER: DMA to a memory region with this attribute - * forces all pending DMA writes to complete. */ -#define DMA_ATTR_WRITE_BARRIER (1UL << 0) + /* * DMA_ATTR_WEAK_ORDERING: Specifies that reads and writes to the mapping * may be weakly ordered, that is that reads and writes may pass each other. -- cgit v1.2.3-59-g8ed1b From 72b894b09a96b741c92562709f6629310f2b34a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Nov 2019 08:32:14 +0100 Subject: IB/umem: remove the dmasync argument to ib_umem_get The argument is always ignored, so remove it. 
Link: https://lore.kernel.org/r/20191113073214.9514-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Acked-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 3 +-- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 10 ++++----- drivers/infiniband/hw/cxgb4/mem.c | 2 +- drivers/infiniband/hw/efa/efa_verbs.c | 2 +- drivers/infiniband/hw/hns/hns_roce_cq.c | 2 +- drivers/infiniband/hw/hns/hns_roce_db.c | 2 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 4 ++-- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 +- drivers/infiniband/hw/mlx4/cq.c | 2 +- drivers/infiniband/hw/mlx4/doorbell.c | 2 +- drivers/infiniband/hw/mlx4/mr.c | 2 +- drivers/infiniband/hw/mlx4/qp.c | 5 ++--- drivers/infiniband/hw/mlx4/srq.c | 2 +- drivers/infiniband/hw/mlx5/cq.c | 4 ++-- drivers/infiniband/hw/mlx5/devx.c | 2 +- drivers/infiniband/hw/mlx5/doorbell.c | 2 +- drivers/infiniband/hw/mlx5/mr.c | 2 +- drivers/infiniband/hw/mlx5/qp.c | 4 ++-- drivers/infiniband/hw/mlx5/srq.c | 2 +- drivers/infiniband/hw/mthca/mthca_provider.c | 4 +--- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- drivers/infiniband/hw/qedr/verbs.c | 29 ++++++++++++--------------- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 4 ++-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 2 +- drivers/infiniband/sw/rdmavt/mr.c | 2 +- drivers/infiniband/sw/rxe/rxe_mr.c | 2 +- include/rdma/ib_umem.h | 4 ++-- 31 files changed, 54 insertions(+), 61 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 66148739b00f..7a3b99597ead 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -185,10 +185,9 @@ EXPORT_SYMBOL(ib_umem_find_best_pgsz); * @addr: userspace virtual address to start at * @size: length of region to pin * @access: IB_ACCESS_xxx flags for memory being pinned - * @dmasync: flush in-flight DMA when the memory region is written */ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, - size_t size, int access, int dmasync) + size_t size, int access) { struct ib_ucontext *context; struct ib_umem *umem; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 8afd7d93cfe4..9b6ca15a183c 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -837,7 +837,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes += (qplib_qp->sq.max_wqe * psn_sz); } bytes = PAGE_ALIGN(bytes); - umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE, 1); + umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) return PTR_ERR(umem); @@ -851,7 +851,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); bytes = PAGE_ALIGN(bytes); umem = ib_umem_get(udata, ureq.qprva, bytes, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) goto rqfail; qp->rumem = umem; @@ -1304,7 +1304,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); bytes = PAGE_ALIGN(bytes); - umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE, 1); + umem = ib_umem_get(udata, ureq.srqva, bytes, 
IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) return PTR_ERR(umem); @@ -2547,7 +2547,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->umem = ib_umem_get(udata, req.cq_va, entries * sizeof(struct cq_base), - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->umem)) { rc = PTR_ERR(cq->umem); goto fail; @@ -3512,7 +3512,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, /* The fixed portion of the rkey is the same as the lkey */ mr->ib_mr.rkey = mr->qplib_mr.rkey; - umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + umem = ib_umem_get(udata, start, length, mr_access_flags); if (IS_ERR(umem)) { dev_err(rdev_to_dev(rdev), "Failed to get umem"); rc = -EFAULT; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 35c284af574d..fe3a7e8561df 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -543,7 +543,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mhp->rhp = rhp; - mhp->umem = ib_umem_get(udata, start, length, acc, 0); + mhp->umem = ib_umem_get(udata, start, length, acc); if (IS_ERR(mhp->umem)) goto err_free_skb; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index b242ea7a3bc8..657baa63faec 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1371,7 +1371,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, goto err_out; } - mr->umem = ib_umem_get(udata, start, length, access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); ibdev_dbg(&dev->ibdev, diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index e25fa19c249c..cde2960328df 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -219,7 +219,7 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, u32 npages; *umem = ib_umem_get(udata, buf_addr, cqe * hr_dev->caps.cq_entry_sz, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index c00714c2f16a..10af6958ab69 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -31,7 +31,7 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, refcount_set(&page->refcount, 1); page->user_virt = page_addr; - page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { ret = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 6589e283cc77..9ad19170c3f9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1145,7 +1145,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(udata, start, length, access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(mr->umem)) { ret = PTR_ERR(mr->umem); goto err_free; @@ -1230,7 +1230,7 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags, } ib_umem_release(mr->umem); - mr->umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + mr->umem = ib_umem_get(udata, 
start, length, mr_access_flags); if (IS_ERR(mr->umem)) { ret = PTR_ERR(mr->umem); mr->umem = NULL; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 9442f017db02..a6565b674801 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -745,7 +745,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } hr_qp->umem = ib_umem_get(udata, ucmd.buf_addr, - hr_qp->buff_size, 0, 0); + hr_qp->buff_size, 0); if (IS_ERR(hr_qp->umem)) { dev_err(dev, "ib_umem_get error for create qp\n"); ret = PTR_ERR(hr_qp->umem); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index a1bfa51c67fe..0cceae0fb4af 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -186,7 +186,7 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) return -EFAULT; - srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0); if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); @@ -206,7 +206,7 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, /* config index queue BA */ srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr, - srq->idx_que.buf_size, 0, 0); + srq->idx_que.buf_size, 0); if (IS_ERR(srq->idx_que.umem)) { dev_err(hr_dev->dev, "ib_umem_get error for index queue\n"); ret = PTR_ERR(srq->idx_que.umem); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index cd9ee1664a69..86375947bc67 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1763,7 +1763,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, if (length > I40IW_MAX_MR_SIZE) return ERR_PTR(-EINVAL); - region = ib_umem_get(udata, start, length, acc, 0); + region = ib_umem_get(udata, start, length, acc); if (IS_ERR(region)) return (struct ib_mr *)region; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index a7d238d312f0..306b21281fa2 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -145,7 +145,7 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata, int n; *umem = ib_umem_get(udata, buf_addr, cqe * cqe_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index 0f390351cef0..714f9df5bf39 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -64,7 +64,7 @@ int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 6ae503cfc526..dfa17bcdcdbc 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -398,7 +398,7 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start, up_read(¤t->mm->mmap_sem); } - return ib_umem_get(udata, start, length, access_flags, 0); + return ib_umem_get(udata, start, length, access_flags); } 
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index bd4aa04416c6..85f57b76e446 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -916,7 +916,7 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + (qp->sq.wqe_cnt << qp->sq.wqe_shift); - qp->umem = ib_umem_get(udata, wq.buf_addr, qp->buf_size, 0, 0); + qp->umem = ib_umem_get(udata, wq.buf_addr, qp->buf_size, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; @@ -1110,8 +1110,7 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, if (err) goto err; - qp->umem = - ib_umem_get(udata, ucmd.buf_addr, qp->buf_size, 0, 0); + qp->umem = ib_umem_get(udata, ucmd.buf_addr, qp->buf_size, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 848db7264cc9..8dcf6e3d9ae2 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -110,7 +110,7 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) return -EFAULT; - srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0); if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index cc938d27f913..dd8d24ee8e1d 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -709,7 +709,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, cq->buf.umem = ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->buf.umem)) { err = PTR_ERR(cq->buf.umem); return err; @@ -1110,7 +1110,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, umem = ib_umem_get(udata, ucmd.buf_addr, (size_t)ucmd.cqe_size * entries, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) { err = PTR_ERR(umem); return err; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 2a5812a4c3bb..9d0a18cf9e5e 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2134,7 +2134,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0); + obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 8f4e5f22b84c..12737c509aa2 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -64,7 +64,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 077ca10d9ed9..60d39b9ec41c 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -764,7 +764,7 @@ static int mr_umem_get(struct 
mlx5_ib_dev *dev, struct ib_udata *udata, if (order) *order = ilog2(roundup_pow_of_two(*ncont)); } else { - u = ib_umem_get(udata, start, length, access_flags, 0); + u = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(u)) { mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); return PTR_ERR(u); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index c831b455ffa8..26fb0227a514 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -749,7 +749,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, { int err; - *umem = ib_umem_get(udata, addr, size, 0, 0); + *umem = ib_umem_get(udata, addr, size, 0); if (IS_ERR(*umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); return PTR_ERR(*umem); @@ -806,7 +806,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (!ucmd->buf_addr) return -EINVAL; - rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0, 0); + rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0); if (IS_ERR(rwq->umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); err = PTR_ERR(rwq->umem); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 4e7fde86c96b..62939df3c692 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -80,7 +80,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); - srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0); if (IS_ERR(srq->umem)) { mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size); err = PTR_ERR(srq->umem); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 23554d8bf241..33002530fee7 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -880,9 +880,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(udata, start, length, acc, - ucmd.mr_attrs & MTHCA_MR_DMASYNC); - + mr->umem = ib_umem_get(udata, start, length, acc); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e72050de5734..9bc1ca6f6f9e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -869,7 +869,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(status); - mr->umem = ib_umem_get(udata, start, len, acc, 0); + mr->umem = ib_umem_get(udata, start, len, acc); if (IS_ERR(mr->umem)) { status = -EFAULT; goto umem_err; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index fea95faa3b21..55b77d753d12 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -762,7 +762,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, struct qedr_dev *dev, struct qedr_userq *q, u64 buf_addr, size_t buf_len, bool requires_db_rec, - int access, int dmasync, + int access, int alloc_and_init) { u32 fw_pages; @@ -770,7 +770,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, q->buf_addr = buf_addr; q->buf_len = buf_len; - q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access, dmasync); + q->umem = 
ib_umem_get(udata, q->buf_addr, q->buf_len, access); if (IS_ERR(q->umem)) { DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n", PTR_ERR(q->umem)); @@ -927,9 +927,8 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->cq_type = QEDR_CQ_TYPE_USER; rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr, - ureq.len, true, - IB_ACCESS_LOCAL_WRITE, - 1, 1); + ureq.len, true, IB_ACCESS_LOCAL_WRITE, + 1); if (rc) goto err0; @@ -1401,19 +1400,19 @@ static void qedr_free_srq_kernel_params(struct qedr_srq *srq) static int qedr_init_srq_user_params(struct ib_udata *udata, struct qedr_srq *srq, struct qedr_create_srq_ureq *ureq, - int access, int dmasync) + int access) { struct scatterlist *sg; int rc; rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr, - ureq->srq_len, false, access, dmasync, 1); + ureq->srq_len, false, access, 1); if (rc) return rc; srq->prod_umem = ib_umem_get(udata, ureq->prod_pair_addr, - sizeof(struct rdma_srq_producers), access, dmasync); + sizeof(struct rdma_srq_producers), access); if (IS_ERR(srq->prod_umem)) { qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); ib_umem_release(srq->usrq.umem); @@ -1510,7 +1509,7 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, goto err0; } - rc = qedr_init_srq_user_params(udata, srq, &ureq, 0, 0); + rc = qedr_init_srq_user_params(udata, srq, &ureq, 0); if (rc) goto err0; @@ -1751,18 +1750,16 @@ static int qedr_create_user_qp(struct qedr_dev *dev, return rc; } - /* SQ - read access only (0), dma sync not required (0) */ + /* SQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr, - ureq.sq_len, true, 0, 0, - alloc_and_init); + ureq.sq_len, true, 0, alloc_and_init); if (rc) return rc; if (!qp->srq) { - /* RQ - read access only (0), dma sync not required (0) */ + /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, - ureq.rq_len, true, - 0, 0, alloc_and_init); + ureq.rq_len, true, 0, alloc_and_init); if (rc) return rc; } @@ -2837,7 +2834,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr->type = QEDR_MR_USER; - mr->umem = ib_umem_get(udata, start, len, acc, 0); + mr->umem = ib_umem_get(udata, start, len, acc); if (IS_ERR(mr->umem)) { rc = -EFAULT; goto err0; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 7800e6930502..a26a4fd86bf4 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -136,7 +136,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } cq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->umem)) { ret = PTR_ERR(cq->umem); goto err_cq; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index f3a3d22ee8d7..c61e665ff261 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -126,7 +126,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(-EINVAL); } - umem = ib_umem_get(udata, start, length, access_flags, 0); + umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(umem)) { dev_warn(&dev->pdev->dev, "could not get umem for mem region\n"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c 
index 22daf2389d95..f15809c28f67 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -277,7 +277,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, if (!is_srq) { /* set qp->sq.wqe_cnt, shift, buf_size.. */ qp->rumem = ib_umem_get(udata, ucmd.rbuf_addr, - ucmd.rbuf_size, 0, 0); + ucmd.rbuf_size, 0); if (IS_ERR(qp->rumem)) { ret = PTR_ERR(qp->rumem); goto err_qp; @@ -289,7 +289,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, } qp->sumem = ib_umem_get(udata, ucmd.sbuf_addr, - ucmd.sbuf_size, 0, 0); + ucmd.sbuf_size, 0); if (IS_ERR(qp->sumem)) { if (!is_srq) ib_umem_release(qp->rumem); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 36cdfbdbd325..98c8be71d91d 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -146,7 +146,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, goto err_srq; } - srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0); if (IS_ERR(srq->umem)) { ret = PTR_ERR(srq->umem); goto err_srq; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index a6a39f01dca3..b9a76bf74857 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -390,7 +390,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (length == 0) return ERR_PTR(-EINVAL); - umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + umem = ib_umem_get(udata, start, length, mr_access_flags); if (IS_ERR(umem)) return (void *)umem; diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index ea6a819b7167..35a2baf2f364 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -169,7 +169,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, void *vaddr; int err; - umem = ib_umem_get(udata, start, length, access, 0); + umem = ib_umem_get(udata, start, length, access); if (IS_ERR(umem)) { pr_warn("err %d from rxe_umem_get\n", (int)PTR_ERR(umem)); diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index a91b2af64ec4..753f54e17e0a 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -70,7 +70,7 @@ static inline size_t ib_umem_num_pages(struct ib_umem *umem) #ifdef CONFIG_INFINIBAND_USER_MEM struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, - size_t size, int access, int dmasync); + size_t size, int access); void ib_umem_release(struct ib_umem *umem); int ib_umem_page_count(struct ib_umem *umem); int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, @@ -85,7 +85,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, static inline struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, size_t size, - int access, int dmasync) + int access) { return ERR_PTR(-EINVAL); } -- cgit v1.2.3-59-g8ed1b From e1ee1e62bec4a8968355517ea11b2a06b7364408 Mon Sep 17 00:00:00 2001 From: Dag Moxnes Date: Wed, 30 Oct 2019 13:44:00 +0100 Subject: RDMA/cma: Use ACK timeout for RoCE packetLifeTime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cma is currently using a hard-coded value, CMA_IBOE_PACKET_LIFETIME, for the PacketLifeTime, as it can not be determined from the network. This value might not be optimal for all networks. 
The cma module supports the function rdma_set_ack_timeout to set the ACK timeout for a QP associated with a connection. As per IBTA 12.7.34, local ACK timeout = (2 * PacketLifeTime + Local CA’s ACK delay). Assuming a negligible local ACK delay, we can use PacketLifeTime = local ACK timeout/2 as a reasonable approximation for RoCE networks. Since both values use the IBTA 4.096 usec * 2^n encoding, halving the timeout amounts to subtracting one from the encoded value, which is what the code below does. Link: https://lore.kernel.org/r/1572439440-17416-1-git-send-email-dag.moxnes@oracle.com Signed-off-by: Dag Moxnes Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index abf249d277ad..25f2b70fd8ef 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2504,7 +2504,9 @@ EXPORT_SYMBOL(rdma_set_service_type); * This function should be called before rdma_connect() on active side, * and on passive side before rdma_accept(). It is applicable to primary * path only. The timeout will affect the local side of the QP, it is not - * negotiated with remote side and zero disables the timer. + * negotiated with remote side and zero disables the timer. In case it is + * set before rdma_resolve_route, the value will also be used to determine + * PacketLifeTime for RoCE. * * Return: 0 for success */ @@ -2913,7 +2915,16 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->path_rec->rate = iboe_get_rate(ndev); dev_put(ndev); route->path_rec->packet_life_time_selector = IB_SA_EQ; - route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; + /* In case ACK timeout is set, use this value to calculate + * PacketLifeTime. As per IBTA 12.7.34, + * local ACK timeout = (2 * PacketLifeTime + Local CA’s ACK delay). + * Assuming a negligible local ACK delay, we can use + * PacketLifeTime = local ACK timeout/2 + * as a reasonable approximation for RoCE networks. + */ + route->path_rec->packet_life_time = id_priv->timeout_set ? + id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME; + if (!route->path_rec->mtu) { ret = -EINVAL; goto err2; -- cgit v1.2.3-59-g8ed1b From 9067f2f0b41d7e817fc8c5259bab1f17512b0147 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 23 Sep 2019 21:07:46 +0200 Subject: RDMA/iw_cxgb4: Fix an error handling path in 'c4iw_connect()' We should jump to fail3 in order to undo the 'xa_insert_irq()' call.
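A minimal standalone sketch of the unwind-ladder idiom this fix depends on (illustrative names only, not the cxgb4 code): once a resource has been claimed, a later failure has to branch to a label that releases it, which is why the failure paths that run after xa_insert_irq() must use fail3 rather than fail2.

    #include <stdio.h>

    static int claim_a(void) { puts("claim A"); return 0; }   /* plays the role of xa_insert_irq() */
    static int claim_b(void) { puts("claim B"); return -1; }  /* plays the role of pick_local_ipaddrs(), made to fail */
    static void release_a(void) { puts("release A"); }

    static int setup(void)
    {
            int err;

            err = claim_a();
            if (err)
                    return err;     /* nothing claimed yet, a plain return is fine */

            err = claim_b();
            if (err)
                    goto undo_a;    /* bailing out earlier than this would leak A */

            return 0;

    undo_a:
            release_a();            /* the cleanup that "goto fail3" reaches and "goto fail2" skipped */
            return err;
    }

    int main(void)
    {
            return setup() ? 1 : 0;
    }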
Link: https://lore.kernel.org/r/20190923190746.10964-1-christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index e87fc0408470..81440eaf0a00 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3381,7 +3381,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) { err = pick_local_ipaddrs(dev, cm_id); if (err) - goto fail2; + goto fail3; } /* find a route */ @@ -3403,7 +3403,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { err = pick_local_ip6addrs(dev, cm_id); if (err) - goto fail2; + goto fail3; } /* find a route */ -- cgit v1.2.3-59-g8ed1b From ff3195b3ed85d0068fb9a80eaa2a1471a04df76a Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Fri, 15 Nov 2019 17:44:57 +0200 Subject: IB/mlx4: Update HW GID table while adding vlan GID When adding a new GID, compare the vlan along with the GID and type. This allows vlans to have GIDs that alias each other, such as the default GID. Otherwise the GID cache view can become inconsistent with the HW view. Link: https://lore.kernel.org/r/20191115154457.247763-1-leon@kernel.org Signed-off-by: Danit Goldberg Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/main.c | 9 ++++++++- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f89b129b7e3a..0b5dc1d5928f 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -256,6 +256,8 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) int hw_update = 0; int i; struct gid_entry *gids = NULL; + u16 vlan_id = 0xffff; + u8 mac[ETH_ALEN]; if (!rdma_cap_roce_gid_table(attr->device, attr->port_num)) return -EINVAL; @@ -266,12 +268,16 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) if (!context) return -EINVAL; + ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]); + if (ret) + return ret; port_gid_table = &iboe->gids[attr->port_num - 1]; spin_lock_bh(&iboe->lock); for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { if (!memcmp(&port_gid_table->gids[i].gid, &attr->gid, sizeof(attr->gid)) && - port_gid_table->gids[i].gid_type == attr->gid_type) { + port_gid_table->gids[i].gid_type == attr->gid_type && + port_gid_table->gids[i].vlan_id == vlan_id) { found = i; break; } @@ -291,6 +297,7 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) memcpy(&port_gid_table->gids[free].gid, &attr->gid, sizeof(attr->gid)); port_gid_table->gids[free].gid_type = attr->gid_type; + port_gid_table->gids[free].vlan_id = vlan_id; port_gid_table->gids[free].ctx->real_index = free; port_gid_table->gids[free].ctx->refcount = 1; hw_update = 1; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 0d846fea8fdc..d188573187fa 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -508,6 +508,7 @@ struct gid_entry { union ib_gid gid; enum ib_gid_type gid_type; struct gid_cache_context *ctx; + u16 vlan_id; };
struct mlx4_port_gid_table { -- cgit v1.2.3-59-g8ed1b From c16339b69c0d58f456154b43fc396b1f1c3dc055 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Fri, 15 Nov 2019 17:45:55 +0200 Subject: IB/mlx5: Support extended number of strides for Striding RQ Extends the minimum single WQE strides from 64 to 8, which is exposed by the "min_single_wqe_log_num_of_strides" field of striding_rq_caps. Choose right number of strides based on FW capability. Link: https://lore.kernel.org/r/20191115154555.247856-1-leon@kernel.org Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 10 ++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/qp.c | 44 ++++++++++++++++++++++++++++-------- 3 files changed, 43 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2a510de4ae0f..4ced72185172 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1149,8 +1149,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES; resp.striding_rq_caps.max_single_stride_log_num_of_bytes = MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES; - resp.striding_rq_caps.min_single_wqe_log_num_of_strides = - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; + if (MLX5_CAP_GEN(dev->mdev, ext_stride_num_range)) + resp.striding_rq_caps + .min_single_wqe_log_num_of_strides = + MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES; + else + resp.striding_rq_caps + .min_single_wqe_log_num_of_strides = + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; resp.striding_rq_caps.max_single_wqe_log_num_of_strides = MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES; resp.striding_rq_caps.supported_qpts = diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d14fe43837e7..5e826eb7e4fe 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -291,6 +291,7 @@ enum mlx5_ib_wq_flags { #define MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES 16 #define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6 #define MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13 +#define MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES 3 struct mlx5_ib_rwq { struct ib_wq ibwq; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 26fb0227a514..82814c6d15a3 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -5957,12 +5957,21 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, } MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride); if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) { + /* + * In Firmware number of strides in each WQE is: + * "512 * 2^single_wqe_log_num_of_strides" + * Values 3 to 8 are accepted as 10 to 15, 9 to 18 are + * accepted as 0 to 9 + */ + static const u8 fw_map[] = { 10, 11, 12, 13, 14, 15, 0, 1, + 2, 3, 4, 5, 6, 7, 8, 9 }; MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en); MLX5_SET(wq, wq, log_wqe_stride_size, rwq->single_stride_log_num_of_bytes - MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES); - MLX5_SET(wq, wq, log_wqe_num_of_strides, rwq->log_num_strides - - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES); + MLX5_SET(wq, wq, log_wqe_num_of_strides, + fw_map[rwq->log_num_strides - + MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES]); } MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size); MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn); @@ -6037,6 +6046,19 @@ static int set_user_rq_size(struct mlx5_ib_dev *dev, return 0; } +static bool log_of_strides_valid(struct mlx5_ib_dev 
*dev, u32 log_num_strides) +{ + if ((log_num_strides > MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) || + (log_num_strides < MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) + return false; + + if (!MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) && + (log_num_strides < MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) + return false; + + return true; +} + static int prepare_user_rq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata, @@ -6084,14 +6106,16 @@ static int prepare_user_rq(struct ib_pd *pd, MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES); return -EINVAL; } - if ((ucmd.single_wqe_log_num_of_strides > - MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) || - (ucmd.single_wqe_log_num_of_strides < - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) { - mlx5_ib_dbg(dev, "Invalid log num strides (%u. Range is %u - %u)\n", - ucmd.single_wqe_log_num_of_strides, - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES, - MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES); + if (!log_of_strides_valid(dev, + ucmd.single_wqe_log_num_of_strides)) { + mlx5_ib_dbg( + dev, + "Invalid log num strides (%u. Range is %u - %u)\n", + ucmd.single_wqe_log_num_of_strides, + MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) ? + MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES : + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES, + MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES); return -EINVAL; } rwq->single_stride_log_num_of_bytes = -- cgit v1.2.3-59-g8ed1b From 0acc637dacb6bf12a17c9ad0c41d9ff3696d8b05 Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Sun, 17 Nov 2019 15:33:21 +0200 Subject: RDMA/cm: Use refcount_t type for refcount variable This atomic in struct cm_id_private is being used as a refcount, change it to refcount_t for better clarity and to get the refcount protections. Link: https://lore.kernel.org/r/1573997601-4502-1-git-send-email-danitg@mellanox.com Signed-off-by: Danit Goldberg Reviewed-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 33b384c7df42..455b3659d84b 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -237,7 +237,7 @@ struct cm_id_private { struct rb_node sidr_id_node; spinlock_t lock; /* Do not acquire inside cm.lock */ struct completion comp; - atomic_t refcount; + refcount_t refcount; /* Number of clients sharing this ib_cm_id. Only valid for listeners. * Protected by the cm.lock spinlock. 
*/ int listen_sharecount; @@ -282,7 +282,7 @@ static void cm_work_handler(struct work_struct *work); static inline void cm_deref_id(struct cm_id_private *cm_id_priv) { - if (atomic_dec_and_test(&cm_id_priv->refcount)) + if (refcount_dec_and_test(&cm_id_priv->refcount)) complete(&cm_id_priv->comp); } @@ -339,7 +339,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, m->ah = ah; m->retries = cm_id_priv->max_cm_retries; - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; *msg = m; @@ -600,7 +600,7 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); else cm_id_priv = NULL; } @@ -857,7 +857,7 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, INIT_LIST_HEAD(&cm_id_priv->prim_list); INIT_LIST_HEAD(&cm_id_priv->altr_list); atomic_set(&cm_id_priv->work_count, -1); - atomic_set(&cm_id_priv->refcount, 1); + refcount_set(&cm_id_priv->refcount, 1); return &cm_id_priv->id; error: @@ -1204,7 +1204,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, spin_unlock_irqrestore(&cm.lock, flags); return ERR_PTR(-EINVAL); } - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); ++cm_id_priv->listen_sharecount; spin_unlock_irqrestore(&cm.lock, flags); @@ -1861,8 +1861,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, NULL, 0); goto out; } - atomic_inc(&listen_cm_id_priv->refcount); - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&listen_cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); cm_id_priv->id.state = IB_CM_REQ_RCVD; atomic_inc(&cm_id_priv->work_count); spin_unlock_irq(&cm.lock); @@ -2018,7 +2018,7 @@ static int cm_req_handler(struct cm_work *work) return 0; rejected: - atomic_dec(&cm_id_priv->refcount); + refcount_dec(&cm_id_priv->refcount); cm_deref_id(listen_cm_id_priv); free_timeinfo: kfree(cm_id_priv->timewait_info); @@ -2792,7 +2792,7 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) cm_local_id(timewait_info->work.local_id)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); else cm_id_priv = NULL; } @@ -3562,8 +3562,8 @@ static int cm_sidr_req_handler(struct cm_work *work) cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED); goto out; /* No match. */ } - atomic_inc(&cur_cm_id_priv->refcount); - atomic_inc(&cm_id_priv->refcount); + refcount_inc(&cur_cm_id_priv->refcount); + refcount_inc(&cm_id_priv->refcount); spin_unlock_irq(&cm.lock); cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; -- cgit v1.2.3-59-g8ed1b From a25984f3baaa97079ce3e75c5a1d362155915a31 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 18 Nov 2019 17:06:45 +0200 Subject: RDMA/qedr: Fix null-pointer dereference when calling rdma_user_mmap_get_offset When running against rdma-core that doesn't support doorbell recovery, the rdma_user_mmap_entry won't be allocated for doorbell recovery related mappings. We have a flag indicating whether rdma-core supports doorbell recovery or not which was used during initialization, however some cases didn't check that the rdma_user_mmap_entry exists before attempting to acquire it's offset. 
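A short self-contained sketch (made-up types, not the qedr code) of the guard the fix below applies: when an object is only created for newer userspace, every later consumer has to re-check it before dereferencing, not just the allocation path.

    #include <stdio.h>
    #include <stddef.h>

    struct mmap_entry { unsigned long offset; };

    /* Stand-in for rdma_user_mmap_get_offset(); it assumes a non-NULL entry. */
    static unsigned long get_offset(const struct mmap_entry *e)
    {
            return e->offset;
    }

    static void fill_uresp(const struct mmap_entry *db_mmap_entry)
    {
            unsigned long db_rec_addr = 0;

            /* The entry exists only when userspace supports doorbell
             * recovery, so it may be NULL here - check before use. */
            if (db_mmap_entry)
                    db_rec_addr = get_offset(db_mmap_entry);

            printf("db_rec_addr=%lu\n", db_rec_addr);
    }

    int main(void)
    {
            struct mmap_entry e = { .offset = 4096 };

            fill_uresp(&e);      /* new rdma-core: entry allocated */
            fill_uresp(NULL);    /* old rdma-core: entry absent, must not crash */
            return 0;
    }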
Fixes: 97f612509294 ("RDMA/qedr: Add doorbell overflow recovery support") Link: https://lore.kernel.org/r/20191118150645.26602-1-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/verbs.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 55b77d753d12..4cd292966aa9 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -683,7 +683,9 @@ static int qedr_copy_cq_uresp(struct qedr_dev *dev, uresp.db_offset = db_offset; uresp.icid = cq->icid; - uresp.db_rec_addr = rdma_user_mmap_get_offset(cq->q.db_mmap_entry); + if (cq->q.db_mmap_entry) + uresp.db_rec_addr = + rdma_user_mmap_get_offset(cq->q.db_mmap_entry); rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) @@ -1012,7 +1014,7 @@ err1: if (udata) { qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); ib_umem_release(cq->q.umem); - if (ctx) + if (cq->q.db_mmap_entry) rdma_user_mmap_entry_remove(cq->q.db_mmap_entry); } else { dev->ops->common->chain_free(dev->cdev, &cq->pbl); @@ -1245,7 +1247,9 @@ static void qedr_copy_rq_uresp(struct qedr_dev *dev, } uresp->rq_icid = qp->icid; - uresp->rq_db_rec_addr = rdma_user_mmap_get_offset(qp->urq.db_mmap_entry); + if (qp->urq.db_mmap_entry) + uresp->rq_db_rec_addr = + rdma_user_mmap_get_offset(qp->urq.db_mmap_entry); } static void qedr_copy_sq_uresp(struct qedr_dev *dev, @@ -1260,8 +1264,9 @@ static void qedr_copy_sq_uresp(struct qedr_dev *dev, else uresp->sq_icid = qp->icid + 1; - uresp->sq_db_rec_addr = - rdma_user_mmap_get_offset(qp->usq.db_mmap_entry); + if (qp->usq.db_mmap_entry) + uresp->sq_db_rec_addr = + rdma_user_mmap_get_offset(qp->usq.db_mmap_entry); } static int qedr_copy_qp_uresp(struct qedr_dev *dev, -- cgit v1.2.3-59-g8ed1b From bfcb3c5d14854f001881dc3f5cc29bf186598d9f Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Wed, 6 Nov 2019 15:08:32 +0200 Subject: IB/core: Add interfaces to get VF node and port GUIDs Provide ability to get node and port GUIDs of VFs to be symmetrical to already existing set option. 
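As a hedged illustration of how the new helper is meant to be consumed, here is a hypothetical kernel-side caller (not in-tree code; the real consumer is the ipoib ndo added two patches below). It assumes a kernel context with rdma/ib_verbs.h and linux/if_link.h available and uses only the signature introduced by this patch.

    /* Hypothetical example: report the GUIDs of VF 0 on port 1. */
    static void show_vf0_guids(struct ib_device *ibdev)
    {
            struct ifla_vf_guid node = {};
            struct ifla_vf_guid port = {};
            int err;

            err = ib_get_vf_guid(ibdev, 0, 1, &node, &port);
            if (err)
                    return; /* -EOPNOTSUPP when the driver lacks .get_vf_guid */

            pr_info("VF0 node guid 0x%llx port guid 0x%llx\n",
                    (unsigned long long)node.guid,
                    (unsigned long long)port.guid);
    }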
Signed-off-by: Danit Goldberg Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/verbs.c | 10 ++++++++++ include/rdma/ib_verbs.h | 6 ++++++ 3 files changed, 17 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 99c4a55545cf..38fadbec054d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2614,6 +2614,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, get_port_immutable); SET_DEVICE_OP(dev_ops, get_vector_affinity); SET_DEVICE_OP(dev_ops, get_vf_config); + SET_DEVICE_OP(dev_ops, get_vf_guid); SET_DEVICE_OP(dev_ops, get_vf_stats); SET_DEVICE_OP(dev_ops, init_port); SET_DEVICE_OP(dev_ops, invalidate_range); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f974b6854224..7d96351c8d8c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2458,6 +2458,16 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, } EXPORT_SYMBOL(ib_set_vf_guid); +int ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid) +{ + if (!device->ops.get_vf_guid) + return -EOPNOTSUPP; + + return device->ops.get_vf_guid(device, vf, port, node_guid, port_guid); +} +EXPORT_SYMBOL(ib_get_vf_guid); /** * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection * information) and set an appropriate memory region for registration. diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6a47ba85c54c..ec7d1a1f8f31 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2448,6 +2448,9 @@ struct ib_device_ops { struct ifla_vf_info *ivf); int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); + int (*get_vf_guid)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, int type); struct ib_wq *(*create_wq)(struct ib_pd *pd, @@ -3303,6 +3306,9 @@ int ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info); int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); +int ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); -- cgit v1.2.3-59-g8ed1b From 2446887ed226786fcf9b3ece8d89e6b22551411b Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Wed, 6 Nov 2019 14:57:00 +0200 Subject: IB/ipoib: Add ndo operation for getting VFs GUID attributes Add ndo operation to the network driver that enables configuring ipoib_get_vf_guid operation. The operation allows to get a VF port and node GUIDs. 
Signed-off-by: Danit Goldberg Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index ac0583ff280d..e5f438ab716c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2019,6 +2019,15 @@ static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type) return ib_set_vf_guid(priv->ca, vf, priv->port, guid, type); } +static int ipoib_get_vf_guid(struct net_device *dev, int vf, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + return ib_get_vf_guid(priv->ca, vf, priv->port, node_guid, port_guid); +} + static int ipoib_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats) { @@ -2045,6 +2054,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { .ndo_set_vf_link_state = ipoib_set_vf_link_state, .ndo_get_vf_config = ipoib_get_vf_config, .ndo_get_vf_stats = ipoib_get_vf_stats, + .ndo_get_vf_guid = ipoib_get_vf_guid, .ndo_set_vf_guid = ipoib_set_vf_guid, .ndo_set_mac_address = ipoib_set_mac, .ndo_get_stats64 = ipoib_get_stats, -- cgit v1.2.3-59-g8ed1b From 9c0015ef092879f61129cdffbbe22fd30201d39b Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Wed, 6 Nov 2019 15:18:12 +0200 Subject: IB/mlx5: Implement callbacks for getting VFs GUID attributes Implement the IB defined callback mlx5_ib_get_vf_guid used to query FW for VFs attributes and return node and port GUIDs. Signed-off-by: Danit Goldberg Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/ib_virt.c | 24 ++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ 3 files changed, 28 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c index 649a3364f838..4f0edd4832bd 100644 --- a/drivers/infiniband/hw/mlx5/ib_virt.c +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -201,3 +201,27 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, return -EINVAL; } + +int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_hca_vport_context *rep; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(mdev, 1, 1, vf+1, rep); + if (err) + goto ex; + + port_guid->guid = rep->port_guid; + node_guid->guid = rep->node_guid; +ex: + kfree(rep); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 46ea4f0b9b51..b0d7e2d2991e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6310,6 +6310,7 @@ static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = { static const struct ib_device_ops mlx5_ib_dev_sriov_ops = { .get_vf_config = mlx5_ib_get_vf_config, + .get_vf_guid = mlx5_ib_get_vf_guid, .get_vf_stats = mlx5_ib_get_vf_stats, .set_vf_guid = mlx5_ib_set_vf_guid, .set_vf_link_state = mlx5_ib_set_vf_link_state, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2ceaef3ea3fb..96811f6ca15a 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1301,6 +1301,9 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int mlx5_ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); +int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); -- cgit v1.2.3-59-g8ed1b From 6e419e35e68ad7327a383cd52053071b8eb9843b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 20 Nov 2019 21:41:38 +0800 Subject: RDMA/bnxt_re: Fix Kconfig indentation Adjust indentation from spaces to tab (+optional two spaces) as in coding style with command like: $ sed -e 's/^ /\t/' -i */Kconfig Link: https://lore.kernel.org/r/20191120134138.15245-1-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/Kconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig index ab8779d23382..b83f1cc38c52 100644 --- a/drivers/infiniband/hw/bnxt_re/Kconfig +++ b/drivers/infiniband/hw/bnxt_re/Kconfig @@ -1,11 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_BNXT_RE - tristate "Broadcom Netxtreme HCA support" - depends on 64BIT - depends on ETHERNET && NETDEVICES && PCI && INET && DCB - select NET_VENDOR_BROADCOM - select BNXT - ---help--- + tristate "Broadcom Netxtreme HCA support" + depends on 64BIT + depends on ETHERNET && NETDEVICES && PCI && INET && DCB + select NET_VENDOR_BROADCOM + select BNXT + ---help--- This driver supports Broadcom NetXtreme-E 10/25/40/50 gigabit RoCE HCAs. To compile this driver as a module, choose M here: the module will be called bnxt_re. -- cgit v1.2.3-59-g8ed1b From e284b159c6881c8bec9713daba2653268f4c4948 Mon Sep 17 00:00:00 2001 From: Luke Starrett Date: Thu, 21 Nov 2019 01:22:21 -0500 Subject: RDMA/bnxt_re: Fix chip number validation Broadcom's Gen P5 series In the first version of Gen P5 ASIC, chip-id was always set to 0x1750 for all adaptor port configurations. This has been fixed in the new chip rev. Due to this missing fix users are not able to use adaptors based on latest chip rev of Broadcom's Gen P5 adaptors. 
Fixes: ae8637e13185 ("RDMA/bnxt_re: Add chip context to identify 57500 series") Link: https://lore.kernel.org/r/1574317343-23300-2-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Naresh Kumar PBS Signed-off-by: Selvin Xavier Signed-off-by: Luke Starrett Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_res.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index fbda11a7ab1a..aaa76d792185 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -186,7 +186,9 @@ struct bnxt_qplib_chip_ctx { u8 chip_metal; }; -#define CHIP_NUM_57500 0x1750 +#define CHIP_NUM_57508 0x1750 +#define CHIP_NUM_57504 0x1751 +#define CHIP_NUM_57502 0x1752 struct bnxt_qplib_res { struct pci_dev *pdev; @@ -203,7 +205,9 @@ struct bnxt_qplib_res { static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) { - return (cctx->chip_num == CHIP_NUM_57500); + return (cctx->chip_num == CHIP_NUM_57508 || + cctx->chip_num == CHIP_NUM_57504 || + cctx->chip_num == CHIP_NUM_57502); } static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res) -- cgit v1.2.3-59-g8ed1b From 98998ffe5216c7fa2c0225bb5b049ca5cdf8d195 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 21 Nov 2019 01:22:22 -0500 Subject: RDMA/bnxt_re: Fix stat push into dma buffer on gen p5 devices Due to recent advances in the firmware for Broadcom's gen p5 series of adaptors the driver code to report hardware counters has been broken w.r.t. roce devices. The new firmware command expects dma length to be specified during stat dma buffer allocation. Fixes: 2792b5b95ed5 ("bnxt_en: Update firmware interface spec. 
to 1.10.0.89.") Link: https://lore.kernel.org/r/1574317343-23300-3-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index d6785b8339d2..a2ac88e01a99 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -494,6 +494,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_addr = cpu_to_le64(dma_map); + req.stats_dma_length = cpu_to_le16(sizeof(struct ctx_hw_stats_ext)); req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); -- cgit v1.2.3-59-g8ed1b From fca5b9dc0986aa49b3f0a7cfe24b6c82422ac1d7 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 21 Nov 2019 01:22:23 -0500 Subject: RDMA/bnxt_re: Fix missing le16_to_cpu From sparse: drivers/infiniband/hw/bnxt_re/main.c:1274:18: warning: cast from restricted __le16 drivers/infiniband/hw/bnxt_re/main.c:1275:18: warning: cast from restricted __le16 drivers/infiniband/hw/bnxt_re/main.c:1276:18: warning: cast from restricted __le16 drivers/infiniband/hw/bnxt_re/main.c:1277:21: warning: restricted __le16 degrades to integer Fixes: 2b827ea1926b ("RDMA/bnxt_re: Query HWRM Interface version from FW") Link: https://lore.kernel.org/r/1574317343-23300-4-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index a2ac88e01a99..e7e8a0f49464 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1291,10 +1291,10 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) return; } rdev->qplib_ctx.hwrm_intf_ver = - (u64)resp.hwrm_intf_major << 48 | - (u64)resp.hwrm_intf_minor << 32 | - (u64)resp.hwrm_intf_build << 16 | - resp.hwrm_intf_patch; + (u64)le16_to_cpu(resp.hwrm_intf_major) << 48 | + (u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 | + (u64)le16_to_cpu(resp.hwrm_intf_build) << 16 | + le16_to_cpu(resp.hwrm_intf_patch); } static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) -- cgit v1.2.3-59-g8ed1b From 25d24f4241f7bccdc1978daa026d8847e04a73a6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Nov 2019 15:48:14 +0000 Subject: IB/hfi1: remove redundant assignment to variable ret The variable ret is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. 
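A small standalone illustration (not the hfi1 code) of the pattern the analyzer flags: the initial value of ret is a dead store because the loop body always overwrites it before it is ever read.

    #include <stdio.h>

    static int load_config(int i)
    {
            return i < 3 ? 0 : -1;   /* pretend the fourth write fails */
    }

    int main(void)
    {
            int ret = 0;    /* dead store: overwritten on the first loop pass */
            int i;

            for (i = 0; i < 4; i++) {
                    ret = load_config(i);
                    if (ret)
                            break;
            }
            printf("ret=%d\n", ret);
            return 0;
    }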
Link: https://lore.kernel.org/r/20191122154814.87257-1-colin.king@canonical.com Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index cbf7faa5038c..36593f2efe26 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -634,7 +634,7 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id, u32 config_data, const char *message) { u8 i; - int ret = HCMD_SUCCESS; + int ret; for (i = 0; i < 4; i++) { ret = load_8051_config(ppd->dd, field_id, i, config_data); -- cgit v1.2.3-59-g8ed1b From bcf7cc534cd40f8098850274e8459fb0d088fcda Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 21 Nov 2019 16:15:07 +0200 Subject: RDMA/efa: Store network attributes in device attributes There's no reason to separate the network attributes from all other device attributes. Embed the fields inside the device attributes and query them all in one function. Link: https://lore.kernel.org/r/20191121141509.59297-2-galpress@amazon.com Reviewed-by: Daniel Kranzdorf Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa.h | 2 -- drivers/infiniband/hw/efa/efa_com_cmd.c | 34 ++++++++++++--------------------- drivers/infiniband/hw/efa/efa_com_cmd.h | 9 ++------- drivers/infiniband/hw/efa/efa_main.c | 16 ---------------- drivers/infiniband/hw/efa/efa_verbs.c | 8 ++++---- 5 files changed, 18 insertions(+), 51 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 2bda07078b97..aa7396a1588a 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -60,8 +60,6 @@ struct efa_dev { u64 mem_bar_len; u64 db_bar_addr; u64 db_bar_len; - u8 addr[EFA_GID_SIZE]; - u32 mtu; int admin_msix_vector_idx; struct efa_irq admin_irq; diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index c079f1332082..4713c2756ad3 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -423,28 +423,6 @@ static int efa_com_get_feature(struct efa_com_dev *edev, return efa_com_get_feature_ex(edev, get_resp, feature_id, 0, 0); } -int efa_com_get_network_attr(struct efa_com_dev *edev, - struct efa_com_get_network_attr_result *result) -{ - struct efa_admin_get_feature_resp resp; - int err; - - err = efa_com_get_feature(edev, &resp, - EFA_ADMIN_NETWORK_ATTR); - if (err) { - ibdev_err_ratelimited(edev->efa_dev, - "Failed to get network attributes %d\n", - err); - return err; - } - - memcpy(result->addr, resp.u.network_attr.addr, - sizeof(resp.u.network_attr.addr)); - result->mtu = resp.u.network_attr.mtu; - - return 0; -} - int efa_com_get_device_attr(struct efa_com_dev *edev, struct efa_com_get_device_attr_result *result) { @@ -501,6 +479,18 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->max_llq_size = resp.u.queue_attr.max_llq_size; result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq; + err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR); + if (err) { + ibdev_err_ratelimited(edev->efa_dev, + "Failed to get network attributes %d\n", + err); + return err; + } + + memcpy(result->addr, resp.u.network_attr.addr, + sizeof(resp.u.network_attr.addr)); + 
result->mtu = resp.u.network_attr.mtu; + return 0; } diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 7f6c13052f49..6134d13ecc6f 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -100,14 +100,11 @@ struct efa_com_destroy_ah_params { u16 pdn; }; -struct efa_com_get_network_attr_result { - u8 addr[EFA_GID_SIZE]; - u32 mtu; -}; - struct efa_com_get_device_attr_result { + u8 addr[EFA_GID_SIZE]; u64 page_size_cap; u64 max_mr_pages; + u32 mtu; u32 fw_version; u32 admin_api_version; u32 device_version; @@ -271,8 +268,6 @@ int efa_com_create_ah(struct efa_com_dev *edev, struct efa_com_create_ah_result *result); int efa_com_destroy_ah(struct efa_com_dev *edev, struct efa_com_destroy_ah_params *params); -int efa_com_get_network_attr(struct efa_com_dev *edev, - struct efa_com_get_network_attr_result *result); int efa_com_get_device_attr(struct efa_com_dev *edev, struct efa_com_get_device_attr_result *result); int efa_com_get_hw_hints(struct efa_com_dev *edev, diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 0e3050d01b75..faf3ff1bca2a 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -30,15 +30,6 @@ MODULE_DEVICE_TABLE(pci, efa_pci_tbl); (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) -static void efa_update_network_attr(struct efa_dev *dev, - struct efa_com_get_network_attr_result *network_attr) -{ - memcpy(dev->addr, network_attr->addr, sizeof(network_attr->addr)); - dev->mtu = network_attr->mtu; - - dev_dbg(&dev->pdev->dev, "Full address %pI6\n", dev->addr); -} - /* This handler will called for unknown event group or unimplemented handlers */ static void unimplemented_aenq_handler(void *data, struct efa_admin_aenq_entry *aenq_e) @@ -234,7 +225,6 @@ static const struct ib_device_ops efa_dev_ops = { static int efa_ib_device_add(struct efa_dev *dev) { - struct efa_com_get_network_attr_result network_attr; struct efa_com_get_hw_hints_result hw_hints; struct pci_dev *pdev = dev->pdev; int err; @@ -250,12 +240,6 @@ static int efa_ib_device_add(struct efa_dev *dev) if (err) return err; - err = efa_com_get_network_attr(&dev->edev, &network_attr); - if (err) - goto err_release_doorbell_bar; - - efa_update_network_attr(dev, &network_attr); - err = efa_com_get_hw_hints(&dev->edev, &hw_hints); if (err) goto err_release_doorbell_bar; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 657baa63faec..e1f1c1495da1 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -232,9 +232,9 @@ int efa_query_port(struct ib_device *ibdev, u8 port, props->pkey_tbl_len = 1; props->active_speed = IB_SPEED_EDR; props->active_width = IB_WIDTH_4X; - props->max_mtu = ib_mtu_int_to_enum(dev->mtu); - props->active_mtu = ib_mtu_int_to_enum(dev->mtu); - props->max_msg_sz = dev->mtu; + props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); + props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); + props->max_msg_sz = dev->dev_attr.mtu; props->max_vl_num = 1; return 0; @@ -295,7 +295,7 @@ int efa_query_gid(struct ib_device *ibdev, u8 port, int index, { struct efa_dev *dev = to_edev(ibdev); - memcpy(gid->raw, dev->addr, sizeof(dev->addr)); + memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr)); return 0; } -- cgit v1.2.3-59-g8ed1b From e6c4f3ff434c8b336662a2053d48ce677c9fd608 
Mon Sep 17 00:00:00 2001 From: Daniel Kranzdorf Date: Thu, 21 Nov 2019 16:15:08 +0200 Subject: RDMA/efa: Support remote read access in MR registration Enable remote read access for memory regions in order to support RDMA operations. Link: https://lore.kernel.org/r/20191121141509.59297-3-galpress@amazon.com Signed-off-by: Daniel Kranzdorf Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 12 +++++++++--- drivers/infiniband/hw/efa/efa_com_cmd.c | 3 +-- drivers/infiniband/hw/efa/efa_com_cmd.h | 7 +------ drivers/infiniband/hw/efa/efa_verbs.c | 5 +++-- 4 files changed, 14 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 2be0469d545f..7fa9d532db61 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -362,9 +362,13 @@ struct efa_admin_reg_mr_cmd { /* * permissions - * 0 : local_write_enable - Write permissions: value - * of 1 needed for RQ buffers and for RDMA write - * 7:1 : reserved1 - remote access flags, etc + * 0 : local_write_enable - Local write permissions: + * must be set for RQ buffers and buffers posted for + * RDMA Read requests + * 1 : reserved1 - MBZ + * 2 : remote_read_enable - Remote read permissions: + * must be set to enable RDMA read from the region + * 7:3 : reserved2 - MBZ */ u8 permissions; @@ -780,6 +784,8 @@ struct efa_admin_mmio_req_read_less_resp { #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_SHIFT 7 #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7) #define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK BIT(0) +#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_SHIFT 2 +#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK BIT(2) /* create_cq_cmd */ #define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5 diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 4713c2756ad3..520c9d920f9e 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -230,8 +230,7 @@ int efa_com_register_mr(struct efa_com_dev *edev, mr_cmd.flags |= params->page_shift & EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK; mr_cmd.iova = params->iova; - mr_cmd.permissions |= params->permissions & - EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK; + mr_cmd.permissions = params->permissions; if (params->inline_pbl) { memcpy(mr_cmd.pbl.inline_pbl_array, diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 6134d13ecc6f..d119186c41d0 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -178,12 +178,7 @@ struct efa_com_reg_mr_params { * address mapping */ u8 page_shift; - /* - * permissions - * 0: local_write_enable - Write permissions: value of 1 needed - * for RQ buffers and for RDMA write:1: reserved1 - remote - * access flags, etc - */ + /* see permissions field of struct efa_admin_reg_mr_cmd */ u8 permissions; u8 inline_pbl; u8 indirect; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index e1f1c1495da1..9701f8e52c71 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -70,7 +70,8 @@ static const char *const efa_stats_names[] = { #define EFA_CHUNK_USED_SIZE \ ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE) -#define 
EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE +#define EFA_SUPPORTED_ACCESS_FLAGS \ + (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ) struct pbl_chunk { dma_addr_t dma_addr; @@ -1382,7 +1383,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, params.pd = to_epd(ibpd)->pdn; params.iova = virt_addr; params.mr_length_in_bytes = length; - params.permissions = access_flags & 0x1; + params.permissions = access_flags; pg_sz = ib_umem_find_best_pgsz(mr->umem, dev->dev_attr.page_size_cap, -- cgit v1.2.3-59-g8ed1b From 666e8ff535d401eb286fa20446e55ae984d91049 Mon Sep 17 00:00:00 2001 From: Daniel Kranzdorf Date: Thu, 21 Nov 2019 16:15:09 +0200 Subject: RDMA/efa: Expose RDMA read related attributes Query the device attributes for RDMA operations, including maximum transfer size and maximum number of SGEs per RDMA WR, and report them back to the userspace library. Link: https://lore.kernel.org/r/20191121141509.59297-4-galpress@amazon.com Signed-off-by: Daniel Kranzdorf Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 17 +++++++++++++++++ drivers/infiniband/hw/efa/efa_com_cmd.c | 3 +++ drivers/infiniband/hw/efa/efa_com_cmd.h | 3 +++ drivers/infiniband/hw/efa/efa_verbs.c | 22 +++++++++++++++++----- include/uapi/rdma/efa-abi.h | 6 ++++++ 5 files changed, 46 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 7fa9d532db61..e96bcb16bd2b 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -562,6 +562,16 @@ struct efa_admin_feature_device_attr_desc { /* Indicates how many bits are used virtual address access */ u8 virt_addr_width; + + /* + * 0 : rdma_read - If set, RDMA Read is supported on + * TX queues + * 31:1 : reserved - MBZ + */ + u32 device_caps; + + /* Max RDMA transfer size in bytes */ + u32 max_rdma_size; }; struct efa_admin_feature_queue_attr_desc { @@ -608,6 +618,9 @@ struct efa_admin_feature_queue_attr_desc { /* The maximum size of LLQ in bytes */ u32 max_llq_size; + + /* Maximum number of SGEs for a single RDMA read WQE */ + u16 max_wr_rdma_sges; }; struct efa_admin_feature_aenq_desc { @@ -622,6 +635,7 @@ struct efa_admin_feature_network_attr_desc { /* Raw address data in network byte order */ u8 addr[16]; + /* max packet payload size in bytes */ u32 mtu; }; @@ -797,4 +811,7 @@ struct efa_admin_mmio_req_read_less_resp { /* get_set_feature_common_desc */ #define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) +/* feature_device_attr_desc */ +#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) + #endif /* _EFA_ADMIN_CMDS_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 520c9d920f9e..e20bd84a1014 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -444,6 +444,8 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->phys_addr_width = resp.u.device_attr.phys_addr_width; result->virt_addr_width = resp.u.device_attr.virt_addr_width; result->db_bar = resp.u.device_attr.db_bar; + result->max_rdma_size = resp.u.device_attr.max_rdma_size; + result->device_caps = resp.u.device_attr.device_caps; if (result->admin_api_version < 1) { ibdev_err_ratelimited( @@ -477,6 +479,7 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->max_ah = 
resp.u.queue_attr.max_ah; result->max_llq_size = resp.u.queue_attr.max_llq_size; result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq; + result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges; err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR); if (err) { diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index d119186c41d0..31db5a0cbd5b 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -121,9 +121,12 @@ struct efa_com_get_device_attr_result { u32 max_pd; u32 max_ah; u32 max_llq_size; + u32 max_rdma_size; + u32 device_caps; u16 sub_cqs_per_cq; u16 max_sq_sge; u16 max_rq_sge; + u16 max_wr_rdma_sge; u8 db_bar; }; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 9701f8e52c71..c9d294caa27a 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -70,9 +70,6 @@ static const char *const efa_stats_names[] = { #define EFA_CHUNK_USED_SIZE \ ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE) -#define EFA_SUPPORTED_ACCESS_FLAGS \ - (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ) - struct pbl_chunk { dma_addr_t dma_addr; u64 *buf; @@ -142,6 +139,11 @@ to_emmap(struct rdma_user_mmap_entry *rdma_entry) return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry); } +static inline bool is_rdma_read_cap(struct efa_dev *dev) +{ + return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK; +} + #define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \ FIELD_SIZEOF(typeof(x), fld) <= (sz)) @@ -201,12 +203,17 @@ int efa_query_device(struct ib_device *ibdev, dev_attr->max_rq_depth); props->max_send_sge = dev_attr->max_sq_sge; props->max_recv_sge = dev_attr->max_rq_sge; + props->max_sge_rd = dev_attr->max_wr_rdma_sge; if (udata && udata->outlen) { resp.max_sq_sge = dev_attr->max_sq_sge; resp.max_rq_sge = dev_attr->max_rq_sge; resp.max_sq_wr = dev_attr->max_sq_depth; resp.max_rq_wr = dev_attr->max_rq_depth; + resp.max_rdma_size = dev_attr->max_rdma_size; + + if (is_rdma_read_cap(dev)) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ; err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); @@ -1345,6 +1352,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, struct efa_com_reg_mr_params params = {}; struct efa_com_reg_mr_result result = {}; struct pbl_context pbl; + int supp_access_flags; unsigned int pg_sz; struct efa_mr *mr; int inline_size; @@ -1358,10 +1366,14 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, goto err_out; } - if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) { + supp_access_flags = + IB_ACCESS_LOCAL_WRITE | + (is_rdma_read_cap(dev) ? 
IB_ACCESS_REMOTE_READ : 0); + + if (access_flags & ~supp_access_flags) { ibdev_dbg(&dev->ibdev, "Unsupported access flags[%#x], supported[%#x]\n", - access_flags, EFA_SUPPORTED_ACCESS_FLAGS); + access_flags, supp_access_flags); err = -EOPNOTSUPP; goto err_out; } diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index 9599a2a62be8..53b6e2036a9b 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -90,12 +90,18 @@ struct efa_ibv_create_ah_resp { __u8 reserved_30[2]; }; +enum { + EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0, +}; + struct efa_ibv_ex_query_device_resp { __u32 comp_mask; __u32 max_sq_wr; __u32 max_rq_wr; __u16 max_sq_sge; __u16 max_rq_sge; + __u32 max_rdma_size; + __u32 device_caps; }; #endif /* EFA_ABI_USER_H */ -- cgit v1.2.3-59-g8ed1b From e2b2744a06d35ba44f32c86e0579d986931187b3 Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Mon, 18 Nov 2019 10:34:50 +0800 Subject: RDMA/hns: Redefine interfaces used in creating cq Some interfaces defined with unnecessary input parameters, such as "nent" and "vector". This patch redefined these interfaces to make the code more readable and simple. Link: https://lore.kernel.org/r/1574044493-46984-2-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 83 ++++++++++++++--------------- drivers/infiniband/hw/hns/hns_roce_device.h | 6 +-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 9 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 10 ++-- 4 files changed, 53 insertions(+), 55 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index cde2960328df..87d9b0fb84c8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -82,9 +82,8 @@ static int hns_roce_hw_create_cq(struct hns_roce_dev *dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, - struct hns_roce_mtt *hr_mtt, - struct hns_roce_cq *hr_cq, int vector) +static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq) { struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_hem_table *mtt_table; @@ -103,18 +102,13 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, mtt_table = &hr_dev->mr_table.mtt_table; mtts = hns_roce_table_find(hr_dev, mtt_table, - hr_mtt->first_seg, &dma_handle); + hr_cq->hr_buf.hr_mtt.first_seg, + &dma_handle); if (!mtts) { dev_err(dev, "Failed to find mtt for CQ buf.\n"); return -EINVAL; } - if (vector >= hr_dev->caps.num_comp_vectors) { - dev_err(dev, "Invalid vector(0x%x) for CQ alloc.\n", vector); - return -EINVAL; - } - hr_cq->vector = vector; - ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); if (ret) { dev_err(dev, "Num of CQ out of range.\n"); @@ -143,8 +137,7 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, goto err_xa; } - hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle, - nent, vector); + hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle); /* Send mailbox to hw */ ret = hns_roce_hw_create_cq(hr_dev, mailbox, hr_cq->cqn); @@ -210,15 +203,18 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) } static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, - struct ib_udata *udata, - struct hns_roce_cq_buf *buf, - struct ib_umem **umem, u64 buf_addr, int cqe) + struct hns_roce_cq *hr_cq, + 
struct hns_roce_ib_create_cq ucmd, + struct ib_udata *udata) { - int ret; + struct hns_roce_cq_buf *buf = &hr_cq->hr_buf; + struct ib_umem **umem = &hr_cq->umem; u32 page_shift; u32 npages; + int ret; - *umem = ib_umem_get(udata, buf_addr, cqe * hr_dev->caps.cq_entry_sz, + *umem = ib_umem_get(udata, ucmd.buf_addr, + hr_cq->cq_depth * hr_dev->caps.cq_entry_sz, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); @@ -257,10 +253,12 @@ err_buf: } static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, - struct hns_roce_cq_buf *buf, u32 nent) + struct hns_roce_cq *hr_cq) { - int ret; + struct hns_roce_cq_buf *buf = &hr_cq->hr_buf; u32 page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; + u32 nent = hr_cq->cq_depth; + int ret; ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz, (1 << page_shift) * 2, &buf->hr_buf, @@ -295,17 +293,16 @@ out: } static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, - struct hns_roce_cq_buf *buf, int cqe) + struct hns_roce_cq *hr_cq) { - hns_roce_buf_free(hr_dev, (cqe + 1) * hr_dev->caps.cq_entry_sz, - &buf->hr_buf); + hns_roce_buf_free(hr_dev, hr_cq->cq_depth * hr_dev->caps.cq_entry_sz, + &hr_cq->hr_buf.hr_buf); } static int create_user_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, struct ib_udata *udata, - struct hns_roce_ib_create_cq_resp *resp, - int cq_entries) + struct hns_roce_ib_create_cq_resp *resp) { struct hns_roce_ib_create_cq ucmd; struct device *dev = hr_dev->dev; @@ -319,9 +316,7 @@ static int create_user_cq(struct hns_roce_dev *hr_dev, } /* Get user space address, write it into mtt table */ - ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf, - &hr_cq->umem, ucmd.buf_addr, - cq_entries); + ret = hns_roce_ib_get_cq_umem(hr_dev, hr_cq, ucmd, udata); if (ret) { dev_err(dev, "Failed to get_cq_umem.\n"); return ret; @@ -349,7 +344,7 @@ err_mtt: } static int create_kernel_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq, int cq_entries) + struct hns_roce_cq *hr_cq) { struct device *dev = hr_dev->dev; int ret; @@ -365,7 +360,7 @@ static int create_kernel_cq(struct hns_roce_dev *hr_dev, } /* Init mtt table and write buff address to mtt table */ - ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf, cq_entries); + ret = hns_roce_ib_alloc_cq_buf(hr_dev, hr_cq); if (ret) { dev_err(dev, "Failed to alloc_cq_buf.\n"); goto err_db; @@ -403,7 +398,7 @@ static void destroy_kernel_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, hr_cq->ib_cq.cqe); + hns_roce_ib_free_cq_buf(hr_dev, hr_cq); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) hns_roce_free_db(hr_dev, &hr_cq->db); @@ -414,11 +409,11 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); - struct device *dev = hr_dev->dev; struct hns_roce_ib_create_cq_resp resp = {}; struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); + struct device *dev = hr_dev->dev; int vector = attr->comp_vector; - int cq_entries = attr->cqe; + u32 cq_entries = attr->cqe; int ret; if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { @@ -427,21 +422,27 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, return -EINVAL; } - if (hr_dev->caps.min_cqes) - cq_entries = max(cq_entries, hr_dev->caps.min_cqes); + if (vector >= hr_dev->caps.num_comp_vectors) { + dev_err(dev, "Create CQ failed, vector=%d, max=%d\n", + vector, hr_dev->caps.num_comp_vectors); + return 
-EINVAL; + } - cq_entries = roundup_pow_of_two((unsigned int)cq_entries); - hr_cq->ib_cq.cqe = cq_entries - 1; + cq_entries = max(cq_entries, hr_dev->caps.min_cqes); + cq_entries = roundup_pow_of_two(cq_entries); + hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */ + hr_cq->cq_depth = cq_entries; + hr_cq->vector = vector; spin_lock_init(&hr_cq->lock); if (udata) { - ret = create_user_cq(hr_dev, hr_cq, udata, &resp, cq_entries); + ret = create_user_cq(hr_dev, hr_cq, udata, &resp); if (ret) { dev_err(dev, "Create cq failed in user mode!\n"); goto err_cq; } } else { - ret = create_kernel_cq(hr_dev, hr_cq, cq_entries); + ret = create_kernel_cq(hr_dev, hr_cq); if (ret) { dev_err(dev, "Create cq failed in kernel mode!\n"); goto err_cq; @@ -449,8 +450,7 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, } /* Allocate cq index, fill cq_context */ - ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, - hr_cq, vector); + ret = hns_roce_cq_alloc(hr_dev, hr_cq); if (ret) { dev_err(dev, "Alloc CQ failed(%d).\n", ret); goto err_dbmap; @@ -468,7 +468,6 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, /* Get created cq handler and carry out event */ hr_cq->comp = hns_roce_ib_cq_comp; hr_cq->event = hns_roce_ib_cq_event; - hr_cq->cq_depth = cq_entries; if (udata) { resp.cqn = hr_cq->cqn; @@ -515,7 +514,7 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) &hr_cq->db); } else { /* Free the buff of stored cq */ - hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe); + hns_roce_ib_free_cq_buf(hr_dev, hr_cq); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) hns_roce_free_db(hr_dev, &hr_cq->db); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a1b712e9d454..8f628a772644 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -812,8 +812,8 @@ struct hns_roce_caps { int max_qp_init_rdma; int max_qp_dest_rdma; int num_cqs; - int max_cqes; - int min_cqes; + u32 max_cqes; + u32 min_cqes; u32 min_wqes; int reserved_cqs; int reserved_srqs; @@ -944,7 +944,7 @@ struct hns_roce_hw { int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw); void (*write_cqc)(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, - dma_addr_t dma_handle, int nent, u32 vector); + dma_addr_t dma_handle); int (*set_hem)(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, int step_idx); int (*clear_hem)(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 89a4c3afdda1..600d9f91889a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1990,7 +1990,7 @@ static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */ return (roce_get_bit(hr_cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S) ^ - !!(n & (hr_cq->ib_cq.cqe + 1))) ? hr_cqe : NULL; + !!(n & hr_cq->cq_depth)) ? 
hr_cqe : NULL; } static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq) @@ -2073,8 +2073,7 @@ static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, - u64 *mtts, dma_addr_t dma_handle, int nent, - u32 vector) + u64 *mtts, dma_addr_t dma_handle) { struct hns_roce_cq_context *cq_context = NULL; struct hns_roce_buf_list *tptr_buf; @@ -2109,9 +2108,9 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M, CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S, - ilog2((unsigned int)nent)); + ilog2(hr_cq->cq_depth)); roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M, - CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector); + CQ_CONTEXT_CQC_BYTE_12_CEQN_S, hr_cq->vector); cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0])); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 907c95149cb9..77c9d7fdd9bc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2457,7 +2457,7 @@ static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */ return (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_OWNER_S) ^ - !!(n & (hr_cq->ib_cq.cqe + 1))) ? cqe : NULL; + !!(n & hr_cq->cq_depth)) ? cqe : NULL; } static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) @@ -2550,8 +2550,7 @@ static void hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, - u64 *mtts, dma_addr_t dma_handle, int nent, - u32 vector) + u64 *mtts, dma_addr_t dma_handle) { struct hns_roce_v2_cq_context *cq_context; @@ -2563,9 +2562,10 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_ARM_ST_M, V2_CQC_BYTE_4_ARM_ST_S, REG_NXT_CEQE); roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_SHIFT_M, - V2_CQC_BYTE_4_SHIFT_S, ilog2((unsigned int)nent)); + V2_CQC_BYTE_4_SHIFT_S, + ilog2(hr_cq->cq_depth)); roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M, - V2_CQC_BYTE_4_CEQN_S, vector); + V2_CQC_BYTE_4_CEQN_S, hr_cq->vector); roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M, V2_CQC_BYTE_8_CQN_S, hr_cq->cqn); -- cgit v1.2.3-59-g8ed1b From 18a96d25ce84a365fbe9ddcb887ade80eb3a6017 Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Mon, 18 Nov 2019 10:34:51 +0800 Subject: RDMA/hns: Redefine the member of hns_roce_cq struct There is no need to package buf and mtt into hns_roce_cq_buf, which will make code more complex, just delete this struct and move buf and mtt into hns_roce_cq. Furthermore, we add size member for hns_roce_buf to avoid repeatly calculating where needed it. 
Link: https://lore.kernel.org/r/1574044493-46984-3-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 73 ++++++++++++----------------- drivers/infiniband/hw/hns/hns_roce_device.h | 9 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 9 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 +- 4 files changed, 38 insertions(+), 56 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 87d9b0fb84c8..e9c94536c59a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -101,9 +101,9 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, else mtt_table = &hr_dev->mr_table.mtt_table; - mtts = hns_roce_table_find(hr_dev, mtt_table, - hr_cq->hr_buf.hr_mtt.first_seg, + mtts = hns_roce_table_find(hr_dev, mtt_table, hr_cq->mtt.first_seg, &dma_handle); + if (!mtts) { dev_err(dev, "Failed to find mtt for CQ buf.\n"); return -EINVAL; @@ -207,45 +207,36 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_ib_create_cq ucmd, struct ib_udata *udata) { - struct hns_roce_cq_buf *buf = &hr_cq->hr_buf; + struct hns_roce_buf *buf = &hr_cq->buf; + struct hns_roce_mtt *mtt = &hr_cq->mtt; struct ib_umem **umem = &hr_cq->umem; - u32 page_shift; u32 npages; int ret; - *umem = ib_umem_get(udata, ucmd.buf_addr, - hr_cq->cq_depth * hr_dev->caps.cq_entry_sz, + *umem = ib_umem_get(udata, ucmd.buf_addr, buf->size, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - buf->hr_mtt.mtt_type = MTT_TYPE_CQE; + mtt->mtt_type = MTT_TYPE_CQE; else - buf->hr_mtt.mtt_type = MTT_TYPE_WQE; - - if (hr_dev->caps.cqe_buf_pg_sz) { - npages = (ib_umem_page_count(*umem) + - (1 << hr_dev->caps.cqe_buf_pg_sz) - 1) / - (1 << hr_dev->caps.cqe_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, npages, page_shift, - &buf->hr_mtt); - } else { - ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem), - PAGE_SHIFT, &buf->hr_mtt); - } + mtt->mtt_type = MTT_TYPE_WQE; + + npages = DIV_ROUND_UP(ib_umem_page_count(*umem), + 1 << hr_dev->caps.cqe_buf_pg_sz); + ret = hns_roce_mtt_init(hr_dev, npages, buf->page_shift, mtt); if (ret) goto err_buf; - ret = hns_roce_ib_umem_write_mtt(hr_dev, &buf->hr_mtt, *umem); + ret = hns_roce_ib_umem_write_mtt(hr_dev, mtt, *umem); if (ret) goto err_mtt; return 0; err_mtt: - hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt); + hns_roce_mtt_cleanup(hr_dev, mtt); err_buf: ib_umem_release(*umem); @@ -255,39 +246,36 @@ err_buf: static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - struct hns_roce_cq_buf *buf = &hr_cq->hr_buf; - u32 page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; - u32 nent = hr_cq->cq_depth; + struct hns_roce_buf *buf = &hr_cq->buf; + struct hns_roce_mtt *mtt = &hr_cq->mtt; int ret; - ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz, - (1 << page_shift) * 2, &buf->hr_buf, - page_shift); + ret = hns_roce_buf_alloc(hr_dev, buf->size, (1 << buf->page_shift) * 2, + buf, buf->page_shift); if (ret) goto out; if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - buf->hr_mtt.mtt_type = MTT_TYPE_CQE; + mtt->mtt_type = MTT_TYPE_CQE; else - buf->hr_mtt.mtt_type = MTT_TYPE_WQE; + mtt->mtt_type = MTT_TYPE_WQE; - ret = hns_roce_mtt_init(hr_dev, buf->hr_buf.npages, - 
buf->hr_buf.page_shift, &buf->hr_mtt); + ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, mtt); if (ret) goto err_buf; - ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, &buf->hr_buf); + ret = hns_roce_buf_write_mtt(hr_dev, mtt, buf); if (ret) goto err_mtt; return 0; err_mtt: - hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt); + hns_roce_mtt_cleanup(hr_dev, mtt); err_buf: - hns_roce_buf_free(hr_dev, nent * hr_dev->caps.cq_entry_sz, - &buf->hr_buf); + hns_roce_buf_free(hr_dev, buf->size, buf); + out: return ret; } @@ -295,8 +283,7 @@ out: static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - hns_roce_buf_free(hr_dev, hr_cq->cq_depth * hr_dev->caps.cq_entry_sz, - &hr_cq->hr_buf.hr_buf); + hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); } static int create_user_cq(struct hns_roce_dev *hr_dev, @@ -337,7 +324,7 @@ static int create_user_cq(struct hns_roce_dev *hr_dev, return 0; err_mtt: - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); return ret; @@ -390,14 +377,14 @@ static void destroy_user_cq(struct hns_roce_dev *hr_dev, (udata->outlen >= sizeof(*resp))) hns_roce_db_unmap_user(context, &hr_cq->db); - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); } static void destroy_kernel_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); hns_roce_ib_free_cq_buf(hr_dev, hr_cq); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) @@ -433,6 +420,8 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */ hr_cq->cq_depth = cq_entries; hr_cq->vector = vector; + hr_cq->buf.size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; + hr_cq->buf.page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; spin_lock_init(&hr_cq->lock); if (udata) { @@ -502,7 +491,7 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) } hns_roce_free_cq(hr_dev, hr_cq); - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); if (udata) { diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 8f628a772644..608ec5936d71 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -448,6 +448,7 @@ struct hns_roce_buf { struct hns_roce_buf_list *page_list; int nbufs; u32 npages; + u32 size; int page_shift; }; @@ -479,14 +480,10 @@ struct hns_roce_db { int order; }; -struct hns_roce_cq_buf { - struct hns_roce_buf hr_buf; - struct hns_roce_mtt hr_mtt; -}; - struct hns_roce_cq { struct ib_cq ib_cq; - struct hns_roce_cq_buf hr_buf; + struct hns_roce_buf buf; + struct hns_roce_mtt mtt; struct hns_roce_db db; u8 db_en; spinlock_t lock; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 600d9f91889a..9a00361e96aa 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1980,8 +1980,7 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, static void *get_cqe(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, - n * HNS_ROCE_V1_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V1_CQE_ENTRY_SIZE); } 
static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) @@ -3655,7 +3654,6 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) struct device *dev = &hr_dev->pdev->dev; u32 cqe_cnt_ori; u32 cqe_cnt_cur; - u32 cq_buf_size; int wait_time = 0; hns_roce_free_cq(hr_dev, hr_cq); @@ -3683,13 +3681,12 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) wait_time++; } - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); if (!udata) { /* Free the buff of stored cq */ - cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz; - hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf); + hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); } } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 77c9d7fdd9bc..1026ac6e9440 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2447,8 +2447,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, - n * HNS_ROCE_V2_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V2_CQE_ENTRY_SIZE); } static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) -- cgit v1.2.3-59-g8ed1b From 707783ab5f48f054f8da3114ddcdf1685a313a63 Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Mon, 18 Nov 2019 10:34:52 +0800 Subject: RDMA/hns: Rename the functions used inside creating cq Current names of functions are not proper, such as hns_roce_free_cq, actually it means free cqc, thus we rename them. Furthermore, functions used inside one file can be named without the prefix hns_roce_ which will make the functions for verbs symbols more eye-catching. 
Link: https://lore.kernel.org/r/1574044493-46984-4-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cmd.h | 4 +- drivers/infiniband/hw/hns/hns_roce_cq.c | 66 ++++++++++------------------- drivers/infiniband/hw/hns/hns_roce_device.h | 9 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 8 ++-- drivers/infiniband/hw/hns/hns_roce_main.c | 4 +- 5 files changed, 35 insertions(+), 56 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index cd3ed2b462bd..1915bacaded0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -119,8 +119,8 @@ enum { HNS_ROCE_CMD_DESTROY_MPT = 0xf, /* CQ commands */ - HNS_ROCE_CMD_CREATE_CQ = 0x16, - HNS_ROCE_CMD_DESTROY_CQ = 0x17, + HNS_ROCE_CMD_CREATE_CQC = 0x16, + HNS_ROCE_CMD_DESTROY_CQC = 0x17, /* QP/EE commands */ HNS_ROCE_CMD_RST2INIT_QP = 0x19, diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index e9c94536c59a..9174d33bdfee 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -73,17 +73,8 @@ static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq, } } -static int hns_roce_hw_create_cq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long cq_num) -{ - return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0, - HNS_ROCE_CMD_CREATE_CQ, - HNS_ROCE_CMD_TIMEOUT_MSECS); -} - -static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) +static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq) { struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_hem_table *mtt_table; @@ -140,7 +131,8 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle); /* Send mailbox to hw */ - ret = hns_roce_hw_create_cq(hr_dev, mailbox, hr_cq->cqn); + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_cq->cqn, 0, + HNS_ROCE_CMD_CREATE_CQC, HNS_ROCE_CMD_TIMEOUT_MSECS); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { dev_err(dev, @@ -168,22 +160,15 @@ err_out: return ret; } -static int hns_roce_hw_destroy_cq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long cq_num) -{ - return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num, - mailbox ? 
0 : 1, HNS_ROCE_CMD_DESTROY_CQ, - HNS_ROCE_CMD_TIMEOUT_MSECS); -} - -void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct device *dev = hr_dev->dev; int ret; - ret = hns_roce_hw_destroy_cq(hr_dev, NULL, hr_cq->cqn); + ret = hns_roce_cmd_mbox(hr_dev, 0, 0, hr_cq->cqn, 1, + HNS_ROCE_CMD_DESTROY_CQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); if (ret) dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); @@ -202,10 +187,9 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } -static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq, - struct hns_roce_ib_create_cq ucmd, - struct ib_udata *udata) +static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct hns_roce_ib_create_cq ucmd, + struct ib_udata *udata) { struct hns_roce_buf *buf = &hr_cq->buf; struct hns_roce_mtt *mtt = &hr_cq->mtt; @@ -243,8 +227,7 @@ err_buf: return ret; } -static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) +static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_buf *buf = &hr_cq->buf; struct hns_roce_mtt *mtt = &hr_cq->mtt; @@ -280,8 +263,7 @@ out: return ret; } -static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) +static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); } @@ -303,7 +285,7 @@ static int create_user_cq(struct hns_roce_dev *hr_dev, } /* Get user space address, write it into mtt table */ - ret = hns_roce_ib_get_cq_umem(hr_dev, hr_cq, ucmd, udata); + ret = get_cq_umem(hr_dev, hr_cq, ucmd, udata); if (ret) { dev_err(dev, "Failed to get_cq_umem.\n"); return ret; @@ -347,7 +329,7 @@ static int create_kernel_cq(struct hns_roce_dev *hr_dev, } /* Init mtt table and write buff address to mtt table */ - ret = hns_roce_ib_alloc_cq_buf(hr_dev, hr_cq); + ret = alloc_cq_buf(hr_dev, hr_cq); if (ret) { dev_err(dev, "Failed to alloc_cq_buf.\n"); goto err_db; @@ -385,15 +367,14 @@ static void destroy_kernel_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - hns_roce_ib_free_cq_buf(hr_dev, hr_cq); + free_cq_buf(hr_dev, hr_cq); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) hns_roce_free_db(hr_dev, &hr_cq->db); } -int hns_roce_ib_create_cq(struct ib_cq *ib_cq, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_ib_create_cq_resp resp = {}; @@ -438,8 +419,7 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, } } - /* Allocate cq index, fill cq_context */ - ret = hns_roce_cq_alloc(hr_dev, hr_cq); + ret = hns_roce_alloc_cqc(hr_dev, hr_cq); if (ret) { dev_err(dev, "Alloc CQ failed(%d).\n", ret); goto err_dbmap; @@ -468,7 +448,7 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, return 0; err_cqc: - hns_roce_free_cq(hr_dev, hr_cq); + hns_roce_free_cqc(hr_dev, hr_cq); err_dbmap: if (udata) @@ -480,7 +460,7 @@ err_cq: return ret; } -void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +void hns_roce_destroy_cq(struct ib_cq 
*ib_cq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); @@ -490,7 +470,7 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) return; } - hns_roce_free_cq(hr_dev, hr_cq); + hns_roce_free_cqc(hr_dev, hr_cq); hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); @@ -503,7 +483,7 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) &hr_cq->db); } else { /* Free the buff of stored cq */ - hns_roce_ib_free_cq_buf(hr_dev, hr_cq); + free_cq_buf(hr_dev, hr_cq); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) hns_roce_free_db(hr_dev, &hr_cq->db); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 608ec5936d71..152701e4a07f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1240,12 +1240,11 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, __be32 send_ieth(const struct ib_send_wr *wr); int to_hr_qp_type(int qp_type); -int hns_roce_ib_create_cq(struct ib_cq *ib_cq, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); +int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); -void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); -void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); +void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); int hns_roce_db_map_user(struct hns_roce_ucontext *context, struct ib_udata *udata, unsigned long virt, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 9a00361e96aa..2a2b2112f886 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -732,7 +732,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) if (!cq) return -ENOMEM; - ret = hns_roce_ib_create_cq(cq, &cq_init_attr, NULL); + ret = hns_roce_create_cq(cq, &cq_init_attr, NULL); if (ret) { dev_err(dev, "Create cq for reserved loop qp failed!"); goto alloc_cq_failed; @@ -868,7 +868,7 @@ alloc_pd_failed: kfree(pd); alloc_mem_failed: - hns_roce_ib_destroy_cq(cq, NULL); + hns_roce_destroy_cq(cq, NULL); alloc_cq_failed: kfree(cq); return ret; @@ -897,7 +897,7 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) i, ret); } - hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL); + hns_roce_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL); kfree(&free_mr->mr_free_cq->ib_cq); hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd, NULL); kfree(&free_mr->mr_free_pd->ibpd); @@ -3656,7 +3656,7 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) u32 cqe_cnt_cur; int wait_time = 0; - hns_roce_free_cq(hr_dev, hr_cq); + hns_roce_free_cqc(hr_dev, hr_cq); /* * Before freeing cq buffer, we need to ensure that the outstanding CQE diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 066f01c45b5a..854ef6e74788 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -419,14 +419,14 @@ static const struct ib_device_ops hns_roce_dev_ops = { .alloc_pd = hns_roce_alloc_pd, .alloc_ucontext = hns_roce_alloc_ucontext, .create_ah = hns_roce_create_ah, - .create_cq = hns_roce_ib_create_cq, + 
.create_cq = hns_roce_create_cq, .create_qp = hns_roce_create_qp, .dealloc_pd = hns_roce_dealloc_pd, .dealloc_ucontext = hns_roce_dealloc_ucontext, .del_gid = hns_roce_del_gid, .dereg_mr = hns_roce_dereg_mr, .destroy_ah = hns_roce_destroy_ah, - .destroy_cq = hns_roce_ib_destroy_cq, + .destroy_cq = hns_roce_destroy_cq, .disassociate_ucontext = hns_roce_disassociate_ucontext, .fill_res_entry = hns_roce_fill_res_entry, .get_dma_mr = hns_roce_get_dma_mr, -- cgit v1.2.3-59-g8ed1b From f295e4cece5cb4c60715fed539abcd62468f9ef1 Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Mon, 18 Nov 2019 10:34:53 +0800 Subject: RDMA/hns: Delete unnecessary callback functions for cq Currently, when cq event occurred, we first call our own callback functions in the event process function, then call ib callback functions. Actually, we can directly call ib callback functions. Link: https://lore.kernel.org/r/1574044493-46984-5-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 91 ++++++++++++----------------- drivers/infiniband/hw/hns/hns_roce_device.h | 3 - 2 files changed, 36 insertions(+), 58 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 9174d33bdfee..af1d8823b3f0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -39,40 +39,6 @@ #include #include "hns_roce_common.h" -static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq) -{ - struct ib_cq *ibcq = &hr_cq->ib_cq; - - ibcq->comp_handler(ibcq, ibcq->cq_context); -} - -static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq, - enum hns_roce_event event_type) -{ - struct hns_roce_dev *hr_dev; - struct ib_event event; - struct ib_cq *ibcq; - - ibcq = &hr_cq->ib_cq; - hr_dev = to_hr_dev(ibcq->device); - - if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID && - event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR && - event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) { - dev_err(hr_dev->dev, - "hns_roce_ib: Unexpected event type 0x%x on CQ %06lx\n", - event_type, hr_cq->cqn); - return; - } - - if (ibcq->event_handler) { - event.device = ibcq->device; - event.event = IB_EVENT_CQ_ERR; - event.element.cq = ibcq; - ibcq->event_handler(&event, ibcq->cq_context); - } -} - static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { @@ -434,10 +400,6 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, if (!udata && hr_cq->tptr_addr) *hr_cq->tptr_addr = 0; - /* Get created cq handler and carry out event */ - hr_cq->comp = hns_roce_ib_cq_comp; - hr_cq->event = hns_roce_ib_cq_event; - if (udata) { resp.cqn = hr_cq->cqn; ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); @@ -491,38 +453,57 @@ void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) { - struct device *dev = hr_dev->dev; - struct hns_roce_cq *cq; + struct hns_roce_cq *hr_cq; + struct ib_cq *ibcq; - cq = xa_load(&hr_dev->cq_table.array, cqn & (hr_dev->caps.num_cqs - 1)); - if (!cq) { - dev_warn(dev, "Completion event for bogus CQ 0x%08x\n", cqn); + hr_cq = xa_load(&hr_dev->cq_table.array, + cqn & (hr_dev->caps.num_cqs - 1)); + if (!hr_cq) { + dev_warn(hr_dev->dev, "Completion event for bogus CQ 0x%06x\n", + cqn); return; } - ++cq->arm_sn; - cq->comp(cq); + ++hr_cq->arm_sn; + ibcq = &hr_cq->ib_cq; + if (ibcq->comp_handler) + 
ibcq->comp_handler(ibcq, ibcq->cq_context); } void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) { - struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct device *dev = hr_dev->dev; - struct hns_roce_cq *cq; + struct hns_roce_cq *hr_cq; + struct ib_event event; + struct ib_cq *ibcq; - cq = xa_load(&cq_table->array, cqn & (hr_dev->caps.num_cqs - 1)); - if (cq) - atomic_inc(&cq->refcount); + hr_cq = xa_load(&hr_dev->cq_table.array, + cqn & (hr_dev->caps.num_cqs - 1)); + if (!hr_cq) { + dev_warn(dev, "Async event for bogus CQ 0x%06x\n", cqn); + return; + } - if (!cq) { - dev_warn(dev, "Async event for bogus CQ %08x\n", cqn); + if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID && + event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR && + event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) { + dev_err(dev, "Unexpected event type 0x%x on CQ 0x%06x\n", + event_type, cqn); return; } - cq->event(cq, (enum hns_roce_event)event_type); + atomic_inc(&hr_cq->refcount); - if (atomic_dec_and_test(&cq->refcount)) - complete(&cq->free); + ibcq = &hr_cq->ib_cq; + if (ibcq->event_handler) { + event.device = ibcq->device; + event.element.cq = ibcq; + event.event = IB_EVENT_CQ_ERR; + ibcq->event_handler(&event, ibcq->cq_context); + } + + if (atomic_dec_and_test(&hr_cq->refcount)) + complete(&hr_cq->free); } int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 152701e4a07f..5617434cbfb4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -488,9 +488,6 @@ struct hns_roce_cq { u8 db_en; spinlock_t lock; struct ib_umem *umem; - void (*comp)(struct hns_roce_cq *cq); - void (*event)(struct hns_roce_cq *cq, enum hns_roce_event event_type); - u32 cq_depth; u32 cons_index; u32 *set_ci_db; -- cgit v1.2.3-59-g8ed1b
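
The two EFA patches earlier in this series add remote read permission to MR registration and report the RDMA read limits (max_rdma_size, max_wr_rdma_sges) back to userspace. As a minimal userspace sketch of how that shows up through libibverbs — an illustration only, not part of any patch above, and assuming an RDMA-read-capable device is present — an application registers its buffer with IBV_ACCESS_REMOTE_READ and can read back the max_sge_rd value that efa_query_device() now fills from max_wr_rdma_sge:

/*
 * Illustrative userspace sketch (not part of the patches above):
 * query the RDMA read SGE limit and register an MR that a remote
 * peer may read via RDMA Read. Error handling is kept minimal.
 */
#include <stdio.h>
#include <stdlib.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **dev_list = ibv_get_device_list(NULL);
	struct ibv_device_attr dev_attr;
	struct ibv_context *ctx;
	struct ibv_pd *pd;
	struct ibv_mr *mr;
	size_t len = 4096;
	void *buf;

	if (!dev_list || !dev_list[0])
		return 1;

	ctx = ibv_open_device(dev_list[0]);
	if (!ctx)
		return 1;

	/* max_sge_rd is the per-WR RDMA read SGE limit reported by the driver */
	if (ibv_query_device(ctx, &dev_attr))
		return 1;
	printf("max_sge_rd: %d\n", dev_attr.max_sge_rd);

	pd = ibv_alloc_pd(ctx);
	buf = calloc(1, len);
	if (!pd || !buf)
		return 1;

	/* Register a buffer that a remote peer is allowed to read via RDMA Read */
	mr = ibv_reg_mr(pd, buf, len,
			IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ);
	if (!mr) {
		perror("ibv_reg_mr");
		return 1;
	}

	ibv_dereg_mr(mr);
	ibv_dealloc_pd(pd);
	ibv_close_device(ctx);
	ibv_free_device_list(dev_list);
	free(buf);
	return 0;
}

Whether remote read is actually usable on a given EFA device is still gated by the rdma_read bit in device_caps exposed by these patches; on a device without it, efa_reg_mr() rejects IBV_ACCESS_REMOTE_READ with -EOPNOTSUPP, as seen in the efa_verbs.c hunk above.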