From 9533b292a7acc62c294ebcbd9e1f9f9d52adb10b Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 3 May 2018 20:29:19 -0700 Subject: IB: remove redundant INFINIBAND kconfig dependencies INFINIBAND_ADDR_TRANS depends on INFINIBAND. So there's no need for options which depend INFINIBAND_ADDR_TRANS to also depend on INFINIBAND. Remove the unnecessary INFINIBAND depends. Signed-off-by: Greg Thelen Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/Kconfig | 2 +- drivers/nvme/host/Kconfig | 2 +- drivers/nvme/target/Kconfig | 2 +- drivers/staging/lustre/lnet/Kconfig | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/srpt/Kconfig b/drivers/infiniband/ulp/srpt/Kconfig index fb8b7182f05e..25bf6955b6d0 100644 --- a/drivers/infiniband/ulp/srpt/Kconfig +++ b/drivers/infiniband/ulp/srpt/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_SRPT tristate "InfiniBand SCSI RDMA Protocol target support" - depends on INFINIBAND && INFINIBAND_ADDR_TRANS && TARGET_CORE + depends on INFINIBAND_ADDR_TRANS && TARGET_CORE ---help--- Support for the SCSI RDMA Protocol (SRP) Target driver. The diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 88a8b5916624..dbb7464c018c 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -27,7 +27,7 @@ config NVME_FABRICS config NVME_RDMA tristate "NVM Express over Fabrics RDMA host driver" - depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK + depends on INFINIBAND_ADDR_TRANS && BLOCK select NVME_CORE select NVME_FABRICS select SG_POOL diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 3c7b61ddb0d1..7595664ee753 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -27,7 +27,7 @@ config NVME_TARGET_LOOP config NVME_TARGET_RDMA tristate "NVMe over Fabrics RDMA target support" - depends on INFINIBAND && INFINIBAND_ADDR_TRANS + depends on INFINIBAND_ADDR_TRANS depends on NVME_TARGET select SGL_ALLOC help diff --git a/drivers/staging/lustre/lnet/Kconfig b/drivers/staging/lustre/lnet/Kconfig index ad049e6f24e4..f3b1ad4bd3dc 100644 --- a/drivers/staging/lustre/lnet/Kconfig +++ b/drivers/staging/lustre/lnet/Kconfig @@ -34,7 +34,7 @@ config LNET_SELFTEST config LNET_XPRT_IB tristate "LNET infiniband support" - depends on LNET && PCI && INFINIBAND && INFINIBAND_ADDR_TRANS + depends on LNET && PCI && INFINIBAND_ADDR_TRANS default LNET && INFINIBAND help This option allows the LNET users to use infiniband as an -- cgit v1.2.3-59-g8ed1b From f9e76ca3771bf23d2142a81a88ddd8f31f5c4c03 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Wed, 2 May 2018 06:42:51 -0700 Subject: IB/hfi1: Use after free race condition in send context error path A pio send egress error can occur when the PSM library attempts to to send a bad packet. That issue is still being investigated. The pio error interrupt handler then attempts to progress the recovery of the errored pio send context. Code inspection reveals that the handling lacks the necessary locking if that recovery interleaves with a PSM close of the "context" object contains the pio send context. The lack of the locking can cause the recovery to access the already freed pio send context object and incorrectly deduce that the pio send context is actually a kernel pio send context as shown by the NULL deref stack below: [] _dev_info+0x6c/0x90 [] sc_restart+0x70/0x1f0 [hfi1] [] ? __schedule+0x424/0x9b0 [] sc_halted+0x15/0x20 [hfi1] [] process_one_work+0x17a/0x440 [] worker_thread+0x126/0x3c0 [] ? 
manage_workers.isra.24+0x2a0/0x2a0 [] kthread+0xcf/0xe0 [] ? insert_kthread_work+0x40/0x40 [] ret_from_fork+0x58/0x90 [] ? insert_kthread_work+0x40/0x40 This is the best case scenario and other scenarios can corrupt the already freed memory. Fix by adding the necessary locking in the pio send context error handler. Cc: # 4.9.x Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index e6a60fa59f2b..e6bdd0c1e80a 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -5944,6 +5944,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, u64 status; u32 sw_index; int i = 0; + unsigned long irq_flags; sw_index = dd->hw_to_sw[hw_context]; if (sw_index >= dd->num_send_contexts) { @@ -5953,10 +5954,12 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, return; } sci = &dd->send_contexts[sw_index]; + spin_lock_irqsave(&dd->sc_lock, irq_flags); sc = sci->sc; if (!sc) { dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__, sw_index, hw_context); + spin_unlock_irqrestore(&dd->sc_lock, irq_flags); return; } @@ -5978,6 +5981,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, */ if (sc->type != SC_USER) queue_work(dd->pport->hfi1_wq, &sc->halt_work); + spin_unlock_irqrestore(&dd->sc_lock, irq_flags); /* * Update the counters for the corresponding status bits. -- cgit v1.2.3-59-g8ed1b From 18b0362e87dfa09e355093b897b9db854e360d28 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 7 May 2018 10:20:01 +0300 Subject: RDMA/mlx5: Don't assume that medium blueFlame register exists User can leave system without medium BlueFlames registers, however the code assumed that at least one such register exists. This patch fixes that assumption. Fixes: c1be5232d21d ("IB/mlx5: Fix micro UAR allocator") Reported-by: Rohit Zambre Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 87b7c1be2a11..2193dc1765fb 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -484,11 +484,6 @@ static int qp_has_rq(struct ib_qp_init_attr *attr) return 1; } -static int first_med_bfreg(void) -{ - return 1; -} - enum { /* this is the first blue flame register in the array of bfregs assigned * to a processes. Since we do not use it for blue flame but rather @@ -514,6 +509,12 @@ static int num_med_bfreg(struct mlx5_ib_dev *dev, return n >= 0 ? n : 0; } +static int first_med_bfreg(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi) +{ + return num_med_bfreg(dev, bfregi) ? 
1 : -ENOMEM;
+}
+
 static int first_hi_bfreg(struct mlx5_ib_dev *dev,
 			  struct mlx5_bfreg_info *bfregi)
 {
@@ -541,10 +542,13 @@ static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev,
 static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
 				 struct mlx5_bfreg_info *bfregi)
 {
-	int minidx = first_med_bfreg();
+	int minidx = first_med_bfreg(dev, bfregi);
 	int i;
 
-	for (i = first_med_bfreg(); i < first_hi_bfreg(dev, bfregi); i++) {
+	if (minidx < 0)
+		return minidx;
+
+	for (i = minidx; i < first_hi_bfreg(dev, bfregi); i++) {
 		if (bfregi->count[i] < bfregi->count[minidx])
 			minidx = i;
 		if (!bfregi->count[minidx])
-- cgit v1.2.3-59-g8ed1b


From 37da2a03c036538a5a79766d74bfcf5b873e5cad Mon Sep 17 00:00:00 2001
From: Daria Velikovsky
Date: Mon, 7 May 2018 10:20:02 +0300
Subject: RDMA/mlx5: Use proper spec flow label type

Flow label is defined as u32 in the IPv6 flow spec, but was used
internally in the flow spec parsing as u8. That was causing the loss
of part of the flow_label value.

Fixes: 2d1e697e9b716 ('IB/mlx5: Add support to match inner packet fields')
Reviewed-by: Maor Gottlieb
Signed-off-by: Daria Velikovsky
Signed-off-by: Leon Romanovsky
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/mlx5/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index a42c6b1cdb5a..cd1c342be6e3 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2416,7 +2416,7 @@ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
 	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
 }
 
-static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val,
+static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
 			   bool inner)
 {
 	if (inner) {
-- cgit v1.2.3-59-g8ed1b


From 5a7189d529cd146cd5838af97b32fcac4122b471 Mon Sep 17 00:00:00 2001
From: Mustafa Ismail
Date: Mon, 7 May 2018 12:52:17 -0500
Subject: i40iw: Fix memory leak in error path of create QP

If i40iw_allocate_dma_mem fails when creating a QP, the memory
allocated for the QP structure with kzalloc is not freed, because
iwqp->allocated_buffer is used to free it and it is not set up until
later. Fix this by setting iwqp->allocated_buffer before allocating
the dma memory.
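As a rough sketch of the underlying pattern (hypothetical names, not
code from the driver): record the backing allocation inside the object
as the very first step, so a shared error path can always free through
it, no matter how early setup fails.

	/* Minimal sketch; qp_priv and setup_dma_resources() are made up. */
	struct qp_priv {
		void *allocated_buffer;	/* backing memory, valid from birth */
	};

	static struct qp_priv *qp_priv_alloc(void)
	{
		void *mem = kzalloc(sizeof(struct qp_priv), GFP_KERNEL);
		struct qp_priv *qp = mem;

		if (!mem)
			return NULL;
		qp->allocated_buffer = mem;	/* set before anything can fail */

		if (setup_dma_resources(qp))	/* hypothetical helper */
			goto error;
		return qp;

	error:
		kfree(qp->allocated_buffer);	/* always initialized here */
		return NULL;
	}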
Fixes: d37498417947 ("i40iw: add files for iwarp interface") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 40e4f5ab2b46..3ebe91438523 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -614,6 +614,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return ERR_PTR(-ENOMEM); iwqp = (struct i40iw_qp *)mem; + iwqp->allocated_buffer = mem; qp = &iwqp->sc_qp; qp->back_qp = (void *)iwqp; qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; @@ -642,7 +643,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } - iwqp->allocated_buffer = mem; iwqp->iwdev = iwdev; iwqp->iwpd = iwpd; iwqp->ibqp.qp_num = qp_num; -- cgit v1.2.3-59-g8ed1b From eeb1af4f53fa74fd41d288b113eebcdfca4d311c Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 7 May 2018 12:52:18 -0500 Subject: i40iw: Use correct address in dst_neigh_lookup for IPv6 Use of incorrect structure address for IPv6 neighbor lookup causes connections to IPv6 addresses to fail. Fix this by using correct address in call to dst_neigh_lookup. Fixes: f27b4746f378 ("i40iw: add connection management code") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 4cfa8f4647e2..f7c6fd9ff6e2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2093,7 +2093,7 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, if (netif_is_bond_slave(netdev)) netdev = netdev_master_upper_dev_get(netdev); - neigh = dst_neigh_lookup(dst, &dst_addr); + neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32); rcu_read_lock(); if (neigh) { -- cgit v1.2.3-59-g8ed1b From 1661d3b0e2183ce90f6611641c350a5aa02aaa80 Mon Sep 17 00:00:00 2001 From: Alexandru Moise <00moses.alexander00@gmail.com> Date: Tue, 8 May 2018 11:02:02 +0200 Subject: nvmet,rxe: defer ip datagram sending to tasklet This addresses 3 separate problems: 1. When using NVME over Fabrics we may end up sending IP packets in interrupt context, we should defer this work to a tasklet. 
[ 50.939957] WARNING: CPU: 3 PID: 0 at kernel/softirq.c:161 __local_bh_enable_ip+0x1f/0xa0 [ 50.942602] CPU: 3 PID: 0 Comm: swapper/3 Kdump: loaded Tainted: G W 4.17.0-rc3-ARCH+ #104 [ 50.945466] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014 [ 50.948163] RIP: 0010:__local_bh_enable_ip+0x1f/0xa0 [ 50.949631] RSP: 0018:ffff88009c183900 EFLAGS: 00010006 [ 50.951029] RAX: 0000000080010403 RBX: 0000000000000200 RCX: 0000000000000001 [ 50.952636] RDX: 0000000000000000 RSI: 0000000000000200 RDI: ffffffff817e04ec [ 50.954278] RBP: ffff88009c183910 R08: 0000000000000001 R09: 0000000000000614 [ 50.956000] R10: ffffea00021d5500 R11: 0000000000000001 R12: ffffffff817e04ec [ 50.957779] R13: 0000000000000000 R14: ffff88009566f400 R15: ffff8800956c7000 [ 50.959402] FS: 0000000000000000(0000) GS:ffff88009c180000(0000) knlGS:0000000000000000 [ 50.961552] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 50.963798] CR2: 000055c4ec0ccac0 CR3: 0000000002209001 CR4: 00000000000606e0 [ 50.966121] Call Trace: [ 50.966845] [ 50.967497] __dev_queue_xmit+0x62d/0x690 [ 50.968722] dev_queue_xmit+0x10/0x20 [ 50.969894] neigh_resolve_output+0x173/0x190 [ 50.971244] ip_finish_output2+0x2b8/0x370 [ 50.972527] ip_finish_output+0x1d2/0x220 [ 50.973785] ? ip_finish_output+0x1d2/0x220 [ 50.975010] ip_output+0xd4/0x100 [ 50.975903] ip_local_out+0x3b/0x50 [ 50.976823] rxe_send+0x74/0x120 [ 50.977702] rxe_requester+0xe3b/0x10b0 [ 50.978881] ? ip_local_deliver_finish+0xd1/0xe0 [ 50.980260] rxe_do_task+0x85/0x100 [ 50.981386] rxe_run_task+0x2f/0x40 [ 50.982470] rxe_post_send+0x51a/0x550 [ 50.983591] nvmet_rdma_queue_response+0x10a/0x170 [ 50.985024] __nvmet_req_complete+0x95/0xa0 [ 50.986287] nvmet_req_complete+0x15/0x60 [ 50.987469] nvmet_bio_done+0x2d/0x40 [ 50.988564] bio_endio+0x12c/0x140 [ 50.989654] blk_update_request+0x185/0x2a0 [ 50.990947] blk_mq_end_request+0x1e/0x80 [ 50.991997] nvme_complete_rq+0x1cc/0x1e0 [ 50.993171] nvme_pci_complete_rq+0x117/0x120 [ 50.994355] __blk_mq_complete_request+0x15e/0x180 [ 50.995988] blk_mq_complete_request+0x6f/0xa0 [ 50.997304] nvme_process_cq+0xe0/0x1b0 [ 50.998494] nvme_irq+0x28/0x50 [ 50.999572] __handle_irq_event_percpu+0xa2/0x1c0 [ 51.000986] handle_irq_event_percpu+0x32/0x80 [ 51.002356] handle_irq_event+0x3c/0x60 [ 51.003463] handle_edge_irq+0x1c9/0x200 [ 51.004473] handle_irq+0x23/0x30 [ 51.005363] do_IRQ+0x46/0xd0 [ 51.006182] common_interrupt+0xf/0xf [ 51.007129] 2. Work must always be offloaded to tasklet for rxe_post_send_kernel() when using NVMEoF in order to solve lock ordering between neigh->ha_lock seqlock and the nvme queue lock: [ 77.833783] Possible interrupt unsafe locking scenario: [ 77.833783] [ 77.835831] CPU0 CPU1 [ 77.837129] ---- ---- [ 77.838313] lock(&(&n->ha_lock)->seqcount); [ 77.839550] local_irq_disable(); [ 77.841377] lock(&(&nvmeq->q_lock)->rlock); [ 77.843222] lock(&(&n->ha_lock)->seqcount); [ 77.845178] [ 77.846298] lock(&(&nvmeq->q_lock)->rlock); [ 77.847986] [ 77.847986] *** DEADLOCK *** 3. 
Same goes for the lock ordering between sch->q.lock and the nvme queue
lock:

[   47.634271]  Possible interrupt unsafe locking scenario:
[   47.634271]
[   47.636452]        CPU0                    CPU1
[   47.637861]        ----                    ----
[   47.639285]   lock(&(&sch->q.lock)->rlock);
[   47.640654]                                local_irq_disable();
[   47.642451]                                lock(&(&nvmeq->q_lock)->rlock);
[   47.644521]                                lock(&(&sch->q.lock)->rlock);
[   47.646480]   <Interrupt>
[   47.647263]     lock(&(&nvmeq->q_lock)->rlock);
[   47.648492]
[   47.648492]  *** DEADLOCK ***

Using NVMEoF after this patch seems to finally be stable; without it,
rxe eventually deadlocks the whole system and causes RCU stalls.

Signed-off-by: Alexandru Moise <00moses.alexander00@gmail.com>
Reviewed-by: Zhu Yanjun
Signed-off-by: Doug Ledford
---
 drivers/infiniband/sw/rxe/rxe_verbs.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 2cb52fd48cf1..73a00a1c06f6 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -761,7 +761,6 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
 	unsigned int mask;
 	unsigned int length = 0;
 	int i;
-	int must_sched;
 
 	while (wr) {
 		mask = wr_opcode_mask(wr->opcode, qp);
@@ -791,14 +790,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
 		wr = wr->next;
 	}
 
-	/*
-	 * Must sched in case of GSI QP because ib_send_mad() hold irq lock,
-	 * and the requester call ip_local_out_sk() that takes spin_lock_bh.
-	 */
-	must_sched = (qp_type(qp) == IB_QPT_GSI) ||
-			(queue_count(qp->sq.queue) > 1);
-
-	rxe_run_task(&qp->req.task, must_sched);
+	rxe_run_task(&qp->req.task, 1);
 
 	if (unlikely(qp->req.state == QP_STATE_ERROR))
 		rxe_run_task(&qp->comp.task, 1);
-- cgit v1.2.3-59-g8ed1b


From ecaaf1e26a37ddf7cba4e425cf68ae7ce1869bc0 Mon Sep 17 00:00:00 2001
From: oulijun
Date: Fri, 4 May 2018 10:57:03 +0800
Subject: RDMA/hns: Add rq inline flags judgement

It needs to set the rqie field of the qp context according to the
configured rq inline flags. Besides, it needs to decide whether to
post an inline rqwqe by checking the rq inline flags.
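As a rough sketch of the capability-gating pattern used here
(illustrative only; the flag bit and helper names are hypothetical):

	#define CAP_FLAG_RQ_INLINE	BIT(2)	/* made-up bit, for illustration */

	/* Data path: only stage the inline copy if the device supports it. */
	if (caps_flags & CAP_FLAG_RQ_INLINE)
		stage_inline_sges(wqe, wr);

	/* QP context init: mirror the same capability into the RQIE bit. */
	set_rqie(qpc, (caps_flags & CAP_FLAG_RQ_INLINE) ? 1 : 0);

Keying both sites to the same capability flag keeps the receive path
and the context configuration from ever disagreeing.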
Signed-off-by: Lijun Ou
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 25916e8522ed..162d42c6530a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -552,11 +552,15 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	}
 
 	/* rq support inline data */
-	sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
-	hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = (u32)wr->num_sge;
-	for (i = 0; i < wr->num_sge; i++) {
-		sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr;
-		sge_list[i].len = wr->sg_list[i].length;
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+		sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
+		hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt =
+			(u32)wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++) {
+			sge_list[i].addr =
+				(void *)(u64)wr->sg_list[i].addr;
+			sge_list[i].len = wr->sg_list[i].length;
+		}
 	}
 
 	hr_qp->rq.wrid[ind] = wr->wr_id;
@@ -2169,6 +2173,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 				    struct hns_roce_v2_qp_context *context,
 				    struct hns_roce_v2_qp_context *qpc_mask)
 {
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
 
 	/*
@@ -2281,7 +2286,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 	context->rq_db_record_addr = hr_qp->rdb.dma >> 32;
 	qpc_mask->rq_db_record_addr = 0;
 
-	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 1);
+	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S,
+		    (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) ? 1 : 0);
 	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0);
 
 	roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
-- cgit v1.2.3-59-g8ed1b


From 3a39bbecc88fa9a99a80de588c8f1fe16aba3446 Mon Sep 17 00:00:00 2001
From: oulijun
Date: Fri, 4 May 2018 10:57:04 +0800
Subject: RDMA/hns: Bugfix for rq record db for kernel

When the rq record doorbell is used in the kernel, it needs to set the
rdb_en of hr_qp to 1 and configure the dma address of the rq record
doorbell in the qp context.

Signed-off-by: Lijun Ou
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hns/hns_roce_qp.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index d4aad34c21e2..cdfb774e28ff 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -660,6 +660,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 			goto err_rq_sge_list;
 		}
 		*hr_qp->rdb.db_record = 0;
+		hr_qp->rdb_en = 1;
 	}
 
 	/* Allocate QP buf */
-- cgit v1.2.3-59-g8ed1b


From f97a62c39423575c62649721657e5cc04f67c0ac Mon Sep 17 00:00:00 2001
From: oulijun
Date: Fri, 4 May 2018 10:57:05 +0800
Subject: RDMA/hns: Load the RoCE driver automatically

To let the kernel load the hns-roce-hw-v2 driver automatically when a
matching device is plugged into the PCI bus, create a
MODULE_DEVICE_TABLE that exposes the pci_table of hns-roce-hw-v2 to
user space.
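For background (a sketch of the convention with a made-up device ID;
the real table is in the hunk below): MODULE_DEVICE_TABLE(pci, tbl)
embeds the ID table in the module as alias metadata, which modprobe
matches against the uevent emitted when the device is hot-plugged.

	/* Illustration only; 0xA222 is a hypothetical device ID. */
	static const struct pci_device_id example_pci_tbl[] = {
		{ PCI_VDEVICE(HUAWEI, 0xA222), 0 },
		{ 0, }			/* sentinel terminates the table */
	};
	MODULE_DEVICE_TABLE(pci, example_pci_tbl);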
Signed-off-by: Lijun Ou
Reported-by: Zhou Wang
Tested-by: Xiaojun Tan
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 162d42c6530a..50f55bd2c75f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -4709,6 +4709,8 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
 	{0, }
 };
 
+MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl);
+
 static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
 				  struct hnae3_handle *handle)
 {
-- cgit v1.2.3-59-g8ed1b


From ad18e20ba2887e221e903d311f4c9a1586eacffb Mon Sep 17 00:00:00 2001
From: oulijun
Date: Fri, 4 May 2018 10:57:06 +0800
Subject: RDMA/hns: Update convert function of endian format

Because the sys_image_guid field of the ib_device_attr structure is
__be64, it needs to be converted with cpu_to_be64.

Signed-off-by: Lijun Ou
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hns/hns_roce_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 9d48bc07a9e6..96fb6a9ed93c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -199,7 +199,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
 
 	memset(props, 0, sizeof(*props));
 
-	props->sys_image_guid = cpu_to_be32(hr_dev->sys_image_guid);
+	props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
 	props->max_mr_size = (u64)(~(0ULL));
 	props->page_size_cap = hr_dev->caps.page_size_cap;
 	props->vendor_id = hr_dev->vendor_id;
-- cgit v1.2.3-59-g8ed1b


From 2349fdd483ea933b223f3eca53ed42835383d316 Mon Sep 17 00:00:00 2001
From: oulijun
Date: Fri, 4 May 2018 10:57:07 +0800
Subject: RDMA/hns: Add return operation when configured global param fail

When configuring the global param fails, the function should return
directly so that the initialization flow stops.

Signed-off-by: Lijun Ou
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 50f55bd2c75f..539b0caacb9f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1085,6 +1085,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 	if (ret) {
 		dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n",
 			ret);
+		return ret;
 	}
 
 	/* Get pf resource owned by every pf */
-- cgit v1.2.3-59-g8ed1b


From 391bd5fc7de54a5cb866e8897d60ee1d76b8840a Mon Sep 17 00:00:00 2001
From: oulijun
Date: Fri, 4 May 2018 10:57:08 +0800
Subject: RDMA/hns: Not support qp transition from reset to reset for hip06

Because hip06 hardware does not support the qp transition from reset
to reset state, an errno needs to be returned when a qp is asked to
transition from reset to reset. This patch fixes it.
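As a rough sketch of what a verbs consumer now observes on hip06
(illustrative only; the error value comes from the hunk below):

	struct ib_qp_attr attr = {
		.qp_state = IB_QPS_RESET,
	};
	int ret;

	/* QP is already in RESET; on hip06 this modify now fails fast
	 * with -EPERM instead of silently succeeding as a no-op.
	 */
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);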
Signed-off-by: Lijun Ou
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hns/hns_roce_qp.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index cdfb774e28ff..baaf906f7c2e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -956,7 +956,14 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	}
 
 	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
-		ret = 0;
+		if (hr_dev->caps.min_wqes) {
+			ret = -EPERM;
+			dev_err(dev, "cur_state=%d new_state=%d\n", cur_state,
+				new_state);
+		} else {
+			ret = 0;
+		}
+
 		goto out;
 	}
 
-- cgit v1.2.3-59-g8ed1b


From 778cc5a8b75eee62d330059a2655b515cda43278 Mon Sep 17 00:00:00 2001
From: oulijun
Date: Fri, 4 May 2018 10:57:09 +0800
Subject: RDMA/hns: Fix the bug with rq sge

When multiple rq sges are posted and some of them have zero length,
the invalid lkey should be written into the entry that follows the
last non-zero length sge. This patch fixes it.

Signed-off-by: Lijun Ou
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 539b0caacb9f..a40978bbc1d0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -547,8 +547,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 		}
 
 		if (i < hr_qp->rq.max_gs) {
-			dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
-			dseg[i].addr = 0;
+			dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
+			dseg->addr = 0;
 		}
 
 		/* rq support inline data */
-- cgit v1.2.3-59-g8ed1b


From 90e7a4d5066240b75cdfd1bf8944ca36622153b1 Mon Sep 17 00:00:00 2001
From: oulijun
Date: Fri, 4 May 2018 10:57:10 +0800
Subject: RDMA/hns: Set desc_dma_addr for zero when free cmq desc

In order to avoid an illegal use of the ring's desc_dma_addr, set it
to zero when the cmq desc is freed.

Signed-off-by: Lijun Ou
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index a40978bbc1d0..46c3ab97e1ba 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -617,6 +617,8 @@ static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev,
 	dma_unmap_single(hr_dev->dev, ring->desc_dma_addr,
 			 ring->desc_num * sizeof(struct hns_roce_cmq_desc),
 			 DMA_BIDIRECTIONAL);
+
+	ring->desc_dma_addr = 0;
 	kfree(ring->desc);
 }
 
-- cgit v1.2.3-59-g8ed1b


From 85e0274dc66430b0d0fad7ad01cbc0e0cbebf6dc Mon Sep 17 00:00:00 2001
From: oulijun
Date: Fri, 4 May 2018 10:57:11 +0800
Subject: RDMA/hns: Enable inner_pa_vld field of mpt

When the inner_pa_vld field of the mpt is enabled, pa0 and pa1 are
valid and the hardware uses them directly instead of going through the
base address of the pbl. As a result, the lookup delay is reduced.
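A rough model of the indirection the flag removes (purely illustrative;
the structure and helper are hypothetical, not the real MPT layout):

	struct fake_mpt {
		u32 inner_pa_vld;
		u64 pa[2];	/* first page addresses cached in the entry */
		u64 pbl_ba;	/* base address of the external PBL */
	};

	static u64 mr_page_addr(const struct fake_mpt *mpt, unsigned int idx)
	{
		if (mpt->inner_pa_vld)		/* no PBL fetch needed */
			return mpt->pa[idx];
		return read_pbl_entry(mpt->pbl_ba, idx); /* extra DMA read */
	}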
Signed-off-by: Lijun Ou
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 46c3ab97e1ba..7ecf0c911808 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1379,6 +1379,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 
 	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S,
 		     mr->type == MR_TYPE_MR ? 0 : 1);
+	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S,
+		     1);
 	mpt_entry->byte_12_mw_pa = cpu_to_le32(mpt_entry->byte_12_mw_pa);
 
 	mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
-- cgit v1.2.3-59-g8ed1b


From 79d442071a733057e4d9f28c90fbdb4f39d9fc23 Mon Sep 17 00:00:00 2001
From: oulijun
Date: Fri, 4 May 2018 10:57:12 +0800
Subject: RDMA/hns: Set NULL for __internal_mr

This patch initializes the __internal_mr field of mr_free_pd to NULL.

Signed-off-by: Lijun Ou
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 47e1b6ac1e1a..b3417a9a49de 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -722,6 +722,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 	free_mr->mr_free_pd = to_hr_pd(pd);
 	free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev;
 	free_mr->mr_free_pd->ibpd.uobject = NULL;
+	free_mr->mr_free_pd->ibpd.__internal_mr = NULL;
 	atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0);
 
 	attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE;
-- cgit v1.2.3-59-g8ed1b


From a0403be8af338c319b5176c1d2975d94a930a0bf Mon Sep 17 00:00:00 2001
From: oulijun
Date: Fri, 4 May 2018 10:57:13 +0800
Subject: RDMA/hns: Fix the bug with NULL pointer

When the last of the eight QPs does not exist in the
hns_roce_v1_mr_free_work_fn function, printing the qpn of hr_qp may
trigger a NULL pointer dereference calltrace.

Signed-off-by: Lijun Ou
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index b3417a9a49de..8013d69c5ac4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -1037,7 +1037,7 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
 
 	do {
 		ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
-		if (ret < 0) {
+		if (ret < 0 && hr_qp) {
 			dev_err(dev,
 				"(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n",
 				hr_qp->qpn, ret, hr_mr->key, ne);
-- cgit v1.2.3-59-g8ed1b


From a75895b1ebd944a7873cbf76d30de8720955f8b3 Mon Sep 17 00:00:00 2001
From: Andrew Boyer
Date: Mon, 7 May 2018 13:23:36 -0400
Subject: RDMA/i40iw: Avoid panic when objects are being created and destroyed

A panic occurs when there is a newly-registered element on the QP/CQ MR
list waiting to be attached, but a different MR is deregistered. The
current code only checks whether the list is empty, not whether the
element being deregistered is actually on the list.

Fix the panic by adding a boolean to track if the object is on the
list.
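As a rough sketch of the pattern (hypothetical names): list_empty() on
the list head says nothing about whether one particular entry is
linked, so membership is tracked per entry.

	struct tracked_entry {
		struct list_head list;
		bool on_list;	/* set when linked, cleared when unlinked */
	};

	static void tracked_del(struct tracked_entry *e)
	{
		if (e->on_list) {
			e->on_list = false;
			list_del(&e->list);	/* entry is known to be linked */
		}
	}

An equivalent idiom is list_del_init() on unlink plus
!list_empty(&e->list) as the membership test for the entry itself.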
Fixes: d37498417947 ("i40iw: add files for iwarp interface") Signed-off-by: Andrew Boyer Reviewed-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 11 +++++++++-- drivers/infiniband/hw/i40iw/i40iw_verbs.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 3ebe91438523..68679ad4c6da 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -394,6 +394,7 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va, list_for_each_entry(iwpbl, pbl_list, list) { if (iwpbl->user_base == va) { + iwpbl->on_list = false; list_del(&iwpbl->list); return iwpbl; } @@ -1898,6 +1899,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, goto error; spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list); + iwpbl->on_list = true; spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); break; case IW_MEMREG_TYPE_CQ: @@ -1908,6 +1910,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list); + iwpbl->on_list = true; spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); break; case IW_MEMREG_TYPE_MEM: @@ -2045,14 +2048,18 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr, switch (iwmr->type) { case IW_MEMREG_TYPE_CQ: spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); - if (!list_empty(&ucontext->cq_reg_mem_list)) + if (iwpbl->on_list) { + iwpbl->on_list = false; list_del(&iwpbl->list); + } spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); break; case IW_MEMREG_TYPE_QP: spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); - if (!list_empty(&ucontext->qp_reg_mem_list)) + if (iwpbl->on_list) { + iwpbl->on_list = false; list_del(&iwpbl->list); + } spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); break; default: diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 9067443cd311..76cf173377ab 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -78,6 +78,7 @@ struct i40iw_pbl { }; bool pbl_allocated; + bool on_list; u64 user_base; struct i40iw_pble_alloc pble_alloc; struct i40iw_mr *iwmr; -- cgit v1.2.3-59-g8ed1b From 9f7b16afab9b47de471f4ef6a0c6c337f0a53566 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 7 May 2018 13:23:37 -0400 Subject: RDMA/i40iw: Avoid reference leaks when processing the AEQ In this switch there is a reference held on the QP. 'continue' will grab the next event without releasing the reference, causing a leak. Change it to 'break' to drop the reference before grabbing the next event. 
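As a rough sketch of the control-flow trap (hypothetical helpers):
inside a switch that sits in a loop body, "continue" skips the cleanup
at the bottom of the loop, while "break" only leaves the switch and
still reaches it.

	while ((info = next_aeq_event(dev))) {
		qp = get_qp_ref(info);		/* takes a reference */

		switch (info->ae_id) {
		case AE_SOME_EVENT:
			if (should_skip(qp))
				break;		/* not continue: ref must drop */
			handle_event(qp);
			break;
		default:
			break;
		}

		put_qp_ref(qp);			/* released once per event */
	}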
Fixes: 4e9042e647ff ("i40iw: add hw and utils files") Signed-off-by: Andrew Boyer Reviewed-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_hw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 6139836fb533..c9f62ca7643c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -331,7 +331,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) switch (info->ae_id) { case I40IW_AE_LLP_FIN_RECEIVED: if (qp->term_flags) - continue; + break; if (atomic_inc_return(&iwqp->close_timer_started) == 1) { iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSE_WAIT; if ((iwqp->hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) && @@ -360,7 +360,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) break; case I40IW_AE_LLP_CONNECTION_RESET: if (atomic_read(&iwqp->close_timer_started)) - continue; + break; i40iw_cm_disconn(iwqp); break; case I40IW_AE_QP_SUSPEND_COMPLETE: -- cgit v1.2.3-59-g8ed1b From 43731753c4b7d832775cf6b2301dd0447a5a1851 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 7 May 2018 13:23:38 -0400 Subject: RDMA/i40iw: Avoid panic when reading back the IRQ affinity hint The current code sets an affinity hint with a cpumask_t stored on the stack. This value can then be accessed through /proc/irq/*/affinity_hint/, causing a segfault or returning corrupt data. Move the cpumask_t into struct i40iw_msix_vector so it is available later. Backtrace: BUG: unable to handle kernel paging request at ffffb16e600e7c90 IP: irq_affinity_hint_proc_show+0x60/0xf0 PGD 17c0c6d067 PUD 17c0c6e067 PMD 15d4a0e067 PTE 0 Oops: 0000 [#1] SMP Modules linked in: ... CPU: 3 PID: 172543 Comm: grep Tainted: G OE ... #1 Hardware name: ... task: ffff9a5caee08000 task.stack: ffffb16e659d8000 RIP: 0010:irq_affinity_hint_proc_show+0x60/0xf0 RSP: 0018:ffffb16e659dbd20 EFLAGS: 00010086 RAX: 0000000000000246 RBX: ffffb16e659dbd20 RCX: 0000000000000000 RDX: ffffb16e600e7c90 RSI: 0000000000000003 RDI: 0000000000000046 RBP: ffffb16e659dbd88 R08: 0000000000000038 R09: 0000000000000001 R10: 0000000070803079 R11: 0000000000000000 R12: ffff9a59d1d97a00 R13: ffff9a5da47a6cd8 R14: ffff9a5da47a6c00 R15: ffff9a59d1d97a00 FS: 00007f946c31d740(0000) GS:ffff9a5dc1800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffb16e600e7c90 CR3: 00000016a4339000 CR4: 00000000007406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: seq_read+0x12d/0x430 ? sched_clock_cpu+0x11/0xb0 proc_reg_read+0x48/0x70 __vfs_read+0x37/0x140 ? 
security_file_permission+0xa0/0xc0
 vfs_read+0x96/0x140
 SyS_read+0x58/0xc0
 do_syscall_64+0x5a/0x190
 entry_SYSCALL64_slow_path+0x25/0x25
RIP: 0033:0x7f946bbc97e0
RSP: 002b:00007ffdd0c4ae08 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
RAX: ffffffffffffffda RBX: 000000000096b000 RCX: 00007f946bbc97e0
RDX: 000000000096b000 RSI: 00007f946a2f0000 RDI: 0000000000000004
RBP: 0000000000001000 R08: 00007f946a2ef011 R09: 000000000000000a
R10: 0000000000001000 R11: 0000000000000246 R12: 00007f946a2f0000
R13: 0000000000000004 R14: 0000000000000000 R15: 00007f946a2f0000
Code: b9 08 00 00 00 49 89 c6 48 89 df 31 c0 4d 8d ae d8 00 00 00 f3 48 ab 4c 89 ef e8 6c 9a 56 00 49 8b 96 30 01 00 00 48 85 d2 74 3f <48> 8b 0a 48 89 4d 98 48 8b 4a 08 48 89 4d a0 48 8b 4a 10 48 89
RIP: irq_affinity_hint_proc_show+0x60/0xf0 RSP: ffffb16e659dbd20
CR2: ffffb16e600e7c90

Fixes: 8e06af711bf2 ("i40iw: add main, hdr, status")
Signed-off-by: Andrew Boyer
Reviewed-by: Shiraz Saleem
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/i40iw/i40iw.h      | 1 +
 drivers/infiniband/hw/i40iw/i40iw_main.c | 7 +++----
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index d5d8c1be345a..2f2b4426ded7 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -207,6 +207,7 @@ struct i40iw_msix_vector {
 	u32 irq;
 	u32 cpu_affinity;
 	u32 ceq_id;
+	cpumask_t mask;
 };
 
 struct l2params_work {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 9cd0d3ef9057..05001e6da1f8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -687,7 +687,6 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw
 						struct i40iw_msix_vector *msix_vec)
 {
 	enum i40iw_status_code status;
-	cpumask_t mask;
 
 	if (iwdev->msix_shared && !ceq_id) {
 		tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
@@ -697,9 +696,9 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw
 		status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
 	}
 
-	cpumask_clear(&mask);
-	cpumask_set_cpu(msix_vec->cpu_affinity, &mask);
-	irq_set_affinity_hint(msix_vec->irq, &mask);
+	cpumask_clear(&msix_vec->mask);
+	cpumask_set_cpu(msix_vec->cpu_affinity, &msix_vec->mask);
+	irq_set_affinity_hint(msix_vec->irq, &msix_vec->mask);
 
 	if (status) {
 		i40iw_pr_err("ceq irq config fail\n");
-- cgit v1.2.3-59-g8ed1b


From 3d69191086fc87f202c79eb8873b9c82c2bb065a Mon Sep 17 00:00:00 2001
From: Christophe Jaillet
Date: Tue, 8 May 2018 07:44:27 +0200
Subject: iw_cxgb4: Fix an error handling path in 'c4iw_get_dma_mr()'

The error handling path of 'c4iw_get_dma_mr()' does not free resources
in the correct order. If an error occurs, it can leak 'mhp->wr_waitp'.
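As a rough sketch of the unwind-ordering rule involved (hypothetical
helpers): error labels release resources in reverse order of
acquisition, so a jump from any failure point frees exactly what was
already acquired.

	wr_waitp = alloc_wr_wait();		/* acquired first */
	if (!wr_waitp)
		goto err_free_mhp;
	skb = alloc_skb(len, GFP_KERNEL);	/* acquired second */
	if (!skb)
		goto err_free_wr_wait;
	...
	err_free_skb:
		kfree_skb(skb);			/* released first */
	err_free_wr_wait:
		put_wr_wait(wr_waitp);		/* released second */
	err_free_mhp:
		kfree(mhp);
		return ERR_PTR(ret);

The bug fixed below is this ordering inverted: the wr_waitp label sat
above the skb label, so a jump to the skb label skipped the wr_waitp
release.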
Fixes: a3f12da0e99a ("iw_cxgb4: allocate wait object for each memory object") Signed-off-by: Christophe JAILLET Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index e90f2fd8dc16..1445918e3239 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -489,10 +489,10 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) err_dereg_mem: dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp); -err_free_wr_wait: - c4iw_put_wr_wait(mhp->wr_waitp); err_free_skb: kfree_skb(mhp->dereg_skb); +err_free_wr_wait: + c4iw_put_wr_wait(mhp->wr_waitp); err_free_mhp: kfree(mhp); return ERR_PTR(ret); -- cgit v1.2.3-59-g8ed1b From 8e907ed4882714fd13cfe670681fc6cb5284c780 Mon Sep 17 00:00:00 2001 From: Lidong Chen Date: Tue, 8 May 2018 16:50:16 +0800 Subject: IB/umem: Use the correct mm during ib_umem_release User-space may invoke ibv_reg_mr and ibv_dereg_mr in different threads. If ibv_dereg_mr is called after the thread which invoked ibv_reg_mr has exited, get_pid_task will return NULL and ib_umem_release will not decrease mm->pinned_vm. Instead of using threads to locate the mm, use the overall tgid from the ib_ucontext struct instead. This matches the behavior of ODP and disassociate in handling the mm of the process that called ibv_reg_mr. Cc: Fixes: 87773dd56d54 ("IB: ib_umem_release() should decrement mm->pinned_vm from ib_umem_get") Signed-off-by: Lidong Chen Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 7 +------ include/rdma/ib_umem.h | 1 - 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 9a4e899d94b3..2b6c9b516070 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -119,7 +119,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, umem->length = size; umem->address = addr; umem->page_shift = PAGE_SHIFT; - umem->pid = get_task_pid(current, PIDTYPE_PID); /* * We ask for writable memory if any of the following * access flags are set. 
"Local write" and "remote write" @@ -132,7 +131,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND)); if (access & IB_ACCESS_ON_DEMAND) { - put_pid(umem->pid); ret = ib_umem_odp_get(context, umem, access); if (ret) { kfree(umem); @@ -148,7 +146,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { - put_pid(umem->pid); kfree(umem); return ERR_PTR(-ENOMEM); } @@ -231,7 +228,6 @@ out: if (ret < 0) { if (need_release) __ib_umem_release(context->device, umem, 0); - put_pid(umem->pid); kfree(umem); } else current->mm->pinned_vm = locked; @@ -274,8 +270,7 @@ void ib_umem_release(struct ib_umem *umem) __ib_umem_release(umem->context->device, umem, 1); - task = get_pid_task(umem->pid, PIDTYPE_PID); - put_pid(umem->pid); + task = get_pid_task(umem->context->tgid, PIDTYPE_PID); if (!task) goto out; mm = get_task_mm(task); diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 23159dd5be18..a1fd63871d17 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -48,7 +48,6 @@ struct ib_umem { int writable; int hugetlb; struct work_struct work; - struct pid *pid; struct mm_struct *mm; unsigned long diff; struct ib_umem_odp *odp_data; -- cgit v1.2.3-59-g8ed1b From 30bf066cd9989fef34aeeef9080368867fe42be7 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Tue, 15 May 2018 15:13:33 +0300 Subject: RDMA/qedr: Fix doorbell bar mapping for dpi > 1 Each user_context receives a separate dpi value and thus a different address on the doorbell bar. The qedr_mmap function needs to validate the address and map the doorbell bar accordingly. The current implementation always checked against dpi=0 doorbell range leading to a wrong mapping for doorbell bar. (It entered an else case that mapped the address differently). qedr_mmap should only be used for doorbells, so the else was actually wrong in the first place. This only has an affect on arm architecture and not an issue on a x86 based architecture. This lead to doorbells not occurring on arm based systems and left applications that use more than one dpi (or several applications run simultaneously ) to hang. 
Fixes: ac1b36e55a51 ("qedr: Add support for user context verbs") Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/verbs.c | 60 ++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 7d3763b2e01c..3f9afc02d166 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -401,49 +401,47 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { struct qedr_ucontext *ucontext = get_qedr_ucontext(context); struct qedr_dev *dev = get_qedr_dev(context->device); - unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; - u64 unmapped_db = dev->db_phys_addr; + unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT; unsigned long len = (vma->vm_end - vma->vm_start); - int rc = 0; - bool found; + unsigned long dpi_start; + + dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size); DP_DEBUG(dev, QEDR_MSG_INIT, - "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n", - vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len); - if (vma->vm_start & (PAGE_SIZE - 1)) { - DP_ERR(dev, "Vma_start not page aligned = %ld\n", - vma->vm_start); + "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n", + (void *)vma->vm_start, (void *)vma->vm_end, + (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size); + + if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) { + DP_ERR(dev, + "failed mmap, adrresses must be page aligned: start=0x%pK, end=0x%pK\n", + (void *)vma->vm_start, (void *)vma->vm_end); return -EINVAL; } - found = qedr_search_mmap(ucontext, vm_page, len); - if (!found) { - DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n", + if (!qedr_search_mmap(ucontext, phys_addr, len)) { + DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n", vma->vm_pgoff); return -EINVAL; } - DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n"); - - if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db + - dev->db_size))) { - DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n"); - if (vma->vm_flags & VM_READ) { - DP_ERR(dev, "Trying to map doorbell bar for read\n"); - return -EPERM; - } - - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + if (phys_addr < dpi_start || + ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) { + DP_ERR(dev, + "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n", + (void *)phys_addr, (void *)dpi_start, + ucontext->dpi_size); + return -EINVAL; + } - rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - PAGE_SIZE, vma->vm_page_prot); - } else { - DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n"); - rc = remap_pfn_range(vma, vma->vm_start, - vma->vm_pgoff, len, vma->vm_page_prot); + if (vma->vm_flags & VM_READ) { + DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n"); + return -EINVAL; } - DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc); - return rc; + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, + vma->vm_page_prot); } struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, -- cgit v1.2.3-59-g8ed1b From 05d6a4ddb654ef6f2fbbcf9dcb3b263184baa8e4 Mon Sep 17 00:00:00 2001 From: oulijun Date: Tue, 22 May 2018 20:47:14 +0800 
Subject: RDMA/hns: Bugfix for cq record db for kernel

When the cq record doorbell is used in the kernel, it needs to set the
hr_cq->db_en to 1 and configure the dma address of the cq record
doorbell in the cq context.

Fixes: 86188a8810ed ("RDMA/hns: Support cq record doorbell for kernel space")
Signed-off-by: Lijun Ou
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/hw/hns/hns_roce_cq.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 14734d0d0b76..3a485f50fede 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -377,6 +377,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
 
 		hr_cq->set_ci_db = hr_cq->db.db_record;
 		*hr_cq->set_ci_db = 0;
+		hr_cq->db_en = 1;
 	}
 
 	/* Init mmt table and write buff address to mtt table */
-- cgit v1.2.3-59-g8ed1b


From 55ba49cbcef37053d973f9a45bc58818c333fe13 Mon Sep 17 00:00:00 2001
From: oulijun
Date: Tue, 22 May 2018 20:47:15 +0800
Subject: RDMA/hns: Move the location for initializing tmp_len

When posting a work request, the driver needs to compute the length of
all sges of every wr and fill it into the msg_len field of the send
wqe. Thus, while posting multiple wrs, tmp_len should be reinitialized
to zero for each of them.

Fixes: 8b9b8d143b46 ("RDMA/hns: Fix the endian problem for hns")
Signed-off-by: Lijun Ou
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 7ecf0c911808..1f0965bb64ee 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -142,8 +142,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	unsigned long flags;
 	unsigned int ind;
 	void *wqe = NULL;
-	u32 tmp_len = 0;
 	bool loopback;
+	u32 tmp_len;
 	int ret = 0;
 	u8 *smac;
 	int nreq;
@@ -189,6 +189,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 		owner_bit =
 			~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
+		tmp_len = 0;
 
 		/* Corresponding to the QP type, wqe process separately */
 		if (ibqp->qp_type == IB_QPT_GSI) {
-- cgit v1.2.3-59-g8ed1b


From 08bb558ac11ab944e0539e78619d7b4c356278bd Mon Sep 17 00:00:00 2001
From: Jack Morgenstein
Date: Wed, 23 May 2018 15:30:30 +0300
Subject: IB/core: Make testing MR flags for writability a static inline function

Make the MR writability flags check, which is performed in umem.c,
a static inline function in file ib_verbs.h.

This allows the function to be used by low-level infiniband drivers.

Cc:
Signed-off-by: Jason Gunthorpe
Signed-off-by: Jack Morgenstein
Signed-off-by: Leon Romanovsky
---
 drivers/infiniband/core/umem.c | 11 +----------
 include/rdma/ib_verbs.h        | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 2b6c9b516070..d76455edd292 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -119,16 +119,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	umem->length = size;
 	umem->address = addr;
 	umem->page_shift = PAGE_SHIFT;
-	/*
-	 * We ask for writable memory if any of the following
-	 * access flags are set. "Local write" and "remote write"
-	 * obviously require write access. 
"Remote atomic" can do - * things like fetch and add, which will modify memory, and - * "MW bind" can change permissions by binding a window. - */ - umem->writable = !!(access & - (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND)); + umem->writable = ib_access_writable(access); if (access & IB_ACCESS_ON_DEMAND) { ret = ib_umem_odp_get(context, umem, access); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9fc8a825aa28..20fa5c591e81 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3734,6 +3734,20 @@ static inline int ib_check_mr_access(int flags) return 0; } +static inline bool ib_access_writable(int access_flags) +{ + /* + * We have writable memory backing the MR if any of the following + * access flags are set. "Local write" and "remote write" obviously + * require write access. "Remote atomic" can do things like fetch and + * add, which will modify memory, and "MW bind" can change permissions + * by binding a window. + */ + return access_flags & + (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND); +} + /** * ib_check_mr_status: lightweight check of MR status. * This routine may provide status checks on a selected -- cgit v1.2.3-59-g8ed1b From d8f9cc328c8888369880e2527e9186d745f2bbf6 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 23 May 2018 15:30:31 +0300 Subject: IB/mlx4: Mark user MR as writable if actual virtual memory is writable To allow rereg_user_mr to modify the MR from read-only to writable without using get_user_pages again, we needed to define the initial MR as writable. However, this was originally done unconditionally, without taking into account the writability of the underlying virtual memory. As a result, any attempt to register a read-only MR over read-only virtual memory failed. To fix this, do not add the writable flag bit when the user virtual memory is not writable (e.g. const memory). However, when the underlying memory is NOT writable (and we therefore do not define the initial MR as writable), the IB core adds a "force writable" flag to its user-pages request. If this succeeds, the reg_user_mr caller gets a writable copy of the original pages. If the user-space caller then does a rereg_user_mr operation to enable writability, this will succeed. This should not be allowed, since the original virtual memory was not writable. Cc: Fixes: 9376932d0c26 ("IB/mlx4_ib: Add support for user MR re-registration") Signed-off-by: Jason Gunthorpe Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx4/mr.c | 50 ++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 61d8b06375bb..ed1f253faf97 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -367,6 +367,40 @@ end: return block_shift; } +static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start, + u64 length, u64 virt_addr, + int access_flags) +{ + /* + * Force registering the memory as writable if the underlying pages + * are writable. 
This is so rereg can change the access permissions + * from readable to writable without having to run through ib_umem_get + * again + */ + if (!ib_access_writable(access_flags)) { + struct vm_area_struct *vma; + + down_read(¤t->mm->mmap_sem); + /* + * FIXME: Ideally this would iterate over all the vmas that + * cover the memory, but for now it requires a single vma to + * entirely cover the MR to support RO mappings. + */ + vma = find_vma(current->mm, start); + if (vma && vma->vm_end >= start + length && + vma->vm_start <= start) { + if (vma->vm_flags & VM_WRITE) + access_flags |= IB_ACCESS_LOCAL_WRITE; + } else { + access_flags |= IB_ACCESS_LOCAL_WRITE; + } + + up_read(¤t->mm->mmap_sem); + } + + return ib_umem_get(context, start, length, access_flags, 0); +} + struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) @@ -381,10 +415,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - /* Force registering the memory as writable. */ - /* Used for memory re-registeration. HCA protects the access */ - mr->umem = ib_umem_get(pd->uobject->context, start, length, - access_flags | IB_ACCESS_LOCAL_WRITE, 0); + mr->umem = mlx4_get_umem_mr(pd->uobject->context, start, length, + virt_addr, access_flags); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err_free; @@ -454,6 +486,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, } if (flags & IB_MR_REREG_ACCESS) { + if (ib_access_writable(mr_access_flags) && !mmr->umem->writable) + return -EPERM; + err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry, convert_access(mr_access_flags)); @@ -467,10 +502,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); ib_umem_release(mmr->umem); - mmr->umem = ib_umem_get(mr->uobject->context, start, length, - mr_access_flags | - IB_ACCESS_LOCAL_WRITE, - 0); + mmr->umem = + mlx4_get_umem_mr(mr->uobject->context, start, length, + virt_addr, mr_access_flags); if (IS_ERR(mmr->umem)) { err = PTR_ERR(mmr->umem); /* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */ -- cgit v1.2.3-59-g8ed1b
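A rough distillation of the writability probe used by
mlx4_get_umem_mr() above (simplified to the single-VMA case it
handles; not a drop-in helper):

	bool writable = true;
	struct vm_area_struct *vma;

	down_read(&current->mm->mmap_sem);
	vma = find_vma(current->mm, start);
	/* Require one VMA to cover the whole range, then inherit its
	 * write permission; otherwise fall back to asking for write
	 * access, matching the pre-existing behavior.
	 */
	if (vma && vma->vm_start <= start && vma->vm_end >= start + length)
		writable = vma->vm_flags & VM_WRITE;
	up_read(&current->mm->mmap_sem);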