From 511cbce2ff8b9d322077909ee90c5d4b67b29b75 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Nov 2015 14:56:14 +0100 Subject: irq_poll: make blk-iopoll available outside the block layer The new name is irq_poll as iopoll is already taken. Better suggestions welcome. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche --- include/linux/blk-iopoll.h | 46 ---------------------------------------------- include/linux/interrupt.h | 2 +- include/linux/irq_poll.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 47 deletions(-) delete mode 100644 include/linux/blk-iopoll.h create mode 100644 include/linux/irq_poll.h (limited to 'include/linux') diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h deleted file mode 100644 index 77ae77c0b704..000000000000 --- a/include/linux/blk-iopoll.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef BLK_IOPOLL_H -#define BLK_IOPOLL_H - -struct blk_iopoll; -typedef int (blk_iopoll_fn)(struct blk_iopoll *, int); - -struct blk_iopoll { - struct list_head list; - unsigned long state; - unsigned long data; - int weight; - int max; - blk_iopoll_fn *poll; -}; - -enum { - IOPOLL_F_SCHED = 0, - IOPOLL_F_DISABLE = 1, -}; - -/* - * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating - * that we were the first to acquire this iop for scheduling. If this iop - * is currently disabled, return "failure". - */ -static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop) -{ - if (!test_bit(IOPOLL_F_DISABLE, &iop->state)) - return test_and_set_bit(IOPOLL_F_SCHED, &iop->state); - - return 1; -} - -static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop) -{ - return test_bit(IOPOLL_F_DISABLE, &iop->state); -} - -extern void blk_iopoll_sched(struct blk_iopoll *); -extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *); -extern void blk_iopoll_complete(struct blk_iopoll *); -extern void __blk_iopoll_complete(struct blk_iopoll *); -extern void blk_iopoll_enable(struct blk_iopoll *); -extern void blk_iopoll_disable(struct blk_iopoll *); - -#endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index ad16809c8596..7ff98c23199a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -412,7 +412,7 @@ enum NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, BLOCK_SOFTIRQ, - BLOCK_IOPOLL_SOFTIRQ, + IRQ_POLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h new file mode 100644 index 000000000000..50c39dcd2cba --- /dev/null +++ b/include/linux/irq_poll.h @@ -0,0 +1,46 @@ +#ifndef IRQ_POLL_H +#define IRQ_POLL_H + +struct irq_poll; +typedef int (irq_poll_fn)(struct irq_poll *, int); + +struct irq_poll { + struct list_head list; + unsigned long state; + unsigned long data; + int weight; + int max; + irq_poll_fn *poll; +}; + +enum { + IRQ_POLL_F_SCHED = 0, + IRQ_POLL_F_DISABLE = 1, +}; + +/* + * Returns 0 if we successfully set the IRQ_POLL_F_SCHED bit, indicating + * that we were the first to acquire this iop for scheduling. If this iop + * is currently disabled, return "failure". 
+ */ +static inline int irq_poll_sched_prep(struct irq_poll *iop) +{ + if (!test_bit(IRQ_POLL_F_DISABLE, &iop->state)) + return test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state); + + return 1; +} + +static inline int irq_poll_disable_pending(struct irq_poll *iop) +{ + return test_bit(IRQ_POLL_F_DISABLE, &iop->state); +} + +extern void irq_poll_sched(struct irq_poll *); +extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *); +extern void irq_poll_complete(struct irq_poll *); +extern void __irq_poll_complete(struct irq_poll *); +extern void irq_poll_enable(struct irq_poll *); +extern void irq_poll_disable(struct irq_poll *); + +#endif -- cgit v1.2.3-59-g8ed1b From ea51190c03150fce4d9e428bfb608abbe0991db8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Dec 2015 06:41:11 -0800 Subject: irq_poll: fold irq_poll_sched_prep into irq_poll_sched There is no good reason to keep them apart, and this makes using the API a bit simpler. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche --- drivers/scsi/be2iscsi/be_main.c | 6 ++---- drivers/scsi/ipr.c | 3 +-- include/linux/irq_poll.h | 13 ------------- lib/irq_poll.c | 10 +++++++--- 4 files changed, 10 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 471e2b942435..cb9072a841be 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -910,8 +910,7 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id) num_eq_processed = 0; while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] & EQE_VALID_MASK) { - if (!irq_poll_sched_prep(&pbe_eq->iopoll)) - irq_poll_sched(&pbe_eq->iopoll); + irq_poll_sched(&pbe_eq->iopoll); AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); queue_tail_inc(eq); @@ -972,8 +971,7 @@ static irqreturn_t be_isr(int irq, void *dev_id) spin_unlock_irqrestore(&phba->isr_lock, flags); num_mcceq_processed++; } else { - if (!irq_poll_sched_prep(&pbe_eq->iopoll)) - irq_poll_sched(&pbe_eq->iopoll); + irq_poll_sched(&pbe_eq->iopoll); num_ioeq_processed++; } AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 402e4ca32d70..82031e00b2e9 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -5692,8 +5692,7 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp) if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == hrrq->toggle_bit) { - if (!irq_poll_sched_prep(&hrrq->iopoll)) - irq_poll_sched(&hrrq->iopoll); + irq_poll_sched(&hrrq->iopoll); spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return IRQ_HANDLED; } diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h index 50c39dcd2cba..57efae661400 100644 --- a/include/linux/irq_poll.h +++ b/include/linux/irq_poll.h @@ -18,19 +18,6 @@ enum { IRQ_POLL_F_DISABLE = 1, }; -/* - * Returns 0 if we successfully set the IRQ_POLL_F_SCHED bit, indicating - * that we were the first to acquire this iop for scheduling. If this iop - * is currently disabled, return "failure". 
- */
-static inline int irq_poll_sched_prep(struct irq_poll *iop)
-{
-	if (!test_bit(IRQ_POLL_F_DISABLE, &iop->state))
-		return test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state);
-
-	return 1;
-}
-
 static inline int irq_poll_disable_pending(struct irq_poll *iop)
 {
 	return test_bit(IRQ_POLL_F_DISABLE, &iop->state);
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 88af87971e8c..43a3370a09fd 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -21,13 +21,17 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
  *
  * Description:
  *     Add this irq_poll structure to the pending poll list and trigger the
- *     raise of the blk iopoll softirq. The driver must already have gotten a
- *     successful return from irq_poll_sched_prep() before calling this.
+ *     raise of the blk iopoll softirq.
  **/
 void irq_poll_sched(struct irq_poll *iop)
 {
 	unsigned long flags;
 
+	if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
+		return;
+	if (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
+		return;
+
 	local_irq_save(flags);
 	list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
 	__raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
@@ -58,7 +62,7 @@ EXPORT_SYMBOL(__irq_poll_complete);
  * Description:
  *     If a driver consumes less than the assigned budget in its run of the
  *     iopoll handler, it'll end the polled mode by calling this function. The
- *     iopoll handler will not be invoked again before irq_poll_sched_prep()
+ *     iopoll handler will not be invoked again before irq_poll_sched()
  *     is called.
  **/
 void irq_poll_complete(struct irq_poll *iop)
--
cgit v1.2.3-59-g8ed1b


From 0bc92ace52ef3ed1c8eb9bcf36cd3d7ca72d5d14 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 7 Dec 2015 06:56:36 -0800
Subject: irq_poll: fold irq_poll_disable_pending into irq_poll_softirq

Signed-off-by: Christoph Hellwig
Reviewed-by: Bart Van Assche
---
 include/linux/irq_poll.h | 5 -----
 lib/irq_poll.c           | 2 +-
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h
index 57efae661400..b4ad03cee9d4 100644
--- a/include/linux/irq_poll.h
+++ b/include/linux/irq_poll.h
@@ -18,11 +18,6 @@ enum {
 	IRQ_POLL_F_DISABLE = 1,
 };
 
-static inline int irq_poll_disable_pending(struct irq_poll *iop)
-{
-	return test_bit(IRQ_POLL_F_DISABLE, &iop->state);
-}
-
 extern void irq_poll_sched(struct irq_poll *);
 extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *);
 extern void irq_poll_complete(struct irq_poll *);
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 43a3370a09fd..53d67e341ebb 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -122,7 +122,7 @@ static void irq_poll_softirq(struct softirq_action *h)
 		 * move the instance around on the list at-will.
 		 */
 		if (work >= weight) {
-			if (irq_poll_disable_pending(iop))
+			if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
 				__irq_poll_complete(iop);
 			else
 				list_move_tail(&iop->list, list);
--
cgit v1.2.3-59-g8ed1b


From 83af187d1b776753d58b53d155318d94f9428e92 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 7 Dec 2015 06:57:25 -0800
Subject: irq_poll: mark __irq_poll_complete static

Signed-off-by: Christoph Hellwig
Reviewed-by: Bart Van Assche
---
 include/linux/irq_poll.h | 1 -
 lib/irq_poll.c           | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h
index b4ad03cee9d4..8c4b4087f1f2 100644
--- a/include/linux/irq_poll.h
+++ b/include/linux/irq_poll.h
@@ -21,7 +21,6 @@ enum {
 extern void irq_poll_sched(struct irq_poll *);
 extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *);
 extern void irq_poll_complete(struct irq_poll *);
-extern void __irq_poll_complete(struct irq_poll *);
 extern void irq_poll_enable(struct irq_poll *);
 extern void irq_poll_disable(struct irq_poll *);
 
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 53d67e341ebb..2836620e889f 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -47,13 +47,12 @@ EXPORT_SYMBOL(irq_poll_sched);
  * See irq_poll_complete(). This function must be called with interrupts
  * disabled.
  **/
-void __irq_poll_complete(struct irq_poll *iop)
+static void __irq_poll_complete(struct irq_poll *iop)
 {
 	list_del(&iop->list);
 	smp_mb__before_atomic();
 	clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
 }
-EXPORT_SYMBOL(__irq_poll_complete);
 
 /**
  * irq_poll_complete - Mark this @iop as un-polled again
--
cgit v1.2.3-59-g8ed1b


From 839a301dc2c007ec942b73a0025695056648f59b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 7 Dec 2015 06:57:52 -0800
Subject: irq_poll: remove unused data and max fields

Signed-off-by: Christoph Hellwig
Reviewed-by: Bart Van Assche
---
 include/linux/irq_poll.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h
index 8c4b4087f1f2..3e8c1b8fb9be 100644
--- a/include/linux/irq_poll.h
+++ b/include/linux/irq_poll.h
@@ -7,9 +7,7 @@ typedef int (irq_poll_fn)(struct irq_poll *, int);
 struct irq_poll {
 	struct list_head list;
 	unsigned long state;
-	unsigned long data;
 	int weight;
-	int max;
 	irq_poll_fn *poll;
 };
 
--
cgit v1.2.3-59-g8ed1b


From 0de60af649533ad8d9aaeab1df710e6a728d45ea Mon Sep 17 00:00:00 2001
From: Achiad Shochat
Date: Wed, 23 Dec 2015 18:47:19 +0200
Subject: net/mlx5_core: Introduce access functions to enable/disable RoCE

An mlx5 Ethernet port must be explicitly enabled for RoCE. When RoCE is
not enabled on the port, the NIC refuses to create QPs attached to it
and treats incoming RoCE packets as plain Ethernet packets.
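As a usage sketch (the caller below is hypothetical, not part of this
patch), a RoCE consumer brackets its setup with the new helper pair:

	/* Hypothetical consumer: enable RoCE before creating RoCE
	 * resources, disable it again on teardown. */
	static int example_roce_init(struct mlx5_core_dev *mdev)
	{
		int err;

		err = mlx5_nic_vport_enable_roce(mdev);
		if (err)
			return err; /* port keeps treating RoCE as Ethernet */

		/* ... create QPs, program GID entries, etc. ... */
		return 0;
	}

	static void example_roce_cleanup(struct mlx5_core_dev *mdev)
	{
		mlx5_nic_vport_disable_roce(mdev);
	}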
Signed-off-by: Achiad Shochat Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 52 +++++++++++++++++++++++++ include/linux/mlx5/vport.h | 3 ++ 2 files changed, 55 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 54ab63b6b8b0..245ff4a03dd6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -70,6 +70,17 @@ static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u32 *out, return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); } +static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out)); +} + void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) { u32 *out; @@ -350,3 +361,44 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, return err; } EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); + +enum mlx5_vport_roce_state { + MLX5_VPORT_ROCE_DISABLED = 0, + MLX5_VPORT_ROCE_ENABLED = 1, +}; + +static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev, + enum mlx5_vport_roce_state state) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1); + MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en, + state); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} + +int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev) +{ + return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED); +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce); + +int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) +{ + return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 967e0fd06e89..4c9ac604cccd 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -52,4 +52,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, u64 *node_guid); +int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); +int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); + #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3-59-g8ed1b From 9efa75254593d6ca3ae54bac8153f47e1a7cbcda Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Wed, 23 Dec 2015 18:47:20 +0200 Subject: net/mlx5_core: Introduce access functions to query vport RoCE fields Introduce access functions to query NIC vport system_image_guid, node_guid and qkey_viol_cntr. 
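A minimal caller sketch for the three helpers, assuming an initialized
struct mlx5_core_dev *mdev (the surrounding driver context is
hypothetical):

	u64 sys_image_guid, node_guid;
	u16 qkey_viol_cntr;
	int err;

	err = mlx5_query_nic_vport_system_image_guid(mdev, &sys_image_guid);
	if (!err)
		err = mlx5_query_nic_vport_node_guid(mdev, &node_guid);
	if (!err)
		err = mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);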
Signed-off-by: Achiad Shochat Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 62 +++++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 10 +++- include/linux/mlx5/vport.h | 5 ++ 3 files changed, 76 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 245ff4a03dd6..ecb274ae9a81 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -103,6 +103,68 @@ void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) } EXPORT_SYMBOL(mlx5_query_nic_vport_mac_address); +int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, + u64 *system_image_guid) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + mlx5_query_nic_vport_context(mdev, out, outlen); + + *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out, + nic_vport_context.system_image_guid); + + kfree(out); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid); + +int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + mlx5_query_nic_vport_context(mdev, out, outlen); + + *node_guid = MLX5_GET64(query_nic_vport_context_out, out, + nic_vport_context.node_guid); + + kfree(out); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); + +int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, + u16 *qkey_viol_cntr) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + mlx5_query_nic_vport_context(mdev, out, outlen); + + *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.qkey_violation_counter); + + kfree(out); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr); + int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, union ib_gid *gid) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1565324eb620..49b34c6466ac 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2141,7 +2141,15 @@ struct mlx5_ifc_nic_vport_context_bits { u8 reserved_0[0x1f]; u8 roce_en[0x1]; - u8 reserved_1[0x760]; + u8 reserved_1[0x120]; + + u8 system_image_guid[0x40]; + u8 port_guid[0x40]; + u8 node_guid[0x40]; + + u8 reserved_5[0x140]; + u8 qkey_violation_counter[0x10]; + u8 reserved_6[0x430]; u8 reserved_2[0x5]; u8 allowed_list_type[0x3]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 4c9ac604cccd..dfb2d9497d2d 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -37,6 +37,11 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); +int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, + u64 *system_image_guid); +int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); +int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, + u16 *qkey_viol_cntr); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, union ib_gid *gid); -- 
cgit v1.2.3-59-g8ed1b From 3f89a643eb29543af0838d37604bbc29a4e1eb60 Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Wed, 23 Dec 2015 18:47:21 +0200 Subject: IB/mlx5: Extend query_device/port to support RoCE Using the vport access functions to retrieve the Ethernet specific information and return this information in ib_query_device and ib_query_port. Signed-off-by: Achiad Shochat Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 75 +++++++++++++++++++++++++++++++++++---- include/linux/mlx5/driver.h | 7 ---- 2 files changed, 69 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8c6e144ae75b..2b6ac2e7b4a0 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -120,6 +121,50 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, return ndev; } +static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, + struct ib_port_attr *props) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct net_device *ndev; + enum ib_mtu ndev_ib_mtu; + + memset(props, 0, sizeof(*props)); + + props->port_cap_flags |= IB_PORT_CM_SUP; + props->port_cap_flags |= IB_PORT_IP_BASED_GIDS; + + props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, + roce_address_table_size); + props->max_mtu = IB_MTU_4096; + props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); + props->pkey_tbl_len = 1; + props->state = IB_PORT_DOWN; + props->phys_state = 3; + + mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, + (u16 *)&props->qkey_viol_cntr); + + ndev = mlx5_ib_get_netdev(device, port_num); + if (!ndev) + return 0; + + if (netif_running(ndev) && netif_carrier_ok(ndev)) { + props->state = IB_PORT_ACTIVE; + props->phys_state = 5; + } + + ndev_ib_mtu = iboe_get_mtu(ndev->mtu); + + dev_put(ndev); + + props->active_mtu = min(props->max_mtu, ndev_ib_mtu); + + props->active_width = IB_WIDTH_4X; /* TODO */ + props->active_speed = IB_SPEED_QDR; /* TODO */ + + return 0; +} + static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { return !dev->mdev->issi; @@ -158,13 +203,21 @@ static int mlx5_query_system_image_guid(struct ib_device *ibdev, case MLX5_VPORT_ACCESS_METHOD_HCA: err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); - if (!err) - *sys_image_guid = cpu_to_be64(tmp); - return err; + break; + + case MLX5_VPORT_ACCESS_METHOD_NIC: + err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp); + break; default: return -EINVAL; } + + if (!err) + *sys_image_guid = cpu_to_be64(tmp); + + return err; + } static int mlx5_query_max_pkeys(struct ib_device *ibdev, @@ -218,13 +271,20 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev, case MLX5_VPORT_ACCESS_METHOD_HCA: err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp); - if (!err) - *node_guid = cpu_to_be64(tmp); - return err; + break; + + case MLX5_VPORT_ACCESS_METHOD_NIC: + err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp); + break; default: return -EINVAL; } + + if (!err) + *node_guid = cpu_to_be64(tmp); + + return err; } struct mlx5_reg_node_desc { @@ -522,6 +582,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, case MLX5_VPORT_ACCESS_METHOD_HCA: return mlx5_query_hca_port(ibdev, port, props); + case MLX5_VPORT_ACCESS_METHOD_NIC: + return mlx5_query_port_roce(ibdev, port, props); + default: return -EINVAL; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 
5c857f2a20d7..7b9c976b42d9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -632,13 +632,6 @@ extern struct workqueue_struct *mlx5_core_wq; .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field -struct ib_field { - size_t struct_offset_bytes; - size_t struct_size_bytes; - int offset_bits; - int size_bits; -}; - static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev) { return pci_get_drvdata(pdev); -- cgit v1.2.3-59-g8ed1b From cb34be6da25f45034ef4ff6103d401b451165e39 Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Wed, 23 Dec 2015 18:47:22 +0200 Subject: IB/mlx5: Set network_hdr_type upon RoCE responder completion When handling a responder completion, if the link layer is Ethernet, set the work completion network_hdr_type field according to CQE's info and the IB_WC_WITH_NETWORK_HDR_TYPE flag. Signed-off-by: Achiad Shochat Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 17 +++++++++++++++++ include/linux/mlx5/device.h | 6 ++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 3dfd287256d6..3ce5cfa7a4e0 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -171,6 +171,7 @@ enum { static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_ib_qp *qp) { + enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1); struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); struct mlx5_ib_srq *srq; struct mlx5_ib_wq *wq; @@ -236,6 +237,22 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, } else { wc->pkey_index = 0; } + + if (ll != IB_LINK_LAYER_ETHERNET) + return; + + switch (wc->sl & 0x3) { + case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH: + wc->network_hdr_type = RDMA_NETWORK_IB; + break; + case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6: + wc->network_hdr_type = RDMA_NETWORK_IPV6; + break; + case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4: + wc->network_hdr_type = RDMA_NETWORK_IPV4; + break; + } + wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE; } static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0b473cbfa7ef..84aa7e0e1dfa 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -628,6 +628,12 @@ enum { CQE_RSS_HTYPE_L4 = 0x3 << 2, }; +enum { + MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH = 0x0, + MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6 = 0x1, + MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4 = 0x2, +}; + enum { CQE_L2_OK = 1 << 0, CQE_L3_OK = 1 << 1, -- cgit v1.2.3-59-g8ed1b From 3cca26069a4b7f6d8fd3dc0ed707e795c22712e2 Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Wed, 23 Dec 2015 18:47:23 +0200 Subject: IB/mlx5: Support IB device's callbacks for adding/deleting GIDs These callbacks write into the mlx5 RoCE address table. Upon del_gid we write a zero'd GID. 
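A sketch of the v4-mapped GID layout that the conversion helper added
below relies on when it stores only four L3 bytes; the address value is
illustrative:

	/* ::ffff:192.0.2.1 - an IPv4 address embedded in a GID. Only
	 * gid.raw[12..15] carry the address, and that is the slice copied
	 * into source_l3_address for IPv4-typed entries. */
	union ib_gid gid;

	ipv6_addr_set_v4mapped(cpu_to_be32(0xc0000201),
			       (struct in6_addr *)gid.raw);
	/* ipv6_addr_v4mapped((struct in6_addr *)gid.raw) is now true */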
Signed-off-by: Achiad Shochat Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 89 +++++++++++++++++++++++++++++++++++++++ include/linux/mlx5/device.h | 20 +++++++++ 2 files changed, 109 insertions(+) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2b6ac2e7b4a0..bbe92caba93b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -165,6 +165,93 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, return 0; } +static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid, + const struct ib_gid_attr *attr, + void *mlx5_addr) +{ +#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) + char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr, + source_l3_address); + void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr, + source_mac_47_32); + + if (!gid) + return; + + ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr); + + if (is_vlan_dev(attr->ndev)) { + MLX5_SET_RA(mlx5_addr, vlan_valid, 1); + MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev)); + } + + switch (attr->gid_type) { + case IB_GID_TYPE_IB: + MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1); + break; + case IB_GID_TYPE_ROCE_UDP_ENCAP: + MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2); + break; + + default: + WARN_ON(true); + } + + if (attr->gid_type != IB_GID_TYPE_IB) { + if (ipv6_addr_v4mapped((void *)gid)) + MLX5_SET_RA(mlx5_addr, roce_l3_type, + MLX5_ROCE_L3_TYPE_IPV4); + else + MLX5_SET_RA(mlx5_addr, roce_l3_type, + MLX5_ROCE_L3_TYPE_IPV6); + } + + if ((attr->gid_type == IB_GID_TYPE_IB) || + !ipv6_addr_v4mapped((void *)gid)) + memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid)); + else + memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4); +} + +static int set_roce_addr(struct ib_device *device, u8 port_num, + unsigned int index, + const union ib_gid *gid, + const struct ib_gid_attr *attr) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + u32 in[MLX5_ST_SZ_DW(set_roce_address_in)]; + u32 out[MLX5_ST_SZ_DW(set_roce_address_out)]; + void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); + enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num); + + if (ll != IB_LINK_LAYER_ETHERNET) + return -EINVAL; + + memset(in, 0, sizeof(in)); + + ib_gid_to_mlx5_roce_addr(gid, attr, in_addr); + + MLX5_SET(set_roce_address_in, in, roce_address_index, index); + MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num, + unsigned int index, const union ib_gid *gid, + const struct ib_gid_attr *attr, + __always_unused void **context) +{ + return set_roce_addr(device, port_num, index, gid, attr); +} + +static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num, + unsigned int index, __always_unused void **context) +{ + return set_roce_addr(device, port_num, index, NULL, NULL); +} + static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { return !dev->mdev->issi; @@ -1515,6 +1602,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (ll == IB_LINK_LAYER_ETHERNET) dev->ib_dev.get_netdev = mlx5_ib_get_netdev; dev->ib_dev.query_gid = mlx5_ib_query_gid; + dev->ib_dev.add_gid = mlx5_ib_add_gid; + dev->ib_dev.del_gid = mlx5_ib_del_gid; dev->ib_dev.query_pkey = mlx5_ib_query_pkey; dev->ib_dev.modify_device = mlx5_ib_modify_device; 
dev->ib_dev.modify_port = mlx5_ib_modify_port; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 84aa7e0e1dfa..ea4281b00c8d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -278,6 +278,26 @@ enum { MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46, }; +enum { + MLX5_ROCE_VERSION_1 = 0, + MLX5_ROCE_VERSION_2 = 2, +}; + +enum { + MLX5_ROCE_VERSION_1_CAP = 1 << MLX5_ROCE_VERSION_1, + MLX5_ROCE_VERSION_2_CAP = 1 << MLX5_ROCE_VERSION_2, +}; + +enum { + MLX5_ROCE_L3_TYPE_IPV4 = 0, + MLX5_ROCE_L3_TYPE_IPV6 = 1, +}; + +enum { + MLX5_ROCE_L3_TYPE_IPV4_CAP = 1 << 1, + MLX5_ROCE_L3_TYPE_IPV6_CAP = 1 << 2, +}; + enum { MLX5_OPCODE_NOP = 0x00, MLX5_OPCODE_SEND_INVAL = 0x01, -- cgit v1.2.3-59-g8ed1b From 2811ba51b04958cd001b6409c9f70e8563376346 Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Wed, 23 Dec 2015 18:47:24 +0200 Subject: IB/mlx5: Add RoCE fields to Address Vector Set the address handle and QP address path fields according to the link layer type (IB/Eth). Signed-off-by: Achiad Shochat Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/ah.c | 32 +++++++++++++++++++++------ drivers/infiniband/hw/mlx5/main.c | 21 ++++++++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +++-- drivers/infiniband/hw/mlx5/qp.c | 42 ++++++++++++++++++++++++++---------- include/linux/mlx5/qp.h | 21 ++++++++++++------ 5 files changed, 96 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 66080580e24d..745efa4cfc71 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -32,8 +32,10 @@ #include "mlx5_ib.h" -struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, - struct mlx5_ib_ah *ah) +static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, + struct mlx5_ib_ah *ah, + struct ib_ah_attr *ah_attr, + enum rdma_link_layer ll) { if (ah_attr->ah_flags & IB_AH_GRH) { memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16); @@ -44,9 +46,20 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, ah->av.tclass = ah_attr->grh.traffic_class; } - ah->av.rlid = cpu_to_be16(ah_attr->dlid); - ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f; - ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf); + ah->av.stat_rate_sl = (ah_attr->static_rate << 4); + + if (ll == IB_LINK_LAYER_ETHERNET) { + memcpy(ah->av.rmac, ah_attr->dmac, sizeof(ah_attr->dmac)); + ah->av.udp_sport = + mlx5_get_roce_udp_sport(dev, + ah_attr->port_num, + ah_attr->grh.sgid_index); + ah->av.stat_rate_sl |= (ah_attr->sl & 0x7) << 1; + } else { + ah->av.rlid = cpu_to_be16(ah_attr->dlid); + ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f; + ah->av.stat_rate_sl |= (ah_attr->sl & 0xf); + } return &ah->ibah; } @@ -54,12 +67,19 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { struct mlx5_ib_ah *ah; + struct mlx5_ib_dev *dev = to_mdev(pd->device); + enum rdma_link_layer ll; + + ll = pd->device->get_link_layer(pd->device, ah_attr->port_num); + + if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH)) + return ERR_PTR(-EINVAL); ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); - return create_ib_ah(ah_attr, ah); /* never fails */ + return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */ } int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 
bbe92caba93b..3ee431ab8ea9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -252,6 +253,26 @@ static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num, return set_roce_addr(device, port_num, index, NULL, NULL); } +__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, + int index) +{ + struct ib_gid_attr attr; + union ib_gid gid; + + if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr)) + return 0; + + if (!attr.ndev) + return 0; + + dev_put(attr.ndev); + + if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) + return 0; + + return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port)); +} + static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { return !dev->mdev->issi; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 1eaa6111dce0..b0deeb38175d 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -517,8 +517,6 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); -struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, - struct mlx5_ib_ah *ah); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int mlx5_ib_destroy_ah(struct ib_ah *ah); @@ -647,6 +645,9 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ +__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, + int index); + static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 307bdbca8938..0d94a7713f38 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -32,6 +32,7 @@ #include #include +#include #include "mlx5_ib.h" #include "user.h" @@ -1364,17 +1365,12 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, u32 path_flags, const struct ib_qp_attr *attr) { + enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port); int err; - path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; - path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0; - if (attr_mask & IB_QP_PKEY_INDEX) path->pkey_index = attr->pkey_index; - path->grh_mlid = ah->src_path_bits & 0x7f; - path->rlid = cpu_to_be16(ah->dlid); - if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= dev->mdev->port_caps[port - 1].gid_table_len) { @@ -1383,7 +1379,27 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, dev->mdev->port_caps[port - 1].gid_table_len); return -EINVAL; } - path->grh_mlid |= 1 << 7; + } + + if (ll == IB_LINK_LAYER_ETHERNET) { + if (!(ah->ah_flags & IB_AH_GRH)) + return -EINVAL; + memcpy(path->rmac, ah->dmac, sizeof(ah->dmac)); + path->udp_sport = mlx5_get_roce_udp_sport(dev, port, + ah->grh.sgid_index); + path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4; + } else { + path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; + path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 
0x80 : + 0; + path->rlid = cpu_to_be16(ah->dlid); + path->grh_mlid = ah->src_path_bits & 0x7f; + if (ah->ah_flags & IB_AH_GRH) + path->grh_mlid |= 1 << 7; + path->dci_cfi_prio_sl = ah->sl & 0xf; + } + + if (ah->ah_flags & IB_AH_GRH) { path->mgid_index = ah->grh.sgid_index; path->hop_limit = ah->grh.hop_limit; path->tclass_flowlabel = @@ -1401,8 +1417,6 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, if (attr_mask & IB_QP_TIMEOUT) path->ackto_lt = attr->timeout << 3; - path->sl = ah->sl & 0xf; - return 0; } @@ -1765,15 +1779,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum ib_qp_state cur_state, new_state; int err = -EINVAL; int port; + enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) { + port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port); + } + if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, - IB_LINK_LAYER_UNSPECIFIED)) + ll)) goto out; if ((attr_mask & IB_QP_PORT) && @@ -3003,7 +3023,7 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports)) return; - ib_ah_attr->sl = path->sl & 0xf; + ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf; ib_ah_attr->dlid = be16_to_cpu(path->rlid); ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index f079fb1a31f7..a9ad40169191 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -248,8 +248,12 @@ struct mlx5_av { __be32 dqp_dct; u8 stat_rate_sl; u8 fl_mlid; - __be16 rlid; - u8 reserved0[10]; + union { + __be16 rlid; + __be16 udp_sport; + }; + u8 reserved0[4]; + u8 rmac[6]; u8 tclass; u8 hop_limit; __be32 grh_gid_fl; @@ -456,11 +460,16 @@ struct mlx5_qp_path { u8 static_rate; u8 hop_limit; __be32 tclass_flowlabel; - u8 rgid[16]; - u8 rsvd1[4]; - u8 sl; + union { + u8 rgid[16]; + u8 rip[16]; + }; + u8 f_dscp_ecn_prio; + u8 ecn_dscp; + __be16 udp_sport; + u8 dci_cfi_prio_sl; u8 port; - u8 rsvd2[6]; + u8 rmac[6]; }; struct mlx5_qp_context { -- cgit v1.2.3-59-g8ed1b From 7c60bcbb68122b39fe3e92143abce01be75f3fa6 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 15 Dec 2015 20:30:11 +0200 Subject: IB/mlx5: Add support for hca_core_clock and timestamp_mask Reporting the hca_core_clock (in kHZ) and the timestamp_mask in query_device extended verb. timestamp_mask is used by users in order to know what is the valid range of the raw timestamps, while hca_core_clock reports the clock frequency that is used for timestamps. 
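As a sketch of how a consumer combines the two attributes (attr is the
queried ib_device_attr; the cycle counters are illustrative):

	/* hca_core_clock is in kHz, so a masked cycle delta scales to
	 * nanoseconds as delta * 10^6 / kHz. */
	u64 delta = (end_cycles - start_cycles) & attr.timestamp_mask;
	u64 elapsed_ns = delta * 1000000ULL / attr.hca_core_clock;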
Signed-off-by: Matan Barak
Reviewed-by: Moshe Lazer
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/mlx5/main.c | 2 ++
 include/linux/mlx5/mlx5_ifc.h     | 9 ++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 22ae0939b20e..2d7fac53214f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -503,6 +503,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
 					   props->max_mcast_grp;
 	props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+	props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
+	props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	if (MLX5_CAP_GEN(mdev, pg))
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 49b34c6466ac..091d8343d594 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -794,15 +794,18 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8 reserved_63[0x8];
 	u8 log_uar_page_sz[0x10];
 
-	u8 reserved_64[0x100];
+	u8 reserved_64[0x20];
+	u8 device_frequency_mhz[0x20];
+	u8 device_frequency_khz[0x20];
+	u8 reserved_65[0xa0];
 
-	u8 reserved_65[0x1f];
+	u8 reserved_66[0x1f];
 	u8 cqe_zip[0x1];
 
 	u8 cqe_zip_timeout[0x10];
 	u8 cqe_zip_max_num[0x10];
 
-	u8 reserved_66[0x220];
+	u8 reserved_67[0x220];
 };
 
 enum {
--
cgit v1.2.3-59-g8ed1b


From b368d7cb8ceb77f481b066bd8be5fada82da7301 Mon Sep 17 00:00:00 2001
From: Matan Barak
Date: Tue, 15 Dec 2015 20:30:12 +0200
Subject: IB/mlx5: Add hca_core_clock_offset to udata in init_ucontext

Passing hca_core_clock_offset to user-space is mandatory in order to
let user-space read the free-running clock register from the right
offset in the memory mapped page.

Passing this value is done by changing the vendor's command and
response of init_ucontext to be in extensible form.
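A sketch of the extensible contract from the consumer's side;
map_clock_page() is a hypothetical helper:

	/* The kernel fills at most the caller's buffer, reports how many
	 * bytes are valid in response_length, and announces optional
	 * fields in comp_mask before they may be read. */
	struct mlx5_ib_alloc_ucontext_resp resp = {};

	/* ... issue the init_ucontext command; the kernel sets
	 * resp.response_length and resp.comp_mask ... */
	if (resp.comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET)
		map_clock_page(resp.hca_core_clock_offset);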
Signed-off-by: Matan Barak Reviewed-by: Moshe Lazer Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 37 ++++++++++++++++++++++++++++-------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ drivers/infiniband/hw/mlx5/user.h | 12 ++++++++++-- include/linux/mlx5/device.h | 7 +++++-- 4 files changed, 47 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2d7fac53214f..a3ae9ccde460 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -795,8 +795,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_alloc_ucontext_req_v2 req; - struct mlx5_ib_alloc_ucontext_resp resp; + struct mlx5_ib_alloc_ucontext_req_v2 req = {}; + struct mlx5_ib_alloc_ucontext_resp resp = {}; struct mlx5_ib_ucontext *context; struct mlx5_uuar_info *uuari; struct mlx5_uar *uars; @@ -811,20 +811,19 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (!dev->ib_active) return ERR_PTR(-EAGAIN); - memset(&req, 0, sizeof(req)); reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) ver = 0; - else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2)) + else if (reqlen >= sizeof(struct mlx5_ib_alloc_ucontext_req_v2)) ver = 2; else return ERR_PTR(-EINVAL); - err = ib_copy_from_udata(&req, udata, reqlen); + err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req))); if (err) return ERR_PTR(err); - if (req.flags || req.reserved) + if (req.flags) return ERR_PTR(-EINVAL); if (req.total_num_uuars > MLX5_MAX_UUARS) @@ -833,6 +832,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (req.total_num_uuars == 0) return ERR_PTR(-EINVAL); + if (req.comp_mask) + return ERR_PTR(-EOPNOTSUPP); + + if (reqlen > sizeof(req) && + !ib_is_udata_cleared(udata, sizeof(req), + udata->inlen - sizeof(req))) + return ERR_PTR(-EOPNOTSUPP); + req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_NON_FP_BF_REGS_PER_PAGE); if (req.num_low_latency_uuars > req.total_num_uuars - 1) @@ -848,6 +855,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + resp.response_length = min(offsetof(typeof(resp), response_length) + + sizeof(resp.response_length), udata->outlen); context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -898,8 +907,20 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.tot_uuars = req.total_num_uuars; resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports); - err = ib_copy_to_udata(udata, &resp, - sizeof(resp) - sizeof(resp.reserved)); + + if (field_avail(typeof(resp), reserved2, udata->outlen)) + resp.response_length += sizeof(resp.reserved2); + + if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) { + resp.comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; + resp.hca_core_clock_offset = + offsetof(struct mlx5_init_seg, internal_timer_h) % + PAGE_SIZE; + resp.response_length += sizeof(resp.hca_core_clock_offset); + } + + err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto out_uars; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h 
b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b0deeb38175d..b2a66432b865 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -55,6 +55,9 @@ pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ __LINE__, current->pid, ##arg) +#define field_avail(type, fld, sz) (offsetof(type, fld) + \ + sizeof(((type *)0)->fld) <= (sz)) + enum { MLX5_IB_MMAP_CMD_SHIFT = 8, MLX5_IB_MMAP_CMD_MASK = 0xff, diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index 76fb7b927d37..e22a35f03238 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -66,7 +66,11 @@ struct mlx5_ib_alloc_ucontext_req_v2 { __u32 total_num_uuars; __u32 num_low_latency_uuars; __u32 flags; - __u32 reserved; + __u32 comp_mask; +}; + +enum mlx5_ib_alloc_ucontext_resp_mask { + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, }; struct mlx5_ib_alloc_ucontext_resp { @@ -80,7 +84,11 @@ struct mlx5_ib_alloc_ucontext_resp { __u32 max_recv_wr; __u32 max_srq_recv_wr; __u16 num_ports; - __u16 reserved; + __u16 reserved1; + __u32 comp_mask; + __u32 response_length; + __u32 reserved2; + __u64 hca_core_clock_offset; }; struct mlx5_ib_alloc_pd_resp { diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index ea4281b00c8d..48c4623ad651 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -462,9 +462,12 @@ struct mlx5_init_seg { __be32 rsvd1[120]; __be32 initializing; struct health_buffer health; - __be32 rsvd2[884]; + __be32 rsvd2[880]; + __be32 internal_timer_h; + __be32 internal_timer_l; + __be32 rsvd3[2]; __be32 health_counter; - __be32 rsvd3[1019]; + __be32 rsvd4[1019]; __be64 ieee1588_clk; __be32 ieee1588_clk_type; __be32 clr_intx; -- cgit v1.2.3-59-g8ed1b From 051f263098a90d208e2d20251bfd4834bc783214 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 20 Dec 2015 12:16:11 +0200 Subject: IB/mlx5: Add driver cross-channel support Add support of cross-channel functionality to mlx5 driver. This includes ability to ignore overrun for CQ which intended for cross-channel, export device capability and configure the QP to be sync master/slave queues. 
The cross-channel enabled QP supports combination of three possible properties: * WQE processing on the receive queue of this QP * WQE processing on the send queue of this QP * WQE are supported on the send queue Reviewed-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 7 ++++- drivers/infiniband/hw/mlx5/main.c | 3 ++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 16 +++++++++++ drivers/infiniband/hw/mlx5/qp.c | 54 +++++++++++++++++++++++++++++------- include/linux/mlx5/qp.h | 3 ++ 5 files changed, 72 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index bc21ad8ebffd..b14316603e44 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -778,7 +778,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int eqn; int err; - if (attr->flags) + if (check_cq_create_flags(attr->flags)) return ERR_PTR(-EINVAL); if (entries < 0) @@ -800,6 +800,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, spin_lock_init(&cq->lock); cq->resize_buf = NULL; cq->resize_umem = NULL; + cq->create_flags = attr->flags; if (context) { err = create_cq_user(dev, udata, context, cq, entries, @@ -817,6 +818,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, cq->cqe_size = cqe_size; cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; + + if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN) + cqb->ctx.cqe_sz_flags |= (1 << 1); + cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index); err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn); if (err) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 602f067c14cc..8bee6fe732a1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -512,6 +512,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->odp_caps = dev->odp_caps; #endif + if (MLX5_CAP_GEN(mdev, cd)) + props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; + return 0; } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 1303487ffe6b..d4b227126265 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -90,6 +90,10 @@ enum mlx5_ib_mad_ifc_flags { MLX5_MAD_IFC_NET_VIEW = 4, }; +enum { + MLX5_CROSS_CHANNEL_UUAR = 0, +}; + struct mlx5_ib_ucontext { struct ib_ucontext ibucontext; struct list_head db_page_list; @@ -247,6 +251,9 @@ struct mlx5_ib_cq_buf { enum mlx5_ib_qp_flags { MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0, MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1, + MLX5_IB_QP_CROSS_CHANNEL = 1 << 2, + MLX5_IB_QP_MANAGED_SEND = 1 << 3, + MLX5_IB_QP_MANAGED_RECV = 1 << 4, }; struct mlx5_umr_wr { @@ -289,6 +296,7 @@ struct mlx5_ib_cq { struct mlx5_ib_cq_buf *resize_buf; struct ib_umem *resize_umem; int cqe_size; + u32 create_flags; }; struct mlx5_ib_srq { @@ -678,4 +686,12 @@ static inline int is_qp1(enum ib_qp_type qp_type) #define MLX5_MAX_UMR_SHIFT 16 #define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT) +static inline u32 check_cq_create_flags(u32 flags) +{ + /* + * It returns non-zero value for unsupported CQ + * create flags, otherwise it returns zero. 
+ */ + return (flags & ~IB_CQ_FLAGS_IGNORE_OVERRUN); +} #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0d94a7713f38..1ea049ed87da 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -616,18 +616,23 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, /* * TBD: should come from the verbs when we have the API */ - uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH); - if (uuarn < 0) { - mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n"); - mlx5_ib_dbg(dev, "reverting to medium latency\n"); - uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM); + if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) + /* In CROSS_CHANNEL CQ and QP must use the same UAR */ + uuarn = MLX5_CROSS_CHANNEL_UUAR; + else { + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH); if (uuarn < 0) { - mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n"); - mlx5_ib_dbg(dev, "reverting to high latency\n"); - uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); + mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n"); + mlx5_ib_dbg(dev, "reverting to medium latency\n"); + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM); if (uuarn < 0) { - mlx5_ib_warn(dev, "uuar allocation failed\n"); - return uuarn; + mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n"); + mlx5_ib_dbg(dev, "reverting to high latency\n"); + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); + if (uuarn < 0) { + mlx5_ib_warn(dev, "uuar allocation failed\n"); + return uuarn; + } } } } @@ -881,6 +886,21 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, } } + if (init_attr->create_flags & + (IB_QP_CREATE_CROSS_CHANNEL | + IB_QP_CREATE_MANAGED_SEND | + IB_QP_CREATE_MANAGED_RECV)) { + if (!MLX5_CAP_GEN(mdev, cd)) { + mlx5_ib_dbg(dev, "cross-channel isn't supported\n"); + return -EINVAL; + } + if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL) + qp->flags |= MLX5_IB_QP_CROSS_CHANNEL; + if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND) + qp->flags |= MLX5_IB_QP_MANAGED_SEND; + if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV) + qp->flags |= MLX5_IB_QP_MANAGED_RECV; + } if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; @@ -955,6 +975,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST); + if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) + in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER); + if (qp->flags & MLX5_IB_QP_MANAGED_SEND) + in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND); + if (qp->flags & MLX5_IB_QP_MANAGED_RECV) + in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV); + if (qp->scat_cqe && is_connected(init_attr->qp_type)) { int rcqe_sz; int scqe_sz; @@ -3130,6 +3157,13 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; + if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) + qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL; + if (qp->flags & MLX5_IB_QP_MANAGED_SEND) + qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND; + if (qp->flags & MLX5_IB_QP_MANAGED_RECV) + qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; + qp_init_attr->sq_sig_type = qp->sq_signal_bits & 
MLX5_WQE_CTRL_CQ_UPDATE ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index a9ad40169191..fd1ff4110e80 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -130,6 +130,9 @@ enum { MLX5_QP_BIT_RWE = 1 << 14, MLX5_QP_BIT_RAE = 1 << 13, MLX5_QP_BIT_RIC = 1 << 4, + MLX5_QP_BIT_CC_SLAVE_RECV = 1 << 2, + MLX5_QP_BIT_CC_SLAVE_SEND = 1 << 1, + MLX5_QP_BIT_CC_MASTER = 1 << 0 }; enum { -- cgit v1.2.3-59-g8ed1b From f91e6d8941bf450f7842dfc1ed80e948aaa65e8c Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 14 Dec 2015 16:34:09 +0200 Subject: net/mlx5_core: Add setting ATOMIC endian mode HW is capable of 2 requestor endianness modes for standard 8 Bytes atomic: BE (0x0) and host endianness (0x1). Read the supported modes from hca atomic capabilities and configure HW to host endianness mode if supported. Signed-off-by: Eran Ben Elisha Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 57 +++++++++++++++++++++++++- include/linux/mlx5/mlx5_ifc.h | 22 ++++++---- 2 files changed, 70 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4ac8d4cc4973..8b69fbfa2788 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -74,6 +74,11 @@ struct mlx5_device_context { void *context; }; +enum { + MLX5_ATOMIC_REQ_MODE_BE = 0x0, + MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, +}; + static struct mlx5_profile profile[] = { [0] = { .mask = 0, @@ -383,7 +388,7 @@ query_ex: return err; } -static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz) +static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod) { u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)]; int err; @@ -391,6 +396,7 @@ static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz) memset(out, 0, sizeof(out)); MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1); err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); if (err) return err; @@ -400,6 +406,46 @@ static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz) return err; } +static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) +{ + void *set_ctx; + void *set_hca_cap; + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + int req_endianness; + int err; + + if (MLX5_CAP_GEN(dev, atomic)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } else { + return 0; + } + + req_endianness = + MLX5_CAP_ATOMIC(dev, + supported_atomic_req_8B_endianess_mode_1); + + if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS) + return 0; + + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + return -ENOMEM; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + + /* Set requestor to host endianness */ + MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode, + MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS); + + err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC); + + kfree(set_ctx); + return err; +} + static int handle_hca_cap(struct mlx5_core_dev *dev) { void *set_ctx = NULL; @@ -441,7 +487,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12); - err = set_caps(dev, set_ctx, set_sz); + err = set_caps(dev, set_ctx, set_sz, + 
			MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
 
 query_ex:
 	kfree(set_ctx);
@@ -974,6 +1021,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 		goto reclaim_boot_pages;
 	}
 
+	err = handle_hca_cap_atomic(dev);
+	if (err) {
+		dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+		goto reclaim_boot_pages;
+	}
+
 	err = mlx5_satisfy_startup_pages(dev, 0);
 	if (err) {
 		dev_err(&pdev->dev, "failed to allocate init pages\n");
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 091d8343d594..991283b51f61 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -66,6 +66,11 @@ enum {
 	MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN = 0x3
 };
 
+enum {
+	MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0,
+	MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3,
+};
+
 enum {
 	MLX5_CMD_OP_QUERY_HCA_CAP = 0x100,
 	MLX5_CMD_OP_QUERY_ADAPTER = 0x101,
@@ -527,21 +532,24 @@ enum {
 struct mlx5_ifc_atomic_caps_bits {
 	u8 reserved_0[0x40];
 
-	u8 atomic_req_endianness[0x1];
-	u8 reserved_1[0x1f];
+	u8 atomic_req_8B_endianess_mode[0x2];
+	u8 reserved_1[0x4];
+	u8 supported_atomic_req_8B_endianess_mode_1[0x1];
 
-	u8 reserved_2[0x20];
+	u8 reserved_2[0x19];
 
-	u8 reserved_3[0x10];
-	u8 atomic_operations[0x10];
+	u8 reserved_3[0x20];
 
 	u8 reserved_4[0x10];
-	u8 atomic_size_qp[0x10];
+	u8 atomic_operations[0x10];
 
 	u8 reserved_5[0x10];
+	u8 atomic_size_qp[0x10];
+
+	u8 reserved_6[0x10];
 	u8 atomic_size_dc[0x10];
 
-	u8 reserved_6[0x720];
+	u8 reserved_7[0x720];
 };
 
 struct mlx5_ifc_odp_cap_bits {
--
cgit v1.2.3-59-g8ed1b


From da7525d2a9ae9d9d9af754441befcf2560f6cac3 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha
Date: Mon, 14 Dec 2015 16:34:10 +0200
Subject: IB/mlx5: Advertise atomic capabilities in query device

In order to ensure IB-spec correctness of atomic operations, advertise
IB_ATOMIC_HCA if the HW is configured to host endianness mode. If not,
advertise IB_ATOMIC_NONE.
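A consumer-side sketch of what this advertisement enables, using the
ib_query_device() verb of this era (the helper called at the end is
hypothetical):

	struct ib_device_attr attr;

	if (!ib_query_device(ibdev, &attr) &&
	    attr.atomic_cap == IB_ATOMIC_HCA) {
		/* 8-byte compare-and-swap and fetch-and-add are atomic
		 * with respect to other QPs on this HCA */
		post_atomic_wrs(qp);
	}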
Signed-off-by: Eran Ben Elisha Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 28 +++++++++++++++++++++++++++- include/linux/mlx5/driver.h | 5 +++++ 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8bee6fe732a1..03ce0f9fdd86 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -65,6 +65,10 @@ static char mlx5_version[] = DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; +enum { + MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3, +}; + static enum rdma_link_layer mlx5_port_type_cap_to_rdma_ll(int port_type_cap) { @@ -296,6 +300,28 @@ static int mlx5_get_vport_access_method(struct ib_device *ibdev) return MLX5_VPORT_ACCESS_METHOD_HCA; } +static void get_atomic_caps(struct mlx5_ib_dev *dev, + struct ib_device_attr *props) +{ + u8 tmp; + u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations); + u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp); + u8 atomic_req_8B_endianness_mode = + MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode); + + /* Check if HW supports 8 bytes standard atomic operations and capable + * of host endianness respond + */ + tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD; + if (((atomic_operations & tmp) == tmp) && + (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) && + (atomic_req_8B_endianness_mode)) { + props->atomic_cap = IB_ATOMIC_HCA; + } else { + props->atomic_cap = IB_ATOMIC_NONE; + } +} + static int mlx5_query_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid) { @@ -496,7 +522,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = (unsigned int)-1; - props->atomic_cap = IB_ATOMIC_NONE; + get_atomic_caps(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7b9c976b42d9..53c57724c8dd 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -115,6 +115,11 @@ enum { MLX5_REG_HOST_ENDIANNESS = 0x7004, }; +enum { + MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0, + MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1, +}; + enum mlx5_page_fault_resume_flags { MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0, MLX5_PAGE_FAULT_RESUME_WRITE = 1 << 1, -- cgit v1.2.3-59-g8ed1b From f25bf1977f7a968e85fe8ab99252b8132c6cf8c4 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 14 Jan 2016 17:48:07 +0200 Subject: net/mlx4: Remove unused macro The macro mlx4_foreach_non_ib_transport_port() is not used anywhere. Remove it. 
Fixes: aa9a2d51a3e7 ("mlx4: Activate RoCE/SRIOV") Signed-off-by: Moni Shoua Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d3133be12d92..971037188907 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -979,10 +979,6 @@ struct mlx4_mad_ifc { for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if ((type) == (dev)->caps.port_mask[(port)]) -#define mlx4_foreach_non_ib_transport_port(port, dev) \ - for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ - if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB)) - #define mlx4_foreach_ib_transport_port(port, dev) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ -- cgit v1.2.3-59-g8ed1b From cc886c9ff1607eda04062bdcec963e2f8e6a3eb1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Jan 2016 14:49:12 -0500 Subject: svcrdma: Improve allocation of struct svc_rdma_op_ctxt When the maximum payload size of NFS READ and WRITE was increased by commit cc9a903d915c ("svcrdma: Change maximum server payload back to RPCSVC_MAXPAYLOAD"), the size of struct svc_rdma_op_ctxt increased to over 6KB (on x86_64). That makes allocating one of these from a kmem_cache more likely to fail in situations when system memory is exhausted. Since I'm about to add a caller where this allocation must always work _and_ it cannot sleep, pre-allocate ctxts for each connection. Another motivation for this change is that NFSv4.x servers are required by specification not to drop NFS requests. Pre-allocating memory resources reduces the likelihood of a drop. Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 6 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 102 +++++++++++++++++++++++++++---- 2 files changed, 94 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f869807a0d0e..be2804b72cd8 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -69,6 +69,7 @@ extern atomic_t rdma_stat_sq_prod; * completes. 
*/ struct svc_rdma_op_ctxt { + struct list_head free; struct svc_rdma_op_ctxt *read_hdr; struct svc_rdma_fastreg_mr *frmr; int hdr_count; @@ -141,7 +142,10 @@ struct svcxprt_rdma { struct ib_pd *sc_pd; atomic_t sc_dma_used; - atomic_t sc_ctxt_used; + spinlock_t sc_ctxt_lock; + struct list_head sc_ctxts; + int sc_ctxt_used; + struct list_head sc_rq_dto_q; spinlock_t sc_rq_dto_lock; struct ib_qp *sc_qp; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index a100d561d6a4..9801115f6e59 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -153,18 +153,76 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) +static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt, + gfp_t flags) { struct svc_rdma_op_ctxt *ctxt; - ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, - GFP_KERNEL | __GFP_NOFAIL); - ctxt->xprt = xprt; - INIT_LIST_HEAD(&ctxt->dto_q); + ctxt = kmalloc(sizeof(*ctxt), flags); + if (ctxt) { + ctxt->xprt = xprt; + INIT_LIST_HEAD(&ctxt->free); + INIT_LIST_HEAD(&ctxt->dto_q); + } + return ctxt; +} + +static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt) +{ + int i; + + /* Each RPC/RDMA credit can consume a number of send + * and receive WQEs. One ctxt is allocated for each. + */ + i = xprt->sc_sq_depth + xprt->sc_max_requests; + + while (i--) { + struct svc_rdma_op_ctxt *ctxt; + + ctxt = alloc_ctxt(xprt, GFP_KERNEL); + if (!ctxt) { + dprintk("svcrdma: No memory for RDMA ctxt\n"); + return false; + } + list_add(&ctxt->free, &xprt->sc_ctxts); + } + return true; +} + +struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) +{ + struct svc_rdma_op_ctxt *ctxt = NULL; + + spin_lock_bh(&xprt->sc_ctxt_lock); + xprt->sc_ctxt_used++; + if (list_empty(&xprt->sc_ctxts)) + goto out_empty; + + ctxt = list_first_entry(&xprt->sc_ctxts, + struct svc_rdma_op_ctxt, free); + list_del_init(&ctxt->free); + spin_unlock_bh(&xprt->sc_ctxt_lock); + +out: ctxt->count = 0; ctxt->frmr = NULL; - atomic_inc(&xprt->sc_ctxt_used); return ctxt; + +out_empty: + /* Either pre-allocation missed the mark, or send + * queue accounting is broken. 
+ */ + spin_unlock_bh(&xprt->sc_ctxt_lock); + + ctxt = alloc_ctxt(xprt, GFP_NOIO); + if (ctxt) + goto out; + + spin_lock_bh(&xprt->sc_ctxt_lock); + xprt->sc_ctxt_used--; + spin_unlock_bh(&xprt->sc_ctxt_lock); + WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n"); + return NULL; } void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) @@ -190,16 +248,29 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) { - struct svcxprt_rdma *xprt; + struct svcxprt_rdma *xprt = ctxt->xprt; int i; - xprt = ctxt->xprt; if (free_pages) for (i = 0; i < ctxt->count; i++) put_page(ctxt->pages[i]); - kmem_cache_free(svc_rdma_ctxt_cachep, ctxt); - atomic_dec(&xprt->sc_ctxt_used); + spin_lock_bh(&xprt->sc_ctxt_lock); + xprt->sc_ctxt_used--; + list_add(&ctxt->free, &xprt->sc_ctxts); + spin_unlock_bh(&xprt->sc_ctxt_lock); +} + +static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt) +{ + while (!list_empty(&xprt->sc_ctxts)) { + struct svc_rdma_op_ctxt *ctxt; + + ctxt = list_first_entry(&xprt->sc_ctxts, + struct svc_rdma_op_ctxt, free); + list_del(&ctxt->free); + kfree(ctxt); + } } /* @@ -521,11 +592,13 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); + INIT_LIST_HEAD(&cma_xprt->sc_ctxts); init_waitqueue_head(&cma_xprt->sc_send_wait); spin_lock_init(&cma_xprt->sc_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock); spin_lock_init(&cma_xprt->sc_frmr_q_lock); + spin_lock_init(&cma_xprt->sc_ctxt_lock); if (listener) set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); @@ -913,6 +986,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) (size_t)svcrdma_max_requests); newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; + if (!svc_rdma_prealloc_ctxts(newxprt)) + goto errout; + /* * Limit ORD based on client limit, local device limit, and * configured svcrdma limit. @@ -1174,15 +1250,15 @@ static void __svc_rdma_free(struct work_struct *work) } /* Warn if we leaked a resource or under-referenced */ - if (atomic_read(&rdma->sc_ctxt_used) != 0) + if (rdma->sc_ctxt_used != 0) pr_err("svcrdma: ctxt still in use? (%d)\n", - atomic_read(&rdma->sc_ctxt_used)); + rdma->sc_ctxt_used); if (atomic_read(&rdma->sc_dma_used) != 0) pr_err("svcrdma: dma still in use? (%d)\n", atomic_read(&rdma->sc_dma_used)); - /* De-allocate fastreg mr */ rdma_dealloc_frmr_q(rdma); + svc_rdma_destroy_ctxts(rdma); /* Destroy the QP if present (not a listener) */ if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) -- cgit v1.2.3-59-g8ed1b From 2fe81b239dbb00d0a2fd8858ac9dd4ef4a8841ee Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Jan 2016 14:49:20 -0500 Subject: svcrdma: Improve allocation of struct svc_rdma_req_map To ensure this allocation cannot fail and will not sleep, pre-allocate the req_map structures per-connection. 
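The scheme mirrors the per-connection free list introduced for ctxts in the previous patch: the fast path takes a pre-allocated entry under a spinlock and only falls back to the allocator when the list runs dry. A condensed sketch, with field names taken from the diff below:

	struct svc_rdma_req_map *map = NULL;

	spin_lock(&xprt->sc_map_lock);
	if (!list_empty(&xprt->sc_maps)) {
		map = list_first_entry(&xprt->sc_maps,
				       struct svc_rdma_req_map, free);
		list_del_init(&map->free);
	}
	spin_unlock(&xprt->sc_map_lock);
	if (!map)
		map = alloc_req_map(GFP_NOIO);	/* slow path; may fail */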
Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 8 ++- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 6 +-- net/sunrpc/xprtrdma/svc_rdma_transport.c | 85 ++++++++++++++++++++++++++++---- 3 files changed, 84 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index be2804b72cd8..05bf4febad44 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -113,6 +113,7 @@ struct svc_rdma_fastreg_mr { struct list_head frmr_list; }; struct svc_rdma_req_map { + struct list_head free; unsigned long count; union { struct kvec sge[RPCSVC_MAXPAGES]; @@ -145,6 +146,8 @@ struct svcxprt_rdma { spinlock_t sc_ctxt_lock; struct list_head sc_ctxts; int sc_ctxt_used; + spinlock_t sc_map_lock; + struct list_head sc_maps; struct list_head sc_rq_dto_q; spinlock_t sc_rq_dto_lock; @@ -223,8 +226,9 @@ extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); -extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); -extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); +extern struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *); +extern void svc_rdma_put_req_map(struct svcxprt_rdma *, + struct svc_rdma_req_map *); extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 969a1ab75fc3..9a097f95e10b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -591,7 +591,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) /* Build an req vec for the XDR */ ctxt = svc_rdma_get_context(rdma); ctxt->direction = DMA_TO_DEVICE; - vec = svc_rdma_get_req_map(); + vec = svc_rdma_get_req_map(rdma); ret = map_xdr(rdma, &rqstp->rq_res, vec); if (ret) goto err0; @@ -630,14 +630,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec, inline_bytes); - svc_rdma_put_req_map(vec); + svc_rdma_put_req_map(rdma, vec); dprintk("svcrdma: send_reply returns %d\n", ret); return ret; err1: put_page(res_page); err0: - svc_rdma_put_req_map(vec); + svc_rdma_put_req_map(rdma, vec); svc_rdma_put_context(ctxt, 0); return ret; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 9801115f6e59..0b9e17ea777e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -273,23 +273,83 @@ static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt) } } -/* - * Temporary NFS req mappings are shared across all transport - * instances. These are short lived and should be bounded by the number - * of concurrent server threads * depth of the SQ. 
- */ -struct svc_rdma_req_map *svc_rdma_get_req_map(void) +static struct svc_rdma_req_map *alloc_req_map(gfp_t flags) { struct svc_rdma_req_map *map; - map = kmem_cache_alloc(svc_rdma_map_cachep, - GFP_KERNEL | __GFP_NOFAIL); + + map = kmalloc(sizeof(*map), flags); + if (map) + INIT_LIST_HEAD(&map->free); + return map; +} + +static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt) +{ + int i; + + /* One for each receive buffer on this connection. */ + i = xprt->sc_max_requests; + + while (i--) { + struct svc_rdma_req_map *map; + + map = alloc_req_map(GFP_KERNEL); + if (!map) { + dprintk("svcrdma: No memory for request map\n"); + return false; + } + list_add(&map->free, &xprt->sc_maps); + } + return true; +} + +struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt) +{ + struct svc_rdma_req_map *map = NULL; + + spin_lock(&xprt->sc_map_lock); + if (list_empty(&xprt->sc_maps)) + goto out_empty; + + map = list_first_entry(&xprt->sc_maps, + struct svc_rdma_req_map, free); + list_del_init(&map->free); + spin_unlock(&xprt->sc_map_lock); + +out: map->count = 0; return map; + +out_empty: + spin_unlock(&xprt->sc_map_lock); + + /* Pre-allocation amount was incorrect */ + map = alloc_req_map(GFP_NOIO); + if (map) + goto out; + + WARN_ONCE(1, "svcrdma: empty request map list?\n"); + return NULL; } -void svc_rdma_put_req_map(struct svc_rdma_req_map *map) +void svc_rdma_put_req_map(struct svcxprt_rdma *xprt, + struct svc_rdma_req_map *map) { - kmem_cache_free(svc_rdma_map_cachep, map); + spin_lock(&xprt->sc_map_lock); + list_add(&map->free, &xprt->sc_maps); + spin_unlock(&xprt->sc_map_lock); +} + +static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt) +{ + while (!list_empty(&xprt->sc_maps)) { + struct svc_rdma_req_map *map; + + map = list_first_entry(&xprt->sc_maps, + struct svc_rdma_req_map, free); + list_del(&map->free); + kfree(map); + } } /* ib_cq event handler */ @@ -593,12 +653,14 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); INIT_LIST_HEAD(&cma_xprt->sc_ctxts); + INIT_LIST_HEAD(&cma_xprt->sc_maps); init_waitqueue_head(&cma_xprt->sc_send_wait); spin_lock_init(&cma_xprt->sc_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock); spin_lock_init(&cma_xprt->sc_frmr_q_lock); spin_lock_init(&cma_xprt->sc_ctxt_lock); + spin_lock_init(&cma_xprt->sc_map_lock); if (listener) set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); @@ -988,6 +1050,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (!svc_rdma_prealloc_ctxts(newxprt)) goto errout; + if (!svc_rdma_prealloc_maps(newxprt)) + goto errout; /* * Limit ORD based on client limit, local device limit, and @@ -1259,6 +1323,7 @@ static void __svc_rdma_free(struct work_struct *work) rdma_dealloc_frmr_q(rdma); svc_rdma_destroy_ctxts(rdma); + svc_rdma_destroy_maps(rdma); /* Destroy the QP if present (not a listener) */ if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) -- cgit v1.2.3-59-g8ed1b From 71810ef3271d1a06f7002c55c7e354d8c3233762 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Jan 2016 14:49:28 -0500 Subject: svcrdma: Remove unused req_map and ctxt kmem_caches Clean up. 
Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma.c | 35 ----------------------------------- net/sunrpc/xprtrdma/xprt_rdma.h | 7 ------- 3 files changed, 1 insertion(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 05bf4febad44..141edbbb73b3 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -242,6 +242,7 @@ extern struct svc_xprt_class svc_rdma_bc_class; #endif /* svc_rdma.c */ +extern struct workqueue_struct *svc_rdma_wq; extern int svc_rdma_init(void); extern void svc_rdma_cleanup(void); diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 1b7051bdbdc8..e894e0698c45 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -71,10 +71,6 @@ atomic_t rdma_stat_rq_prod; atomic_t rdma_stat_sq_poll; atomic_t rdma_stat_sq_prod; -/* Temporary NFS request map and context caches */ -struct kmem_cache *svc_rdma_map_cachep; -struct kmem_cache *svc_rdma_ctxt_cachep; - struct workqueue_struct *svc_rdma_wq; /* @@ -243,8 +239,6 @@ void svc_rdma_cleanup(void) svc_unreg_xprt_class(&svc_rdma_bc_class); #endif svc_unreg_xprt_class(&svc_rdma_class); - kmem_cache_destroy(svc_rdma_map_cachep); - kmem_cache_destroy(svc_rdma_ctxt_cachep); } int svc_rdma_init(void) @@ -264,39 +258,10 @@ int svc_rdma_init(void) svcrdma_table_header = register_sysctl_table(svcrdma_root_table); - /* Create the temporary map cache */ - svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache", - sizeof(struct svc_rdma_req_map), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!svc_rdma_map_cachep) { - printk(KERN_INFO "Could not allocate map cache.\n"); - goto err0; - } - - /* Create the temporary context cache */ - svc_rdma_ctxt_cachep = - kmem_cache_create("svc_rdma_ctxt_cache", - sizeof(struct svc_rdma_op_ctxt), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!svc_rdma_ctxt_cachep) { - printk(KERN_INFO "Could not allocate WR ctxt cache.\n"); - goto err1; - } - /* Register RDMA with the SVC transport switch */ svc_reg_xprt_class(&svc_rdma_class); #if defined(CONFIG_SUNRPC_BACKCHANNEL) svc_reg_xprt_class(&svc_rdma_bc_class); #endif return 0; - err1: - kmem_cache_destroy(svc_rdma_map_cachep); - err0: - unregister_sysctl_table(svcrdma_table_header); - destroy_workqueue(svc_rdma_wq); - return -ENOMEM; } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 419719198caf..72276c7907e4 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -528,11 +528,4 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *); void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -/* Temporary NFS request map cache. Created in svc_rdma.c */ -extern struct kmem_cache *svc_rdma_map_cachep; -/* WR context cache. Created in svc_rdma.c */ -extern struct kmem_cache *svc_rdma_ctxt_cachep; -/* Workqueue created in svc_rdma.c */ -extern struct workqueue_struct *svc_rdma_wq; - #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ -- cgit v1.2.3-59-g8ed1b From 39b09a1a121cb22820c374f4e92f7ca34be1b75d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Jan 2016 14:49:37 -0500 Subject: svcrdma: Add gfp flags to svc_rdma_post_recv() svc_rdma_post_recv() allocates pages for receive buffers on-demand. It uses GFP_KERNEL so the allocator tries hard, and may sleep. 
But I'm about to add a call to svc_rdma_post_recv() from a function that may not sleep. Since all svc_rdma_post_recv() call sites can tolerate its failure, allow it to fail if the page allocator returns nothing. Longer term, receive buffers, being a finite resource per-connection, should be pre-allocated and re-used. Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 2 +- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 8 +++++--- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 141edbbb73b3..729ff356c18a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -221,7 +221,7 @@ extern struct rpcrdma_read_chunk * extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, enum rpcrdma_errcode); -extern int svc_rdma_post_recv(struct svcxprt_rdma *); +extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t); extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 9a097f95e10b..aeaec7aa34df 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -465,7 +465,7 @@ static int send_reply(struct svcxprt_rdma *rdma, int ret; /* Post a recv buffer to handle another request. */ - ret = svc_rdma_post_recv(rdma); + ret = svc_rdma_post_recv(rdma, GFP_KERNEL); if (ret) { printk(KERN_INFO "svcrdma: could not post a receive buffer, err=%d." diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 0b9e17ea777e..20fc095d6b12 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -668,7 +668,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, return cma_xprt; } -int svc_rdma_post_recv(struct svcxprt_rdma *xprt) +int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags) { struct ib_recv_wr recv_wr, *bad_recv_wr; struct svc_rdma_op_ctxt *ctxt; @@ -686,7 +686,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) pr_err("svcrdma: Too many sges (%d)\n", sge_no); goto err_put_ctxt; } - page = alloc_page(GFP_KERNEL | __GFP_NOFAIL); + page = alloc_page(flags); + if (!page) + goto err_put_ctxt; ctxt->pages[sge_no] = page; pa = ib_dma_map_page(xprt->sc_cm_id->device, page, 0, PAGE_SIZE, @@ -1182,7 +1184,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* Post receive buffers */ for (i = 0; i < newxprt->sc_max_requests; i++) { - ret = svc_rdma_post_recv(newxprt); + ret = svc_rdma_post_recv(newxprt, GFP_KERNEL); if (ret) { dprintk("svcrdma: failure posting receive buffers\n"); goto errout; -- cgit v1.2.3-59-g8ed1b From ba986c96f907a513215fb7f1c0a89261c97251ca Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Jan 2016 14:49:53 -0500 Subject: svcrdma: Make map_xdr non-static Pre-requisite to use map_xdr in the backchannel code. 
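For context, the new caller is the backchannel send path added later in this series; condensed from svc_rdma_bc_sendto() in that patch, the renamed helper is used like so:

	vec = svc_rdma_get_req_map(rdma);
	ret = svc_rdma_map_xdr(rdma, &rqst->rq_snd_buf, vec);
	if (ret)
		goto out_err;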
Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 2 ++ net/sunrpc/xprtrdma/svc_rdma_sendto.c | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 729ff356c18a..aeffa30655ce 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -213,6 +213,8 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, u32, u32, u64, bool); /* svc_rdma_sendto.c */ +extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *, + struct svc_rdma_req_map *); extern int svc_rdma_sendto(struct svc_rqst *); extern struct rpcrdma_read_chunk * svc_rdma_get_read_chunk(struct rpcrdma_msg *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index de7df7b4d855..3c250523f7cc 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -50,9 +50,9 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT -static int map_xdr(struct svcxprt_rdma *xprt, - struct xdr_buf *xdr, - struct svc_rdma_req_map *vec) +int svc_rdma_map_xdr(struct svcxprt_rdma *xprt, + struct xdr_buf *xdr, + struct svc_rdma_req_map *vec) { int sge_no; u32 sge_bytes; @@ -62,7 +62,7 @@ static int map_xdr(struct svcxprt_rdma *xprt, if (xdr->len != (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) { - pr_err("svcrdma: map_xdr: XDR buffer length error\n"); + pr_err("svcrdma: %s: XDR buffer length error\n", __func__); return -EIO; } @@ -97,9 +97,9 @@ static int map_xdr(struct svcxprt_rdma *xprt, sge_no++; } - dprintk("svcrdma: map_xdr: sge_no %d page_no %d " + dprintk("svcrdma: %s: sge_no %d page_no %d " "page_base %u page_len %u head_len %zu tail_len %zu\n", - sge_no, page_no, xdr->page_base, xdr->page_len, + __func__, sge_no, page_no, xdr->page_base, xdr->page_len, xdr->head[0].iov_len, xdr->tail[0].iov_len); vec->count = sge_no; @@ -592,7 +592,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ctxt = svc_rdma_get_context(rdma); ctxt->direction = DMA_TO_DEVICE; vec = svc_rdma_get_req_map(rdma); - ret = map_xdr(rdma, &rqstp->rq_res, vec); + ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec); if (ret) goto err0; inline_bytes = rqstp->rq_res.len; -- cgit v1.2.3-59-g8ed1b From 03fe9931536fe4782e9e34f7f499d588acd2015b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Jan 2016 14:50:02 -0500 Subject: svcrdma: Define maximum number of backchannel requests Extra resources for handling backchannel requests have to be pre-allocated when a transport instance is created. Set up additional fields in svcxprt_rdma to track these resources. The max_requests fields are elements of the RPC-over-RDMA protocol, so they should be u32. To ensure that unsigned arithmetic is used everywhere, some other fields in the svcxprt_rdma struct are updated. 
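Concretely, the accept path below now sizes the receive queue to cover both credit pools and derives the send queue depth from that sum:

	newxprt->sc_rq_depth = newxprt->sc_max_requests +
			       newxprt->sc_max_bc_requests;
	newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;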
Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 13 ++++++++++--- net/sunrpc/xprtrdma/svc_rdma.c | 6 ++++-- net/sunrpc/xprtrdma/svc_rdma_transport.c | 24 ++++++++++++++---------- 3 files changed, 28 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index aeffa30655ce..9a2c418dc690 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -51,6 +51,7 @@ /* RPC/RDMA parameters and stats */ extern unsigned int svcrdma_ord; extern unsigned int svcrdma_max_requests; +extern unsigned int svcrdma_max_bc_requests; extern unsigned int svcrdma_max_req_size; extern atomic_t rdma_stat_recv; @@ -134,10 +135,11 @@ struct svcxprt_rdma { int sc_max_sge; int sc_max_sge_rd; /* max sge for read target */ - int sc_sq_depth; /* Depth of SQ */ atomic_t sc_sq_count; /* Number of SQ WR on queue */ - - int sc_max_requests; /* Depth of RQ */ + unsigned int sc_sq_depth; /* Depth of SQ */ + unsigned int sc_rq_depth; /* Depth of RQ */ + u32 sc_max_requests; /* Forward credits */ + u32 sc_max_bc_requests;/* Backward credits */ int sc_max_req_size; /* Size of each RQ WR buf */ struct ib_pd *sc_pd; @@ -186,6 +188,11 @@ struct svcxprt_rdma { #define RPCRDMA_MAX_REQUESTS 32 #define RPCRDMA_MAX_REQ_SIZE 4096 +/* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our + * current NFSv4.1 implementation supports one backchannel slot. + */ +#define RPCRDMA_MAX_BC_REQUESTS 2 + #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD /* svc_rdma_marshal.c */ diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index e894e0698c45..c846ca9f1eba 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -55,6 +55,7 @@ unsigned int svcrdma_ord = RPCRDMA_ORD; static unsigned int min_ord = 1; static unsigned int max_ord = 4096; unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS; +unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS; static unsigned int min_max_requests = 4; static unsigned int max_max_requests = 16384; unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE; @@ -245,9 +246,10 @@ int svc_rdma_init(void) { dprintk("SVCRDMA Module Init, register RPC RDMA transport\n"); dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord); - dprintk("\tmax_requests : %d\n", svcrdma_max_requests); - dprintk("\tsq_depth : %d\n", + dprintk("\tmax_requests : %u\n", svcrdma_max_requests); + dprintk("\tsq_depth : %u\n", svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); + dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests); dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 8b3ee04f5bb4..af86dfeceb4a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -169,12 +169,12 @@ static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt, static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt) { - int i; + unsigned int i; /* Each RPC/RDMA credit can consume a number of send * and receive WQEs. One ctxt is allocated for each. 
*/ - i = xprt->sc_sq_depth + xprt->sc_max_requests; + i = xprt->sc_sq_depth + xprt->sc_rq_depth; while (i--) { struct svc_rdma_op_ctxt *ctxt; @@ -285,7 +285,7 @@ static struct svc_rdma_req_map *alloc_req_map(gfp_t flags) static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt) { - int i; + unsigned int i; /* One for each receive buffer on this connection. */ i = xprt->sc_max_requests; @@ -1016,8 +1016,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct ib_device *dev; int uninitialized_var(dma_mr_acc); int need_dma_mr = 0; + unsigned int i; int ret = 0; - int i; listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); clear_bit(XPT_CONN, &xprt->xpt_flags); @@ -1046,9 +1046,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_max_sge_rd = min_t(size_t, dev->attrs.max_sge_rd, RPCSVC_MAXPAGES); newxprt->sc_max_req_size = svcrdma_max_req_size; - newxprt->sc_max_requests = min((size_t)dev->attrs.max_qp_wr, - (size_t)svcrdma_max_requests); - newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; + newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr, + svcrdma_max_requests); + newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr, + svcrdma_max_bc_requests); + newxprt->sc_rq_depth = newxprt->sc_max_requests + + newxprt->sc_max_bc_requests; + newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth; if (!svc_rdma_prealloc_ctxts(newxprt)) goto errout; @@ -1077,7 +1081,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk("svcrdma: error creating SQ CQ for connect request\n"); goto errout; } - cq_attr.cqe = newxprt->sc_max_requests; + cq_attr.cqe = newxprt->sc_rq_depth; newxprt->sc_rq_cq = ib_create_cq(dev, rq_comp_handler, cq_event_handler, @@ -1092,7 +1096,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) qp_attr.event_handler = qp_event_handler; qp_attr.qp_context = &newxprt->sc_xprt; qp_attr.cap.max_send_wr = newxprt->sc_sq_depth; - qp_attr.cap.max_recv_wr = newxprt->sc_max_requests; + qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth; qp_attr.cap.max_send_sge = newxprt->sc_max_sge; qp_attr.cap.max_recv_sge = newxprt->sc_max_sge; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; @@ -1183,7 +1187,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_dma_lkey = dev->local_dma_lkey; /* Post receive buffers */ - for (i = 0; i < newxprt->sc_max_requests; i++) { + for (i = 0; i < newxprt->sc_rq_depth; i++) { ret = svc_rdma_post_recv(newxprt, GFP_KERNEL); if (ret) { dprintk("svcrdma: failure posting receive buffers\n"); -- cgit v1.2.3-59-g8ed1b From 5d252f90a800cee5bc57c76d636ae60464f7a887 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 7 Jan 2016 14:50:10 -0500 Subject: svcrdma: Add class for RDMA backwards direction transport To support the server-side of an NFSv4.1 backchannel on RDMA connections, add a transport class that enables backward direction messages on an existing forward channel connection. 
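The dispatch point is the receive path: a message that parses as a backchannel reply is diverted before normal server-side RPC processing. Condensed from the svc_rdma_recvfrom() hunk in this patch (the real code also reposts a receive buffer on error):

	if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
					       &rqstp->rq_arg);
		svc_rdma_put_context(ctxt, 0);
		return ret;
	}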
Signed-off-by: Chuck Lever Acked-by: Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 5 + net/sunrpc/xprt.c | 1 + net/sunrpc/xprtrdma/Makefile | 2 +- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 371 +++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 52 ++++ net/sunrpc/xprtrdma/svc_rdma_transport.c | 14 +- net/sunrpc/xprtrdma/transport.c | 30 ++- net/sunrpc/xprtrdma/xprt_rdma.h | 15 ++ 8 files changed, 475 insertions(+), 15 deletions(-) create mode 100644 net/sunrpc/xprtrdma/svc_rdma_backchannel.c (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 9a2c418dc690..b13513a0caf4 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -195,6 +195,11 @@ struct svcxprt_rdma { #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD +/* svc_rdma_backchannel.c */ +extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, + struct rpcrdma_msg *rmsgp, + struct xdr_buf *rcvbuf); + /* svc_rdma_marshal.c */ extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 2e98f4a243e5..37edea6fa92d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1425,3 +1425,4 @@ void xprt_put(struct rpc_xprt *xprt) if (atomic_dec_and_test(&xprt->count)) xprt_destroy(xprt); } +EXPORT_SYMBOL_GPL(xprt_put); diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index 33f99d3004f2..dc9f3b513a05 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o rpcrdma-y := transport.o rpc_rdma.o verbs.o \ fmr_ops.o frwr_ops.o physical_ops.o \ - svc_rdma.o svc_rdma_transport.o \ + svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \ svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \ module.o rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c new file mode 100644 index 000000000000..deff06a82914 --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -0,0 +1,371 @@ +/* + * Copyright (c) 2015 Oracle. All rights reserved. + * + * Support for backward direction RPCs on RPC/RDMA (server-side). 
+ */ + +#include +#include "xprt_rdma.h" + +#define RPCDBG_FACILITY RPCDBG_SVCXPRT + +#undef SVCRDMA_BACKCHANNEL_DEBUG + +int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp, + struct xdr_buf *rcvbuf) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct kvec *dst, *src = &rcvbuf->head[0]; + struct rpc_rqst *req; + unsigned long cwnd; + u32 credits; + size_t len; + __be32 xid; + __be32 *p; + int ret; + + p = (__be32 *)src->iov_base; + len = src->iov_len; + xid = rmsgp->rm_xid; + +#ifdef SVCRDMA_BACKCHANNEL_DEBUG + pr_info("%s: xid=%08x, length=%zu\n", + __func__, be32_to_cpu(xid), len); + pr_info("%s: RPC/RDMA: %*ph\n", + __func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp); + pr_info("%s: RPC: %*ph\n", + __func__, (int)len, p); +#endif + + ret = -EAGAIN; + if (src->iov_len < 24) + goto out_shortreply; + + spin_lock_bh(&xprt->transport_lock); + req = xprt_lookup_rqst(xprt, xid); + if (!req) + goto out_notfound; + + dst = &req->rq_private_buf.head[0]; + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); + if (dst->iov_len < len) + goto out_unlock; + memcpy(dst->iov_base, p, len); + + credits = be32_to_cpu(rmsgp->rm_credit); + if (credits == 0) + credits = 1; /* don't deadlock */ + else if (credits > r_xprt->rx_buf.rb_bc_max_requests) + credits = r_xprt->rx_buf.rb_bc_max_requests; + + cwnd = xprt->cwnd; + xprt->cwnd = credits << RPC_CWNDSHIFT; + if (xprt->cwnd > cwnd) + xprt_release_rqst_cong(req->rq_task); + + ret = 0; + xprt_complete_rqst(req->rq_task, rcvbuf->len); + rcvbuf->len = 0; + +out_unlock: + spin_unlock_bh(&xprt->transport_lock); +out: + return ret; + +out_shortreply: + dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n", + xprt, src->iov_len); + goto out; + +out_notfound: + dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n", + xprt, be32_to_cpu(xid)); + + goto out_unlock; +} + +/* Send a backwards direction RPC call. + * + * Caller holds the connection's mutex and has already marshaled + * the RPC/RDMA request. + * + * This is similar to svc_rdma_reply, but takes an rpc_rqst + * instead, does not support chunks, and avoids blocking memory + * allocation. + * + * XXX: There is still an opportunity to block in svc_rdma_send() + * if there are no SQ entries to post the Send. This may occur if + * the adapter has a small maximum SQ depth. + */ +static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, + struct rpc_rqst *rqst) +{ + struct xdr_buf *sndbuf = &rqst->rq_snd_buf; + struct svc_rdma_op_ctxt *ctxt; + struct svc_rdma_req_map *vec; + struct ib_send_wr send_wr; + int ret; + + vec = svc_rdma_get_req_map(rdma); + ret = svc_rdma_map_xdr(rdma, sndbuf, vec); + if (ret) + goto out_err; + + /* Post a recv buffer to handle the reply for this request. 
*/ + ret = svc_rdma_post_recv(rdma, GFP_NOIO); + if (ret) { + pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n", + ret); + pr_err("svcrdma: closing transport %p.\n", rdma); + set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); + ret = -ENOTCONN; + goto out_err; + } + + ctxt = svc_rdma_get_context(rdma); + ctxt->pages[0] = virt_to_page(rqst->rq_buffer); + ctxt->count = 1; + + ctxt->wr_op = IB_WR_SEND; + ctxt->direction = DMA_TO_DEVICE; + ctxt->sge[0].lkey = rdma->sc_dma_lkey; + ctxt->sge[0].length = sndbuf->len; + ctxt->sge[0].addr = + ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0, + sndbuf->len, DMA_TO_DEVICE); + if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) { + ret = -EIO; + goto out_unmap; + } + atomic_inc(&rdma->sc_dma_used); + + memset(&send_wr, 0, sizeof(send_wr)); + send_wr.wr_id = (unsigned long)ctxt; + send_wr.sg_list = ctxt->sge; + send_wr.num_sge = 1; + send_wr.opcode = IB_WR_SEND; + send_wr.send_flags = IB_SEND_SIGNALED; + + ret = svc_rdma_send(rdma, &send_wr); + if (ret) { + ret = -EIO; + goto out_unmap; + } + +out_err: + svc_rdma_put_req_map(rdma, vec); + dprintk("svcrdma: %s returns %d\n", __func__, ret); + return ret; + +out_unmap: + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 1); + goto out_err; +} + +/* Server-side transport endpoint wants a whole page for its send + * buffer. The client RPC code constructs the RPC header in this + * buffer before it invokes ->send_request. + * + * Returns NULL if there was a temporary allocation failure. + */ +static void * +xprt_rdma_bc_allocate(struct rpc_task *task, size_t size) +{ + struct rpc_rqst *rqst = task->tk_rqstp; + struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; + struct svcxprt_rdma *rdma; + struct page *page; + + rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); + + /* Prevent an infinite loop: try to make this case work */ + if (size > PAGE_SIZE) + WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n", + size); + + page = alloc_page(RPCRDMA_DEF_GFP); + if (!page) + return NULL; + + return page_address(page); +} + +static void +xprt_rdma_bc_free(void *buffer) +{ + /* No-op: ctxt and page have already been freed. */ +} + +static int +rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) +{ + struct rpc_xprt *xprt = rqst->rq_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer; + int rc; + + /* Space in the send buffer for an RPC/RDMA header is reserved + * via xprt->tsh_size. + */ + headerp->rm_xid = rqst->rq_xid; + headerp->rm_vers = rpcrdma_version; + headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); + headerp->rm_type = rdma_msg; + headerp->rm_body.rm_chunks[0] = xdr_zero; + headerp->rm_body.rm_chunks[1] = xdr_zero; + headerp->rm_body.rm_chunks[2] = xdr_zero; + +#ifdef SVCRDMA_BACKCHANNEL_DEBUG + pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); +#endif + + rc = svc_rdma_bc_sendto(rdma, rqst); + if (rc) + goto drop_connection; + return rc; + +drop_connection: + dprintk("svcrdma: failed to send bc call\n"); + xprt_disconnect_done(xprt); + return -ENOTCONN; +} + +/* Send an RPC call on the passive end of a transport + * connection. 
+ */ +static int +xprt_rdma_bc_send_request(struct rpc_task *task) +{ + struct rpc_rqst *rqst = task->tk_rqstp; + struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; + struct svcxprt_rdma *rdma; + int ret; + + dprintk("svcrdma: sending bc call with xid: %08x\n", + be32_to_cpu(rqst->rq_xid)); + + if (!mutex_trylock(&sxprt->xpt_mutex)) { + rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL); + if (!mutex_trylock(&sxprt->xpt_mutex)) + return -EAGAIN; + rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task); + } + + ret = -ENOTCONN; + rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); + if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) + ret = rpcrdma_bc_send_request(rdma, rqst); + + mutex_unlock(&sxprt->xpt_mutex); + + if (ret < 0) + return ret; + return 0; +} + +static void +xprt_rdma_bc_close(struct rpc_xprt *xprt) +{ + dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); +} + +static void +xprt_rdma_bc_put(struct rpc_xprt *xprt) +{ + dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); + + xprt_free(xprt); + module_put(THIS_MODULE); +} + +static struct rpc_xprt_ops xprt_rdma_bc_procs = { + .reserve_xprt = xprt_reserve_xprt_cong, + .release_xprt = xprt_release_xprt_cong, + .alloc_slot = xprt_alloc_slot, + .release_request = xprt_release_rqst_cong, + .buf_alloc = xprt_rdma_bc_allocate, + .buf_free = xprt_rdma_bc_free, + .send_request = xprt_rdma_bc_send_request, + .set_retrans_timeout = xprt_set_retrans_timeout_def, + .close = xprt_rdma_bc_close, + .destroy = xprt_rdma_bc_put, + .print_stats = xprt_rdma_print_stats +}; + +static const struct rpc_timeout xprt_rdma_bc_timeout = { + .to_initval = 60 * HZ, + .to_maxval = 60 * HZ, +}; + +/* It shouldn't matter if the number of backchannel session slots + * doesn't match the number of RPC/RDMA credits. That just means + * one or the other will have extra slots that aren't used. 
+ */ +static struct rpc_xprt * +xprt_setup_rdma_bc(struct xprt_create *args) +{ + struct rpc_xprt *xprt; + struct rpcrdma_xprt *new_xprt; + + if (args->addrlen > sizeof(xprt->addr)) { + dprintk("RPC: %s: address too large\n", __func__); + return ERR_PTR(-EBADF); + } + + xprt = xprt_alloc(args->net, sizeof(*new_xprt), + RPCRDMA_MAX_BC_REQUESTS, + RPCRDMA_MAX_BC_REQUESTS); + if (!xprt) { + dprintk("RPC: %s: couldn't allocate rpc_xprt\n", + __func__); + return ERR_PTR(-ENOMEM); + } + + xprt->timeout = &xprt_rdma_bc_timeout; + xprt_set_bound(xprt); + xprt_set_connected(xprt); + xprt->bind_timeout = RPCRDMA_BIND_TO; + xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; + xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; + + xprt->prot = XPRT_TRANSPORT_BC_RDMA; + xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32); + xprt->ops = &xprt_rdma_bc_procs; + + memcpy(&xprt->addr, args->dstaddr, args->addrlen); + xprt->addrlen = args->addrlen; + xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr); + xprt->resvport = 0; + + xprt->max_payload = xprt_rdma_max_inline_read; + + new_xprt = rpcx_to_rdmax(xprt); + new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs; + + xprt_get(xprt); + args->bc_xprt->xpt_bc_xprt = xprt; + xprt->bc_xprt = args->bc_xprt; + + if (!try_module_get(THIS_MODULE)) + goto out_fail; + + /* Final put for backchannel xprt is in __svc_rdma_free */ + xprt_get(xprt); + return xprt; + +out_fail: + xprt_rdma_free_addresses(xprt); + args->bc_xprt->xpt_bc_xprt = NULL; + xprt_put(xprt); + xprt_free(xprt); + return ERR_PTR(-EINVAL); +} + +struct xprt_class xprt_rdma_bc = { + .list = LIST_HEAD_INIT(xprt_rdma_bc.list), + .name = "rdma backchannel", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_BC_RDMA, + .setup = xprt_setup_rdma_bc, +}; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index ff4f01e527ec..3dfe4642ec92 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -567,6 +567,38 @@ static int rdma_read_complete(struct svc_rqst *rqstp, return ret; } +/* By convention, backchannel calls arrive via rdma_msg type + * messages, and never populate the chunk lists. This makes + * the RPC/RDMA header small and fixed in size, so it is + * straightforward to check the RPC header's direction field. + */ +static bool +svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp) +{ + __be32 *p = (__be32 *)rmsgp; + + if (!xprt->xpt_bc_xprt) + return false; + + if (rmsgp->rm_type != rdma_msg) + return false; + if (rmsgp->rm_body.rm_chunks[0] != xdr_zero) + return false; + if (rmsgp->rm_body.rm_chunks[1] != xdr_zero) + return false; + if (rmsgp->rm_body.rm_chunks[2] != xdr_zero) + return false; + + /* sanity */ + if (p[7] != rmsgp->rm_xid) + return false; + /* call direction */ + if (p[8] == cpu_to_be32(RPC_CALL)) + return false; + + return true; +} + /* * Set up the rqstp thread context to point to the RQ buffer. If * necessary, pull additional data from the client with an RDMA_READ @@ -632,6 +664,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) goto close_out; } + if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) { + ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp, + &rqstp->rq_arg); + svc_rdma_put_context(ctxt, 0); + if (ret) + goto repost; + return ret; + } + /* Read read-list data. 
*/ ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt); if (ret > 0) { @@ -668,4 +709,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) set_bit(XPT_CLOSE, &xprt->xpt_flags); defer: return 0; + +repost: + ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL); + if (ret) { + pr_err("svcrdma: could not post a receive buffer, err=%d.\n", + ret); + pr_err("svcrdma: closing transport %p.\n", rdma_xprt); + set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags); + ret = -ENOTCONN; + } + return ret; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index af86dfeceb4a..7fd23955f1d4 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1287,12 +1287,14 @@ static void __svc_rdma_free(struct work_struct *work) { struct svcxprt_rdma *rdma = container_of(work, struct svcxprt_rdma, sc_work); - dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); + struct svc_xprt *xprt = &rdma->sc_xprt; + + dprintk("svcrdma: %s(%p)\n", __func__, rdma); /* We should only be called from kref_put */ - if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0) + if (atomic_read(&xprt->xpt_ref.refcount) != 0) pr_err("svcrdma: sc_xprt still in use? (%d)\n", - atomic_read(&rdma->sc_xprt.xpt_ref.refcount)); + atomic_read(&xprt->xpt_ref.refcount)); /* * Destroy queued, but not processed read completions. Note @@ -1327,6 +1329,12 @@ static void __svc_rdma_free(struct work_struct *work) pr_err("svcrdma: dma still in use? (%d)\n", atomic_read(&rdma->sc_dma_used)); + /* Final put of backchannel client transport */ + if (xprt->xpt_bc_xprt) { + xprt_put(xprt->xpt_bc_xprt); + xprt->xpt_bc_xprt = NULL; + } + rdma_dealloc_frmr_q(rdma); svc_rdma_destroy_ctxts(rdma); svc_rdma_destroy_maps(rdma); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 8c545f7d7525..5c7d235672fa 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -63,7 +63,7 @@ */ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; -static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; +unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_inline_write_padding; static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; @@ -143,12 +143,7 @@ static struct ctl_table sunrpc_table[] = { #endif -#define RPCRDMA_BIND_TO (60U * HZ) -#define RPCRDMA_INIT_REEST_TO (5U * HZ) -#define RPCRDMA_MAX_REEST_TO (30U * HZ) -#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ) - -static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ +static struct rpc_xprt_ops xprt_rdma_procs; /*forward reference */ static void xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap) @@ -174,7 +169,7 @@ xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap) xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6; } -static void +void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap) { char buf[128]; @@ -203,7 +198,7 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap) xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; } -static void +void xprt_rdma_free_addresses(struct rpc_xprt *xprt) { unsigned int i; @@ -499,7 +494,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) if (req == NULL) return NULL; - flags = GFP_NOIO | __GFP_NOWARN; + flags = RPCRDMA_DEF_GFP; if (RPC_IS_SWAPPER(task)) flags = __GFP_MEMALLOC | GFP_NOWAIT | 
__GFP_NOWARN; @@ -639,7 +634,7 @@ drop_connection: return -ENOTCONN; /* implies disconnect */ } -static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) +void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); long idle_time = 0; @@ -740,6 +735,11 @@ void xprt_rdma_cleanup(void) rpcrdma_destroy_wq(); frwr_destroy_recovery_wq(); + + rc = xprt_unregister_transport(&xprt_rdma_bc); + if (rc) + dprintk("RPC: %s: xprt_unregister(bc) returned %i\n", + __func__, rc); } int xprt_rdma_init(void) @@ -763,6 +763,14 @@ int xprt_rdma_init(void) return rc; } + rc = xprt_register_transport(&xprt_rdma_bc); + if (rc) { + xprt_unregister_transport(&xprt_rdma); + rpcrdma_destroy_wq(); + frwr_destroy_recovery_wq(); + return rc; + } + dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); dprintk("Defaults:\n"); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 72276c7907e4..5a38236d0516 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -55,6 +55,11 @@ #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ +#define RPCRDMA_BIND_TO (60U * HZ) +#define RPCRDMA_INIT_REEST_TO (5U * HZ) +#define RPCRDMA_MAX_REEST_TO (30U * HZ) +#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ) + /* * Interface Adapter -- one per transport instance */ @@ -147,6 +152,8 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) return (struct rpcrdma_msg *)rb->rg_base; } +#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) + /* * struct rpcrdma_rep -- this structure encapsulates state required to recv * and complete a reply, asychronously. It needs several pieces of @@ -308,6 +315,8 @@ struct rpcrdma_buffer { u32 rb_bc_srv_max_requests; spinlock_t rb_reqslock; /* protect rb_allreqs */ struct list_head rb_allreqs; + + u32 rb_bc_max_requests; }; #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) @@ -513,6 +522,10 @@ int rpcrdma_marshal_req(struct rpc_rqst *); /* RPC/RDMA module init - xprtrdma/transport.c */ +extern unsigned int xprt_rdma_max_inline_read; +void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); +void xprt_rdma_free_addresses(struct rpc_xprt *xprt); +void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq); int xprt_rdma_init(void); void xprt_rdma_cleanup(void); @@ -528,4 +541,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *); void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ +extern struct xprt_class xprt_rdma_bc; + #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ -- cgit v1.2.3-59-g8ed1b From 5fe1043da84887369d32459514f2c7d98ff37936 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Jan 2016 23:53:41 -0800 Subject: svc_rdma: use local_dma_lkey We now always have a per-PD local_dma_lkey available. Make use of that fact in svc_rdma and stop registering our own MR. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe Reviewed-by: Chuck Lever Reviewed-by: Steve Wise Acked-by: J.
Bruce Fields Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 2 -- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 4 ++-- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 6 ++--- net/sunrpc/xprtrdma/svc_rdma_transport.c | 36 ++++-------------------------- 5 files changed, 10 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index b13513a0caf4..5322fea6fe4c 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -156,13 +156,11 @@ struct svcxprt_rdma { struct ib_qp *sc_qp; struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; - struct ib_mr *sc_phys_mr; /* MR for server memory */ int (*sc_reader)(struct svcxprt_rdma *, struct svc_rqst *, struct svc_rdma_op_ctxt *, int *, u32 *, u32, u32, u64, bool); u32 sc_dev_caps; /* distilled device caps */ - u32 sc_dma_lkey; /* local dma key */ unsigned int sc_frmr_pg_list_len; struct list_head sc_frmr_q; spinlock_t sc_frmr_q_lock; diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index deff06a82914..65a7c232a345 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -128,7 +128,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, ctxt->wr_op = IB_WR_SEND; ctxt->direction = DMA_TO_DEVICE; - ctxt->sge[0].lkey = rdma->sc_dma_lkey; + ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; ctxt->sge[0].length = sndbuf->len; ctxt->sge[0].addr = ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0, diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 3dfe4642ec92..c8b8a8b4181e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -144,6 +144,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; head->arg.page_len += len; + head->arg.len += len; if (!pg_off) head->count++; @@ -160,8 +161,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, goto err; atomic_inc(&xprt->sc_dma_used); - /* The lkey here is either a local dma lkey or a dma_mr lkey */ - ctxt->sge[pno].lkey = xprt->sc_dma_lkey; + ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey; ctxt->sge[pno].length = len; ctxt->count++; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 3c250523f7cc..df57f3ce6cd2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -265,7 +265,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, sge[sge_no].addr)) goto err; atomic_inc(&xprt->sc_dma_used); - sge[sge_no].lkey = xprt->sc_dma_lkey; + sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; ctxt->count++; sge_off = 0; sge_no++; @@ -480,7 +480,7 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->count = 1; /* Prepare the SGE for the RPCRDMA Header */ - ctxt->sge[0].lkey = rdma->sc_dma_lkey; + ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); ctxt->sge[0].addr = ib_dma_map_page(rdma->sc_cm_id->device, page, 0, @@ -504,7 +504,7 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[sge_no].addr)) goto err; atomic_inc(&rdma->sc_dma_used); - ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; + ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey; ctxt->sge[sge_no].length = sge_bytes; } if (byte_count != 0) { diff --git 
a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 7fd23955f1d4..5763825d09bf 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -232,11 +232,11 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { /* * Unmap the DMA addr in the SGE if the lkey matches - * the sc_dma_lkey, otherwise, ignore it since it is + * the local_dma_lkey, otherwise, ignore it since it is * an FRMR lkey and will be unmapped later when the * last WR that uses it completes. */ - if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { + if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) { atomic_dec(&xprt->sc_dma_used); ib_dma_unmap_page(xprt->sc_cm_id->device, ctxt->sge[i].addr, @@ -698,7 +698,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags) atomic_inc(&xprt->sc_dma_used); ctxt->sge[sge_no].addr = pa; ctxt->sge[sge_no].length = PAGE_SIZE; - ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; + ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; ctxt->count = sge_no + 1; buflen += PAGE_SIZE; } @@ -1014,8 +1014,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct ib_cq_init_attr cq_attr = {}; struct ib_qp_init_attr qp_attr; struct ib_device *dev; - int uninitialized_var(dma_mr_acc); - int need_dma_mr = 0; unsigned int i; int ret = 0; @@ -1160,32 +1158,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num)) goto errout; - if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) || - !(dev->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { - need_dma_mr = 1; - dma_mr_acc = IB_ACCESS_LOCAL_WRITE; - if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) && - !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) - dma_mr_acc |= IB_ACCESS_REMOTE_WRITE; - } - if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num)) newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV; - /* Create the DMA MR if needed, otherwise, use the DMA LKEY */ - if (need_dma_mr) { - /* Register all of physical memory */ - newxprt->sc_phys_mr = - ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc); - if (IS_ERR(newxprt->sc_phys_mr)) { - dprintk("svcrdma: Failed to create DMA MR ret=%d\n", - ret); - goto errout; - } - newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey; - } else - newxprt->sc_dma_lkey = dev->local_dma_lkey; - /* Post receive buffers */ for (i = 0; i < newxprt->sc_rq_depth; i++) { ret = svc_rdma_post_recv(newxprt, GFP_KERNEL); @@ -1349,9 +1324,6 @@ static void __svc_rdma_free(struct work_struct *work) if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq)) ib_destroy_cq(rdma->sc_rq_cq); - if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr)) - ib_dereg_mr(rdma->sc_phys_mr); - if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) ib_dealloc_pd(rdma->sc_pd); @@ -1479,7 +1451,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, return; } atomic_inc(&xprt->sc_dma_used); - ctxt->sge[0].lkey = xprt->sc_dma_lkey; + ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey; ctxt->sge[0].length = length; /* Prepare SEND WR */ -- cgit v1.2.3-59-g8ed1b From d8ae914196d35bbc0c459aec6de588ba585a1c3e Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 14 Jan 2016 17:50:32 +0200 Subject: net/mlx4: Query RoCE support Query the RoCE support from firmware using the appropriate firmware commands. Downstream patches will read these capabilities and act accordingly. 
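A downstream consumer would gate its RoCE v2 setup on the new capability bit in the usual mlx4 flags2 fashion; a hypothetical sketch (the enum value is defined in the diff below, but the surrounding code and helper are not part of this patch):

	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
		/* firmware supports both RoCE v1 and RoCE v2 */
		setup_roce_v2(dev);	/* hypothetical helper */
	}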
Signed-off-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx4/fw.c | 23 +++++++++++++++++++++++ include/linux/mlx4/device.h | 10 +++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 90db94e83fde..e6282fc16d74 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -157,6 +157,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [29] = "802.1ad offload support", [31] = "Modifying loopback source checks using UPDATE_QP support", [32] = "Loopback source checks support", + [33] = "RoCEv2 support" }; int i; @@ -626,6 +627,8 @@ out: return err; } +static void disable_unsupported_roce_caps(void *buf); + int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { struct mlx4_cmd_mailbox *mailbox; @@ -738,6 +741,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) if (err) goto out; + if (mlx4_is_mfunc(dev)) + disable_unsupported_roce_caps(outbox); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET); dev_cap->reserved_qps = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET); @@ -905,6 +910,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE; MLX4_GET(dev_cap->bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2; if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP; MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); @@ -1160,6 +1167,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, if (err) return err; + disable_unsupported_roce_caps(outbox->buf); /* add port mng change event capability and disable mw type 1 * unconditionally to slaves */ @@ -1257,6 +1265,21 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, return 0; } +static void disable_unsupported_roce_caps(void *buf) +{ + u32 flags; + + MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); + flags &= ~(1UL << 31); + MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); + MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); + flags &= ~(1UL << 24); + MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); + MLX4_GET(flags, buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + flags &= ~(MLX4_FLAG_ROCE_V1_V2); + MLX4_PUT(buf, flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); +} + int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 971037188907..28cbee0df7d7 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -216,6 +216,7 @@ enum { MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30, MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31, MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, + MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, }; enum { @@ -267,12 +268,14 @@ enum { MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10, MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11, + MLX4_BMME_FLAG_ROCE_V1_V2 = 1 << 19, MLX4_BMME_FLAG_PORT_REMAP = 1 << 24, MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28, }; enum { - MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP + MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP, + MLX4_FLAG_ROCE_V1_V2 = MLX4_BMME_FLAG_ROCE_V1_V2 
}; enum mlx4_event { @@ -980,9 +983,10 @@ struct mlx4_mad_ifc { if ((type) == (dev)->caps.port_mask[(port)]) #define mlx4_foreach_ib_transport_port(port, dev) \ - for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ + for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ - ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) + ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \ + ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)) #define MLX4_INVALID_SLAVE_ID 0xFF #define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1) -- cgit v1.2.3-59-g8ed1b From 7e57b85c444c3c1bf3550aa6890666fc4353bd33 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 14 Jan 2016 17:50:35 +0200 Subject: IB/mlx4: Add support for setting RoCEv2 gids in hardware To tell hardware about a gid with type RoCEv2, software needs a new modifier to the SET_PORT command: MLX4_SET_PORT_ROCE_ADDR. This can replace the old method, MLX4_SET_PORT_GID_TABLE, for RoCEv1 gids. Signed-off-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 63 +++++++++++++++++++++++++++++++++++++-- include/linux/mlx4/cmd.h | 3 +- 2 files changed, 62 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b179909ac875..edf3b1c0523d 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -154,9 +154,9 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_n return dev; } -static int mlx4_ib_update_gids(struct gid_entry *gids, - struct mlx4_ib_dev *ibdev, - u8 port_num) +static int mlx4_ib_update_gids_v1(struct gid_entry *gids, + struct mlx4_ib_dev *ibdev, + u8 port_num) { struct mlx4_cmd_mailbox *mailbox; int err; @@ -187,6 +187,63 @@ static int mlx4_ib_update_gids(struct gid_entry *gids, return err; } +static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids, + struct mlx4_ib_dev *ibdev, + u8 port_num) +{ + struct mlx4_cmd_mailbox *mailbox; + int err; + struct mlx4_dev *dev = ibdev->dev; + int i; + struct { + union ib_gid gid; + __be32 rsrvd1[2]; + __be16 rsrvd2; + u8 type; + u8 version; + __be32 rsrvd3; + } *gid_tbl; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return -ENOMEM; + + gid_tbl = mailbox->buf; + for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { + memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid)); + if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + gid_tbl[i].version = 2; + if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid)) + gid_tbl[i].type = 1; + else + memset(&gid_tbl[i].gid, 0, 12); + } + } + + err = mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_ROCE_ADDR << 8 | port_num, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (mlx4_is_bonded(dev)) + err += mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_ROCE_ADDR << 8 | 2, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +static int mlx4_ib_update_gids(struct gid_entry *gids, + struct mlx4_ib_dev *ibdev, + u8 port_num) +{ + if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) + return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num); + + return mlx4_ib_update_gids_v1(gids, ibdev, port_num); +} + static int mlx4_ib_add_gid(struct ib_device *device, u8 port_num, unsigned int index, diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 58391f2e0414..116b284bc4ce 100644 --- 
a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -206,7 +206,8 @@ enum { MLX4_SET_PORT_GID_TABLE = 0x5, MLX4_SET_PORT_PRIO2TC = 0x8, MLX4_SET_PORT_SCHEDULER = 0x9, - MLX4_SET_PORT_VXLAN = 0xB + MLX4_SET_PORT_VXLAN = 0xB, + MLX4_SET_PORT_ROCE_ADDR = 0xD }; enum { -- cgit v1.2.3-59-g8ed1b From fca83006294a6356705781eee31da1658fd411a5 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 14 Jan 2016 17:50:36 +0200 Subject: net/mlx4_core: Add support for configuring RoCE v2 UDP port In order to support RoCE v2, the hardware needs to be configured to classify certain UDP packets as RoCE v2 packets and pass them through its RoCE pipeline. This patch enables configuring this UDP port. Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx4/fw.c | 16 +++++++++++++++- include/linux/mlx4/device.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index e6282fc16d74..9d1dfc608887 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -2252,7 +2252,8 @@ struct mlx4_config_dev { __be32 rsvd1[3]; __be16 vxlan_udp_dport; __be16 rsvd2; - __be32 rsvd3; + __be16 roce_v2_entropy; + __be16 roce_v2_udp_dport; __be32 roce_flags; __be32 rsvd4[25]; __be16 rsvd5; @@ -2261,6 +2262,7 @@ struct mlx4_config_dev { }; #define MLX4_VXLAN_UDP_DPORT (1 << 0) +#define MLX4_ROCE_V2_UDP_DPORT BIT(3) #define MLX4_DISABLE_RX_PORT BIT(18) static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev) @@ -2378,6 +2380,18 @@ int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis) return mlx4_CONFIG_DEV_set(dev, &config_dev); } +int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port) +{ + struct mlx4_config_dev config_dev; + + memset(&config_dev, 0, sizeof(config_dev)); + config_dev.update_flags = cpu_to_be32(MLX4_ROCE_V2_UDP_DPORT); + config_dev.roce_v2_udp_dport = cpu_to_be16(udp_port); + + return mlx4_CONFIG_DEV_set(dev, &config_dev); +} +EXPORT_SYMBOL_GPL(mlx4_config_roce_v2_port); + int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2) { struct mlx4_cmd_mailbox *mailbox; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 28cbee0df7d7..430a929f048b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1457,6 +1457,7 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port); int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis); +int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port); int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2); int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); -- cgit v1.2.3-59-g8ed1b From 3f723f42d9d625bb9ecfe923d19d1d42da775797 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 14 Jan 2016 17:50:37 +0200 Subject: net/mlx4_core: Add support for RoCE v2 entropy In RoCE v2 we need to choose a source UDP port; we do so by using entropy over the source and destination QPNs.
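To make the folding concrete, a worked example with illustrative QPNs (values chosen only to show the arithmetic; see folded_qp() and mlx4_qp_roce_entropy() in the diff below):

/*
 * folded_qp() XORs the top byte of the 24-bit QPN into its low byte
 * and keeps the middle byte; the two folded values are then XORed and
 * the top two bits forced on, so the port lands in 0xc000-0xffff:
 *
 *   src qpn 0x012345 -> folded_src = (0x45 ^ 0x01) | 0x2300 = 0x2344
 *   dst qpn 0x0678ab -> folded_dst = (0xab ^ 0x06) | 0x7800 = 0x78ad
 *   entropy = (0x2344 ^ 0x78ad) | 0xc000 = 0xdbe9
 */
u16 src_port = mlx4_qp_roce_entropy(dev, qpn);	/* used as UDP source port */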
Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx4/qp.c | 26 ++++++++++++++++++++++++++ include/linux/mlx4/qp.h | 13 ++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 168823dde79f..d1cd9c32a9ae 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -167,6 +167,12 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; } + if ((cur_state == MLX4_QP_STATE_RTR) && + (new_state == MLX4_QP_STATE_RTS) && + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) + context->roce_entropy = + cpu_to_be16(mlx4_qp_roce_entropy(dev, qp->qpn)); + *(__be32 *) mailbox->buf = cpu_to_be32(optpar); memcpy(mailbox->buf + 8, context, sizeof *context); @@ -921,3 +927,23 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, return 0; } EXPORT_SYMBOL_GPL(mlx4_qp_to_ready); + +u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn) +{ + struct mlx4_qp_context context; + struct mlx4_qp qp; + int err; + + qp.qpn = qpn; + err = mlx4_qp_query(dev, &qp, &context); + if (!err) { + u32 dest_qpn = be32_to_cpu(context.remote_qpn) & 0xffffff; + u16 folded_dst = folded_qp(dest_qpn); + u16 folded_src = folded_qp(qpn); + + return (dest_qpn != qpn) ? + ((folded_dst ^ folded_src) | 0xC000) : + folded_src | 0xC000; + } + return 0xdead; +} diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index fe052e234906..cdf110d3f260 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -204,7 +204,8 @@ struct mlx4_qp_context { u32 reserved1; __be32 next_send_psn; __be32 cqn_send; - u32 reserved2[2]; + __be16 roce_entropy; + __be16 reserved2[3]; __be32 last_acked_psn; __be32 ssn; __be32 params2; @@ -487,4 +488,14 @@ static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp); +static inline u16 folded_qp(u32 q) +{ + u16 res; + + res = ((q & 0xff) ^ ((q & 0xff0000) >> 16)) | (q & 0xff00); + return res; +} + +u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn); + #endif /* MLX4_QP_H */ -- cgit v1.2.3-59-g8ed1b From 3b5daf28ac4bb9354b7d2f10ce5942cad23e979a Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 14 Jan 2016 17:50:39 +0200 Subject: IB/mlx4: Support modify_qp for RoCE v2 In order to support modify_qp for RoCE v2, we need to set the gid_type in the QP context. 
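In outline (distilled from the diff that follows), the gid type cached for the AH's source gid index is mapped to a 2-bit RoCE mode and placed in the high bits of the renamed rlkey_roce_mode byte on the INIT->RTR transition:

	u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);

	/* bits 7:6 of rlkey_roce_mode carry the RoCE mode; bit 4 remains
	 * the reserved-lkey enable set for kernel QPs at RESET->INIT */
	context->rlkey_roce_mode |= (qpc_roce_mode << 6);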
Signed-off-by: Moni Shoua Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 37 ++++++++++++++++++++++++++++++++++--- include/linux/mlx4/qp.h | 2 +- 2 files changed, 35 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 89f921f786b3..a9433a52a6f0 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1509,6 +1509,24 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) return 0; } +enum { + MLX4_QPC_ROCE_MODE_1 = 0, + MLX4_QPC_ROCE_MODE_2 = 2, + MLX4_QPC_ROCE_MODE_UNDEFINED = 0xff +}; + +static u8 gid_type_to_qpc(enum ib_gid_type gid_type) +{ + switch (gid_type) { + case IB_GID_TYPE_ROCE: + return MLX4_QPC_ROCE_MODE_1; + case IB_GID_TYPE_ROCE_UDP_ENCAP: + return MLX4_QPC_ROCE_MODE_2; + default: + return MLX4_QPC_ROCE_MODE_UNDEFINED; + } +} + static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -1652,9 +1670,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, u16 vlan = 0xffff; u8 smac[ETH_ALEN]; int status = 0; + int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) && + attr->ah_attr.ah_flags & IB_AH_GRH; - if (rdma_cap_eth_ah(&dev->ib_dev, port_num) && - attr->ah_attr.ah_flags & IB_AH_GRH) { + if (is_eth) { int index = attr->ah_attr.grh.sgid_index; status = ib_get_cached_gid(ibqp->device, port_num, @@ -1676,6 +1695,18 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | MLX4_QP_OPTPAR_SCHED_QUEUE); + + if (is_eth && + (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) { + u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type); + + if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) { + err = -EINVAL; + goto out; + } + context->rlkey_roce_mode |= (qpc_roce_mode << 6); + } + } if (attr_mask & IB_QP_TIMEOUT) { @@ -1847,7 +1878,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, sqd_event = 0; if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) - context->rlkey |= (1 << 4); + context->rlkey_roce_mode |= (1 << 4); /* * Before passing a kernel QP to the HW, make sure that the diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index cdf110d3f260..587cdf943b52 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -194,7 +194,7 @@ struct mlx4_qp_context { u8 mtu_msgmax; u8 rq_size_stride; u8 sq_size_stride; - u8 rlkey; + u8 rlkey_roce_mode; __be32 usr_page; __be32 local_qpn; __be32 remote_qpn; -- cgit v1.2.3-59-g8ed1b From 8d7f9ecb371a15e48754fa816e3f716517df7b13 Mon Sep 17 00:00:00 2001 From: "majd@mellanox.com" Date: Thu, 14 Jan 2016 19:12:59 +0200 Subject: net/mlx5_core: Export transport objects To be used by mlx5_ib in the following patches for implementing RAW PACKET QP. Add mlx5_core_ prefix to alloc and dealloc transport_domain since they are exposed now.
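With the header now under include/linux/mlx5/ and the symbols exported, an external module such as mlx5_ib can drive transport domains directly; a minimal usage sketch (error handling trimmed, assuming a valid mdev), mirroring the mlx5e_create_netdev() pattern in the diff below:

	#include <linux/mlx5/transobj.h>

	u32 tdn;
	int err;

	err = mlx5_core_alloc_transport_domain(mdev, &tdn);
	if (err)
		return err;
	/* ... create TIRs/TISes that reference tdn ... */
	mlx5_core_dealloc_transport_domain(mdev, tdn);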
Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 14 +++- drivers/net/ethernet/mellanox/mlx5/core/transobj.h | 72 --------------------- include/linux/mlx5/transobj.h | 74 ++++++++++++++++++++++ 6 files changed, 90 insertions(+), 80 deletions(-) delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/transobj.h create mode 100644 include/linux/mlx5/transobj.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 22e72bf1ae48..034ccab6d9a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -36,8 +36,8 @@ #include #include #include +#include #include "wq.h" -#include "transobj.h" #include "mlx5_core.h" #define MLX5E_MAX_NUM_TC 8 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1e52db32c73d..7eb373564ad4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2129,7 +2129,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_unmap_free_uar; } - err = mlx5_alloc_transport_domain(mdev, &priv->tdn); + err = mlx5_core_alloc_transport_domain(mdev, &priv->tdn); if (err) { mlx5_core_err(mdev, "alloc td failed, %d\n", err); goto err_dealloc_pd; @@ -2212,7 +2212,7 @@ err_destroy_mkey: mlx5_core_destroy_mkey(mdev, &priv->mr); err_dealloc_transport_domain: - mlx5_dealloc_transport_domain(mdev, priv->tdn); + mlx5_core_dealloc_transport_domain(mdev, priv->tdn); err_dealloc_pd: mlx5_core_dealloc_pd(mdev, priv->pdn); @@ -2244,7 +2244,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) mlx5e_close_drop_rq(priv); mlx5e_destroy_tises(priv); mlx5_core_destroy_mkey(priv->mdev, &priv->mr); - mlx5_dealloc_transport_domain(priv->mdev, priv->tdn); + mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); mlx5_core_dealloc_pd(priv->mdev, priv->pdn); mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); free_netdev(netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index ec5f901cfcfe..04bc522605a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -37,7 +37,7 @@ #include #include #include "mlx5_core.h" -#include "transobj.h" +#include void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index d7068f54e800..91ea2780e412 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -32,9 +32,9 @@ #include #include "mlx5_core.h" -#include "transobj.h" +#include -int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn) +int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn) { u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)]; u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)]; @@ -53,8 +53,9 @@ int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn) return err; } +EXPORT_SYMBOL(mlx5_core_alloc_transport_domain); -void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn) 
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn) { u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)]; u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)]; @@ -68,6 +69,7 @@ void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain); int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn) { @@ -94,6 +96,7 @@ int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen) memset(out, 0, sizeof(out)); return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_core_modify_rq); void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) { @@ -133,6 +136,7 @@ int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen) memset(out, 0, sizeof(out)); return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_core_modify_sq); void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn) { @@ -162,6 +166,7 @@ int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, return err; } +EXPORT_SYMBOL(mlx5_core_create_tir); int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, int inlen) @@ -187,6 +192,7 @@ void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_core_destroy_tir); int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tisn) @@ -203,6 +209,7 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, return err; } +EXPORT_SYMBOL(mlx5_core_create_tis); void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) { @@ -216,6 +223,7 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_core_destroy_tis); int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rmpn) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h deleted file mode 100644 index 74cae51436e4..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __TRANSOBJ_H__ -#define __TRANSOBJ_H__ - -int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn); -void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn); -int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *rqn); -int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen); -void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn); -int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *sqn); -int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen); -void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn); -int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *tirn); -int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, - int inlen); -void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); -int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *tisn); -void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); -int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *rmpn); -int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen); -int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn); -int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); -int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); -int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *rmpn); -int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn); -int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); -int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); - -int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *rqtn); -int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, - int inlen); -void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn); - -#endif /* __TRANSOBJ_H__ */ diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h new file mode 100644 index 000000000000..376229f09499 --- /dev/null +++ b/include/linux/mlx5/transobj.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __TRANSOBJ_H__ +#define __TRANSOBJ_H__ + +#include + +int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn); +void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn); +int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rqn); +int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen); +void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn); +int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *sqn); +int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen); +void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn); +int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *tirn); +int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, + int inlen); +void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); +int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *tisn); +void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); +int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn); +int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen); +int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn); +int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); +int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); +int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn); +int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn); +int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); +int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); + +int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rqtn); +int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, + int inlen); +void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn); + +#endif /* __TRANSOBJ_H__ */ -- cgit v1.2.3-59-g8ed1b From e2013b212f9f201c71fc5826ce41f39ebece0852 Mon Sep 17 00:00:00 2001 From: "majd@mellanox.com" Date: Thu, 14 Jan 2016 19:13:00 +0200 Subject: net/mlx5_core: Add RQ and SQ event handling RQ/SQ will be used to implement IB verbs QPs, so the events affiliated with IB QPs are now also affiliated with SQs and RQs. Since SQ, RQ and QP resource numbers do not share the same namespace, a queue type field was added to the event data to specify the SW object that the event is affiliated with.
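Roughly, the queue type rides in the bits above the 24-bit user index, so QP, RQ and SQ numbers no longer collide as radix tree keys (a sketch; MLX5_USER_INDEX_LEN is 24 because qpc.user_index is three bytes wide):

	u32 rsn  = qpn | (MLX5_EVENT_QUEUE_TYPE_RQ << MLX5_USER_INDEX_LEN);
	u32 type = rsn >> MLX5_USER_INDEX_LEN;		    /* 1 -> MLX5_RES_RQ */
	u32 num  = rsn & ((1 << MLX5_USER_INDEX_LEN) - 1);  /* the rqn itself */

so an RQ numbered 0x000123 keys the tree at 0x01000123, distinct from a QP with the same number.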
Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/qp.c | 126 +++++++++++++++++++++++---- include/linux/mlx5/device.h | 12 ++- include/linux/mlx5/driver.h | 8 +- include/linux/mlx5/qp.h | 8 ++ 5 files changed, 132 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 713ead583347..cda545f10267 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -227,6 +227,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN); mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n", eqe_type_str(eqe->type), eqe->type, rsn); mlx5_rsc_event(dev, rsn, eqe->type); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 803a1f268c0f..431885f77369 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "mlx5_core.h" @@ -77,6 +78,8 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) switch (common->res) { case MLX5_RES_QP: + case MLX5_RES_RQ: + case MLX5_RES_SQ: qp = (struct mlx5_core_qp *)common; qp->event(qp, event_type); break; @@ -177,12 +180,48 @@ void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) } #endif +static int create_qprqsq_common(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp, + int rsc_type) +{ + struct mlx5_qp_table *table = &dev->priv.qp_table; + int err; + + qp->common.res = rsc_type; + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, + qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN), + qp); + spin_unlock_irq(&table->lock); + if (err) + return err; + + atomic_set(&qp->common.refcount, 1); + init_completion(&qp->common.free); + qp->pid = current->pid; + + return 0; +} + +static void destroy_qprqsq_common(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp) +{ + struct mlx5_qp_table *table = &dev->priv.qp_table; + unsigned long flags; + + spin_lock_irqsave(&table->lock, flags); + radix_tree_delete(&table->tree, + qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN)); + spin_unlock_irqrestore(&table->lock, flags); + mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp); + wait_for_completion(&qp->common.free); +} + int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, struct mlx5_create_qp_mbox_in *in, int inlen) { - struct mlx5_qp_table *table = &dev->priv.qp_table; struct mlx5_create_qp_mbox_out out; struct mlx5_destroy_qp_mbox_in din; struct mlx5_destroy_qp_mbox_out dout; @@ -206,24 +245,16 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, qp->qpn = be32_to_cpu(out.qpn) & 0xffffff; mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn); - qp->common.res = MLX5_RES_QP; - spin_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, qp->qpn, qp); - spin_unlock_irq(&table->lock); - if (err) { - mlx5_core_warn(dev, "err %d\n", err); + err = create_qprqsq_common(dev, qp, MLX5_RES_QP); + if (err) goto err_cmd; - } err = mlx5_debug_qp_add(dev, qp); if (err) mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n", qp->qpn); - 
qp->pid = current->pid; - atomic_set(&qp->common.refcount, 1); atomic_inc(&dev->num_qps); - init_completion(&qp->common.free); return 0; @@ -243,18 +274,11 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, { struct mlx5_destroy_qp_mbox_in in; struct mlx5_destroy_qp_mbox_out out; - struct mlx5_qp_table *table = &dev->priv.qp_table; - unsigned long flags; int err; mlx5_debug_qp_remove(dev, qp); - spin_lock_irqsave(&table->lock, flags); - radix_tree_delete(&table->tree, qp->qpn); - spin_unlock_irqrestore(&table->lock, flags); - - mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp); - wait_for_completion(&qp->common.free); + destroy_qprqsq_common(dev, qp); memset(&in, 0, sizeof(in)); memset(&out, 0, sizeof(out)); @@ -442,3 +466,67 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn, } EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); #endif + +int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, + struct mlx5_core_qp *rq) +{ + int err; + u32 rqn; + + err = mlx5_core_create_rq(dev, in, inlen, &rqn); + if (err) + return err; + + rq->qpn = rqn; + err = create_qprqsq_common(dev, rq, MLX5_RES_RQ); + if (err) + goto err_destroy_rq; + + return 0; + +err_destroy_rq: + mlx5_core_destroy_rq(dev, rq->qpn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_rq_tracked); + +void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, + struct mlx5_core_qp *rq) +{ + destroy_qprqsq_common(dev, rq); + mlx5_core_destroy_rq(dev, rq->qpn); +} +EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked); + +int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, + struct mlx5_core_qp *sq) +{ + int err; + u32 sqn; + + err = mlx5_core_create_sq(dev, in, inlen, &sqn); + if (err) + return err; + + sq->qpn = sqn; + err = create_qprqsq_common(dev, sq, MLX5_RES_SQ); + if (err) + goto err_destroy_sq; + + return 0; + +err_destroy_sq: + mlx5_core_destroy_sq(dev, sq->qpn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_sq_tracked); + +void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, + struct mlx5_core_qp *sq) +{ + destroy_qprqsq_common(dev, sq); + mlx5_core_destroy_sq(dev, sq->qpn); +} +EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 48c4623ad651..b7eaccf997ff 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -223,6 +223,14 @@ enum { #define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1) #define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT +#define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8) + +enum { + MLX5_EVENT_QUEUE_TYPE_QP = 0, + MLX5_EVENT_QUEUE_TYPE_RQ = 1, + MLX5_EVENT_QUEUE_TYPE_SQ = 2, +}; + enum mlx5_event { MLX5_EVENT_TYPE_COMP = 0x0, @@ -479,7 +487,9 @@ struct mlx5_eqe_comp { }; struct mlx5_eqe_qp_srq { - __be32 reserved[6]; + __be32 reserved1[5]; + u8 type; + u8 reserved2[3]; __be32 qp_srq_n; }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 53c57724c8dd..ae8f91528b6f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -346,9 +346,11 @@ struct mlx5_core_mr { }; enum mlx5_res_type { - MLX5_RES_QP, - MLX5_RES_SRQ, - MLX5_RES_XSRQ, + MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP, + MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ, + MLX5_RES_SQ = MLX5_EVENT_QUEUE_TYPE_SQ, + MLX5_RES_SRQ = 3, + MLX5_RES_XSRQ = 4, }; struct mlx5_core_rsc_common { diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index fd1ff4110e80..431176ec70e2 100644 --- a/include/linux/mlx5/qp.h +++ 
b/include/linux/mlx5/qp.h @@ -651,6 +651,14 @@ void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn, u8 context, int error); #endif +int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, + struct mlx5_core_qp *rq); +void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, + struct mlx5_core_qp *rq); +int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, + struct mlx5_core_qp *sq); +void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, + struct mlx5_core_qp *sq); static inline const char *mlx5_qp_type_str(int type) { -- cgit v1.2.3-59-g8ed1b From 6d2f89df04b796e7dcc4f9f8dc0d8f04ad7f144b Mon Sep 17 00:00:00 2001 From: "majd@mellanox.com" Date: Thu, 14 Jan 2016 19:13:05 +0200 Subject: IB/mlx5: Add Raw Packet QP query functionality Since Raw Packet QP is composed of RQ and SQ, the IB QP's state is derived from the sub-objects. Therefore we need to query each one of the sub-objects, and decide on the IB QP's state. Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 191 ++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 24 +++ include/linux/mlx5/qp.h | 11 +- include/linux/mlx5/transobj.h | 2 + 4 files changed, 205 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 89f05bff0485..290e97bc065c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3438,40 +3438,153 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at } } -int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, - struct ib_qp_init_attr *qp_init_attr) +static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq, + u8 *sq_state) +{ + void *out; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(query_sq_out); + out = mlx5_vzalloc(inlen); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out); + if (err) + goto out; + + sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context); + *sq_state = MLX5_GET(sqc, sqc, state); + sq->state = *sq_state; + +out: + kvfree(out); + return err; +} + +static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev, + struct mlx5_ib_rq *rq, + u8 *rq_state) +{ + void *out; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(query_rq_out); + out = mlx5_vzalloc(inlen); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out); + if (err) + goto out; + + rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context); + *rq_state = MLX5_GET(rqc, rqc, state); + rq->state = *rq_state; + +out: + kvfree(out); + return err; +} + +static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state, + struct mlx5_ib_qp *qp, u8 *qp_state) +{ + static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = { + [MLX5_RQC_STATE_RST] = { + [MLX5_SQC_STATE_RST] = IB_QPS_RESET, + [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD, + [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE_BAD, + [MLX5_SQ_STATE_NA] = IB_QPS_RESET, + }, + [MLX5_RQC_STATE_RDY] = { + [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD, + [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE, + [MLX5_SQC_STATE_ERR] = IB_QPS_SQE, + [MLX5_SQ_STATE_NA] = MLX5_QP_STATE, + }, + [MLX5_RQC_STATE_ERR] = { + [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD, + [MLX5_SQC_STATE_RDY] = 
MLX5_QP_STATE_BAD, + [MLX5_SQC_STATE_ERR] = IB_QPS_ERR, + [MLX5_SQ_STATE_NA] = IB_QPS_ERR, + }, + [MLX5_RQ_STATE_NA] = { + [MLX5_SQC_STATE_RST] = IB_QPS_RESET, + [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE, + [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE, + [MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD, + }, + }; + + *qp_state = sqrq_trans[rq_state][sq_state]; + + if (*qp_state == MLX5_QP_STATE_BAD) { + WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x", + qp->raw_packet_qp.sq.base.mqp.qpn, sq_state, + qp->raw_packet_qp.rq.base.mqp.qpn, rq_state); + return -EINVAL; + } + + if (*qp_state == MLX5_QP_STATE) + *qp_state = qp->state; + + return 0; +} + +static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + u8 *raw_packet_qp_state) +{ + struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; + struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + struct mlx5_ib_rq *rq = &raw_packet_qp->rq; + int err; + u8 sq_state = MLX5_SQ_STATE_NA; + u8 rq_state = MLX5_RQ_STATE_NA; + + if (qp->sq.wqe_cnt) { + err = query_raw_packet_qp_sq_state(dev, sq, &sq_state); + if (err) + return err; + } + + if (qp->rq.wqe_cnt) { + err = query_raw_packet_qp_rq_state(dev, rq, &rq_state); + if (err) + return err; + } + + return sqrq_state_to_qp_state(sq_state, rq_state, qp, + raw_packet_qp_state); +} + +static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct ib_qp_attr *qp_attr) { - struct mlx5_ib_dev *dev = to_mdev(ibqp->device); - struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_query_qp_mbox_out *outb; struct mlx5_qp_context *context; int mlx5_state; int err = 0; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - /* - * Wait for any outstanding page faults, in case the user frees memory - * based upon this query's result. - */ - flush_workqueue(mlx5_ib_page_fault_wq); -#endif - - mutex_lock(&qp->mutex); outb = kzalloc(sizeof(*outb), GFP_KERNEL); - if (!outb) { - err = -ENOMEM; - goto out; - } + if (!outb) + return -ENOMEM; + context = &outb->ctx; err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb, sizeof(*outb)); if (err) - goto out_free; + goto out; mlx5_state = be32_to_cpu(context->flags) >> 28; qp->state = to_ib_qp_state(mlx5_state); - qp_attr->qp_state = qp->state; qp_attr->path_mtu = context->mtu_msgmax >> 5; qp_attr->path_mig_state = to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3); @@ -3505,6 +3618,43 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7; qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3; + +out: + kfree(outb); + return err; +} + +int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_qp *qp = to_mqp(ibqp); + int err = 0; + u8 raw_packet_qp_state; + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + /* + * Wait for any outstanding page faults, in case the user frees memory + * based upon this query's result. 
+ */ + flush_workqueue(mlx5_ib_page_fault_wq); +#endif + + mutex_lock(&qp->mutex); + + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state); + if (err) + goto out; + qp->state = raw_packet_qp_state; + qp_attr->port_num = 1; + } else { + err = query_qp_attr(dev, qp, qp_attr); + if (err) + goto out; + } + + qp_attr->qp_state = qp->state; qp_attr->cur_qp_state = qp_attr->qp_state; qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; qp_attr->cap.max_recv_sge = qp->rq.max_gs; @@ -3538,9 +3688,6 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; -out_free: - kfree(outb); - out: mutex_unlock(&qp->mutex); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index 91ea2780e412..460d9ff0222f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -111,6 +111,18 @@ void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } +int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {0}; + int outlen = MLX5_ST_SZ_BYTES(query_rq_out); + + MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ); + MLX5_SET(query_rq_in, in, rqn, rqn); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); +} +EXPORT_SYMBOL(mlx5_core_query_rq); + int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn) { u32 out[MLX5_ST_SZ_DW(create_sq_out)]; @@ -151,6 +163,18 @@ void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } +int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {0}; + int outlen = MLX5_ST_SZ_BYTES(query_sq_out); + + MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ); + MLX5_SET(query_sq_in, in, sqn, sqn); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); +} +EXPORT_SYMBOL(mlx5_core_query_sq); + int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn) { diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 431176ec70e2..f033c7a1490c 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -85,7 +85,16 @@ enum mlx5_qp_state { MLX5_QP_STATE_ERR = 6, MLX5_QP_STATE_SQ_DRAINING = 7, MLX5_QP_STATE_SUSPENDED = 9, - MLX5_QP_NUM_STATE + MLX5_QP_NUM_STATE, + MLX5_QP_STATE, + MLX5_QP_STATE_BAD, +}; + +enum { + MLX5_SQ_STATE_NA = MLX5_SQC_STATE_ERR + 1, + MLX5_SQ_NUM_STATE = MLX5_SQ_STATE_NA + 1, + MLX5_RQ_STATE_NA = MLX5_RQC_STATE_ERR + 1, + MLX5_RQ_NUM_STATE = MLX5_RQ_STATE_NA + 1, }; enum { diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index 376229f09499..d259e4c423dd 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -41,10 +41,12 @@ int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn); int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen); void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn); +int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out); int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn); int mlx5_core_modify_sq(struct mlx5_core_dev *dev, 
u32 sqn, u32 *in, int inlen); void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn); +int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out); int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn); int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, -- cgit v1.2.3-59-g8ed1b From 75850d0bcece42416ba81bd38e4c719f101c832d Mon Sep 17 00:00:00 2001 From: "majd@mellanox.com" Date: Thu, 14 Jan 2016 19:13:06 +0200 Subject: IB/mlx5: Support setting Ethernet priority for Raw Packet QPs When the user changes the Address Vector (AV) in the modify QP, the user provides an SL. This SL should be translated to an Ethernet priority by taking its 3 LSBs, and the QP's TIS should be modified according to that priority. Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 38 ++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 12 +++++++ include/linux/mlx5/mlx5_ifc.h | 9 ++++- include/linux/mlx5/transobj.h | 2 ++ 4 files changed, 57 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 290e97bc065c..38413dcaf4cf 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1752,7 +1752,33 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) return rate + MLX5_STAT_RATE_OFFSET; } -static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, +static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, + struct mlx5_ib_sq *sq, u8 sl) +{ + void *in; + void *tisc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_tis_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tis_in, in, bitmask.prio, 1); + + tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); + MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1)); + + err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen); + + kvfree(in); + + return err; +} + +static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, u32 path_flags, const struct ib_qp_attr *attr) { @@ -1808,6 +1834,11 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, if (attr_mask & IB_QP_TIMEOUT) path->ackto_lt = attr->timeout << 3; + if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt) + return modify_raw_packet_eth_prio(dev->mdev, + &qp->raw_packet_qp.sq, + ah->sl & 0xf); + return 0; } @@ -2029,7 +2060,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, context->pri_path.port = attr->port_num; if (attr_mask & IB_QP_AV) { - err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path, + err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path, attr_mask & IB_QP_PORT ?
attr->port_num : qp->port, attr_mask, 0, attr); if (err) @@ -2040,7 +2071,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, context->pri_path.ackto_lt |= attr->timeout << 3; if (attr_mask & IB_QP_ALT_PATH) { - err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path, + err = mlx5_set_path(dev, qp, &attr->alt_ah_attr, + &context->alt_path, attr->alt_port_num, attr_mask, 0, attr); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index 460d9ff0222f..03a5093ffeb7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -235,6 +235,18 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, } EXPORT_SYMBOL(mlx5_core_create_tis); +int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_tis_out)] = {0}; + + MLX5_SET(modify_tis_in, in, tisn, tisn); + MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS); + + return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_modify_tis); + void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) { u32 in[MLX5_ST_SZ_DW(destroy_tis_in)]; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 991283b51f61..4633b88b0c3b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4052,6 +4052,13 @@ struct mlx5_ifc_modify_tis_out_bits { u8 reserved_1[0x40]; }; +struct mlx5_ifc_modify_tis_bitmask_bits { + u8 reserved_0[0x20]; + + u8 reserved_1[0x1f]; + u8 prio[0x1]; +}; + struct mlx5_ifc_modify_tis_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; @@ -4064,7 +4071,7 @@ struct mlx5_ifc_modify_tis_in_bits { u8 reserved_3[0x20]; - u8 modify_bitmask[0x40]; + struct mlx5_ifc_modify_tis_bitmask_bits bitmask; u8 reserved_4[0x40]; diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index d259e4c423dd..88441f5ece25 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -54,6 +54,8 @@ int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tisn); +int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in, + int inlen); void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rmpn); -- cgit v1.2.3-59-g8ed1b From 427c1e7bcd7e5cd62160fcda0ce215ebbe0da3a1 Mon Sep 17 00:00:00 2001 From: "majd@mellanox.com" Date: Thu, 14 Jan 2016 19:13:07 +0200 Subject: {IB, net}/mlx5: Move the modify QP operation table to mlx5_ib When modifying a QP, the desired operation was determined in the mlx5_core using a transition table that takes the current state, the final state, and returns the desired operation. Since this logic will be used for Raw Packet QP, move the operation table to the mlx5_ib. 
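The lookup logic itself is unchanged, only its home moves; in outline, the caller in mlx5_ib now does the bounds check and rejects transitions with no firmware opcode before issuing the command:

	if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
	    !optab[mlx5_cur][mlx5_new])
		goto out;	/* no legal firmware op for this transition */

	op = optab[mlx5_cur][mlx5_new];	/* e.g. RST -> INIT yields MLX5_CMD_OP_RST2INIT_QP */
	err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event, &base->mqp);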
Signed-off-by: Majd Dibbiny Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 53 ++++++++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 48 ++----------------------- include/linux/mlx5/qp.h | 3 +- 3 files changed, 50 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 38413dcaf4cf..26e461b6a7b9 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1528,10 +1528,9 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) if (qp->state != IB_QPS_RESET) { mlx5_ib_qp_disable_pagefaults(qp); - if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state), - MLX5_QP_STATE_RST, in, 0, - &base->mqp)) - mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n", + if (mlx5_core_qp_modify(dev->mdev, MLX5_CMD_OP_2RST_QP, + in, 0, &base->mqp)) + mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n", base->mqp.qpn); } @@ -1989,6 +1988,43 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) { + static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = { + [MLX5_QP_STATE_RST] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP, + }, + [MLX5_QP_STATE_INIT] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP, + [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP, + }, + [MLX5_QP_STATE_RTR] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP, + }, + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP, + }, + [MLX5_QP_STATE_SQD] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + }, + [MLX5_QP_STATE_SQER] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP, + }, + [MLX5_QP_STATE_ERR] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + } + }; + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_qp_base *base = &qp->trans_qp.base; @@ -2001,6 +2037,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, int sqd_event; int mlx5_st; int err; + u16 op; in = kzalloc(sizeof(*in), GFP_KERNEL); if (!in) @@ -2147,11 +2184,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) mlx5_ib_qp_disable_pagefaults(qp); + if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE || + !optab[mlx5_cur][mlx5_new]) + goto out; + + op = optab[mlx5_cur][mlx5_new]; optpar = ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; in->optparam = cpu_to_be32(optpar); - err = mlx5_core_qp_modify(dev->mdev, to_mlx5_state(cur_state), - to_mlx5_state(new_state), in, sqd_event, + err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event, &base->mqp); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index c46024910736..def289375ecb 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -348,59 +348,15 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp); -int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state, - enum mlx5_qp_state new_state, +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation, struct mlx5_modify_qp_mbox_in *in, int sqd_event, struct mlx5_core_qp *qp) { - static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = { - [MLX5_QP_STATE_RST] = { - [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, - [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, - [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP, - }, - [MLX5_QP_STATE_INIT] = { - [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, - [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, - [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP, - [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP, - }, - [MLX5_QP_STATE_RTR] = { - [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, - [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, - [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP, - }, - [MLX5_QP_STATE_RTS] = { - [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, - [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, - [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP, - }, - [MLX5_QP_STATE_SQD] = { - [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, - [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, - }, - [MLX5_QP_STATE_SQER] = { - [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, - [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, - [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP, - }, - [MLX5_QP_STATE_ERR] = { - [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, - [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, - } - }; - struct mlx5_modify_qp_mbox_out out; int err = 0; - u16 op; - - if (cur_state >= MLX5_QP_NUM_STATE || new_state >= MLX5_QP_NUM_STATE || - !optab[cur_state][new_state]) - return -EINVAL; memset(&out, 0, sizeof(out)); - op = optab[cur_state][new_state]; - in->hdr.opcode = cpu_to_be16(op); + in->hdr.opcode = cpu_to_be16(operation); in->qpn = cpu_to_be32(qp->qpn); err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)); if (err) diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index f033c7a1490c..5b8c89ffaa58 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -641,8 +641,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, struct mlx5_create_qp_mbox_in *in, int inlen); -int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state, - enum mlx5_qp_state new_state, +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation, struct mlx5_modify_qp_mbox_in *in, int sqd_event, struct mlx5_core_qp *qp); int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, -- cgit v1.2.3-59-g8ed1b