diff options
Diffstat (limited to 'drivers/infiniband/sw')
77 files changed, 4450 insertions, 6264 deletions
diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig index 1f2759c72108..0df48b3a6b56 100644 --- a/drivers/infiniband/sw/rdmavt/Kconfig +++ b/drivers/infiniband/sw/rdmavt/Kconfig @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_RDMAVT tristate "RDMA verbs transport library" - depends on X86_64 && ARCH_DMA_ADDR_T_64BIT + depends on INFINIBAND_VIRT_DMA + depends on X86_64 depends on PCI - select DMA_VIRT_OPS - ---help--- + help This is a common software verbs provider for RDMA networks. diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index ee02c6176007..63999239ed9e 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -1,48 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2016 - 2019 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include <linux/slab.h> @@ -90,22 +48,21 @@ EXPORT_SYMBOL(rvt_check_ah); /** * rvt_create_ah - create an address handle * @ibah: the IB address handle - * @ah_attr: the attributes of the AH - * @create_flags: create address handle flags (see enum rdma_create_ah_flags) + * @init_attr: the attributes of the AH * @udata: pointer to user's input output buffer information. * * This may be called from interrupt context. * * Return: 0 on success */ -int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 create_flags, struct ib_udata *udata) +int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct rvt_ah *ah = ibah_to_rvtah(ibah); struct rvt_dev_info *dev = ib_to_rvt(ibah->device); unsigned long flags; - if (rvt_check_ah(ibah->device, ah_attr)) + if (rvt_check_ah(ibah->device, init_attr->ah_attr)) return -EINVAL; spin_lock_irqsave(&dev->n_ahs_lock, flags); @@ -117,22 +74,22 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, dev->n_ahs_allocated++; spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - rdma_copy_ah_attr(&ah->attr, ah_attr); + rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr); if (dev->driver_f.notify_new_ah) - dev->driver_f.notify_new_ah(ibah->device, ah_attr, ah); + dev->driver_f.notify_new_ah(ibah->device, + init_attr->ah_attr, ah); return 0; } /** - * rvt_destory_ah - Destory an address handle + * rvt_destroy_ah - Destroy an address handle * @ibah: address handle * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) - * * Return: 0 on success */ -void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) +int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) { struct rvt_dev_info *dev = ib_to_rvt(ibah->device); struct rvt_ah *ah = ibah_to_rvtah(ibah); @@ -143,6 +100,7 @@ void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) spin_unlock_irqrestore(&dev->n_ahs_lock, flags); rdma_destroy_ah_attr(&ah->attr); + return 0; } /** diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index bbb4d3bdec4e..c11fdf637d64 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ b/drivers/infiniband/sw/rdmavt/ah.h @@ -1,58 +1,16 @@ -#ifndef DEF_RVTAH_H -#define DEF_RVTAH_H - +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ +#ifndef DEF_RVTAH_H +#define DEF_RVTAH_H + #include <rdma/rdma_vt.h> -int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, - u32 create_flags, struct ib_udata *udata); -void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags); +int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); +int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags); int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 13d7f66eadab..9fe4dcaa049a 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -1,48 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2016 - 2018 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include <linux/slab.h> @@ -211,7 +169,7 @@ int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, int err; if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (entries < 1 || entries > rdi->dparms.props.max_cqe) return -EINVAL; @@ -248,8 +206,8 @@ int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, */ if (udata && udata->outlen >= sizeof(__u64)) { cq->ip = rvt_create_mmap_info(rdi, sz, udata, u_wc); - if (!cq->ip) { - err = -ENOMEM; + if (IS_ERR(cq->ip)) { + err = PTR_ERR(cq->ip); goto bail_wc; } @@ -315,7 +273,7 @@ bail_wc: * * Called by ib_destroy_cq() in the generic verbs code. */ -void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); struct rvt_dev_info *rdi = cq->rdi; @@ -327,7 +285,8 @@ void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) if (cq->ip) kref_put(&cq->ip->ref, rvt_release_mmap_info); else - vfree(cq->queue); + vfree(cq->kqueue); + return 0; } /** @@ -370,7 +329,7 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) return ret; } -/** +/* * rvt_resize_cq - change the size of the CQ * @ibcq: the completion queue * diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h index 5e26a2eb19a4..b0a948ec760b 100644 --- a/drivers/infiniband/sw/rdmavt/cq.h +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -1,59 +1,17 @@ -#ifndef DEF_RVTCQ_H -#define DEF_RVTCQ_H - +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 - 2018 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ +#ifndef DEF_RVTCQ_H +#define DEF_RVTCQ_H + #include <rdma/rdma_vt.h> #include <rdma/rdmavt_cq.h> int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index 108c71e3ac23..98a8fe3b04ef 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -1,48 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include <rdma/ib_mad.h> @@ -56,8 +14,11 @@ * @port_num: the port number this packet came in on, 1 based from ib core * @in_wc: the work completion entry for this packet * @in_grh: the global route header for this packet - * @in_mad: the incoming MAD - * @out_mad: any outgoing MAD reply + * @in: the incoming MAD + * @in_mad_size: size of the incoming MAD reply + * @out: any outgoing MAD reply + * @out_mad_size: size of the outgoing MAD reply + * @out_mad_pkey_index: unused * * Note that the verbs framework has already done the MAD sanity checks, * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE @@ -67,7 +28,7 @@ * * Return: IB_MAD_RESULT_SUCCESS or error */ -int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, +int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, struct ib_mad_hdr *out, size_t *out_mad_size, @@ -79,9 +40,6 @@ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, * future may choose to implement this but it should not be made into a * requirement. */ - if (ibport_num_to_idx(ibdev, port_num) < 0) - return -EINVAL; - return IB_MAD_RESULT_FAILURE; } diff --git a/drivers/infiniband/sw/rdmavt/mad.h b/drivers/infiniband/sw/rdmavt/mad.h index a9d6eecc3723..368be29eab37 100644 --- a/drivers/infiniband/sw/rdmavt/mad.h +++ b/drivers/infiniband/sw/rdmavt/mad.h @@ -1,56 +1,14 @@ -#ifndef DEF_RVTMAD_H -#define DEF_RVTMAD_H - +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ +#ifndef DEF_RVTMAD_H +#define DEF_RVTMAD_H + #include <rdma/rdma_vt.h> -int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, +int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, struct ib_mad_hdr *out, size_t *out_mad_size, diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index dd11c6fcd060..a123874e1ca7 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -1,48 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include <linux/slab.h> @@ -54,7 +12,7 @@ #include "mcast.h" /** - * rvt_driver_mcast - init resources for multicast + * rvt_driver_mcast_init - init resources for multicast * @rdi: rvt dev struct * * This is per device that registers with rdmavt @@ -69,7 +27,7 @@ void rvt_driver_mcast_init(struct rvt_dev_info *rdi) } /** - * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct + * rvt_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct * @qp: the QP to link */ static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp) @@ -98,7 +56,7 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp) } /** - * mcast_alloc - allocate the multicast GID structure + * rvt_mcast_alloc - allocate the multicast GID structure * @mgid: the multicast GID * @lid: the muilticast LID (host order) * @@ -180,8 +138,8 @@ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid, } EXPORT_SYMBOL(rvt_mcast_find); -/** - * mcast_add - insert mcast GID into table and attach QP struct +/* + * rvt_mcast_add - insert mcast GID into table and attach QP struct * @mcast: the mcast GID table * @mqp: the QP to attach * @@ -426,8 +384,8 @@ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } /** - *rvt_mast_tree_empty - determine if any qps are attached to any mcast group - *@rdi: rvt dev struct + * rvt_mcast_tree_empty - determine if any qps are attached to any mcast group + * @rdi: rvt dev struct * * Return: in use count */ diff --git a/drivers/infiniband/sw/rdmavt/mcast.h b/drivers/infiniband/sw/rdmavt/mcast.h index 29f579267608..b96d86f9625b 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.h +++ b/drivers/infiniband/sw/rdmavt/mcast.h @@ -1,53 +1,11 @@ -#ifndef DEF_RVTMCAST_H -#define DEF_RVTMCAST_H - +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ +#ifndef DEF_RVTMCAST_H +#define DEF_RVTMCAST_H + #include <rdma/rdma_vt.h> void rvt_driver_mcast_init(struct rvt_dev_info *rdi); diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index 652f4a7efc1b..4d2238f3f3c8 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -1,54 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mm.h> -#include <asm/pgtable.h> #include <rdma/uverbs_ioctl.h> #include "mmap.h" @@ -154,7 +111,7 @@ done: * @udata: user data (must be valid!) * @obj: opaque pointer to a cq, wq etc * - * Return: rvt_mmap struct on success + * Return: rvt_mmap struct on success, ERR_PTR on failure */ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, u32 size, struct ib_udata *udata, void *obj) @@ -166,7 +123,7 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, u32 size, ip = kmalloc_node(sizeof(*ip), GFP_KERNEL, rdi->dparms.node); if (!ip) - return ip; + return ERR_PTR(-ENOMEM); size = PAGE_ALIGN(size); diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h index 02466c40bc1e..7e92cf28e071 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.h +++ b/drivers/infiniband/sw/rdmavt/mmap.h @@ -1,53 +1,11 @@ -#ifndef DEF_RDMAVTMMAP_H -#define DEF_RDMAVTMMAP_H - +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ +#ifndef DEF_RDMAVTMMAP_H +#define DEF_RDMAVTMMAP_H + #include <rdma/rdma_vt.h> void rvt_mmap_init(struct rvt_dev_info *rdi); diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 72f6534fbb52..8a1f2e285180 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -1,48 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include <linux/slab.h> @@ -97,13 +55,12 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL); rdi->dparms.props.max_mr = rdi->lkey_table.max; - rdi->dparms.props.max_fmr = rdi->lkey_table.max; return 0; } /** - *rvt_mr_exit: clean up MR - *@rdi: rvt dev structure + * rvt_mr_exit - clean up MR + * @rdi: rvt dev structure * * called when drivers have unregistered or perhaps failed to register with us */ @@ -325,8 +282,6 @@ static void __rvt_free_mr(struct rvt_mr *mr) * @acc: access flags * * Return: the memory region on success, otherwise returns an errno. - * Note that all DMA addresses should be created via the functions in - * struct dma_virt_ops. */ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) { @@ -372,6 +327,7 @@ bail: * @pd: protection domain for this memory region * @start: starting userspace address * @length: length of region to register + * @virt_addr: associated virtual address * @mr_access_flags: access flags for this memory region * @udata: unused by the driver * @@ -412,7 +368,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mr.page_shift = PAGE_SHIFT; m = 0; n = 0; - for_each_sg_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) { void *vaddr; vaddr = page_address(sg_page_iter_page(&sg_iter)); @@ -441,8 +397,8 @@ bail_umem: /** * rvt_dereg_clean_qp_cb - callback from iterator - * @qp - the qp - * @v - the mregion (as u64) + * @qp: the qp + * @v: the mregion (as u64) * * This routine fields the callback for all QPs and * for QPs in the same PD as the MR will call the @@ -460,7 +416,7 @@ static void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v) /** * rvt_dereg_clean_qps - find QPs for reference cleanup - * @mr - the MR that is being deregistered + * @mr: the MR that is being deregistered * * This routine iterates RC QPs looking for references * to the lkey noted in mr. @@ -474,8 +430,8 @@ static void rvt_dereg_clean_qps(struct rvt_mregion *mr) /** * rvt_check_refs - check references - * @mr - the megion - * @t - the caller identification + * @mr: the megion + * @t: the caller identification * * This routine checks MRs holding a reference during * when being de-registered. @@ -500,7 +456,7 @@ static int rvt_check_refs(struct rvt_mregion *mr, const char *t) rvt_pr_err(rdi, "%s timeout mr %p pd %p lkey %x refcount %ld\n", t, mr, mr->pd, mr->lkey, - atomic_long_read(&mr->refcount.count)); + atomic_long_read(&mr->refcount.data->count)); rvt_get_mr(mr); return -EBUSY; } @@ -509,8 +465,8 @@ static int rvt_check_refs(struct rvt_mregion *mr, const char *t) /** * rvt_mr_has_lkey - is MR - * @mr - the mregion - * @lkey - the lkey + * @mr: the mregion + * @lkey: the lkey */ bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey) { @@ -519,8 +475,8 @@ bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey) /** * rvt_ss_has_lkey - is mr in sge tests - * @ss - the sge state - * @lkey + * @ss: the sge state + * @lkey: the lkey * * This code tests for an MR in the indicated * sge state. @@ -543,7 +499,7 @@ bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey) /** * rvt_dereg_mr - unregister and free a memory region * @ibmr: the memory region to free - * + * @udata: unused by the driver * * Note that this is called to free MRs created by rvt_get_dma_mr() * or rvt_reg_user_mr(). @@ -577,7 +533,7 @@ out: * Return: the memory region on success, otherwise return an errno. */ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct rvt_mr *mr; @@ -714,160 +670,6 @@ bail: EXPORT_SYMBOL(rvt_invalidate_rkey); /** - * rvt_alloc_fmr - allocate a fast memory region - * @pd: the protection domain for this memory region - * @mr_access_flags: access flags for this memory region - * @fmr_attr: fast memory region attributes - * - * Return: the memory region on success, otherwise returns an errno. - */ -struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr) -{ - struct rvt_fmr *fmr; - int m; - struct ib_fmr *ret; - int rval = -ENOMEM; - - /* Allocate struct plus pointers to first level page tables. */ - m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ; - fmr = kzalloc(struct_size(fmr, mr.map, m), GFP_KERNEL); - if (!fmr) - goto bail; - - rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages, - PERCPU_REF_INIT_ATOMIC); - if (rval) - goto bail; - - /* - * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & - * rkey. - */ - rval = rvt_alloc_lkey(&fmr->mr, 0); - if (rval) - goto bail_mregion; - fmr->ibfmr.rkey = fmr->mr.lkey; - fmr->ibfmr.lkey = fmr->mr.lkey; - /* - * Resources are allocated but no valid mapping (RKEY can't be - * used). - */ - fmr->mr.access_flags = mr_access_flags; - fmr->mr.max_segs = fmr_attr->max_pages; - fmr->mr.page_shift = fmr_attr->page_shift; - - ret = &fmr->ibfmr; -done: - return ret; - -bail_mregion: - rvt_deinit_mregion(&fmr->mr); -bail: - kfree(fmr); - ret = ERR_PTR(rval); - goto done; -} - -/** - * rvt_map_phys_fmr - set up a fast memory region - * @ibfmr: the fast memory region to set up - * @page_list: the list of pages to associate with the fast memory region - * @list_len: the number of pages to associate with the fast memory region - * @iova: the virtual address of the start of the fast memory region - * - * This may be called from interrupt context. - * - * Return: 0 on success - */ - -int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int list_len, u64 iova) -{ - struct rvt_fmr *fmr = to_ifmr(ibfmr); - struct rvt_lkey_table *rkt; - unsigned long flags; - int m, n; - unsigned long i; - u32 ps; - struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device); - - i = atomic_long_read(&fmr->mr.refcount.count); - if (i > 2) - return -EBUSY; - - if (list_len > fmr->mr.max_segs) - return -EINVAL; - - rkt = &rdi->lkey_table; - spin_lock_irqsave(&rkt->lock, flags); - fmr->mr.user_base = iova; - fmr->mr.iova = iova; - ps = 1 << fmr->mr.page_shift; - fmr->mr.length = list_len * ps; - m = 0; - n = 0; - for (i = 0; i < list_len; i++) { - fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i]; - fmr->mr.map[m]->segs[n].length = ps; - trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps); - if (++n == RVT_SEGSZ) { - m++; - n = 0; - } - } - spin_unlock_irqrestore(&rkt->lock, flags); - return 0; -} - -/** - * rvt_unmap_fmr - unmap fast memory regions - * @fmr_list: the list of fast memory regions to unmap - * - * Return: 0 on success. - */ -int rvt_unmap_fmr(struct list_head *fmr_list) -{ - struct rvt_fmr *fmr; - struct rvt_lkey_table *rkt; - unsigned long flags; - struct rvt_dev_info *rdi; - - list_for_each_entry(fmr, fmr_list, ibfmr.list) { - rdi = ib_to_rvt(fmr->ibfmr.device); - rkt = &rdi->lkey_table; - spin_lock_irqsave(&rkt->lock, flags); - fmr->mr.user_base = 0; - fmr->mr.iova = 0; - fmr->mr.length = 0; - spin_unlock_irqrestore(&rkt->lock, flags); - } - return 0; -} - -/** - * rvt_dealloc_fmr - deallocate a fast memory region - * @ibfmr: the fast memory region to deallocate - * - * Return: 0 on success. - */ -int rvt_dealloc_fmr(struct ib_fmr *ibfmr) -{ - struct rvt_fmr *fmr = to_ifmr(ibfmr); - int ret = 0; - - rvt_free_lkey(&fmr->mr); - rvt_put_mr(&fmr->mr); /* will set completion if last */ - ret = rvt_check_refs(&fmr->mr, __func__); - if (ret) - goto out; - rvt_deinit_mregion(&fmr->mr); - kfree(fmr); -out: - return ret; -} - -/** * rvt_sge_adjacent - is isge compressible * @last_sge: last outgoing SGE written * @sge: SGE to check @@ -921,7 +723,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, /* * We use LKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). */ if (sge->lkey == 0) { struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); @@ -1032,7 +834,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, /* * We use RKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). */ rcu_read_lock(); if (rkey == 0) { diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h index 2c8d0752e8e3..d17f1400b5f6 100644 --- a/drivers/infiniband/sw/rdmavt/mr.h +++ b/drivers/infiniband/sw/rdmavt/mr.h @@ -1,58 +1,12 @@ -#ifndef DEF_RVTMR_H -#define DEF_RVTMR_H - +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ +#ifndef DEF_RVTMR_H +#define DEF_RVTMR_H + #include <rdma/rdma_vt.h> -struct rvt_fmr { - struct ib_fmr ibfmr; - struct rvt_mregion mr; /* must be last */ -}; struct rvt_mr { struct ib_mr ibmr; @@ -60,11 +14,6 @@ struct rvt_mr { struct rvt_mregion mr; /* must be last */ }; -static inline struct rvt_fmr *to_ifmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct rvt_fmr, ibfmr); -} - static inline struct rvt_mr *to_imr(struct ib_mr *ibmr) { return container_of(ibmr, struct rvt_mr, ibmr); @@ -80,14 +29,8 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_udata *udata); int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); -struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr); -int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int list_len, u64 iova); -int rvt_unmap_fmr(struct list_head *fmr_list); -int rvt_dealloc_fmr(struct ib_fmr *ibfmr); #endif /* DEF_RVTMR_H */ diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index a403718f0b5e..ae62071969fa 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -1,48 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include <linux/slab.h> @@ -95,11 +53,12 @@ bail: * * Return: always 0 */ -void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +int rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); spin_lock(&dev->n_pds_lock); dev->n_pds_allocated--; spin_unlock(&dev->n_pds_lock); + return 0; } diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h index 71ba76d72b1d..42a0ef3b7da3 100644 --- a/drivers/infiniband/sw/rdmavt/pd.h +++ b/drivers/infiniband/sw/rdmavt/pd.h @@ -1,56 +1,14 @@ -#ifndef DEF_RDMAVTPD_H -#define DEF_RDMAVTPD_H - +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ +#ifndef DEF_RDMAVTPD_H +#define DEF_RDMAVTPD_H + #include <rdma/rdma_vt.h> int rvt_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); #endif /* DEF_RDMAVTPD_H */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 7858d499db03..3acab569fbb9 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1,48 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* - * Copyright(c) 2016 - 2019 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * + * Copyright(c) 2016 - 2020 Intel Corporation. */ #include <linux/hash.h> @@ -156,7 +114,7 @@ void rvt_wss_exit(struct rvt_dev_info *rdi) rdi->wss = NULL; } -/** +/* * rvt_wss_init - Init wss data structures * * Return: 0 on success @@ -323,6 +281,7 @@ static void get_map_page(struct rvt_qpn_table *qpt, /** * init_qpn_table - initialize the QP number table for a device + * @rdi: rvt dev struct * @qpt: the QPN table */ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) @@ -524,16 +483,20 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, * IB_QPT_SMI/IB_QPT_GSI * @rdi: rvt device info structure * @qpt: queue pair number table pointer + * @type: the QP type * @port_num: IB port number, 1 based, comes from core + * @exclude_prefix: prefix of special queue pair number being allocated * * Return: The queue pair number */ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port_num) + enum ib_qp_type type, u8 port_num, u8 exclude_prefix) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; u32 ret; + u32 max_qpn = exclude_prefix == RVT_AIP_QP_PREFIX ? + RVT_AIP_QPN_MAX : RVT_QPN_MAX; if (rdi->driver_f.alloc_qpn) return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num); @@ -553,7 +516,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, } qpn = qpt->last + qpt->incr; - if (qpn >= RVT_QPN_MAX) + if (qpn >= max_qpn) qpn = qpt->incr | ((qpt->last & 1) ^ 1); /* offset carries bit 0 */ offset = qpn & RVT_BITS_PER_PAGE_MASK; @@ -652,8 +615,8 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) /** * rvt_swqe_has_lkey - return true if lkey is used by swqe - * @wqe - the send wqe - * @lkey - the lkey + * @wqe: the send wqe + * @lkey: the lkey * * Test the swqe for using lkey */ @@ -672,8 +635,8 @@ static bool rvt_swqe_has_lkey(struct rvt_swqe *wqe, u32 lkey) /** * rvt_qp_sends_has_lkey - return true is qp sends use lkey - * @qp - the rvt_qp - * @lkey - the lkey + * @qp: the rvt_qp + * @lkey: the lkey */ static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey) { @@ -696,8 +659,8 @@ static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey) /** * rvt_qp_acks_has_lkey - return true if acks have lkey - * @qp - the qp - * @lkey - the lkey + * @qp: the qp + * @lkey: the lkey */ static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey) { @@ -713,10 +676,10 @@ static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey) return false; } -/* +/** * rvt_qp_mr_clean - clean up remote ops for lkey - * @qp - the qp - * @lkey - the lkey that is being de-registered + * @qp: the qp + * @lkey: the lkey that is being de-registered * * This routine checks if the lkey is being used by * the qp. @@ -850,6 +813,7 @@ bail: /** * rvt_init_qp - initialize the QP state to the reset state + * @rdi: rvt dev struct * @qp: the QP to init or reinit * @type: the QP type * @@ -898,14 +862,13 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, qp->s_tail_ack_queue = 0; qp->s_acked_ack_queue = 0; qp->s_num_rd_atomic = 0; - if (qp->r_rq.kwq) - qp->r_rq.kwq->count = qp->r_rq.size; qp->r_sge.num_sge = 0; atomic_set(&qp->s_reserved_used, 0); } /** * _rvt_reset_qp - initialize the QP state to the reset state + * @rdi: rvt dev struct * @qp: the QP to reset * @type: the QP type * @@ -979,7 +942,8 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, spin_unlock_irq(&qp->r_lock); } -/** rvt_free_qpn - Free a qpn from the bit map +/** + * rvt_free_qpn - Free a qpn from the bit map * @qpt: QP table * @qpn: queue pair number to free */ @@ -987,6 +951,9 @@ static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) { struct rvt_qpn_map *map; + if ((qpn & RVT_AIP_QP_PREFIX_MASK) == RVT_AIP_QP_BASE) + qpn &= RVT_AIP_QP_SUFFIX; + map = qpt->map + (qpn & RVT_QPN_MASK) / RVT_BITS_PER_PAGE; if (map->page) clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); @@ -1049,7 +1016,7 @@ static int alloc_ud_wq_attr(struct rvt_qp *qp, int node) /** * rvt_create_qp - create a queue pair for a device - * @ibpd: the protection domain who's device we create the queue pair for + * @ibqp: the queue pair * @init_attr: the attributes of the queue pair * @udata: user data for libibverbs.so * @@ -1057,44 +1024,45 @@ static int alloc_ud_wq_attr(struct rvt_qp *qp, int node) * unique idea of what queue pair numbers mean. For instance there is a reserved * range for PSM. * - * Return: the queue pair on success, otherwise returns an errno. + * Return: 0 on success, otherwise returns an errno. * * Called by the ib_create_qp() core verbs function. */ -struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +int rvt_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { - struct rvt_qp *qp; - int err; + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + int ret = -ENOMEM; struct rvt_swqe *swq = NULL; size_t sz; - size_t sg_list_sz; - struct ib_qp *ret = ERR_PTR(-ENOMEM); - struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); + size_t sg_list_sz = 0; + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); void *priv = NULL; size_t sqsize; + u8 exclude_prefix = 0; if (!rdi) - return ERR_PTR(-EINVAL); + return -EINVAL; + + if (init_attr->create_flags & ~IB_QP_CREATE_NETDEV_USE) + return -EOPNOTSUPP; if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge || - init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || - init_attr->create_flags) - return ERR_PTR(-EINVAL); + init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr) + return -EINVAL; /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_recv_sge || init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr) - return ERR_PTR(-EINVAL); + return -EINVAL; if (init_attr->cap.max_send_sge + init_attr->cap.max_send_wr + init_attr->cap.max_recv_sge + init_attr->cap.max_recv_wr == 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } sqsize = init_attr->cap.max_send_wr + 1 + @@ -1103,19 +1071,17 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, case IB_QPT_SMI: case IB_QPT_GSI: if (init_attr->port_num == 0 || - init_attr->port_num > ibpd->device->phys_port_cnt) - return ERR_PTR(-EINVAL); - /* fall through */ + init_attr->port_num > ibqp->device->phys_port_cnt) + return -EINVAL; + fallthrough; case IB_QPT_UC: case IB_QPT_RC: case IB_QPT_UD: sz = struct_size(swq, sg_list, init_attr->cap.max_send_sge); swq = vzalloc_node(array_size(sz, sqsize), rdi->dparms.node); if (!swq) - return ERR_PTR(-ENOMEM); + return -ENOMEM; - sz = sizeof(*qp); - sg_list_sz = 0; if (init_attr->srq) { struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq); @@ -1125,10 +1091,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); - qp = kzalloc_node(sz + sg_list_sz, GFP_KERNEL, - rdi->dparms.node); - if (!qp) - goto bail_swq; + qp->r_sg_list = + kzalloc_node(sg_list_sz, GFP_KERNEL, rdi->dparms.node); + if (!qp->r_sg_list) + goto bail_qp; qp->allowed_ops = get_allowed_ops(init_attr->qp_type); RCU_INIT_POINTER(qp->next, NULL); @@ -1153,7 +1119,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, */ priv = rdi->driver_f.qp_priv_alloc(rdi, qp); if (IS_ERR(priv)) { - ret = priv; + ret = PTR_ERR(priv); goto bail_qp; } qp->priv = priv; @@ -1167,12 +1133,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct rvt_rwqe); - err = rvt_alloc_rq(&qp->r_rq, qp->r_rq.size * sz, + ret = rvt_alloc_rq(&qp->r_rq, qp->r_rq.size * sz, rdi->dparms.node, udata); - if (err) { - ret = ERR_PTR(err); + if (ret) goto bail_driver_priv; - } } /* @@ -1193,34 +1157,35 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) qp->s_flags = RVT_S_SIGNAL_REQ_WR; - err = alloc_ud_wq_attr(qp, rdi->dparms.node); - if (err) { - ret = (ERR_PTR(err)); - goto bail_driver_priv; - } + ret = alloc_ud_wq_attr(qp, rdi->dparms.node); + if (ret) + goto bail_rq_rvt; + + if (init_attr->create_flags & IB_QP_CREATE_NETDEV_USE) + exclude_prefix = RVT_AIP_QP_PREFIX; - err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, + ret = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, - init_attr->port_num); - if (err < 0) { - ret = ERR_PTR(err); + init_attr->port_num, + exclude_prefix); + if (ret < 0) goto bail_rq_wq; - } - qp->ibqp.qp_num = err; + + qp->ibqp.qp_num = ret; + if (init_attr->create_flags & IB_QP_CREATE_NETDEV_USE) + qp->ibqp.qp_num |= RVT_AIP_QP_BASE; qp->port_num = init_attr->port_num; rvt_init_qp(rdi, qp, init_attr->qp_type); if (rdi->driver_f.qp_priv_init) { - err = rdi->driver_f.qp_priv_init(rdi, qp, init_attr); - if (err) { - ret = ERR_PTR(err); + ret = rdi->driver_f.qp_priv_init(rdi, qp, init_attr); + if (ret) goto bail_rq_wq; - } } break; default: /* Don't support raw QPs */ - return ERR_PTR(-EINVAL); + return -EOPNOTSUPP; } init_attr->cap.max_inline_data = 0; @@ -1233,28 +1198,24 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (!qp->r_rq.wq) { __u64 offset = 0; - err = ib_copy_to_udata(udata, &offset, + ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); - if (err) { - ret = ERR_PTR(err); + if (ret) goto bail_qpn; - } } else { u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; qp->ip = rvt_create_mmap_info(rdi, s, udata, qp->r_rq.wq); - if (!qp->ip) { - ret = ERR_PTR(-ENOMEM); + if (IS_ERR(qp->ip)) { + ret = PTR_ERR(qp->ip); goto bail_qpn; } - err = ib_copy_to_udata(udata, &qp->ip->offset, + ret = ib_copy_to_udata(udata, &qp->ip->offset, sizeof(qp->ip->offset)); - if (err) { - ret = ERR_PTR(err); + if (ret) goto bail_ip; - } } qp->pid = current->pid; } @@ -1262,7 +1223,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, spin_lock(&rdi->n_qps_lock); if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) { spin_unlock(&rdi->n_qps_lock); - ret = ERR_PTR(-ENOMEM); + ret = -ENOMEM; goto bail_ip; } @@ -1288,9 +1249,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, spin_unlock_irq(&rdi->pending_lock); } - ret = &qp->ibqp; - - return ret; + return 0; bail_ip: if (qp->ip) @@ -1300,19 +1259,18 @@ bail_qpn: rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: - rvt_free_rq(&qp->r_rq); free_ud_wq_attr(qp); +bail_rq_rvt: + rvt_free_rq(&qp->r_rq); + bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); bail_qp: kfree(qp->s_ack_queue); - kfree(qp); - -bail_swq: + kfree(qp->r_sg_list); vfree(swq); - return ret; } @@ -1455,6 +1413,9 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int pmtu = 0; /* for gcc warning only */ int opa_ah; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + spin_lock_irq(&qp->r_lock); spin_lock(&qp->s_hlock); spin_lock(&qp->s_lock); @@ -1708,6 +1669,7 @@ inval: /** * rvt_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy + * @udata: unused by the driver * * Note that this can be called while the QP is actively sending or * receiving! @@ -1738,11 +1700,11 @@ int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) kvfree(qp->r_rq.kwq); rdi->driver_f.qp_priv_free(rdi, qp); kfree(qp->s_ack_queue); + kfree(qp->r_sg_list); rdma_destroy_ah_attr(&qp->remote_ah_attr); rdma_destroy_ah_attr(&qp->alt_ah_attr); free_ud_wq_attr(qp); vfree(qp->s_wq); - kfree(qp); return 0; } @@ -1809,7 +1771,7 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } /** - * rvt_post_receive - post a receive on a QP + * rvt_post_recv - post a receive on a QP * @ibqp: the QP to post the receive on * @wr: the WR to post * @bad_wr: the first bad WR is put here @@ -1883,9 +1845,9 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, /** * rvt_qp_valid_operation - validate post send wr request - * @qp - the qp - * @post-parms - the post send table for the driver - * @wr - the work request + * @qp: the qp + * @post_parms: the post send table for the driver + * @wr: the work request * * The routine validates the operation based on the * validation table an returns the length of the operation @@ -1995,6 +1957,7 @@ static inline int rvt_qp_is_avail( * rvt_post_one_wr - post one RC, UC, or UD send work request * @qp: the QP to post on * @wr: the work request to send + * @call_send: kick the send engine into gear */ static int rvt_post_one_wr(struct rvt_qp *qp, const struct ib_send_wr *wr, @@ -2231,7 +2194,7 @@ bail: } /** - * rvt_post_srq_receive - post a receive on a shared receive queue + * rvt_post_srq_recv - post a receive on a shared receive queue * @ibsrq: the SRQ to post the receive on * @wr: the list of work requests to post * @bad_wr: A pointer to the first WR to cause a problem is put here @@ -2351,31 +2314,6 @@ bad_lkey: } /** - * get_count - count numbers of request work queue entries - * in circular buffer - * @rq: data structure for request queue entry - * @tail: tail indices of the circular buffer - * @head: head indices of the circular buffer - * - * Return - total number of entries in the circular buffer - */ -static u32 get_count(struct rvt_rq *rq, u32 tail, u32 head) -{ - u32 count; - - count = head; - - if (count >= rq->size) - count = 0; - if (count < tail) - count += rq->size - tail; - else - count -= tail; - - return count; -} - -/** * get_rvt_head - get head indices of the circular buffer * @rq: data structure for request queue entry * @ip: the QP @@ -2449,7 +2387,7 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only) if (kwq->count < RVT_RWQ_COUNT_THRESHOLD) { head = get_rvt_head(rq, ip); - kwq->count = get_count(rq, tail, head); + kwq->count = rvt_get_rq_count(rq, head, tail); } if (unlikely(kwq->count == 0)) { ret = 0; @@ -2484,7 +2422,9 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only) * the number of remaining WQEs. */ if (kwq->count < srq->limit) { - kwq->count = get_count(rq, tail, get_rvt_head(rq, ip)); + kwq->count = + rvt_get_rq_count(rq, + get_rvt_head(rq, ip), tail); if (kwq->count < srq->limit) { struct ib_event ev; @@ -2506,7 +2446,7 @@ bail: EXPORT_SYMBOL(rvt_get_rwqe); /** - * qp_comm_est - handle trap with QP established + * rvt_comm_est - handle trap with QP established * @qp: the QP */ void rvt_comm_est(struct rvt_qp *qp) @@ -2617,7 +2557,7 @@ EXPORT_SYMBOL(rvt_stop_rc_timers); /** * rvt_stop_rnr_timer - stop an rnr timer - * @qp - the QP + * @qp: the QP * * stop an rnr timer and return if the timer * had been pending. @@ -2835,7 +2775,7 @@ void rvt_qp_iter(struct rvt_dev_info *rdi, EXPORT_SYMBOL(rvt_qp_iter); /* - * This should be called with s_lock held. + * This should be called with s_lock and r_lock held. */ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) @@ -2952,7 +2892,7 @@ static enum ib_wc_status loopback_qp_drop(struct rvt_ibport *rvp, } /** - * ruc_loopback - handle UC and RC loopback requests + * rvt_ruc_loopback - handle UC and RC loopback requests * @sqp: the sending QP * * This is called from rvt_do_send() to forward a WQE addressed to the same HFI @@ -3133,6 +3073,8 @@ do_write: case IB_WR_ATOMIC_FETCH_AND_ADD: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; + if (unlikely(wqe->atomic_wr.remote_addr & (sizeof(u64) - 1))) + goto inv_err; if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), wqe->atomic_wr.remote_addr, wqe->atomic_wr.rkey, @@ -3192,7 +3134,9 @@ send_comp: rvp->n_loop_pkts++; flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; + spin_lock(&sqp->r_lock); rvt_send_complete(sqp, wqe, send_status); + spin_unlock(&sqp->r_lock); if (local_ops) { atomic_dec(&sqp->local_ops_pending); local_ops = 0; @@ -3246,9 +3190,15 @@ serr: spin_unlock_irqrestore(&qp->r_lock, flags); serr_no_r_lock: spin_lock_irqsave(&sqp->s_lock, flags); + spin_lock(&sqp->r_lock); rvt_send_complete(sqp, wqe, send_status); + spin_unlock(&sqp->r_lock); if (sqp->ibqp.qp_type == IB_QPT_RC) { - int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + int lastwqe; + + spin_lock(&sqp->r_lock); + lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + spin_unlock(&sqp->r_lock); sqp->s_flags &= ~RVT_S_BUSY; spin_unlock_irqrestore(&sqp->s_lock, flags); diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 2cdba1283bf6..bd04be80723c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -1,60 +1,17 @@ -#ifndef DEF_RVTQP_H -#define DEF_RVTQP_H - +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ +#ifndef DEF_RVTQP_H +#define DEF_RVTQP_H + #include <rdma/rdmavt_qp.h> int rvt_driver_qp_init(struct rvt_dev_info *rdi); void rvt_qp_exit(struct rvt_dev_info *rdi); -struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata); +int rvt_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c index 977906cc0d11..4e5d4a27633c 100644 --- a/drivers/infiniband/sw/rdmavt/rc.c +++ b/drivers/infiniband/sw/rdmavt/rc.c @@ -1,48 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include <rdma/rdmavt_qp.h> @@ -127,9 +85,7 @@ __be32 rvt_compute_aeth(struct rvt_qp *qp) * not atomic, which is OK, since the fuzziness is * resolved as further ACKs go out. */ - credits = head - tail; - if ((int)credits < 0) - credits += qp->r_rq.size; + credits = rvt_get_rq_count(&qp->r_rq, head, tail); } /* * Binary search the credit table to find the code to diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 24fef021d51d..14d196bde2a1 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -1,48 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include <linux/err.h> @@ -67,7 +25,7 @@ void rvt_driver_srq_init(struct rvt_dev_info *rdi) /** * rvt_create_srq - create a shared receive queue - * @ibpd: the protection domain of the SRQ to create + * @ibsrq: the protection domain of the SRQ to create * @srq_init_attr: the attributes of the SRQ * @udata: data from libibverbs when creating a user SRQ * @@ -111,8 +69,8 @@ int rvt_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *srq_init_attr, u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz; srq->ip = rvt_create_mmap_info(dev, s, udata, srq->rq.wq); - if (!srq->ip) { - ret = -ENOMEM; + if (IS_ERR(srq->ip)) { + ret = PTR_ERR(srq->ip); goto bail_wq; } @@ -311,7 +269,8 @@ bail_free: return ret; } -/** rvt_query_srq - query srq data +/** + * rvt_query_srq - query srq data * @ibsrq: srq to query * @attr: return info in attr * @@ -330,9 +289,9 @@ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) /** * rvt_destroy_srq - destory an srq * @ibsrq: srq object to destroy - * + * @udata: user data for libibverbs.so */ -void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); @@ -343,4 +302,5 @@ void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) if (srq->ip) kref_put(&srq->ip->ref, rvt_release_mmap_info); kvfree(srq->rq.kwq); + return 0; } diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h index 6427d7d62a9a..7d17372cd269 100644 --- a/drivers/infiniband/sw/rdmavt/srq.h +++ b/drivers/infiniband/sw/rdmavt/srq.h @@ -1,53 +1,11 @@ -#ifndef DEF_RVTSRQ_H -#define DEF_RVTSRQ_H - +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ +#ifndef DEF_RVTSRQ_H +#define DEF_RVTSRQ_H + #include <rdma/rdma_vt.h> void rvt_driver_srq_init(struct rvt_dev_info *rdi); int rvt_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *srq_init_attr, @@ -56,6 +14,6 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); -void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); #endif /* DEF_RVTSRQ_H */ diff --git a/drivers/infiniband/sw/rdmavt/trace.c b/drivers/infiniband/sw/rdmavt/trace.c index d593285a349c..01704b8dd683 100644 --- a/drivers/infiniband/sw/rdmavt/trace.c +++ b/drivers/infiniband/sw/rdmavt/trace.c @@ -1,48 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #define CREATE_TRACE_POINTS diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index 36ddbd291ee0..30eb4a72ea7d 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -1,48 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016, 2017 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #define RDI_DEV_ENTRY(rdi) __string(dev, rvt_get_ibdev_name(rdi)) diff --git a/drivers/infiniband/sw/rdmavt/trace_cq.h b/drivers/infiniband/sw/rdmavt/trace_cq.h index e3c416c6f900..30dd1d9bae26 100644 --- a/drivers/infiniband/sw/rdmavt/trace_cq.h +++ b/drivers/infiniband/sw/rdmavt/trace_cq.h @@ -1,48 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 - 2018 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #if !defined(__RVT_TRACE_CQ_H) || defined(TRACE_HEADER_MULTI_READ) #define __RVT_TRACE_CQ_H @@ -85,7 +43,7 @@ DECLARE_EVENT_CLASS(rvt_cq_template, __field(int, comp_vector_cpu) __field(u32, flags) ), - TP_fast_assign(RDI_DEV_ASSIGN(cq->rdi) + TP_fast_assign(RDI_DEV_ASSIGN(cq->rdi); __entry->ip = cq->ip; __entry->cqe = attr->cqe; __entry->comp_vector = attr->comp_vector; @@ -123,7 +81,7 @@ DECLARE_EVENT_CLASS( __field(u32, imm) ), TP_fast_assign( - RDI_DEV_ASSIGN(cq->rdi) + RDI_DEV_ASSIGN(cq->rdi); __entry->wr_id = wc->wr_id; __entry->status = wc->status; __entry->opcode = wc->opcode; diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h index 95b8a0e3b8bd..1de7012000cb 100644 --- a/drivers/infiniband/sw/rdmavt/trace_mr.h +++ b/drivers/infiniband/sw/rdmavt/trace_mr.h @@ -1,48 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #if !defined(__RVT_TRACE_MR_H) || defined(TRACE_HEADER_MULTI_READ) #define __RVT_TRACE_MR_H @@ -195,7 +153,7 @@ TRACE_EVENT( __field(uint, sg_offset) ), TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(to_imr(ibmr)->mr.pd->device)) + RDI_DEV_ASSIGN(ib_to_rvt(to_imr(ibmr)->mr.pd->device)); __entry->ibmr_iova = ibmr->iova; __entry->iova = to_imr(ibmr)->mr.iova; __entry->user_base = to_imr(ibmr)->mr.user_base; diff --git a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h index c32d21cc615e..c28c81fcb32a 100644 --- a/drivers/infiniband/sw/rdmavt/trace_qp.h +++ b/drivers/infiniband/sw/rdmavt/trace_qp.h @@ -1,48 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #if !defined(__RVT_TRACE_QP_H) || defined(TRACE_HEADER_MULTI_READ) #define __RVT_TRACE_QP_H @@ -65,7 +23,7 @@ DECLARE_EVENT_CLASS(rvt_qphash_template, __field(u32, bucket) ), TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)); __entry->qpn = qp->ibqp.qp_num; __entry->bucket = bucket; ), @@ -97,7 +55,7 @@ DECLARE_EVENT_CLASS( __field(u32, to) ), TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)); __entry->qpn = qp->ibqp.qp_num; __entry->hrtimer = &qp->s_rnr_timer; __entry->s_flags = qp->s_flags; diff --git a/drivers/infiniband/sw/rdmavt/trace_rc.h b/drivers/infiniband/sw/rdmavt/trace_rc.h index c47357af2099..833bf778b05d 100644 --- a/drivers/infiniband/sw/rdmavt/trace_rc.h +++ b/drivers/infiniband/sw/rdmavt/trace_rc.h @@ -1,48 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2017 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #if !defined(__RVT_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ) #define __RVT_TRACE_RC_H @@ -71,7 +29,7 @@ DECLARE_EVENT_CLASS(rvt_rc_template, __field(u32, r_psn) ), TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)); __entry->qpn = qp->ibqp.qp_num; __entry->s_flags = qp->s_flags; __entry->psn = psn; diff --git a/drivers/infiniband/sw/rdmavt/trace_rvt.h b/drivers/infiniband/sw/rdmavt/trace_rvt.h index 746f33461d9a..9df6b0b8263b 100644 --- a/drivers/infiniband/sw/rdmavt/trace_rvt.h +++ b/drivers/infiniband/sw/rdmavt/trace_rvt.h @@ -1,48 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #if !defined(__RVT_TRACE_RVT_H) || defined(TRACE_HEADER_MULTI_READ) #define __RVT_TRACE_RVT_H diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h index d963ca755828..ff7d39a30768 100644 --- a/drivers/infiniband/sw/rdmavt/trace_tx.h +++ b/drivers/infiniband/sw/rdmavt/trace_tx.h @@ -1,48 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #if !defined(__RVT_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ) #define __RVT_TRACE_TX_H @@ -111,7 +69,7 @@ TRACE_EVENT( __field(int, wr_num_sge) ), TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)); __entry->wqe = wqe; __entry->wr_id = wqe->wr.wr_id; __entry->qpn = qp->ibqp.qp_num; @@ -170,7 +128,7 @@ TRACE_EVENT( __field(int, send_flags) ), TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)); __entry->wqe = wqe; __entry->wr_id = wqe->wr.wr_id; __entry->qpn = qp->ibqp.qp_num; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 986265ad6e79..d61f8de7f21c 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -1,48 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2016 - 2018 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #include <linux/module.h> @@ -57,7 +15,7 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("RDMA Verbs Transport Library"); -static int rvt_init(void) +static int __init rvt_init(void) { int ret = rvt_driver_cq_init(); @@ -68,7 +26,7 @@ static int rvt_init(void) } module_init(rvt_init); -static void rvt_cleanup(void) +static void __exit rvt_cleanup(void) { rvt_cq_exit(); } @@ -95,9 +53,7 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports) if (!rdi) return rdi; - rdi->ports = kcalloc(nports, - sizeof(struct rvt_ibport **), - GFP_KERNEL); + rdi->ports = kcalloc(nports, sizeof(*rdi->ports), GFP_KERNEL); if (!rdi->ports) ib_dealloc_device(&rdi->ibdev); @@ -133,6 +89,13 @@ static int rvt_query_device(struct ib_device *ibdev, return 0; } +static int rvt_get_numa_node(struct ib_device *ibdev) +{ + struct rvt_dev_info *rdi = ib_to_rvt(ibdev); + + return rdi->dparms.node; +} + static int rvt_modify_device(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify) @@ -146,22 +109,19 @@ static int rvt_modify_device(struct ib_device *device, } /** - * rvt_query_port: Passes the query port call to the driver + * rvt_query_port - Passes the query port call to the driver * @ibdev: Verbs IB dev * @port_num: port number, 1 based from ib core * @props: structure to hold returned properties * * Return: 0 on success */ -static int rvt_query_port(struct ib_device *ibdev, u8 port_num, +static int rvt_query_port(struct ib_device *ibdev, u32 port_num, struct ib_port_attr *props) { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); struct rvt_ibport *rvp; - int port_index = ibport_num_to_idx(ibdev, port_num); - - if (port_index < 0) - return -EINVAL; + u32 port_index = ibport_num_to_idx(ibdev, port_num); rvp = rdi->ports[port_index]; /* props being zeroed by the caller, avoid zeroing it here */ @@ -180,7 +140,7 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num, } /** - * rvt_modify_port + * rvt_modify_port - modify port * @ibdev: Verbs IB dev * @port_num: Port number, 1 based from ib core * @port_modify_mask: How to change the port @@ -188,16 +148,13 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num, * * Return: 0 on success */ -static int rvt_modify_port(struct ib_device *ibdev, u8 port_num, +static int rvt_modify_port(struct ib_device *ibdev, u32 port_num, int port_modify_mask, struct ib_port_modify *props) { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); struct rvt_ibport *rvp; int ret = 0; - int port_index = ibport_num_to_idx(ibdev, port_num); - - if (port_index < 0) - return -EINVAL; + u32 port_index = ibport_num_to_idx(ibdev, port_num); rvp = rdi->ports[port_index]; if (port_modify_mask & IB_PORT_OPA_MASK_CHG) { @@ -227,7 +184,7 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num, * * Return: 0 on failure pkey otherwise */ -static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, +static int rvt_query_pkey(struct ib_device *ibdev, u32 port_num, u16 index, u16 *pkey) { /* @@ -237,11 +194,9 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, * no way to protect against that anyway. */ struct rvt_dev_info *rdi = ib_to_rvt(ibdev); - int port_index; + u32 port_index; port_index = ibport_num_to_idx(ibdev, port_num); - if (port_index < 0) - return -EINVAL; if (index >= rvt_get_npkeys(rdi)) return -EINVAL; @@ -259,12 +214,12 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, * * Return: 0 on success */ -static int rvt_query_gid(struct ib_device *ibdev, u8 port_num, +static int rvt_query_gid(struct ib_device *ibdev, u32 port_num, int guid_index, union ib_gid *gid) { struct rvt_dev_info *rdi; struct rvt_ibport *rvp; - int port_index; + u32 port_index; /* * Driver is responsible for updating the guid table. Which will be used @@ -272,8 +227,6 @@ static int rvt_query_gid(struct ib_device *ibdev, u8 port_num, * is being done. */ port_index = ibport_num_to_idx(ibdev, port_num); - if (port_index < 0) - return -EINVAL; rdi = ib_to_rvt(ibdev); rvp = rdi->ports[port_index]; @@ -284,12 +237,6 @@ static int rvt_query_gid(struct ib_device *ibdev, u8 port_num, &gid->global.interface_id); } -static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext - *ibucontext) -{ - return container_of(ibucontext, struct rvt_ucontext, ibucontext); -} - /** * rvt_alloc_ucontext - Allocate a user context * @uctx: Verbs context @@ -302,23 +249,19 @@ static int rvt_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) /** * rvt_dealloc_ucontext - Free a user context - * @context - Free this + * @context: Unused */ static void rvt_dealloc_ucontext(struct ib_ucontext *context) { return; } -static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, +static int rvt_get_port_immutable(struct ib_device *ibdev, u32 port_num, struct ib_port_immutable *immutable) { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); struct ib_port_attr attr; - int err, port_index; - - port_index = ibport_num_to_idx(ibdev, port_num); - if (port_index < 0) - return -EINVAL; + int err; immutable->core_cap_flags = rdi->dparms.core_cap_flags; @@ -384,7 +327,6 @@ enum { static const struct ib_device_ops rvt_dev_ops = { .uverbs_abi_ver = RVT_UVERBS_ABI_VERSION, - .alloc_fmr = rvt_alloc_fmr, .alloc_mr = rvt_alloc_mr, .alloc_pd = rvt_alloc_pd, .alloc_ucontext = rvt_alloc_ucontext, @@ -393,7 +335,7 @@ static const struct ib_device_ops rvt_dev_ops = { .create_cq = rvt_create_cq, .create_qp = rvt_create_qp, .create_srq = rvt_create_srq, - .dealloc_fmr = rvt_dealloc_fmr, + .create_user_ah = rvt_create_ah, .dealloc_pd = rvt_dealloc_pd, .dealloc_ucontext = rvt_dealloc_ucontext, .dereg_mr = rvt_dereg_mr, @@ -403,9 +345,9 @@ static const struct ib_device_ops rvt_dev_ops = { .destroy_srq = rvt_destroy_srq, .detach_mcast = rvt_detach_mcast, .get_dma_mr = rvt_get_dma_mr, + .get_numa_node = rvt_get_numa_node, .get_port_immutable = rvt_get_port_immutable, .map_mr_sg = rvt_map_mr_sg, - .map_phys_fmr = rvt_map_phys_fmr, .mmap = rvt_mmap, .modify_ah = rvt_modify_ah, .modify_device = rvt_modify_device, @@ -426,11 +368,11 @@ static const struct ib_device_ops rvt_dev_ops = { .reg_user_mr = rvt_reg_user_mr, .req_notify_cq = rvt_req_notify_cq, .resize_cq = rvt_resize_cq, - .unmap_fmr = rvt_unmap_fmr, INIT_RDMA_OBJ_SIZE(ib_ah, rvt_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, rvt_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_qp, rvt_qp, ibqp), INIT_RDMA_OBJ_SIZE(ib_srq, rvt_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, rvt_ucontext, ibucontext), }; @@ -443,7 +385,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) * These functions are not part of verbs specifically but are * required for rdmavt to function. */ - if ((!rdi->ibdev.ops.init_port) || + if ((!rdi->ibdev.ops.port_groups) || (!rdi->driver_f.get_pci_dev)) return -EINVAL; break; @@ -590,9 +532,6 @@ int rvt_register_device(struct rvt_dev_info *rdi) /* Completion queues */ spin_lock_init(&rdi->n_cqs_lock); - /* DMA Operations */ - rdi->ibdev.dev.dma_ops = rdi->ibdev.dev.dma_ops ? : &dma_virt_ops; - /* Protection Domain */ spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; @@ -603,43 +542,18 @@ int rvt_register_device(struct rvt_dev_info *rdi) * exactly which functions rdmavt supports, nor do they know the ABI * version, so we do all of this sort of stuff here. */ - rdi->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + rdi->ibdev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); rdi->ibdev.node_type = RDMA_NODE_IB_CA; if (!rdi->ibdev.num_comp_vectors) rdi->ibdev.num_comp_vectors = 1; /* We are now good to announce we exist */ - ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev)); + ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev), NULL); if (ret) { rvt_pr_err(rdi, "Failed to register driver with ib core.\n"); goto bail_wss; diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index d19ff817c2c7..461574e3f6a5 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -1,53 +1,11 @@ -#ifndef DEF_RDMAVT_H -#define DEF_RDMAVT_H - +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* * Copyright(c) 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ +#ifndef DEF_RDMAVT_H +#define DEF_RDMAVT_H + #include <rdma/rdma_vt.h> #include <linux/pci.h> #include "pd.h" @@ -96,16 +54,9 @@ #define __rvt_pr_err_ratelimited(pdev, name, fmt, ...) \ dev_err_ratelimited(&(pdev)->dev, "%s: " fmt, name, ##__VA_ARGS__) -static inline int ibport_num_to_idx(struct ib_device *ibdev, u8 port_num) +static inline u32 ibport_num_to_idx(struct ib_device *ibdev, u32 port_num) { - struct rvt_dev_info *rdi = ib_to_rvt(ibdev); - int port_index; - - port_index = port_num - 1; /* IB ports start at 1 our arrays at 0 */ - if ((port_index < 0) || (port_index >= rdi->dparms.nports)) - return -EINVAL; - - return port_index; + return port_num - 1; /* IB ports start at 1 our arrays at 0 */ } #endif /* DEF_RDMAVT_H */ diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig index d9bcfe740588..06b8dc5093f7 100644 --- a/drivers/infiniband/sw/rxe/Kconfig +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -2,11 +2,11 @@ config RDMA_RXE tristate "Software RDMA over Ethernet (RoCE) driver" depends on INET && PCI && INFINIBAND - depends on !64BIT || ARCH_DMA_ADDR_T_64BIT + depends on INFINIBAND_VIRT_DMA select NET_UDP_TUNNEL + select CRYPTO select CRYPTO_CRC32 - select DMA_VIRT_OPS - ---help--- + help This driver implements the InfiniBand RDMA transport over the Linux network stack. It enables a system with a standard Ethernet adapter to interoperate with a RoCE diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile index 66af72dca759..5395a581f4bb 100644 --- a/drivers/infiniband/sw/rxe/Makefile +++ b/drivers/infiniband/sw/rxe/Makefile @@ -15,11 +15,11 @@ rdma_rxe-y := \ rxe_qp.o \ rxe_cq.o \ rxe_mr.o \ + rxe_mw.o \ rxe_opcode.o \ rxe_mmap.o \ rxe_icrc.o \ rxe_mcast.o \ rxe_task.o \ rxe_net.o \ - rxe_sysfs.o \ rxe_hw_counters.o diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 0946a301a5c5..51daac5c4feb 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <rdma/rdma_netlink.h> @@ -40,14 +13,6 @@ MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); MODULE_DESCRIPTION("Soft RDMA transport"); MODULE_LICENSE("Dual BSD/GPL"); -/* free resources for all ports on a device */ -static void rxe_cleanup_ports(struct rxe_dev *rxe) -{ - kfree(rxe->port.pkey_tbl); - rxe->port.pkey_tbl = NULL; - -} - /* free resources for a rxe device all objects created for this device must * have been destroyed */ @@ -63,10 +28,8 @@ void rxe_dealloc(struct ib_device *ib_dev) rxe_pool_cleanup(&rxe->cq_pool); rxe_pool_cleanup(&rxe->mr_pool); rxe_pool_cleanup(&rxe->mw_pool); - rxe_pool_cleanup(&rxe->mc_grp_pool); - rxe_pool_cleanup(&rxe->mc_elem_pool); - rxe_cleanup_ports(rxe); + WARN_ON(!RB_EMPTY_ROOT(&rxe->mcg_tree)); if (rxe->tfm) crypto_free_shash(rxe->tfm); @@ -77,17 +40,20 @@ static void rxe_init_device_param(struct rxe_dev *rxe) { rxe->max_inline_data = RXE_MAX_INLINE_DATA; + rxe->attr.vendor_id = RXE_VENDOR_ID; rxe->attr.max_mr_size = RXE_MAX_MR_SIZE; rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP; rxe->attr.max_qp = RXE_MAX_QP; rxe->attr.max_qp_wr = RXE_MAX_QP_WR; rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; + rxe->attr.kernel_cap_flags = IBK_ALLOW_USER_UNREG; rxe->attr.max_send_sge = RXE_MAX_SGE; rxe->attr.max_recv_sge = RXE_MAX_SGE; rxe->attr.max_sge_rd = RXE_MAX_SGE_RD; rxe->attr.max_cq = RXE_MAX_CQ; rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1; rxe->attr.max_mr = RXE_MAX_MR; + rxe->attr.max_mw = RXE_MAX_MW; rxe->attr.max_pd = RXE_MAX_PD; rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM; rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM; @@ -103,12 +69,14 @@ static void rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN; rxe->attr.max_pkeys = RXE_MAX_PKEYS; rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; + addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid, + rxe->ndev->dev_addr); rxe->max_ucontext = RXE_MAX_UCONTEXT; } /* initialize port attributes */ -static int rxe_init_port_param(struct rxe_port *port) +static void rxe_init_port_param(struct rxe_port *port) { port->attr.state = IB_PORT_DOWN; port->attr.max_mtu = IB_MTU_4096; @@ -131,145 +99,53 @@ static int rxe_init_port_param(struct rxe_port *port) port->attr.phys_state = RXE_PORT_PHYS_STATE; port->mtu_cap = ib_mtu_enum_to_int(IB_MTU_256); port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX); - - return 0; } /* initialize port state, note IB convention that HCA ports are always * numbered from 1 */ -static int rxe_init_ports(struct rxe_dev *rxe) +static void rxe_init_ports(struct rxe_dev *rxe) { struct rxe_port *port = &rxe->port; rxe_init_port_param(port); - - if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len) - return -EINVAL; - - port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len, - sizeof(*port->pkey_tbl), GFP_KERNEL); - - if (!port->pkey_tbl) - return -ENOMEM; - - port->pkey_tbl[0] = 0xffff; addrconf_addr_eui48((unsigned char *)&port->port_guid, rxe->ndev->dev_addr); - spin_lock_init(&port->port_lock); - - return 0; } /* init pools of managed objects */ -static int rxe_init_pools(struct rxe_dev *rxe) +static void rxe_init_pools(struct rxe_dev *rxe) { - int err; - - err = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC, - rxe->max_ucontext); - if (err) - goto err1; - - err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD, - rxe->attr.max_pd); - if (err) - goto err2; - - err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH, - rxe->attr.max_ah); - if (err) - goto err3; - - err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ, - rxe->attr.max_srq); - if (err) - goto err4; - - err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP, - rxe->attr.max_qp); - if (err) - goto err5; - - err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ, - rxe->attr.max_cq); - if (err) - goto err6; - - err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR, - rxe->attr.max_mr); - if (err) - goto err7; - - err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW, - rxe->attr.max_mw); - if (err) - goto err8; - - err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP, - rxe->attr.max_mcast_grp); - if (err) - goto err9; - - err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM, - rxe->attr.max_total_mcast_qp_attach); - if (err) - goto err10; - - return 0; - -err10: - rxe_pool_cleanup(&rxe->mc_grp_pool); -err9: - rxe_pool_cleanup(&rxe->mw_pool); -err8: - rxe_pool_cleanup(&rxe->mr_pool); -err7: - rxe_pool_cleanup(&rxe->cq_pool); -err6: - rxe_pool_cleanup(&rxe->qp_pool); -err5: - rxe_pool_cleanup(&rxe->srq_pool); -err4: - rxe_pool_cleanup(&rxe->ah_pool); -err3: - rxe_pool_cleanup(&rxe->pd_pool); -err2: - rxe_pool_cleanup(&rxe->uc_pool); -err1: - return err; + rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC); + rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD); + rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH); + rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ); + rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP); + rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ); + rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR); + rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW); } /* initialize rxe device state */ -static int rxe_init(struct rxe_dev *rxe) +static void rxe_init(struct rxe_dev *rxe) { - int err; - /* init default device parameters */ rxe_init_device_param(rxe); - err = rxe_init_ports(rxe); - if (err) - goto err1; - - err = rxe_init_pools(rxe); - if (err) - goto err2; + rxe_init_ports(rxe); + rxe_init_pools(rxe); /* init pending mmap list */ spin_lock_init(&rxe->mmap_offset_lock); spin_lock_init(&rxe->pending_lock); INIT_LIST_HEAD(&rxe->pending_mmaps); - mutex_init(&rxe->usdev_lock); - - return 0; + /* init multicast support */ + spin_lock_init(&rxe->mcg_lock); + rxe->mcg_tree = RB_ROOT; -err2: - rxe_cleanup_ports(rxe); -err1: - return err; + mutex_init(&rxe->usdev_lock); } void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) @@ -291,12 +167,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) */ int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name) { - int err; - - err = rxe_init(rxe); - if (err) - return err; - + rxe_init(rxe); rxe_set_mtu(rxe, mtu); return rxe_register_device(rxe, ibdev_name); @@ -307,6 +178,12 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev) struct rxe_dev *exists; int err = 0; + if (is_vlan_dev(ndev)) { + pr_err("rxe creation allowed on top of a real device only\n"); + err = -EPERM; + goto err; + } + exists = rxe_get_dev_from_net(ndev); if (exists) { ib_device_put(&exists->ib_dev); @@ -333,13 +210,6 @@ static int __init rxe_module_init(void) { int err; - /* initialize slab caches for managed objects */ - err = rxe_cache_init(); - if (err) { - pr_err("unable to init object pools\n"); - return err; - } - err = rxe_net_init(); if (err) return err; @@ -354,7 +224,6 @@ static void __exit rxe_module_exit(void) rdma_link_unregister(&rxe_link_ops); ib_unregister_driver(RDMA_DRIVER_RXE); rxe_net_exit(); - rxe_cache_exit(); pr_info("unloaded\n"); } diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index fb07eed9e402..30fbdf3bc76a 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_H @@ -39,9 +12,7 @@ #endif #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/module.h> #include <linux/skbuff.h> -#include <linux/crc32.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> @@ -67,27 +38,6 @@ #define RXE_ROCE_V2_SPORT (0xc000) -static inline u32 rxe_crc32(struct rxe_dev *rxe, - u32 crc, void *next, size_t len) -{ - u32 retval; - int err; - - SHASH_DESC_ON_STACK(shash, rxe->tfm); - - shash->tfm = rxe->tfm; - *(u32 *)shash_desc_ctx(shash) = crc; - err = crypto_shash_update(shash, next, len); - if (unlikely(err)) { - pr_warn_ratelimited("failed crc calculation, err: %d\n", err); - return crc32_le(crc, next, len); - } - - retval = *(u32 *)shash_desc_ctx(shash); - barrier_data(shash_desc_ctx(shash)); - return retval; -} - void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu); int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name); diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 81ee756c19b8..3b05314ca739 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include "rxe.h" @@ -43,15 +16,24 @@ void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av) int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr) { + const struct ib_global_route *grh = rdma_ah_read_grh(attr); struct rxe_port *port; + int type; port = &rxe->port; if (rdma_ah_get_ah_flags(attr) & IB_AH_GRH) { - u8 sgid_index = rdma_ah_read_grh(attr)->sgid_index; + if (grh->sgid_index > port->attr.gid_tbl_len) { + pr_warn("invalid sgid index = %d\n", + grh->sgid_index); + return -EINVAL; + } - if (sgid_index > port->attr.gid_tbl_len) { - pr_warn("invalid sgid index = %d\n", sgid_index); + type = rdma_gid_attr_network_type(grh->sgid_attr); + if (type < RDMA_NETWORK_IPV4 || + type > RDMA_NETWORK_IPV6) { + pr_warn("invalid network type for rdma_rxe = %d\n", + type); return -EINVAL; } } @@ -92,20 +74,71 @@ void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr) void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr) { const struct ib_gid_attr *sgid_attr = attr->grh.sgid_attr; + int ibtype; + int type; rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid); rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &rdma_ah_read_grh(attr)->dgid); - av->network_type = rdma_gid_attr_network_type(sgid_attr); + + ibtype = rdma_gid_attr_network_type(sgid_attr); + + switch (ibtype) { + case RDMA_NETWORK_IPV4: + type = RXE_NETWORK_TYPE_IPV4; + break; + case RDMA_NETWORK_IPV6: + type = RXE_NETWORK_TYPE_IPV6; + break; + default: + /* not reached - checked in rxe_av_chk_attr */ + type = 0; + break; + } + + av->network_type = type; } -struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp) { + struct rxe_ah *ah; + u32 ah_num; + + if (ahp) + *ahp = NULL; + if (!pkt || !pkt->qp) return NULL; if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC) return &pkt->qp->pri_av; - return (pkt->wqe) ? &pkt->wqe->av : NULL; + if (!pkt->wqe) + return NULL; + + ah_num = pkt->wqe->wr.wr.ud.ah_num; + if (ah_num) { + /* only new user provider or kernel client */ + ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num); + if (!ah) { + pr_warn("Unable to find AH matching ah_num\n"); + return NULL; + } + + if (rxe_ah_pd(ah) != pkt->qp->pd) { + pr_warn("PDs don't match for AH and QP\n"); + rxe_put(ah); + return NULL; + } + + if (ahp) + *ahp = ah; + else + rxe_put(ah); + + return &ah->av; + } + + /* only old user provider for UD sends*/ + return &pkt->wqe->wr.wr.ud.av; } diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 4bc88708b355..fb0c008af78c 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/skbuff.h> @@ -130,6 +103,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ; case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; case IB_WR_REG_MR: return IB_WC_REG_MR; + case IB_WR_BIND_MW: return IB_WC_BIND_MW; default: return 0xff; @@ -140,6 +114,8 @@ void retransmit_timer(struct timer_list *t) { struct rxe_qp *qp = from_timer(qp, t, retrans_timer); + pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index); + if (qp->valid) { qp->comp.timeout = 1; rxe_run_task(&qp->comp.task, 1); @@ -168,7 +144,7 @@ static inline enum comp_state get_wqe(struct rxe_qp *qp, /* we come here whether or not we found a response packet to see if * there are any posted WQEs */ - wqe = queue_head(qp->sq.queue); + wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT); *wqe_p = wqe; /* no WQE or requester has not started it yet */ @@ -282,7 +258,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, if ((syn & AETH_TYPE_MASK) != AETH_ACK) return COMPST_ERROR; - /* fall through */ + fallthrough; /* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH) */ case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: @@ -372,14 +348,16 @@ static inline enum comp_state do_read(struct rxe_qp *qp, ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &wqe->dma, payload_addr(pkt), - payload_size(pkt), to_mem_obj, NULL); - if (ret) + payload_size(pkt), RXE_TO_MR_OBJ); + if (ret) { + wqe->status = IB_WC_LOC_PROT_ERR; return COMPST_ERROR; + } if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK)) return COMPST_COMP_ACK; - else - return COMPST_UPDATE_COMP; + + return COMPST_UPDATE_COMP; } static inline enum comp_state do_atomic(struct rxe_qp *qp, @@ -392,40 +370,47 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp, ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &wqe->dma, &atomic_orig, - sizeof(u64), to_mem_obj, NULL); - if (ret) + sizeof(u64), RXE_TO_MR_OBJ); + if (ret) { + wqe->status = IB_WC_LOC_PROT_ERR; return COMPST_ERROR; - else - return COMPST_COMP_ACK; + } + + return COMPST_COMP_ACK; } static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_cqe *cqe) { + struct ib_wc *wc = &cqe->ibwc; + struct ib_uverbs_wc *uwc = &cqe->uibwc; + memset(cqe, 0, sizeof(*cqe)); if (!qp->is_user) { - struct ib_wc *wc = &cqe->ibwc; - - wc->wr_id = wqe->wr.wr_id; - wc->status = wqe->status; - wc->opcode = wr_to_wc_opcode(wqe->wr.opcode); - if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || - wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - wc->wc_flags = IB_WC_WITH_IMM; - wc->byte_len = wqe->dma.length; - wc->qp = &qp->ibqp; + wc->wr_id = wqe->wr.wr_id; + wc->status = wqe->status; + wc->qp = &qp->ibqp; } else { - struct ib_uverbs_wc *uwc = &cqe->uibwc; - - uwc->wr_id = wqe->wr.wr_id; - uwc->status = wqe->status; - uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode); - if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || - wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - uwc->wc_flags = IB_WC_WITH_IMM; - uwc->byte_len = wqe->dma.length; - uwc->qp_num = qp->ibqp.qp_num; + uwc->wr_id = wqe->wr.wr_id; + uwc->status = wqe->status; + uwc->qp_num = qp->ibqp.qp_num; + } + + if (wqe->status == IB_WC_SUCCESS) { + if (!qp->is_user) { + wc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + wc->wc_flags = IB_WC_WITH_IMM; + wc->byte_len = wqe->dma.length; + } else { + uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + uwc->wc_flags = IB_WC_WITH_IMM; + uwc->byte_len = wqe->dma.length; + } } } @@ -441,16 +426,20 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_cqe cqe; + bool post; - if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED) || - wqe->status != IB_WC_SUCCESS) { + /* do we need to post a completion */ + post = ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED) || + wqe->status != IB_WC_SUCCESS); + + if (post) make_send_cqe(qp, wqe, &cqe); - advance_consumer(qp->sq.queue); + + queue_advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT); + + if (post) rxe_cq_post(qp->scq, &cqe, 0); - } else { - advance_consumer(qp->sq.queue); - } if (wqe->wr.opcode == IB_WR_SEND || wqe->wr.opcode == IB_WR_SEND_WITH_IMM || @@ -471,8 +460,6 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, struct rxe_send_wqe *wqe) { - unsigned long flags; - if (wqe->has_rd_atomic) { wqe->has_rd_atomic = 0; atomic_inc(&qp->req.rd_atomic); @@ -485,11 +472,11 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, if (unlikely(qp->req.state == QP_STATE_DRAIN)) { /* state_lock used by requester & completer */ - spin_lock_irqsave(&qp->state_lock, flags); + spin_lock_bh(&qp->state_lock); if ((qp->req.state == QP_STATE_DRAIN) && (qp->comp.psn == qp->req.psn)) { qp->req.state = QP_STATE_DRAINED; - spin_unlock_irqrestore(&qp->state_lock, flags); + spin_unlock_bh(&qp->state_lock); if (qp->ibqp.event_handler) { struct ib_event ev; @@ -501,7 +488,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, qp->ibqp.qp_context); } } else { - spin_unlock_irqrestore(&qp->state_lock, flags); + spin_unlock_bh(&qp->state_lock); } } @@ -538,22 +525,35 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify) { struct sk_buff *skb; struct rxe_send_wqe *wqe; + struct rxe_queue *q = qp->sq.queue; while ((skb = skb_dequeue(&qp->resp_pkts))) { - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } - while ((wqe = queue_head(qp->sq.queue))) { + while ((wqe = queue_head(q, q->type))) { if (notify) { wqe->status = IB_WC_WR_FLUSH_ERR; do_complete(qp, wqe); } else { - advance_consumer(qp->sq.queue); + queue_advance_consumer(q, q->type); } } } +static void free_pkt(struct rxe_pkt_info *pkt) +{ + struct sk_buff *skb = PKT_TO_SKB(pkt); + struct rxe_qp *qp = pkt->qp; + struct ib_device *dev = qp->ibqp.device; + + kfree_skb(skb); + rxe_put(qp); + ib_device_put(dev); +} + int rxe_completer(void *arg) { struct rxe_qp *qp = (struct rxe_qp *)arg; @@ -562,13 +562,15 @@ int rxe_completer(void *arg) struct sk_buff *skb = NULL; struct rxe_pkt_info *pkt = NULL; enum comp_state state; + int ret; - rxe_add_ref(qp); + if (!rxe_get(qp)) + return -EAGAIN; - if (!qp->valid || qp->req.state == QP_STATE_ERROR || - qp->req.state == QP_STATE_RESET) { + if (!qp->valid || qp->comp.state == QP_STATE_ERROR || + qp->comp.state == QP_STATE_RESET) { rxe_drain_resp_pkts(qp, qp->valid && - qp->req.state == QP_STATE_ERROR); + qp->comp.state == QP_STATE_ERROR); goto exit; } @@ -651,11 +653,6 @@ int rxe_completer(void *arg) break; case COMPST_DONE: - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } goto done; case COMPST_EXIT: @@ -690,23 +687,15 @@ int rxe_completer(void *arg) */ /* there is nothing to retry in this case */ - if (!wqe || (wqe->state == wqe_state_posted)) { + if (!wqe || (wqe->state == wqe_state_posted)) goto exit; - } /* if we've started a retry, don't start another * retry sequence, unless this is a timeout. */ if (qp->comp.started_retry && - !qp->comp.timeout_retry) { - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } - + !qp->comp.timeout_retry) goto done; - } if (qp->comp.retry_cnt > 0) { if (qp->comp.retry_cnt != 7) @@ -727,13 +716,6 @@ int rxe_completer(void *arg) qp->comp.started_retry = 1; rxe_run_task(&qp->req.task, 0); } - - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } - goto done; } else { @@ -744,19 +726,20 @@ int rxe_completer(void *arg) break; case COMPST_RNR_RETRY: + /* we come here if we received an RNR NAK */ if (qp->comp.rnr_retry > 0) { if (qp->comp.rnr_retry != 7) qp->comp.rnr_retry--; - qp->req.need_retry = 1; + /* don't start a retry flow until the + * rnr timer has fired + */ + qp->req.wait_for_rnr_timer = 1; pr_debug("qp#%d set rnr nak timer\n", qp_num(qp)); mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; goto exit; } else { rxe_counter_inc(rxe, @@ -770,30 +753,23 @@ int rxe_completer(void *arg) WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS); do_complete(qp, wqe); rxe_qp_error(qp); - - if (pkt) { - rxe_drop_ref(pkt->qp); - kfree_skb(skb); - skb = NULL; - } - goto exit; } } -exit: - /* we come here if we are done with processing and want the task to - * exit from the loop calling us + /* A non-zero return value will cause rxe_do_task to + * exit its loop and end the tasklet. A zero return + * will continue looping and return to rxe_completer */ - WARN_ON_ONCE(skb); - rxe_drop_ref(qp); - return -EAGAIN; - done: - /* we come here if we have processed a packet we want the task to call - * us again to see if there is anything else to do - */ - WARN_ON_ONCE(skb); - rxe_drop_ref(qp); - return 0; + ret = 0; + goto out; +exit: + ret = -EAGAIN; +out: + if (pkt) + free_pkt(pkt); + rxe_put(qp); + + return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index ad3090131126..b1a0ab3cd4bd 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/vmalloc.h> #include "rxe.h" @@ -46,16 +19,16 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, } if (cqe > rxe->attr.max_cqe) { - pr_warn("cqe(%d) > max_cqe(%d)\n", - cqe, rxe->attr.max_cqe); + pr_debug("cqe(%d) > max_cqe(%d)\n", + cqe, rxe->attr.max_cqe); goto err1; } if (cq) { - count = queue_count(cq->queue); + count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT); if (cqe < count) { - pr_warn("cqe(%d) < current # elements in queue (%d)", - cqe, count); + pr_debug("cqe(%d) < current # elements in queue (%d)", + cqe, count); goto err1; } } @@ -66,9 +39,9 @@ err1: return -EINVAL; } -static void rxe_send_complete(unsigned long data) +static void rxe_send_complete(struct tasklet_struct *t) { - struct rxe_cq *cq = (struct rxe_cq *)data; + struct rxe_cq *cq = from_tasklet(cq, t, comp_task); unsigned long flags; spin_lock_irqsave(&cq->cq_lock, flags); @@ -86,9 +59,11 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, struct rxe_create_cq_resp __user *uresp) { int err; + enum queue_type type; + type = QUEUE_TYPE_TO_CLIENT; cq->queue = rxe_queue_init(rxe, &cqe, - sizeof(struct rxe_cqe)); + sizeof(struct rxe_cqe), type); if (!cq->queue) { pr_warn("unable to create cq\n"); return -ENOMEM; @@ -102,12 +77,11 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, return err; } - if (uresp) - cq->is_user = 1; + cq->is_user = uresp; cq->is_dying = false; - tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq); + tasklet_setup(&cq->comp_task, rxe_send_complete); spin_lock_init(&cq->cq_lock); cq->ibcq.cqe = cqe; @@ -132,11 +106,14 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) { struct ib_event ev; + int full; + void *addr; unsigned long flags; spin_lock_irqsave(&cq->cq_lock, flags); - if (unlikely(queue_full(cq->queue))) { + full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT); + if (unlikely(full)) { spin_unlock_irqrestore(&cq->cq_lock, flags); if (cq->ibcq.event_handler) { ev.device = cq->ibcq.device; @@ -148,14 +125,11 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) return -EBUSY; } - memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe)); + addr = queue_producer_addr(cq->queue, QUEUE_TYPE_TO_CLIENT); + memcpy(addr, cqe, sizeof(*cqe)); - /* make sure all changes to the CQ are written before we update the - * producer pointer - */ - smp_wmb(); + queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT); - advance_producer(cq->queue); spin_unlock_irqrestore(&cq->cq_lock, flags); if ((cq->notify == IB_CQ_NEXT_COMP) || @@ -176,9 +150,9 @@ void rxe_cq_disable(struct rxe_cq *cq) spin_unlock_irqrestore(&cq->cq_lock, flags); } -void rxe_cq_cleanup(struct rxe_pool_entry *arg) +void rxe_cq_cleanup(struct rxe_pool_elem *elem) { - struct rxe_cq *cq = container_of(arg, typeof(*cq), pelem); + struct rxe_cq *cq = container_of(elem, typeof(*cq), elem); if (cq->queue) rxe_queue_cleanup(cq->queue); diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h index ce003666b800..e432f9e37795 100644 --- a/drivers/infiniband/sw/rxe/rxe_hdr.h +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_HDR_H @@ -49,7 +22,6 @@ struct rxe_pkt_info { u16 paylen; /* length of bth - icrc */ u8 port_num; /* port pkt received on */ u8 opcode; /* bth opcode of packet */ - u8 offset; /* bth offset from pkt->hdr */ }; /* Macros should be used only for received skb */ @@ -307,134 +279,134 @@ static inline void __bth_set_psn(void *arg, u32 psn) static inline u8 bth_opcode(struct rxe_pkt_info *pkt) { - return __bth_opcode(pkt->hdr + pkt->offset); + return __bth_opcode(pkt->hdr); } static inline void bth_set_opcode(struct rxe_pkt_info *pkt, u8 opcode) { - __bth_set_opcode(pkt->hdr + pkt->offset, opcode); + __bth_set_opcode(pkt->hdr, opcode); } static inline u8 bth_se(struct rxe_pkt_info *pkt) { - return __bth_se(pkt->hdr + pkt->offset); + return __bth_se(pkt->hdr); } static inline void bth_set_se(struct rxe_pkt_info *pkt, int se) { - __bth_set_se(pkt->hdr + pkt->offset, se); + __bth_set_se(pkt->hdr, se); } static inline u8 bth_mig(struct rxe_pkt_info *pkt) { - return __bth_mig(pkt->hdr + pkt->offset); + return __bth_mig(pkt->hdr); } static inline void bth_set_mig(struct rxe_pkt_info *pkt, u8 mig) { - __bth_set_mig(pkt->hdr + pkt->offset, mig); + __bth_set_mig(pkt->hdr, mig); } static inline u8 bth_pad(struct rxe_pkt_info *pkt) { - return __bth_pad(pkt->hdr + pkt->offset); + return __bth_pad(pkt->hdr); } static inline void bth_set_pad(struct rxe_pkt_info *pkt, u8 pad) { - __bth_set_pad(pkt->hdr + pkt->offset, pad); + __bth_set_pad(pkt->hdr, pad); } static inline u8 bth_tver(struct rxe_pkt_info *pkt) { - return __bth_tver(pkt->hdr + pkt->offset); + return __bth_tver(pkt->hdr); } static inline void bth_set_tver(struct rxe_pkt_info *pkt, u8 tver) { - __bth_set_tver(pkt->hdr + pkt->offset, tver); + __bth_set_tver(pkt->hdr, tver); } static inline u16 bth_pkey(struct rxe_pkt_info *pkt) { - return __bth_pkey(pkt->hdr + pkt->offset); + return __bth_pkey(pkt->hdr); } static inline void bth_set_pkey(struct rxe_pkt_info *pkt, u16 pkey) { - __bth_set_pkey(pkt->hdr + pkt->offset, pkey); + __bth_set_pkey(pkt->hdr, pkey); } static inline u32 bth_qpn(struct rxe_pkt_info *pkt) { - return __bth_qpn(pkt->hdr + pkt->offset); + return __bth_qpn(pkt->hdr); } static inline void bth_set_qpn(struct rxe_pkt_info *pkt, u32 qpn) { - __bth_set_qpn(pkt->hdr + pkt->offset, qpn); + __bth_set_qpn(pkt->hdr, qpn); } static inline int bth_fecn(struct rxe_pkt_info *pkt) { - return __bth_fecn(pkt->hdr + pkt->offset); + return __bth_fecn(pkt->hdr); } static inline void bth_set_fecn(struct rxe_pkt_info *pkt, int fecn) { - __bth_set_fecn(pkt->hdr + pkt->offset, fecn); + __bth_set_fecn(pkt->hdr, fecn); } static inline int bth_becn(struct rxe_pkt_info *pkt) { - return __bth_becn(pkt->hdr + pkt->offset); + return __bth_becn(pkt->hdr); } static inline void bth_set_becn(struct rxe_pkt_info *pkt, int becn) { - __bth_set_becn(pkt->hdr + pkt->offset, becn); + __bth_set_becn(pkt->hdr, becn); } static inline u8 bth_resv6a(struct rxe_pkt_info *pkt) { - return __bth_resv6a(pkt->hdr + pkt->offset); + return __bth_resv6a(pkt->hdr); } static inline void bth_set_resv6a(struct rxe_pkt_info *pkt) { - __bth_set_resv6a(pkt->hdr + pkt->offset); + __bth_set_resv6a(pkt->hdr); } static inline int bth_ack(struct rxe_pkt_info *pkt) { - return __bth_ack(pkt->hdr + pkt->offset); + return __bth_ack(pkt->hdr); } static inline void bth_set_ack(struct rxe_pkt_info *pkt, int ack) { - __bth_set_ack(pkt->hdr + pkt->offset, ack); + __bth_set_ack(pkt->hdr, ack); } static inline void bth_set_resv7(struct rxe_pkt_info *pkt) { - __bth_set_resv7(pkt->hdr + pkt->offset); + __bth_set_resv7(pkt->hdr); } static inline u32 bth_psn(struct rxe_pkt_info *pkt) { - return __bth_psn(pkt->hdr + pkt->offset); + return __bth_psn(pkt->hdr); } static inline void bth_set_psn(struct rxe_pkt_info *pkt, u32 psn) { - __bth_set_psn(pkt->hdr + pkt->offset, psn); + __bth_set_psn(pkt->hdr, psn); } static inline void bth_init(struct rxe_pkt_info *pkt, u8 opcode, int se, int mig, int pad, u16 pkey, u32 qpn, int ack_req, u32 psn) { - struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr + pkt->offset); + struct rxe_bth *bth = (struct rxe_bth *)(pkt->hdr); bth->opcode = opcode; bth->flags = (pad << 4) & BTH_PAD_MASK; @@ -475,14 +447,14 @@ static inline void __rdeth_set_een(void *arg, u32 een) static inline u8 rdeth_een(struct rxe_pkt_info *pkt) { - return __rdeth_een(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RDETH]); + return __rdeth_een(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RDETH]); } static inline void rdeth_set_een(struct rxe_pkt_info *pkt, u32 een) { - __rdeth_set_een(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een); + __rdeth_set_een(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RDETH], een); } /****************************************************************************** @@ -526,26 +498,26 @@ static inline void __deth_set_sqp(void *arg, u32 sqp) static inline u32 deth_qkey(struct rxe_pkt_info *pkt) { - return __deth_qkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH]); + return __deth_qkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH]); } static inline void deth_set_qkey(struct rxe_pkt_info *pkt, u32 qkey) { - __deth_set_qkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey); + __deth_set_qkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH], qkey); } static inline u32 deth_sqp(struct rxe_pkt_info *pkt) { - return __deth_sqp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH]); + return __deth_sqp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH]); } static inline void deth_set_sqp(struct rxe_pkt_info *pkt, u32 sqp) { - __deth_set_sqp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp); + __deth_set_sqp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_DETH], sqp); } /****************************************************************************** @@ -601,38 +573,38 @@ static inline void __reth_set_len(void *arg, u32 len) static inline u64 reth_va(struct rxe_pkt_info *pkt) { - return __reth_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH]); + return __reth_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); } static inline void reth_set_va(struct rxe_pkt_info *pkt, u64 va) { - __reth_set_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH], va); + __reth_set_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH], va); } static inline u32 reth_rkey(struct rxe_pkt_info *pkt) { - return __reth_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH]); + return __reth_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); } static inline void reth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) { - __reth_set_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey); + __reth_set_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH], rkey); } static inline u32 reth_len(struct rxe_pkt_info *pkt) { - return __reth_len(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH]); + return __reth_len(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH]); } static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len) { - __reth_set_len(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_RETH], len); + __reth_set_len(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_RETH], len); } /****************************************************************************** @@ -703,50 +675,50 @@ static inline void __atmeth_set_comp(void *arg, u64 comp) static inline u64 atmeth_va(struct rxe_pkt_info *pkt) { - return __atmeth_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_va(struct rxe_pkt_info *pkt, u64 va) { - __atmeth_set_va(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va); + __atmeth_set_va(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], va); } static inline u32 atmeth_rkey(struct rxe_pkt_info *pkt) { - return __atmeth_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) { - __atmeth_set_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey); + __atmeth_set_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], rkey); } static inline u64 atmeth_swap_add(struct rxe_pkt_info *pkt) { - return __atmeth_swap_add(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_swap_add(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_swap_add(struct rxe_pkt_info *pkt, u64 swap_add) { - __atmeth_set_swap_add(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add); + __atmeth_set_swap_add(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], swap_add); } static inline u64 atmeth_comp(struct rxe_pkt_info *pkt) { - return __atmeth_comp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); + return __atmeth_comp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH]); } static inline void atmeth_set_comp(struct rxe_pkt_info *pkt, u64 comp) { - __atmeth_set_comp(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp); + __atmeth_set_comp(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMETH], comp); } /****************************************************************************** @@ -807,26 +779,26 @@ static inline void __aeth_set_msn(void *arg, u32 msn) static inline u8 aeth_syn(struct rxe_pkt_info *pkt) { - return __aeth_syn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH]); + return __aeth_syn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH]); } static inline void aeth_set_syn(struct rxe_pkt_info *pkt, u8 syn) { - __aeth_set_syn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH], syn); + __aeth_set_syn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH], syn); } static inline u32 aeth_msn(struct rxe_pkt_info *pkt) { - return __aeth_msn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH]); + return __aeth_msn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH]); } static inline void aeth_set_msn(struct rxe_pkt_info *pkt, u32 msn) { - __aeth_set_msn(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_AETH], msn); + __aeth_set_msn(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_AETH], msn); } /****************************************************************************** @@ -852,14 +824,14 @@ static inline void __atmack_set_orig(void *arg, u64 orig) static inline u64 atmack_orig(struct rxe_pkt_info *pkt) { - return __atmack_orig(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMACK]); + return __atmack_orig(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMACK]); } static inline void atmack_set_orig(struct rxe_pkt_info *pkt, u64 orig) { - __atmack_set_orig(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig); + __atmack_set_orig(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_ATMACK], orig); } /****************************************************************************** @@ -885,14 +857,14 @@ static inline void __immdt_set_imm(void *arg, __be32 imm) static inline __be32 immdt_imm(struct rxe_pkt_info *pkt) { - return __immdt_imm(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IMMDT]); + return __immdt_imm(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IMMDT]); } static inline void immdt_set_imm(struct rxe_pkt_info *pkt, __be32 imm) { - __immdt_set_imm(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm); + __immdt_set_imm(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IMMDT], imm); } /****************************************************************************** @@ -918,14 +890,14 @@ static inline void __ieth_set_rkey(void *arg, u32 rkey) static inline u32 ieth_rkey(struct rxe_pkt_info *pkt) { - return __ieth_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IETH]); + return __ieth_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IETH]); } static inline void ieth_set_rkey(struct rxe_pkt_info *pkt, u32 rkey) { - __ieth_set_rkey(pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey); + __ieth_set_rkey(pkt->hdr + + rxe_opcode[pkt->opcode].offset[RXE_IETH], rkey); } enum rxe_hdr_length { @@ -942,13 +914,12 @@ enum rxe_hdr_length { static inline size_t header_size(struct rxe_pkt_info *pkt) { - return pkt->offset + rxe_opcode[pkt->opcode].length; + return rxe_opcode[pkt->opcode].length; } static inline void *payload_addr(struct rxe_pkt_info *pkt) { - return pkt->hdr + pkt->offset - + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]; + return pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_PAYLOAD]; } static inline size_t payload_size(struct rxe_pkt_info *pkt) diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c index 636edb5f4cf4..a012522b577a 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c @@ -1,59 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include "rxe.h" #include "rxe_hw_counters.h" -static const char * const rxe_counter_name[] = { - [RXE_CNT_SENT_PKTS] = "sent_pkts", - [RXE_CNT_RCVD_PKTS] = "rcvd_pkts", - [RXE_CNT_DUP_REQ] = "duplicate_request", - [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_seq_request", - [RXE_CNT_RCV_RNR] = "rcvd_rnr_err", - [RXE_CNT_SND_RNR] = "send_rnr_err", - [RXE_CNT_RCV_SEQ_ERR] = "rcvd_seq_err", - [RXE_CNT_COMPLETER_SCHED] = "ack_deferred", - [RXE_CNT_RETRY_EXCEEDED] = "retry_exceeded_err", - [RXE_CNT_RNR_RETRY_EXCEEDED] = "retry_rnr_exceeded_err", - [RXE_CNT_COMP_RETRY] = "completer_retry_err", - [RXE_CNT_SEND_ERR] = "send_err", - [RXE_CNT_LINK_DOWNED] = "link_downed", - [RXE_CNT_RDMA_SEND] = "rdma_sends", - [RXE_CNT_RDMA_RECV] = "rdma_recvs", +static const struct rdma_stat_desc rxe_counter_descs[] = { + [RXE_CNT_SENT_PKTS].name = "sent_pkts", + [RXE_CNT_RCVD_PKTS].name = "rcvd_pkts", + [RXE_CNT_DUP_REQ].name = "duplicate_request", + [RXE_CNT_OUT_OF_SEQ_REQ].name = "out_of_seq_request", + [RXE_CNT_RCV_RNR].name = "rcvd_rnr_err", + [RXE_CNT_SND_RNR].name = "send_rnr_err", + [RXE_CNT_RCV_SEQ_ERR].name = "rcvd_seq_err", + [RXE_CNT_COMPLETER_SCHED].name = "ack_deferred", + [RXE_CNT_RETRY_EXCEEDED].name = "retry_exceeded_err", + [RXE_CNT_RNR_RETRY_EXCEEDED].name = "retry_rnr_exceeded_err", + [RXE_CNT_COMP_RETRY].name = "completer_retry_err", + [RXE_CNT_SEND_ERR].name = "send_err", + [RXE_CNT_LINK_DOWNED].name = "link_downed", + [RXE_CNT_RDMA_SEND].name = "rdma_sends", + [RXE_CNT_RDMA_RECV].name = "rdma_recvs", }; int rxe_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, - u8 port, int index) + u32 port, int index) { struct rxe_dev *dev = to_rdev(ibdev); unsigned int cnt; @@ -61,21 +34,18 @@ int rxe_ib_get_hw_stats(struct ib_device *ibdev, if (!port || !stats) return -EINVAL; - for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_name); cnt++) + for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_descs); cnt++) stats->value[cnt] = atomic64_read(&dev->stats_counters[cnt]); - return ARRAY_SIZE(rxe_counter_name); + return ARRAY_SIZE(rxe_counter_descs); } -struct rdma_hw_stats *rxe_ib_alloc_hw_stats(struct ib_device *ibdev, - u8 port_num) +struct rdma_hw_stats *rxe_ib_alloc_hw_port_stats(struct ib_device *ibdev, + u32 port_num) { - BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_name) != RXE_NUM_OF_COUNTERS); - /* We support only per port stats */ - if (!port_num) - return NULL; + BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_descs) != RXE_NUM_OF_COUNTERS); - return rdma_alloc_hw_stats_struct(rxe_counter_name, - ARRAY_SIZE(rxe_counter_name), + return rdma_alloc_hw_stats_struct(rxe_counter_descs, + ARRAY_SIZE(rxe_counter_descs), RDMA_HW_STATS_DEFAULT_LIFESPAN); } diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h index 72c0d63c79e0..71f4d4fa9dc8 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_HW_COUNTERS_H @@ -56,9 +29,9 @@ enum rxe_counters { RXE_NUM_OF_COUNTERS }; -struct rdma_hw_stats *rxe_ib_alloc_hw_stats(struct ib_device *ibdev, - u8 port_num); +struct rdma_hw_stats *rxe_ib_alloc_hw_port_stats(struct ib_device *ibdev, + u32 port_num); int rxe_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, - u8 port, int index); + u32 port, int index); #endif /* RXE_HW_COUNTERS_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c index 39e0be31aab1..46bb07c5c4df 100644 --- a/drivers/infiniband/sw/rxe/rxe_icrc.c +++ b/drivers/infiniband/sw/rxe/rxe_icrc.c @@ -1,48 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ +#include <linux/crc32.h> + #include "rxe.h" #include "rxe_loc.h" -/* Compute a partial ICRC for all the IB transport headers. */ -u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb) +/** + * rxe_icrc_init() - Initialize crypto function for computing crc32 + * @rxe: rdma_rxe device object + * + * Return: 0 on success else an error + */ +int rxe_icrc_init(struct rxe_dev *rxe) +{ + struct crypto_shash *tfm; + + tfm = crypto_alloc_shash("crc32", 0, 0); + if (IS_ERR(tfm)) { + pr_warn("failed to init crc32 algorithm err:%ld\n", + PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + + rxe->tfm = tfm; + + return 0; +} + +/** + * rxe_crc32() - Compute cumulative crc32 for a contiguous segment + * @rxe: rdma_rxe device object + * @crc: starting crc32 value from previous segments + * @next: starting address of current segment + * @len: length of current segment + * + * Return: the cumulative crc32 checksum + */ +static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len) +{ + __be32 icrc; + int err; + + SHASH_DESC_ON_STACK(shash, rxe->tfm); + + shash->tfm = rxe->tfm; + *(__be32 *)shash_desc_ctx(shash) = crc; + err = crypto_shash_update(shash, next, len); + if (unlikely(err)) { + pr_warn_ratelimited("failed crc calculation, err: %d\n", err); + return (__force __be32)crc32_le((__force u32)crc, next, len); + } + + icrc = *(__be32 *)shash_desc_ctx(shash); + barrier_data(shash_desc_ctx(shash)); + + return icrc; +} + +/** + * rxe_icrc_hdr() - Compute the partial ICRC for the network and transport + * headers of a packet. + * @skb: packet buffer + * @pkt: packet information + * + * Return: the partial ICRC + */ +static __be32 rxe_icrc_hdr(struct sk_buff *skb, struct rxe_pkt_info *pkt) { unsigned int bth_offset = 0; struct iphdr *ip4h = NULL; struct ipv6hdr *ip6h = NULL; struct udphdr *udph; struct rxe_bth *bth; - int crc; + __be32 crc; int length; int hdr_size = sizeof(struct udphdr) + (skb->protocol == htons(ETH_P_IP) ? @@ -57,7 +91,7 @@ u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb) /* This seed is the result of computing a CRC with a seed of * 0xfffffff and 8 bytes of 0xff representing a masked LRH. */ - crc = 0xdebb20e3; + crc = (__force __be32)0xdebb20e3; if (skb->protocol == htons(ETH_P_IP)) { /* IPv4 */ memcpy(pshdr, ip_hdr(skb), hdr_size); @@ -94,3 +128,48 @@ u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb) rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES); return crc; } + +/** + * rxe_icrc_check() - Compute ICRC for a packet and compare to the ICRC + * delivered in the packet. + * @skb: packet buffer + * @pkt: packet information + * + * Return: 0 if the values match else an error + */ +int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt) +{ + __be32 *icrcp; + __be32 pkt_icrc; + __be32 icrc; + + icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); + pkt_icrc = *icrcp; + + icrc = rxe_icrc_hdr(skb, pkt); + icrc = rxe_crc32(pkt->rxe, icrc, (u8 *)payload_addr(pkt), + payload_size(pkt) + bth_pad(pkt)); + icrc = ~icrc; + + if (unlikely(icrc != pkt_icrc)) + return -EINVAL; + + return 0; +} + +/** + * rxe_icrc_generate() - compute ICRC for a packet. + * @skb: packet buffer + * @pkt: packet information + */ +void rxe_icrc_generate(struct sk_buff *skb, struct rxe_pkt_info *pkt) +{ + __be32 *icrcp; + __be32 icrc; + + icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); + icrc = rxe_icrc_hdr(skb, pkt); + icrc = rxe_crc32(pkt->rxe, icrc, (u8 *)payload_addr(pkt), + payload_size(pkt) + bth_pad(pkt)); + *icrcp = ~icrc; +} diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 775c23becaec..c2a5c8814a48 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_LOC_H @@ -46,7 +19,7 @@ void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr); void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr); -struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt); +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp); /* rxe_cq.c */ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, @@ -64,21 +37,13 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited); void rxe_cq_disable(struct rxe_cq *cq); -void rxe_cq_cleanup(struct rxe_pool_entry *arg); +void rxe_cq_cleanup(struct rxe_pool_elem *elem); /* rxe_mcast.c */ -int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, - struct rxe_mc_grp **grp_p); - -int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - struct rxe_mc_grp *grp); - -int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - union ib_gid *mgid); - -void rxe_drop_all_mcast_groups(struct rxe_qp *qp); - -void rxe_mc_cleanup(struct rxe_pool_entry *arg); +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid); +int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid); +int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid); +void rxe_cleanup_mcg(struct kref *kref); /* rxe_mmap.c */ struct rxe_mmap_info { @@ -98,82 +63,57 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev, u32 size, int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); /* rxe_mr.c */ -enum copy_direction { - to_mem_obj, - from_mem_obj, -}; - -int rxe_mem_init_dma(struct rxe_pd *pd, - int access, struct rxe_mem *mem); - -int rxe_mem_init_user(struct rxe_pd *pd, u64 start, - u64 length, u64 iova, int access, struct ib_udata *udata, - struct rxe_mem *mr); - -int rxe_mem_init_fast(struct rxe_pd *pd, - int max_pages, struct rxe_mem *mem); - -int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, - int length, enum copy_direction dir, u32 *crcp); - -int copy_data(struct rxe_pd *pd, int access, - struct rxe_dma_info *dma, void *addr, int length, - enum copy_direction dir, u32 *crcp); - -void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length); - -enum lookup_type { - lookup_local, - lookup_remote, -}; - -struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, - enum lookup_type type); - -int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length); - -int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, - u64 *page, int num_pages, u64 iova); - -void rxe_mem_cleanup(struct rxe_pool_entry *arg); - +u8 rxe_get_next_key(u32 last_key); +void rxe_mr_init_dma(int access, struct rxe_mr *mr); +int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, + int access, struct rxe_mr *mr); +int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr); +int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, + enum rxe_mr_copy_dir dir); +int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma, + void *addr, int length, enum rxe_mr_copy_dir dir); +void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length); +struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, + enum rxe_mr_lookup_type type); +int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length); int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); +int rxe_invalidate_mr(struct rxe_qp *qp, u32 key); +int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe); +int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); +void rxe_mr_cleanup(struct rxe_pool_elem *elem); + +/* rxe_mw.c */ +int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata); +int rxe_dealloc_mw(struct ib_mw *ibmw); +int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe); +int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey); +struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey); +void rxe_mw_cleanup(struct rxe_pool_elem *elem); /* rxe_net.c */ -void rxe_loopback(struct sk_buff *skb); -int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb); struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, int paylen, struct rxe_pkt_info *pkt); -int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc); -enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num); +int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb); +int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, + struct sk_buff *skb); const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num); -struct device *rxe_dma_device(struct rxe_dev *rxe); -int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid); -int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid); /* rxe_qp.c */ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init); - int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct ib_qp_init_attr *init, struct rxe_create_qp_resp __user *uresp, struct ib_pd *ibpd, struct ib_udata *udata); - int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init); - int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_attr *attr, int mask); - int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, struct ib_udata *udata); - int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask); - void rxe_qp_error(struct rxe_qp *qp); - -void rxe_qp_destroy(struct rxe_qp *qp); - -void rxe_qp_cleanup(struct rxe_pool_entry *arg); +int rxe_qp_chk_destroy(struct rxe_qp *qp); +void rxe_qp_cleanup(struct rxe_pool_elem *elem); static inline int qp_num(struct rxe_qp *qp) { @@ -204,7 +144,7 @@ static inline int rcv_wqe_size(int max_sge) max_sge * sizeof(struct ib_sge); } -void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res); +void free_rd_atomic_resource(struct resp_res *res); static inline void rxe_advance_resp_resource(struct rxe_qp *qp) { @@ -217,18 +157,16 @@ void retransmit_timer(struct timer_list *t); void rnr_nak_timer(struct timer_list *t); /* rxe_srq.c */ -#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT) - -int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, - struct ib_srq_attr *attr, enum ib_srq_attr_mask mask); - +int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init); int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_init_attr *init, struct ib_udata *udata, struct rxe_create_srq_resp __user *uresp); - +int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask); int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata); +void rxe_srq_cleanup(struct rxe_pool_elem *elem); void rxe_dealloc(struct ib_device *ib_dev); @@ -236,7 +174,10 @@ int rxe_completer(void *arg); int rxe_requester(void *arg); int rxe_responder(void *arg); -u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb); +/* rxe_icrc.c */ +int rxe_icrc_init(struct rxe_dev *rxe); +int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt); +void rxe_icrc_generate(struct sk_buff *skb, struct rxe_pkt_info *pkt); void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb); @@ -247,47 +188,4 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp) return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type]; } -static inline int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, - struct sk_buff *skb) -{ - int err; - int is_request = pkt->mask & RXE_REQ_MASK; - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - - if ((is_request && (qp->req.state != QP_STATE_READY)) || - (!is_request && (qp->resp.state != QP_STATE_READY))) { - pr_info("Packet dropped. QP is not in ready state\n"); - goto drop; - } - - if (pkt->mask & RXE_LOOPBACK_MASK) { - memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt)); - rxe_loopback(skb); - err = 0; - } else { - err = rxe_send(pkt, skb); - } - - if (err) { - rxe->xmit_errors++; - rxe_counter_inc(rxe, RXE_CNT_SEND_ERR); - return err; - } - - if ((qp_type(qp) != IB_QPT_RC) && - (pkt->mask & RXE_END_MASK)) { - pkt->wqe->state = wqe_state_done; - rxe_run_task(&qp->comp.task, 1); - } - - rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS); - goto done; - -drop: - kfree_skb(skb); - err = 0; -done: - return err; -} - #endif /* RXE_LOC_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index 522a7942c56c..86cc2e18a7fd 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -1,190 +1,479 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* + * Copyright (c) 2022 Hewlett Packard Enterprise, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. + */ + +/* + * rxe_mcast.c implements driver support for multicast transport. + * It is based on two data structures struct rxe_mcg ('mcg') and + * struct rxe_mca ('mca'). An mcg is allocated each time a qp is + * attached to a new mgid for the first time. These are indexed by + * a red-black tree using the mgid. This data structure is searched + * for the mcg when a multicast packet is received and when another + * qp is attached to the same mgid. It is cleaned up when the last qp + * is detached from the mcg. Each time a qp is attached to an mcg an + * mca is created. It holds a pointer to the qp and is added to a list + * of qp's that are attached to the mcg. The qp_list is used to replicate + * mcast packets in the rxe receive path. + */ + +#include "rxe.h" + +/** + * rxe_mcast_add - add multicast address to rxe device + * @rxe: rxe device object + * @mgid: multicast address as a gid * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: + * Returns 0 on success else an error + */ +static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) +{ + unsigned char ll_addr[ETH_ALEN]; + + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); + + return dev_mc_add(rxe->ndev, ll_addr); +} + +/** + * rxe_mcast_del - delete multicast address from rxe device + * @rxe: rxe device object + * @mgid: multicast address as a gid * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: + * Returns 0 on success else an error + */ +static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) +{ + unsigned char ll_addr[ETH_ALEN]; + + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); + + return dev_mc_del(rxe->ndev, ll_addr); +} + +/** + * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree) + * @mcg: mcg object with an embedded red-black tree node * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. + * Context: caller must hold a reference to mcg and rxe->mcg_lock and + * is responsible to avoid adding the same mcg twice to the tree. + */ +static void __rxe_insert_mcg(struct rxe_mcg *mcg) +{ + struct rb_root *tree = &mcg->rxe->mcg_tree; + struct rb_node **link = &tree->rb_node; + struct rb_node *node = NULL; + struct rxe_mcg *tmp; + int cmp; + + while (*link) { + node = *link; + tmp = rb_entry(node, struct rxe_mcg, node); + + cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid)); + if (cmp > 0) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + rb_link_node(&mcg->node, node, link); + rb_insert_color(&mcg->node, tree); +} + +/** + * __rxe_remove_mcg - remove an mcg from red-black tree holding lock + * @mcg: mcast group object with an embedded red-black tree node * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. + * Context: caller must hold a reference to mcg and rxe->mcg_lock + */ +static void __rxe_remove_mcg(struct rxe_mcg *mcg) +{ + rb_erase(&mcg->node, &mcg->rxe->mcg_tree); +} + +/** + * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock + * @rxe: rxe device object + * @mgid: multicast IP address * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Context: caller must hold rxe->mcg_lock + * Returns: mcg on success and takes a ref to mcg else NULL */ +static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe, + union ib_gid *mgid) +{ + struct rb_root *tree = &rxe->mcg_tree; + struct rxe_mcg *mcg; + struct rb_node *node; + int cmp; -#include "rxe.h" -#include "rxe_loc.h" + node = tree->rb_node; + + while (node) { + mcg = rb_entry(node, struct rxe_mcg, node); -int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, - struct rxe_mc_grp **grp_p) + cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid)); + + if (cmp > 0) + node = node->rb_left; + else if (cmp < 0) + node = node->rb_right; + else + break; + } + + if (node) { + kref_get(&mcg->ref_cnt); + return mcg; + } + + return NULL; +} + +/** + * rxe_lookup_mcg - lookup up mcg in red-back tree + * @rxe: rxe device object + * @mgid: multicast IP address + * + * Returns: mcg if found else NULL + */ +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid) +{ + struct rxe_mcg *mcg; + + spin_lock_bh(&rxe->mcg_lock); + mcg = __rxe_lookup_mcg(rxe, mgid); + spin_unlock_bh(&rxe->mcg_lock); + + return mcg; +} + +/** + * __rxe_init_mcg - initialize a new mcg + * @rxe: rxe device + * @mgid: multicast address as a gid + * @mcg: new mcg object + * + * Context: caller should hold rxe->mcg lock + */ +static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, + struct rxe_mcg *mcg) { + kref_init(&mcg->ref_cnt); + memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); + INIT_LIST_HEAD(&mcg->qp_list); + mcg->rxe = rxe; + + /* caller holds a ref on mcg but that will be + * dropped when mcg goes out of scope. We need to take a ref + * on the pointer that will be saved in the red-black tree + * by __rxe_insert_mcg and used to lookup mcg from mgid later. + * Inserting mcg makes it visible to outside so this should + * be done last after the object is ready. + */ + kref_get(&mcg->ref_cnt); + __rxe_insert_mcg(mcg); +} + +/** + * rxe_get_mcg - lookup or allocate a mcg + * @rxe: rxe device object + * @mgid: multicast IP address as a gid + * + * Returns: mcg on success else ERR_PTR(error) + */ +static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) +{ + struct rxe_mcg *mcg, *tmp; int err; - struct rxe_mc_grp *grp; - if (rxe->attr.max_mcast_qp_attach == 0) { - err = -EINVAL; - goto err1; - } + if (rxe->attr.max_mcast_grp == 0) + return ERR_PTR(-EINVAL); - grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid); - if (grp) - goto done; + /* check to see if mcg already exists */ + mcg = rxe_lookup_mcg(rxe, mgid); + if (mcg) + return mcg; - grp = rxe_alloc(&rxe->mc_grp_pool); - if (!grp) { + /* check to see if we have reached limit */ + if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) { err = -ENOMEM; - goto err1; + goto err_dec; } - INIT_LIST_HEAD(&grp->qp_list); - spin_lock_init(&grp->mcg_lock); - grp->rxe = rxe; + /* speculative alloc of new mcg */ + mcg = kzalloc(sizeof(*mcg), GFP_KERNEL); + if (!mcg) { + err = -ENOMEM; + goto err_dec; + } - rxe_add_key(grp, mgid); + spin_lock_bh(&rxe->mcg_lock); + /* re-check to see if someone else just added it */ + tmp = __rxe_lookup_mcg(rxe, mgid); + if (tmp) { + spin_unlock_bh(&rxe->mcg_lock); + atomic_dec(&rxe->mcg_num); + kfree(mcg); + return tmp; + } + + __rxe_init_mcg(rxe, mgid, mcg); + spin_unlock_bh(&rxe->mcg_lock); + /* add mcast address outside of lock */ err = rxe_mcast_add(rxe, mgid); - if (err) - goto err2; + if (!err) + return mcg; -done: - *grp_p = grp; - return 0; + kfree(mcg); +err_dec: + atomic_dec(&rxe->mcg_num); + return ERR_PTR(err); +} -err2: - rxe_drop_ref(grp); -err1: - return err; +/** + * rxe_cleanup_mcg - cleanup mcg for kref_put + * @kref: struct kref embnedded in mcg + */ +void rxe_cleanup_mcg(struct kref *kref) +{ + struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt); + + kfree(mcg); } -int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - struct rxe_mc_grp *grp) +/** + * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_lock + * @mcg: the mcg object + * + * Context: caller is holding rxe->mcg_lock + * no qp's are attached to mcg + */ +static void __rxe_destroy_mcg(struct rxe_mcg *mcg) { - int err; - struct rxe_mc_elem *elem; + struct rxe_dev *rxe = mcg->rxe; - /* check to see of the qp is already a member of the group */ - spin_lock_bh(&qp->grp_lock); - spin_lock_bh(&grp->mcg_lock); - list_for_each_entry(elem, &grp->qp_list, qp_list) { - if (elem->qp == qp) { - err = 0; - goto out; - } - } + /* remove mcg from red-black tree then drop ref */ + __rxe_remove_mcg(mcg); + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); - if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) { - err = -ENOMEM; - goto out; + atomic_dec(&rxe->mcg_num); +} + +/** + * rxe_destroy_mcg - destroy mcg object + * @mcg: the mcg object + * + * Context: no qp's are attached to mcg + */ +static void rxe_destroy_mcg(struct rxe_mcg *mcg) +{ + /* delete mcast address outside of lock */ + rxe_mcast_del(mcg->rxe, &mcg->mgid); + + spin_lock_bh(&mcg->rxe->mcg_lock); + __rxe_destroy_mcg(mcg); + spin_unlock_bh(&mcg->rxe->mcg_lock); +} + +/** + * __rxe_init_mca - initialize a new mca holding lock + * @qp: qp object + * @mcg: mcg object + * @mca: empty space for new mca + * + * Context: caller must hold references on qp and mcg, rxe->mcg_lock + * and pass memory for new mca + * + * Returns: 0 on success else an error + */ +static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg, + struct rxe_mca *mca) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int n; + + n = atomic_inc_return(&rxe->mcg_attach); + if (n > rxe->attr.max_total_mcast_qp_attach) { + atomic_dec(&rxe->mcg_attach); + return -ENOMEM; } - elem = rxe_alloc(&rxe->mc_elem_pool); - if (!elem) { - err = -ENOMEM; - goto out; + n = atomic_inc_return(&mcg->qp_num); + if (n > rxe->attr.max_mcast_qp_attach) { + atomic_dec(&mcg->qp_num); + atomic_dec(&rxe->mcg_attach); + return -ENOMEM; } - /* each qp holds a ref on the grp */ - rxe_add_ref(grp); + atomic_inc(&qp->mcg_num); + + rxe_get(qp); + mca->qp = qp; - grp->num_qp++; - elem->qp = qp; - elem->grp = grp; + list_add_tail(&mca->qp_list, &mcg->qp_list); - list_add(&elem->qp_list, &grp->qp_list); - list_add(&elem->grp_list, &qp->grp_list); + return 0; +} - err = 0; +/** + * rxe_attach_mcg - attach qp to mcg if not already attached + * @qp: qp object + * @mcg: mcg object + * + * Context: caller must hold reference on qp and mcg. + * Returns: 0 on success else an error + */ +static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) +{ + struct rxe_dev *rxe = mcg->rxe; + struct rxe_mca *mca, *tmp; + int err; + + /* check to see if the qp is already a member of the group */ + spin_lock_bh(&rxe->mcg_lock); + list_for_each_entry(mca, &mcg->qp_list, qp_list) { + if (mca->qp == qp) { + spin_unlock_bh(&rxe->mcg_lock); + return 0; + } + } + spin_unlock_bh(&rxe->mcg_lock); + + /* speculative alloc new mca without using GFP_ATOMIC */ + mca = kzalloc(sizeof(*mca), GFP_KERNEL); + if (!mca) + return -ENOMEM; + + spin_lock_bh(&rxe->mcg_lock); + /* re-check to see if someone else just attached qp */ + list_for_each_entry(tmp, &mcg->qp_list, qp_list) { + if (tmp->qp == qp) { + kfree(mca); + err = 0; + goto out; + } + } + + err = __rxe_init_mca(qp, mcg, mca); + if (err) + kfree(mca); out: - spin_unlock_bh(&grp->mcg_lock); - spin_unlock_bh(&qp->grp_lock); + spin_unlock_bh(&rxe->mcg_lock); return err; } -int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - union ib_gid *mgid) +/** + * __rxe_cleanup_mca - cleanup mca object holding lock + * @mca: mca object + * @mcg: mcg object + * + * Context: caller must hold a reference to mcg and rxe->mcg_lock + */ +static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg) { - struct rxe_mc_grp *grp; - struct rxe_mc_elem *elem, *tmp; + list_del(&mca->qp_list); + + atomic_dec(&mcg->qp_num); + atomic_dec(&mcg->rxe->mcg_attach); + atomic_dec(&mca->qp->mcg_num); + rxe_put(mca->qp); + + kfree(mca); +} - grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid); - if (!grp) - goto err1; +/** + * rxe_detach_mcg - detach qp from mcg + * @mcg: mcg object + * @qp: qp object + * + * Returns: 0 on success else an error if qp is not attached. + */ +static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) +{ + struct rxe_dev *rxe = mcg->rxe; + struct rxe_mca *mca, *tmp; - spin_lock_bh(&qp->grp_lock); - spin_lock_bh(&grp->mcg_lock); + spin_lock_bh(&rxe->mcg_lock); + list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) { + if (mca->qp == qp) { + __rxe_cleanup_mca(mca, mcg); - list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) { - if (elem->qp == qp) { - list_del(&elem->qp_list); - list_del(&elem->grp_list); - grp->num_qp--; + /* if the number of qp's attached to the + * mcast group falls to zero go ahead and + * tear it down. This will not free the + * object since we are still holding a ref + * from the caller + */ + if (atomic_read(&mcg->qp_num) <= 0) + __rxe_destroy_mcg(mcg); - spin_unlock_bh(&grp->mcg_lock); - spin_unlock_bh(&qp->grp_lock); - rxe_drop_ref(elem); - rxe_drop_ref(grp); /* ref held by QP */ - rxe_drop_ref(grp); /* ref from get_key */ + spin_unlock_bh(&rxe->mcg_lock); return 0; } } - spin_unlock_bh(&grp->mcg_lock); - spin_unlock_bh(&qp->grp_lock); - rxe_drop_ref(grp); /* ref from get_key */ -err1: + /* we didn't find the qp on the list */ + spin_unlock_bh(&rxe->mcg_lock); return -EINVAL; } -void rxe_drop_all_mcast_groups(struct rxe_qp *qp) +/** + * rxe_attach_mcast - attach qp to multicast group (see IBA-11.3.1) + * @ibqp: (IB) qp object + * @mgid: multicast IP address + * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6) + * + * Returns: 0 on success else an errno + */ +int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) { - struct rxe_mc_grp *grp; - struct rxe_mc_elem *elem; + int err; + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_qp *qp = to_rqp(ibqp); + struct rxe_mcg *mcg; - while (1) { - spin_lock_bh(&qp->grp_lock); - if (list_empty(&qp->grp_list)) { - spin_unlock_bh(&qp->grp_lock); - break; - } - elem = list_first_entry(&qp->grp_list, struct rxe_mc_elem, - grp_list); - list_del(&elem->grp_list); - spin_unlock_bh(&qp->grp_lock); - - grp = elem->grp; - spin_lock_bh(&grp->mcg_lock); - list_del(&elem->qp_list); - grp->num_qp--; - spin_unlock_bh(&grp->mcg_lock); - rxe_drop_ref(grp); - rxe_drop_ref(elem); - } + /* takes a ref on mcg if successful */ + mcg = rxe_get_mcg(rxe, mgid); + if (IS_ERR(mcg)) + return PTR_ERR(mcg); + + err = rxe_attach_mcg(mcg, qp); + + /* if we failed to attach the first qp to mcg tear it down */ + if (atomic_read(&mcg->qp_num) == 0) + rxe_destroy_mcg(mcg); + + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); + + return err; } -void rxe_mc_cleanup(struct rxe_pool_entry *arg) +/** + * rxe_detach_mcast - detach qp from multicast group (see IBA-11.3.2) + * @ibqp: address of (IB) qp object + * @mgid: multicast IP address + * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6) + * + * Returns: 0 on success else an errno + */ +int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) { - struct rxe_mc_grp *grp = container_of(arg, typeof(*grp), pelem); - struct rxe_dev *rxe = grp->rxe; + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_qp *qp = to_rqp(ibqp); + struct rxe_mcg *mcg; + int err; + + mcg = rxe_lookup_mcg(rxe, mgid); + if (!mcg) + return -EINVAL; - rxe_drop_key(grp); - rxe_mcast_delete(rxe, &grp->mgid); + err = rxe_detach_mcg(mcg, qp); + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); + + return err; } diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index 48f48122ddcb..9149b6095429 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -1,41 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ -#include <linux/module.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/errno.h> -#include <asm/pgtable.h> #include <rdma/uverbs_ioctl.h> #include "rxe.h" @@ -151,7 +122,7 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, u32 size, ip = kmalloc(sizeof(*ip), GFP_KERNEL); if (!ip) - return NULL; + return ERR_PTR(-ENOMEM); size = PAGE_ALIGN(size); diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index e83c7b518bfa..502e9ada99b3 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -1,71 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include "rxe.h" #include "rxe_loc.h" -/* - * lfsr (linear feedback shift register) with period 255 +/* Return a random 8 bit key value that is + * different than the last_key. Set last_key to -1 + * if this is the first key for an MR or MW */ -static u8 rxe_get_key(void) +u8 rxe_get_next_key(u32 last_key) { - static u32 key = 1; - - key = key << 1; + u8 key; - key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10)) - ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40)); - - key &= 0xff; + do { + get_random_bytes(&key, 1); + } while (key == last_key); return key; } -int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length) +int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) { - switch (mem->type) { - case RXE_MEM_TYPE_DMA: + + + switch (mr->type) { + case IB_MR_TYPE_DMA: return 0; - case RXE_MEM_TYPE_MR: - case RXE_MEM_TYPE_FMR: - if (iova < mem->iova || - length > mem->length || - iova > mem->iova + mem->length - length) + case IB_MR_TYPE_USER: + case IB_MR_TYPE_MEM_REG: + if (iova < mr->ibmr.iova || length > mr->ibmr.length || + iova > mr->ibmr.iova + mr->ibmr.length - length) return -EFAULT; return 0; default: + pr_warn("%s: mr type (%d) not supported\n", + __func__, mr->type); return -EFAULT; } } @@ -74,92 +48,72 @@ int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length) | IB_ACCESS_REMOTE_WRITE \ | IB_ACCESS_REMOTE_ATOMIC) -static void rxe_mem_init(int access, struct rxe_mem *mem) +static void rxe_mr_init(int access, struct rxe_mr *mr) { - u32 lkey = mem->pelem.index << 8 | rxe_get_key(); + u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1); u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0; - if (mem->pelem.pool->type == RXE_TYPE_MR) { - mem->ibmr.lkey = lkey; - mem->ibmr.rkey = rkey; - } - - mem->lkey = lkey; - mem->rkey = rkey; - mem->state = RXE_MEM_STATE_INVALID; - mem->type = RXE_MEM_TYPE_NONE; - mem->map_shift = ilog2(RXE_BUF_PER_MAP); -} - -void rxe_mem_cleanup(struct rxe_pool_entry *arg) -{ - struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem); - int i; - - ib_umem_release(mem->umem); + /* set ibmr->l/rkey and also copy into private l/rkey + * for user MRs these will always be the same + * for cases where caller 'owns' the key portion + * they may be different until REG_MR WQE is executed. + */ + mr->lkey = mr->ibmr.lkey = lkey; + mr->rkey = mr->ibmr.rkey = rkey; - if (mem->map) { - for (i = 0; i < mem->num_map; i++) - kfree(mem->map[i]); - - kfree(mem->map); - } + mr->state = RXE_MR_STATE_INVALID; + mr->map_shift = ilog2(RXE_BUF_PER_MAP); } -static int rxe_mem_alloc(struct rxe_mem *mem, int num_buf) +static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf) { int i; int num_map; - struct rxe_map **map = mem->map; + struct rxe_map **map = mr->map; num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; - mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL); - if (!mem->map) + mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL); + if (!mr->map) goto err1; for (i = 0; i < num_map; i++) { - mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL); - if (!mem->map[i]) + mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL); + if (!mr->map[i]) goto err2; } BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP)); - mem->map_shift = ilog2(RXE_BUF_PER_MAP); - mem->map_mask = RXE_BUF_PER_MAP - 1; + mr->map_shift = ilog2(RXE_BUF_PER_MAP); + mr->map_mask = RXE_BUF_PER_MAP - 1; - mem->num_buf = num_buf; - mem->num_map = num_map; - mem->max_buf = num_map * RXE_BUF_PER_MAP; + mr->num_buf = num_buf; + mr->num_map = num_map; + mr->max_buf = num_map * RXE_BUF_PER_MAP; return 0; err2: for (i--; i >= 0; i--) - kfree(mem->map[i]); + kfree(mr->map[i]); - kfree(mem->map); + kfree(mr->map); err1: return -ENOMEM; } -int rxe_mem_init_dma(struct rxe_pd *pd, - int access, struct rxe_mem *mem) +void rxe_mr_init_dma(int access, struct rxe_mr *mr) { - rxe_mem_init(access, mem); + rxe_mr_init(access, mr); - mem->pd = pd; - mem->access = access; - mem->state = RXE_MEM_STATE_VALID; - mem->type = RXE_MEM_TYPE_DMA; - - return 0; + mr->access = access; + mr->state = RXE_MR_STATE_VALID; + mr->type = IB_MR_TYPE_DMA; } -int rxe_mem_init_user(struct rxe_pd *pd, u64 start, - u64 length, u64 iova, int access, struct ib_udata *udata, - struct rxe_mem *mem) +int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, + int access, struct rxe_mr *mr) { struct rxe_map **map; struct rxe_phys_buf *buf = NULL; @@ -168,36 +122,36 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, int num_buf; void *vaddr; int err; + int i; - umem = ib_umem_get(pd->ibpd.device, start, length, access); + umem = ib_umem_get(&rxe->ib_dev, start, length, access); if (IS_ERR(umem)) { - pr_warn("err %d from rxe_umem_get\n", - (int)PTR_ERR(umem)); - err = -EINVAL; - goto err1; + pr_warn("%s: Unable to pin memory region err = %d\n", + __func__, (int)PTR_ERR(umem)); + err = PTR_ERR(umem); + goto err_out; } - mem->umem = umem; num_buf = ib_umem_num_pages(umem); - rxe_mem_init(access, mem); + rxe_mr_init(access, mr); - err = rxe_mem_alloc(mem, num_buf); + err = rxe_mr_alloc(mr, num_buf); if (err) { - pr_warn("err %d from rxe_mem_alloc\n", err); - ib_umem_release(umem); - goto err1; + pr_warn("%s: Unable to allocate memory for map\n", + __func__); + goto err_release_umem; } - mem->page_shift = PAGE_SHIFT; - mem->page_mask = PAGE_SIZE - 1; + mr->page_shift = PAGE_SHIFT; + mr->page_mask = PAGE_SIZE - 1; num_buf = 0; - map = mem->map; + map = mr->map; if (length > 0) { buf = map[0]->buf; - for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) { if (num_buf >= RXE_BUF_PER_MAP) { map++; buf = map[0]->buf; @@ -206,9 +160,10 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, vaddr = page_address(sg_page_iter_page(&sg_iter)); if (!vaddr) { - pr_warn("null vaddr\n"); + pr_warn("%s: Unable to get virtual address\n", + __func__); err = -ENOMEM; - goto err1; + goto err_cleanup_map; } buf->addr = (uintptr_t)vaddr; @@ -219,40 +174,38 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, } } - mem->pd = pd; - mem->umem = umem; - mem->access = access; - mem->length = length; - mem->iova = iova; - mem->va = start; - mem->offset = ib_umem_offset(umem); - mem->state = RXE_MEM_STATE_VALID; - mem->type = RXE_MEM_TYPE_MR; + mr->umem = umem; + mr->access = access; + mr->offset = ib_umem_offset(umem); + mr->state = RXE_MR_STATE_VALID; + mr->type = IB_MR_TYPE_USER; return 0; -err1: +err_cleanup_map: + for (i = 0; i < mr->num_map; i++) + kfree(mr->map[i]); + kfree(mr->map); +err_release_umem: + ib_umem_release(umem); +err_out: return err; } -int rxe_mem_init_fast(struct rxe_pd *pd, - int max_pages, struct rxe_mem *mem) +int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr) { int err; - rxe_mem_init(0, mem); - - /* In fastreg, we also set the rkey */ - mem->ibmr.rkey = mem->ibmr.lkey; + /* always allow remote access for FMRs */ + rxe_mr_init(IB_ACCESS_REMOTE, mr); - err = rxe_mem_alloc(mem, max_pages); + err = rxe_mr_alloc(mr, max_pages); if (err) goto err1; - mem->pd = pd; - mem->max_buf = max_pages; - mem->state = RXE_MEM_STATE_FREE; - mem->type = RXE_MEM_TYPE_MR; + mr->max_buf = max_pages; + mr->state = RXE_MR_STATE_FREE; + mr->type = IB_MR_TYPE_MEM_REG; return 0; @@ -260,28 +213,24 @@ err1: return err; } -static void lookup_iova( - struct rxe_mem *mem, - u64 iova, - int *m_out, - int *n_out, - size_t *offset_out) +static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out, + size_t *offset_out) { - size_t offset = iova - mem->iova + mem->offset; + size_t offset = iova - mr->ibmr.iova + mr->offset; int map_index; int buf_index; u64 length; - if (likely(mem->page_shift)) { - *offset_out = offset & mem->page_mask; - offset >>= mem->page_shift; - *n_out = offset & mem->map_mask; - *m_out = offset >> mem->map_shift; + if (likely(mr->page_shift)) { + *offset_out = offset & mr->page_mask; + offset >>= mr->page_shift; + *n_out = offset & mr->map_mask; + *m_out = offset >> mr->map_shift; } else { map_index = 0; buf_index = 0; - length = mem->map[map_index]->buf[buf_index].size; + length = mr->map[map_index]->buf[buf_index].size; while (offset >= length) { offset -= length; @@ -291,7 +240,7 @@ static void lookup_iova( map_index++; buf_index = 0; } - length = mem->map[map_index]->buf[buf_index].size; + length = mr->map[map_index]->buf[buf_index].size; } *m_out = map_index; @@ -300,49 +249,48 @@ static void lookup_iova( } } -void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length) +void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) { size_t offset; int m, n; void *addr; - if (mem->state != RXE_MEM_STATE_VALID) { - pr_warn("mem not in valid state\n"); + if (mr->state != RXE_MR_STATE_VALID) { + pr_warn("mr not in valid state\n"); addr = NULL; goto out; } - if (!mem->map) { + if (!mr->map) { addr = (void *)(uintptr_t)iova; goto out; } - if (mem_check_range(mem, iova, length)) { + if (mr_check_range(mr, iova, length)) { pr_warn("range violation\n"); addr = NULL; goto out; } - lookup_iova(mem, iova, &m, &n, &offset); + lookup_iova(mr, iova, &m, &n, &offset); - if (offset + length > mem->map[m]->buf[n].size) { + if (offset + length > mr->map[m]->buf[n].size) { pr_warn("crosses page boundary\n"); addr = NULL; goto out; } - addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset; + addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset; out: return addr; } /* copy data from a range (vaddr, vaddr+length-1) to or from - * a mem object starting at iova. Compute incremental value of - * crc32 if crcp is not zero. caller must hold a reference to mem + * a mr object starting at iova. */ -int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, - enum copy_direction dir, u32 *crcp) +int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, + enum rxe_mr_copy_dir dir) { int err; int bytes; @@ -352,48 +300,41 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, int m; int i; size_t offset; - u32 crc = crcp ? (*crcp) : 0; if (length == 0) return 0; - if (mem->type == RXE_MEM_TYPE_DMA) { + if (mr->type == IB_MR_TYPE_DMA) { u8 *src, *dest; - src = (dir == to_mem_obj) ? - addr : ((void *)(uintptr_t)iova); + src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova); - dest = (dir == to_mem_obj) ? - ((void *)(uintptr_t)iova) : addr; + dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr; memcpy(dest, src, length); - if (crcp) - *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device), - *crcp, dest, length); - return 0; } - WARN_ON_ONCE(!mem->map); + WARN_ON_ONCE(!mr->map); - err = mem_check_range(mem, iova, length); + err = mr_check_range(mr, iova, length); if (err) { err = -EFAULT; goto err1; } - lookup_iova(mem, iova, &m, &i, &offset); + lookup_iova(mr, iova, &m, &i, &offset); - map = mem->map + m; + map = mr->map + m; buf = map[0]->buf + i; while (length > 0) { u8 *src, *dest; va = (u8 *)(uintptr_t)buf->addr + offset; - src = (dir == to_mem_obj) ? addr : va; - dest = (dir == to_mem_obj) ? va : addr; + src = (dir == RXE_TO_MR_OBJ) ? addr : va; + dest = (dir == RXE_TO_MR_OBJ) ? va : addr; bytes = buf->size - offset; @@ -402,10 +343,6 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, memcpy(dest, src, bytes); - if (crcp) - crc = rxe_crc32(to_rdev(mem->pd->ibpd.device), - crc, dest, bytes); - length -= bytes; addr += bytes; @@ -420,9 +357,6 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, } } - if (crcp) - *crcp = crc; - return 0; err1: @@ -438,14 +372,13 @@ int copy_data( struct rxe_dma_info *dma, void *addr, int length, - enum copy_direction dir, - u32 *crcp) + enum rxe_mr_copy_dir dir) { int bytes; struct rxe_sge *sge = &dma->sge[dma->cur_sge]; int offset = dma->sge_offset; int resid = dma->resid; - struct rxe_mem *mem = NULL; + struct rxe_mr *mr = NULL; u64 iova; int err; @@ -458,8 +391,8 @@ int copy_data( } if (sge->length && (offset < sge->length)) { - mem = lookup_mem(pd, access, sge->lkey, lookup_local); - if (!mem) { + mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL); + if (!mr) { err = -EINVAL; goto err1; } @@ -469,9 +402,9 @@ int copy_data( bytes = length; if (offset >= sge->length) { - if (mem) { - rxe_drop_ref(mem); - mem = NULL; + if (mr) { + rxe_put(mr); + mr = NULL; } sge++; dma->cur_sge++; @@ -483,9 +416,9 @@ int copy_data( } if (sge->length) { - mem = lookup_mem(pd, access, sge->lkey, - lookup_local); - if (!mem) { + mr = lookup_mr(pd, access, sge->lkey, + RXE_LOOKUP_LOCAL); + if (!mr) { err = -EINVAL; goto err1; } @@ -500,7 +433,7 @@ int copy_data( if (bytes > 0) { iova = sge->addr + offset; - err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp); + err = rxe_mr_copy(mr, iova, addr, bytes, dir); if (err) goto err2; @@ -514,14 +447,14 @@ int copy_data( dma->sge_offset = offset; dma->resid = resid; - if (mem) - rxe_drop_ref(mem); + if (mr) + rxe_put(mr); return 0; err2: - if (mem) - rxe_drop_ref(mem); + if (mr) + rxe_put(mr); err1: return err; } @@ -559,75 +492,144 @@ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length) return 0; } -/* (1) find the mem (mr or mw) corresponding to lkey/rkey +/* (1) find the mr corresponding to lkey/rkey * depending on lookup_type - * (2) verify that the (qp) pd matches the mem pd - * (3) verify that the mem can support the requested access - * (4) verify that mem state is valid + * (2) verify that the (qp) pd matches the mr pd + * (3) verify that the mr can support the requested access + * (4) verify that mr state is valid */ -struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, - enum lookup_type type) +struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, + enum rxe_mr_lookup_type type) { - struct rxe_mem *mem; + struct rxe_mr *mr; struct rxe_dev *rxe = to_rdev(pd->ibpd.device); int index = key >> 8; - mem = rxe_pool_get_index(&rxe->mr_pool, index); - if (!mem) + mr = rxe_pool_get_index(&rxe->mr_pool, index); + if (!mr) return NULL; - if (unlikely((type == lookup_local && mem->lkey != key) || - (type == lookup_remote && mem->rkey != key) || - mem->pd != pd || - (access && !(access & mem->access)) || - mem->state != RXE_MEM_STATE_VALID)) { - rxe_drop_ref(mem); - mem = NULL; + if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) || + (type == RXE_LOOKUP_REMOTE && mr->rkey != key) || + mr_pd(mr) != pd || (access && !(access & mr->access)) || + mr->state != RXE_MR_STATE_VALID)) { + rxe_put(mr); + mr = NULL; } - return mem; + return mr; } -int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, - u64 *page, int num_pages, u64 iova) +int rxe_invalidate_mr(struct rxe_qp *qp, u32 key) { - int i; - int num_buf; - int err; - struct rxe_map **map; - struct rxe_phys_buf *buf; - int page_size; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_mr *mr; + int ret; + + mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8); + if (!mr) { + pr_err("%s: No MR for key %#x\n", __func__, key); + ret = -EINVAL; + goto err; + } - if (num_pages > mem->max_buf) { - err = -EINVAL; - goto err1; + if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) { + pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n", + __func__, key, (mr->rkey ? mr->rkey : mr->lkey)); + ret = -EINVAL; + goto err_drop_ref; } - num_buf = 0; - page_size = 1 << mem->page_shift; - map = mem->map; - buf = map[0]->buf; + if (atomic_read(&mr->num_mw) > 0) { + pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n", + __func__); + ret = -EINVAL; + goto err_drop_ref; + } - for (i = 0; i < num_pages; i++) { - buf->addr = *page++; - buf->size = page_size; - buf++; - num_buf++; + if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) { + pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type); + ret = -EINVAL; + goto err_drop_ref; + } - if (num_buf == RXE_BUF_PER_MAP) { - map++; - buf = map[0]->buf; - num_buf = 0; - } + mr->state = RXE_MR_STATE_FREE; + ret = 0; + +err_drop_ref: + rxe_put(mr); +err: + return ret; +} + +/* user can (re)register fast MR by executing a REG_MR WQE. + * user is expected to hold a reference on the ib mr until the + * WQE completes. + * Once a fast MR is created this is the only way to change the + * private keys. It is the responsibility of the user to maintain + * the ib mr keys in sync with rxe mr keys. + */ +int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr); + u32 key = wqe->wr.wr.reg.key; + u32 access = wqe->wr.wr.reg.access; + + /* user can only register MR in free state */ + if (unlikely(mr->state != RXE_MR_STATE_FREE)) { + pr_warn("%s: mr->lkey = 0x%x not free\n", + __func__, mr->lkey); + return -EINVAL; + } + + /* user can only register mr with qp in same protection domain */ + if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) { + pr_warn("%s: qp->pd and mr->pd don't match\n", + __func__); + return -EINVAL; } - mem->iova = iova; - mem->va = iova; - mem->length = num_pages << mem->page_shift; - mem->state = RXE_MEM_STATE_VALID; + /* user is only allowed to change key portion of l/rkey */ + if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) { + pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n", + __func__, key, mr->lkey); + return -EINVAL; + } + + mr->access = access; + mr->lkey = key; + mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0; + mr->ibmr.iova = wqe->wr.wr.reg.mr->iova; + mr->state = RXE_MR_STATE_VALID; + + return 0; +} + +int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) +{ + struct rxe_mr *mr = to_rmr(ibmr); + + /* See IBA 10.6.7.2.6 */ + if (atomic_read(&mr->num_mw) > 0) + return -EINVAL; + + rxe_cleanup(mr); return 0; +} -err1: - return err; +void rxe_mr_cleanup(struct rxe_pool_elem *elem) +{ + struct rxe_mr *mr = container_of(elem, typeof(*mr), elem); + int i; + + rxe_put(mr_pd(mr)); + ib_umem_release(mr->umem); + + if (mr->map) { + for (i = 0; i < mr->num_map; i++) + kfree(mr->map[i]); + + kfree(mr->map); + } } diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c new file mode 100644 index 000000000000..902b7df7aaed --- /dev/null +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2020 Hewlett Packard Enterprise, Inc. All rights reserved. + */ + +/* + * The rdma_rxe driver supports type 1 or type 2B memory windows. + * Type 1 MWs are created by ibv_alloc_mw() verbs calls and bound by + * ibv_bind_mw() calls. Type 2 MWs are also created by ibv_alloc_mw() + * but bound by bind_mw work requests. The ibv_bind_mw() call is converted + * by libibverbs to a bind_mw work request. + */ + +#include "rxe.h" + +int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) +{ + struct rxe_mw *mw = to_rmw(ibmw); + struct rxe_pd *pd = to_rpd(ibmw->pd); + struct rxe_dev *rxe = to_rdev(ibmw->device); + int ret; + + rxe_get(pd); + + ret = rxe_add_to_pool(&rxe->mw_pool, mw); + if (ret) { + rxe_put(pd); + return ret; + } + + mw->rkey = ibmw->rkey = (mw->elem.index << 8) | rxe_get_next_key(-1); + mw->state = (mw->ibmw.type == IB_MW_TYPE_2) ? + RXE_MW_STATE_FREE : RXE_MW_STATE_VALID; + spin_lock_init(&mw->lock); + + rxe_finalize(mw); + + return 0; +} + +int rxe_dealloc_mw(struct ib_mw *ibmw) +{ + struct rxe_mw *mw = to_rmw(ibmw); + + rxe_cleanup(mw); + + return 0; +} + +static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_mw *mw, struct rxe_mr *mr) +{ + if (mw->ibmw.type == IB_MW_TYPE_1) { + if (unlikely(mw->state != RXE_MW_STATE_VALID)) { + pr_err_once( + "attempt to bind a type 1 MW not in the valid state\n"); + return -EINVAL; + } + + /* o10-36.2.2 */ + if (unlikely((mw->access & IB_ZERO_BASED))) { + pr_err_once("attempt to bind a zero based type 1 MW\n"); + return -EINVAL; + } + } + + if (mw->ibmw.type == IB_MW_TYPE_2) { + /* o10-37.2.30 */ + if (unlikely(mw->state != RXE_MW_STATE_FREE)) { + pr_err_once( + "attempt to bind a type 2 MW not in the free state\n"); + return -EINVAL; + } + + /* C10-72 */ + if (unlikely(qp->pd != to_rpd(mw->ibmw.pd))) { + pr_err_once( + "attempt to bind type 2 MW with qp with different PD\n"); + return -EINVAL; + } + + /* o10-37.2.40 */ + if (unlikely(!mr || wqe->wr.wr.mw.length == 0)) { + pr_err_once( + "attempt to invalidate type 2 MW by binding with NULL or zero length MR\n"); + return -EINVAL; + } + } + + /* remaining checks only apply to a nonzero MR */ + if (!mr) + return 0; + + if (unlikely(mr->access & IB_ZERO_BASED)) { + pr_err_once("attempt to bind MW to zero based MR\n"); + return -EINVAL; + } + + /* C10-73 */ + if (unlikely(!(mr->access & IB_ACCESS_MW_BIND))) { + pr_err_once( + "attempt to bind an MW to an MR without bind access\n"); + return -EINVAL; + } + + /* C10-74 */ + if (unlikely((mw->access & + (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) && + !(mr->access & IB_ACCESS_LOCAL_WRITE))) { + pr_err_once( + "attempt to bind an Writable MW to an MR without local write access\n"); + return -EINVAL; + } + + /* C10-75 */ + if (mw->access & IB_ZERO_BASED) { + if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) { + pr_err_once( + "attempt to bind a ZB MW outside of the MR\n"); + return -EINVAL; + } + } else { + if (unlikely((wqe->wr.wr.mw.addr < mr->ibmr.iova) || + ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) > + (mr->ibmr.iova + mr->ibmr.length)))) { + pr_err_once( + "attempt to bind a VA MW outside of the MR\n"); + return -EINVAL; + } + } + + return 0; +} + +static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_mw *mw, struct rxe_mr *mr) +{ + u32 key = wqe->wr.wr.mw.rkey & 0xff; + + mw->rkey = (mw->rkey & ~0xff) | key; + mw->access = wqe->wr.wr.mw.access; + mw->state = RXE_MW_STATE_VALID; + mw->addr = wqe->wr.wr.mw.addr; + mw->length = wqe->wr.wr.mw.length; + + if (mw->mr) { + rxe_put(mw->mr); + atomic_dec(&mw->mr->num_mw); + mw->mr = NULL; + } + + if (mw->length) { + mw->mr = mr; + atomic_inc(&mr->num_mw); + rxe_get(mr); + } + + if (mw->ibmw.type == IB_MW_TYPE_2) { + rxe_get(qp); + mw->qp = qp; + } +} + +int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + int ret; + struct rxe_mw *mw; + struct rxe_mr *mr; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + u32 mw_rkey = wqe->wr.wr.mw.mw_rkey; + u32 mr_lkey = wqe->wr.wr.mw.mr_lkey; + + mw = rxe_pool_get_index(&rxe->mw_pool, mw_rkey >> 8); + if (unlikely(!mw)) { + ret = -EINVAL; + goto err; + } + + if (unlikely(mw->rkey != mw_rkey)) { + ret = -EINVAL; + goto err_drop_mw; + } + + if (likely(wqe->wr.wr.mw.length)) { + mr = rxe_pool_get_index(&rxe->mr_pool, mr_lkey >> 8); + if (unlikely(!mr)) { + ret = -EINVAL; + goto err_drop_mw; + } + + if (unlikely(mr->lkey != mr_lkey)) { + ret = -EINVAL; + goto err_drop_mr; + } + } else { + mr = NULL; + } + + spin_lock_bh(&mw->lock); + + ret = rxe_check_bind_mw(qp, wqe, mw, mr); + if (ret) + goto err_unlock; + + rxe_do_bind_mw(qp, wqe, mw, mr); +err_unlock: + spin_unlock_bh(&mw->lock); +err_drop_mr: + if (mr) + rxe_put(mr); +err_drop_mw: + rxe_put(mw); +err: + return ret; +} + +static int rxe_check_invalidate_mw(struct rxe_qp *qp, struct rxe_mw *mw) +{ + if (unlikely(mw->state == RXE_MW_STATE_INVALID)) + return -EINVAL; + + /* o10-37.2.26 */ + if (unlikely(mw->ibmw.type == IB_MW_TYPE_1)) + return -EINVAL; + + return 0; +} + +static void rxe_do_invalidate_mw(struct rxe_mw *mw) +{ + struct rxe_qp *qp; + struct rxe_mr *mr; + + /* valid type 2 MW will always have a QP pointer */ + qp = mw->qp; + mw->qp = NULL; + rxe_put(qp); + + /* valid type 2 MW will always have an MR pointer */ + mr = mw->mr; + mw->mr = NULL; + atomic_dec(&mr->num_mw); + rxe_put(mr); + + mw->access = 0; + mw->addr = 0; + mw->length = 0; + mw->state = RXE_MW_STATE_FREE; +} + +int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_mw *mw; + int ret; + + mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8); + if (!mw) { + ret = -EINVAL; + goto err; + } + + if (rkey != mw->rkey) { + ret = -EINVAL; + goto err_drop_ref; + } + + spin_lock_bh(&mw->lock); + + ret = rxe_check_invalidate_mw(qp, mw); + if (ret) + goto err_unlock; + + rxe_do_invalidate_mw(mw); +err_unlock: + spin_unlock_bh(&mw->lock); +err_drop_ref: + rxe_put(mw); +err: + return ret; +} + +struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_pd *pd = to_rpd(qp->ibqp.pd); + struct rxe_mw *mw; + int index = rkey >> 8; + + mw = rxe_pool_get_index(&rxe->mw_pool, index); + if (!mw) + return NULL; + + if (unlikely((mw->rkey != rkey) || rxe_mw_pd(mw) != pd || + (mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) || + (mw->length == 0) || + (access && !(access & mw->access)) || + mw->state != RXE_MW_STATE_VALID)) { + rxe_put(mw); + return NULL; + } + + return mw; +} + +void rxe_mw_cleanup(struct rxe_pool_elem *elem) +{ + struct rxe_mw *mw = container_of(elem, typeof(*mw), elem); + struct rxe_pd *pd = to_rpd(mw->ibmw.pd); + + rxe_put(pd); + + if (mw->mr) { + struct rxe_mr *mr = mw->mr; + + mw->mr = NULL; + atomic_dec(&mr->num_mw); + rxe_put(mr); + } + + if (mw->qp) { + struct rxe_qp *qp = mw->qp; + + mw->qp = NULL; + rxe_put(qp); + } + + mw->access = 0; + mw->addr = 0; + mw->length = 0; + mw->state = RXE_MW_STATE_INVALID; +} diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 312c2fc961c0..35f327b9d4b8 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/skbuff.h> @@ -47,40 +20,6 @@ static struct rxe_recv_sockets recv_sockets; -struct device *rxe_dma_device(struct rxe_dev *rxe) -{ - struct net_device *ndev; - - ndev = rxe->ndev; - - if (is_vlan_dev(ndev)) - ndev = vlan_dev_real_dev(ndev); - - return ndev->dev.parent; -} - -int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) -{ - int err; - unsigned char ll_addr[ETH_ALEN]; - - ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); - err = dev_mc_add(rxe->ndev, ll_addr); - - return err; -} - -int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) -{ - int err; - unsigned char ll_addr[ETH_ALEN]; - - ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); - err = dev_mc_del(rxe->ndev, ll_addr); - - return err; -} - static struct dst_entry *rxe_find_route4(struct net_device *ndev, struct in_addr *saddr, struct in_addr *daddr) @@ -120,7 +59,7 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev, ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk), recv_sockets.sk6->sk, &fl6, NULL); - if (unlikely(IS_ERR(ndst))) { + if (IS_ERR(ndst)) { pr_err_ratelimited("no route to %pI6\n", daddr); return NULL; } @@ -160,14 +99,14 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev, if (dst) dst_release(dst); - if (av->network_type == RDMA_NETWORK_IPV4) { + if (av->network_type == RXE_NETWORK_TYPE_IPV4) { struct in_addr *saddr; struct in_addr *daddr; saddr = &av->sgid_addr._sockaddr_in.sin_addr; daddr = &av->dgid_addr._sockaddr_in.sin_addr; dst = rxe_find_route4(ndev, saddr, daddr); - } else if (av->network_type == RDMA_NETWORK_IPV6) { + } else if (av->network_type == RXE_NETWORK_TYPE_IPV6) { struct in6_addr *saddr6; struct in6_addr *daddr6; @@ -192,20 +131,20 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev, static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct udphdr *udph; + struct rxe_dev *rxe; struct net_device *ndev = skb->dev; - struct net_device *rdev = ndev; - struct rxe_dev *rxe = rxe_get_dev_from_net(ndev); struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); - if (!rxe && is_vlan_dev(rdev)) { - rdev = vlan_dev_real_dev(ndev); - rxe = rxe_get_dev_from_net(rdev); - } + /* takes a reference on rxe->ib_dev + * drop when skb is freed + */ + rxe = rxe_get_dev_from_net(ndev); + if (!rxe && is_vlan_dev(ndev)) + rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev)); if (!rxe) goto drop; if (skb_linearize(skb)) { - pr_err("skb_linearize failed\n"); ib_device_put(&rxe->ib_dev); goto drop; } @@ -219,12 +158,6 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) rxe_rcv(skb); - /* - * FIXME: this is in the wrong place, it needs to be done when pkt is - * destroyed - */ - ib_device_put(&rxe->ib_dev); - return 0; drop: kfree_skb(skb); @@ -251,10 +184,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, /* Create UDP socket */ err = udp_sock_create(net, &udp_cfg, &sock); - if (err < 0) { - pr_err("failed to create udp socket. err = %d\n", err); + if (err < 0) return ERR_PTR(err); - } tnl_cfg.encap_type = 1; tnl_cfg.encap_rcv = rxe_udp_encap_recv; @@ -305,6 +236,7 @@ static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb, iph->version = IPVERSION; iph->ihl = sizeof(struct iphdr) >> 2; + iph->tot_len = htons(skb->len); iph->frag_off = df; iph->protocol = proto; iph->tos = tos; @@ -313,8 +245,6 @@ static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb, iph->ttl = ttl; __ip_select_ident(dev_net(dst->dev), iph, skb_shinfo(skb)->gso_segs ?: 1); - iph->tot_len = htons(skb->len); - ip_send_check(iph); } static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, @@ -340,13 +270,13 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); } -static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb) +static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; bool xnet = false; __be16 df = htons(IP_DF); - struct rxe_av *av = rxe_get_av(pkt); struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; @@ -366,11 +296,11 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb) return 0; } -static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) +static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; - struct rxe_av *av = rxe_get_av(pkt); struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; @@ -391,18 +321,17 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) return 0; } -int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc) +int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { int err = 0; if (skb->protocol == htons(ETH_P_IP)) - err = prepare4(pkt, skb); + err = prepare4(av, pkt, skb); else if (skb->protocol == htons(ETH_P_IPV6)) - err = prepare6(pkt, skb); - - *crc = rxe_icrc_hdr(pkt, skb); + err = prepare6(av, pkt, skb); - if (ether_addr_equal(skb->dev->dev_addr, rxe_get_av(pkt)->dmac)) + if (ether_addr_equal(skb->dev->dev_addr, av->dmac)) pkt->mask |= RXE_LOOPBACK_MASK; return err; @@ -418,17 +347,17 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb) skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) rxe_run_task(&qp->req.task, 1); - rxe_drop_ref(qp); + rxe_put(qp); } -int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb) +static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) { int err; skb->destructor = rxe_skb_tx_dtor; skb->sk = pkt->qp->sk->sk; - rxe_add_ref(pkt->qp); + rxe_get(pkt->qp); atomic_inc(&pkt->qp->skb_out); if (skb->protocol == htons(ETH_P_IP)) { @@ -438,7 +367,7 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb) } else { pr_err("Unknown layer 3 protocol: %d\n", skb->protocol); atomic_dec(&pkt->qp->skb_out); - rxe_drop_ref(pkt->qp); + rxe_put(pkt->qp); kfree_skb(skb); return -EINVAL; } @@ -451,9 +380,66 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb) return 0; } -void rxe_loopback(struct sk_buff *skb) +/* fix up a send packet to match the packets + * received from UDP before looping them back + */ +static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt) { + memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt)); + + if (skb->protocol == htons(ETH_P_IP)) + skb_pull(skb, sizeof(struct iphdr)); + else + skb_pull(skb, sizeof(struct ipv6hdr)); + + if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev))) { + kfree_skb(skb); + return -EIO; + } + rxe_rcv(skb); + + return 0; +} + +int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, + struct sk_buff *skb) +{ + int err; + int is_request = pkt->mask & RXE_REQ_MASK; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + + if ((is_request && (qp->req.state != QP_STATE_READY)) || + (!is_request && (qp->resp.state != QP_STATE_READY))) { + pr_info("Packet dropped. QP is not in ready state\n"); + goto drop; + } + + rxe_icrc_generate(skb, pkt); + + if (pkt->mask & RXE_LOOPBACK_MASK) + err = rxe_loopback(skb, pkt); + else + err = rxe_send(skb, pkt); + if (err) { + rxe_counter_inc(rxe, RXE_CNT_SEND_ERR); + return err; + } + + if ((qp_type(qp) != IB_QPT_RC) && + (pkt->mask & RXE_END_MASK)) { + pkt->wqe->state = wqe_state_done; + rxe_run_task(&qp->comp.task, 1); + } + + rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS); + goto done; + +drop: + kfree_skb(skb); + err = 0; +done: + return err; } struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, @@ -469,7 +455,7 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, if (IS_ERR(attr)) return NULL; - if (av->network_type == RDMA_NETWORK_IPV4) + if (av->network_type == RXE_NETWORK_TYPE_IPV4) hdr_len = ETH_HLEN + sizeof(struct udphdr) + sizeof(struct iphdr); else @@ -496,14 +482,14 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, skb->dev = ndev; rcu_read_unlock(); - if (av->network_type == RDMA_NETWORK_IPV4) + if (av->network_type == RXE_NETWORK_TYPE_IPV4) skb->protocol = htons(ETH_P_IP); else skb->protocol = htons(ETH_P_IPV6); pkt->rxe = rxe; pkt->port_num = port_num; - pkt->hdr = skb_put_zero(skb, paylen); + pkt->hdr = skb_put(skb, paylen); pkt->mask |= RXE_GRH_MASK; out: @@ -520,11 +506,6 @@ const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num) return rxe->ndev->name; } -enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num) -{ - return IB_LINK_LAYER_ETHERNET; -} - int rxe_net_add(const char *ibdev_name, struct net_device *ndev) { int err; @@ -655,6 +636,12 @@ static int rxe_net_ipv6_init(void) recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, htons(ROCE_V2_UDP_DPORT), true); + if (PTR_ERR(recv_sockets.sk6) == -EAFNOSUPPORT) { + recv_sockets.sk6 = NULL; + pr_warn("IPv6 is not supported, can not create a UDPv6 socket\n"); + return 0; + } + if (IS_ERR(recv_sockets.sk6)) { recv_sockets.sk6 = NULL; pr_err("Failed to create IPv6 UDP tunnel\n"); diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h index 2ca71d3d245c..45d80d00f86b 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.h +++ b/drivers/infiniband/sw/rxe/rxe_net.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_NET_H diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index 4cf11063e0b5..d4ba4d506f17 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <rdma/ib_pack.h> @@ -56,7 +29,6 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { [IB_WR_SEND] = { .name = "IB_WR_SEND", .mask = { - [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, @@ -66,7 +38,6 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { [IB_WR_SEND_WITH_IMM] = { .name = "IB_WR_SEND_WITH_IMM", .mask = { - [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, @@ -114,13 +85,20 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { [IB_WR_LOCAL_INV] = { .name = "IB_WR_LOCAL_INV", .mask = { - [IB_QPT_RC] = WR_REG_MASK, + [IB_QPT_RC] = WR_LOCAL_OP_MASK, }, }, [IB_WR_REG_MR] = { .name = "IB_WR_REG_MR", .mask = { - [IB_QPT_RC] = WR_REG_MASK, + [IB_QPT_RC] = WR_LOCAL_OP_MASK, + }, + }, + [IB_WR_BIND_MW] = { + .name = "IB_WR_BIND_MW", + .mask = { + [IB_QPT_RC] = WR_LOCAL_OP_MASK, + [IB_QPT_UC] = WR_LOCAL_OP_MASK, }, }, }; @@ -128,8 +106,8 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { [IB_OPCODE_RC_SEND_FIRST] = { .name = "IB_OPCODE_RC_SEND_FIRST", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK - | RXE_SEND_MASK | RXE_START_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK | + RXE_SEND_MASK | RXE_START_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -137,9 +115,9 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { } }, [IB_OPCODE_RC_SEND_MIDDLE] = { - .name = "IB_OPCODE_RC_SEND_MIDDLE]", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK - | RXE_MIDDLE_MASK, + .name = "IB_OPCODE_RC_SEND_MIDDLE", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK | + RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -148,8 +126,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_RC_SEND_LAST] = { .name = "IB_OPCODE_RC_SEND_LAST", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK - | RXE_SEND_MASK | RXE_END_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK | + RXE_SEND_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -158,21 +136,21 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE", - .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IMMDT] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IMMDT_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RC_SEND_ONLY] = { .name = "IB_OPCODE_RC_SEND_ONLY", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK - | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK | + RXE_RWR_MASK | RXE_SEND_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -181,33 +159,33 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE", - .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IMMDT] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IMMDT_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RC_RDMA_WRITE_FIRST] = { .name = "IB_OPCODE_RC_RDMA_WRITE_FIRST", - .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_START_MASK, + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_START_MASK, .length = RXE_BTH_BYTES + RXE_RETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, } }, [IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = { .name = "IB_OPCODE_RC_RDMA_WRITE_MIDDLE", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_MIDDLE_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -216,8 +194,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_RC_RDMA_WRITE_LAST] = { .name = "IB_OPCODE_RC_RDMA_WRITE_LAST", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_END_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -226,69 +204,69 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE", - .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_END_MASK, + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IMMDT] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IMMDT_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RC_RDMA_WRITE_ONLY] = { .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY", - .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_START_MASK - | RXE_END_MASK, + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_START_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_RETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, } }, [IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE", - .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RETH] = RXE_BTH_BYTES, - [RXE_IMMDT] = RXE_BTH_BYTES - + RXE_RETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES - + RXE_IMMDT_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RC_RDMA_READ_REQUEST] = { .name = "IB_OPCODE_RC_RDMA_READ_REQUEST", - .mask = RXE_RETH_MASK | RXE_REQ_MASK | RXE_READ_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_RETH_MASK | RXE_REQ_MASK | RXE_READ_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_RETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, } }, [IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = { .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST", - .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK - | RXE_START_MASK, + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK | + RXE_START_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_AETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_AETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = { @@ -302,109 +280,110 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = { .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST", - .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK - | RXE_END_MASK, + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_AETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_AETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = { .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY", - .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_AETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_AETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RC_ACKNOWLEDGE] = { .name = "IB_OPCODE_RC_ACKNOWLEDGE", - .mask = RXE_AETH_MASK | RXE_ACK_MASK | RXE_START_MASK - | RXE_END_MASK, + .mask = RXE_AETH_MASK | RXE_ACK_MASK | RXE_START_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_AETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_AETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = { .name = "IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE", - .mask = RXE_AETH_MASK | RXE_ATMACK_MASK | RXE_ACK_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_AETH_MASK | RXE_ATMACK_MASK | RXE_ACK_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_AETH] = RXE_BTH_BYTES, - [RXE_ATMACK] = RXE_BTH_BYTES - + RXE_AETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_ATMACK_BYTES + RXE_AETH_BYTES, + [RXE_ATMACK] = RXE_BTH_BYTES + + RXE_AETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_ATMACK_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RC_COMPARE_SWAP] = { .name = "IB_OPCODE_RC_COMPARE_SWAP", - .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_ATMETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_ATMETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_ATMETH_BYTES, } }, [IB_OPCODE_RC_FETCH_ADD] = { .name = "IB_OPCODE_RC_FETCH_ADD", - .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_ATMETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_ATMETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_ATMETH_BYTES, } }, [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = { .name = "IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE", - .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, + .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IETH_BYTES, } }, [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = { .name = "IB_OPCODE_RC_SEND_ONLY_INV", - .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_END_MASK | RXE_START_MASK, + .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK | + RXE_END_MASK | RXE_START_MASK, .length = RXE_BTH_BYTES + RXE_IETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IETH_BYTES, } }, /* UC */ [IB_OPCODE_UC_SEND_FIRST] = { .name = "IB_OPCODE_UC_SEND_FIRST", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK - | RXE_SEND_MASK | RXE_START_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK | + RXE_SEND_MASK | RXE_START_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -413,8 +392,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_UC_SEND_MIDDLE] = { .name = "IB_OPCODE_UC_SEND_MIDDLE", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK - | RXE_MIDDLE_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK | + RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -423,8 +402,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_UC_SEND_LAST] = { .name = "IB_OPCODE_UC_SEND_LAST", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK - | RXE_SEND_MASK | RXE_END_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK | + RXE_SEND_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -433,21 +412,21 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = { .name = "IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE", - .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IMMDT] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IMMDT_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_UC_SEND_ONLY] = { .name = "IB_OPCODE_UC_SEND_ONLY", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK - | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK | + RXE_RWR_MASK | RXE_SEND_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -456,33 +435,33 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = { .name = "IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE", - .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IMMDT] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IMMDT_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_UC_RDMA_WRITE_FIRST] = { .name = "IB_OPCODE_UC_RDMA_WRITE_FIRST", - .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_START_MASK, + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_START_MASK, .length = RXE_BTH_BYTES + RXE_RETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, } }, [IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = { .name = "IB_OPCODE_UC_RDMA_WRITE_MIDDLE", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_MIDDLE_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -491,8 +470,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_UC_RDMA_WRITE_LAST] = { .name = "IB_OPCODE_UC_RDMA_WRITE_LAST", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_END_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -501,460 +480,460 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { .name = "IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE", - .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_END_MASK, + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IMMDT] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IMMDT_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_UC_RDMA_WRITE_ONLY] = { .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY", - .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_START_MASK - | RXE_END_MASK, + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_START_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_RETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, } }, [IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE", - .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RETH] = RXE_BTH_BYTES, - [RXE_IMMDT] = RXE_BTH_BYTES - + RXE_RETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES - + RXE_IMMDT_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES + + RXE_IMMDT_BYTES, } }, /* RD */ [IB_OPCODE_RD_SEND_FIRST] = { .name = "IB_OPCODE_RD_SEND_FIRST", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_START_MASK, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_RWR_MASK | RXE_SEND_MASK | + RXE_START_MASK, .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, } }, [IB_OPCODE_RD_SEND_MIDDLE] = { .name = "IB_OPCODE_RD_SEND_MIDDLE", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_SEND_MASK - | RXE_MIDDLE_MASK, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_SEND_MASK | + RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, } }, [IB_OPCODE_RD_SEND_LAST] = { .name = "IB_OPCODE_RD_SEND_LAST", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_COMP_MASK | RXE_SEND_MASK - | RXE_END_MASK, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_COMP_MASK | RXE_SEND_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, } }, [IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK - | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_SEND_MASK - | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK | + RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_SEND_MASK | + RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_IMMDT] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES - + RXE_IMMDT_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RD_SEND_ONLY] = { .name = "IB_OPCODE_RD_SEND_ONLY", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK | + RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, } }, [IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK - | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_START_MASK | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK | + RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK | + RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_IMMDT] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES - + RXE_IMMDT_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RD_RDMA_WRITE_FIRST] = { .name = "IB_OPCODE_RD_RDMA_WRITE_FIRST", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK - | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_START_MASK, - .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK | + RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_START_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_RETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES - + RXE_RETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES, } }, [IB_OPCODE_RD_RDMA_WRITE_MIDDLE] = { .name = "IB_OPCODE_RD_RDMA_WRITE_MIDDLE", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_MIDDLE_MASK, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, } }, [IB_OPCODE_RD_RDMA_WRITE_LAST] = { .name = "IB_OPCODE_RD_RDMA_WRITE_LAST", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_END_MASK, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, } }, [IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK - | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK | + RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK | + RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_IMMDT] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES - + RXE_IMMDT_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RD_RDMA_WRITE_ONLY] = { .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK - | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_START_MASK - | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK | + RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_START_MASK | + RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_RETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES - + RXE_RETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES, } }, [IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK - | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_START_MASK | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES - + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK | + RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | + RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES + + RXE_DETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_RETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, - [RXE_IMMDT] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES - + RXE_RETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES - + RXE_RETH_BYTES - + RXE_IMMDT_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RD_RDMA_READ_REQUEST] = { .name = "IB_OPCODE_RD_RDMA_READ_REQUEST", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK - | RXE_REQ_MASK | RXE_READ_MASK - | RXE_START_MASK | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK | + RXE_REQ_MASK | RXE_READ_MASK | + RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_RETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES - + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES + + RXE_DETH_BYTES + + RXE_RDETH_BYTES, } }, [IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST] = { .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST", - .mask = RXE_RDETH_MASK | RXE_AETH_MASK - | RXE_PAYLOAD_MASK | RXE_ACK_MASK - | RXE_START_MASK, + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | + RXE_PAYLOAD_MASK | RXE_ACK_MASK | + RXE_START_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_AETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_AETH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE] = { .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE", - .mask = RXE_RDETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK - | RXE_MIDDLE_MASK, + .mask = RXE_RDETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK | + RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, } }, [IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST] = { .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST", - .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK - | RXE_ACK_MASK | RXE_END_MASK, + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK | + RXE_ACK_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_AETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_AETH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY] = { .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY", - .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK - | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK | + RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_AETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_AETH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RD_ACKNOWLEDGE] = { .name = "IB_OPCODE_RD_ACKNOWLEDGE", - .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ACK_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ACK_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_AETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, } }, [IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE] = { .name = "IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE", - .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ATMACK_MASK - | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ATMACK_MASK | + RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_AETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_ATMACK] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_AETH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_ATMACK] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RD_COMPARE_SWAP] = { .name = "RD_COMPARE_SWAP", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK - | RXE_REQ_MASK | RXE_ATOMIC_MASK - | RXE_START_MASK | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK | + RXE_REQ_MASK | RXE_ATOMIC_MASK | + RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_ATMETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_ATMETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, [RXE_PAYLOAD] = RXE_BTH_BYTES + - + RXE_ATMETH_BYTES - + RXE_DETH_BYTES + - + RXE_RDETH_BYTES, + RXE_ATMETH_BYTES + + RXE_DETH_BYTES + + RXE_RDETH_BYTES, } }, [IB_OPCODE_RD_FETCH_ADD] = { .name = "IB_OPCODE_RD_FETCH_ADD", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK - | RXE_REQ_MASK | RXE_ATOMIC_MASK - | RXE_START_MASK | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK | + RXE_REQ_MASK | RXE_ATOMIC_MASK | + RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_ATMETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_ATMETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, [RXE_PAYLOAD] = RXE_BTH_BYTES + - + RXE_ATMETH_BYTES - + RXE_DETH_BYTES + - + RXE_RDETH_BYTES, + RXE_ATMETH_BYTES + + RXE_DETH_BYTES + + RXE_RDETH_BYTES, } }, /* UD */ [IB_OPCODE_UD_SEND_ONLY] = { .name = "IB_OPCODE_UD_SEND_ONLY", - .mask = RXE_DETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_DETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_DETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_DETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_DETH_BYTES, } }, [IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = { .name = "IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE", - .mask = RXE_DETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_DETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK | + RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_DETH] = RXE_BTH_BYTES, - [RXE_IMMDT] = RXE_BTH_BYTES - + RXE_DETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_DETH_BYTES - + RXE_IMMDT_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, } }, diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h index 307604e9c78d..8f9aaaf260f2 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.h +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_OPCODE_H @@ -46,11 +19,9 @@ enum rxe_wr_mask { WR_SEND_MASK = BIT(2), WR_READ_MASK = BIT(3), WR_WRITE_MASK = BIT(4), - WR_LOCAL_MASK = BIT(5), - WR_REG_MASK = BIT(6), + WR_LOCAL_OP_MASK = BIT(5), WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK, - WR_READ_WRITE_OR_SEND_MASK = WR_READ_OR_WRITE_MASK | WR_SEND_MASK, WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK, WR_ATOMIC_OR_READ_MASK = WR_ATOMIC_MASK | WR_READ_MASK, }; @@ -110,8 +81,9 @@ enum rxe_hdr_mask { RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12), - RXE_READ_OR_ATOMIC = (RXE_READ_MASK | RXE_ATOMIC_MASK), - RXE_WRITE_OR_SEND = (RXE_WRITE_MASK | RXE_SEND_MASK), + RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK), + RXE_WRITE_OR_SEND_MASK = (RXE_WRITE_MASK | RXE_SEND_MASK), + RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK), }; #define OPCODE_NONE (-1) diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index f59616b02477..86c7a8bf3cbb 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_PARAM_H @@ -36,6 +9,8 @@ #include <uapi/rdma/rdma_user_rxe.h> +#define DEFAULT_MAX_VALUE (1 << 20) + static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu) { if (mtu < 256) @@ -64,8 +39,7 @@ static inline enum ib_mtu eth_mtu_int_to_enum(int mtu) enum rxe_device_param { RXE_MAX_MR_SIZE = -1ull, RXE_PAGE_SIZE_CAP = 0xfffff000, - RXE_MAX_QP = 0x10000, - RXE_MAX_QP_WR = 0x4000, + RXE_MAX_QP_WR = DEFAULT_MAX_VALUE, RXE_DEVICE_CAP_FLAGS = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_AUTO_PATH_MIG @@ -76,26 +50,27 @@ enum rxe_device_param { | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_SRQ_RESIZE | IB_DEVICE_MEM_MGT_EXTENSIONS - | IB_DEVICE_ALLOW_USER_UNREG, + | IB_DEVICE_MEM_WINDOW + | IB_DEVICE_MEM_WINDOW_TYPE_2B, RXE_MAX_SGE = 32, RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) + sizeof(struct ib_sge) * RXE_MAX_SGE, RXE_MAX_INLINE_DATA = RXE_MAX_WQE_SIZE - sizeof(struct rxe_send_wqe), RXE_MAX_SGE_RD = 32, - RXE_MAX_CQ = 16384, + RXE_MAX_CQ = DEFAULT_MAX_VALUE, RXE_MAX_LOG_CQE = 15, - RXE_MAX_MR = 256 * 1024, - RXE_MAX_PD = 0x7ffc, + RXE_MAX_PD = DEFAULT_MAX_VALUE, RXE_MAX_QP_RD_ATOM = 128, RXE_MAX_RES_RD_ATOM = 0x3f000, RXE_MAX_QP_INIT_RD_ATOM = 128, RXE_MAX_MCAST_GRP = 8192, RXE_MAX_MCAST_QP_ATTACH = 56, RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000, - RXE_MAX_AH = 100, - RXE_MAX_SRQ = 960, - RXE_MAX_SRQ_WR = 0x4000, + RXE_MAX_AH = (1<<15) - 1, /* 32Ki - 1 */ + RXE_MIN_AH_INDEX = 1, + RXE_MAX_AH_INDEX = RXE_MAX_AH, + RXE_MAX_SRQ_WR = DEFAULT_MAX_VALUE, RXE_MIN_SRQ_WR = 1, RXE_MAX_SRQ_SGE = 27, RXE_MIN_SRQ_SGE = 1, @@ -103,20 +78,25 @@ enum rxe_device_param { RXE_MAX_PKEYS = 64, RXE_LOCAL_CA_ACK_DELAY = 15, - RXE_MAX_UCONTEXT = 512, + RXE_MAX_UCONTEXT = DEFAULT_MAX_VALUE, RXE_NUM_PORT = 1, RXE_MIN_QP_INDEX = 16, - RXE_MAX_QP_INDEX = 0x00020000, + RXE_MAX_QP_INDEX = DEFAULT_MAX_VALUE, + RXE_MAX_QP = DEFAULT_MAX_VALUE - RXE_MIN_QP_INDEX, RXE_MIN_SRQ_INDEX = 0x00020001, - RXE_MAX_SRQ_INDEX = 0x00040000, + RXE_MAX_SRQ_INDEX = DEFAULT_MAX_VALUE, + RXE_MAX_SRQ = DEFAULT_MAX_VALUE - RXE_MIN_SRQ_INDEX, RXE_MIN_MR_INDEX = 0x00000001, - RXE_MAX_MR_INDEX = 0x00040000, - RXE_MIN_MW_INDEX = 0x00040001, - RXE_MAX_MW_INDEX = 0x00060000, + RXE_MAX_MR_INDEX = DEFAULT_MAX_VALUE, + RXE_MAX_MR = DEFAULT_MAX_VALUE - RXE_MIN_MR_INDEX, + RXE_MIN_MW_INDEX = 0x00010001, + RXE_MAX_MW_INDEX = 0x00020000, + RXE_MAX_MW = 0x00001000, + RXE_MAX_PKT_PER_ACK = 64, RXE_MAX_UNACKED_PSNS = 128, @@ -125,14 +105,23 @@ enum rxe_device_param { RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64, RXE_INFLIGHT_SKBS_PER_QP_LOW = 16, + /* Max number of interations of each tasklet + * before yielding the cpu to let other + * work make progress + */ + RXE_MAX_ITERATIONS = 1024, + /* Delay before calling arbiter timer */ RXE_NSEC_ARB_TIMER_DELAY = 200, + + /* IBTA v1.4 A3.3.1 VENDOR INFORMATION section */ + RXE_VENDOR_ID = 0XFFFFFF, }; /* default/initial rxe port parameters */ enum rxe_port_param { RXE_PORT_GID_TBL_LEN = 1024, - RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP, + RXE_PORT_PORT_CAP_FLAGS = IB_PORT_CM_SUP, RXE_PORT_MAX_MSG_SZ = 0x800000, RXE_PORT_BAD_PKEY_CNTR = 0, RXE_PORT_QKEY_VIOL_CNTR = 0, @@ -145,7 +134,7 @@ enum rxe_port_param { RXE_PORT_INIT_TYPE_REPLY = 0, RXE_PORT_ACTIVE_WIDTH = IB_WIDTH_1X, RXE_PORT_ACTIVE_SPEED = 1, - RXE_PORT_PKEY_TBL_LEN = 64, + RXE_PORT_PKEY_TBL_LEN = 1, RXE_PORT_PHYS_STATE = IB_PORT_PHYS_STATE_POLLING, RXE_PORT_SUBNET_PREFIX = 0xfe80000000000000ULL, }; diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index fbcbac52290b..f50620f5a0a1 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -1,542 +1,302 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include "rxe.h" -#include "rxe_loc.h" -/* info about object pools - * note that mr and mw share a single index space - * so that one can map an lkey to the correct type of object - */ -struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { +#define RXE_POOL_TIMEOUT (200) +#define RXE_POOL_ALIGN (16) + +static const struct rxe_type_info { + const char *name; + size_t size; + size_t elem_offset; + void (*cleanup)(struct rxe_pool_elem *elem); + u32 min_index; + u32 max_index; + u32 max_elem; +} rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_UC] = { - .name = "rxe-uc", + .name = "uc", .size = sizeof(struct rxe_ucontext), - .flags = RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_ucontext, elem), + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = UINT_MAX, }, [RXE_TYPE_PD] = { - .name = "rxe-pd", + .name = "pd", .size = sizeof(struct rxe_pd), - .flags = RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_pd, elem), + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = UINT_MAX, }, [RXE_TYPE_AH] = { - .name = "rxe-ah", + .name = "ah", .size = sizeof(struct rxe_ah), - .flags = RXE_POOL_ATOMIC | RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_ah, elem), + .min_index = RXE_MIN_AH_INDEX, + .max_index = RXE_MAX_AH_INDEX, + .max_elem = RXE_MAX_AH_INDEX - RXE_MIN_AH_INDEX + 1, }, [RXE_TYPE_SRQ] = { - .name = "rxe-srq", + .name = "srq", .size = sizeof(struct rxe_srq), - .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_srq, elem), + .cleanup = rxe_srq_cleanup, .min_index = RXE_MIN_SRQ_INDEX, .max_index = RXE_MAX_SRQ_INDEX, + .max_elem = RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1, }, [RXE_TYPE_QP] = { - .name = "rxe-qp", + .name = "qp", .size = sizeof(struct rxe_qp), + .elem_offset = offsetof(struct rxe_qp, elem), .cleanup = rxe_qp_cleanup, - .flags = RXE_POOL_INDEX, .min_index = RXE_MIN_QP_INDEX, .max_index = RXE_MAX_QP_INDEX, + .max_elem = RXE_MAX_QP_INDEX - RXE_MIN_QP_INDEX + 1, }, [RXE_TYPE_CQ] = { - .name = "rxe-cq", + .name = "cq", .size = sizeof(struct rxe_cq), - .flags = RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_cq, elem), .cleanup = rxe_cq_cleanup, + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = UINT_MAX, }, [RXE_TYPE_MR] = { - .name = "rxe-mr", - .size = sizeof(struct rxe_mem), - .cleanup = rxe_mem_cleanup, - .flags = RXE_POOL_INDEX, - .max_index = RXE_MAX_MR_INDEX, + .name = "mr", + .size = sizeof(struct rxe_mr), + .elem_offset = offsetof(struct rxe_mr, elem), + .cleanup = rxe_mr_cleanup, .min_index = RXE_MIN_MR_INDEX, + .max_index = RXE_MAX_MR_INDEX, + .max_elem = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1, }, [RXE_TYPE_MW] = { - .name = "rxe-mw", - .size = sizeof(struct rxe_mem), - .flags = RXE_POOL_INDEX, - .max_index = RXE_MAX_MW_INDEX, + .name = "mw", + .size = sizeof(struct rxe_mw), + .elem_offset = offsetof(struct rxe_mw, elem), + .cleanup = rxe_mw_cleanup, .min_index = RXE_MIN_MW_INDEX, - }, - [RXE_TYPE_MC_GRP] = { - .name = "rxe-mc_grp", - .size = sizeof(struct rxe_mc_grp), - .cleanup = rxe_mc_cleanup, - .flags = RXE_POOL_KEY, - .key_offset = offsetof(struct rxe_mc_grp, mgid), - .key_size = sizeof(union ib_gid), - }, - [RXE_TYPE_MC_ELEM] = { - .name = "rxe-mc_elem", - .size = sizeof(struct rxe_mc_elem), - .flags = RXE_POOL_ATOMIC, + .max_index = RXE_MAX_MW_INDEX, + .max_elem = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1, }, }; -static inline const char *pool_name(struct rxe_pool *pool) -{ - return rxe_type_info[pool->type].name; -} - -static inline struct kmem_cache *pool_cache(struct rxe_pool *pool) -{ - return rxe_type_info[pool->type].cache; -} - -static void rxe_cache_clean(size_t cnt) -{ - int i; - struct rxe_type_info *type; - - for (i = 0; i < cnt; i++) { - type = &rxe_type_info[i]; - if (!(type->flags & RXE_POOL_NO_ALLOC)) { - kmem_cache_destroy(type->cache); - type->cache = NULL; - } - } -} - -int rxe_cache_init(void) +void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, + enum rxe_elem_type type) { - int err; - int i; - size_t size; - struct rxe_type_info *type; - - for (i = 0; i < RXE_NUM_TYPES; i++) { - type = &rxe_type_info[i]; - size = ALIGN(type->size, RXE_POOL_ALIGN); - if (!(type->flags & RXE_POOL_NO_ALLOC)) { - type->cache = - kmem_cache_create(type->name, size, - RXE_POOL_ALIGN, - RXE_POOL_CACHE_FLAGS, NULL); - if (!type->cache) { - pr_err("Unable to init kmem cache for %s\n", - type->name); - err = -ENOMEM; - goto err1; - } - } - } - - return 0; - -err1: - rxe_cache_clean(i); - - return err; -} - -void rxe_cache_exit(void) -{ - rxe_cache_clean(RXE_NUM_TYPES); -} - -static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) -{ - int err = 0; - size_t size; - - if ((max - min + 1) < pool->max_elem) { - pr_warn("not enough indices for max_elem\n"); - err = -EINVAL; - goto out; - } - - pool->max_index = max; - pool->min_index = min; - - size = BITS_TO_LONGS(max - min + 1) * sizeof(long); - pool->table = kmalloc(size, GFP_KERNEL); - if (!pool->table) { - err = -ENOMEM; - goto out; - } - - pool->table_size = size; - bitmap_zero(pool->table, max - min + 1); - -out: - return err; -} - -int rxe_pool_init( - struct rxe_dev *rxe, - struct rxe_pool *pool, - enum rxe_elem_type type, - unsigned int max_elem) -{ - int err = 0; - size_t size = rxe_type_info[type].size; + const struct rxe_type_info *info = &rxe_type_info[type]; memset(pool, 0, sizeof(*pool)); pool->rxe = rxe; + pool->name = info->name; pool->type = type; - pool->max_elem = max_elem; - pool->elem_size = ALIGN(size, RXE_POOL_ALIGN); - pool->flags = rxe_type_info[type].flags; - pool->tree = RB_ROOT; - pool->cleanup = rxe_type_info[type].cleanup; + pool->max_elem = info->max_elem; + pool->elem_size = ALIGN(info->size, RXE_POOL_ALIGN); + pool->elem_offset = info->elem_offset; + pool->cleanup = info->cleanup; atomic_set(&pool->num_elem, 0); - kref_init(&pool->ref_cnt); - - rwlock_init(&pool->pool_lock); - - if (rxe_type_info[type].flags & RXE_POOL_INDEX) { - err = rxe_pool_init_index(pool, - rxe_type_info[type].max_index, - rxe_type_info[type].min_index); - if (err) - goto out; - } - - if (rxe_type_info[type].flags & RXE_POOL_KEY) { - pool->key_offset = rxe_type_info[type].key_offset; - pool->key_size = rxe_type_info[type].key_size; - } - - pool->state = RXE_POOL_STATE_VALID; - -out: - return err; -} - -static void rxe_pool_release(struct kref *kref) -{ - struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt); - - pool->state = RXE_POOL_STATE_INVALID; - kfree(pool->table); -} - -static void rxe_pool_put(struct rxe_pool *pool) -{ - kref_put(&pool->ref_cnt, rxe_pool_release); + xa_init_flags(&pool->xa, XA_FLAGS_ALLOC); + pool->limit.min = info->min_index; + pool->limit.max = info->max_index; } void rxe_pool_cleanup(struct rxe_pool *pool) { - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - pool->state = RXE_POOL_STATE_INVALID; - if (atomic_read(&pool->num_elem) > 0) - pr_warn("%s pool destroyed with unfree'd elem\n", - pool_name(pool)); - write_unlock_irqrestore(&pool->pool_lock, flags); - - rxe_pool_put(pool); -} - -static u32 alloc_index(struct rxe_pool *pool) -{ - u32 index; - u32 range = pool->max_index - pool->min_index + 1; - - index = find_next_zero_bit(pool->table, range, pool->last); - if (index >= range) - index = find_first_zero_bit(pool->table, range); - - WARN_ON_ONCE(index >= range); - set_bit(index, pool->table); - pool->last = index; - return index + pool->min_index; -} - -static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new) -{ - struct rb_node **link = &pool->tree.rb_node; - struct rb_node *parent = NULL; - struct rxe_pool_entry *elem; - - while (*link) { - parent = *link; - elem = rb_entry(parent, struct rxe_pool_entry, node); - - if (elem->index == new->index) { - pr_warn("element already exists!\n"); - goto out; - } - - if (elem->index > new->index) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - - rb_link_node(&new->node, parent, link); - rb_insert_color(&new->node, &pool->tree); -out: - return; -} - -static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) -{ - struct rb_node **link = &pool->tree.rb_node; - struct rb_node *parent = NULL; - struct rxe_pool_entry *elem; - int cmp; - - while (*link) { - parent = *link; - elem = rb_entry(parent, struct rxe_pool_entry, node); - - cmp = memcmp((u8 *)elem + pool->key_offset, - (u8 *)new + pool->key_offset, pool->key_size); - - if (cmp == 0) { - pr_warn("key already exists!\n"); - goto out; - } - - if (cmp > 0) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - - rb_link_node(&new->node, parent, link); - rb_insert_color(&new->node, &pool->tree); -out: - return; -} - -void rxe_add_key(void *arg, void *key) -{ - struct rxe_pool_entry *elem = arg; - struct rxe_pool *pool = elem->pool; - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - memcpy((u8 *)elem + pool->key_offset, key, pool->key_size); - insert_key(pool, elem); - write_unlock_irqrestore(&pool->pool_lock, flags); -} - -void rxe_drop_key(void *arg) -{ - struct rxe_pool_entry *elem = arg; - struct rxe_pool *pool = elem->pool; - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - rb_erase(&elem->node, &pool->tree); - write_unlock_irqrestore(&pool->pool_lock, flags); -} - -void rxe_add_index(void *arg) -{ - struct rxe_pool_entry *elem = arg; - struct rxe_pool *pool = elem->pool; - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - elem->index = alloc_index(pool); - insert_index(pool, elem); - write_unlock_irqrestore(&pool->pool_lock, flags); -} - -void rxe_drop_index(void *arg) -{ - struct rxe_pool_entry *elem = arg; - struct rxe_pool *pool = elem->pool; - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - clear_bit(elem->index - pool->min_index, pool->table); - rb_erase(&elem->node, &pool->tree); - write_unlock_irqrestore(&pool->pool_lock, flags); + WARN_ON(!xa_empty(&pool->xa)); } void *rxe_alloc(struct rxe_pool *pool) { - struct rxe_pool_entry *elem; - unsigned long flags; - - might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); + struct rxe_pool_elem *elem; + void *obj; + int err; - read_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != RXE_POOL_STATE_VALID) { - read_unlock_irqrestore(&pool->pool_lock, flags); + if (WARN_ON(!(pool->type == RXE_TYPE_MR))) return NULL; - } - kref_get(&pool->ref_cnt); - read_unlock_irqrestore(&pool->pool_lock, flags); - - if (!ib_device_try_get(&pool->rxe->ib_dev)) - goto out_put_pool; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) - goto out_cnt; + goto err_cnt; - elem = kmem_cache_zalloc(pool_cache(pool), - (pool->flags & RXE_POOL_ATOMIC) ? - GFP_ATOMIC : GFP_KERNEL); - if (!elem) - goto out_cnt; + obj = kzalloc(pool->elem_size, GFP_KERNEL); + if (!obj) + goto err_cnt; + + elem = (struct rxe_pool_elem *)((u8 *)obj + pool->elem_offset); elem->pool = pool; + elem->obj = obj; kref_init(&elem->ref_cnt); + init_completion(&elem->complete); + + /* allocate index in array but leave pointer as NULL so it + * can't be looked up until rxe_finalize() is called + */ + err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit, + &pool->next, GFP_KERNEL); + if (err < 0) + goto err_free; - return elem; + return obj; -out_cnt: +err_free: + kfree(obj); +err_cnt: atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); -out_put_pool: - rxe_pool_put(pool); return NULL; } -int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) +int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem, + bool sleepable) { - unsigned long flags; - - might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); + int err; + gfp_t gfp_flags; - read_lock_irqsave(&pool->pool_lock, flags); - if (pool->state != RXE_POOL_STATE_VALID) { - read_unlock_irqrestore(&pool->pool_lock, flags); + if (WARN_ON(pool->type == RXE_TYPE_MR)) return -EINVAL; - } - kref_get(&pool->ref_cnt); - read_unlock_irqrestore(&pool->pool_lock, flags); - - if (!ib_device_try_get(&pool->rxe->ib_dev)) - goto out_put_pool; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) - goto out_cnt; + goto err_cnt; elem->pool = pool; + elem->obj = (u8 *)elem - pool->elem_offset; kref_init(&elem->ref_cnt); + init_completion(&elem->complete); + + /* AH objects are unique in that the create_ah verb + * can be called in atomic context. If the create_ah + * call is not sleepable use GFP_ATOMIC. + */ + gfp_flags = sleepable ? GFP_KERNEL : GFP_ATOMIC; + + if (sleepable) + might_sleep(); + err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit, + &pool->next, gfp_flags); + if (err < 0) + goto err_cnt; return 0; -out_cnt: +err_cnt: atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); -out_put_pool: - rxe_pool_put(pool); return -EINVAL; } -void rxe_elem_release(struct kref *kref) +void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) { - struct rxe_pool_entry *elem = - container_of(kref, struct rxe_pool_entry, ref_cnt); - struct rxe_pool *pool = elem->pool; - - if (pool->cleanup) - pool->cleanup(elem); - - if (!(pool->flags & RXE_POOL_NO_ALLOC)) - kmem_cache_free(pool_cache(pool), elem); - atomic_dec(&pool->num_elem); - ib_device_put(&pool->rxe->ib_dev); - rxe_pool_put(pool); + struct rxe_pool_elem *elem; + struct xarray *xa = &pool->xa; + void *obj; + + rcu_read_lock(); + elem = xa_load(xa, index); + if (elem && kref_get_unless_zero(&elem->ref_cnt)) + obj = elem->obj; + else + obj = NULL; + rcu_read_unlock(); + + return obj; } -void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) +static void rxe_elem_release(struct kref *kref) { - struct rb_node *node = NULL; - struct rxe_pool_entry *elem = NULL; - unsigned long flags; - - read_lock_irqsave(&pool->pool_lock, flags); - - if (pool->state != RXE_POOL_STATE_VALID) - goto out; - - node = pool->tree.rb_node; + struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt); - while (node) { - elem = rb_entry(node, struct rxe_pool_entry, node); - - if (elem->index > index) - node = node->rb_left; - else if (elem->index < index) - node = node->rb_right; - else { - kref_get(&elem->ref_cnt); - break; - } - } - -out: - read_unlock_irqrestore(&pool->pool_lock, flags); - return node ? elem : NULL; + complete(&elem->complete); } -void *rxe_pool_get_key(struct rxe_pool *pool, void *key) +int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable) { - struct rb_node *node = NULL; - struct rxe_pool_entry *elem = NULL; - int cmp; - unsigned long flags; + struct rxe_pool *pool = elem->pool; + struct xarray *xa = &pool->xa; + static int timeout = RXE_POOL_TIMEOUT; + int ret, err = 0; + void *xa_ret; + + if (sleepable) + might_sleep(); + + /* erase xarray entry to prevent looking up + * the pool elem from its index + */ + xa_ret = xa_erase(xa, elem->index); + WARN_ON(xa_err(xa_ret)); + + /* if this is the last call to rxe_put complete the + * object. It is safe to touch obj->elem after this since + * it is freed below + */ + __rxe_put(elem); + + /* wait until all references to the object have been + * dropped before final object specific cleanup and + * return to rdma-core + */ + if (sleepable) { + if (!completion_done(&elem->complete) && timeout) { + ret = wait_for_completion_timeout(&elem->complete, + timeout); + + /* Shouldn't happen. There are still references to + * the object but, rather than deadlock, free the + * object or pass back to rdma-core. + */ + if (WARN_ON(!ret)) + err = -EINVAL; + } + } else { + unsigned long until = jiffies + timeout; + + /* AH objects are unique in that the destroy_ah verb + * can be called in atomic context. This delay + * replaces the wait_for_completion call above + * when the destroy_ah call is not sleepable + */ + while (!completion_done(&elem->complete) && + time_before(jiffies, until)) + mdelay(1); + + if (WARN_ON(!completion_done(&elem->complete))) + err = -EINVAL; + } - read_lock_irqsave(&pool->pool_lock, flags); + if (pool->cleanup) + pool->cleanup(elem); - if (pool->state != RXE_POOL_STATE_VALID) - goto out; + if (pool->type == RXE_TYPE_MR) + kfree_rcu(elem->obj); - node = pool->tree.rb_node; + atomic_dec(&pool->num_elem); - while (node) { - elem = rb_entry(node, struct rxe_pool_entry, node); + return err; +} - cmp = memcmp((u8 *)elem + pool->key_offset, - key, pool->key_size); +int __rxe_get(struct rxe_pool_elem *elem) +{ + return kref_get_unless_zero(&elem->ref_cnt); +} - if (cmp > 0) - node = node->rb_left; - else if (cmp < 0) - node = node->rb_right; - else - break; - } +int __rxe_put(struct rxe_pool_elem *elem) +{ + return kref_put(&elem->ref_cnt, rxe_elem_release); +} - if (node) - kref_get(&elem->ref_cnt); +void __rxe_finalize(struct rxe_pool_elem *elem) +{ + void *xa_ret; -out: - read_unlock_irqrestore(&pool->pool_lock, flags); - return node ? elem : NULL; + xa_ret = xa_store(&elem->pool->xa, elem->index, elem, GFP_KERNEL); + WARN_ON(xa_err(xa_ret)); } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 2f2cff1cbe43..9d83cb32092f 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -1,49 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_POOL_H #define RXE_POOL_H -#define RXE_POOL_ALIGN (16) -#define RXE_POOL_CACHE_FLAGS (0) - -enum rxe_pool_flags { - RXE_POOL_ATOMIC = BIT(0), - RXE_POOL_INDEX = BIT(1), - RXE_POOL_KEY = BIT(2), - RXE_POOL_NO_ALLOC = BIT(4), -}; - enum rxe_elem_type { RXE_TYPE_UC, RXE_TYPE_PD, @@ -53,78 +16,40 @@ enum rxe_elem_type { RXE_TYPE_CQ, RXE_TYPE_MR, RXE_TYPE_MW, - RXE_TYPE_MC_GRP, - RXE_TYPE_MC_ELEM, RXE_NUM_TYPES, /* keep me last */ }; -struct rxe_pool_entry; - -struct rxe_type_info { - const char *name; - size_t size; - void (*cleanup)(struct rxe_pool_entry *obj); - enum rxe_pool_flags flags; - u32 max_index; - u32 min_index; - size_t key_offset; - size_t key_size; - struct kmem_cache *cache; -}; - -extern struct rxe_type_info rxe_type_info[]; - -enum rxe_pool_state { - RXE_POOL_STATE_INVALID, - RXE_POOL_STATE_VALID, -}; - -struct rxe_pool_entry { +struct rxe_pool_elem { struct rxe_pool *pool; + void *obj; struct kref ref_cnt; struct list_head list; - - /* only used if indexed or keyed */ - struct rb_node node; + struct completion complete; u32 index; }; struct rxe_pool { struct rxe_dev *rxe; - rwlock_t pool_lock; /* protects pool add/del/search */ - size_t elem_size; - struct kref ref_cnt; - void (*cleanup)(struct rxe_pool_entry *obj); - enum rxe_pool_state state; - enum rxe_pool_flags flags; + const char *name; + void (*cleanup)(struct rxe_pool_elem *elem); enum rxe_elem_type type; unsigned int max_elem; atomic_t num_elem; + size_t elem_size; + size_t elem_offset; - /* only used if indexed or keyed */ - struct rb_root tree; - unsigned long *table; - size_t table_size; - u32 max_index; - u32 min_index; - u32 last; - size_t key_offset; - size_t key_size; + struct xarray xa; + struct xa_limit limit; + u32 next; }; -/* initialize slab caches for managed objects */ -int rxe_cache_init(void); - -/* cleanup slab caches for managed objects */ -void rxe_cache_exit(void); - /* initialize a pool of objects with given limit on * number of elements. gets parameters from rxe_type_info * pool elements will be allocated out of a slab cache */ -int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, - enum rxe_elem_type type, u32 max_elem); +void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, + enum rxe_elem_type type); /* free resources from object pool */ void rxe_pool_cleanup(struct rxe_pool *pool); @@ -133,37 +58,28 @@ void rxe_pool_cleanup(struct rxe_pool *pool); void *rxe_alloc(struct rxe_pool *pool); /* connect already allocated object to pool */ -int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem); - -/* assign an index to an indexed object and insert object into - * pool's rb tree - */ -void rxe_add_index(void *elem); - -/* drop an index and remove object from rb tree */ -void rxe_drop_index(void *elem); - -/* assign a key to a keyed object and insert object into - * pool's rb tree - */ -void rxe_add_key(void *elem, void *key); - -/* remove elem from rb tree */ -void rxe_drop_key(void *elem); +int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem, + bool sleepable); +#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->elem, true) +#define rxe_add_to_pool_ah(pool, obj, sleepable) __rxe_add_to_pool(pool, \ + &(obj)->elem, sleepable) /* lookup an indexed object from index. takes a reference on object */ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index); -/* lookup keyed object from key. takes a reference on the object */ -void *rxe_pool_get_key(struct rxe_pool *pool, void *key); +int __rxe_get(struct rxe_pool_elem *elem); +#define rxe_get(obj) __rxe_get(&(obj)->elem) + +int __rxe_put(struct rxe_pool_elem *elem); +#define rxe_put(obj) __rxe_put(&(obj)->elem) -/* cleanup an object when all references are dropped */ -void rxe_elem_release(struct kref *kref); +int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable); +#define rxe_cleanup(obj) __rxe_cleanup(&(obj)->elem, true) +#define rxe_cleanup_ah(obj, sleepable) __rxe_cleanup(&(obj)->elem, sleepable) -/* take a reference on an object */ -#define rxe_add_ref(elem) kref_get(&(elem)->pelem.ref_cnt) +#define rxe_read(obj) kref_read(&(obj)->elem.ref_cnt) -/* drop a reference on an object */ -#define rxe_drop_ref(elem) kref_put(&(elem)->pelem.ref_cnt, rxe_elem_release) +void __rxe_finalize(struct rxe_pool_elem *elem); +#define rxe_finalize(obj) __rxe_finalize(&(obj)->elem) #endif /* RXE_POOL_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index ec21f616ac98..a62bab88415c 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/skbuff.h> @@ -46,34 +19,34 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, int has_srq) { if (cap->max_send_wr > rxe->attr.max_qp_wr) { - pr_warn("invalid send wr = %d > %d\n", - cap->max_send_wr, rxe->attr.max_qp_wr); + pr_debug("invalid send wr = %u > %d\n", + cap->max_send_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_send_sge > rxe->attr.max_send_sge) { - pr_warn("invalid send sge = %d > %d\n", - cap->max_send_sge, rxe->attr.max_send_sge); + pr_debug("invalid send sge = %u > %d\n", + cap->max_send_sge, rxe->attr.max_send_sge); goto err1; } if (!has_srq) { if (cap->max_recv_wr > rxe->attr.max_qp_wr) { - pr_warn("invalid recv wr = %d > %d\n", - cap->max_recv_wr, rxe->attr.max_qp_wr); + pr_debug("invalid recv wr = %u > %d\n", + cap->max_recv_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_recv_sge > rxe->attr.max_recv_sge) { - pr_warn("invalid recv sge = %d > %d\n", - cap->max_recv_sge, rxe->attr.max_recv_sge); + pr_debug("invalid recv sge = %u > %d\n", + cap->max_recv_sge, rxe->attr.max_recv_sge); goto err1; } } if (cap->max_inline_data > rxe->max_inline_data) { - pr_warn("invalid max inline data = %d > %d\n", - cap->max_inline_data, rxe->max_inline_data); + pr_debug("invalid max inline data = %u > %d\n", + cap->max_inline_data, rxe->max_inline_data); goto err1; } @@ -89,29 +62,34 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) struct rxe_port *port; int port_num = init->port_num; + switch (init->qp_type) { + case IB_QPT_GSI: + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + break; + default: + return -EOPNOTSUPP; + } + if (!init->recv_cq || !init->send_cq) { - pr_warn("missing cq\n"); + pr_debug("missing cq\n"); goto err1; } if (rxe_qp_chk_cap(rxe, cap, !!init->srq)) goto err1; - if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) { + if (init->qp_type == IB_QPT_GSI) { if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) { - pr_warn("invalid port = %d\n", port_num); + pr_debug("invalid port = %d\n", port_num); goto err1; } port = &rxe->port; - if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) { - pr_warn("SMI QP exists for port %d\n", port_num); - goto err1; - } - if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) { - pr_warn("GSI QP exists for port %d\n", port_num); + pr_debug("GSI QP exists for port %d\n", port_num); goto err1; } } @@ -142,22 +120,15 @@ static void free_rd_atomic_resources(struct rxe_qp *qp) for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { struct resp_res *res = &qp->resp.resources[i]; - free_rd_atomic_resource(qp, res); + free_rd_atomic_resource(res); } kfree(qp->resp.resources); qp->resp.resources = NULL; } } -void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res) +void free_rd_atomic_resource(struct resp_res *res) { - if (res->type == RXE_ATOMIC_MASK) { - rxe_drop_ref(qp); - kfree_skb(res->atomic.skb); - } else if (res->type == RXE_READ_MASK) { - if (res->read.mr) - rxe_drop_ref(res->read.mr); - } res->type = 0; } @@ -169,7 +140,7 @@ static void cleanup_rd_atomic_resources(struct rxe_qp *qp) if (qp->resp.resources) { for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { res = &qp->resp.resources[i]; - free_rd_atomic_resource(qp, res); + free_rd_atomic_resource(res); } } } @@ -184,16 +155,10 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, qp->attr.path_mtu = 1; qp->mtu = ib_mtu_enum_to_int(qp->attr.path_mtu); - qpn = qp->pelem.index; + qpn = qp->elem.index; port = &rxe->port; switch (init->qp_type) { - case IB_QPT_SMI: - qp->ibqp.qp_num = 0; - port->qp_smi_index = qpn; - qp->attr.port_num = init->port_num; - break; - case IB_QPT_GSI: qp->ibqp.qp_num = 1; port->qp_gsi_index = qpn; @@ -205,12 +170,15 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, break; } - INIT_LIST_HEAD(&qp->grp_list); + spin_lock_init(&qp->state_lock); - skb_queue_head_init(&qp->send_pkts); + spin_lock_init(&qp->req.task.state_lock); + spin_lock_init(&qp->resp.task.state_lock); + spin_lock_init(&qp->comp.task.state_lock); - spin_lock_init(&qp->grp_lock); - spin_lock_init(&qp->state_lock); + spin_lock_init(&qp->sq.sq_lock); + spin_lock_init(&qp->rq.producer_lock); + spin_lock_init(&qp->rq.consumer_lock); atomic_set(&qp->ssn, 0); atomic_set(&qp->skb_out, 0); @@ -222,6 +190,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, { int err; int wqe_size; + enum queue_type type; err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); if (err < 0) @@ -235,8 +204,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, * the port number must be in the Dynamic Ports range * (0xc000 - 0xffff). */ - qp->src_port = RXE_ROCE_V2_SPORT + - (hash_32_generic(qp_num(qp), 14) & 0x3fff); + qp->src_port = RXE_ROCE_V2_SPORT + (hash_32(qp_num(qp), 14) & 0x3fff); qp->sq.max_wr = init->cap.max_send_wr; /* These caps are limited by rxe_qp_chk_cap() done by the caller */ @@ -247,7 +215,9 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, qp->sq.max_inline = init->cap.max_inline_data = wqe_size; wqe_size += sizeof(struct rxe_send_wqe); - qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, wqe_size); + type = QUEUE_TYPE_FROM_CLIENT; + qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, + wqe_size, type); if (!qp->sq.queue) return -ENOMEM; @@ -258,20 +228,23 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, if (err) { vfree(qp->sq.queue->buf); kfree(qp->sq.queue); + qp->sq.queue = NULL; return err; } - qp->req.wqe_index = producer_index(qp->sq.queue); + qp->req.wqe_index = queue_get_producer(qp->sq.queue, + QUEUE_TYPE_FROM_CLIENT); + qp->req.state = QP_STATE_RESET; + qp->comp.state = QP_STATE_RESET; qp->req.opcode = -1; qp->comp.opcode = -1; - spin_lock_init(&qp->sq.sq_lock); skb_queue_head_init(&qp->req_pkts); - rxe_init_task(rxe, &qp->req.task, qp, + rxe_init_task(&qp->req.task, qp, rxe_requester, "req"); - rxe_init_task(rxe, &qp->comp.task, qp, + rxe_init_task(&qp->comp.task, qp, rxe_completer, "comp"); qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ @@ -289,6 +262,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, { int err; int wqe_size; + enum queue_type type; if (!qp->srq) { qp->rq.max_wr = init->cap.max_recv_wr; @@ -299,9 +273,9 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n", qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size); - qp->rq.queue = rxe_queue_init(rxe, - &qp->rq.max_wr, - wqe_size); + type = QUEUE_TYPE_FROM_CLIENT; + qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, + wqe_size, type); if (!qp->rq.queue) return -ENOMEM; @@ -311,16 +285,14 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, if (err) { vfree(qp->rq.queue->buf); kfree(qp->rq.queue); + qp->rq.queue = NULL; return err; } } - spin_lock_init(&qp->rq.producer_lock); - spin_lock_init(&qp->rq.consumer_lock); - skb_queue_head_init(&qp->resp_pkts); - rxe_init_task(rxe, &qp->resp.task, qp, + rxe_init_task(&qp->resp.task, qp, rxe_responder, "resp"); qp->resp.opcode = OPCODE_NONE; @@ -342,17 +314,20 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct rxe_cq *scq = to_rcq(init->send_cq); struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL; - rxe_add_ref(pd); - rxe_add_ref(rcq); - rxe_add_ref(scq); + rxe_get(pd); + rxe_get(rcq); + rxe_get(scq); if (srq) - rxe_add_ref(srq); + rxe_get(srq); qp->pd = pd; qp->rcq = rcq; qp->scq = scq; qp->srq = srq; + atomic_inc(&rcq->num_wq); + atomic_inc(&scq->num_wq); + rxe_qp_init_misc(rxe, qp, init); err = rxe_qp_init_req(rxe, qp, init, udata, uresp); @@ -370,12 +345,21 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, err2: rxe_queue_cleanup(qp->sq.queue); + qp->sq.queue = NULL; err1: + atomic_dec(&rcq->num_wq); + atomic_dec(&scq->num_wq); + + qp->pd = NULL; + qp->rcq = NULL; + qp->scq = NULL; + qp->srq = NULL; + if (srq) - rxe_drop_ref(srq); - rxe_drop_ref(scq); - rxe_drop_ref(rcq); - rxe_drop_ref(pd); + rxe_put(srq); + rxe_put(scq); + rxe_put(rcq); + rxe_put(pd); return err; } @@ -418,7 +402,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, attr->qp_state : cur_state; if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) { - pr_warn("invalid mask or state for qp\n"); + pr_debug("invalid mask or state for qp\n"); goto err1; } @@ -432,7 +416,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_PORT) { if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) { - pr_warn("invalid port %d\n", attr->port_num); + pr_debug("invalid port %d\n", attr->port_num); goto err1; } } @@ -447,12 +431,12 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) goto err1; if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) { - pr_warn("invalid alt port %d\n", attr->alt_port_num); + pr_debug("invalid alt port %d\n", attr->alt_port_num); goto err1; } if (attr->alt_timeout > 31) { - pr_warn("invalid QP alt timeout %d > 31\n", - attr->alt_timeout); + pr_debug("invalid QP alt timeout %d > 31\n", + attr->alt_timeout); goto err1; } } @@ -473,17 +457,16 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_MAX_QP_RD_ATOMIC) { if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) { - pr_warn("invalid max_rd_atomic %d > %d\n", - attr->max_rd_atomic, - rxe->attr.max_qp_rd_atom); + pr_debug("invalid max_rd_atomic %d > %d\n", + attr->max_rd_atomic, + rxe->attr.max_qp_rd_atom); goto err1; } } if (mask & IB_QP_TIMEOUT) { if (attr->timeout > 31) { - pr_warn("invalid QP timeout %d > 31\n", - attr->timeout); + pr_debug("invalid QP timeout %d > 31\n", attr->timeout); goto err1; } } @@ -509,6 +492,7 @@ static void rxe_qp_reset(struct rxe_qp *qp) /* move qp to the reset state */ qp->req.state = QP_STATE_RESET; + qp->comp.state = QP_STATE_RESET; qp->resp.state = QP_STATE_RESET; /* let state machines reset themselves drain work and packet queues @@ -526,6 +510,7 @@ static void rxe_qp_reset(struct rxe_qp *qp) atomic_set(&qp->ssn, 0); qp->req.opcode = -1; qp->req.need_retry = 0; + qp->req.wait_for_rnr_timer = 0; qp->req.noack_pkts = 0; qp->resp.msn = 0; qp->resp.opcode = -1; @@ -534,7 +519,7 @@ static void rxe_qp_reset(struct rxe_qp *qp) qp->resp.sent_psn_nak = 0; if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); + rxe_put(qp->resp.mr); qp->resp.mr = NULL; } @@ -571,6 +556,7 @@ void rxe_qp_error(struct rxe_qp *qp) { qp->req.state = QP_STATE_ERROR; qp->resp.state = QP_STATE_ERROR; + qp->comp.state = QP_STATE_ERROR; qp->attr.qp_state = IB_QPS_ERR; /* drain work and packet queues */ @@ -590,15 +576,16 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, int err; if (mask & IB_QP_MAX_QP_RD_ATOMIC) { - int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic); + int max_rd_atomic = attr->max_rd_atomic ? + roundup_pow_of_two(attr->max_rd_atomic) : 0; qp->attr.max_rd_atomic = max_rd_atomic; atomic_set(&qp->req.rd_atomic, max_rd_atomic); } if (mask & IB_QP_MAX_DEST_RD_ATOMIC) { - int max_dest_rd_atomic = - __roundup_pow_of_two(attr->max_dest_rd_atomic); + int max_dest_rd_atomic = attr->max_dest_rd_atomic ? + roundup_pow_of_two(attr->max_dest_rd_atomic) : 0; qp->attr.max_dest_rd_atomic = max_dest_rd_atomic; @@ -627,9 +614,8 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, if (mask & IB_QP_QKEY) qp->attr.qkey = attr->qkey; - if (mask & IB_QP_AV) { + if (mask & IB_QP_AV) rxe_init_av(&attr->ah_attr, &qp->pri_av); - } if (mask & IB_QP_ALT_PATH) { rxe_init_av(&attr->alt_ah_attr, &qp->alt_av); @@ -708,6 +694,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, pr_debug("qp#%d state -> INIT\n", qp_num(qp)); qp->req.state = QP_STATE_INIT; qp->resp.state = QP_STATE_INIT; + qp->comp.state = QP_STATE_INIT; break; case IB_QPS_RTR: @@ -718,6 +705,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, case IB_QPS_RTS: pr_debug("qp#%d state -> RTS\n", qp_num(qp)); qp->req.state = QP_STATE_READY; + qp->comp.state = QP_STATE_READY; break; case IB_QPS_SQD: @@ -776,9 +764,25 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) return 0; } -/* called by the destroy qp verb */ -void rxe_qp_destroy(struct rxe_qp *qp) +int rxe_qp_chk_destroy(struct rxe_qp *qp) { + /* See IBA o10-2.2.3 + * An attempt to destroy a QP while attached to a mcast group + * will fail immediately. + */ + if (atomic_read(&qp->mcg_num)) { + pr_debug("Attempt to destroy QP while attached to multicast group\n"); + return -EBUSY; + } + + return 0; +} + +/* called when the last reference to the qp is dropped */ +static void rxe_qp_do_cleanup(struct work_struct *work) +{ + struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); + qp->valid = 0; qp->qp_timeout_jiffies = 0; rxe_cleanup_task(&qp->resp.task); @@ -792,54 +796,54 @@ void rxe_qp_destroy(struct rxe_qp *qp) rxe_cleanup_task(&qp->comp.task); /* flush out any receive wr's or pending requests */ - __rxe_do_task(&qp->req.task); + if (qp->req.task.func) + __rxe_do_task(&qp->req.task); + if (qp->sq.queue) { __rxe_do_task(&qp->comp.task); __rxe_do_task(&qp->req.task); } -} - -/* called when the last reference to the qp is dropped */ -static void rxe_qp_do_cleanup(struct work_struct *work) -{ - struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); - - rxe_drop_all_mcast_groups(qp); if (qp->sq.queue) rxe_queue_cleanup(qp->sq.queue); if (qp->srq) - rxe_drop_ref(qp->srq); + rxe_put(qp->srq); if (qp->rq.queue) rxe_queue_cleanup(qp->rq.queue); - if (qp->scq) - rxe_drop_ref(qp->scq); - if (qp->rcq) - rxe_drop_ref(qp->rcq); - if (qp->pd) - rxe_drop_ref(qp->pd); + if (qp->scq) { + atomic_dec(&qp->scq->num_wq); + rxe_put(qp->scq); + } - if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); - qp->resp.mr = NULL; + if (qp->rcq) { + atomic_dec(&qp->rcq->num_wq); + rxe_put(qp->rcq); } + if (qp->pd) + rxe_put(qp->pd); + + if (qp->resp.mr) + rxe_put(qp->resp.mr); + if (qp_type(qp) == IB_QPT_RC) sk_dst_reset(qp->sk->sk); free_rd_atomic_resources(qp); - kernel_sock_shutdown(qp->sk, SHUT_RDWR); - sock_release(qp->sk); + if (qp->sk) { + kernel_sock_shutdown(qp->sk, SHUT_RDWR); + sock_release(qp->sk); + } } /* called when the last reference to the qp is dropped */ -void rxe_qp_cleanup(struct rxe_pool_entry *arg) +void rxe_qp_cleanup(struct rxe_pool_elem *elem) { - struct rxe_qp *qp = container_of(arg, typeof(*qp), pelem); + struct rxe_qp *qp = container_of(elem, typeof(*qp), elem); execute_in_process_context(rxe_qp_do_cleanup, &qp->cleanup_work); } diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index ff92704de32f..d6dbf5a0058d 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must retailuce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/vmalloc.h> @@ -45,12 +18,15 @@ int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf, if (outbuf) { ip = rxe_create_mmap_info(rxe, buf_size, udata, buf); - if (!ip) + if (IS_ERR(ip)) { + err = PTR_ERR(ip); goto err1; + } - err = copy_to_user(outbuf, &ip->info, sizeof(ip->info)); - if (err) + if (copy_to_user(outbuf, &ip->info, sizeof(ip->info))) { + err = -EFAULT; goto err2; + } spin_lock_bh(&rxe->pending_lock); list_add(&ip->pending_mmaps, &rxe->pending_mmaps); @@ -64,7 +40,7 @@ int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf, err2: kfree(ip); err1: - return -EINVAL; + return err; } inline void rxe_queue_reset(struct rxe_queue *q) @@ -76,9 +52,8 @@ inline void rxe_queue_reset(struct rxe_queue *q) memset(q->buf->data, 0, q->buf_size - sizeof(struct rxe_queue_buf)); } -struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, - int *num_elem, - unsigned int elem_size) +struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem, + unsigned int elem_size, enum queue_type type) { struct rxe_queue *q; size_t buf_size; @@ -88,11 +63,12 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, if (*num_elem < 0) goto err1; - q = kmalloc(sizeof(*q), GFP_KERNEL); + q = kzalloc(sizeof(*q), GFP_KERNEL); if (!q) goto err1; q->rxe = rxe; + q->type = type; /* used in resize, only need to copy used part of queue */ q->elem_size = elem_size; @@ -135,16 +111,35 @@ err1: static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q, unsigned int num_elem) { - if (!queue_empty(q) && (num_elem < queue_count(q))) + enum queue_type type = q->type; + u32 new_prod; + u32 prod; + u32 cons; + + if (!queue_empty(q, q->type) && (num_elem < queue_count(q, type))) return -EINVAL; - while (!queue_empty(q)) { - memcpy(producer_addr(new_q), consumer_addr(q), - new_q->elem_size); - advance_producer(new_q); - advance_consumer(q); + new_prod = queue_get_producer(new_q, type); + prod = queue_get_producer(q, type); + cons = queue_get_consumer(q, type); + + while ((prod - cons) & q->index_mask) { + memcpy(queue_addr_from_index(new_q, new_prod), + queue_addr_from_index(q, cons), new_q->elem_size); + new_prod = queue_next_index(new_q, new_prod); + cons = queue_next_index(q, cons); } + new_q->buf->producer_index = new_prod; + q->buf->consumer_index = cons; + + /* update private index copies */ + if (type == QUEUE_TYPE_TO_CLIENT) + new_q->index = new_q->buf->producer_index; + else + q->index = q->buf->consumer_index; + + /* exchange rxe_queue headers */ swap(*q, *new_q); return 0; @@ -158,9 +153,10 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, struct rxe_queue *new_q; unsigned int num_elem = *num_elem_p; int err; - unsigned long flags = 0, flags1; + unsigned long producer_flags; + unsigned long consumer_flags; - new_q = rxe_queue_init(q->rxe, &num_elem, elem_size); + new_q = rxe_queue_init(q->rxe, &num_elem, elem_size, q->type); if (!new_q) return -ENOMEM; @@ -172,17 +168,17 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, goto err1; } - spin_lock_irqsave(consumer_lock, flags1); + spin_lock_irqsave(consumer_lock, consumer_flags); if (producer_lock) { - spin_lock_irqsave(producer_lock, flags); + spin_lock_irqsave(producer_lock, producer_flags); err = resize_finish(q, new_q, num_elem); - spin_unlock_irqrestore(producer_lock, flags); + spin_unlock_irqrestore(producer_lock, producer_flags); } else { err = resize_finish(q, new_q, num_elem); } - spin_unlock_irqrestore(consumer_lock, flags1); + spin_unlock_irqrestore(consumer_lock, consumer_flags); rxe_queue_cleanup(new_q); /* new/old dep on err */ if (err) diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index acd0a925481c..ed44042782fa 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -1,71 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_QUEUE_H #define RXE_QUEUE_H -/* implements a simple circular buffer that can optionally be - * shared between user space and the kernel and can be resized - - * the requested element size is rounded up to a power of 2 - * and the number of elements in the buffer is also rounded - * up to a power of 2. Since the queue is empty when the - * producer and consumer indices match the maximum capacity - * of the queue is one less than the number of element slots +/* Implements a simple circular buffer that is shared between user + * and the driver and can be resized. The requested element size is + * rounded up to a power of 2 and the number of elements in the buffer + * is also rounded up to a power of 2. Since the queue is empty when + * the producer and consumer indices match the maximum capacity of the + * queue is one less than the number of element slots. + * + * Notes: + * - The driver indices are always masked off to q->index_mask + * before storing so do not need to be checked on reads. + * - The user whether user space or kernel is generally + * not trusted so its parameters are masked to make sure + * they do not access the queue out of bounds on reads. + * - The driver indices for queues must not be written + * by user so a local copy is used and a shared copy is + * stored when the local copy is changed. + * - By passing the type in the parameter list separate from q + * the compiler can eliminate the switch statement when the + * actual queue type is known when the function is called at + * compile time. + * - These queues are lock free. The user and driver must protect + * changes to their end of the queues with locks if more than one + * CPU can be accessing it at the same time. */ -/* this data structure is shared between user space and kernel - * space for those cases where the queue is shared. It contains - * the producer and consumer indices. Is also contains a copy - * of the queue size parameters for user space to use but the - * kernel must use the parameters in the rxe_queue struct - * this MUST MATCH the corresponding librxe struct - * for performance reasons arrange to have producer and consumer - * pointers in separate cache lines - * the kernel should always mask the indices to avoid accessing - * memory outside of the data area +/** + * enum queue_type - type of queue + * @QUEUE_TYPE_TO_CLIENT: Queue is written by rxe driver and + * read by client. Used by rxe driver only. + * @QUEUE_TYPE_FROM_CLIENT: Queue is written by client and + * read by rxe driver. Used by rxe driver only. + * @QUEUE_TYPE_TO_DRIVER: Queue is written by client and + * read by rxe driver. Used by kernel client only. + * @QUEUE_TYPE_FROM_DRIVER: Queue is written by rxe driver and + * read by client. Used by kernel client only. */ -struct rxe_queue_buf { - __u32 log2_elem_size; - __u32 index_mask; - __u32 pad_1[30]; - __u32 producer_index; - __u32 pad_2[31]; - __u32 consumer_index; - __u32 pad_3[31]; - __u8 data[0]; +enum queue_type { + QUEUE_TYPE_TO_CLIENT, + QUEUE_TYPE_FROM_CLIENT, + QUEUE_TYPE_TO_DRIVER, + QUEUE_TYPE_FROM_DRIVER, }; +struct rxe_queue_buf; + struct rxe_queue { struct rxe_dev *rxe; struct rxe_queue_buf *buf; @@ -73,7 +59,14 @@ struct rxe_queue { size_t buf_size; size_t elem_size; unsigned int log2_elem_size; - unsigned int index_mask; + u32 index_mask; + enum queue_type type; + /* private copy of index for shared queues between + * kernel space and user space. Kernel reads and writes + * this copy and then replicates to rxe_queue_buf + * for read access by user space. + */ + u32 index; }; int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf, @@ -82,93 +75,186 @@ int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf, void rxe_queue_reset(struct rxe_queue *q); -struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, - int *num_elem, - unsigned int elem_size); +struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem, + unsigned int elem_size, enum queue_type type); int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, unsigned int elem_size, struct ib_udata *udata, struct mminfo __user *outbuf, - /* Protect producers while resizing queue */ - spinlock_t *producer_lock, - /* Protect consumers while resizing queue */ - spinlock_t *consumer_lock); + spinlock_t *producer_lock, spinlock_t *consumer_lock); void rxe_queue_cleanup(struct rxe_queue *queue); -static inline int next_index(struct rxe_queue *q, int index) +static inline u32 queue_next_index(struct rxe_queue *q, int index) { - return (index + 1) & q->buf->index_mask; + return (index + 1) & q->index_mask; } -static inline int queue_empty(struct rxe_queue *q) +static inline u32 queue_get_producer(const struct rxe_queue *q, + enum queue_type type) { - return ((q->buf->producer_index - q->buf->consumer_index) - & q->index_mask) == 0; + u32 prod; + + switch (type) { + case QUEUE_TYPE_FROM_CLIENT: + /* protect user index */ + prod = smp_load_acquire(&q->buf->producer_index); + break; + case QUEUE_TYPE_TO_CLIENT: + prod = q->index; + break; + case QUEUE_TYPE_FROM_DRIVER: + /* protect driver index */ + prod = smp_load_acquire(&q->buf->producer_index); + break; + case QUEUE_TYPE_TO_DRIVER: + prod = q->buf->producer_index; + break; + } + + return prod; } -static inline int queue_full(struct rxe_queue *q) +static inline u32 queue_get_consumer(const struct rxe_queue *q, + enum queue_type type) { - return ((q->buf->producer_index + 1 - q->buf->consumer_index) - & q->index_mask) == 0; + u32 cons; + + switch (type) { + case QUEUE_TYPE_FROM_CLIENT: + cons = q->index; + break; + case QUEUE_TYPE_TO_CLIENT: + /* protect user index */ + cons = smp_load_acquire(&q->buf->consumer_index); + break; + case QUEUE_TYPE_FROM_DRIVER: + cons = q->buf->consumer_index; + break; + case QUEUE_TYPE_TO_DRIVER: + /* protect driver index */ + cons = smp_load_acquire(&q->buf->consumer_index); + break; + } + + return cons; } -static inline void advance_producer(struct rxe_queue *q) +static inline int queue_empty(struct rxe_queue *q, enum queue_type type) { - q->buf->producer_index = (q->buf->producer_index + 1) - & q->index_mask; + u32 prod = queue_get_producer(q, type); + u32 cons = queue_get_consumer(q, type); + + return ((prod - cons) & q->index_mask) == 0; } -static inline void advance_consumer(struct rxe_queue *q) +static inline int queue_full(struct rxe_queue *q, enum queue_type type) { - q->buf->consumer_index = (q->buf->consumer_index + 1) - & q->index_mask; + u32 prod = queue_get_producer(q, type); + u32 cons = queue_get_consumer(q, type); + + return ((prod + 1 - cons) & q->index_mask) == 0; } -static inline void *producer_addr(struct rxe_queue *q) +static inline u32 queue_count(const struct rxe_queue *q, + enum queue_type type) { - return q->buf->data + ((q->buf->producer_index & q->index_mask) - << q->log2_elem_size); + u32 prod = queue_get_producer(q, type); + u32 cons = queue_get_consumer(q, type); + + return (prod - cons) & q->index_mask; } -static inline void *consumer_addr(struct rxe_queue *q) +static inline void queue_advance_producer(struct rxe_queue *q, + enum queue_type type) { - return q->buf->data + ((q->buf->consumer_index & q->index_mask) - << q->log2_elem_size); + u32 prod; + + switch (type) { + case QUEUE_TYPE_FROM_CLIENT: + pr_warn("%s: attempt to advance client index\n", + __func__); + break; + case QUEUE_TYPE_TO_CLIENT: + prod = q->index; + prod = (prod + 1) & q->index_mask; + q->index = prod; + /* protect user index */ + smp_store_release(&q->buf->producer_index, prod); + break; + case QUEUE_TYPE_FROM_DRIVER: + pr_warn("%s: attempt to advance driver index\n", + __func__); + break; + case QUEUE_TYPE_TO_DRIVER: + prod = q->buf->producer_index; + prod = (prod + 1) & q->index_mask; + q->buf->producer_index = prod; + break; + } } -static inline unsigned int producer_index(struct rxe_queue *q) +static inline void queue_advance_consumer(struct rxe_queue *q, + enum queue_type type) { - return q->buf->producer_index; + u32 cons; + + switch (type) { + case QUEUE_TYPE_FROM_CLIENT: + cons = q->index; + cons = (cons + 1) & q->index_mask; + q->index = cons; + /* protect user index */ + smp_store_release(&q->buf->consumer_index, cons); + break; + case QUEUE_TYPE_TO_CLIENT: + pr_warn("%s: attempt to advance client index\n", + __func__); + break; + case QUEUE_TYPE_FROM_DRIVER: + cons = q->buf->consumer_index; + cons = (cons + 1) & q->index_mask; + q->buf->consumer_index = cons; + break; + case QUEUE_TYPE_TO_DRIVER: + pr_warn("%s: attempt to advance driver index\n", + __func__); + break; + } } -static inline unsigned int consumer_index(struct rxe_queue *q) +static inline void *queue_producer_addr(struct rxe_queue *q, + enum queue_type type) { - return q->buf->consumer_index; + u32 prod = queue_get_producer(q, type); + + return q->buf->data + (prod << q->log2_elem_size); } -static inline void *addr_from_index(struct rxe_queue *q, unsigned int index) +static inline void *queue_consumer_addr(struct rxe_queue *q, + enum queue_type type) { - return q->buf->data + ((index & q->index_mask) - << q->buf->log2_elem_size); + u32 cons = queue_get_consumer(q, type); + + return q->buf->data + (cons << q->log2_elem_size); } -static inline unsigned int index_from_addr(const struct rxe_queue *q, - const void *addr) +static inline void *queue_addr_from_index(struct rxe_queue *q, u32 index) { - return (((u8 *)addr - q->buf->data) >> q->log2_elem_size) - & q->index_mask; + return q->buf->data + ((index & q->index_mask) + << q->log2_elem_size); } -static inline unsigned int queue_count(const struct rxe_queue *q) +static inline u32 queue_index_from_addr(const struct rxe_queue *q, + const void *addr) { - return (q->buf->producer_index - q->buf->consumer_index) - & q->index_mask; + return (((u8 *)addr - q->buf->data) >> q->log2_elem_size) + & q->index_mask; } -static inline void *queue_head(struct rxe_queue *q) +static inline void *queue_head(struct rxe_queue *q, enum queue_type type) { - return queue_empty(q) ? NULL : consumer_addr(q); + return queue_empty(q, type) ? NULL : queue_consumer_addr(q, type); } #endif /* RXE_QUEUE_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 831ad578a7b2..434a693cd4a5 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/skbuff.h> @@ -36,50 +9,43 @@ #include "rxe.h" #include "rxe_loc.h" +/* check that QP matches packet opcode type and is in a valid state */ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct rxe_qp *qp) { + unsigned int pkt_type; + if (unlikely(!qp->valid)) - goto err1; + return -EINVAL; + + pkt_type = pkt->opcode & 0xe0; switch (qp_type(qp)) { case IB_QPT_RC: - if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) { - pr_warn_ratelimited("bad qp type\n"); - goto err1; - } + if (unlikely(pkt_type != IB_OPCODE_RC)) + return -EINVAL; break; case IB_QPT_UC: - if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) { - pr_warn_ratelimited("bad qp type\n"); - goto err1; - } + if (unlikely(pkt_type != IB_OPCODE_UC)) + return -EINVAL; break; case IB_QPT_UD: - case IB_QPT_SMI: case IB_QPT_GSI: - if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) { - pr_warn_ratelimited("bad qp type\n"); - goto err1; - } + if (unlikely(pkt_type != IB_OPCODE_UD)) + return -EINVAL; break; default: - pr_warn_ratelimited("unsupported qp type\n"); - goto err1; + return -EINVAL; } if (pkt->mask & RXE_REQ_MASK) { if (unlikely(qp->resp.state != QP_STATE_READY)) - goto err1; + return -EINVAL; } else if (unlikely(qp->req.state < QP_STATE_READY || - qp->req.state > QP_STATE_DRAINED)) { - goto err1; - } + qp->req.state > QP_STATE_DRAINED)) + return -EINVAL; return 0; - -err1: - return -EINVAL; } static void set_bad_pkey_cntr(struct rxe_port *port) @@ -101,54 +67,26 @@ static void set_qkey_viol_cntr(struct rxe_port *port) static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, u32 qpn, struct rxe_qp *qp) { - int i; - int found_pkey = 0; struct rxe_port *port = &rxe->port; u16 pkey = bth_pkey(pkt); pkt->pkey_index = 0; - if (qpn == 1) { - for (i = 0; i < port->attr.pkey_tbl_len; i++) { - if (pkey_match(pkey, port->pkey_tbl[i])) { - pkt->pkey_index = i; - found_pkey = 1; - break; - } - } - - if (!found_pkey) { - pr_warn_ratelimited("bad pkey = 0x%x\n", pkey); - set_bad_pkey_cntr(port); - goto err1; - } - } else { - if (unlikely(!pkey_match(pkey, - port->pkey_tbl[qp->attr.pkey_index] - ))) { - pr_warn_ratelimited("bad pkey = 0x%0x\n", pkey); - set_bad_pkey_cntr(port); - goto err1; - } - pkt->pkey_index = qp->attr.pkey_index; + if (!pkey_match(pkey, IB_DEFAULT_PKEY_FULL)) { + set_bad_pkey_cntr(port); + return -EINVAL; } - if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) && - pkt->mask) { + if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) { u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey; if (unlikely(deth_qkey(pkt) != qkey)) { - pr_warn_ratelimited("bad qkey, got 0x%x expected 0x%x for qpn 0x%x\n", - deth_qkey(pkt), qkey, qpn); set_qkey_viol_cntr(port); - goto err1; + return -EINVAL; } } return 0; - -err1: - return -EINVAL; } static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, @@ -157,13 +95,10 @@ static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb = PKT_TO_SKB(pkt); if (qp_type(qp) != IB_QPT_RC && qp_type(qp) != IB_QPT_UC) - goto done; + return 0; - if (unlikely(pkt->port_num != qp->attr.port_num)) { - pr_warn_ratelimited("port %d != qp port %d\n", - pkt->port_num, qp->attr.port_num); - goto err1; - } + if (unlikely(pkt->port_num != qp->attr.port_num)) + return -EINVAL; if (skb->protocol == htons(ETH_P_IP)) { struct in_addr *saddr = @@ -171,19 +106,9 @@ static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct in_addr *daddr = &qp->pri_av.dgid_addr._sockaddr_in.sin_addr; - if (ip_hdr(skb)->daddr != saddr->s_addr) { - pr_warn_ratelimited("dst addr %pI4 != qp source addr %pI4\n", - &ip_hdr(skb)->daddr, - &saddr->s_addr); - goto err1; - } - - if (ip_hdr(skb)->saddr != daddr->s_addr) { - pr_warn_ratelimited("source addr %pI4 != qp dst addr %pI4\n", - &ip_hdr(skb)->saddr, - &daddr->s_addr); - goto err1; - } + if ((ip_hdr(skb)->daddr != saddr->s_addr) || + (ip_hdr(skb)->saddr != daddr->s_addr)) + return -EINVAL; } else if (skb->protocol == htons(ETH_P_IPV6)) { struct in6_addr *saddr = @@ -191,24 +116,12 @@ static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct in6_addr *daddr = &qp->pri_av.dgid_addr._sockaddr_in6.sin6_addr; - if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr))) { - pr_warn_ratelimited("dst addr %pI6 != qp source addr %pI6\n", - &ipv6_hdr(skb)->daddr, saddr); - goto err1; - } - - if (memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr))) { - pr_warn_ratelimited("source addr %pI6 != qp dst addr %pI6\n", - &ipv6_hdr(skb)->saddr, daddr); - goto err1; - } + if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr)) || + memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr))) + return -EINVAL; } -done: return 0; - -err1: - return -EINVAL; } static int hdr_check(struct rxe_pkt_info *pkt) @@ -220,24 +133,18 @@ static int hdr_check(struct rxe_pkt_info *pkt) int index; int err; - if (unlikely(bth_tver(pkt) != BTH_TVER)) { - pr_warn_ratelimited("bad tver\n"); + if (unlikely(bth_tver(pkt) != BTH_TVER)) goto err1; - } - if (unlikely(qpn == 0)) { - pr_warn_once("QP 0 not supported"); + if (unlikely(qpn == 0)) goto err1; - } if (qpn != IB_MULTICAST_QPN) { index = (qpn == 1) ? port->qp_gsi_index : qpn; qp = rxe_pool_get_index(&rxe->qp_pool, index); - if (unlikely(!qp)) { - pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn); + if (unlikely(!qp)) goto err1; - } err = check_type_state(rxe, pkt, qp); if (unlikely(err)) @@ -251,17 +158,15 @@ static int hdr_check(struct rxe_pkt_info *pkt) if (unlikely(err)) goto err2; } else { - if (unlikely((pkt->mask & RXE_GRH_MASK) == 0)) { - pr_warn_ratelimited("no grh for mcast qpn\n"); + if (unlikely((pkt->mask & RXE_GRH_MASK) == 0)) goto err1; - } } pkt->qp = qp; return 0; err2: - rxe_drop_ref(qp); + rxe_put(qp); err1: return -EINVAL; } @@ -277,8 +182,8 @@ static inline void rxe_rcv_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb) static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) { struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); - struct rxe_mc_grp *mcg; - struct rxe_mc_elem *mce; + struct rxe_mcg *mcg; + struct rxe_mca *mca; struct rxe_qp *qp; union ib_gid dgid; int err; @@ -290,15 +195,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid)); /* lookup mcast group corresponding to mgid, takes a ref */ - mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid); + mcg = rxe_lookup_mcg(rxe, &dgid); if (!mcg) - goto err1; /* mcast group not registered */ + goto drop; /* mcast group not registered */ - spin_lock_bh(&mcg->mcg_lock); + spin_lock_bh(&rxe->mcg_lock); - list_for_each_entry(mce, &mcg->qp_list, qp_list) { - qp = mce->qp; - pkt = SKB_TO_PKT(skb); + /* this is unreliable datagram service so we let + * failures to deliver a multicast packet to a + * single QP happen and just move on and try + * the rest of them on the list + */ + list_for_each_entry(mca, &mcg->qp_list, qp_list) { + qp = mca->qp; /* validate qp for incoming packet */ err = check_type_state(rxe, pkt, qp); @@ -309,31 +218,71 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) if (err) continue; - /* if *not* the last qp in the list - * increase the users of the skb then post to the next qp + /* for all but the last QP create a new clone of the + * skb and pass to the QP. Pass the original skb to + * the last QP in the list. */ - if (mce->qp_list.next != &mcg->qp_list) - skb_get(skb); + if (mca->qp_list.next != &mcg->qp_list) { + struct sk_buff *cskb; + struct rxe_pkt_info *cpkt; - pkt->qp = qp; - rxe_add_ref(qp); - rxe_rcv_pkt(pkt, skb); + cskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!cskb)) + continue; + + if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) { + kfree_skb(cskb); + break; + } + + cpkt = SKB_TO_PKT(cskb); + cpkt->qp = qp; + rxe_get(qp); + rxe_rcv_pkt(cpkt, cskb); + } else { + pkt->qp = qp; + rxe_get(qp); + rxe_rcv_pkt(pkt, skb); + skb = NULL; /* mark consumed */ + } } - spin_unlock_bh(&mcg->mcg_lock); + spin_unlock_bh(&rxe->mcg_lock); - rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); -err1: + if (likely(!skb)) + return; + + /* This only occurs if one of the checks fails on the last + * QP in the list above + */ + +drop: kfree_skb(skb); + ib_device_put(&rxe->ib_dev); } -static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) +/** + * rxe_chk_dgid - validate destination IP address + * @rxe: rxe device that received packet + * @skb: the received packet buffer + * + * Accept any loopback packets + * Extract IP address from packet and + * Accept if multicast packet + * Accept if matches an SGID table entry + */ +static int rxe_chk_dgid(struct rxe_dev *rxe, struct sk_buff *skb) { + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); const struct ib_gid_attr *gid_attr; union ib_gid dgid; union ib_gid *pdgid; + if (pkt->mask & RXE_LOOPBACK_MASK) + return 0; + if (skb->protocol == htons(ETH_P_IP)) { ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, (struct in6_addr *)&dgid); @@ -342,6 +291,9 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr; } + if (rdma_is_multicast_addr((struct in6_addr *)pdgid)) + return 0; + gid_attr = rdma_find_gid_by_port(&rxe->ib_dev, pdgid, IB_GID_TYPE_ROCE_UDP_ENCAP, 1, skb->dev); @@ -358,18 +310,12 @@ void rxe_rcv(struct sk_buff *skb) int err; struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); struct rxe_dev *rxe = pkt->rxe; - __be32 *icrcp; - u32 calc_icrc, pack_icrc; - - pkt->offset = 0; - if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) + if (unlikely(skb->len < RXE_BTH_BYTES)) goto drop; - if (unlikely(rxe_match_dgid(rxe, skb) < 0)) { - pr_warn_ratelimited("failed matching dgid\n"); + if (rxe_chk_dgid(rxe, skb) < 0) goto drop; - } pkt->opcode = bth_opcode(pkt); pkt->psn = bth_psn(pkt); @@ -383,26 +329,9 @@ void rxe_rcv(struct sk_buff *skb) if (unlikely(err)) goto drop; - /* Verify ICRC */ - icrcp = (__be32 *)(pkt->hdr + pkt->paylen - RXE_ICRC_SIZE); - pack_icrc = be32_to_cpu(*icrcp); - - calc_icrc = rxe_icrc_hdr(pkt, skb); - calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt), - payload_size(pkt) + bth_pad(pkt)); - calc_icrc = (__force u32)cpu_to_be32(~calc_icrc); - if (unlikely(calc_icrc != pack_icrc)) { - if (skb->protocol == htons(ETH_P_IPV6)) - pr_warn_ratelimited("bad ICRC from %pI6c\n", - &ipv6_hdr(skb)->saddr); - else if (skb->protocol == htons(ETH_P_IP)) - pr_warn_ratelimited("bad ICRC from %pI4\n", - &ip_hdr(skb)->saddr); - else - pr_warn_ratelimited("bad ICRC from unknown\n"); - + err = rxe_icrc_check(skb, pkt); + if (unlikely(err)) goto drop; - } rxe_counter_inc(rxe, RXE_CNT_RCVD_PKTS); @@ -415,7 +344,8 @@ void rxe_rcv(struct sk_buff *skb) drop: if (pkt->qp) - rxe_drop_ref(pkt->qp); + rxe_put(pkt->qp); kfree_skb(skb); + ib_device_put(&rxe->ib_dev); } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index e5031172c019..f63771207970 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/skbuff.h> @@ -42,8 +15,7 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, u32 opcode); static inline void retry_first_write_send(struct rxe_qp *qp, - struct rxe_send_wqe *wqe, - unsigned int mask, int npsn) + struct rxe_send_wqe *wqe, int npsn) { int i; @@ -60,8 +32,6 @@ static inline void retry_first_write_send(struct rxe_qp *qp, } else { advance_dma_data(&wqe->dma, to_send); } - if (mask & WR_WRITE_MASK) - wqe->iova += qp->mtu; } } @@ -72,15 +42,20 @@ static void req_retry(struct rxe_qp *qp) unsigned int mask; int npsn; int first = 1; + struct rxe_queue *q = qp->sq.queue; + unsigned int cons; + unsigned int prod; + + cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT); + prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT); - qp->req.wqe_index = consumer_index(qp->sq.queue); + qp->req.wqe_index = cons; qp->req.psn = qp->comp.psn; qp->req.opcode = -1; - for (wqe_index = consumer_index(qp->sq.queue); - wqe_index != producer_index(qp->sq.queue); - wqe_index = next_index(qp->sq.queue, wqe_index)) { - wqe = addr_from_index(qp->sq.queue, wqe_index); + for (wqe_index = cons; wqe_index != prod; + wqe_index = queue_next_index(q, wqe_index)) { + wqe = queue_addr_from_index(qp->sq.queue, wqe_index); mask = wr_opcode_mask(wqe->wr.opcode, qp); if (wqe->state == wqe_state_posted) @@ -107,7 +82,7 @@ static void req_retry(struct rxe_qp *qp) if (mask & WR_WRITE_OR_SEND_MASK) { npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK; - retry_first_write_send(qp, wqe, mask, npsn); + retry_first_write_send(qp, wqe, npsn); } if (mask & WR_READ_MASK) { @@ -125,39 +100,47 @@ void rnr_nak_timer(struct timer_list *t) { struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer); - pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp)); + pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp)); + + /* request a send queue retry */ + qp->req.need_retry = 1; + qp->req.wait_for_rnr_timer = 0; rxe_run_task(&qp->req.task, 1); } static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) { - struct rxe_send_wqe *wqe = queue_head(qp->sq.queue); - unsigned long flags; + struct rxe_send_wqe *wqe; + struct rxe_queue *q = qp->sq.queue; + unsigned int index = qp->req.wqe_index; + unsigned int cons; + unsigned int prod; + + wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT); + cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT); + prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT); if (unlikely(qp->req.state == QP_STATE_DRAIN)) { /* check to see if we are drained; * state_lock used by requester and completer */ - spin_lock_irqsave(&qp->state_lock, flags); + spin_lock_bh(&qp->state_lock); do { if (qp->req.state != QP_STATE_DRAIN) { /* comp just finished */ - spin_unlock_irqrestore(&qp->state_lock, - flags); + spin_unlock_bh(&qp->state_lock); break; } - if (wqe && ((qp->req.wqe_index != - consumer_index(qp->sq.queue)) || + if (wqe && ((index != cons) || (wqe->state != wqe_state_posted))) { /* comp not done yet */ - spin_unlock_irqrestore(&qp->state_lock, - flags); + spin_unlock_bh(&qp->state_lock); break; } qp->req.state = QP_STATE_DRAINED; - spin_unlock_irqrestore(&qp->state_lock, flags); + spin_unlock_bh(&qp->state_lock); if (qp->ibqp.event_handler) { struct ib_event ev; @@ -171,26 +154,46 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) } while (0); } - if (qp->req.wqe_index == producer_index(qp->sq.queue)) + if (index == prod) return NULL; - wqe = addr_from_index(qp->sq.queue, qp->req.wqe_index); + wqe = queue_addr_from_index(q, index); if (unlikely((qp->req.state == QP_STATE_DRAIN || qp->req.state == QP_STATE_DRAINED) && (wqe->state != wqe_state_processing))) return NULL; - if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) && - (qp->req.wqe_index != consumer_index(qp->sq.queue)))) { - qp->req.wait_fence = 1; - return NULL; - } - wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp); return wqe; } +/** + * rxe_wqe_is_fenced - check if next wqe is fenced + * @qp: the queue pair + * @wqe: the next wqe + * + * Returns: 1 if wqe needs to wait + * 0 if wqe is ready to go + */ +static int rxe_wqe_is_fenced(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + /* Local invalidate fence (LIF) see IBA 10.6.5.1 + * Requires ALL previous operations on the send queue + * are complete. Make mandatory for the rxe driver. + */ + if (wqe->wr.opcode == IB_WR_LOCAL_INV) + return qp->req.wqe_index != queue_get_consumer(qp->sq.queue, + QUEUE_TYPE_FROM_CLIENT); + + /* Fence see IBA 10.8.3.3 + * Requires that all previous read and atomic operations + * are complete. + */ + return (wqe->wr.send_flags & IB_SEND_FENCE) && + atomic_read(&qp->req.rd_atomic) != qp->attr.max_rd_atomic; +} + static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits) { switch (opcode) { @@ -326,7 +329,6 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, case IB_QPT_UC: return next_opcode_uc(qp, opcode, fits); - case IB_QPT_SMI: case IB_QPT_UD: case IB_QPT_GSI: switch (opcode) { @@ -376,38 +378,25 @@ static inline int get_mtu(struct rxe_qp *qp) } static struct sk_buff *init_req_packet(struct rxe_qp *qp, + struct rxe_av *av, struct rxe_send_wqe *wqe, - int opcode, int payload, + int opcode, u32 payload, struct rxe_pkt_info *pkt) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - struct rxe_port *port = &rxe->port; struct sk_buff *skb; struct rxe_send_wr *ibwr = &wqe->wr; - struct rxe_av *av; int pad = (-payload) & 0x3; int paylen; int solicited; - u16 pkey; u32 qp_num; int ack_req; /* length from start of bth to end of icrc */ paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; - - /* pkt->hdr, rxe, port_num and mask are initialized in ifc - * layer - */ - pkt->opcode = opcode; - pkt->qp = qp; - pkt->psn = qp->req.psn; - pkt->mask = rxe_opcode[opcode].mask; - pkt->paylen = paylen; - pkt->offset = 0; - pkt->wqe = wqe; + pkt->paylen = paylen; /* init skb */ - av = rxe_get_av(pkt); skb = rxe_init_packet(rxe, av, paylen, pkt); if (unlikely(!skb)) return NULL; @@ -419,10 +408,6 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, (pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) == (RXE_WRITE_MASK | RXE_IMMDT_MASK)); - pkey = (qp_type(qp) == IB_QPT_GSI) ? - port->pkey_tbl[ibwr->wr.ud.pkey_index] : - port->pkey_tbl[qp->attr.pkey_index]; - qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn : qp->attr.dest_qp_num; @@ -431,7 +416,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, if (ack_req) qp->req.noack_pkts = 0; - bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num, + bth_init(pkt, pkt->opcode, solicited, 0, pad, IB_DEFAULT_PKEY_FULL, qp_num, ack_req, pkt->psn); /* init optional headers */ @@ -449,8 +434,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, if (pkt->mask & RXE_ATMETH_MASK) { atmeth_set_va(pkt, wqe->iova); - if (opcode == IB_OPCODE_RC_COMPARE_SWAP || - opcode == IB_OPCODE_RD_COMPARE_SWAP) { + if (opcode == IB_OPCODE_RC_COMPARE_SWAP) { atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap); atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add); } else { @@ -470,46 +454,37 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, return skb; } -static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, struct sk_buff *skb, - int paylen) +static int finish_packet(struct rxe_qp *qp, struct rxe_av *av, + struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, u32 payload) { - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - u32 crc = 0; - u32 *p; int err; - err = rxe_prepare(pkt, skb, &crc); + err = rxe_prepare(av, pkt, skb); if (err) return err; - if (pkt->mask & RXE_WRITE_OR_SEND) { + if (pkt->mask & RXE_WRITE_OR_SEND_MASK) { if (wqe->wr.send_flags & IB_SEND_INLINE) { u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset]; - crc = rxe_crc32(rxe, crc, tmp, paylen); - memcpy(payload_addr(pkt), tmp, paylen); + memcpy(payload_addr(pkt), tmp, payload); - wqe->dma.resid -= paylen; - wqe->dma.sge_offset += paylen; + wqe->dma.resid -= payload; + wqe->dma.sge_offset += payload; } else { err = copy_data(qp->pd, 0, &wqe->dma, - payload_addr(pkt), paylen, - from_mem_obj, - &crc); + payload_addr(pkt), payload, + RXE_FROM_MR_OBJ); if (err) return err; } if (bth_pad(pkt)) { - u8 *pad = payload_addr(pkt) + paylen; + u8 *pad = payload_addr(pkt) + payload; memset(pad, 0, bth_pad(pkt)); - crc = rxe_crc32(rxe, crc, pad, bth_pad(pkt)); } } - p = payload_addr(pkt) + paylen + bth_pad(pkt); - - *p = ~crc; return 0; } @@ -529,7 +504,7 @@ static void update_wqe_state(struct rxe_qp *qp, static void update_wqe_psn(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt, - int payload) + u32 payload) { /* number of packets left to send including current one */ int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; @@ -571,13 +546,13 @@ static void rollback_state(struct rxe_send_wqe *wqe, qp->req.psn = rollback_psn; } -static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, int payload) +static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { qp->req.opcode = pkt->opcode; if (pkt->mask & RXE_END_MASK) - qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); + qp->req.wqe_index = queue_next_index(qp->sq.queue, + qp->req.wqe_index); qp->need_req_skb = 0; @@ -586,36 +561,112 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, jiffies + qp->qp_timeout_jiffies); } +static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + u8 opcode = wqe->wr.opcode; + u32 rkey; + int ret; + + switch (opcode) { + case IB_WR_LOCAL_INV: + rkey = wqe->wr.ex.invalidate_rkey; + if (rkey_is_mw(rkey)) + ret = rxe_invalidate_mw(qp, rkey); + else + ret = rxe_invalidate_mr(qp, rkey); + + if (unlikely(ret)) { + wqe->status = IB_WC_LOC_QP_OP_ERR; + return ret; + } + break; + case IB_WR_REG_MR: + ret = rxe_reg_fast_mr(qp, wqe); + if (unlikely(ret)) { + wqe->status = IB_WC_LOC_QP_OP_ERR; + return ret; + } + break; + case IB_WR_BIND_MW: + ret = rxe_bind_mw(qp, wqe); + if (unlikely(ret)) { + wqe->status = IB_WC_MW_BIND_ERR; + return ret; + } + break; + default: + pr_err("Unexpected send wqe opcode %d\n", opcode); + wqe->status = IB_WC_LOC_QP_OP_ERR; + return -EINVAL; + } + + wqe->state = wqe_state_done; + wqe->status = IB_WC_SUCCESS; + qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); + + /* There is no ack coming for local work requests + * which can lead to a deadlock. So go ahead and complete + * it now. + */ + rxe_run_task(&qp->comp.task, 1); + + return 0; +} + int rxe_requester(void *arg) { struct rxe_qp *qp = (struct rxe_qp *)arg; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_pkt_info pkt; struct sk_buff *skb; struct rxe_send_wqe *wqe; enum rxe_hdr_mask mask; - int payload; + u32 payload; int mtu; int opcode; + int err; int ret; struct rxe_send_wqe rollback_wqe; u32 rollback_psn; + struct rxe_queue *q = qp->sq.queue; + struct rxe_ah *ah; + struct rxe_av *av; - rxe_add_ref(qp); + if (!rxe_get(qp)) + return -EAGAIN; -next_wqe: - if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) + if (unlikely(!qp->valid)) goto exit; + if (unlikely(qp->req.state == QP_STATE_ERROR)) { + wqe = req_next_wqe(qp); + if (wqe) + /* + * Generate an error completion for error qp state + */ + goto err; + else + goto exit; + } + if (unlikely(qp->req.state == QP_STATE_RESET)) { - qp->req.wqe_index = consumer_index(qp->sq.queue); + qp->req.wqe_index = queue_get_consumer(q, + QUEUE_TYPE_FROM_CLIENT); qp->req.opcode = -1; qp->req.need_rd_atomic = 0; qp->req.wait_psn = 0; qp->req.need_retry = 0; + qp->req.wait_for_rnr_timer = 0; goto exit; } - if (unlikely(qp->req.need_retry)) { + /* we come here if the retransmit timer has fired + * or if the rnr timer has fired. If the retransmit + * timer fires while we are processing an RNR NAK wait + * until the rnr timer has fired before starting the + * retry flow + */ + if (unlikely(qp->req.need_retry && !qp->req.wait_for_rnr_timer)) { req_retry(qp); qp->req.need_retry = 0; } @@ -624,47 +675,22 @@ next_wqe: if (unlikely(!wqe)) goto exit; - if (wqe->mask & WR_REG_MASK) { - if (wqe->wr.opcode == IB_WR_LOCAL_INV) { - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - struct rxe_mem *rmr; - - rmr = rxe_pool_get_index(&rxe->mr_pool, - wqe->wr.ex.invalidate_rkey >> 8); - if (!rmr) { - pr_err("No mr for key %#x\n", - wqe->wr.ex.invalidate_rkey); - wqe->state = wqe_state_error; - wqe->status = IB_WC_MW_BIND_ERR; - goto exit; - } - rmr->state = RXE_MEM_STATE_FREE; - rxe_drop_ref(rmr); - wqe->state = wqe_state_done; - wqe->status = IB_WC_SUCCESS; - } else if (wqe->wr.opcode == IB_WR_REG_MR) { - struct rxe_mem *rmr = to_rmr(wqe->wr.wr.reg.mr); - - rmr->state = RXE_MEM_STATE_VALID; - rmr->access = wqe->wr.wr.reg.access; - rmr->lkey = wqe->wr.wr.reg.key; - rmr->rkey = wqe->wr.wr.reg.key; - rmr->iova = wqe->wr.wr.reg.mr->iova; - wqe->state = wqe_state_done; - wqe->status = IB_WC_SUCCESS; - } else { - goto exit; - } - if ((wqe->wr.send_flags & IB_SEND_SIGNALED) || - qp->sq_sig_type == IB_SIGNAL_ALL_WR) - rxe_run_task(&qp->comp.task, 1); - qp->req.wqe_index = next_index(qp->sq.queue, - qp->req.wqe_index); - goto next_wqe; + if (rxe_wqe_is_fenced(qp, wqe)) { + qp->req.wait_fence = 1; + goto exit; + } + + if (wqe->mask & WR_LOCAL_OP_MASK) { + err = rxe_do_local_ops(qp, wqe); + if (unlikely(err)) + goto err; + else + goto done; } if (unlikely(qp_type(qp) == IB_QPT_RC && - qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) { + psn_compare(qp->req.psn, (qp->comp.psn + + RXE_MAX_UNACKED_PSNS)) > 0)) { qp->req.wait_psn = 1; goto exit; } @@ -679,17 +705,17 @@ next_wqe: opcode = next_opcode(qp, wqe, wqe->wr.opcode); if (unlikely(opcode < 0)) { wqe->status = IB_WC_LOC_QP_OP_ERR; - goto exit; + goto err; } mask = rxe_opcode[opcode].mask; - if (unlikely(mask & RXE_READ_OR_ATOMIC)) { + if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) { if (check_init_depth(qp, wqe)) goto exit; } mtu = get_mtu(qp); - payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0; + payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0; if (payload > mtu) { if (qp_type(qp) == IB_QPT_UD) { /* C10-93.1.1: If the total sum of all the buffer lengths specified for a @@ -703,29 +729,55 @@ next_wqe: wqe->last_psn = qp->req.psn; qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; qp->req.opcode = IB_OPCODE_UD_SEND_ONLY; - qp->req.wqe_index = next_index(qp->sq.queue, + qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - __rxe_do_task(&qp->comp.task); - rxe_drop_ref(qp); - return 0; + rxe_run_task(&qp->comp.task, 0); + goto done; } payload = mtu; } - skb = init_req_packet(qp, wqe, opcode, payload, &pkt); + pkt.rxe = rxe; + pkt.opcode = opcode; + pkt.qp = qp; + pkt.psn = qp->req.psn; + pkt.mask = rxe_opcode[opcode].mask; + pkt.wqe = wqe; + + av = rxe_get_av(&pkt, &ah); + if (unlikely(!av)) { + pr_err("qp#%d Failed no address vector\n", qp_num(qp)); + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto err; + } + + skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt); if (unlikely(!skb)) { pr_err("qp#%d Failed allocating skb\n", qp_num(qp)); + wqe->status = IB_WC_LOC_QP_OP_ERR; + if (ah) + rxe_put(ah); goto err; } - if (fill_packet(qp, wqe, &pkt, skb, payload)) { - pr_debug("qp#%d Error during fill packet\n", qp_num(qp)); + err = finish_packet(qp, av, wqe, &pkt, skb, payload); + if (unlikely(err)) { + pr_debug("qp#%d Error during finish packet\n", qp_num(qp)); + if (err == -EFAULT) + wqe->status = IB_WC_LOC_PROT_ERR; + else + wqe->status = IB_WC_LOC_QP_OP_ERR; kfree_skb(skb); + if (ah) + rxe_put(ah); goto err; } + if (ah) + rxe_put(ah); + /* * To prevent a race on wqe access between requester and completer, * wqe members state and psn need to be set before calling @@ -735,30 +787,41 @@ next_wqe: save_state(wqe, qp, &rollback_wqe, &rollback_psn); update_wqe_state(qp, wqe, &pkt); update_wqe_psn(qp, wqe, &pkt, payload); - ret = rxe_xmit_packet(qp, &pkt, skb); - if (ret) { + + err = rxe_xmit_packet(qp, &pkt, skb); + if (err) { qp->need_req_skb = 1; rollback_state(wqe, qp, &rollback_wqe, rollback_psn); - if (ret == -EAGAIN) { + if (err == -EAGAIN) { rxe_run_task(&qp->req.task, 1); goto exit; } + wqe->status = IB_WC_LOC_QP_OP_ERR; goto err; } - update_state(qp, wqe, &pkt, payload); - - goto next_wqe; + update_state(qp, &pkt); + /* A non-zero return value will cause rxe_do_task to + * exit its loop and end the tasklet. A zero return + * will continue looping and return to rxe_requester + */ +done: + ret = 0; + goto out; err: - wqe->status = IB_WC_LOC_PROT_ERR; + /* update wqe_index for each wqe completion */ + qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); wqe->state = wqe_state_error; - __rxe_do_task(&qp->comp.task); - + qp->req.state = QP_STATE_ERROR; + rxe_run_task(&qp->comp.task, 0); exit: - rxe_drop_ref(qp); - return -EAGAIN; + ret = -EAGAIN; +out: + rxe_put(qp); + + return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c4a8195bf670..693081e813ec 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/skbuff.h> @@ -48,6 +21,7 @@ enum resp_states { RESPST_CHK_RKEY, RESPST_EXECUTE, RESPST_READ_REPLY, + RESPST_ATOMIC_REPLY, RESPST_COMPLETE, RESPST_ACKNOWLEDGE, RESPST_CLEANUP, @@ -62,6 +36,7 @@ enum resp_states { RESPST_ERR_TOO_MANY_RDMA_ATM_REQ, RESPST_ERR_RNR, RESPST_ERR_RKEY_VIOLATION, + RESPST_ERR_INVALIDATE_RKEY, RESPST_ERR_LENGTH, RESPST_ERR_CQ_OVERFLOW, RESPST_ERROR, @@ -81,6 +56,7 @@ static char *resp_state_name[] = { [RESPST_CHK_RKEY] = "CHK_RKEY", [RESPST_EXECUTE] = "EXECUTE", [RESPST_READ_REPLY] = "READ_REPLY", + [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY", [RESPST_COMPLETE] = "COMPLETE", [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE", [RESPST_CLEANUP] = "CLEANUP", @@ -95,6 +71,7 @@ static char *resp_state_name[] = { [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ", [RESPST_ERR_RNR] = "ERR_RNR", [RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION", + [RESPST_ERR_INVALIDATE_RKEY] = "ERR_INVALIDATE_RKEY_VIOLATION", [RESPST_ERR_LENGTH] = "ERR_LENGTH", [RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW", [RESPST_ERROR] = "ERROR", @@ -124,8 +101,9 @@ static inline enum resp_states get_req(struct rxe_qp *qp, if (qp->resp.state == QP_STATE_ERROR) { while ((skb = skb_dequeue(&qp->req_pkts))) { - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } /* go drain recv wr queue */ @@ -301,7 +279,6 @@ static enum resp_states check_op_valid(struct rxe_qp *qp, break; case IB_QPT_UD: - case IB_QPT_SMI: case IB_QPT_GSI: break; @@ -319,35 +296,44 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) struct rxe_queue *q = srq->rq.queue; struct rxe_recv_wqe *wqe; struct ib_event ev; + unsigned int count; + size_t size; + unsigned long flags; if (srq->error) return RESPST_ERR_RNR; - spin_lock_bh(&srq->rq.consumer_lock); + spin_lock_irqsave(&srq->rq.consumer_lock, flags); - wqe = queue_head(q); + wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT); if (!wqe) { - spin_unlock_bh(&srq->rq.consumer_lock); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); return RESPST_ERR_RNR; } - /* note kernel and user space recv wqes have same size */ - memcpy(&qp->resp.srq_wqe, wqe, sizeof(qp->resp.srq_wqe)); + /* don't trust user space data */ + if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) { + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); + pr_warn("%s: invalid num_sge in SRQ entry\n", __func__); + return RESPST_ERR_MALFORMED_WQE; + } + size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); + memcpy(&qp->resp.srq_wqe, wqe, size); qp->resp.wqe = &qp->resp.srq_wqe.wqe; - advance_consumer(q); + queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT); + count = queue_count(q, QUEUE_TYPE_FROM_CLIENT); - if (srq->limit && srq->ibsrq.event_handler && - (queue_count(q) < srq->limit)) { + if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) { srq->limit = 0; goto event; } - spin_unlock_bh(&srq->rq.consumer_lock); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); return RESPST_CHK_LENGTH; event: - spin_unlock_bh(&srq->rq.consumer_lock); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); ev.device = qp->ibqp.device; ev.element.srq = qp->ibqp.srq; ev.event = IB_EVENT_SRQ_LIMIT_REACHED; @@ -365,7 +351,8 @@ static enum resp_states check_resource(struct rxe_qp *qp, qp->resp.status = IB_WC_WR_FLUSH_ERR; return RESPST_COMPLETE; } else if (!srq) { - qp->resp.wqe = queue_head(qp->rq.queue); + qp->resp.wqe = queue_head(qp->rq.queue, + QUEUE_TYPE_FROM_CLIENT); if (qp->resp.wqe) { qp->resp.status = IB_WC_WR_FLUSH_ERR; return RESPST_COMPLETE; @@ -377,7 +364,7 @@ static enum resp_states check_resource(struct rxe_qp *qp, } } - if (pkt->mask & RXE_READ_OR_ATOMIC) { + if (pkt->mask & RXE_READ_OR_ATOMIC_MASK) { /* it is the requesters job to not send * too many read/atomic ops, we just * recycle the responder resource queue @@ -392,7 +379,8 @@ static enum resp_states check_resource(struct rxe_qp *qp, if (srq) return get_srq_wqe(qp); - qp->resp.wqe = queue_head(qp->rq.queue); + qp->resp.wqe = queue_head(qp->rq.queue, + QUEUE_TYPE_FROM_CLIENT); return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR; } @@ -417,7 +405,8 @@ static enum resp_states check_length(struct rxe_qp *qp, static enum resp_states check_rkey(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { - struct rxe_mem *mem = NULL; + struct rxe_mr *mr = NULL; + struct rxe_mw *mw = NULL; u64 va; u32 rkey; u32 resid; @@ -426,9 +415,10 @@ static enum resp_states check_rkey(struct rxe_qp *qp, enum resp_states state; int access; - if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) { + if (pkt->mask & RXE_READ_OR_WRITE_MASK) { if (pkt->mask & RXE_RETH_MASK) { qp->resp.va = reth_va(pkt); + qp->resp.offset = 0; qp->resp.rkey = reth_rkey(pkt); qp->resp.resid = reth_len(pkt); qp->resp.length = reth_len(pkt); @@ -437,6 +427,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, : IB_ACCESS_REMOTE_WRITE; } else if (pkt->mask & RXE_ATOMIC_MASK) { qp->resp.va = atmeth_va(pkt); + qp->resp.offset = 0; qp->resp.rkey = atmeth_rkey(pkt); qp->resp.resid = sizeof(u64); access = IB_ACCESS_REMOTE_ATOMIC; @@ -445,7 +436,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, } /* A zero-byte op is not required to set an addr or rkey. */ - if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) && + if ((pkt->mask & RXE_READ_OR_WRITE_MASK) && (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) { return RESPST_EXECUTE; @@ -456,18 +447,38 @@ static enum resp_states check_rkey(struct rxe_qp *qp, resid = qp->resp.resid; pktlen = payload_size(pkt); - mem = lookup_mem(qp->pd, access, rkey, lookup_remote); - if (!mem) { - state = RESPST_ERR_RKEY_VIOLATION; - goto err; - } + if (rkey_is_mw(rkey)) { + mw = rxe_lookup_mw(qp, access, rkey); + if (!mw) { + pr_debug("%s: no MW matches rkey %#x\n", + __func__, rkey); + state = RESPST_ERR_RKEY_VIOLATION; + goto err; + } - if (unlikely(mem->state == RXE_MEM_STATE_FREE)) { - state = RESPST_ERR_RKEY_VIOLATION; - goto err; + mr = mw->mr; + if (!mr) { + pr_err("%s: MW doesn't have an MR\n", __func__); + state = RESPST_ERR_RKEY_VIOLATION; + goto err; + } + + if (mw->access & IB_ZERO_BASED) + qp->resp.offset = mw->addr; + + rxe_put(mw); + rxe_get(mr); + } else { + mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE); + if (!mr) { + pr_debug("%s: no MR matches rkey %#x\n", + __func__, rkey); + state = RESPST_ERR_RKEY_VIOLATION; + goto err; + } } - if (mem_check_range(mem, va, resid)) { + if (mr_check_range(mr, va + qp->resp.offset, resid)) { state = RESPST_ERR_RKEY_VIOLATION; goto err; } @@ -495,12 +506,15 @@ static enum resp_states check_rkey(struct rxe_qp *qp, WARN_ON_ONCE(qp->resp.mr); - qp->resp.mr = mem; + qp->resp.mr = mr; return RESPST_EXECUTE; err: - if (mem) - rxe_drop_ref(mem); + if (mr) + rxe_put(mr); + if (mw) + rxe_put(mw); + return state; } @@ -510,7 +524,7 @@ static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr, int err; err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma, - data_addr, data_len, to_mem_obj, NULL); + data_addr, data_len, RXE_TO_MR_OBJ); if (unlikely(err)) return (err == -ENOSPC) ? RESPST_ERR_LENGTH : RESPST_ERR_MALFORMED_WQE; @@ -525,8 +539,8 @@ static enum resp_states write_data_in(struct rxe_qp *qp, int err; int data_len = payload_size(pkt); - err = rxe_mem_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt), - data_len, to_mem_obj, NULL); + err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset, + payload_addr(pkt), data_len, RXE_TO_MR_OBJ); if (err) { rc = RESPST_ERR_RKEY_VIOLATION; goto out; @@ -539,62 +553,114 @@ out: return rc; } +static struct resp_res *rxe_prepare_res(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + int type) +{ + struct resp_res *res; + u32 pkts; + + res = &qp->resp.resources[qp->resp.res_head]; + rxe_advance_resp_resource(qp); + free_rd_atomic_resource(res); + + res->type = type; + res->replay = 0; + + switch (type) { + case RXE_READ_MASK: + res->read.va = qp->resp.va + qp->resp.offset; + res->read.va_org = qp->resp.va + qp->resp.offset; + res->read.resid = qp->resp.resid; + res->read.length = qp->resp.resid; + res->read.rkey = qp->resp.rkey; + + pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1); + res->first_psn = pkt->psn; + res->cur_psn = pkt->psn; + res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK; + + res->state = rdatm_res_state_new; + break; + case RXE_ATOMIC_MASK: + res->first_psn = pkt->psn; + res->last_psn = pkt->psn; + res->cur_psn = pkt->psn; + break; + } + + return res; +} + /* Guarantee atomicity of atomic operations at the machine level. */ static DEFINE_SPINLOCK(atomic_ops_lock); -static enum resp_states process_atomic(struct rxe_qp *qp, - struct rxe_pkt_info *pkt) +static enum resp_states atomic_reply(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) { - u64 iova = atmeth_va(pkt); u64 *vaddr; enum resp_states ret; - struct rxe_mem *mr = qp->resp.mr; + struct rxe_mr *mr = qp->resp.mr; + struct resp_res *res = qp->resp.res; + u64 value; - if (mr->state != RXE_MEM_STATE_VALID) { - ret = RESPST_ERR_RKEY_VIOLATION; - goto out; + if (!res) { + res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK); + qp->resp.res = res; } - vaddr = iova_to_vaddr(mr, iova, sizeof(u64)); + if (!res->replay) { + if (mr->state != RXE_MR_STATE_VALID) { + ret = RESPST_ERR_RKEY_VIOLATION; + goto out; + } - /* check vaddr is 8 bytes aligned. */ - if (!vaddr || (uintptr_t)vaddr & 7) { - ret = RESPST_ERR_MISALIGNED_ATOMIC; - goto out; - } + vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, + sizeof(u64)); - spin_lock_bh(&atomic_ops_lock); + /* check vaddr is 8 bytes aligned. */ + if (!vaddr || (uintptr_t)vaddr & 7) { + ret = RESPST_ERR_MISALIGNED_ATOMIC; + goto out; + } - qp->resp.atomic_orig = *vaddr; + spin_lock_bh(&atomic_ops_lock); + res->atomic.orig_val = value = *vaddr; - if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP || - pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) { - if (*vaddr == atmeth_comp(pkt)) - *vaddr = atmeth_swap_add(pkt); - } else { - *vaddr += atmeth_swap_add(pkt); - } + if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) { + if (value == atmeth_comp(pkt)) + value = atmeth_swap_add(pkt); + } else { + value += atmeth_swap_add(pkt); + } - spin_unlock_bh(&atomic_ops_lock); + *vaddr = value; + spin_unlock_bh(&atomic_ops_lock); - ret = RESPST_NONE; + qp->resp.msn++; + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; + } + + ret = RESPST_ACKNOWLEDGE; out: return ret; } static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, - struct rxe_pkt_info *pkt, struct rxe_pkt_info *ack, int opcode, int payload, u32 psn, - u8 syndrome, - u32 *crcp) + u8 syndrome) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct sk_buff *skb; - u32 crc = 0; - u32 *p; int paylen; int pad; int err; @@ -612,43 +678,80 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, ack->qp = qp; ack->opcode = opcode; ack->mask = rxe_opcode[opcode].mask; - ack->offset = pkt->offset; ack->paylen = paylen; - - /* fill in bth using the request packet headers */ - memcpy(ack->hdr, pkt->hdr, pkt->offset + RXE_BTH_BYTES); - - bth_set_opcode(ack, opcode); - bth_set_qpn(ack, qp->attr.dest_qp_num); - bth_set_pad(ack, pad); - bth_set_se(ack, 0); - bth_set_psn(ack, psn); - bth_set_ack(ack, 0); ack->psn = psn; + bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL, + qp->attr.dest_qp_num, 0, psn); + if (ack->mask & RXE_AETH_MASK) { aeth_set_syn(ack, syndrome); aeth_set_msn(ack, qp->resp.msn); } if (ack->mask & RXE_ATMACK_MASK) - atmack_set_orig(ack, qp->resp.atomic_orig); + atmack_set_orig(ack, qp->resp.res->atomic.orig_val); - err = rxe_prepare(ack, skb, &crc); + err = rxe_prepare(&qp->pri_av, ack, skb); if (err) { kfree_skb(skb); return NULL; } - if (crcp) { - /* CRC computation will be continued by the caller */ - *crcp = crc; - } else { - p = payload_addr(ack) + payload + bth_pad(ack); - *p = ~crc; + return skb; +} + +/** + * rxe_recheck_mr - revalidate MR from rkey and get a reference + * @qp: the qp + * @rkey: the rkey + * + * This code allows the MR to be invalidated or deregistered or + * the MW if one was used to be invalidated or deallocated. + * It is assumed that the access permissions if originally good + * are OK and the mappings to be unchanged. + * + * TODO: If someone reregisters an MR to change its size or + * access permissions during the processing of an RDMA read + * we should kill the responder resource and complete the + * operation with an error. + * + * Return: mr on success else NULL + */ +static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_mr *mr; + struct rxe_mw *mw; + + if (rkey_is_mw(rkey)) { + mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8); + if (!mw) + return NULL; + + mr = mw->mr; + if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID || + !mr || mr->state != RXE_MR_STATE_VALID) { + rxe_put(mw); + return NULL; + } + + rxe_get(mr); + rxe_put(mw); + + return mr; } - return skb; + mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8); + if (!mr) + return NULL; + + if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) { + rxe_put(mr); + return NULL; + } + + return mr; } /* RDMA read response. If res is not NULL, then we have a current RDMA request @@ -665,53 +768,32 @@ static enum resp_states read_reply(struct rxe_qp *qp, int opcode; int err; struct resp_res *res = qp->resp.res; - u32 icrc; - u32 *p; + struct rxe_mr *mr; if (!res) { - /* This is the first time we process that request. Get a - * resource - */ - res = &qp->resp.resources[qp->resp.res_head]; - - free_rd_atomic_resource(qp, res); - rxe_advance_resp_resource(qp); - - res->type = RXE_READ_MASK; - res->replay = 0; - - res->read.va = qp->resp.va; - res->read.va_org = qp->resp.va; - - res->first_psn = req_pkt->psn; + res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK); + qp->resp.res = res; + } - if (reth_len(req_pkt)) { - res->last_psn = (req_pkt->psn + - (reth_len(req_pkt) + mtu - 1) / - mtu - 1) & BTH_PSN_MASK; + if (res->state == rdatm_res_state_new) { + if (!res->replay) { + mr = qp->resp.mr; + qp->resp.mr = NULL; } else { - res->last_psn = res->first_psn; + mr = rxe_recheck_mr(qp, res->read.rkey); + if (!mr) + return RESPST_ERR_RKEY_VIOLATION; } - res->cur_psn = req_pkt->psn; - - res->read.resid = qp->resp.resid; - res->read.length = qp->resp.resid; - res->read.rkey = qp->resp.rkey; - /* note res inherits the reference to mr from qp */ - res->read.mr = qp->resp.mr; - qp->resp.mr = NULL; - - qp->resp.res = res; - res->state = rdatm_res_state_new; - } - - if (res->state == rdatm_res_state_new) { if (res->read.resid <= mtu) opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY; else opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST; } else { + mr = rxe_recheck_mr(qp, res->read.rkey); + if (!mr) + return RESPST_ERR_RKEY_VIOLATION; + if (res->read.resid > mtu) opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE; else @@ -722,31 +804,27 @@ static enum resp_states read_reply(struct rxe_qp *qp, payload = min_t(int, res->read.resid, mtu); - skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload, - res->cur_psn, AETH_ACK_UNLIMITED, &icrc); - if (!skb) + skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload, + res->cur_psn, AETH_ACK_UNLIMITED); + if (!skb) { + rxe_put(mr); return RESPST_ERR_RNR; + } - err = rxe_mem_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt), - payload, from_mem_obj, &icrc); - if (err) - pr_err("Failed copying memory\n"); + rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), + payload, RXE_FROM_MR_OBJ); + if (mr) + rxe_put(mr); if (bth_pad(&ack_pkt)) { - struct rxe_dev *rxe = to_rdev(qp->ibqp.device); u8 *pad = payload_addr(&ack_pkt) + payload; memset(pad, 0, bth_pad(&ack_pkt)); - icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt)); } - p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt); - *p = ~icrc; err = rxe_xmit_packet(qp, &ack_pkt, skb); - if (err) { - pr_err("Failed sending RDMA reply.\n"); + if (err) return RESPST_ERR_RNR; - } res->read.va += payload; res->read.resid -= payload; @@ -766,16 +844,12 @@ static enum resp_states read_reply(struct rxe_qp *qp, return state; } -static void build_rdma_network_hdr(union rdma_network_hdr *hdr, - struct rxe_pkt_info *pkt) +static int invalidate_rkey(struct rxe_qp *qp, u32 rkey) { - struct sk_buff *skb = PKT_TO_SKB(pkt); - - memset(hdr, 0, sizeof(*hdr)); - if (skb->protocol == htons(ETH_P_IP)) - memcpy(&hdr->roce4grh, ip_hdr(skb), sizeof(hdr->roce4grh)); - else if (skb->protocol == htons(ETH_P_IPV6)) - memcpy(&hdr->ibgrh, ipv6_hdr(skb), sizeof(hdr->ibgrh)); + if (rkey_is_mw(rkey)) + return rxe_invalidate_mw(qp, rkey); + else + return rxe_invalidate_mr(qp, rkey); } /* Executes a new request. A retried request never reach that function (send @@ -784,16 +858,22 @@ static void build_rdma_network_hdr(union rdma_network_hdr *hdr, static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { enum resp_states err; + struct sk_buff *skb = PKT_TO_SKB(pkt); + union rdma_network_hdr hdr; if (pkt->mask & RXE_SEND_MASK) { if (qp_type(qp) == IB_QPT_UD || - qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { - union rdma_network_hdr hdr; - - build_rdma_network_hdr(&hdr, pkt); - - err = send_data_in(qp, &hdr, sizeof(hdr)); + if (skb->protocol == htons(ETH_P_IP)) { + memset(&hdr.reserved, 0, + sizeof(hdr.reserved)); + memcpy(&hdr.roce4grh, ip_hdr(skb), + sizeof(hdr.roce4grh)); + err = send_data_in(qp, &hdr, sizeof(hdr)); + } else { + err = send_data_in(qp, ipv6_hdr(skb), + sizeof(hdr)); + } if (err) return err; } @@ -809,14 +889,24 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) qp->resp.msn++; return RESPST_READ_REPLY; } else if (pkt->mask & RXE_ATOMIC_MASK) { - err = process_atomic(qp, pkt); - if (err) - return err; + return RESPST_ATOMIC_REPLY; } else { /* Unreachable */ WARN_ON_ONCE(1); } + if (pkt->mask & RXE_IETH_MASK) { + u32 rkey = ieth_rkey(pkt); + + err = invalidate_rkey(qp, rkey); + if (err) + return RESPST_ERR_INVALIDATE_RKEY; + } + + if (pkt->mask & RXE_END_MASK) + /* We successfully processed this new request. */ + qp->resp.msn++; + /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; qp->resp.ack_psn = qp->resp.psn; @@ -824,11 +914,9 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; - if (pkt->mask & RXE_COMP_MASK) { - /* We successfully processed this new request. */ - qp->resp.msn++; + if (pkt->mask & RXE_COMP_MASK) return RESPST_COMPLETE; - } else if (qp_type(qp) == IB_QPT_RC) + else if (qp_type(qp) == IB_QPT_RC) return RESPST_ACKNOWLEDGE; else return RESPST_CLEANUP; @@ -843,19 +931,19 @@ static enum resp_states do_complete(struct rxe_qp *qp, struct rxe_recv_wqe *wqe = qp->resp.wqe; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - if (unlikely(!wqe)) - return RESPST_CLEANUP; + if (!wqe) + goto finish; memset(&cqe, 0, sizeof(cqe)); if (qp->rcq->is_user) { - uwc->status = qp->resp.status; - uwc->qp_num = qp->ibqp.qp_num; - uwc->wr_id = wqe->wr_id; + uwc->status = qp->resp.status; + uwc->qp_num = qp->ibqp.qp_num; + uwc->wr_id = wqe->wr_id; } else { - wc->status = qp->resp.status; - wc->qp = &qp->ibqp; - wc->wr_id = wqe->wr_id; + wc->status = qp->resp.status; + wc->qp = &qp->ibqp; + wc->wr_id = wqe->wr_id; } if (wc->status == IB_WC_SUCCESS) { @@ -863,7 +951,6 @@ static enum resp_states do_complete(struct rxe_qp *qp, wc->opcode = (pkt->mask & RXE_IMMDT_MASK && pkt->mask & RXE_WRITE_MASK) ? IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; - wc->vendor_err = 0; wc->byte_len = (pkt->mask & RXE_IMMDT_MASK && pkt->mask & RXE_WRITE_MASK) ? qp->resp.length : wqe->dma.length - wqe->dma.resid; @@ -884,8 +971,6 @@ static enum resp_states do_complete(struct rxe_qp *qp, uwc->ex.invalidate_rkey = ieth_rkey(pkt); } - uwc->qp_num = qp->ibqp.qp_num; - if (pkt->mask & RXE_DETH_MASK) uwc->src_qp = deth_sqp(pkt); @@ -910,24 +995,10 @@ static enum resp_states do_complete(struct rxe_qp *qp, } if (pkt->mask & RXE_IETH_MASK) { - struct rxe_mem *rmr; - wc->wc_flags |= IB_WC_WITH_INVALIDATE; wc->ex.invalidate_rkey = ieth_rkey(pkt); - - rmr = rxe_pool_get_index(&rxe->mr_pool, - wc->ex.invalidate_rkey >> 8); - if (unlikely(!rmr)) { - pr_err("Bad rkey %#x invalidation\n", - wc->ex.invalidate_rkey); - return RESPST_ERROR; - } - rmr->state = RXE_MEM_STATE_FREE; - rxe_drop_ref(rmr); } - wc->qp = &qp->ibqp; - if (pkt->mask & RXE_DETH_MASK) wc->src_qp = deth_sqp(pkt); @@ -937,86 +1008,59 @@ static enum resp_states do_complete(struct rxe_qp *qp, /* have copy for srq and reference for !srq */ if (!qp->srq) - advance_consumer(qp->rq.queue); + queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT); qp->resp.wqe = NULL; if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1)) return RESPST_ERR_CQ_OVERFLOW; - if (qp->resp.state == QP_STATE_ERROR) +finish: + if (unlikely(qp->resp.state == QP_STATE_ERROR)) return RESPST_CHK_RESOURCE; - - if (!pkt) + if (unlikely(!pkt)) return RESPST_DONE; - else if (qp_type(qp) == IB_QPT_RC) + if (qp_type(qp) == IB_QPT_RC) return RESPST_ACKNOWLEDGE; else return RESPST_CLEANUP; } -static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, - u8 syndrome, u32 psn) + +static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn, + int opcode, const char *msg) { - int err = 0; + int err; struct rxe_pkt_info ack_pkt; struct sk_buff *skb; - skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE, - 0, psn, syndrome, NULL); - if (!skb) { - err = -ENOMEM; - goto err1; - } + skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome); + if (!skb) + return -ENOMEM; err = rxe_xmit_packet(qp, &ack_pkt, skb); if (err) - pr_err_ratelimited("Failed sending ack\n"); + pr_err_ratelimited("Failed sending %s\n", msg); -err1: return err; } -static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, - u8 syndrome) +static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) { - int rc = 0; - struct rxe_pkt_info ack_pkt; - struct sk_buff *skb; - struct resp_res *res; - - skb = prepare_ack_packet(qp, pkt, &ack_pkt, - IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn, - syndrome, NULL); - if (!skb) { - rc = -ENOMEM; - goto out; - } - - rxe_add_ref(qp); + return send_common_ack(qp, syndrome, psn, + IB_OPCODE_RC_ACKNOWLEDGE, "ACK"); +} - res = &qp->resp.resources[qp->resp.res_head]; - free_rd_atomic_resource(qp, res); - rxe_advance_resp_resource(qp); +static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) +{ + int ret = send_common_ack(qp, syndrome, psn, + IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, "ATOMIC ACK"); - memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(ack_pkt)); - memset((unsigned char *)SKB_TO_PKT(skb) + sizeof(ack_pkt), 0, - sizeof(skb->cb) - sizeof(ack_pkt)); - - skb_get(skb); - res->type = RXE_ATOMIC_MASK; - res->atomic.skb = skb; - res->first_psn = ack_pkt.psn; - res->last_psn = ack_pkt.psn; - res->cur_psn = ack_pkt.psn; - - rc = rxe_xmit_packet(qp, &ack_pkt, skb); - if (rc) { - pr_err_ratelimited("Failed sending ack\n"); - rxe_drop_ref(qp); - } -out: - return rc; + /* have to clear this since it is used to trigger + * long read replies + */ + qp->resp.res = NULL; + return ret; } static enum resp_states acknowledge(struct rxe_qp *qp, @@ -1026,11 +1070,11 @@ static enum resp_states acknowledge(struct rxe_qp *qp, return RESPST_CLEANUP; if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED) - send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn); + send_ack(qp, qp->resp.aeth_syndrome, pkt->psn); else if (pkt->mask & RXE_ATOMIC_MASK) - send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED); + send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); else if (bth_ack(pkt)) - send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn); + send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); return RESPST_CLEANUP; } @@ -1042,12 +1086,13 @@ static enum resp_states cleanup(struct rxe_qp *qp, if (pkt) { skb = skb_dequeue(&qp->req_pkts); - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); + rxe_put(qp->resp.mr); qp->resp.mr = NULL; } @@ -1082,10 +1127,8 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, if (pkt->mask & RXE_SEND_MASK || pkt->mask & RXE_WRITE_MASK) { /* SEND. Ack again and cleanup. C9-105. */ - if (bth_ack(pkt)) - send_ack(qp, pkt, AETH_ACK_UNLIMITED, prev_psn); - rc = RESPST_CLEANUP; - goto out; + send_ack(qp, AETH_ACK_UNLIMITED, prev_psn); + return RESPST_CLEANUP; } else if (pkt->mask & RXE_READ_MASK) { struct resp_res *res; @@ -1138,14 +1181,11 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, /* Find the operation in our list of responder resources. */ res = find_resource(qp, pkt->psn); if (res) { - skb_get(res->atomic.skb); - /* Resend the result. */ - rc = rxe_xmit_packet(qp, pkt, res->atomic.skb); - if (rc) { - pr_err("Failed resending result. This flow is not handled - skb ignored\n"); - rc = RESPST_CLEANUP; - goto out; - } + res->replay = 1; + res->cur_psn = pkt->psn; + qp->resp.res = res; + rc = RESPST_ATOMIC_REPLY; + goto out; } /* Resource not found. Class D error. Drop the request. */ @@ -1193,7 +1233,7 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp) } if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); + rxe_put(qp->resp.mr); qp->resp.mr = NULL; } @@ -1204,17 +1244,19 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp) static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) { struct sk_buff *skb; + struct rxe_queue *q = qp->rq.queue; while ((skb = skb_dequeue(&qp->req_pkts))) { - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); + ib_device_put(qp->ibqp.device); } if (notify) return; - while (!qp->srq && qp->rq.queue && queue_head(qp->rq.queue)) - advance_consumer(qp->rq.queue); + while (!qp->srq && q && queue_head(q, q->type)) + queue_advance_consumer(q, q->type); } int rxe_responder(void *arg) @@ -1223,16 +1265,15 @@ int rxe_responder(void *arg) struct rxe_dev *rxe = to_rdev(qp->ibqp.device); enum resp_states state; struct rxe_pkt_info *pkt = NULL; - int ret = 0; + int ret; - rxe_add_ref(qp); + if (!rxe_get(qp)) + return -EAGAIN; qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; - if (!qp->valid) { - ret = -EINVAL; - goto done; - } + if (!qp->valid) + goto exit; switch (qp->resp.state) { case QP_STATE_RESET: @@ -1278,6 +1319,9 @@ int rxe_responder(void *arg) case RESPST_READ_REPLY: state = read_reply(qp, pkt); break; + case RESPST_ATOMIC_REPLY: + state = atomic_reply(qp, pkt); + break; case RESPST_ACKNOWLEDGE: state = acknowledge(qp, pkt); break; @@ -1289,7 +1333,7 @@ int rxe_responder(void *arg) break; case RESPST_ERR_PSN_OUT_OF_SEQ: /* RC only - Class B. Drop packet. */ - send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn); + send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn); state = RESPST_CLEANUP; break; @@ -1311,7 +1355,7 @@ int rxe_responder(void *arg) if (qp_type(qp) == IB_QPT_RC) { rxe_counter_inc(rxe, RXE_CNT_SND_RNR); /* RC - class B */ - send_ack(qp, pkt, AETH_RNR_NAK | + send_ack(qp, AETH_RNR_NAK | (~AETH_TYPE_MASK & qp->attr.min_rnr_timer), pkt->psn); @@ -1341,6 +1385,13 @@ int rxe_responder(void *arg) } break; + case RESPST_ERR_INVALIDATE_RKEY: + /* RC - Class J. */ + qp->resp.goto_error = 1; + qp->resp.status = IB_WC_REM_INV_REQ_ERR; + state = RESPST_COMPLETE; + break; + case RESPST_ERR_LENGTH: if (qp_type(qp) == IB_QPT_RC) { /* Class C */ @@ -1393,7 +1444,7 @@ int rxe_responder(void *arg) case RESPST_ERROR: qp->resp.goto_error = 0; - pr_warn("qp#%d moved to error state\n", qp_num(qp)); + pr_debug("qp#%d moved to error state\n", qp_num(qp)); rxe_qp_error(qp); goto exit; @@ -1402,9 +1453,16 @@ int rxe_responder(void *arg) } } + /* A non-zero return value will cause rxe_do_task to + * exit its loop and end the tasklet. A zero return + * will continue looping and return to rxe_responder + */ +done: + ret = 0; + goto out; exit: ret = -EAGAIN; -done: - rxe_drop_ref(qp); +out: + rxe_put(qp); return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index d8459431534e..02b39498c370 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -1,96 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/vmalloc.h> #include "rxe.h" -#include "rxe_loc.h" #include "rxe_queue.h" -int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, - struct ib_srq_attr *attr, enum ib_srq_attr_mask mask) +int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init) { - if (srq && srq->error) { - pr_warn("srq in error state\n"); + struct ib_srq_attr *attr = &init->attr; + + if (attr->max_wr > rxe->attr.max_srq_wr) { + pr_warn("max_wr(%d) > max_srq_wr(%d)\n", + attr->max_wr, rxe->attr.max_srq_wr); goto err1; } - if (mask & IB_SRQ_MAX_WR) { - if (attr->max_wr > rxe->attr.max_srq_wr) { - pr_warn("max_wr(%d) > max_srq_wr(%d)\n", - attr->max_wr, rxe->attr.max_srq_wr); - goto err1; - } - - if (attr->max_wr <= 0) { - pr_warn("max_wr(%d) <= 0\n", attr->max_wr); - goto err1; - } - - if (srq && srq->limit && (attr->max_wr < srq->limit)) { - pr_warn("max_wr (%d) < srq->limit (%d)\n", - attr->max_wr, srq->limit); - goto err1; - } - - if (attr->max_wr < RXE_MIN_SRQ_WR) - attr->max_wr = RXE_MIN_SRQ_WR; + if (attr->max_wr <= 0) { + pr_warn("max_wr(%d) <= 0\n", attr->max_wr); + goto err1; } - if (mask & IB_SRQ_LIMIT) { - if (attr->srq_limit > rxe->attr.max_srq_wr) { - pr_warn("srq_limit(%d) > max_srq_wr(%d)\n", - attr->srq_limit, rxe->attr.max_srq_wr); - goto err1; - } + if (attr->max_wr < RXE_MIN_SRQ_WR) + attr->max_wr = RXE_MIN_SRQ_WR; - if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) { - pr_warn("srq_limit (%d) > cur limit(%d)\n", - attr->srq_limit, - srq->rq.queue->buf->index_mask); - goto err1; - } + if (attr->max_sge > rxe->attr.max_srq_sge) { + pr_warn("max_sge(%d) > max_srq_sge(%d)\n", + attr->max_sge, rxe->attr.max_srq_sge); + goto err1; } - if (mask == IB_SRQ_INIT_MASK) { - if (attr->max_sge > rxe->attr.max_srq_sge) { - pr_warn("max_sge(%d) > max_srq_sge(%d)\n", - attr->max_sge, rxe->attr.max_srq_sge); - goto err1; - } - - if (attr->max_sge < RXE_MIN_SRQ_SGE) - attr->max_sge = RXE_MIN_SRQ_SGE; - } + if (attr->max_sge < RXE_MIN_SRQ_SGE) + attr->max_sge = RXE_MIN_SRQ_SGE; return 0; @@ -105,11 +48,12 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, int err; int srq_wqe_size; struct rxe_queue *q; + enum queue_type type; srq->ibsrq.event_handler = init->event_handler; srq->ibsrq.srq_context = init->srq_context; srq->limit = init->attr.srq_limit; - srq->srq_num = srq->pelem.index; + srq->srq_num = srq->elem.index; srq->rq.max_wr = init->attr.max_wr; srq->rq.max_sge = init->attr.max_sge; @@ -118,8 +62,8 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, spin_lock_init(&srq->rq.producer_lock); spin_lock_init(&srq->rq.consumer_lock); - q = rxe_queue_init(rxe, &srq->rq.max_wr, - srq_wqe_size); + type = QUEUE_TYPE_FROM_CLIENT; + q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type); if (!q) { pr_warn("unable to allocate queue for srq\n"); return -ENOMEM; @@ -146,6 +90,57 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, return 0; } +int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask) +{ + if (srq->error) { + pr_warn("srq in error state\n"); + goto err1; + } + + if (mask & IB_SRQ_MAX_WR) { + if (attr->max_wr > rxe->attr.max_srq_wr) { + pr_warn("max_wr(%d) > max_srq_wr(%d)\n", + attr->max_wr, rxe->attr.max_srq_wr); + goto err1; + } + + if (attr->max_wr <= 0) { + pr_warn("max_wr(%d) <= 0\n", attr->max_wr); + goto err1; + } + + if (srq->limit && (attr->max_wr < srq->limit)) { + pr_warn("max_wr (%d) < srq->limit (%d)\n", + attr->max_wr, srq->limit); + goto err1; + } + + if (attr->max_wr < RXE_MIN_SRQ_WR) + attr->max_wr = RXE_MIN_SRQ_WR; + } + + if (mask & IB_SRQ_LIMIT) { + if (attr->srq_limit > rxe->attr.max_srq_wr) { + pr_warn("srq_limit(%d) > max_srq_wr(%d)\n", + attr->srq_limit, rxe->attr.max_srq_wr); + goto err1; + } + + if (attr->srq_limit > srq->rq.queue->buf->index_mask) { + pr_warn("srq_limit (%d) > cur limit(%d)\n", + attr->srq_limit, + srq->rq.queue->buf->index_mask); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata) @@ -179,3 +174,14 @@ err2: srq->rq.queue = NULL; return err; } + +void rxe_srq_cleanup(struct rxe_pool_elem *elem) +{ + struct rxe_srq *srq = container_of(elem, typeof(*srq), elem); + + if (srq->pd) + rxe_put(srq->pd); + + if (srq->rq.queue) + rxe_queue_cleanup(srq->rq.queue); +} diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c deleted file mode 100644 index ccda5f5a3bc0..000000000000 --- a/drivers/infiniband/sw/rxe/rxe_sysfs.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "rxe.h" -#include "rxe_net.h" - -/* Copy argument and remove trailing CR. Return the new length. */ -static int sanitize_arg(const char *val, char *intf, int intf_len) -{ - int len; - - if (!val) - return 0; - - /* Remove newline. */ - for (len = 0; len < intf_len - 1 && val[len] && val[len] != '\n'; len++) - intf[len] = val[len]; - intf[len] = 0; - - if (len == 0 || (val[len] != 0 && val[len] != '\n')) - return 0; - - return len; -} - -static int rxe_param_set_add(const char *val, const struct kernel_param *kp) -{ - int len; - int err = 0; - char intf[32]; - struct net_device *ndev; - struct rxe_dev *exists; - - len = sanitize_arg(val, intf, sizeof(intf)); - if (!len) { - pr_err("add: invalid interface name\n"); - return -EINVAL; - } - - ndev = dev_get_by_name(&init_net, intf); - if (!ndev) { - pr_err("interface %s not found\n", intf); - return -EINVAL; - } - - exists = rxe_get_dev_from_net(ndev); - if (exists) { - ib_device_put(&exists->ib_dev); - pr_err("already configured on %s\n", intf); - err = -EINVAL; - goto err; - } - - err = rxe_net_add("rxe%d", ndev); - if (err) { - pr_err("failed to add %s\n", intf); - goto err; - } - -err: - dev_put(ndev); - return err; -} - -static int rxe_param_set_remove(const char *val, const struct kernel_param *kp) -{ - int len; - char intf[32]; - struct ib_device *ib_dev; - - len = sanitize_arg(val, intf, sizeof(intf)); - if (!len) { - pr_err("add: invalid interface name\n"); - return -EINVAL; - } - - if (strncmp("all", intf, len) == 0) { - pr_info("rxe_sys: remove all"); - ib_unregister_driver(RDMA_DRIVER_RXE); - return 0; - } - - ib_dev = ib_device_get_by_name(intf, RDMA_DRIVER_RXE); - if (!ib_dev) { - pr_err("not configured on %s\n", intf); - return -EINVAL; - } - - ib_unregister_device_and_put(ib_dev); - - return 0; -} - -static const struct kernel_param_ops rxe_add_ops = { - .set = rxe_param_set_add, -}; - -static const struct kernel_param_ops rxe_remove_ops = { - .set = rxe_param_set_remove, -}; - -module_param_cb(add, &rxe_add_ops, NULL, 0200); -MODULE_PARM_DESC(add, "DEPRECATED. Create RXE device over network interface"); -module_param_cb(remove, &rxe_remove_ops, NULL, 0200); -MODULE_PARM_DESC(remove, "DEPRECATED. Remove RXE device over network interface"); diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 08f05ac5f5d5..ec2b7de1c497 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -1,41 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/kernel.h> #include <linux/interrupt.h> #include <linux/hardirq.h> -#include "rxe_task.h" +#include "rxe.h" int __rxe_do_task(struct rxe_task *task) @@ -55,29 +28,29 @@ int __rxe_do_task(struct rxe_task *task) * a second caller finds the task already running * but looks just after the last call to func */ -void rxe_do_task(unsigned long data) +void rxe_do_task(struct tasklet_struct *t) { int cont; int ret; - unsigned long flags; - struct rxe_task *task = (struct rxe_task *)data; + struct rxe_task *task = from_tasklet(task, t, tasklet); + unsigned int iterations = RXE_MAX_ITERATIONS; - spin_lock_irqsave(&task->state_lock, flags); + spin_lock_bh(&task->state_lock); switch (task->state) { case TASK_STATE_START: task->state = TASK_STATE_BUSY; - spin_unlock_irqrestore(&task->state_lock, flags); + spin_unlock_bh(&task->state_lock); break; case TASK_STATE_BUSY: task->state = TASK_STATE_ARMED; - /* fall through */ + fallthrough; case TASK_STATE_ARMED: - spin_unlock_irqrestore(&task->state_lock, flags); + spin_unlock_bh(&task->state_lock); return; default: - spin_unlock_irqrestore(&task->state_lock, flags); + spin_unlock_bh(&task->state_lock); pr_warn("%s failed with bad state %d\n", __func__, task->state); return; } @@ -86,16 +59,23 @@ void rxe_do_task(unsigned long data) cont = 0; ret = task->func(task->arg); - spin_lock_irqsave(&task->state_lock, flags); + spin_lock_bh(&task->state_lock); switch (task->state) { case TASK_STATE_BUSY: - if (ret) + if (ret) { task->state = TASK_STATE_START; - else + } else if (iterations--) { cont = 1; + } else { + /* reschedule the tasklet and exit + * the loop to give up the cpu + */ + tasklet_schedule(&task->tasklet); + task->state = TASK_STATE_START; + } break; - /* soneone tried to run the task since the last time we called + /* someone tried to run the task since the last time we called * func, so we will call one more time regardless of the * return value */ @@ -108,22 +88,21 @@ void rxe_do_task(unsigned long data) pr_warn("%s failed with bad state %d\n", __func__, task->state); } - spin_unlock_irqrestore(&task->state_lock, flags); + spin_unlock_bh(&task->state_lock); } while (cont); task->ret = ret; } -int rxe_init_task(void *obj, struct rxe_task *task, +int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *), char *name) { - task->obj = obj; task->arg = arg; task->func = func; snprintf(task->name, sizeof(task->name), "%s", name); task->destroyed = false; - tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task); + tasklet_setup(&task->tasklet, rxe_do_task); task->state = TASK_STATE_START; spin_lock_init(&task->state_lock); @@ -133,7 +112,6 @@ int rxe_init_task(void *obj, struct rxe_task *task, void rxe_cleanup_task(struct rxe_task *task) { - unsigned long flags; bool idle; /* @@ -143,9 +121,9 @@ void rxe_cleanup_task(struct rxe_task *task) task->destroyed = true; do { - spin_lock_irqsave(&task->state_lock, flags); + spin_lock_bh(&task->state_lock); idle = (task->state == TASK_STATE_START); - spin_unlock_irqrestore(&task->state_lock, flags); + spin_unlock_bh(&task->state_lock); } while (!idle); tasklet_kill(&task->tasklet); @@ -159,7 +137,7 @@ void rxe_run_task(struct rxe_task *task, int sched) if (sched) tasklet_schedule(&task->tasklet); else - rxe_do_task((unsigned long)task); + rxe_do_task(&task->tasklet); } void rxe_disable_task(struct rxe_task *task) diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index 08ff42d451c6..7f612a1c68a7 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_TASK_H @@ -46,7 +19,6 @@ enum { * called again. */ struct rxe_task { - void *obj; struct tasklet_struct tasklet; int state; spinlock_t state_lock; /* spinlock for task state */ @@ -60,9 +32,9 @@ struct rxe_task { /* * init rxe_task structure * arg => parameter to pass to fcn - * fcn => function to call until it returns != 0 + * func => function to call until it returns != 0 */ -int rxe_init_task(void *obj, struct rxe_task *task, +int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *), char *name); /* cleanup task */ @@ -80,7 +52,7 @@ int __rxe_do_task(struct rxe_task *task); * work to do someone must reschedule the task before * leaving */ -void rxe_do_task(unsigned long data); +void rxe_do_task(struct tasklet_struct *t); /* run a task, else schedule it to run as a tasklet, The decision * to run or schedule tasklet is based on the parameter sched. diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 9dd4bd7aea92..88825edc7dce 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1,41 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/dma-mapping.h> #include <net/addrconf.h> #include <rdma/uverbs_ioctl.h> + #include "rxe.h" -#include "rxe_loc.h" #include "rxe_queue.h" #include "rxe_hw_counters.h" @@ -53,16 +26,13 @@ static int rxe_query_device(struct ib_device *dev, } static int rxe_query_port(struct ib_device *dev, - u8 port_num, struct ib_port_attr *attr) + u32 port_num, struct ib_port_attr *attr) { struct rxe_dev *rxe = to_rdev(dev); - struct rxe_port *port; int rc; - port = &rxe->port; - /* *attr being zeroed by the caller, avoid zeroing it here */ - *attr = port->attr; + *attr = rxe->port.attr; mutex_lock(&rxe->usdev_lock); rc = ib_get_eth_speed(dev, port_num, &attr->active_speed, @@ -81,24 +51,13 @@ static int rxe_query_port(struct ib_device *dev, } static int rxe_query_pkey(struct ib_device *device, - u8 port_num, u16 index, u16 *pkey) + u32 port_num, u16 index, u16 *pkey) { - struct rxe_dev *rxe = to_rdev(device); - struct rxe_port *port; - - port = &rxe->port; - - if (unlikely(index >= port->attr.pkey_tbl_len)) { - dev_warn(device->dev.parent, "invalid index = %d\n", - index); - goto err1; - } + if (index > 0) + return -EINVAL; - *pkey = port->pkey_tbl[index]; + *pkey = IB_DEFAULT_PKEY_FULL; return 0; - -err1: - return -EINVAL; } static int rxe_modify_device(struct ib_device *dev, @@ -122,7 +81,7 @@ static int rxe_modify_device(struct ib_device *dev, } static int rxe_modify_port(struct ib_device *dev, - u8 port_num, int mask, struct ib_port_modify *attr) + u32 port_num, int mask, struct ib_port_modify *attr) { struct rxe_dev *rxe = to_rdev(dev); struct rxe_port *port; @@ -139,29 +98,27 @@ static int rxe_modify_port(struct ib_device *dev, } static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, - u8 port_num) + u32 port_num) { - struct rxe_dev *rxe = to_rdev(dev); - - return rxe_link_layer(rxe, port_num); + return IB_LINK_LAYER_ETHERNET; } -static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) +static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata) { - struct rxe_dev *rxe = to_rdev(uctx->device); - struct rxe_ucontext *uc = to_ruc(uctx); + struct rxe_dev *rxe = to_rdev(ibuc->device); + struct rxe_ucontext *uc = to_ruc(ibuc); - return rxe_add_to_pool(&rxe->uc_pool, &uc->pelem); + return rxe_add_to_pool(&rxe->uc_pool, uc); } static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc) { struct rxe_ucontext *uc = to_ruc(ibuc); - rxe_drop_ref(uc); + rxe_cleanup(uc); } -static int rxe_port_immutable(struct ib_device *dev, u8 port_num, +static int rxe_port_immutable(struct ib_device *dev, u32 port_num, struct ib_port_immutable *immutable) { int err; @@ -185,33 +142,64 @@ static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); - return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem); + return rxe_add_to_pool(&rxe->pd_pool, pd); } -static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rxe_pd *pd = to_rpd(ibpd); - rxe_drop_ref(pd); + rxe_cleanup(pd); + return 0; } -static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, - u32 flags, struct ib_udata *udata) +static int rxe_create_ah(struct ib_ah *ibah, + struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { - int err; struct rxe_dev *rxe = to_rdev(ibah->device); struct rxe_ah *ah = to_rah(ibah); + struct rxe_create_ah_resp __user *uresp = NULL; + int err; - err = rxe_av_chk_attr(rxe, attr); + if (udata) { + /* test if new user provider */ + if (udata->outlen >= sizeof(*uresp)) + uresp = udata->outbuf; + ah->is_user = true; + } else { + ah->is_user = false; + } + + err = rxe_av_chk_attr(rxe, init_attr->ah_attr); if (err) return err; - err = rxe_add_to_pool(&rxe->ah_pool, &ah->pelem); + err = rxe_add_to_pool_ah(&rxe->ah_pool, ah, + init_attr->flags & RDMA_CREATE_AH_SLEEPABLE); if (err) return err; - rxe_init_av(attr, &ah->av); + /* create index > 0 */ + ah->ah_num = ah->elem.index; + + if (uresp) { + /* only if new user provider */ + err = copy_to_user(&uresp->ah_num, &ah->ah_num, + sizeof(uresp->ah_num)); + if (err) { + rxe_cleanup(ah); + return -EFAULT; + } + } else if (ah->is_user) { + /* only if old user provider */ + ah->ah_num = 0; + } + + rxe_init_av(init_attr->ah_attr, &ah->av); + rxe_finalize(ah); + return 0; } @@ -239,11 +227,13 @@ static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -static void rxe_destroy_ah(struct ib_ah *ibah, u32 flags) +static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags) { struct rxe_ah *ah = to_rah(ibah); - rxe_drop_ref(ah); + rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE); + + return 0; } static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) @@ -253,8 +243,10 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) u32 length; struct rxe_recv_wqe *recv_wqe; int num_sge = ibwr->num_sge; + int full; - if (unlikely(queue_full(rq->queue))) { + full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER); + if (unlikely(full)) { err = -ENOMEM; goto err1; } @@ -268,9 +260,8 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) for (i = 0; i < num_sge; i++) length += ibwr->sg_list[i].length; - recv_wqe = producer_addr(rq->queue); + recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_TO_DRIVER); recv_wqe->wr_id = ibwr->wr_id; - recv_wqe->num_sge = num_sge; memcpy(recv_wqe->dma.sge, ibwr->sg_list, num_sge * sizeof(struct ib_sge)); @@ -281,12 +272,8 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) recv_wqe->dma.cur_sge = 0; recv_wqe->dma.sge_offset = 0; - /* make sure all changes to the work queue are written before we - * update the producer pointer - */ - smp_wmb(); + queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER); - advance_producer(rq->queue); return 0; err1: @@ -308,27 +295,29 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, uresp = udata->outbuf; } - err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK); + if (init->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + + err = rxe_srq_chk_init(rxe, init); if (err) - goto err1; + return err; - err = rxe_add_to_pool(&rxe->srq_pool, &srq->pelem); + err = rxe_add_to_pool(&rxe->srq_pool, srq); if (err) - goto err1; + return err; - rxe_add_ref(pd); + rxe_get(pd); srq->pd = pd; err = rxe_srq_from_init(rxe, srq, init, udata, uresp); if (err) - goto err2; + goto err_cleanup; return 0; -err2: - rxe_drop_ref(pd); - rxe_drop_ref(srq); -err1: +err_cleanup: + rxe_cleanup(srq); + return err; } @@ -352,16 +341,12 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, err = rxe_srq_chk_attr(rxe, srq, attr, mask); if (err) - goto err1; + return err; err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata); if (err) - goto err1; - + return err; return 0; - -err1: - return err; } static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) @@ -377,23 +362,20 @@ static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) return 0; } -static void rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct rxe_srq *srq = to_rsrq(ibsrq); - if (srq->rq.queue) - rxe_queue_cleanup(srq->rq.queue); - - rxe_drop_ref(srq->pd); - rxe_drop_ref(srq); + rxe_cleanup(srq); + return 0; } static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { int err = 0; - unsigned long flags; struct rxe_srq *srq = to_rsrq(ibsrq); + unsigned long flags; spin_lock_irqsave(&srq->rq.producer_lock, flags); @@ -412,54 +394,51 @@ static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, return err; } -static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init, - struct ib_udata *udata) +static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init, + struct ib_udata *udata) { int err; - struct rxe_dev *rxe = to_rdev(ibpd->device); - struct rxe_pd *pd = to_rpd(ibpd); - struct rxe_qp *qp; + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_pd *pd = to_rpd(ibqp->pd); + struct rxe_qp *qp = to_rqp(ibqp); struct rxe_create_qp_resp __user *uresp = NULL; if (udata) { if (udata->outlen < sizeof(*uresp)) - return ERR_PTR(-EINVAL); + return -EINVAL; uresp = udata->outbuf; } + if (init->create_flags) + return -EOPNOTSUPP; + err = rxe_qp_chk_init(rxe, init); if (err) - goto err1; - - qp = rxe_alloc(&rxe->qp_pool); - if (!qp) { - err = -ENOMEM; - goto err1; - } + return err; if (udata) { - if (udata->inlen) { - err = -EINVAL; - goto err2; - } - qp->is_user = 1; + if (udata->inlen) + return -EINVAL; + + qp->is_user = true; + } else { + qp->is_user = false; } - rxe_add_index(qp); + err = rxe_add_to_pool(&rxe->qp_pool, qp); + if (err) + return err; - err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd, udata); + err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata); if (err) - goto err3; + goto qp_init; - return &qp->ibqp; + rxe_finalize(qp); + return 0; -err3: - rxe_drop_index(qp); -err2: - rxe_drop_ref(qp); -err1: - return ERR_PTR(err); +qp_init: + rxe_cleanup(qp); + return err; } static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, @@ -469,6 +448,9 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct rxe_dev *rxe = to_rdev(ibqp->device); struct rxe_qp *qp = to_rqp(ibqp); + if (mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + err = rxe_qp_chk_attr(rxe, qp, attr, mask); if (err) goto err1; @@ -477,6 +459,11 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (err) goto err1; + if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH)) + qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label, + qp->ibqp.qp_num, + qp->attr.dest_qp_num); + return 0; err1: @@ -497,10 +484,13 @@ static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct rxe_qp *qp = to_rqp(ibqp); + int ret; + + ret = rxe_qp_chk_destroy(qp); + if (ret) + return ret; - rxe_qp_destroy(qp); - rxe_drop_index(qp); - rxe_drop_ref(qp); + rxe_cleanup(qp); return 0; } @@ -535,15 +525,16 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, const struct ib_send_wr *ibwr) { wr->wr_id = ibwr->wr_id; - wr->num_sge = ibwr->num_sge; wr->opcode = ibwr->opcode; wr->send_flags = ibwr->send_flags; if (qp_type(qp) == IB_QPT_UD || - qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { + struct ib_ah *ibah = ud_wr(ibwr)->ah; + wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn; wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey; + wr->wr.ud.ah_num = to_rah(ibah)->ah_num; if (qp_type(qp) == IB_QPT_GSI) wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index; if (wr->opcode == IB_WR_SEND_WITH_IMM) @@ -552,7 +543,7 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE_WITH_IMM: wr->ex.imm_data = ibwr->ex.imm_data; - /* fall through */ + fallthrough; case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr; @@ -587,37 +578,37 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, } } -static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, +static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe, + const struct ib_send_wr *ibwr) +{ + struct ib_sge *sge = ibwr->sg_list; + u8 *p = wqe->dma.inline_data; + int i; + + for (i = 0; i < ibwr->num_sge; i++, sge++) { + memcpy(p, (void *)(uintptr_t)sge->addr, sge->length); + p += sge->length; + } +} + +static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, unsigned int mask, unsigned int length, struct rxe_send_wqe *wqe) { int num_sge = ibwr->num_sge; - struct ib_sge *sge; - int i; - u8 *p; init_send_wr(qp, &wqe->wr, ibwr); - if (qp_type(qp) == IB_QPT_UD || - qp_type(qp) == IB_QPT_SMI || - qp_type(qp) == IB_QPT_GSI) - memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av)); - - if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) { - p = wqe->dma.inline_data; - - sge = ibwr->sg_list; - for (i = 0; i < num_sge; i++, sge++) { - memcpy(p, (void *)(uintptr_t)sge->addr, - sge->length); - - p += sge->length; - } - } else if (mask & WR_REG_MASK) { + /* local operation */ + if (unlikely(mask & WR_LOCAL_OP_MASK)) { wqe->mask = mask; wqe->state = wqe_state_posted; - return 0; - } else + return; + } + + if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) + copy_inline_data_to_wqe(wqe, ibwr); + else memcpy(wqe->dma.sge, ibwr->sg_list, num_sge * sizeof(struct ib_sge)); @@ -631,8 +622,6 @@ static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, wqe->dma.sge_offset = 0; wqe->state = wqe_state_posted; wqe->ssn = atomic_add_return(1, &qp->ssn); - - return 0; } static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, @@ -642,6 +631,7 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, struct rxe_sq *sq = &qp->sq; struct rxe_send_wqe *send_wqe; unsigned long flags; + int full; err = validate_send_wr(qp, ibwr, mask, length); if (err) @@ -649,31 +639,21 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, spin_lock_irqsave(&qp->sq.sq_lock, flags); - if (unlikely(queue_full(sq->queue))) { - err = -ENOMEM; - goto err1; + full = queue_full(sq->queue, QUEUE_TYPE_TO_DRIVER); + + if (unlikely(full)) { + spin_unlock_irqrestore(&qp->sq.sq_lock, flags); + return -ENOMEM; } - send_wqe = producer_addr(sq->queue); + send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_TO_DRIVER); + init_send_wqe(qp, ibwr, mask, length, send_wqe); - err = init_send_wqe(qp, ibwr, mask, length, send_wqe); - if (unlikely(err)) - goto err1; + queue_advance_producer(sq->queue, QUEUE_TYPE_TO_DRIVER); - /* - * make sure all changes to the work queue are - * written before we update the producer pointer - */ - smp_wmb(); - - advance_producer(sq->queue); spin_unlock_irqrestore(&qp->sq.sq_lock, flags); return 0; - -err1: - spin_unlock_irqrestore(&qp->sq.sq_lock, flags); - return err; } static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, @@ -683,6 +663,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, unsigned int mask; unsigned int length = 0; int i; + struct ib_send_wr *next; while (wr) { mask = wr_opcode_mask(wr->opcode, qp); @@ -699,6 +680,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, break; } + next = wr->next; + length = 0; for (i = 0; i < wr->num_sge; i++) length += wr->sg_list[i].length; @@ -709,7 +692,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, *bad_wr = wr; break; } - wr = wr->next; + wr = next; } rxe_run_task(&qp->req.task, 1); @@ -798,7 +781,7 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector); if (err) @@ -809,16 +792,23 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (err) return err; - return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem); + return rxe_add_to_pool(&rxe->cq_pool, cq); } -static void rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct rxe_cq *cq = to_rcq(ibcq); + /* See IBA C11-17: The CI shall return an error if this Verb is + * invoked while a Work Queue is still associated with the CQ. + */ + if (atomic_read(&cq->num_wq)) + return -EINVAL; + rxe_cq_disable(cq); - rxe_drop_ref(cq); + rxe_cleanup(cq); + return 0; } static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) @@ -857,12 +847,12 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) spin_lock_irqsave(&cq->cq_lock, flags); for (i = 0; i < num_entries; i++) { - cqe = queue_head(cq->queue); + cqe = queue_head(cq->queue, QUEUE_TYPE_FROM_DRIVER); if (!cqe) break; memcpy(wc++, &cqe->ibwc, sizeof(*wc)); - advance_consumer(cq->queue); + queue_advance_consumer(cq->queue, QUEUE_TYPE_FROM_DRIVER); } spin_unlock_irqrestore(&cq->cq_lock, flags); @@ -872,7 +862,9 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt) { struct rxe_cq *cq = to_rcq(ibcq); - int count = queue_count(cq->queue); + int count; + + count = queue_count(cq->queue, QUEUE_TYPE_FROM_DRIVER); return (count > wc_cnt) ? wc_cnt : count; } @@ -880,14 +872,17 @@ static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt) static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct rxe_cq *cq = to_rcq(ibcq); - unsigned long irq_flags; int ret = 0; + int empty; + unsigned long irq_flags; spin_lock_irqsave(&cq->cq_lock, irq_flags); if (cq->notify != IB_CQ_NEXT_COMP) cq->notify = flags & IB_CQ_SOLICITED_MASK; - if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue)) + empty = queue_empty(cq->queue, QUEUE_TYPE_FROM_DRIVER); + + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty) ret = 1; spin_unlock_irqrestore(&cq->cq_lock, irq_flags); @@ -899,31 +894,19 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) { struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); - struct rxe_mem *mr; - int err; + struct rxe_mr *mr; mr = rxe_alloc(&rxe->mr_pool); - if (!mr) { - err = -ENOMEM; - goto err1; - } + if (!mr) + return ERR_PTR(-ENOMEM); - rxe_add_index(mr); + rxe_get(pd); + mr->ibmr.pd = ibpd; - rxe_add_ref(pd); - - err = rxe_mem_init_dma(pd, access, mr); - if (err) - goto err2; + rxe_mr_init_dma(access, mr); + rxe_finalize(mr); return &mr->ibmr; - -err2: - rxe_drop_ref(pd); - rxe_drop_index(mr); - rxe_drop_ref(mr); -err1: - return ERR_PTR(err); } static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, @@ -935,7 +918,7 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, int err; struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); - struct rxe_mem *mr; + struct rxe_mr *mr; mr = rxe_alloc(&rxe->mr_pool); if (!mr) { @@ -943,42 +926,30 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, goto err2; } - rxe_add_index(mr); - rxe_add_ref(pd); + rxe_get(pd); + mr->ibmr.pd = ibpd; - err = rxe_mem_init_user(pd, start, length, iova, - access, udata, mr); + err = rxe_mr_init_user(rxe, start, length, iova, access, mr); if (err) goto err3; + rxe_finalize(mr); + return &mr->ibmr; err3: - rxe_drop_ref(pd); - rxe_drop_index(mr); - rxe_drop_ref(mr); + rxe_cleanup(mr); err2: return ERR_PTR(err); } -static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) -{ - struct rxe_mem *mr = to_rmr(ibmr); - - mr->state = RXE_MEM_STATE_ZOMBIE; - rxe_drop_ref(mr->pd); - rxe_drop_index(mr); - rxe_drop_ref(mr); - return 0; -} - static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); - struct rxe_mem *mr; + struct rxe_mr *mr; int err; if (mr_type != IB_MR_TYPE_MEM_REG) @@ -990,27 +961,26 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, goto err1; } - rxe_add_index(mr); + rxe_get(pd); + mr->ibmr.pd = ibpd; - rxe_add_ref(pd); - - err = rxe_mem_init_fast(pd, max_num_sg, mr); + err = rxe_mr_init_fast(max_num_sg, mr); if (err) goto err2; + rxe_finalize(mr); + return &mr->ibmr; err2: - rxe_drop_ref(pd); - rxe_drop_index(mr); - rxe_drop_ref(mr); + rxe_cleanup(mr); err1: return ERR_PTR(err); } static int rxe_set_page(struct ib_mr *ibmr, u64 addr) { - struct rxe_mem *mr = to_rmr(ibmr); + struct rxe_mr *mr = to_rmr(ibmr); struct rxe_map *map; struct rxe_phys_buf *buf; @@ -1030,56 +1000,27 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 addr) static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { - struct rxe_mem *mr = to_rmr(ibmr); + struct rxe_mr *mr = to_rmr(ibmr); int n; mr->nbuf = 0; n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page); - mr->va = ibmr->iova; - mr->iova = ibmr->iova; - mr->length = ibmr->length; mr->page_shift = ilog2(ibmr->page_size); mr->page_mask = ibmr->page_size - 1; - mr->offset = mr->iova & mr->page_mask; + mr->offset = ibmr->iova & mr->page_mask; return n; } -static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) -{ - int err; - struct rxe_dev *rxe = to_rdev(ibqp->device); - struct rxe_qp *qp = to_rqp(ibqp); - struct rxe_mc_grp *grp; - - /* takes a ref on grp if successful */ - err = rxe_mcast_get_grp(rxe, mgid, &grp); - if (err) - return err; - - err = rxe_mcast_add_grp_elem(rxe, qp, grp); - - rxe_drop_ref(grp); - return err; -} - -static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) -{ - struct rxe_dev *rxe = to_rdev(ibqp->device); - struct rxe_qp *qp = to_rqp(ibqp); - - return rxe_mcast_drop_grp_elem(rxe, qp, mgid); -} - static ssize_t parent_show(struct device *device, struct device_attribute *attr, char *buf) { struct rxe_dev *rxe = rdma_device_to_drv_device(device, struct rxe_dev, ib_dev); - return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1)); + return sysfs_emit(buf, "%s\n", rxe_parent_name(rxe, 1)); } static DEVICE_ATTR_RO(parent); @@ -1107,8 +1048,9 @@ static const struct ib_device_ops rxe_dev_ops = { .driver_id = RDMA_DRIVER_RXE, .uverbs_abi_ver = RXE_UVERBS_ABI_VERSION, - .alloc_hw_stats = rxe_ib_alloc_hw_stats, + .alloc_hw_port_stats = rxe_ib_alloc_hw_port_stats, .alloc_mr = rxe_alloc_mr, + .alloc_mw = rxe_alloc_mw, .alloc_pd = rxe_alloc_pd, .alloc_ucontext = rxe_alloc_ucontext, .attach_mcast = rxe_attach_mcast, @@ -1116,7 +1058,9 @@ static const struct ib_device_ops rxe_dev_ops = { .create_cq = rxe_create_cq, .create_qp = rxe_create_qp, .create_srq = rxe_create_srq, + .create_user_ah = rxe_create_ah, .dealloc_driver = rxe_dealloc, + .dealloc_mw = rxe_dealloc_mw, .dealloc_pd = rxe_dealloc_pd, .dealloc_ucontext = rxe_dealloc_ucontext, .dereg_mr = rxe_dereg_mr, @@ -1125,6 +1069,7 @@ static const struct ib_device_ops rxe_dev_ops = { .destroy_qp = rxe_destroy_qp, .destroy_srq = rxe_destroy_srq, .detach_mcast = rxe_detach_mcast, + .device_group = &rxe_attr_group, .enable_driver = rxe_enable_driver, .get_dma_mr = rxe_get_dma_mr, .get_hw_stats = rxe_ib_get_hw_stats, @@ -1155,80 +1100,39 @@ static const struct ib_device_ops rxe_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_qp, rxe_qp, ibqp), INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc), + INIT_RDMA_OBJ_SIZE(ib_mw, rxe_mw, ibmw), }; int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) { int err; struct ib_device *dev = &rxe->ib_dev; - struct crypto_shash *tfm; - strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); + strscpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; dev->num_comp_vectors = num_possible_cpus(); - dev->dev.parent = rxe_dma_device(rxe); dev->local_dma_lkey = 0; addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); - dev->dev.dma_ops = &dma_virt_ops; - dev->dev.dma_parms = &rxe->dma_parms; - rxe->dma_parms = (struct device_dma_parameters) - { .max_segment_size = SZ_2G }; - dma_coerce_mask_and_coherent(&dev->dev, - dma_get_required_mask(&dev->dev)); - - dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) - | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) - | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) - | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) - | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_REG_MR) - | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) - | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) - ; + + dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) | + BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ); ib_set_device_ops(dev, &rxe_dev_ops); err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1); if (err) return err; - tfm = crypto_alloc_shash("crc32", 0, 0); - if (IS_ERR(tfm)) { - pr_err("failed to allocate crc algorithm err:%ld\n", - PTR_ERR(tfm)); - return PTR_ERR(tfm); - } - rxe->tfm = tfm; + err = rxe_icrc_init(rxe); + if (err) + return err; - rdma_set_device_sysfs_group(dev, &rxe_attr_group); - err = ib_register_device(dev, ibdev_name); + err = ib_register_device(dev, ibdev_name, NULL); if (err) pr_warn("%s failed with error %d\n", __func__, err); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 92de39c4a7c1..5f5cbfcb3569 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_VERBS_H @@ -36,7 +9,6 @@ #include <linux/interrupt.h> #include <linux/workqueue.h> -#include <rdma/rdma_user_rxe.h> #include "rxe_pool.h" #include "rxe_task.h" #include "rxe_hw_counters.h" @@ -62,19 +34,20 @@ static inline int psn_compare(u32 psn_a, u32 psn_b) struct rxe_ucontext { struct ib_ucontext ibuc; - struct rxe_pool_entry pelem; + struct rxe_pool_elem elem; }; struct rxe_pd { struct ib_pd ibpd; - struct rxe_pool_entry pelem; + struct rxe_pool_elem elem; }; struct rxe_ah { struct ib_ah ibah; - struct rxe_pool_entry pelem; - struct rxe_pd *pd; + struct rxe_pool_elem elem; struct rxe_av av; + bool is_user; + int ah_num; }; struct rxe_cqe { @@ -86,13 +59,14 @@ struct rxe_cqe { struct rxe_cq { struct ib_cq ibcq; - struct rxe_pool_entry pelem; + struct rxe_pool_elem elem; struct rxe_queue *queue; spinlock_t cq_lock; u8 notify; bool is_dying; - int is_user; + bool is_user; struct tasklet_struct comp_task; + atomic_t num_wq; }; enum wqe_state { @@ -121,7 +95,7 @@ struct rxe_rq { struct rxe_srq { struct ib_srq ibsrq; - struct rxe_pool_entry pelem; + struct rxe_pool_elem elem; struct rxe_pd *pd; struct rxe_rq rq; u32 srq_num; @@ -149,11 +123,13 @@ struct rxe_req_info { int need_rd_atomic; int wait_psn; int need_retry; + int wait_for_rnr_timer; int noack_pkts; struct rxe_task task; }; struct rxe_comp_info { + enum rxe_qp_state state; u32 psn; int opcode; int timeout; @@ -180,10 +156,9 @@ struct resp_res { union { struct { - struct sk_buff *skb; + u64 orig_val; } atomic; struct { - struct rxe_mem *mr; u64 va_org; u32 rkey; u32 length; @@ -210,11 +185,11 @@ struct rxe_resp_info { /* RDMA read / atomic only */ u64 va; - struct rxe_mem *mr; + u64 offset; + struct rxe_mr *mr; u32 resid; u32 rkey; u32 length; - u64 atomic_orig; /* SRQ only */ struct { @@ -233,12 +208,12 @@ struct rxe_resp_info { }; struct rxe_qp { - struct rxe_pool_entry pelem; struct ib_qp ibqp; + struct rxe_pool_elem elem; struct ib_qp_attr attr; unsigned int valid; unsigned int mtu; - int is_user; + bool is_user; struct rxe_pd *pd; struct rxe_srq *srq; @@ -257,13 +232,10 @@ struct rxe_qp { struct rxe_av pri_av; struct rxe_av alt_av; - /* list of mcast groups qp has joined (for cleanup) */ - struct list_head grp_list; - spinlock_t grp_lock; /* guard grp_list */ + atomic_t mcg_num; struct sk_buff_head req_pkts; struct sk_buff_head resp_pkts; - struct sk_buff_head send_pkts; struct rxe_req_info req; struct rxe_comp_info comp; @@ -289,19 +261,20 @@ struct rxe_qp { struct execute_work cleanup_work; }; -enum rxe_mem_state { - RXE_MEM_STATE_ZOMBIE, - RXE_MEM_STATE_INVALID, - RXE_MEM_STATE_FREE, - RXE_MEM_STATE_VALID, +enum rxe_mr_state { + RXE_MR_STATE_INVALID, + RXE_MR_STATE_FREE, + RXE_MR_STATE_VALID, +}; + +enum rxe_mr_copy_dir { + RXE_TO_MR_OBJ, + RXE_FROM_MR_OBJ, }; -enum rxe_mem_type { - RXE_MEM_TYPE_NONE, - RXE_MEM_TYPE_DMA, - RXE_MEM_TYPE_MR, - RXE_MEM_TYPE_FMR, - RXE_MEM_TYPE_MW, +enum rxe_mr_lookup_type { + RXE_LOOKUP_LOCAL, + RXE_LOOKUP_REMOTE, }; #define RXE_BUF_PER_MAP (PAGE_SIZE / sizeof(struct rxe_phys_buf)) @@ -315,24 +288,23 @@ struct rxe_map { struct rxe_phys_buf buf[RXE_BUF_PER_MAP]; }; -struct rxe_mem { - struct rxe_pool_entry pelem; - union { - struct ib_mr ibmr; - struct ib_mw ibmw; - }; +static inline int rkey_is_mw(u32 rkey) +{ + u32 index = rkey >> 8; + + return (index >= RXE_MIN_MW_INDEX) && (index <= RXE_MAX_MW_INDEX); +} + +struct rxe_mr { + struct rxe_pool_elem elem; + struct ib_mr ibmr; - struct rxe_pd *pd; struct ib_umem *umem; u32 lkey; u32 rkey; - - enum rxe_mem_state state; - enum rxe_mem_type type; - u64 va; - u64 iova; - size_t length; + enum rxe_mr_state state; + enum ib_mr_type type; u32 offset; int access; @@ -347,52 +319,65 @@ struct rxe_mem { u32 max_buf; u32 num_map; + atomic_t num_mw; + struct rxe_map **map; }; -struct rxe_mc_grp { - struct rxe_pool_entry pelem; - spinlock_t mcg_lock; /* guard group */ +enum rxe_mw_state { + RXE_MW_STATE_INVALID = RXE_MR_STATE_INVALID, + RXE_MW_STATE_FREE = RXE_MR_STATE_FREE, + RXE_MW_STATE_VALID = RXE_MR_STATE_VALID, +}; + +struct rxe_mw { + struct ib_mw ibmw; + struct rxe_pool_elem elem; + spinlock_t lock; + enum rxe_mw_state state; + struct rxe_qp *qp; /* Type 2 only */ + struct rxe_mr *mr; + u32 rkey; + int access; + u64 addr; + u64 length; +}; + +struct rxe_mcg { + struct rb_node node; + struct kref ref_cnt; struct rxe_dev *rxe; struct list_head qp_list; union ib_gid mgid; - int num_qp; + atomic_t qp_num; u32 qkey; u16 pkey; }; -struct rxe_mc_elem { - struct rxe_pool_entry pelem; +struct rxe_mca { struct list_head qp_list; - struct list_head grp_list; struct rxe_qp *qp; - struct rxe_mc_grp *grp; }; struct rxe_port { struct ib_port_attr attr; - u16 *pkey_tbl; __be64 port_guid; __be64 subnet_prefix; spinlock_t port_lock; /* guard port */ unsigned int mtu_cap; /* special QPs */ - u32 qp_smi_index; u32 qp_gsi_index; }; struct rxe_dev { struct ib_device ib_dev; struct ib_device_attr attr; - struct device_dma_parameters dma_parms; int max_ucontext; int max_inline_data; struct mutex usdev_lock; struct net_device *ndev; - int xmit_errors; - struct rxe_pool uc_pool; struct rxe_pool pd_pool; struct rxe_pool ah_pool; @@ -401,8 +386,12 @@ struct rxe_dev { struct rxe_pool cq_pool; struct rxe_pool mr_pool; struct rxe_pool mw_pool; - struct rxe_pool mc_grp_pool; - struct rxe_pool mc_elem_pool; + + /* multicast support */ + spinlock_t mcg_lock; + struct rb_root mcg_tree; + atomic_t mcg_num; + atomic_t mcg_attach; spinlock_t pending_lock; /* guard pending_mmaps */ struct list_head pending_mmaps; @@ -456,18 +445,31 @@ static inline struct rxe_cq *to_rcq(struct ib_cq *cq) return cq ? container_of(cq, struct rxe_cq, ibcq) : NULL; } -static inline struct rxe_mem *to_rmr(struct ib_mr *mr) +static inline struct rxe_mr *to_rmr(struct ib_mr *mr) { - return mr ? container_of(mr, struct rxe_mem, ibmr) : NULL; + return mr ? container_of(mr, struct rxe_mr, ibmr) : NULL; } -static inline struct rxe_mem *to_rmw(struct ib_mw *mw) +static inline struct rxe_mw *to_rmw(struct ib_mw *mw) { - return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL; + return mw ? container_of(mw, struct rxe_mw, ibmw) : NULL; } -int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name); +static inline struct rxe_pd *rxe_ah_pd(struct rxe_ah *ah) +{ + return to_rpd(ah->ibah.pd); +} + +static inline struct rxe_pd *mr_pd(struct rxe_mr *mr) +{ + return to_rpd(mr->ibmr.pd); +} -void rxe_mc_cleanup(struct rxe_pool_entry *arg); +static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw) +{ + return to_rpd(mw->ibmw.pd); +} + +int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name); #endif /* RXE_VERBS_H */ diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig index b622fc62f2cd..81b70a3eeb87 100644 --- a/drivers/infiniband/sw/siw/Kconfig +++ b/drivers/infiniband/sw/siw/Kconfig @@ -1,7 +1,10 @@ config RDMA_SIW tristate "Software RDMA over TCP/IP (iWARP) driver" - depends on INET && INFINIBAND && LIBCRC32C - select DMA_VIRT_OPS + depends on INET && INFINIBAND + depends on INFINIBAND_VIRT_DMA + select LIBCRC32C + select CRYPTO + select CRYPTO_CRC32C help This driver implements the iWARP RDMA transport over the Linux TCP/IP network stack. It enables a system with a diff --git a/drivers/infiniband/sw/siw/iwarp.h b/drivers/infiniband/sw/siw/iwarp.h index e8a04d9c89cb..3f1dedb50a0d 100644 --- a/drivers/infiniband/sw/siw/iwarp.h +++ b/drivers/infiniband/sw/siw/iwarp.h @@ -114,13 +114,6 @@ static inline u8 __ddp_get_version(struct iwarp_ctrl *ctrl) return be16_to_cpu(ctrl->ddp_rdmap_ctrl & DDP_MASK_VERSION) >> 8; } -static inline void __ddp_set_version(struct iwarp_ctrl *ctrl, u8 version) -{ - ctrl->ddp_rdmap_ctrl = - (ctrl->ddp_rdmap_ctrl & ~DDP_MASK_VERSION) | - (cpu_to_be16((u16)version << 8) & DDP_MASK_VERSION); -} - static inline u8 __rdmap_get_version(struct iwarp_ctrl *ctrl) { __be16 ver = ctrl->ddp_rdmap_ctrl & RDMAP_MASK_VERSION; @@ -128,12 +121,6 @@ static inline u8 __rdmap_get_version(struct iwarp_ctrl *ctrl) return be16_to_cpu(ver) >> 6; } -static inline void __rdmap_set_version(struct iwarp_ctrl *ctrl, u8 version) -{ - ctrl->ddp_rdmap_ctrl = (ctrl->ddp_rdmap_ctrl & ~RDMAP_MASK_VERSION) | - (cpu_to_be16(version << 6) & RDMAP_MASK_VERSION); -} - static inline u8 __rdmap_get_opcode(struct iwarp_ctrl *ctrl) { return be16_to_cpu(ctrl->ddp_rdmap_ctrl & RDMAP_MASK_OPCODE); diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index af5e9f8c0fcd..2f3a9cda3850 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -30,7 +30,6 @@ #define SIW_MAX_MR (SIW_MAX_QP * 10) #define SIW_MAX_PD SIW_MAX_QP #define SIW_MAX_MW 0 /* to be set if MW's are supported */ -#define SIW_MAX_FMR SIW_MAX_MR #define SIW_MAX_SRQ SIW_MAX_QP #define SIW_MAX_SRQ_WR (SIW_MAX_QP_WR * 10) #define SIW_MAX_CONTEXT SIW_MAX_PD @@ -59,7 +58,6 @@ struct siw_dev_cap { int max_mr; int max_pd; int max_mw; - int max_fmr; int max_srq; int max_srq_wr; int max_srq_sge; @@ -71,7 +69,6 @@ struct siw_pd { struct siw_device { struct ib_device base_dev; - struct device_dma_parameters dma_parms; struct net_device *netdev; struct siw_dev_cap attrs; @@ -139,7 +136,7 @@ struct siw_pble { struct siw_pbl { unsigned int num_buf; unsigned int max_buf; - struct siw_pble pbe[1]; + struct siw_pble pbe[]; }; /* @@ -421,6 +418,7 @@ struct siw_qp { struct ib_qp base_qp; struct siw_device *sdev; struct kref ref; + struct completion qp_free; struct list_head devq; int tx_cpu; struct siw_qp_attrs attrs; @@ -647,16 +645,11 @@ static inline struct siw_sqe *orq_get_current(struct siw_qp *qp) return &qp->orq[qp->orq_get % qp->attrs.orq_size]; } -static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp) -{ - return &qp->orq[qp->orq_put % qp->attrs.orq_size]; -} - static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) { - struct siw_sqe *orq_e = orq_get_tail(qp); + struct siw_sqe *orq_e = &qp->orq[qp->orq_put % qp->attrs.orq_size]; - if (orq_e && READ_ONCE(orq_e->flags) == 0) + if (READ_ONCE(orq_e->flags) == 0) return orq_e; return NULL; diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index c5651a96b196..f88d2971c2c6 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -725,11 +725,11 @@ static int siw_proc_mpareply(struct siw_cep *cep) enum mpa_v2_ctrl mpa_p2p_mode = MPA_V2_RDMA_NO_RTR; rv = siw_recv_mpa_rr(cep); - if (rv != -EAGAIN) - siw_cancel_mpatimer(cep); if (rv) goto out_err; + siw_cancel_mpatimer(cep); + rep = &cep->mpa.hdr; if (__mpa_rr_revision(rep->params.bits) > MPA_REVISION_2) { @@ -895,7 +895,8 @@ static int siw_proc_mpareply(struct siw_cep *cep) } out_err: - siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL); + if (rv != -EAGAIN) + siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL); return rv; } @@ -947,16 +948,8 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_get(new_cep); new_s->sk->sk_user_data = new_cep; - if (siw_tcp_nagle == false) { - int val = 1; - - rv = kernel_setsockopt(new_s, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof(val)); - if (rv) { - siw_dbg_cep(cep, "setsockopt NODELAY error: %d\n", rv); - goto error; - } - } + if (siw_tcp_nagle == false) + tcp_sock_set_nodelay(new_s->sk); new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ; rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT); @@ -976,14 +969,15 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_set_inuse(new_cep); rv = siw_proc_mpareq(new_cep); - siw_cep_set_free(new_cep); - if (rv != -EAGAIN) { siw_cep_put(cep); new_cep->listen_cep = NULL; - if (rv) + if (rv) { + siw_cep_set_free(new_cep); goto error; + } } + siw_cep_set_free(new_cep); } return; @@ -1055,7 +1049,7 @@ static void siw_cm_work_handler(struct work_struct *w) cep->state); } } - if (rv && rv != EAGAIN) + if (rv && rv != -EAGAIN) release_cep = 1; break; @@ -1232,12 +1226,10 @@ static void siw_cm_llp_data_ready(struct sock *sk) switch (cep->state) { case SIW_EPSTATE_RDMA_MODE: - /* fall through */ case SIW_EPSTATE_LISTENING: break; case SIW_EPSTATE_AWAIT_MPAREQ: - /* fall through */ case SIW_EPSTATE_AWAIT_MPAREP: siw_cm_queue_work(cep, SIW_CM_WORK_READ_MPAHDR); break; @@ -1310,19 +1302,22 @@ static void siw_cm_llp_state_change(struct sock *sk) } static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr, - struct sockaddr *raddr) + struct sockaddr *raddr, bool afonly) { - int rv, flags = 0, s_val = 1; + int rv, flags = 0; size_t size = laddr->sa_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); /* * Make address available again asap. */ - rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val, - sizeof(s_val)); - if (rv < 0) - return rv; + sock_set_reuseaddr(s->sk); + + if (afonly) { + rv = ip6_sock_set_v6only(s->sk); + if (rv) + return rv; + } rv = s->ops->bind(s, laddr, size); if (rv < 0) @@ -1384,21 +1379,13 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) * mode. Might be reconsidered for async connection setup at * TCP level. */ - rv = kernel_bindconnect(s, laddr, raddr); + rv = kernel_bindconnect(s, laddr, raddr, id->afonly); if (rv != 0) { siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv); goto error; } - if (siw_tcp_nagle == false) { - int val = 1; - - rv = kernel_setsockopt(s, SOL_TCP, TCP_NODELAY, (char *)&val, - sizeof(val)); - if (rv) { - siw_dbg_qp(qp, "setsockopt NODELAY error: %d\n", rv); - goto error; - } - } + if (siw_tcp_nagle == false) + tcp_sock_set_nodelay(s->sk); cep = siw_cep_alloc(sdev); if (!cep) { rv = -ENOMEM; @@ -1769,13 +1756,22 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) return 0; } -static int siw_listen_address(struct iw_cm_id *id, int backlog, - struct sockaddr *laddr, int addr_family) +/* + * siw_create_listen - Create resources for a listener's IWCM ID @id + * + * Starts listen on the socket address id->local_addr. + * + */ +int siw_create_listen(struct iw_cm_id *id, int backlog) { struct socket *s; struct siw_cep *cep = NULL; struct siw_device *sdev = to_siw_dev(id->device); - int rv = 0, s_val; + int addr_family = id->local_addr.ss_family; + int rv = 0; + + if (addr_family != AF_INET && addr_family != AF_INET6) + return -EAFNOSUPPORT; rv = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s); if (rv < 0) @@ -1784,16 +1780,36 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, /* * Allow binding local port when still in TIME_WAIT from last close. */ - s_val = 1; - rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val, - sizeof(s_val)); - if (rv) { - siw_dbg(id->device, "setsockopt error: %d\n", rv); - goto error; + sock_set_reuseaddr(s->sk); + + if (addr_family == AF_INET) { + struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr); + + /* For wildcard addr, limit binding to current device only */ + if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) + s->sk->sk_bound_dev_if = sdev->netdev->ifindex; + + rv = s->ops->bind(s, (struct sockaddr *)laddr, + sizeof(struct sockaddr_in)); + } else { + struct sockaddr_in6 *laddr = &to_sockaddr_in6(id->local_addr); + + if (id->afonly) { + rv = ip6_sock_set_v6only(s->sk); + if (rv) { + siw_dbg(id->device, + "ip6_sock_set_v6only erro: %d\n", rv); + goto error; + } + } + + /* For wildcard addr, limit binding to current device only */ + if (ipv6_addr_any(&laddr->sin6_addr)) + s->sk->sk_bound_dev_if = sdev->netdev->ifindex; + + rv = s->ops->bind(s, (struct sockaddr *)laddr, + sizeof(struct sockaddr_in6)); } - rv = s->ops->bind(s, laddr, addr_family == AF_INET ? - sizeof(struct sockaddr_in) : - sizeof(struct sockaddr_in6)); if (rv) { siw_dbg(id->device, "socket bind error: %d\n", rv); goto error; @@ -1852,7 +1868,7 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, list_add_tail(&cep->listenq, (struct list_head *)id->provider_data); cep->state = SIW_EPSTATE_LISTENING; - siw_dbg(id->device, "Listen at laddr %pISp\n", laddr); + siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr); return 0; @@ -1910,106 +1926,6 @@ static void siw_drop_listeners(struct iw_cm_id *id) } } -/* - * siw_create_listen - Create resources for a listener's IWCM ID @id - * - * Listens on the socket address id->local_addr. - * - * If the listener's @id provides a specific local IP address, at most one - * listening socket is created and associated with @id. - * - * If the listener's @id provides the wildcard (zero) local IP address, - * a separate listen is performed for each local IP address of the device - * by creating a listening socket and binding to that local IP address. - * - */ -int siw_create_listen(struct iw_cm_id *id, int backlog) -{ - struct net_device *dev = to_siw_dev(id->device)->netdev; - int rv = 0, listeners = 0; - - siw_dbg(id->device, "backlog %d\n", backlog); - - /* - * For each attached address of the interface, create a - * listening socket, if id->local_addr is the wildcard - * IP address or matches the IP address. - */ - if (id->local_addr.ss_family == AF_INET) { - struct in_device *in_dev = in_dev_get(dev); - struct sockaddr_in s_laddr; - const struct in_ifaddr *ifa; - - if (!in_dev) { - rv = -ENODEV; - goto out; - } - memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr)); - - siw_dbg(id->device, "laddr %pISp\n", &s_laddr); - - rtnl_lock(); - in_dev_for_each_ifa_rtnl(ifa, in_dev) { - if (ipv4_is_zeronet(s_laddr.sin_addr.s_addr) || - s_laddr.sin_addr.s_addr == ifa->ifa_address) { - s_laddr.sin_addr.s_addr = ifa->ifa_address; - - rv = siw_listen_address(id, backlog, - (struct sockaddr *)&s_laddr, - AF_INET); - if (!rv) - listeners++; - } - } - rtnl_unlock(); - in_dev_put(in_dev); - } else if (id->local_addr.ss_family == AF_INET6) { - struct inet6_dev *in6_dev = in6_dev_get(dev); - struct inet6_ifaddr *ifp; - struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr); - - if (!in6_dev) { - rv = -ENODEV; - goto out; - } - siw_dbg(id->device, "laddr %pISp\n", &s_laddr); - - rtnl_lock(); - list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { - if (ifp->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED)) - continue; - if (ipv6_addr_any(&s_laddr->sin6_addr) || - ipv6_addr_equal(&s_laddr->sin6_addr, &ifp->addr)) { - struct sockaddr_in6 bind_addr = { - .sin6_family = AF_INET6, - .sin6_port = s_laddr->sin6_port, - .sin6_flowinfo = 0, - .sin6_addr = ifp->addr, - .sin6_scope_id = dev->ifindex }; - - rv = siw_listen_address(id, backlog, - (struct sockaddr *)&bind_addr, - AF_INET6); - if (!rv) - listeners++; - } - } - rtnl_unlock(); - in6_dev_put(in6_dev); - } else { - rv = -EAFNOSUPPORT; - } -out: - if (listeners) - rv = 0; - else if (!rv) - rv = -EINVAL; - - siw_dbg(id->device, "%s\n", rv ? "FAIL" : "OK"); - - return rv; -} - int siw_destroy_listen(struct iw_cm_id *id) { if (!id->provider_data) { @@ -2037,8 +1953,6 @@ int siw_cm_init(void) void siw_cm_exit(void) { - if (siw_cm_wq) { - flush_workqueue(siw_cm_wq); + if (siw_cm_wq) destroy_workqueue(siw_cm_wq); - } } diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 5cd40fb9e20c..dacc174604bf 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -67,12 +67,13 @@ static int siw_device_register(struct siw_device *sdev, const char *name) static int dev_id = 1; int rv; - rv = ib_register_device(base_dev, name); + sdev->vendor_part_id = dev_id++; + + rv = ib_register_device(base_dev, name, NULL); if (rv) { pr_warn("siw: device registration error %d\n", rv); return rv; } - sdev->vendor_part_id = dev_id++; siw_dbg(base_dev, "HWaddr=%pM\n", sdev->netdev->dev_addr); @@ -97,15 +98,14 @@ static int siw_create_tx_threads(void) continue; siw_tx_thread[cpu] = - kthread_create(siw_run_sq, (unsigned long *)(long)cpu, - "siw_tx/%d", cpu); + kthread_run_on_cpu(siw_run_sq, + (unsigned long *)(long)cpu, + cpu, "siw_tx/%u"); if (IS_ERR(siw_tx_thread[cpu])) { siw_tx_thread[cpu] = NULL; continue; } - kthread_bind(siw_tx_thread[cpu], cpu); - wake_up_process(siw_tx_thread[cpu]); assigned++; } return assigned; @@ -119,6 +119,7 @@ static int siw_dev_qualified(struct net_device *netdev) * <linux/if_arp.h> for type identifiers. */ if (netdev->type == ARPHRD_ETHER || netdev->type == ARPHRD_IEEE802 || + netdev->type == ARPHRD_NONE || (netdev->type == ARPHRD_LOOPBACK && loopback_enabled)) return 1; @@ -134,7 +135,7 @@ static struct { static int siw_init_cpulist(void) { - int i, num_nodes = num_possible_nodes(); + int i, num_nodes = nr_node_ids; memset(siw_tx_thread, 0, sizeof(siw_tx_thread)); @@ -288,7 +289,6 @@ static const struct ib_device_ops siw_device_ops = { .post_srq_recv = siw_post_srq_recv, .query_device = siw_query_device, .query_gid = siw_query_gid, - .query_pkey = siw_query_pkey, .query_port = siw_query_port, .query_qp = siw_query_qp, .query_srq = siw_query_srq, @@ -297,6 +297,7 @@ static const struct ib_device_ops siw_device_ops = { INIT_RDMA_OBJ_SIZE(ib_cq, siw_cq, base_cq), INIT_RDMA_OBJ_SIZE(ib_pd, siw_pd, base_pd), + INIT_RDMA_OBJ_SIZE(ib_qp, siw_qp, base_qp), INIT_RDMA_OBJ_SIZE(ib_srq, siw_srq, base_srq), INIT_RDMA_OBJ_SIZE(ib_ucontext, siw_ucontext, base_ucontext), }; @@ -305,24 +306,8 @@ static struct siw_device *siw_device_create(struct net_device *netdev) { struct siw_device *sdev = NULL; struct ib_device *base_dev; - struct device *parent = netdev->dev.parent; int rv; - if (!parent) { - /* - * The loopback device has no parent device, - * so it appears as a top-level device. To support - * loopback device connectivity, take this device - * as the parent device. Skip all other devices - * w/o parent device. - */ - if (netdev->type != ARPHRD_LOOPBACK) { - pr_warn("siw: device %s error: no parent device\n", - netdev->name); - return NULL; - } - parent = &netdev->dev; - } sdev = ib_alloc_device(siw_device, base_dev); if (!sdev) return NULL; @@ -331,12 +316,12 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sdev->netdev = netdev; - if (netdev->type != ARPHRD_LOOPBACK) { + if (netdev->type != ARPHRD_LOOPBACK && netdev->type != ARPHRD_NONE) { addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, netdev->dev_addr); } else { /* - * The loopback device does not have a HW address, + * This device does not have a HW address, * but connection mangagement lib expects gid != 0 */ size_t len = min_t(size_t, strlen(base_dev->name), 6); @@ -346,30 +331,8 @@ static struct siw_device *siw_device_create(struct net_device *netdev) addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, addr); } - base_dev->uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + + base_dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND); base_dev->node_type = RDMA_NODE_RNIC; memcpy(base_dev->node_desc, SIW_NODE_DESC_COMMON, @@ -381,11 +344,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) * per physical port. */ base_dev->phys_port_cnt = 1; - base_dev->dev.parent = parent; - base_dev->dev.dma_ops = &dma_virt_ops; - base_dev->dev.dma_parms = &sdev->dma_parms; - sdev->dma_parms = (struct device_dma_parameters) - { .max_segment_size = SZ_2G }; base_dev->num_comp_vectors = num_possible_cpus(); xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); @@ -400,7 +358,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sizeof(base_dev->iw_ifname)); /* Disable TCP port mapping */ - base_dev->iw_driver_flags = IW_F_NO_PORT_MAP, + base_dev->iw_driver_flags = IW_F_NO_PORT_MAP; sdev->attrs.max_qp = SIW_MAX_QP; sdev->attrs.max_qp_wr = SIW_MAX_QP_WR; @@ -413,7 +371,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sdev->attrs.max_mr = SIW_MAX_MR; sdev->attrs.max_pd = SIW_MAX_PD; sdev->attrs.max_mw = SIW_MAX_MW; - sdev->attrs.max_fmr = SIW_MAX_FMR; sdev->attrs.max_srq = SIW_MAX_SRQ; sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR; sdev->attrs.max_srq_sge = SIW_MAX_SGE; @@ -428,7 +385,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev) atomic_set(&sdev->num_mr, 0); atomic_set(&sdev->num_pd, 0); - sdev->numa_node = dev_to_node(parent); + sdev->numa_node = dev_to_node(&netdev->dev); spin_lock_init(&sdev->lock); return sdev; diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c index e2061dc0b043..61c17db70d65 100644 --- a/drivers/infiniband/sw/siw/siw_mem.c +++ b/drivers/infiniband/sw/siw/siw_mem.c @@ -106,8 +106,6 @@ int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj, mem->perms = rights & IWARP_ACCESS_MASK; kref_init(&mem->ref); - mr->mem = mem; - get_random_bytes(&next, 4); next &= 0x00ffffff; @@ -116,6 +114,8 @@ int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj, kfree(mem); return -ENOMEM; } + + mr->mem = mem; /* Set the STag index part */ mem->stag = id << 8; mr->base_mr.lkey = mr->base_mr.rkey = mem->stag; @@ -349,14 +349,11 @@ dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx) struct siw_pbl *siw_pbl_alloc(u32 num_buf) { struct siw_pbl *pbl; - int buf_size = sizeof(*pbl); if (num_buf == 0) return ERR_PTR(-EINVAL); - buf_size += ((num_buf - 1) * sizeof(struct siw_pble)); - - pbl = kzalloc(buf_size, GFP_KERNEL); + pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL); if (!pbl) return ERR_PTR(-ENOMEM); @@ -397,7 +394,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable) if (!writable) foll_flags |= FOLL_FORCE; - down_read(&mm_s->mmap_sem); + mmap_read_lock(mm_s); mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; @@ -441,7 +438,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable) num_pages -= got; } out_sem_up: - up_read(&mm_s->mmap_sem); + mmap_read_unlock(mm_s); if (rv > 0) return umem; diff --git a/drivers/infiniband/sw/siw/siw_mem.h b/drivers/infiniband/sw/siw/siw_mem.h index db138c8423da..f911287576d1 100644 --- a/drivers/infiniband/sw/siw/siw_mem.h +++ b/drivers/infiniband/sw/siw/siw_mem.h @@ -29,11 +29,6 @@ static inline void siw_mem_put(struct siw_mem *mem) kref_put(&mem->ref, siw_free_mem); } -static inline struct siw_mr *siw_mem2mr(struct siw_mem *m) -{ - return container_of(m, struct siw_mr, mem); -} - static inline void siw_unref_mem_sgl(struct siw_mem **mem, unsigned int num_sge) { while (num_sge) { diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 875d36d4b1c6..e6f634971228 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -199,26 +199,26 @@ void siw_qp_llp_write_space(struct sock *sk) static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) { - irq_size = roundup_pow_of_two(irq_size); - orq_size = roundup_pow_of_two(orq_size); - - qp->attrs.irq_size = irq_size; - qp->attrs.orq_size = orq_size; - - qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); - if (!qp->irq) { - siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size); - qp->attrs.irq_size = 0; - return -ENOMEM; + if (irq_size) { + irq_size = roundup_pow_of_two(irq_size); + qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); + if (!qp->irq) { + qp->attrs.irq_size = 0; + return -ENOMEM; + } } - qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); - if (!qp->orq) { - siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size); - qp->attrs.orq_size = 0; - qp->attrs.irq_size = 0; - vfree(qp->irq); - return -ENOMEM; + if (orq_size) { + orq_size = roundup_pow_of_two(orq_size); + qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); + if (!qp->orq) { + qp->attrs.orq_size = 0; + qp->attrs.irq_size = 0; + vfree(qp->irq); + return -ENOMEM; + } } + qp->attrs.irq_size = irq_size; + qp->attrs.orq_size = orq_size; siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size); return 0; } @@ -288,13 +288,14 @@ int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl) if (ctrl & MPA_V2_RDMA_WRITE_RTR) wqe->sqe.opcode = SIW_OP_WRITE; else if (ctrl & MPA_V2_RDMA_READ_RTR) { - struct siw_sqe *rreq; + struct siw_sqe *rreq = NULL; wqe->sqe.opcode = SIW_OP_READ; spin_lock(&qp->orq_lock); - rreq = orq_get_free(qp); + if (qp->attrs.orq_size) + rreq = orq_get_free(qp); if (rreq) { siw_read_to_orq(rreq, &wqe->sqe); qp->orq_put++; @@ -877,135 +878,88 @@ void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe) rreq->num_sge = 1; } -/* - * Must be called with SQ locked. - * To avoid complete SQ starvation by constant inbound READ requests, - * the active IRQ will not be served after qp->irq_burst, if the - * SQ has pending work. - */ -int siw_activate_tx(struct siw_qp *qp) +static int siw_activate_tx_from_sq(struct siw_qp *qp) { - struct siw_sqe *irqe, *sqe; + struct siw_sqe *sqe; struct siw_wqe *wqe = tx_wqe(qp); int rv = 1; - irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size]; - - if (irqe->flags & SIW_WQE_VALID) { - sqe = sq_get_next(qp); - - /* - * Avoid local WQE processing starvation in case - * of constant inbound READ request stream - */ - if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) { - qp->irq_burst = 0; - goto skip_irq; - } - memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); - wqe->wr_status = SIW_WR_QUEUED; - - /* start READ RESPONSE */ - wqe->sqe.opcode = SIW_OP_READ_RESPONSE; - wqe->sqe.flags = 0; - if (irqe->num_sge) { - wqe->sqe.num_sge = 1; - wqe->sqe.sge[0].length = irqe->sge[0].length; - wqe->sqe.sge[0].laddr = irqe->sge[0].laddr; - wqe->sqe.sge[0].lkey = irqe->sge[0].lkey; - } else { - wqe->sqe.num_sge = 0; - } - - /* Retain original RREQ's message sequence number for - * potential error reporting cases. - */ - wqe->sqe.sge[1].length = irqe->sge[1].length; - - wqe->sqe.rkey = irqe->rkey; - wqe->sqe.raddr = irqe->raddr; + sqe = sq_get_next(qp); + if (!sqe) + return 0; - wqe->processed = 0; - qp->irq_get++; + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + wqe->wr_status = SIW_WR_QUEUED; - /* mark current IRQ entry free */ - smp_store_mb(irqe->flags, 0); + /* First copy SQE to kernel private memory */ + memcpy(&wqe->sqe, sqe, sizeof(*sqe)); + if (wqe->sqe.opcode >= SIW_NUM_OPCODES) { + rv = -EINVAL; goto out; } - sqe = sq_get_next(qp); - if (sqe) { -skip_irq: - memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); - wqe->wr_status = SIW_WR_QUEUED; - - /* First copy SQE to kernel private memory */ - memcpy(&wqe->sqe, sqe, sizeof(*sqe)); - - if (wqe->sqe.opcode >= SIW_NUM_OPCODES) { + if (wqe->sqe.flags & SIW_WQE_INLINE) { + if (wqe->sqe.opcode != SIW_OP_SEND && + wqe->sqe.opcode != SIW_OP_WRITE) { rv = -EINVAL; goto out; } - if (wqe->sqe.flags & SIW_WQE_INLINE) { - if (wqe->sqe.opcode != SIW_OP_SEND && - wqe->sqe.opcode != SIW_OP_WRITE) { - rv = -EINVAL; - goto out; - } - if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) { - rv = -EINVAL; - goto out; - } - wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; - wqe->sqe.sge[0].lkey = 0; - wqe->sqe.num_sge = 1; + if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) { + rv = -EINVAL; + goto out; } - if (wqe->sqe.flags & SIW_WQE_READ_FENCE) { - /* A READ cannot be fenced */ - if (unlikely(wqe->sqe.opcode == SIW_OP_READ || - wqe->sqe.opcode == - SIW_OP_READ_LOCAL_INV)) { - siw_dbg_qp(qp, "cannot fence read\n"); - rv = -EINVAL; - goto out; - } - spin_lock(&qp->orq_lock); + wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; + wqe->sqe.sge[0].lkey = 0; + wqe->sqe.num_sge = 1; + } + if (wqe->sqe.flags & SIW_WQE_READ_FENCE) { + /* A READ cannot be fenced */ + if (unlikely(wqe->sqe.opcode == SIW_OP_READ || + wqe->sqe.opcode == + SIW_OP_READ_LOCAL_INV)) { + siw_dbg_qp(qp, "cannot fence read\n"); + rv = -EINVAL; + goto out; + } + spin_lock(&qp->orq_lock); - if (!siw_orq_empty(qp)) { - qp->tx_ctx.orq_fence = 1; - rv = 0; - } - spin_unlock(&qp->orq_lock); + if (qp->attrs.orq_size && !siw_orq_empty(qp)) { + qp->tx_ctx.orq_fence = 1; + rv = 0; + } + spin_unlock(&qp->orq_lock); - } else if (wqe->sqe.opcode == SIW_OP_READ || - wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) { - struct siw_sqe *rreq; + } else if (wqe->sqe.opcode == SIW_OP_READ || + wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) { + struct siw_sqe *rreq; - wqe->sqe.num_sge = 1; + if (unlikely(!qp->attrs.orq_size)) { + /* We negotiated not to send READ req's */ + rv = -EINVAL; + goto out; + } + wqe->sqe.num_sge = 1; - spin_lock(&qp->orq_lock); + spin_lock(&qp->orq_lock); - rreq = orq_get_free(qp); - if (rreq) { - /* - * Make an immediate copy in ORQ to be ready - * to process loopback READ reply - */ - siw_read_to_orq(rreq, &wqe->sqe); - qp->orq_put++; - } else { - qp->tx_ctx.orq_fence = 1; - rv = 0; - } - spin_unlock(&qp->orq_lock); + rreq = orq_get_free(qp); + if (rreq) { + /* + * Make an immediate copy in ORQ to be ready + * to process loopback READ reply + */ + siw_read_to_orq(rreq, &wqe->sqe); + qp->orq_put++; + } else { + qp->tx_ctx.orq_fence = 1; + rv = 0; } - - /* Clear SQE, can be re-used by application */ - smp_store_mb(sqe->flags, 0); - qp->sq_get++; - } else { - rv = 0; + spin_unlock(&qp->orq_lock); } + + /* Clear SQE, can be re-used by application */ + smp_store_mb(sqe->flags, 0); + qp->sq_get++; out: if (unlikely(rv < 0)) { siw_dbg_qp(qp, "error %d\n", rv); @@ -1015,6 +969,65 @@ out: } /* + * Must be called with SQ locked. + * To avoid complete SQ starvation by constant inbound READ requests, + * the active IRQ will not be served after qp->irq_burst, if the + * SQ has pending work. + */ +int siw_activate_tx(struct siw_qp *qp) +{ + struct siw_sqe *irqe; + struct siw_wqe *wqe = tx_wqe(qp); + + if (!qp->attrs.irq_size) + return siw_activate_tx_from_sq(qp); + + irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size]; + + if (!(irqe->flags & SIW_WQE_VALID)) + return siw_activate_tx_from_sq(qp); + + /* + * Avoid local WQE processing starvation in case + * of constant inbound READ request stream + */ + if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) { + qp->irq_burst = 0; + return siw_activate_tx_from_sq(qp); + } + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + wqe->wr_status = SIW_WR_QUEUED; + + /* start READ RESPONSE */ + wqe->sqe.opcode = SIW_OP_READ_RESPONSE; + wqe->sqe.flags = 0; + if (irqe->num_sge) { + wqe->sqe.num_sge = 1; + wqe->sqe.sge[0].length = irqe->sge[0].length; + wqe->sqe.sge[0].laddr = irqe->sge[0].laddr; + wqe->sqe.sge[0].lkey = irqe->sge[0].lkey; + } else { + wqe->sqe.num_sge = 0; + } + + /* Retain original RREQ's message sequence number for + * potential error reporting cases. + */ + wqe->sqe.sge[1].length = irqe->sge[1].length; + + wqe->sqe.rkey = irqe->rkey; + wqe->sqe.raddr = irqe->raddr; + + wqe->processed = 0; + qp->irq_get++; + + /* mark current IRQ entry free */ + smp_store_mb(irqe->flags, 0); + + return 1; +} + +/* * Check if current CQ state qualifies for calling CQ completion * handler. Must be called with CQ lock held. */ @@ -1329,8 +1342,6 @@ void siw_free_qp(struct kref *ref) vfree(qp->orq); siw_put_tx_cpu(qp->tx_cpu); - + complete(&qp->qp_free); atomic_dec(&sdev->num_qp); - siw_dbg_qp(qp, "free QP\n"); - kfree_rcu(qp, rcu); } diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 9ccce2909ac4..fd721cc19682 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -139,7 +139,8 @@ static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx, break; bytes = min(bytes, len); - if (siw_rx_kva(srx, (void *)buf_addr, bytes) == bytes) { + if (siw_rx_kva(srx, (void *)(uintptr_t)buf_addr, bytes) == + bytes) { copied += bytes; offset += bytes; len -= bytes; @@ -679,6 +680,10 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx) } spin_lock_irqsave(&qp->sq_lock, flags); + if (unlikely(!qp->attrs.irq_size)) { + run_sq = 0; + goto error_irq; + } if (tx_work->wr_status == SIW_WR_IDLE) { /* * immediately schedule READ response w/o @@ -711,8 +716,9 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx) /* RRESP now valid as current TX wqe or placed into IRQ */ smp_store_mb(resp->flags, SIW_WQE_VALID); } else { - pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp), - qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size); +error_irq: + pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n", + qp_id(qp), qp->attrs.irq_size); siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, RDMAP_ETYPE_REMOTE_OPERATION, @@ -739,6 +745,9 @@ static int siw_orqe_start_rx(struct siw_qp *qp) struct siw_sqe *orqe; struct siw_wqe *wqe = NULL; + if (unlikely(!qp->attrs.orq_size)) + return -EPROTO; + /* make sure ORQ indices are current */ smp_mb(); @@ -795,8 +804,8 @@ int siw_proc_rresp(struct siw_qp *qp) */ rv = siw_orqe_start_rx(qp); if (rv) { - pr_warn("siw: [QP %u]: ORQ empty at idx %d\n", - qp_id(qp), qp->orq_get % qp->attrs.orq_size); + pr_warn("siw: [QP %u]: ORQ empty, size %d\n", + qp_id(qp), qp->attrs.orq_size); goto error_term; } rv = siw_rresp_check_ntoh(srx, frx); @@ -952,27 +961,28 @@ out: static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx) { struct sk_buff *skb = srx->skb; + int avail = min(srx->skb_new, srx->fpdu_part_rem); u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad; __wsum crc_in, crc_own = 0; siw_dbg_qp(qp, "expected %d, available %d, pad %u\n", srx->fpdu_part_rem, srx->skb_new, srx->pad); - if (srx->skb_new < srx->fpdu_part_rem) - return -EAGAIN; - - skb_copy_bits(skb, srx->skb_offset, tbuf, srx->fpdu_part_rem); + skb_copy_bits(skb, srx->skb_offset, tbuf, avail); - if (srx->mpa_crc_hd && srx->pad) - crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad); + srx->skb_new -= avail; + srx->skb_offset += avail; + srx->skb_copied += avail; + srx->fpdu_part_rem -= avail; - srx->skb_new -= srx->fpdu_part_rem; - srx->skb_offset += srx->fpdu_part_rem; - srx->skb_copied += srx->fpdu_part_rem; + if (srx->fpdu_part_rem) + return -EAGAIN; if (!srx->mpa_crc_hd) return 0; + if (srx->pad) + crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad); /* * CRC32 is computed, transmitted and received directly in NBO, * so there's never a reason to convert byte order. @@ -1074,10 +1084,9 @@ static int siw_get_hdr(struct siw_rx_stream *srx) * completely received. */ if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) { - bytes = iwarp_pktinfo[opcode].hdr_len - MIN_DDP_HDR; + int hdrlen = iwarp_pktinfo[opcode].hdr_len; - if (srx->skb_new < bytes) - return -EAGAIN; + bytes = min_t(int, hdrlen - MIN_DDP_HDR, srx->skb_new); skb_copy_bits(skb, srx->skb_offset, (char *)c_hdr + srx->fpdu_part_rcvd, bytes); @@ -1087,6 +1096,9 @@ static int siw_get_hdr(struct siw_rx_stream *srx) srx->skb_new -= bytes; srx->skb_offset += bytes; srx->skb_copied += bytes; + + if (srx->fpdu_part_rcvd < hdrlen) + return -EAGAIN; } /* @@ -1144,11 +1156,12 @@ static int siw_check_tx_fence(struct siw_qp *qp) spin_lock_irqsave(&qp->orq_lock, flags); - rreq = orq_get_current(qp); - /* free current orq entry */ + rreq = orq_get_current(qp); WRITE_ONCE(rreq->flags, 0); + qp->orq_get++; + if (qp->tx_ctx.orq_fence) { if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) { pr_warn("siw: [QP %u]: fence resume: bad status %d\n", @@ -1156,10 +1169,12 @@ static int siw_check_tx_fence(struct siw_qp *qp) rv = -EPROTO; goto out; } - /* resume SQ processing */ + /* resume SQ processing, if possible */ if (tx_waiting->sqe.opcode == SIW_OP_READ || tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) { - rreq = orq_get_tail(qp); + + /* SQ processing was stopped because of a full ORQ */ + rreq = orq_get_free(qp); if (unlikely(!rreq)) { pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp)); rv = -EPROTO; @@ -1172,15 +1187,14 @@ static int siw_check_tx_fence(struct siw_qp *qp) resume_tx = 1; } else if (siw_orq_empty(qp)) { + /* + * SQ processing was stopped by fenced work request. + * Resume since all previous Read's are now completed. + */ qp->tx_ctx.orq_fence = 0; resume_tx = 1; - } else { - pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n", - qp_id(qp), qp->orq_get, qp->orq_put); - rv = -EPROTO; } } - qp->orq_get++; out: spin_unlock_irqrestore(&qp->orq_lock, flags); @@ -1214,7 +1228,7 @@ static int siw_rdmap_complete(struct siw_qp *qp, int error) case RDMAP_SEND_SE: case RDMAP_SEND_SE_INVAL: wqe->rqe.flags |= SIW_WQE_SOLICITED; - /* Fall through */ + fallthrough; case RDMAP_SEND: case RDMAP_SEND_INVAL: @@ -1289,11 +1303,13 @@ static int siw_rdmap_complete(struct siw_qp *qp, int error) wc_status); siw_wqe_put_mem(wqe, SIW_OP_READ); - if (!error) + if (!error) { rv = siw_check_tx_fence(qp); - else - /* Disable current ORQ eleement */ - WRITE_ONCE(orq_get_current(qp)->flags, 0); + } else { + /* Disable current ORQ element */ + if (qp->attrs.orq_size) + WRITE_ONCE(orq_get_current(qp)->flags, 0); + } break; case RDMAP_RDMA_READ_REQ: @@ -1385,7 +1401,7 @@ int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb, * DDP segment. */ qp->rx_fpdu->first_ddp_seg = 0; - /* Fall through */ + fallthrough; case SIW_GET_DATA_START: /* diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index ae92c8080967..7d47b521070b 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -29,7 +29,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) dma_addr_t paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx); if (paddr) - return virt_to_page(paddr); + return virt_to_page((void *)paddr); return NULL; } @@ -76,7 +76,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) if (unlikely(!p)) return -EFAULT; - buffer = kmap(p); + buffer = kmap_local_page(p); if (likely(PAGE_SIZE - off >= bytes)) { memcpy(paddr, buffer + off, bytes); @@ -84,7 +84,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) unsigned long part = bytes - (PAGE_SIZE - off); memcpy(paddr, buffer + off, part); - kunmap(p); + kunmap_local(buffer); if (!mem->is_pbl) p = siw_get_upage(mem->umem, @@ -96,10 +96,10 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) if (unlikely(!p)) return -EFAULT; - buffer = kmap(p); + buffer = kmap_local_page(p); memcpy(paddr + part, buffer, bytes - part); } - kunmap(p); + kunmap_local(buffer); } } return (int)bytes; @@ -396,13 +396,20 @@ static int siw_0copy_tx(struct socket *s, struct page **page, #define MAX_TRAILER (MPA_CRC_SIZE + 4) -static void siw_unmap_pages(struct page **pp, unsigned long kmap_mask) +static void siw_unmap_pages(struct kvec *iov, unsigned long kmap_mask, int len) { - while (kmap_mask) { - if (kmap_mask & BIT(0)) - kunmap(*pp); - pp++; - kmap_mask >>= 1; + int i; + + /* + * Work backwards through the array to honor the kmap_local_page() + * ordering requirements. + */ + for (i = (len-1); i >= 0; i--) { + if (kmap_mask & BIT(i)) { + unsigned long addr = (unsigned long)iov[i].iov_base; + + kunmap_local((void *)(addr & PAGE_MASK)); + } } } @@ -485,6 +492,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) while (sge_len) { size_t plen = min((int)PAGE_SIZE - fp_off, sge_len); + void *kaddr; if (!is_kva) { struct page *p; @@ -497,7 +505,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) p = siw_get_upage(mem->umem, sge->laddr + sge_off); if (unlikely(!p)) { - siw_unmap_pages(page_array, kmap_mask); + siw_unmap_pages(iov, kmap_mask, seg); wqe->processed -= c_tx->bytes_unsent; rv = -EFAULT; goto done_crc; @@ -505,11 +513,12 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) page_array[seg] = p; if (!c_tx->use_sendpage) { - iov[seg].iov_base = kmap(p) + fp_off; - iov[seg].iov_len = plen; + void *kaddr = kmap_local_page(p); /* Remember for later kunmap() */ kmap_mask |= BIT(seg); + iov[seg].iov_base = kaddr + fp_off; + iov[seg].iov_len = plen; if (do_crc) crypto_shash_update( @@ -517,19 +526,30 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) iov[seg].iov_base, plen); } else if (do_crc) { + kaddr = kmap_local_page(p); crypto_shash_update(c_tx->mpa_crc_hd, - kmap(p) + fp_off, + kaddr + fp_off, plen); - kunmap(p); + kunmap_local(kaddr); } } else { - u64 va = sge->laddr + sge_off; + /* + * Cast to an uintptr_t to preserve all 64 bits + * in sge->laddr. + */ + uintptr_t va = (uintptr_t)(sge->laddr + sge_off); - page_array[seg] = virt_to_page(va & PAGE_MASK); + /* + * virt_to_page() takes a (void *) pointer + * so cast to a (void *) meaning it will be 64 + * bits on a 64 bit platform and 32 bits on a + * 32 bit platform. + */ + page_array[seg] = virt_to_page((void *)(va & PAGE_MASK)); if (do_crc) crypto_shash_update( c_tx->mpa_crc_hd, - (void *)(uintptr_t)va, + (void *)va, plen); } @@ -540,7 +560,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) if (++seg > (int)MAX_ARRAY) { siw_dbg_qp(tx_qp(c_tx), "to many fragments\n"); - siw_unmap_pages(page_array, kmap_mask); + siw_unmap_pages(iov, kmap_mask, seg-1); wqe->processed -= c_tx->bytes_unsent; rv = -EMSGSIZE; goto done_crc; @@ -591,7 +611,7 @@ sge_done: } else { rv = kernel_sendmsg(s, &msg, iov, seg + 1, hdr_len + data_len + trl_len); - siw_unmap_pages(page_array, kmap_mask); + siw_unmap_pages(iov, kmap_mask, seg); } if (rv < (int)hdr_len) { /* Not even complete hdr pushed or negative rv */ @@ -920,20 +940,27 @@ static int siw_fastreg_mr(struct ib_pd *pd, struct siw_sqe *sqe) { struct ib_mr *base_mr = (struct ib_mr *)(uintptr_t)sqe->base_mr; struct siw_device *sdev = to_siw_dev(pd->device); - struct siw_mem *mem = siw_mem_id2obj(sdev, sqe->rkey >> 8); + struct siw_mem *mem; int rv = 0; siw_dbg_pd(pd, "STag 0x%08x\n", sqe->rkey); - if (unlikely(!mem || !base_mr)) { + if (unlikely(!base_mr)) { pr_warn("siw: fastreg: STag 0x%08x unknown\n", sqe->rkey); return -EINVAL; } + if (unlikely(base_mr->rkey >> 8 != sqe->rkey >> 8)) { pr_warn("siw: fastreg: STag 0x%08x: bad MR\n", sqe->rkey); - rv = -EINVAL; - goto out; + return -EINVAL; } + + mem = siw_mem_id2obj(sdev, sqe->rkey >> 8); + if (unlikely(!mem)) { + pr_warn("siw: fastreg: STag 0x%08x unknown\n", sqe->rkey); + return -EINVAL; + } + if (unlikely(mem->pd != pd)) { pr_warn("siw: fastreg: PD mismatch\n"); rv = -EINVAL; @@ -1035,7 +1062,7 @@ next_wqe: case SIW_OP_SEND_REMOTE_INV: case SIW_OP_WRITE: siw_wqe_put_mem(wqe, tx_type); - /* Fall through */ + fallthrough; case SIW_OP_INVAL_STAG: case SIW_OP_REG_MR: @@ -1100,8 +1127,8 @@ next_wqe: /* * RREQ may have already been completed by inbound RRESP! */ - if (tx_type == SIW_OP_READ || - tx_type == SIW_OP_READ_LOCAL_INV) { + if ((tx_type == SIW_OP_READ || + tx_type == SIW_OP_READ_LOCAL_INV) && qp->attrs.orq_size) { /* Cleanup pending entry in ORQ */ qp->orq_put--; qp->orq[qp->orq_put % qp->attrs.orq_size].flags = 0; @@ -1121,7 +1148,7 @@ next_wqe: case SIW_OP_READ: case SIW_OP_READ_LOCAL_INV: siw_wqe_put_mem(wqe, tx_type); - /* Fall through */ + fallthrough; case SIW_OP_INVAL_STAG: case SIW_OP_REG_MR: diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 07e30138aaa1..3e814cfb298c 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -8,6 +8,7 @@ #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/xarray.h> +#include <net/addrconf.h> #include <rdma/iw_cm.h> #include <rdma/ib_verbs.h> @@ -131,12 +132,11 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, /* Revisit atomic caps if RFC 7306 gets supported */ attr->atomic_cap = 0; - attr->device_cap_flags = - IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_ALLOW_USER_UNREG; + attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; + attr->kernel_cap_flags = IBK_ALLOW_USER_UNREG; attr->max_cq = sdev->attrs.max_cq; attr->max_cqe = sdev->attrs.max_cqe; attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL; - attr->max_fmr = sdev->attrs.max_fmr; attr->max_mr = sdev->attrs.max_mr; attr->max_mw = sdev->attrs.max_mw; attr->max_mr_size = ~0ull; @@ -156,27 +156,28 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, attr->vendor_id = SIW_VENDOR_ID; attr->vendor_part_id = sdev->vendor_part_id; - memcpy(&attr->sys_image_guid, sdev->netdev->dev_addr, 6); + addrconf_addr_eui48((u8 *)&attr->sys_image_guid, + sdev->netdev->dev_addr); return 0; } -int siw_query_port(struct ib_device *base_dev, u8 port, +int siw_query_port(struct ib_device *base_dev, u32 port, struct ib_port_attr *attr) { struct siw_device *sdev = to_siw_dev(base_dev); + int rv; memset(attr, 0, sizeof(*attr)); - attr->active_mtu = attr->max_mtu; - attr->active_speed = 2; - attr->active_width = 2; + rv = ib_get_eth_speed(base_dev, port, &attr->active_speed, + &attr->active_width); attr->gid_tbl_len = 1; attr->max_msg_sz = -1; attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); + attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); attr->phys_state = sdev->state == IB_PORT_ACTIVE ? IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; - attr->pkey_tbl_len = 1; attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; attr->state = sdev->state; /* @@ -192,10 +193,10 @@ int siw_query_port(struct ib_device *base_dev, u8 port, * attr->subnet_timeout = 0; * attr->init_type_repy = 0; */ - return 0; + return rv; } -int siw_get_port_immutable(struct ib_device *base_dev, u8 port, +int siw_get_port_immutable(struct ib_device *base_dev, u32 port, struct ib_port_immutable *port_immutable) { struct ib_port_attr attr; @@ -204,21 +205,13 @@ int siw_get_port_immutable(struct ib_device *base_dev, u8 port, if (rv) return rv; - port_immutable->pkey_tbl_len = attr.pkey_tbl_len; port_immutable->gid_tbl_len = attr.gid_tbl_len; port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; return 0; } -int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, u16 *pkey) -{ - /* Report the default pkey */ - *pkey = 0xffff; - return 0; -} - -int siw_query_gid(struct ib_device *base_dev, u8 port, int idx, +int siw_query_gid(struct ib_device *base_dev, u32 port, int idx, union ib_gid *gid) { struct siw_device *sdev = to_siw_dev(base_dev); @@ -243,12 +236,13 @@ int siw_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) return 0; } -void siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +int siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct siw_device *sdev = to_siw_dev(pd->device); siw_dbg_pd(pd, "free PD\n"); atomic_dec(&sdev->num_pd); + return 0; } void siw_qp_get_ref(struct ib_qp *base_qp) @@ -293,37 +287,39 @@ siw_mmap_entry_insert(struct siw_ucontext *uctx, * * Create QP of requested size on given device. * - * @pd: Protection Domain + * @qp: Queue pait * @attrs: Initial QP attributes. * @udata: used to provide QP ID, SQ and RQ size back to user. */ -struct ib_qp *siw_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *attrs, - struct ib_udata *udata) +int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, + struct ib_udata *udata) { - struct siw_qp *qp = NULL; + struct ib_pd *pd = ibqp->pd; + struct siw_qp *qp = to_siw_qp(ibqp); struct ib_device *base_dev = pd->device; struct siw_device *sdev = to_siw_dev(base_dev); struct siw_ucontext *uctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); - struct siw_cq *scq = NULL, *rcq = NULL; unsigned long flags; int num_sqe, num_rqe, rv = 0; size_t length; siw_dbg(base_dev, "create new QP\n"); + if (attrs->create_flags) + return -EOPNOTSUPP; + if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) { siw_dbg(base_dev, "too many QP's\n"); rv = -ENOMEM; - goto err_out; + goto err_atomic; } if (attrs->qp_type != IB_QPT_RC) { siw_dbg(base_dev, "only RC QP's supported\n"); - rv = -EINVAL; - goto err_out; + rv = -EOPNOTSUPP; + goto err_atomic; } if ((attrs->cap.max_send_wr > SIW_MAX_QP_WR) || (attrs->cap.max_recv_wr > SIW_MAX_QP_WR) || @@ -331,13 +327,13 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, (attrs->cap.max_recv_sge > SIW_MAX_SGE)) { siw_dbg(base_dev, "QP size error\n"); rv = -EINVAL; - goto err_out; + goto err_atomic; } if (attrs->cap.max_inline_data > SIW_MAX_INLINE) { siw_dbg(base_dev, "max inline send: %d > %d\n", attrs->cap.max_inline_data, (int)SIW_MAX_INLINE); rv = -EINVAL; - goto err_out; + goto err_atomic; } /* * NOTE: we allow for zero element SQ and RQ WQE's SGL's @@ -346,21 +342,15 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, if (attrs->cap.max_send_wr + attrs->cap.max_recv_wr == 0) { siw_dbg(base_dev, "QP must have send or receive queue\n"); rv = -EINVAL; - goto err_out; + goto err_atomic; } - scq = to_siw_cq(attrs->send_cq); - rcq = to_siw_cq(attrs->recv_cq); - if (!scq || (!rcq && !attrs->srq)) { + if (!attrs->send_cq || (!attrs->recv_cq && !attrs->srq)) { siw_dbg(base_dev, "send CQ or receive CQ invalid\n"); rv = -EINVAL; - goto err_out; - } - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) { - rv = -ENOMEM; - goto err_out; + goto err_atomic; } + init_rwsem(&qp->state_lock); spin_lock_init(&qp->sq_lock); spin_lock_init(&qp->rq_lock); @@ -368,15 +358,25 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, rv = siw_qp_add(sdev, qp); if (rv) - goto err_out; + goto err_atomic; + + num_sqe = attrs->cap.max_send_wr; + num_rqe = attrs->cap.max_recv_wr; /* All queue indices are derived from modulo operations * on a free running 'get' (consumer) and 'put' (producer) * unsigned counter. Having queue sizes at power of two * avoids handling counter wrap around. */ - num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr); - num_rqe = roundup_pow_of_two(attrs->cap.max_recv_wr); + if (num_sqe) + num_sqe = roundup_pow_of_two(num_sqe); + else { + /* Zero sized SQ is not supported */ + rv = -EINVAL; + goto err_out_xa; + } + if (num_rqe) + num_rqe = roundup_pow_of_two(num_rqe); if (udata) qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe)); @@ -384,7 +384,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe)); if (qp->sendq == NULL) { - siw_dbg(base_dev, "SQ size %d alloc failed\n", num_sqe); rv = -ENOMEM; goto err_out_xa; } @@ -397,8 +396,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, } } qp->pd = pd; - qp->scq = scq; - qp->rcq = rcq; + qp->scq = to_siw_cq(attrs->send_cq); + qp->rcq = to_siw_cq(attrs->recv_cq); if (attrs->srq) { /* @@ -418,7 +417,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe)); if (qp->recvq == NULL) { - siw_dbg(base_dev, "RQ size %d alloc failed\n", num_rqe); rv = -ENOMEM; goto err_out_xa; } @@ -482,23 +480,22 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, list_add_tail(&qp->devq, &sdev->qp_list); spin_unlock_irqrestore(&sdev->lock, flags); - return &qp->base_qp; + init_completion(&qp->qp_free); + + return 0; err_out_xa: xa_erase(&sdev->qp_xa, qp_id(qp)); -err_out: - if (qp) { - if (uctx) { - rdma_user_mmap_entry_remove(qp->sq_entry); - rdma_user_mmap_entry_remove(qp->rq_entry); - } - vfree(qp->sendq); - vfree(qp->recvq); - kfree(qp); + if (uctx) { + rdma_user_mmap_entry_remove(qp->sq_entry); + rdma_user_mmap_entry_remove(qp->rq_entry); } - atomic_dec(&sdev->num_qp); + vfree(qp->sendq); + vfree(qp->recvq); - return ERR_PTR(rv); +err_atomic: + atomic_dec(&sdev->num_qp); + return rv; } /* @@ -552,6 +549,9 @@ int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr, if (!attr_mask) return 0; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + memset(&new_attrs, 0, sizeof(new_attrs)); if (attr_mask & IB_QP_ACCESS_FLAGS) { @@ -626,6 +626,7 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) qp->scq = qp->rcq = NULL; siw_qp_put(qp); + wait_for_completion(&qp->qp_free); return 0; } @@ -665,7 +666,7 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, kbuf += core_sge->length; core_sge++; } - sqe->sge[0].length = bytes > 0 ? bytes : 0; + sqe->sge[0].length = max(bytes, 0); sqe->num_sge = bytes > 0 ? 1 : 0; return bytes; @@ -968,9 +969,9 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, unsigned long flags; int rv = 0; - if (qp->srq) { + if (qp->srq || qp->attrs.rq_size == 0) { *bad_wr = wr; - return -EOPNOTSUPP; /* what else from errno.h? */ + return -EINVAL; } if (!rdma_is_kernel_res(&qp->base_qp.res)) { siw_dbg_qp(qp, "no kernel post_recv for user mapped rq\n"); @@ -1064,7 +1065,7 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, return rv > 0 ? 0 : rv; } -void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) +int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) { struct siw_cq *cq = to_siw_cq(base_cq); struct siw_device *sdev = to_siw_dev(base_cq->device); @@ -1082,6 +1083,7 @@ void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) atomic_dec(&sdev->num_cq); vfree(cq->queue); + return 0; } /* @@ -1101,6 +1103,9 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, struct siw_cq *cq = to_siw_cq(base_cq); int rv, size = attr->cqe; + if (attr->flags) + return -EOPNOTSUPP; + if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) { siw_dbg(base_cq->device, "too many CQ's\n"); rv = -ENOMEM; @@ -1165,7 +1170,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, err_out: siw_dbg(base_cq->device, "CQ creation failed: %d", rv); - if (cq && cq->queue) { + if (cq->queue) { struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); @@ -1373,7 +1378,7 @@ err_out: } struct ib_mr *siw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_sge, struct ib_udata *udata) + u32 max_sge) { struct siw_device *sdev = to_siw_dev(pd->device); struct siw_mr *mr = NULL; @@ -1562,6 +1567,9 @@ int siw_create_srq(struct ib_srq *base_srq, base_ucontext); int rv; + if (init_attrs->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + if (atomic_inc_return(&sdev->num_srq) > SIW_MAX_SRQ) { siw_dbg_pd(base_srq->pd, "too many SRQ's\n"); rv = -ENOMEM; @@ -1699,7 +1707,7 @@ int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs) * QP anymore - the code trusts the RDMA core environment to keep track * of QP references. */ -void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata) +int siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata) { struct siw_srq *srq = to_siw_srq(base_srq); struct siw_device *sdev = to_siw_dev(base_srq->device); @@ -1711,6 +1719,7 @@ void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata) rdma_user_mmap_entry_remove(srq->srq_entry); vfree(srq->recvq); atomic_dec(&sdev->num_srq); + return 0; } /* @@ -1834,7 +1843,7 @@ void siw_srq_event(struct siw_srq *srq, enum ib_event_type etype) } } -void siw_port_event(struct siw_device *sdev, u8 port, enum ib_event_type etype) +void siw_port_event(struct siw_device *sdev, u32 port, enum ib_event_type etype) { struct ib_event event; diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h index 1a731989fad6..09964234f8d3 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.h +++ b/drivers/infiniband/sw/siw/siw_verbs.h @@ -36,24 +36,22 @@ static inline void siw_copy_sgl(struct ib_sge *sge, struct siw_sge *siw_sge, int siw_alloc_ucontext(struct ib_ucontext *base_ctx, struct ib_udata *udata); void siw_dealloc_ucontext(struct ib_ucontext *base_ctx); -int siw_query_port(struct ib_device *base_dev, u8 port, +int siw_query_port(struct ib_device *base_dev, u32 port, struct ib_port_attr *attr); -int siw_get_port_immutable(struct ib_device *base_dev, u8 port, +int siw_get_port_immutable(struct ib_device *base_dev, u32 port, struct ib_port_immutable *port_immutable); int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, struct ib_udata *udata); int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -int siw_query_port(struct ib_device *base_dev, u8 port, +int siw_query_port(struct ib_device *base_dev, u32 port, struct ib_port_attr *attr); -int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, u16 *pkey); -int siw_query_gid(struct ib_device *base_dev, u8 port, int idx, +int siw_query_gid(struct ib_device *base_dev, u32 port, int idx, union ib_gid *gid); int siw_alloc_pd(struct ib_pd *base_pd, struct ib_udata *udata); -void siw_dealloc_pd(struct ib_pd *base_pd, struct ib_udata *udata); -struct ib_qp *siw_create_qp(struct ib_pd *base_pd, - struct ib_qp_init_attr *attr, - struct ib_udata *udata); +int siw_dealloc_pd(struct ib_pd *base_pd, struct ib_udata *udata); +int siw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attr, + struct ib_udata *udata); int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr, @@ -63,13 +61,13 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); -void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata); +int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata); int siw_poll_cq(struct ib_cq *base_cq, int num_entries, struct ib_wc *wc); int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags); struct ib_mr *siw_reg_user_mr(struct ib_pd *base_pd, u64 start, u64 len, u64 rnic_va, int rights, struct ib_udata *udata); struct ib_mr *siw_alloc_mr(struct ib_pd *base_pd, enum ib_mr_type mr_type, - u32 max_sge, struct ib_udata *udata); + u32 max_sge); struct ib_mr *siw_get_dma_mr(struct ib_pd *base_pd, int rights); int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, unsigned int *sg_off); @@ -79,7 +77,7 @@ int siw_create_srq(struct ib_srq *base_srq, struct ib_srq_init_attr *attr, int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, struct ib_udata *udata); int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attr); -void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata); +int siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata); int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma); @@ -87,6 +85,6 @@ void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry); void siw_qp_event(struct siw_qp *qp, enum ib_event_type type); void siw_cq_event(struct siw_cq *cq, enum ib_event_type type); void siw_srq_event(struct siw_srq *srq, enum ib_event_type type); -void siw_port_event(struct siw_device *dev, u8 port, enum ib_event_type type); +void siw_port_event(struct siw_device *dev, u32 port, enum ib_event_type type); #endif |