diff options
Diffstat (limited to 'drivers/infiniband')
84 files changed, 1253 insertions, 10461 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index b44b1c322ec8..ade86388434f 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -83,7 +83,6 @@ config INFINIBAND_ADDR_TRANS_CONFIGFS if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/qib/Kconfig" -source "drivers/infiniband/hw/cxgb3/Kconfig" source "drivers/infiniband/hw/cxgb4/Kconfig" source "drivers/infiniband/hw/efa/Kconfig" source "drivers/infiniband/hw/i40iw/Kconfig" diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 00fb3eacda19..d535995711c3 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -819,22 +819,16 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table) { int i; - bool deleted = false; if (!table) return; mutex_lock(&table->lock); for (i = 0; i < table->sz; ++i) { - if (is_gid_entry_valid(table->data_vec[i])) { + if (is_gid_entry_valid(table->data_vec[i])) del_gid(ib_dev, port, table, i); - deleted = true; - } } mutex_unlock(&table->lock); - - if (deleted) - dispatch_gid_change_event(ib_dev, port); } void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 5920c0085d35..33b384c7df42 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1,36 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2004-2007 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. */ #include <linux/completion.h> @@ -246,7 +220,7 @@ struct cm_work { }; struct cm_timewait_info { - struct cm_work work; /* Must be first. 
*/ + struct cm_work work; struct list_head list; struct rb_node remote_qp_node; struct rb_node remote_id_node; @@ -1525,14 +1499,6 @@ static int cm_issue_rej(struct cm_port *port, return ret; } -static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid, - __be32 local_qpn, __be32 remote_qpn) -{ - return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) || - ((local_ca_guid == remote_ca_guid) && - (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn)))); -} - static bool cm_req_has_alt_path(struct cm_req_msg *req_msg) { return ((req_msg->alt_local_lid) || @@ -3434,7 +3400,7 @@ static int cm_timewait_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; int ret; - timewait_info = (struct cm_timewait_info *)work; + timewait_info = container_of(work, struct cm_timewait_info, work); spin_lock_irq(&cm.lock); list_del(&timewait_info->list); spin_unlock_irq(&cm.lock); diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 3d16d614aff6..92d7260ac913 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -1,37 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004, 2011 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING the madirectory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use source and binary forms, with or - * withmodification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retathe above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHWARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS THE - * SOFTWARE. + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. */ -#if !defined(CM_MSGS_H) +#ifndef CM_MSGS_H #define CM_MSGS_H #include <rdma/ib_mad.h> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d78f67623f24..abf249d277ad 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1,36 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. - * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include <linux/completion.h> @@ -2828,22 +2801,65 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv) return 0; } -static int iboe_tos_to_sl(struct net_device *ndev, int tos) +static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio) { - int prio; struct net_device *dev; - prio = rt_tos2priority(tos); - dev = is_vlan_dev(ndev) ? 
vlan_dev_real_dev(ndev) : ndev; + dev = vlan_dev_real_dev(vlan_ndev); if (dev->num_tc) return netdev_get_prio_tc_map(dev, prio); -#if IS_ENABLED(CONFIG_VLAN_8021Q) + return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) & + VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; +} + +struct iboe_prio_tc_map { + int input_prio; + int output_tc; + bool found; +}; + +static int get_lower_vlan_dev_tc(struct net_device *dev, void *data) +{ + struct iboe_prio_tc_map *map = data; + + if (is_vlan_dev(dev)) + map->output_tc = get_vlan_ndev_tc(dev, map->input_prio); + else if (dev->num_tc) + map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio); + else + map->output_tc = 0; + /* We are interested only in first level VLAN device, so always + * return 1 to stop iterating over next level devices. + */ + map->found = true; + return 1; +} + +static int iboe_tos_to_sl(struct net_device *ndev, int tos) +{ + struct iboe_prio_tc_map prio_tc_map = {}; + int prio = rt_tos2priority(tos); + + /* If VLAN device, get it directly from the VLAN netdev */ if (is_vlan_dev(ndev)) - return (vlan_dev_get_egress_qos_mask(ndev, prio) & - VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; -#endif - return 0; + return get_vlan_ndev_tc(ndev, prio); + + prio_tc_map.input_prio = prio; + rcu_read_lock(); + netdev_walk_all_lower_dev_rcu(ndev, + get_lower_vlan_dev_tc, + &prio_tc_map); + rcu_read_unlock(); + /* If map is found from lower device, use it; Otherwise + * continue with the current netdevice to get priority to tc map. 
+ */ + if (prio_tc_map.found) + return prio_tc_map.output_tc; + else if (ndev->num_tc) + return netdev_get_prio_tc_map(ndev, prio); + else + return 0; } static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 680ad27f497d..8434ec082c3a 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -149,11 +149,18 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, struct auto_mode_param *param = &counter->mode.param; bool match = true; - if (!rdma_is_visible_in_pid_ns(&qp->res)) - return false; - - /* Ensure that counter belongs to the right PID */ - if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task)) + /* + * Ensure that counter belongs to the right PID. This operation can + * race with user space which kills the process and leaves QP and + * counters orphans. + * + * It is not a big deal because exited task will leave both QP and + * counter in the same bucket of zombie process. Just ensure that + * process is still alive before proceeding. 
+ * + */ + if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task) || + !task_pid_nr(qp->res.task)) return false; if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) @@ -229,9 +236,6 @@ static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, rt = &dev->res[RDMA_RESTRACK_COUNTER]; xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { - if (!rdma_is_visible_in_pid_ns(res)) - continue; - counter = container_of(res, struct rdma_counter, res); if ((counter->device != qp->device) || (counter->port != port)) goto next; @@ -412,9 +416,6 @@ static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) if (IS_ERR(res)) return NULL; - if (!rdma_is_visible_in_pid_ns(res)) - goto err; - qp = container_of(res, struct ib_qp, res); if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) goto err; @@ -445,11 +446,6 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, if (IS_ERR(res)) return NULL; - if (!rdma_is_visible_in_pid_ns(res)) { - rdma_restrack_put(res); - return NULL; - } - counter = container_of(res, struct rdma_counter, res); kref_get(&counter->kref); rdma_restrack_put(res); @@ -463,10 +459,15 @@ static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, u32 qp_num, u32 counter_id) { + struct rdma_port_counter *port_counter; struct rdma_counter *counter; struct ib_qp *qp; int ret; + port_counter = &dev->port_data[port].port_counter; + if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) + return -EINVAL; + qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; @@ -503,6 +504,7 @@ err: int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, u32 qp_num, u32 *counter_id) { + struct rdma_port_counter *port_counter; struct rdma_counter *counter; struct ib_qp *qp; int ret; @@ -510,9 +512,13 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, if (!rdma_is_port_valid(dev, port)) return -EINVAL; - if 
(!dev->port_data[port].port_counter.hstats) + port_counter = &dev->port_data[port].port_counter; + if (!port_counter->hstats) return -EOPNOTSUPP; + if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO) + return -EINVAL; + qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 2dd2cfe9b561..f8d383ceae05 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -128,17 +128,14 @@ module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444); MODULE_PARM_DESC(netns_mode, "Share device among net namespaces; default=1 (shared)"); /** - * rdma_dev_access_netns() - Return whether a rdma device can be accessed + * rdma_dev_access_netns() - Return whether an rdma device can be accessed * from a specified net namespace or not. - * @device: Pointer to rdma device which needs to be checked + * @dev: Pointer to rdma device which needs to be checked * @net: Pointer to net namesapce for which access to be checked * - * rdma_dev_access_netns() - Return whether a rdma device can be accessed - * from a specified net namespace or not. When - * rdma device is in shared mode, it ignores the - * net namespace. When rdma device is exclusive - * to a net namespace, rdma device net namespace is - * checked against the specified one. + * When the rdma device is in shared mode, it ignores the net namespace. + * When the rdma device is exclusive to a net namespace, rdma device net + * namespace is checked against the specified one. 
*/ bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net) { @@ -1199,9 +1196,21 @@ static void setup_dma_device(struct ib_device *device) WARN_ON_ONCE(!parent); device->dma_device = parent; } - /* Setup default max segment size for all IB devices */ - dma_set_max_seg_size(device->dma_device, SZ_2G); + if (!device->dev.dma_parms) { + if (parent) { + /* + * The caller did not provide DMA parameters, so + * 'parent' probably represents a PCI device. The PCI + * core sets the maximum segment size to 64 + * KB. Increase this parameter to 2 GB. + */ + device->dev.dma_parms = parent->dma_parms; + dma_set_max_seg_size(device->dma_device, SZ_2G); + } else { + WARN_ON_ONCE(true); + } + } } /* @@ -1317,7 +1326,9 @@ out: /** * ib_register_device - Register an IB device with IB core - * @device:Device to register + * @device: Device to register + * @name: unique string device name. This may include a '%' which will + * cause a unique index to be added to the passed device name. * * Low-level drivers use ib_register_device() to register their * devices with the IB core. All registered clients will receive a @@ -1444,7 +1455,7 @@ out: /** * ib_unregister_device - Unregister an IB device - * @device: The device to unregister + * @ib_dev: The device to unregister * * Unregister an IB device. All clients will receive a remove callback. * @@ -1466,7 +1477,7 @@ EXPORT_SYMBOL(ib_unregister_device); /** * ib_unregister_device_and_put - Unregister a device while holding a 'get' - * device: The device to unregister + * @ib_dev: The device to unregister * * This is the same as ib_unregister_device(), except it includes an internal * ib_device_put() that should match a 'get' obtained by the caller. 
@@ -1536,7 +1547,7 @@ static void ib_unregister_work(struct work_struct *work) /** * ib_unregister_device_queued - Unregister a device using a work queue - * device: The device to unregister + * @ib_dev: The device to unregister * * This schedules an asynchronous unregistration using a WQ for the device. A * driver should use this to avoid holding locks while doing unregistration, @@ -2366,7 +2377,7 @@ int ib_modify_device(struct ib_device *device, struct ib_device_modify *device_modify) { if (!device->ops.modify_device) - return -ENOSYS; + return -EOPNOTSUPP; return device->ops.modify_device(device, device_modify_mask, device_modify); @@ -2607,6 +2618,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, drain_sq); SET_DEVICE_OP(dev_ops, enable_driver); SET_DEVICE_OP(dev_ops, fill_res_entry); + SET_DEVICE_OP(dev_ops, fill_stat_entry); SET_DEVICE_OP(dev_ops, get_dev_fw_str); SET_DEVICE_OP(dev_ops, get_dma_mr); SET_DEVICE_OP(dev_ops, get_hw_stats); diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 7e2bcc72f66c..1bf87d9fd0bd 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -210,8 +210,10 @@ int iwpm_mapinfo_available(void); /** * iwpm_compare_sockaddr - Compare two sockaddr storage structs + * @a_sockaddr: first sockaddr to compare + * @b_sockaddr: second sockaddr to compare * - * Returns 0 if they are holding the same ip/tcp address info, + * Return: 0 if they are holding the same ip/tcp address info, * otherwise returns 1 */ int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, @@ -272,6 +274,7 @@ void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg); * iwpm_send_hello - Send hello response to iwpmd * * @nl_client: The index of the netlink client + * @iwpm_pid: The pid of the user space port mapper * @abi_version: The kernel's abi_version * * Returns 0 on success or a negative error code diff --git 
a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 65b36548bc17..f821faca0662 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -42,6 +42,9 @@ #include "cma_priv.h" #include "restrack.h" +typedef int (*res_fill_func_t)(struct sk_buff*, bool, + struct rdma_restrack_entry*, uint32_t); + /* * Sort array elements by the netlink attribute name */ @@ -180,6 +183,19 @@ static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, return 0; } +int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, + const char *str) +{ + if (put_driver_name_print_type(msg, name, + RDMA_NLDEV_PRINT_TYPE_UNSPEC)) + return -EMSGSIZE; + if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str)) + return -EMSGSIZE; + + return 0; +} +EXPORT_SYMBOL(rdma_nl_put_driver_string); + int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value) { return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, @@ -399,20 +415,34 @@ err: static int fill_res_name_pid(struct sk_buff *msg, struct rdma_restrack_entry *res) { + int err = 0; + /* * For user resources, user is should read /proc/PID/comm to get the * name of the task file. */ if (rdma_is_kernel_res(res)) { - if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, - res->kern_name)) - return -EMSGSIZE; + err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, + res->kern_name); } else { - if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, - task_pid_vnr(res->task))) - return -EMSGSIZE; + pid_t pid; + + pid = task_pid_vnr(res->task); + /* + * Task is dead and in zombie state. + * There is no need to print PID anymore. + */ + if (pid) + /* + * This part is racy, task can be killed and PID will + * be zero right here but it is ok, next query won't + * return PID. We don't promise real-time reflection + * of SW objects. + */ + err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid); } - return 0; + + return err ? 
-EMSGSIZE : 0; } static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, @@ -423,6 +453,14 @@ static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, return dev->ops.fill_res_entry(msg, res); } +static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (!dev->ops.fill_stat_entry) + return false; + return dev->ops.fill_stat_entry(msg, res); +} + static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { @@ -698,9 +736,6 @@ static int fill_stat_counter_qps(struct sk_buff *msg, rt = &counter->device->res[RDMA_RESTRACK_QP]; xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { - if (!rdma_is_visible_in_pid_ns(res)) - continue; - qp = container_of(res, struct ib_qp, res); if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) continue; @@ -723,8 +758,8 @@ err: return ret; } -static int fill_stat_hwcounter_entry(struct sk_buff *msg, - const char *name, u64 value) +int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, + u64 value) { struct nlattr *entry_attr; @@ -746,6 +781,25 @@ err: nla_nest_cancel(msg, entry_attr); return -EMSGSIZE; } +EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry); + +static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_mr *mr = container_of(res, struct ib_mr, res); + struct ib_device *dev = mr->pd->device; + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) + goto err; + + if (fill_stat_entry(dev, msg, res)) + goto err; + + return 0; + +err: + return -EMSGSIZE; +} static int fill_stat_counter_hwcounters(struct sk_buff *msg, struct rdma_counter *counter) @@ -759,7 +813,7 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg, return -EMSGSIZE; for (i = 0; i < st->num_counters; i++) - if (fill_stat_hwcounter_entry(msg, st->names[i], st->value[i])) + if 
(rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i])) goto err; nla_nest_end(msg, table_attr); @@ -1117,8 +1171,6 @@ static int nldev_res_get_dumpit(struct sk_buff *skb, } struct nldev_fill_res_entry { - int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin, - struct rdma_restrack_entry *res, u32 port); enum rdma_nldev_attr nldev_attr; enum rdma_nldev_command nldev_cmd; u8 flags; @@ -1132,21 +1184,18 @@ enum nldev_res_flags { static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { [RDMA_RESTRACK_QP] = { - .fill_res_func = fill_res_qp_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_QP, .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY, .id = RDMA_NLDEV_ATTR_RES_LQPN, }, [RDMA_RESTRACK_CM_ID] = { - .fill_res_func = fill_res_cm_id_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID, .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, .id = RDMA_NLDEV_ATTR_RES_CM_IDN, }, [RDMA_RESTRACK_CQ] = { - .fill_res_func = fill_res_cq_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ, .flags = NLDEV_PER_DEV, @@ -1154,7 +1203,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { .id = RDMA_NLDEV_ATTR_RES_CQN, }, [RDMA_RESTRACK_MR] = { - .fill_res_func = fill_res_mr_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_MR, .flags = NLDEV_PER_DEV, @@ -1162,7 +1210,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { .id = RDMA_NLDEV_ATTR_RES_MRN, }, [RDMA_RESTRACK_PD] = { - .fill_res_func = fill_res_pd_entry, .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_PD, .flags = NLDEV_PER_DEV, @@ -1170,7 +1217,6 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { .id = RDMA_NLDEV_ATTR_RES_PDN, }, [RDMA_RESTRACK_COUNTER] = { - .fill_res_func = fill_res_counter_entry, .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET, .nldev_attr = 
RDMA_NLDEV_ATTR_STAT_COUNTER, .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, @@ -1180,7 +1226,8 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, - enum rdma_restrack_type res_type) + enum rdma_restrack_type res_type, + res_fill_func_t fill_func) { const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; @@ -1222,11 +1269,6 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err; } - if (!rdma_is_visible_in_pid_ns(res)) { - ret = -ENOENT; - goto err_get; - } - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; @@ -1243,7 +1285,9 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, } has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN); - ret = fe->fill_res_func(msg, has_cap_net_admin, res, port); + + ret = fill_func(msg, has_cap_net_admin, res, port); + rdma_restrack_put(res); if (ret) goto err_free; @@ -1263,7 +1307,8 @@ err: static int res_get_common_dumpit(struct sk_buff *skb, struct netlink_callback *cb, - enum rdma_restrack_type res_type) + enum rdma_restrack_type res_type, + res_fill_func_t fill_func) { const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; @@ -1334,9 +1379,6 @@ static int res_get_common_dumpit(struct sk_buff *skb, * objects. 
*/ xa_for_each(&rt->xa, id, res) { - if (!rdma_is_visible_in_pid_ns(res)) - continue; - if (idx < start || !rdma_restrack_get(res)) goto next; @@ -1351,7 +1393,8 @@ static int res_get_common_dumpit(struct sk_buff *skb, goto msg_full; } - ret = fe->fill_res_func(skb, has_cap_net_admin, res, port); + ret = fill_func(skb, has_cap_net_admin, res, port); + rdma_restrack_put(res); if (ret) { @@ -1394,17 +1437,19 @@ err_index: return ret; } -#define RES_GET_FUNCS(name, type) \ - static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ +#define RES_GET_FUNCS(name, type) \ + static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ struct netlink_callback *cb) \ - { \ - return res_get_common_dumpit(skb, cb, type); \ - } \ - static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ - struct nlmsghdr *nlh, \ + { \ + return res_get_common_dumpit(skb, cb, type, \ + fill_res_##name##_entry); \ + } \ + static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ + struct nlmsghdr *nlh, \ struct netlink_ext_ack *extack) \ - { \ - return res_get_common_doit(skb, nlh, extack, type); \ + { \ + return res_get_common_doit(skb, nlh, extack, type, \ + fill_res_##name##_entry); \ } RES_GET_FUNCS(qp, RDMA_RESTRACK_QP); @@ -1880,7 +1925,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, for (i = 0; i < num_cnts; i++) { v = stats->value[i] + rdma_counter_get_hwstat_value(device, port, i); - if (fill_stat_hwcounter_entry(msg, stats->names[i], v)) { + if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) { ret = -EMSGSIZE; goto err_table; } @@ -1989,7 +2034,10 @@ static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, case RDMA_NLDEV_ATTR_RES_QP: ret = stat_get_doit_qp(skb, nlh, extack, tb); break; - + case RDMA_NLDEV_ATTR_RES_MR: + ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR, + fill_stat_mr_entry); + break; default: ret = -EINVAL; break; @@ -2013,7 +2061,10 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb, case 
RDMA_NLDEV_ATTR_RES_QP: ret = nldev_res_get_counter_dumpit(skb, cb); break; - + case RDMA_NLDEV_ATTR_RES_MR: + ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR, + fill_stat_mr_entry); + break; default: ret = -EINVAL; break; diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index a07665f7ef8c..62fbb0ae9cb4 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -116,11 +116,8 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type) u32 cnt = 0; xa_lock(&rt->xa); - xas_for_each(&xas, e, U32_MAX) { - if (!rdma_is_visible_in_pid_ns(e)) - continue; + xas_for_each(&xas, e, U32_MAX) cnt++; - } xa_unlock(&rt->xa); return cnt; } @@ -346,18 +343,3 @@ out: } } EXPORT_SYMBOL(rdma_restrack_del); - -bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res) -{ - /* - * 1. Kern resources should be visible in init - * namespace only - * 2. Present only resources visible in the current - * namespace - */ - if (rdma_is_kernel_res(res)) - return task_active_pid_ns(current) == &init_pid_ns; - - /* PID 0 means that resource is not found in current namespace */ - return task_pid_vnr(res->task); -} diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h index 7bd177cc0a61..d084e5f89849 100644 --- a/drivers/infiniband/core/restrack.h +++ b/drivers/infiniband/core/restrack.h @@ -27,5 +27,4 @@ int rdma_restrack_init(struct ib_device *dev); void rdma_restrack_clean(struct ib_device *dev); void rdma_restrack_attach_task(struct rdma_restrack_entry *res, struct task_struct *task); -bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res); #endif /* _RDMA_CORE_RESTRACK_H_ */ diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 5337393d4dfe..4fad732f9b3c 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -20,14 +20,17 @@ module_param_named(force_mr, rdma_rw_force_mr, bool, 0); MODULE_PARM_DESC(force_mr, 
"Force usage of MRs for RDMA READ/WRITE operations"); /* - * Check if the device might use memory registration. This is currently only - * true for iWarp devices. In the future we can hopefully fine tune this based - * on HCA driver input. + * Report whether memory registration should be used. Memory registration must + * be used for iWarp devices because of iWARP-specific limitations. Memory + * registration is also enabled if registering memory might yield better + * performance than using multiple SGE entries, see rdma_rw_io_needs_mr() */ static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num) { if (rdma_protocol_iwarp(dev, port_num)) return true; + if (dev->attrs.max_sgl_rd) + return true; if (unlikely(rdma_rw_force_mr)) return true; return false; @@ -35,17 +38,19 @@ static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num) /* * Check if the device will use memory registration for this RW operation. - * We currently always use memory registrations for iWarp RDMA READs, and - * have a debug option to force usage of MRs. - * - * XXX: In the future we can hopefully fine tune this based on HCA driver - * input. + * For RDMA READs we must use MRs on iWarp and can optionally use them as an + * optimization otherwise. Additionally we have a debug option to force usage + * of MRs to help testing this code path. 
*/ static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num, enum dma_data_direction dir, int dma_nents) { - if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE) - return true; + if (dir == DMA_FROM_DEVICE) { + if (rdma_protocol_iwarp(dev, port_num)) + return true; + if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd) + return true; + } if (unlikely(rdma_rw_force_mr)) return true; return false; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 17fc2936c077..8917125ea16d 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1246,7 +1246,7 @@ static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, * @port_num: Port on the specified device. * @rec: path record entry to use for ah attributes initialization. * @ah_attr: address handle attributes to initialization from path record. - * @sgid_attr: SGID attribute to consider during initialization. + * @gid_attr: SGID attribute to consider during initialization. 
* * When ib_init_ah_attr_from_path() returns success, * (a) for IB link layer it optionally contains a reference to SGID attribute diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 7a50cedcef1f..92c932c067cb 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1268,7 +1268,7 @@ static ssize_t node_desc_store(struct device *device, int ret; if (!dev->ops.modify_device) - return -EIO; + return -EOPNOTSUPP; memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX)); ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc); diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 61758201d9b2..269938f59d3f 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -795,6 +795,9 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, { const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + if (IS_ERR(attr)) + return PTR_ERR(attr); + if (size < attr->ptr_attr.len) { if (clear_user(u64_to_user_ptr(attr->ptr_attr.data) + size, attr->ptr_attr.len - size)) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f974b6854224..d357ac077bd8 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -244,6 +244,8 @@ EXPORT_SYMBOL(rdma_port_get_link_layer); /** * ib_alloc_pd - Allocates an unused protection domain. * @device: The device on which to allocate the protection domain. + * @flags: protection domain flags + * @caller: caller's build-time module name * * A protection domain object provides an association between QPs, shared * receive queues, address handles, memory regions, and memory windows. 
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index 433fca59febd..0aeccd984889 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/ obj-$(CONFIG_INFINIBAND_QIB) += qib/ -obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ obj-$(CONFIG_INFINIBAND_EFA) += efa/ obj-$(CONFIG_INFINIBAND_I40IW) += i40iw/ diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index e55a1666c0cd..725b2350e349 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -108,6 +108,7 @@ struct bnxt_re_sqp_entries { #define BNXT_RE_MAX_MSIX 9 #define BNXT_RE_AEQ_IDX 0 #define BNXT_RE_NQ_IDX 1 +#define BNXT_RE_GEN_P5_MAX_VF 64 struct bnxt_re_dev { struct ib_device ibdev; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b4149dc9e824..8afd7d93cfe4 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -191,24 +191,6 @@ int bnxt_re_query_device(struct ib_device *ibdev, return 0; } -int bnxt_re_modify_device(struct ib_device *ibdev, - int device_modify_mask, - struct ib_device_modify *device_modify) -{ - switch (device_modify_mask) { - case IB_DEVICE_MODIFY_SYS_IMAGE_GUID: - /* Modify the GUID requires the modification of the GID table */ - /* GUID should be made as READ-ONLY */ - break; - case IB_DEVICE_MODIFY_NODE_DESC: - /* Node Desc should be made as READ-ONLY */ - break; - default: - break; - } - return 0; -} - /* Port */ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 31662b1ee35a..23d972da5652 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ 
b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -145,9 +145,6 @@ struct bnxt_re_ucontext { int bnxt_re_query_device(struct ib_device *ibdev, struct ib_device_attr *ib_attr, struct ib_udata *udata); -int bnxt_re_modify_device(struct ib_device *ibdev, - int device_modify_mask, - struct ib_device_modify *device_modify); int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr); int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 30a54f8aa42c..d6785b8339d2 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -119,61 +119,76 @@ static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev) * reserved for the function. The driver may choose to allocate fewer * resources than the firmware maximum. */ -static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) +static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) { - u32 vf_qps = 0, vf_srqs = 0, vf_cqs = 0, vf_mrws = 0, vf_gids = 0; - u32 i; - u32 vf_pct; - u32 num_vfs; - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *attr; + struct bnxt_qplib_ctx *ctx; + int i; - rdev->qplib_ctx.qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, - dev_attr->max_qp); + attr = &rdev->dev_attr; + ctx = &rdev->qplib_ctx; - rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT_256K; + ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, + attr->max_qp); + ctx->mrw_count = BNXT_RE_MAX_MRW_COUNT_256K; /* Use max_mr from fw since max_mrw does not get set */ - rdev->qplib_ctx.mrw_count = min_t(u32, rdev->qplib_ctx.mrw_count, - dev_attr->max_mr); - rdev->qplib_ctx.srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, - dev_attr->max_srq); - rdev->qplib_ctx.cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, - dev_attr->max_cq); - - for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) - rdev->qplib_ctx.tqm_count[i] = - 
rdev->dev_attr.tqm_alloc_reqs[i]; - - if (rdev->num_vfs) { - /* - * Reserve a set of resources for the PF. Divide the remaining - * resources among the VFs - */ - vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF; - num_vfs = 100 * rdev->num_vfs; - vf_qps = (rdev->qplib_ctx.qpc_count * vf_pct) / num_vfs; - vf_srqs = (rdev->qplib_ctx.srqc_count * vf_pct) / num_vfs; - vf_cqs = (rdev->qplib_ctx.cq_count * vf_pct) / num_vfs; - /* - * The driver allows many more MRs than other resources. If the - * firmware does also, then reserve a fixed amount for the PF - * and divide the rest among VFs. VFs may use many MRs for NFS - * mounts, ISER, NVME applications, etc. If the firmware - * severely restricts the number of MRs, then let PF have - * half and divide the rest among VFs, as for the other - * resource types. - */ - if (rdev->qplib_ctx.mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) - vf_mrws = rdev->qplib_ctx.mrw_count * vf_pct / num_vfs; - else - vf_mrws = (rdev->qplib_ctx.mrw_count - - BNXT_RE_RESVD_MR_FOR_PF) / rdev->num_vfs; - vf_gids = BNXT_RE_MAX_GID_PER_VF; + ctx->mrw_count = min_t(u32, ctx->mrw_count, attr->max_mr); + ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, + attr->max_srq); + ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq); + if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) + for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) + rdev->qplib_ctx.tqm_count[i] = + rdev->dev_attr.tqm_alloc_reqs[i]; +} + +static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf) +{ + struct bnxt_qplib_vf_res *vf_res; + u32 mrws = 0; + u32 vf_pct; + u32 nvfs; + + vf_res = &qplib_ctx->vf_res; + /* + * Reserve a set of resources for the PF. 
Divide the remaining + * resources among the VFs + */ + vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF; + nvfs = num_vf; + num_vf = 100 * num_vf; + vf_res->max_qp_per_vf = (qplib_ctx->qpc_count * vf_pct) / num_vf; + vf_res->max_srq_per_vf = (qplib_ctx->srqc_count * vf_pct) / num_vf; + vf_res->max_cq_per_vf = (qplib_ctx->cq_count * vf_pct) / num_vf; + /* + * The driver allows many more MRs than other resources. If the + * firmware does also, then reserve a fixed amount for the PF and + * divide the rest among VFs. VFs may use many MRs for NFS + * mounts, ISER, NVME applications, etc. If the firmware severely + * restricts the number of MRs, then let PF have half and divide + * the rest among VFs, as for the other resource types. + */ + if (qplib_ctx->mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) { + mrws = qplib_ctx->mrw_count * vf_pct; + nvfs = num_vf; + } else { + mrws = qplib_ctx->mrw_count - BNXT_RE_RESVD_MR_FOR_PF; } - rdev->qplib_ctx.vf_res.max_mrw_per_vf = vf_mrws; - rdev->qplib_ctx.vf_res.max_gid_per_vf = vf_gids; - rdev->qplib_ctx.vf_res.max_qp_per_vf = vf_qps; - rdev->qplib_ctx.vf_res.max_srq_per_vf = vf_srqs; - rdev->qplib_ctx.vf_res.max_cq_per_vf = vf_cqs; + vf_res->max_mrw_per_vf = (mrws / nvfs); + vf_res->max_gid_per_vf = BNXT_RE_MAX_GID_PER_VF; +} + +static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) +{ + u32 num_vfs; + + memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res)); + bnxt_re_limit_pf_res(rdev); + + num_vfs = bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? 
+ BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs; + if (num_vfs) + bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs); } /* for handling bnxt_en callbacks later */ @@ -193,9 +208,11 @@ static void bnxt_re_sriov_config(void *p, int num_vfs) return; rdev->num_vfs = num_vfs; - bnxt_re_set_resource_limits(rdev); - bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, - &rdev->qplib_ctx); + if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) { + bnxt_re_set_resource_limits(rdev); + bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, + &rdev->qplib_ctx); + } } static void bnxt_re_shutdown(void *p) @@ -625,7 +642,6 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .map_mr_sg = bnxt_re_map_mr_sg, .mmap = bnxt_re_mmap, .modify_ah = bnxt_re_modify_ah, - .modify_device = bnxt_re_modify_device, .modify_qp = bnxt_re_modify_qp, .modify_srq = bnxt_re_modify_srq, .poll_cq = bnxt_re_poll_cq, @@ -895,10 +911,14 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, return 0; } +#define BNXT_RE_GEN_P5_PF_NQ_DB 0x10000 +#define BNXT_RE_GEN_P5_VF_NQ_DB 0x4000 static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx) { return bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? - 0x10000 : rdev->msix_entries[indx].db_offset; + (rdev->is_virtfn ? 
BNXT_RE_GEN_P5_VF_NQ_DB : + BNXT_RE_GEN_P5_PF_NQ_DB) : + rdev->msix_entries[indx].db_offset; } static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) @@ -1408,8 +1428,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) rdev->is_virtfn); if (rc) goto disable_rcfw; - if (!rdev->is_virtfn) - bnxt_re_set_resource_limits(rdev); + + bnxt_re_set_resource_limits(rdev); rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0, bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 60c8f76aab33..5cdfa84faf85 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -494,8 +494,10 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, * shall setup this area for VF. Skipping the * HW programming */ - if (is_virtfn || bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + if (is_virtfn) goto skip_ctx_setup; + if (bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + goto config_vf_res; level = ctx->qpc_tbl.level; req.qpc_pg_size_qpc_lvl = (level << CMDQ_INITIALIZE_FW_QPC_LVL_SFT) | @@ -540,6 +542,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements); req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements); +config_vf_res: req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf); req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf); req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf); diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig deleted file mode 100644 index 8c1a72bff447..000000000000 --- a/drivers/infiniband/hw/cxgb3/Kconfig +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config INFINIBAND_CXGB3 - tristate "Chelsio RDMA Driver" - depends on CHELSIO_T3 - select GENERIC_ALLOCATOR - ---help--- - This is an iWARP/RDMA driver for the Chelsio T3 1GbE and - 10GbE adapters. 
- - For general information about Chelsio and our products, visit - our website at <http://www.chelsio.com>. - - For customer support, please visit our customer support page at - <http://www.chelsio.com/support.html>. - - Please send feedback to <linux-bugs@chelsio.com>. - - To compile this driver as a module, choose M here: the module - will be called iw_cxgb3. diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile deleted file mode 100644 index 34bb86a6ae3a..000000000000 --- a/drivers/infiniband/hw/cxgb3/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb3 - -obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o - -iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ - iwch_provider.o iwch.o cxio_hal.o cxio_resource.o diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c deleted file mode 100644 index 95b22a651673..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ /dev/null @@ -1,1312 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <asm/delay.h> - -#include <linux/mutex.h> -#include <linux/netdevice.h> -#include <linux/sched.h> -#include <linux/spinlock.h> -#include <linux/pci.h> -#include <linux/dma-mapping.h> -#include <linux/slab.h> -#include <net/net_namespace.h> - -#include "cxio_resource.h" -#include "cxio_hal.h" -#include "cxgb3_offload.h" -#include "sge_defs.h" - -static LIST_HEAD(rdev_list); -static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL; - -static struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name) -{ - struct cxio_rdev *rdev; - - list_for_each_entry(rdev, &rdev_list, entry) - if (!strcmp(rdev->dev_name, dev_name)) - return rdev; - return NULL; -} - -static struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev) -{ - struct cxio_rdev *rdev; - - list_for_each_entry(rdev, &rdev_list, entry) - if (rdev->t3cdev_p == tdev) - return rdev; - return NULL; -} - -int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq, - enum t3_cq_opcode op, u32 credit) -{ - int ret; - struct t3_cqe *cqe; - u32 rptr; - - struct rdma_cq_op setup; - setup.id = cq->cqid; - setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0; - setup.op = op; - ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup); - - if ((ret < 0) || (op == CQ_CREDIT_UPDATE)) - return ret; - - /* - * If the rearm returned an index other than our current index, - * then there might be CQE's in flight (being DMA'd). 
We must wait - * here for them to complete or the consumer can miss a notification. - */ - if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) { - int i=0; - - rptr = cq->rptr; - - /* - * Keep the generation correct by bumping rptr until it - * matches the index returned by the rearm - 1. - */ - while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret) - rptr++; - - /* - * Now rptr is the index for the (last) cqe that was - * in-flight at the time the HW rearmed the CQ. We - * spin until that CQE is valid. - */ - cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2); - while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) { - udelay(1); - if (i++ > 1000000) { - pr_err("%s: stalled rnic\n", rdev_p->dev_name); - return -EIO; - } - } - - return 1; - } - - return 0; -} - -static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid) -{ - struct rdma_cq_setup setup; - setup.id = cqid; - setup.base_addr = 0; /* NULL address */ - setup.size = 0; /* disaable the CQ */ - setup.credits = 0; - setup.credit_thres = 0; - setup.ovfl_mode = 0; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) -{ - u64 sge_cmd; - struct t3_modify_qp_wr *wqe; - struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); - if (!skb) { - pr_debug("%s alloc_skb failed\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, - T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7, - T3_SOPEOP); - wqe->flags = cpu_to_be32(MODQP_WRITE_EC); - sge_cmd = qpid << 8 | 3; - wqe->sge_cmd = cpu_to_be64(sge_cmd); - skb->priority = CPL_PRIORITY_CONTROL; - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -} - -int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel) -{ - struct rdma_cq_setup setup; - int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe); - - size += 1; /* one extra page for storing cq-in-err state */ - cq->cqid = 
cxio_hal_get_cqid(rdev_p->rscp); - if (!cq->cqid) - return -ENOMEM; - if (kernel) { - cq->sw_queue = kzalloc(size, GFP_KERNEL); - if (!cq->sw_queue) - return -ENOMEM; - } - cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size, - &(cq->dma_addr), GFP_KERNEL); - if (!cq->queue) { - kfree(cq->sw_queue); - return -ENOMEM; - } - dma_unmap_addr_set(cq, mapping, cq->dma_addr); - setup.id = cq->cqid; - setup.base_addr = (u64) (cq->dma_addr); - setup.size = 1UL << cq->size_log2; - setup.credits = 65535; - setup.credit_thres = 1; - if (rdev_p->t3cdev_p->type != T3A) - setup.ovfl_mode = 0; - else - setup.ovfl_mode = 1; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - struct cxio_qpid_list *entry; - u32 qpid; - int i; - - mutex_lock(&uctx->lock); - if (!list_empty(&uctx->qpids)) { - entry = list_entry(uctx->qpids.next, struct cxio_qpid_list, - entry); - list_del(&entry->entry); - qpid = entry->qpid; - kfree(entry); - } else { - qpid = cxio_hal_get_qpid(rdev_p->rscp); - if (!qpid) - goto out; - for (i = qpid+1; i & rdev_p->qpmask; i++) { - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - break; - entry->qpid = i; - list_add_tail(&entry->entry, &uctx->qpids); - } - } -out: - mutex_unlock(&uctx->lock); - pr_debug("%s qpid 0x%x\n", __func__, qpid); - return qpid; -} - -static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid, - struct cxio_ucontext *uctx) -{ - struct cxio_qpid_list *entry; - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return; - pr_debug("%s qpid 0x%x\n", __func__, qpid); - entry->qpid = qpid; - mutex_lock(&uctx->lock); - list_add_tail(&entry->entry, &uctx->qpids); - mutex_unlock(&uctx->lock); -} - -void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - struct list_head *pos, *nxt; - struct cxio_qpid_list *entry; - - mutex_lock(&uctx->lock); - list_for_each_safe(pos, nxt, 
&uctx->qpids) { - entry = list_entry(pos, struct cxio_qpid_list, entry); - list_del_init(&entry->entry); - if (!(entry->qpid & rdev_p->qpmask)) - cxio_hal_put_qpid(rdev_p->rscp, entry->qpid); - kfree(entry); - } - mutex_unlock(&uctx->lock); -} - -void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - INIT_LIST_HEAD(&uctx->qpids); - mutex_init(&uctx->lock); -} - -int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, - struct t3_wq *wq, struct cxio_ucontext *uctx) -{ - int depth = 1UL << wq->size_log2; - int rqsize = 1UL << wq->rq_size_log2; - - wq->qpid = get_qpid(rdev_p, uctx); - if (!wq->qpid) - return -ENOMEM; - - wq->rq = kcalloc(depth, sizeof(struct t3_swrq), GFP_KERNEL); - if (!wq->rq) - goto err1; - - wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize); - if (!wq->rq_addr) - goto err2; - - wq->sq = kcalloc(depth, sizeof(struct t3_swsq), GFP_KERNEL); - if (!wq->sq) - goto err3; - - wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), - depth * sizeof(union t3_wr), - &(wq->dma_addr), GFP_KERNEL); - if (!wq->queue) - goto err4; - - dma_unmap_addr_set(wq, mapping, wq->dma_addr); - wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; - if (!kernel_domain) - wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + - (wq->qpid << rdev_p->qpshift); - wq->rdev = rdev_p; - pr_debug("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", - __func__, wq->qpid, wq->doorbell, (unsigned long long)wq->udb); - return 0; -err4: - kfree(wq->sq); -err3: - cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize); -err2: - kfree(wq->rq); -err1: - put_qpid(rdev_p, wq->qpid, uctx); - return -ENOMEM; -} - -void cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) -{ - cxio_hal_clear_cq_ctx(rdev_p, cq->cqid); - kfree(cq->sw_queue); - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << (cq->size_log2)) - * sizeof(struct t3_cqe) + 1, cq->queue, - dma_unmap_addr(cq, mapping)); - cxio_hal_put_cqid(rdev_p->rscp, cq->cqid); -} - -int 
cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq, - struct cxio_ucontext *uctx) -{ - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << (wq->size_log2)) - * sizeof(union t3_wr), wq->queue, - dma_unmap_addr(wq, mapping)); - kfree(wq->sq); - cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2)); - kfree(wq->rq); - put_qpid(rdev_p, wq->qpid, uctx); - return 0; -} - -static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq) -{ - struct t3_cqe cqe; - - pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__, - wq, cq, cq->sw_rptr, cq->sw_wptr); - memset(&cqe, 0, sizeof(cqe)); - cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | - V_CQE_OPCODE(T3_SEND) | - V_CQE_TYPE(0) | - V_CQE_SWCQE(1) | - V_CQE_QPID(wq->qpid) | - V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, - cq->size_log2))); - *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; - cq->sw_wptr++; -} - -int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) -{ - u32 ptr; - int flushed = 0; - - pr_debug("%s wq %p cq %p\n", __func__, wq, cq); - - /* flush RQ */ - pr_debug("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__, - wq->rq_rptr, wq->rq_wptr, count); - ptr = wq->rq_rptr + count; - while (ptr++ != wq->rq_wptr) { - insert_recv_cqe(wq, cq); - flushed++; - } - return flushed; -} - -static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, - struct t3_swsq *sqp) -{ - struct t3_cqe cqe; - - pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__, - wq, cq, cq->sw_rptr, cq->sw_wptr); - memset(&cqe, 0, sizeof(cqe)); - cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | - V_CQE_OPCODE(sqp->opcode) | - V_CQE_TYPE(1) | - V_CQE_SWCQE(1) | - V_CQE_QPID(wq->qpid) | - V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, - cq->size_log2))); - cqe.u.scqe.wrid_hi = sqp->sq_wptr; - - *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; - cq->sw_wptr++; -} - -int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) -{ - __u32 ptr = wq->sq_rptr + 
count; - int flushed = 0; - struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - - while (ptr != wq->sq_wptr) { - sqp->signaled = 0; - insert_sq_cqe(wq, cq, sqp); - ptr++; - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - flushed++; - } - return flushed; -} - -/* - * Move all CQEs from the HWCQ into the SWCQ. - */ -void cxio_flush_hw_cq(struct t3_cq *cq) -{ - struct t3_cqe *cqe, *swcqe; - - pr_debug("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid); - cqe = cxio_next_hw_cqe(cq); - while (cqe) { - pr_debug("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n", - __func__, cq->rptr, cq->sw_wptr); - swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2); - *swcqe = *cqe; - swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); - cq->sw_wptr++; - cq->rptr++; - cqe = cxio_next_hw_cqe(cq); - } -} - -static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq) -{ - if (CQE_OPCODE(*cqe) == T3_TERMINATE) - return 0; - - if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe)) - return 0; - - if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe)) - return 0; - - if (CQE_SEND_OPCODE(*cqe) && RQ_TYPE(*cqe) && - Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) - return 0; - - return 1; -} - -void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count) -{ - struct t3_cqe *cqe; - u32 ptr; - - *count = 0; - ptr = cq->sw_rptr; - while (!Q_EMPTY(ptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); - if ((SQ_TYPE(*cqe) || - ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) && - (CQE_QPID(*cqe) == wq->qpid)) - (*count)++; - ptr++; - } - pr_debug("%s cq %p count %d\n", __func__, cq, *count); -} - -void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count) -{ - struct t3_cqe *cqe; - u32 ptr; - - *count = 0; - pr_debug("%s count zero %d\n", __func__, *count); - ptr = cq->sw_rptr; - while (!Q_EMPTY(ptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); - if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) && - 
(CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq)) - (*count)++; - ptr++; - } - pr_debug("%s cq %p count %d\n", __func__, cq, *count); -} - -static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p) -{ - struct rdma_cq_setup setup; - setup.id = 0; - setup.base_addr = 0; /* NULL address */ - setup.size = 1; /* enable the CQ */ - setup.credits = 0; - - /* force SGE to redirect to RspQ and interrupt */ - setup.credit_thres = 0; - setup.ovfl_mode = 1; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) -{ - int err; - u64 sge_cmd, ctx0, ctx1; - u64 base_addr; - struct t3_modify_qp_wr *wqe; - struct sk_buff *skb; - - skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); - if (!skb) { - pr_debug("%s alloc_skb failed\n", __func__); - return -ENOMEM; - } - err = cxio_hal_init_ctrl_cq(rdev_p); - if (err) { - pr_debug("%s err %d initializing ctrl_cq\n", __func__, err); - goto err; - } - rdev_p->ctrl_qp.workq = dma_alloc_coherent( - &(rdev_p->rnic_info.pdev->dev), - (1 << T3_CTRL_QP_SIZE_LOG2) * - sizeof(union t3_wr), - &(rdev_p->ctrl_qp.dma_addr), - GFP_KERNEL); - if (!rdev_p->ctrl_qp.workq) { - pr_debug("%s dma_alloc_coherent failed\n", __func__); - err = -ENOMEM; - goto err; - } - dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping, - rdev_p->ctrl_qp.dma_addr); - rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; - - mutex_init(&rdev_p->ctrl_qp.lock); - init_waitqueue_head(&rdev_p->ctrl_qp.waitq); - - /* update HW Ctrl QP context */ - base_addr = rdev_p->ctrl_qp.dma_addr; - base_addr >>= 12; - ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) | - V_EC_BASE_LO((u32) base_addr & 0xffff)); - ctx0 <<= 32; - ctx0 |= V_EC_CREDITS(FW_WR_NUM); - base_addr >>= 16; - ctx1 = (u32) base_addr; - base_addr >>= 32; - ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) | - V_EC_TYPE(0) | V_EC_GEN(1) | - V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32; - wqe = skb_put_zero(skb, 
sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0, - T3_CTL_QP_TID, 7, T3_SOPEOP); - wqe->flags = cpu_to_be32(MODQP_WRITE_EC); - sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; - wqe->sge_cmd = cpu_to_be64(sge_cmd); - wqe->ctx1 = cpu_to_be64(ctx1); - wqe->ctx0 = cpu_to_be64(ctx0); - pr_debug("CtrlQP dma_addr %pad workq %p size %d\n", - &rdev_p->ctrl_qp.dma_addr, rdev_p->ctrl_qp.workq, - 1 << T3_CTRL_QP_SIZE_LOG2); - skb->priority = CPL_PRIORITY_CONTROL; - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -err: - kfree_skb(skb); - return err; -} - -static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p) -{ - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << T3_CTRL_QP_SIZE_LOG2) - * sizeof(union t3_wr), rdev_p->ctrl_qp.workq, - dma_unmap_addr(&rdev_p->ctrl_qp, mapping)); - return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID); -} - -/* write len bytes of data into addr (32B aligned address) - * If data is NULL, clear len byte of memory to zero. - * caller acquires the ctrl_qp lock before the call - */ -static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, - u32 len, void *data) -{ - u32 i, nr_wqe, copy_len; - u8 *copy_data; - u8 wr_len, utx_len; /* length in 8 byte flit */ - enum t3_wr_flags flag; - __be64 *wqe; - u64 utx_cmd; - addr &= 0x7FFFFFF; - nr_wqe = len % 96 ? 
len / 96 + 1 : len / 96; /* 96B max per WQE */ - pr_debug("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n", - __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len, - nr_wqe, data, addr); - utx_len = 3; /* in 32B unit */ - for (i = 0; i < nr_wqe; i++) { - if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2)) { - pr_debug("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, wait for more space i %d\n", - __func__, - rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i); - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - !Q_FULL(rdev_p->ctrl_qp.rptr, - rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2))) { - pr_debug("%s ctrl_qp workq interrupted\n", - __func__); - return -ERESTARTSYS; - } - pr_debug("%s ctrl_qp wakeup, continue posting work request i %d\n", - __func__, i); - } - wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % - (1 << T3_CTRL_QP_SIZE_LOG2))); - flag = 0; - if (i == (nr_wqe - 1)) { - /* last WQE */ - flag = T3_COMPLETION_FLAG; - if (len % 32) - utx_len = len / 32 + 1; - else - utx_len = len / 32; - } - - /* - * Force a CQE to return the credit to the workq in case - * we posted more than half the max QP size of WRs - */ - if ((i != 0) && - (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) { - flag = T3_COMPLETION_FLAG; - pr_debug("%s force completion at i %d\n", __func__, i); - } - - /* build the utx mem command */ - wqe += (sizeof(struct t3_bypass_wr) >> 3); - utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3); - utx_cmd <<= 32; - utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1); - *wqe = cpu_to_be64(utx_cmd); - wqe++; - copy_data = (u8 *) data + i * 96; - copy_len = len > 96 ? 
96 : len; - - /* clear memory content if data is NULL */ - if (data) - memcpy(wqe, copy_data, copy_len); - else - memset(wqe, 0, copy_len); - if (copy_len % 32) - memset(((u8 *) wqe) + copy_len, 0, - 32 - (copy_len % 32)); - wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 + - (utx_len << 2); - wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % - (1 << T3_CTRL_QP_SIZE_LOG2))); - - /* wptr in the WRID[31:0] */ - ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr; - - /* - * This must be the last write with a memory barrier - * for the genbit - */ - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag, - Q_GENBIT(rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID, - wr_len, T3_SOPEOP); - if (flag == T3_COMPLETION_FLAG) - ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID); - len -= 96; - rdev_p->ctrl_qp.wptr++; - } - return 0; -} - -/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr - * OUT: stag index - * TBD: shared memory region support - */ -static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, - u32 *stag, u8 stag_state, u32 pdid, - enum tpt_mem_type type, enum tpt_mem_perm perm, - u32 zbva, u64 to, u32 len, u8 page_size, - u32 pbl_size, u32 pbl_addr) -{ - int err; - struct tpt_entry tpt; - u32 stag_idx; - u32 wptr; - - if (cxio_fatal_error(rdev_p)) - return -EIO; - - stag_state = stag_state > 0; - stag_idx = (*stag) >> 8; - - if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) { - stag_idx = cxio_hal_get_stag(rdev_p->rscp); - if (!stag_idx) - return -ENOMEM; - *stag = (stag_idx << 8) | ((*stag) & 0xFF); - } - pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", - __func__, stag_state, type, pdid, stag_idx); - - mutex_lock(&rdev_p->ctrl_qp.lock); - - /* write TPT entry */ - if (reset_tpt_entry) - memset(&tpt, 0, sizeof(tpt)); - else { - tpt.valid_stag_pdid = cpu_to_be32(F_TPT_VALID | - V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) | - V_TPT_STAG_STATE(stag_state) | - 
V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid)); - BUG_ON(page_size >= 28); - tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) | - ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) | - V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | - V_TPT_PAGE_SIZE(page_size)); - tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); - tpt.len = cpu_to_be32(len); - tpt.va_hi = cpu_to_be32((u32) (to >> 32)); - tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL)); - tpt.rsvd_bind_cnt_or_pstag = 0; - tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2)); - } - err = cxio_hal_ctrl_qp_write_mem(rdev_p, - stag_idx + - (rdev_p->rnic_info.tpt_base >> 5), - sizeof(tpt), &tpt); - - /* release the stag index to free pool */ - if (reset_tpt_entry) - cxio_hal_put_stag(rdev_p->rscp, stag_idx); - - wptr = rdev_p->ctrl_qp.wptr; - mutex_unlock(&rdev_p->ctrl_qp.lock); - if (!err) - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - SEQ32_GE(rdev_p->ctrl_qp.rptr, - wptr))) - return -ERESTARTSYS; - return err; -} - -int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, - u32 pbl_addr, u32 pbl_size) -{ - u32 wptr; - int err; - - pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", - __func__, pbl_addr, rdev_p->rnic_info.pbl_base, - pbl_size); - - mutex_lock(&rdev_p->ctrl_qp.lock); - err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3, - pbl); - wptr = rdev_p->ctrl_qp.wptr; - mutex_unlock(&rdev_p->ctrl_qp.lock); - if (err) - return err; - - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - SEQ32_GE(rdev_p->ctrl_qp.rptr, - wptr))) - return -ERESTARTSYS; - - return 0; -} - -int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl_size, pbl_addr); -} - -int 
cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr) -{ - return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl_size, pbl_addr); -} - -int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size, - u32 pbl_addr) -{ - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, - pbl_size, pbl_addr); -} - -int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0, - 0, 0); -} - -int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag) -{ - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, - 0, 0); -} - -int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR, - 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr); -} - -int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) -{ - struct t3_rdma_init_wr *wqe; - struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC); - if (!skb) - return -ENOMEM; - pr_debug("%s rdev_p %p\n", __func__, rdev_p); - wqe = __skb_put(skb, sizeof(*wqe)); - wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT)); - wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) | - V_FW_RIWR_LEN(sizeof(*wqe) >> 3)); - wqe->wrid.id1 = 0; - wqe->qpid = cpu_to_be32(attr->qpid); - wqe->pdid = cpu_to_be32(attr->pdid); - wqe->scqid = cpu_to_be32(attr->scqid); - wqe->rcqid = cpu_to_be32(attr->rcqid); - wqe->rq_addr = cpu_to_be32(attr->rq_addr - rdev_p->rnic_info.rqt_base); - wqe->rq_size = cpu_to_be32(attr->rq_size); - wqe->mpaattrs = attr->mpaattrs; - wqe->qpcaps = attr->qpcaps; - wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss); - wqe->rqe_count = cpu_to_be16(attr->rqe_count); - 
wqe->flags_rtr_type = cpu_to_be16(attr->flags | - V_RTR_TYPE(attr->rtr_type) | - V_CHAN(attr->chan)); - wqe->ord = cpu_to_be32(attr->ord); - wqe->ird = cpu_to_be32(attr->ird); - wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr); - wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size); - wqe->irs = cpu_to_be32(attr->irs); - skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */ - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -} - -void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb) -{ - cxio_ev_cb = ev_cb; -} - -void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb) -{ - cxio_ev_cb = NULL; -} - -static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb) -{ - static int cnt; - struct cxio_rdev *rdev_p = NULL; - struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data; - pr_debug("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x se %0x notify %0x cqbranch %0x creditth %0x\n", - cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg), - RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg), - RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg), - RSPQ_CREDIT_THRESH(rsp_msg)); - pr_debug("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", - CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe), - CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), - CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe), - CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - rdev_p = (struct cxio_rdev *)t3cdev_p->ulp; - if (!rdev_p) { - pr_debug("%s called by t3cdev %p with null ulp\n", __func__, - t3cdev_p); - return 0; - } - if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) { - rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1; - wake_up_interruptible(&rdev_p->ctrl_qp.waitq); - dev_kfree_skb_irq(skb); - } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8) - dev_kfree_skb_irq(skb); - else if (cxio_ev_cb) - (*cxio_ev_cb) (rdev_p, skb); - else - dev_kfree_skb_irq(skb); - cnt++; - return 
0; -} - -/* Caller takes care of locking if needed */ -int cxio_rdev_open(struct cxio_rdev *rdev_p) -{ - struct net_device *netdev_p = NULL; - int err = 0; - if (strlen(rdev_p->dev_name)) { - if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) { - return -EBUSY; - } - netdev_p = dev_get_by_name(&init_net, rdev_p->dev_name); - if (!netdev_p) { - return -EINVAL; - } - dev_put(netdev_p); - } else if (rdev_p->t3cdev_p) { - if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) { - return -EBUSY; - } - netdev_p = rdev_p->t3cdev_p->lldev; - strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name, - T3_MAX_DEV_NAME_LEN); - } else { - pr_debug("%s t3cdev_p or dev_name must be set\n", __func__); - return -EINVAL; - } - - list_add_tail(&rdev_p->entry, &rdev_list); - - pr_debug("%s opening rnic dev %s\n", __func__, rdev_p->dev_name); - memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp)); - if (!rdev_p->t3cdev_p) - rdev_p->t3cdev_p = dev2t3cdev(netdev_p); - rdev_p->t3cdev_p->ulp = (void *) rdev_p; - - err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO, - &(rdev_p->fw_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) { - pr_err("fatal firmware version mismatch: need version %u but adapter has version %u\n", - CXIO_FW_MAJ, - G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers)); - err = -EINVAL; - goto err1; - } - - err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS, - &(rdev_p->rnic_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS, - &(rdev_p->port_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - - /* - * qpshift is the number of bits to shift the qpid left in order - * to get the correct address of the doorbell for that 
qp. - */ - cxio_init_ucontext(rdev_p, &rdev_p->uctx); - rdev_p->qpshift = PAGE_SHIFT - - ilog2(65536 >> - ilog2(rdev_p->rnic_info.udbell_len >> - PAGE_SHIFT)); - rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT; - rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1; - pr_debug("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n", - __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base, - rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p), - rdev_p->rnic_info.pbl_base, - rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base, - rdev_p->rnic_info.rqt_top); - pr_debug("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu qpnr %d qpmask 0x%x\n", - rdev_p->rnic_info.udbell_len, - rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr, - rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask); - - err = cxio_hal_init_ctrl_qp(rdev_p); - if (err) { - pr_err("%s error %d initializing ctrl_qp\n", __func__, err); - goto err1; - } - err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0, - 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ, - T3_MAX_NUM_PD); - if (err) { - pr_err("%s error %d initializing hal resources\n", - __func__, err); - goto err2; - } - err = cxio_hal_pblpool_create(rdev_p); - if (err) { - pr_err("%s error %d initializing pbl mem pool\n", - __func__, err); - goto err3; - } - err = cxio_hal_rqtpool_create(rdev_p); - if (err) { - pr_err("%s error %d initializing rqt mem pool\n", - __func__, err); - goto err4; - } - return 0; -err4: - cxio_hal_pblpool_destroy(rdev_p); -err3: - cxio_hal_destroy_resource(rdev_p->rscp); -err2: - cxio_hal_destroy_ctrl_qp(rdev_p); -err1: - rdev_p->t3cdev_p->ulp = NULL; - list_del(&rdev_p->entry); - return err; -} - -void cxio_rdev_close(struct cxio_rdev *rdev_p) -{ - if (rdev_p) { - cxio_hal_pblpool_destroy(rdev_p); - cxio_hal_rqtpool_destroy(rdev_p); - list_del(&rdev_p->entry); - cxio_hal_destroy_ctrl_qp(rdev_p); - cxio_hal_destroy_resource(rdev_p->rscp); 
- rdev_p->t3cdev_p->ulp = NULL; - } -} - -int __init cxio_hal_init(void) -{ - if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI)) - return -ENOMEM; - t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler); - return 0; -} - -void __exit cxio_hal_exit(void) -{ - struct cxio_rdev *rdev, *tmp; - - t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL); - list_for_each_entry_safe(rdev, tmp, &rdev_list, entry) - cxio_rdev_close(rdev); - cxio_hal_destroy_rhdl_resource(); -} - -static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq) -{ - struct t3_swsq *sqp; - __u32 ptr = wq->sq_rptr; - int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr); - - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - while (count--) - if (!sqp->signaled) { - ptr++; - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - } else if (sqp->complete) { - - /* - * Insert this completed cqe into the swcq. - */ - pr_debug("%s moving cqe into swcq sq idx %ld cq idx %ld\n", - __func__, Q_PTR2IDX(ptr, wq->sq_size_log2), - Q_PTR2IDX(cq->sw_wptr, cq->size_log2)); - sqp->cqe.header |= htonl(V_CQE_SWCQE(1)); - *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) - = sqp->cqe; - cq->sw_wptr++; - sqp->signaled = 0; - break; - } else - break; -} - -static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe, - struct t3_cqe *read_cqe) -{ - read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr; - read_cqe->len = wq->oldest_read->read_len; - read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) | - V_CQE_SWCQE(SW_CQE(*hw_cqe)) | - V_CQE_OPCODE(T3_READ_REQ) | - V_CQE_TYPE(1)); -} - -/* - * Return a ptr to the next read wr in the SWSQ or NULL. 
- */ -static void advance_oldest_read(struct t3_wq *wq) -{ - - u32 rptr = wq->oldest_read - wq->sq + 1; - u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2); - - while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) { - wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2); - - if (wq->oldest_read->opcode == T3_READ_REQ) - return; - rptr++; - } - wq->oldest_read = NULL; -} - -/* - * cxio_poll_cq - * - * Caller must: - * check the validity of the first CQE, - * supply the wq assicated with the qpid. - * - * credit: cq credit to return to sge. - * cqe_flushed: 1 iff the CQE is flushed. - * cqe: copy of the polled CQE. - * - * return value: - * 0 CQE returned, - * -1 CQE skipped, try again. - */ -int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit) -{ - int ret = 0; - struct t3_cqe *hw_cqe, read_cqe; - - *cqe_flushed = 0; - *credit = 0; - hw_cqe = cxio_next_cqe(cq); - - pr_debug("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", - __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe), - CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe), - CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe), - CQE_WRID_LOW(*hw_cqe)); - - /* - * skip cqe's not affiliated with a QP. - */ - if (wq == NULL) { - ret = -1; - goto skip_cqe; - } - - /* - * Gotta tweak READ completions: - * 1) the cqe doesn't contain the sq_wptr from the wr. - * 2) opcode not reflected from the wr. - * 3) read_len not reflected from the wr. - * 4) cq_type is RQ_TYPE not SQ_TYPE. - */ - if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) { - - /* - * If this is an unsolicited read response, then the read - * was generated by the kernel driver as part of peer-2-peer - * connection setup. So ignore the completion. 
- */ - if (!wq->oldest_read) { - if (CQE_STATUS(*hw_cqe)) - wq->error = 1; - ret = -1; - goto skip_cqe; - } - - /* - * Don't write to the HWCQ, so create a new read req CQE - * in local memory. - */ - create_read_req_cqe(wq, hw_cqe, &read_cqe); - hw_cqe = &read_cqe; - advance_oldest_read(wq); - } - - /* - * T3A: Discard TERMINATE CQEs. - */ - if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) { - ret = -1; - wq->error = 1; - goto skip_cqe; - } - - if (CQE_STATUS(*hw_cqe) || wq->error) { - *cqe_flushed = wq->error; - wq->error = 1; - - /* - * T3A inserts errors into the CQE. We cannot return - * these as work completions. - */ - /* incoming write failures */ - if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE) - && RQ_TYPE(*hw_cqe)) { - ret = -1; - goto skip_cqe; - } - /* incoming read request failures */ - if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) { - ret = -1; - goto skip_cqe; - } - - /* incoming SEND with no receive posted failures */ - if (CQE_SEND_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) && - Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { - ret = -1; - goto skip_cqe; - } - BUG_ON((*cqe_flushed == 0) && !SW_CQE(*hw_cqe)); - goto proc_cqe; - } - - /* - * RECV completion. - */ - if (RQ_TYPE(*hw_cqe)) { - - /* - * HW only validates 4 bits of MSN. So we must validate that - * the MSN in the SEND is the next expected MSN. If its not, - * then we complete this with TPT_ERR_MSN and mark the wq in - * error. - */ - - if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { - wq->error = 1; - ret = -1; - goto skip_cqe; - } - - if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) { - wq->error = 1; - hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN)); - goto proc_cqe; - } - goto proc_cqe; - } - - /* - * If we get here its a send completion. - * - * Handle out of order completion. These get stuffed - * in the SW SQ. Then the SW SQ is walked to move any - * now in-order completions into the SW CQ. 
This handles - * 2 cases: - * 1) reaping unsignaled WRs when the first subsequent - * signaled WR is completed. - * 2) out of order read completions. - */ - if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) { - struct t3_swsq *sqp; - - pr_debug("%s out of order completion going in swsq at idx %ld\n", - __func__, - Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), - wq->sq_size_log2)); - sqp = wq->sq + - Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2); - sqp->cqe = *hw_cqe; - sqp->complete = 1; - ret = -1; - goto flush_wq; - } - -proc_cqe: - *cqe = *hw_cqe; - - /* - * Reap the associated WR(s) that are freed up with this - * completion. - */ - if (SQ_TYPE(*hw_cqe)) { - wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); - pr_debug("%s completing sq idx %ld\n", __func__, - Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); - *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id; - wq->sq_rptr++; - } else { - pr_debug("%s completing rq idx %ld\n", __func__, - Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); - *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id; - if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr) - cxio_hal_pblpool_free(wq->rdev, - wq->rq[Q_PTR2IDX(wq->rq_rptr, - wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE); - BUG_ON(Q_EMPTY(wq->rq_rptr, wq->rq_wptr)); - wq->rq_rptr++; - } - -flush_wq: - /* - * Flush any completed cqes that are now in-order. - */ - flush_completed_wrs(wq, cq); - -skip_cqe: - if (SW_CQE(*hw_cqe)) { - pr_debug("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n", - __func__, cq, cq->cqid, cq->sw_rptr); - ++cq->sw_rptr; - } else { - pr_debug("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n", - __func__, cq, cq->cqid, cq->rptr); - ++cq->rptr; - - /* - * T3A: compute credits. 
- */ - if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1))) - || ((cq->rptr - cq->wptr) >= 128)) { - *credit = cq->rptr - cq->wptr; - cq->wptr = cq->rptr; - } - } - return ret; -} diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h deleted file mode 100644 index 40c029ffa425..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __CXIO_HAL_H__ -#define __CXIO_HAL_H__ - -#include <linux/list.h> -#include <linux/mutex.h> -#include <linux/kfifo.h> - -#include "t3_cpl.h" -#include "t3cdev.h" -#include "cxgb3_ctl_defs.h" -#include "cxio_wr.h" - -#define T3_CTRL_QP_ID FW_RI_SGEEC_START -#define T3_CTL_QP_TID FW_RI_TID_START -#define T3_CTRL_QP_SIZE_LOG2 8 -#define T3_CTRL_CQ_ID 0 - -#define T3_MAX_NUM_RI (1<<15) -#define T3_MAX_NUM_QP (1<<15) -#define T3_MAX_NUM_CQ (1<<15) -#define T3_MAX_NUM_PD (1<<15) -#define T3_MAX_PBL_SIZE 256 -#define T3_MAX_RQ_SIZE 1024 -#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1) -#define T3_MAX_CQ_DEPTH 65536 -#define T3_MAX_NUM_STAG (1<<15) -#define T3_MAX_MR_SIZE 0x100000000ULL -#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ - -#define T3_STAG_UNSET 0xffffffff - -#define T3_MAX_DEV_NAME_LEN 32 - -#define CXIO_FW_MAJ 7 - -struct cxio_hal_ctrl_qp { - u32 wptr; - u32 rptr; - struct mutex lock; /* for the wtpr, can sleep */ - wait_queue_head_t waitq;/* wait for RspQ/CQE msg */ - union t3_wr *workq; /* the work request queue */ - dma_addr_t dma_addr; /* pci bus address of the workq */ - DEFINE_DMA_UNMAP_ADDR(mapping); - void __iomem *doorbell; -}; - -struct cxio_hal_resource { - struct kfifo tpt_fifo; - spinlock_t tpt_fifo_lock; - struct kfifo qpid_fifo; - spinlock_t qpid_fifo_lock; - struct kfifo cqid_fifo; - spinlock_t cqid_fifo_lock; - struct kfifo pdid_fifo; - spinlock_t pdid_fifo_lock; -}; - -struct cxio_qpid_list { - struct list_head entry; - u32 qpid; -}; - -struct cxio_ucontext { - struct list_head qpids; - struct mutex lock; -}; - -struct cxio_rdev { - char dev_name[T3_MAX_DEV_NAME_LEN]; - struct t3cdev *t3cdev_p; - struct rdma_info rnic_info; - struct adap_ports port_info; - struct cxio_hal_resource *rscp; - struct cxio_hal_ctrl_qp ctrl_qp; - void *ulp; - unsigned long qpshift; - u32 qpnr; - u32 qpmask; - struct cxio_ucontext uctx; - struct gen_pool *pbl_pool; - struct gen_pool *rqt_pool; - struct list_head entry; - struct ch_embedded_info 
fw_info; - u32 flags; -#define CXIO_ERROR_FATAL 1 -}; - -static inline int cxio_fatal_error(struct cxio_rdev *rdev_p) -{ - return rdev_p->flags & CXIO_ERROR_FATAL; -} - -static inline int cxio_num_stags(struct cxio_rdev *rdev_p) -{ - return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5)); -} - -typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p, - struct sk_buff * skb); - -#define RSPQ_CQID(rsp) (be32_to_cpu(rsp->cq_ptrid) & 0xffff) -#define RSPQ_CQPTR(rsp) ((be32_to_cpu(rsp->cq_ptrid) >> 16) & 0xffff) -#define RSPQ_GENBIT(rsp) ((be32_to_cpu(rsp->flags) >> 16) & 1) -#define RSPQ_OVERFLOW(rsp) ((be32_to_cpu(rsp->flags) >> 17) & 1) -#define RSPQ_AN(rsp) ((be32_to_cpu(rsp->flags) >> 18) & 1) -#define RSPQ_SE(rsp) ((be32_to_cpu(rsp->flags) >> 19) & 1) -#define RSPQ_NOTIFY(rsp) ((be32_to_cpu(rsp->flags) >> 20) & 1) -#define RSPQ_CQBRANCH(rsp) ((be32_to_cpu(rsp->flags) >> 21) & 1) -#define RSPQ_CREDIT_THRESH(rsp) ((be32_to_cpu(rsp->flags) >> 22) & 1) - -struct respQ_msg_t { - __be32 flags; /* flit 0 */ - __be32 cq_ptrid; - __be64 rsvd; /* flit 1 */ - struct t3_cqe cqe; /* flits 2-3 */ -}; - -enum t3_cq_opcode { - CQ_ARM_AN = 0x2, - CQ_ARM_SE = 0x6, - CQ_FORCE_AN = 0x3, - CQ_CREDIT_UPDATE = 0x7 -}; - -int cxio_rdev_open(struct cxio_rdev *rdev); -void cxio_rdev_close(struct cxio_rdev *rdev); -int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq, - enum t3_cq_opcode op, u32 credit); -int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel); -void cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq); -void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); -void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); -int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq, - struct cxio_ucontext *uctx); -int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq, - struct cxio_ucontext *uctx); -int 
cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode); -int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, - u32 pbl_addr, u32 pbl_size); -int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr); -int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr); -int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size, - u32 pbl_addr); -int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid); -int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr); -int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag); -int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr); -void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb); -void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb); -u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp); -void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid); -int __init cxio_hal_init(void); -void __exit cxio_hal_exit(void); -int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); -int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); -void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count); -void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count); -void cxio_flush_hw_cq(struct t3_cq *cq); -int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit); -int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb); - -#ifdef pr_fmt -#undef pr_fmt -#endif - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#endif diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c deleted file mode 100644 index c6e7bc4420b6..000000000000 --- 
a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -/* Crude resource management */ -#include <linux/kernel.h> -#include <linux/random.h> -#include <linux/slab.h> -#include <linux/kfifo.h> -#include <linux/spinlock.h> -#include <linux/errno.h> -#include "cxio_resource.h" -#include "cxio_hal.h" - -static struct kfifo rhdl_fifo; -static spinlock_t rhdl_fifo_lock; - -#define RANDOM_SIZE 16 - -static int __cxio_init_resource_fifo(struct kfifo *fifo, - spinlock_t *fifo_lock, - u32 nr, u32 skip_low, - u32 skip_high, - int random) -{ - u32 i, j, entry = 0, idx; - u32 random_bytes; - u32 rarray[16]; - spin_lock_init(fifo_lock); - - if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL)) - return -ENOMEM; - - for (i = 0; i < skip_low + skip_high; i++) - kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32)); - if (random) { - j = 0; - random_bytes = prandom_u32(); - for (i = 0; i < RANDOM_SIZE; i++) - rarray[i] = i + skip_low; - for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { - if (j >= RANDOM_SIZE) { - j = 0; - random_bytes = prandom_u32(); - } - idx = (random_bytes >> (j * 2)) & 0xF; - kfifo_in(fifo, - (unsigned char *) &rarray[idx], - sizeof(u32)); - rarray[idx] = i; - j++; - } - for (i = 0; i < RANDOM_SIZE; i++) - kfifo_in(fifo, - (unsigned char *) &rarray[i], - sizeof(u32)); - } else - for (i = skip_low; i < nr - skip_high; i++) - kfifo_in(fifo, (unsigned char *) &i, sizeof(u32)); - - for (i = 0; i < skip_low + skip_high; i++) - if (kfifo_out_locked(fifo, (unsigned char *) &entry, - sizeof(u32), fifo_lock) != sizeof(u32)) - break; - return 0; -} - -static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock, - u32 nr, u32 skip_low, u32 skip_high) -{ - return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, - skip_high, 0)); -} - -static int cxio_init_resource_fifo_random(struct kfifo *fifo, - spinlock_t * fifo_lock, - u32 nr, u32 skip_low, u32 skip_high) -{ - - return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, - skip_high, 1)); -} - -static int 
cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) -{ - u32 i; - - spin_lock_init(&rdev_p->rscp->qpid_fifo_lock); - - if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32), - GFP_KERNEL)) - return -ENOMEM; - - for (i = 16; i < T3_MAX_NUM_QP; i++) - if (!(i & rdev_p->qpmask)) - kfifo_in(&rdev_p->rscp->qpid_fifo, - (unsigned char *) &i, sizeof(u32)); - return 0; -} - -int cxio_hal_init_rhdl_resource(u32 nr_rhdl) -{ - return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1, - 0); -} - -void cxio_hal_destroy_rhdl_resource(void) -{ - kfifo_free(&rhdl_fifo); -} - -/* nr_* must be power of 2 */ -int cxio_hal_init_resource(struct cxio_rdev *rdev_p, - u32 nr_tpt, u32 nr_pbl, - u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid) -{ - int err = 0; - struct cxio_hal_resource *rscp; - - rscp = kmalloc(sizeof(*rscp), GFP_KERNEL); - if (!rscp) - return -ENOMEM; - rdev_p->rscp = rscp; - err = cxio_init_resource_fifo_random(&rscp->tpt_fifo, - &rscp->tpt_fifo_lock, - nr_tpt, 1, 0); - if (err) - goto tpt_err; - err = cxio_init_qpid_fifo(rdev_p); - if (err) - goto qpid_err; - err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, - nr_cqid, 1, 0); - if (err) - goto cqid_err; - err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, - nr_pdid, 1, 0); - if (err) - goto pdid_err; - return 0; -pdid_err: - kfifo_free(&rscp->cqid_fifo); -cqid_err: - kfifo_free(&rscp->qpid_fifo); -qpid_err: - kfifo_free(&rscp->tpt_fifo); -tpt_err: - return -ENOMEM; -} - -/* - * returns 0 if no resource available - */ -static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock) -{ - u32 entry; - if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) - return entry; - else - return 0; /* fifo emptry */ -} - -static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock, - u32 entry) -{ - BUG_ON( - kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock) - == 0); -} - -u32 cxio_hal_get_stag(struct 
cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock); -} - -void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag) -{ - cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag); -} - -u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) -{ - u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo, - &rscp->qpid_fifo_lock); - pr_debug("%s qpid 0x%x\n", __func__, qpid); - return qpid; -} - -void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid) -{ - pr_debug("%s qpid 0x%x\n", __func__, qpid); - cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid); -} - -u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock); -} - -void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid) -{ - cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid); -} - -u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock); -} - -void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid) -{ - cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid); -} - -void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) -{ - kfifo_free(&rscp->tpt_fifo); - kfifo_free(&rscp->cqid_fifo); - kfifo_free(&rscp->qpid_fifo); - kfifo_free(&rscp->pdid_fifo); - kfree(rscp); -} - -/* - * PBL Memory Manager. Uses Linux generic allocator. 
- */ - -#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */ - -u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size) -{ - unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size); - pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size); - return (u32)addr; -} - -void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) -{ - pr_debug("%s addr 0x%x size %d\n", __func__, addr, size); - gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); -} - -int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p) -{ - unsigned pbl_start, pbl_chunk; - - rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); - if (!rdev_p->pbl_pool) - return -ENOMEM; - - pbl_start = rdev_p->rnic_info.pbl_base; - pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1; - - while (pbl_start < rdev_p->rnic_info.pbl_top) { - pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1, - pbl_chunk); - if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) { - pr_debug("%s failed to add PBL chunk (%x/%x)\n", - __func__, pbl_start, pbl_chunk); - if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) { - pr_warn("%s: Failed to add all PBL chunks (%x/%x)\n", - __func__, pbl_start, - rdev_p->rnic_info.pbl_top - pbl_start); - return 0; - } - pbl_chunk >>= 1; - } else { - pr_debug("%s added PBL chunk (%x/%x)\n", - __func__, pbl_start, pbl_chunk); - pbl_start += pbl_chunk; - } - } - - return 0; -} - -void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) -{ - gen_pool_destroy(rdev_p->pbl_pool); -} - -/* - * RQT Memory Manager. Uses Linux generic allocator. 
- */ - -#define MIN_RQT_SHIFT 10 /* 1KB == mini RQT size (16 entries) */ -#define RQT_CHUNK 2*1024*1024 - -u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size) -{ - unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6); - pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); - return (u32)addr; -} - -void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) -{ - pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6); - gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); -} - -int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p) -{ - unsigned long i; - rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); - if (rdev_p->rqt_pool) - for (i = rdev_p->rnic_info.rqt_base; - i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1; - i += RQT_CHUNK) - gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1); - return rdev_p->rqt_pool ? 0 : -ENOMEM; -} - -void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p) -{ - gen_pool_destroy(rdev_p->rqt_pool); -} diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h deleted file mode 100644 index a2703a3d882d..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __CXIO_RESOURCE_H__ -#define __CXIO_RESOURCE_H__ - -#include <linux/kernel.h> -#include <linux/random.h> -#include <linux/slab.h> -#include <linux/kfifo.h> -#include <linux/spinlock.h> -#include <linux/errno.h> -#include <linux/genalloc.h> -#include "cxio_hal.h" - -extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl); -extern void cxio_hal_destroy_rhdl_resource(void); -extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p, - u32 nr_tpt, u32 nr_pbl, - u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, - u32 nr_pdid); -extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag); -extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid); -extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid); -extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp); - -#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base ) -extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p); -extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p); -extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int 
size); -extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size); - -#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base ) -extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p); -extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p); -extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size); -extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size); -#endif diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h deleted file mode 100644 index 53aa5c36247a..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ /dev/null @@ -1,802 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __CXIO_WR_H__ -#define __CXIO_WR_H__ - -#include <asm/io.h> -#include <linux/pci.h> -#include <linux/timer.h> -#include "firmware_exports.h" - -#define T3_MAX_SGE 4 -#define T3_MAX_INLINE 64 -#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3) -#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024) -#define T3_STAG0_PAGE_SHIFT 15 - -#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) -#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ - ((rptr)!=(wptr)) ) -#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1)) -#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<<size_log2)-((wptr)-(rptr))) -#define Q_COUNT(rptr,wptr) ((wptr)-(rptr)) -#define Q_PTR2IDX(ptr,size_log2) (ptr & ((1UL<<size_log2)-1)) - -static inline void ring_doorbell(void __iomem *doorbell, u32 qpid) -{ - writel(((1<<31) | qpid), doorbell); -} - -#define SEQ32_GE(x,y) (!( (((u32) (x)) - ((u32) (y))) & 0x80000000 )) - -enum t3_wr_flags { - T3_COMPLETION_FLAG = 0x01, - T3_NOTIFY_FLAG = 0x02, - T3_SOLICITED_EVENT_FLAG = 0x04, - T3_READ_FENCE_FLAG = 0x08, - T3_LOCAL_FENCE_FLAG = 0x10 -} __packed; - -enum t3_wr_opcode { - T3_WR_BP = FW_WROPCODE_RI_BYPASS, - T3_WR_SEND = FW_WROPCODE_RI_SEND, - T3_WR_WRITE = FW_WROPCODE_RI_RDMA_WRITE, - T3_WR_READ = FW_WROPCODE_RI_RDMA_READ, - T3_WR_INV_STAG = FW_WROPCODE_RI_LOCAL_INV, - T3_WR_BIND = FW_WROPCODE_RI_BIND_MW, - T3_WR_RCV = FW_WROPCODE_RI_RECEIVE, - T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT, - T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP, - T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR -} __packed; - -enum t3_rdma_opcode { - T3_RDMA_WRITE, /* IETF RDMAP v1.0 ... 
*/ - T3_READ_REQ, - T3_READ_RESP, - T3_SEND, - T3_SEND_WITH_INV, - T3_SEND_WITH_SE, - T3_SEND_WITH_SE_INV, - T3_TERMINATE, - T3_RDMA_INIT, /* CHELSIO RI specific ... */ - T3_BIND_MW, - T3_FAST_REGISTER, - T3_LOCAL_INV, - T3_QP_MOD, - T3_BYPASS, - T3_RDMA_READ_REQ_WITH_INV, -} __packed; - -static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) -{ - switch (wrop) { - case T3_WR_BP: return T3_BYPASS; - case T3_WR_SEND: return T3_SEND; - case T3_WR_WRITE: return T3_RDMA_WRITE; - case T3_WR_READ: return T3_READ_REQ; - case T3_WR_INV_STAG: return T3_LOCAL_INV; - case T3_WR_BIND: return T3_BIND_MW; - case T3_WR_INIT: return T3_RDMA_INIT; - case T3_WR_QP_MOD: return T3_QP_MOD; - case T3_WR_FASTREG: return T3_FAST_REGISTER; - default: break; - } - return -1; -} - - -/* Work request id */ -union t3_wrid { - struct { - u32 hi; - u32 low; - } id0; - u64 id1; -}; - -#define WRID(wrid) (wrid.id1) -#define WRID_GEN(wrid) (wrid.id0.wr_gen) -#define WRID_IDX(wrid) (wrid.id0.wr_idx) -#define WRID_LO(wrid) (wrid.id0.wr_lo) - -struct fw_riwrh { - __be32 op_seop_flags; - __be32 gen_tid_len; -}; - -#define S_FW_RIWR_OP 24 -#define M_FW_RIWR_OP 0xff -#define V_FW_RIWR_OP(x) ((x) << S_FW_RIWR_OP) -#define G_FW_RIWR_OP(x) ((((x) >> S_FW_RIWR_OP)) & M_FW_RIWR_OP) - -#define S_FW_RIWR_SOPEOP 22 -#define M_FW_RIWR_SOPEOP 0x3 -#define V_FW_RIWR_SOPEOP(x) ((x) << S_FW_RIWR_SOPEOP) - -#define S_FW_RIWR_FLAGS 8 -#define M_FW_RIWR_FLAGS 0x3fffff -#define V_FW_RIWR_FLAGS(x) ((x) << S_FW_RIWR_FLAGS) -#define G_FW_RIWR_FLAGS(x) ((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS) - -#define S_FW_RIWR_TID 8 -#define V_FW_RIWR_TID(x) ((x) << S_FW_RIWR_TID) - -#define S_FW_RIWR_LEN 0 -#define V_FW_RIWR_LEN(x) ((x) << S_FW_RIWR_LEN) - -#define S_FW_RIWR_GEN 31 -#define V_FW_RIWR_GEN(x) ((x) << S_FW_RIWR_GEN) - -struct t3_sge { - __be32 stag; - __be32 len; - __be64 to; -}; - -/* If num_sgle is zero, flit 5+ contains immediate data.*/ -struct t3_send_wr { - struct fw_riwrh wrh; /* 0 */ - union 
t3_wrid wrid; /* 1 */ - - u8 rdmaop; /* 2 */ - u8 reserved[3]; - __be32 rem_stag; - __be32 plen; /* 3 */ - __be32 num_sgle; - struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */ -}; - -#define T3_MAX_FASTREG_DEPTH 10 -#define T3_MAX_FASTREG_FRAG 10 - -struct t3_fastreg_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 stag; /* 2 */ - __be32 len; - __be32 va_base_hi; /* 3 */ - __be32 va_base_lo_fbo; - __be32 page_type_perms; /* 4 */ - __be32 reserved1; - __be64 pbl_addrs[0]; /* 5+ */ -}; - -/* - * If a fastreg wr spans multiple wqes, then the 2nd fragment look like this. - */ -struct t3_pbl_frag { - struct fw_riwrh wrh; /* 0 */ - __be64 pbl_addrs[14]; /* 1..14 */ -}; - -#define S_FR_PAGE_COUNT 24 -#define M_FR_PAGE_COUNT 0xff -#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT) -#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT) - -#define S_FR_PAGE_SIZE 16 -#define M_FR_PAGE_SIZE 0x1f -#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE) -#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE) - -#define S_FR_TYPE 8 -#define M_FR_TYPE 0x1 -#define V_FR_TYPE(x) ((x) << S_FR_TYPE) -#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE) - -#define S_FR_PERMS 0 -#define M_FR_PERMS 0xff -#define V_FR_PERMS(x) ((x) << S_FR_PERMS) -#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS) - -struct t3_local_inv_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 stag; /* 2 */ - __be32 reserved; -}; - -struct t3_rdma_write_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 rdmaop; /* 2 */ - u8 reserved[3]; - __be32 stag_sink; - __be64 to_sink; /* 3 */ - __be32 plen; /* 4 */ - __be32 num_sgle; - struct t3_sge sgl[T3_MAX_SGE]; /* 5+ */ -}; - -struct t3_rdma_read_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 rdmaop; /* 2 */ - u8 local_inv; - u8 reserved[2]; - __be32 rem_stag; - __be64 rem_to; /* 3 */ - __be32 local_stag; /* 4 */ - __be32 local_len; - __be64 
local_to; /* 5 */ -}; - -struct t3_bind_mw_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u16 reserved; /* 2 */ - u8 type; - u8 perms; - __be32 mr_stag; - __be32 mw_stag; /* 3 */ - __be32 mw_len; - __be64 mw_va; /* 4 */ - __be32 mr_pbl_addr; /* 5 */ - u8 reserved2[3]; - u8 mr_pagesz; -}; - -struct t3_receive_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 pagesz[T3_MAX_SGE]; - __be32 num_sgle; /* 2 */ - struct t3_sge sgl[T3_MAX_SGE]; /* 3+ */ - __be32 pbl_addr[T3_MAX_SGE]; -}; - -struct t3_bypass_wr { - struct fw_riwrh wrh; - union t3_wrid wrid; /* 1 */ -}; - -struct t3_modify_qp_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 flags; /* 2 */ - __be32 quiesce; /* 2 */ - __be32 max_ird; /* 3 */ - __be32 max_ord; /* 3 */ - __be64 sge_cmd; /* 4 */ - __be64 ctx1; /* 5 */ - __be64 ctx0; /* 6 */ -}; - -enum t3_modify_qp_flags { - MODQP_QUIESCE = 0x01, - MODQP_MAX_IRD = 0x02, - MODQP_MAX_ORD = 0x04, - MODQP_WRITE_EC = 0x08, - MODQP_READ_EC = 0x10, -}; - - -enum t3_mpa_attrs { - uP_RI_MPA_RX_MARKER_ENABLE = 0x1, - uP_RI_MPA_TX_MARKER_ENABLE = 0x2, - uP_RI_MPA_CRC_ENABLE = 0x4, - uP_RI_MPA_IETF_ENABLE = 0x8 -} __packed; - -enum t3_qp_caps { - uP_RI_QP_RDMA_READ_ENABLE = 0x01, - uP_RI_QP_RDMA_WRITE_ENABLE = 0x02, - uP_RI_QP_BIND_ENABLE = 0x04, - uP_RI_QP_FAST_REGISTER_ENABLE = 0x08, - uP_RI_QP_STAG0_ENABLE = 0x10 -} __packed; - -enum rdma_init_rtr_types { - RTR_READ = 1, - RTR_WRITE = 2, - RTR_SEND = 3, -}; - -#define S_RTR_TYPE 2 -#define M_RTR_TYPE 0x3 -#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE) -#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE) - -#define S_CHAN 4 -#define M_CHAN 0x3 -#define V_CHAN(x) ((x) << S_CHAN) -#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN) - -struct t3_rdma_init_attr { - u32 tid; - u32 qpid; - u32 pdid; - u32 scqid; - u32 rcqid; - u32 rq_addr; - u32 rq_size; - enum t3_mpa_attrs mpaattrs; - enum t3_qp_caps qpcaps; - u16 tcp_emss; - u32 ord; - u32 ird; - u64 qp_dma_addr; 
- u32 qp_dma_size; - enum rdma_init_rtr_types rtr_type; - u16 flags; - u16 rqe_count; - u32 irs; - u32 chan; -}; - -struct t3_rdma_init_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 qpid; /* 2 */ - __be32 pdid; - __be32 scqid; /* 3 */ - __be32 rcqid; - __be32 rq_addr; /* 4 */ - __be32 rq_size; - u8 mpaattrs; /* 5 */ - u8 qpcaps; - __be16 ulpdu_size; - __be16 flags_rtr_type; - __be16 rqe_count; - __be32 ord; /* 6 */ - __be32 ird; - __be64 qp_dma_addr; /* 7 */ - __be32 qp_dma_size; /* 8 */ - __be32 irs; -}; - -struct t3_genbit { - u64 flit[15]; - __be64 genbit; -}; - -struct t3_wq_in_err { - u64 flit[13]; - u64 err; -}; - -enum rdma_init_wr_flags { - MPA_INITIATOR = (1<<0), - PRIV_QP = (1<<1), -}; - -union t3_wr { - struct t3_send_wr send; - struct t3_rdma_write_wr write; - struct t3_rdma_read_wr read; - struct t3_receive_wr recv; - struct t3_fastreg_wr fastreg; - struct t3_pbl_frag pbl_frag; - struct t3_local_inv_wr local_inv; - struct t3_bind_mw_wr bind; - struct t3_bypass_wr bypass; - struct t3_rdma_init_wr init; - struct t3_modify_qp_wr qp_mod; - struct t3_genbit genbit; - struct t3_wq_in_err wq_in_err; - __be64 flit[16]; -}; - -#define T3_SQ_CQE_FLIT 13 -#define T3_SQ_COOKIE_FLIT 14 - -#define T3_RQ_COOKIE_FLIT 13 -#define T3_RQ_CQE_FLIT 14 - -static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe) -{ - return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags)); -} - -enum t3_wr_hdr_bits { - T3_EOP = 1, - T3_SOP = 2, - T3_SOPEOP = T3_EOP|T3_SOP, -}; - -static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op, - enum t3_wr_flags flags, u8 genbit, u32 tid, - u8 len, u8 sopeop) -{ - wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) | - V_FW_RIWR_SOPEOP(sopeop) | - V_FW_RIWR_FLAGS(flags)); - wmb(); - wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) | - V_FW_RIWR_TID(tid) | - V_FW_RIWR_LEN(len)); - /* 2nd gen bit... 
*/ - ((union t3_wr *)wqe)->genbit.genbit = cpu_to_be64(genbit); -} - -/* - * T3 ULP2_TX commands - */ -enum t3_utx_mem_op { - T3_UTX_MEM_READ = 2, - T3_UTX_MEM_WRITE = 3 -}; - -/* T3 MC7 RDMA TPT entry format */ - -enum tpt_mem_type { - TPT_NON_SHARED_MR = 0x0, - TPT_SHARED_MR = 0x1, - TPT_MW = 0x2, - TPT_MW_RELAXED_PROTECTION = 0x3 -}; - -enum tpt_addr_type { - TPT_ZBTO = 0, - TPT_VATO = 1 -}; - -enum tpt_mem_perm { - TPT_MW_BIND = 0x10, - TPT_LOCAL_READ = 0x8, - TPT_LOCAL_WRITE = 0x4, - TPT_REMOTE_READ = 0x2, - TPT_REMOTE_WRITE = 0x1 -}; - -struct tpt_entry { - __be32 valid_stag_pdid; - __be32 flags_pagesize_qpid; - - __be32 rsvd_pbl_addr; - __be32 len; - __be32 va_hi; - __be32 va_low_or_fbo; - - __be32 rsvd_bind_cnt_or_pstag; - __be32 rsvd_pbl_size; -}; - -#define S_TPT_VALID 31 -#define V_TPT_VALID(x) ((x) << S_TPT_VALID) -#define F_TPT_VALID V_TPT_VALID(1U) - -#define S_TPT_STAG_KEY 23 -#define M_TPT_STAG_KEY 0xFF -#define V_TPT_STAG_KEY(x) ((x) << S_TPT_STAG_KEY) -#define G_TPT_STAG_KEY(x) (((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY) - -#define S_TPT_STAG_STATE 22 -#define V_TPT_STAG_STATE(x) ((x) << S_TPT_STAG_STATE) -#define F_TPT_STAG_STATE V_TPT_STAG_STATE(1U) - -#define S_TPT_STAG_TYPE 20 -#define M_TPT_STAG_TYPE 0x3 -#define V_TPT_STAG_TYPE(x) ((x) << S_TPT_STAG_TYPE) -#define G_TPT_STAG_TYPE(x) (((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE) - -#define S_TPT_PDID 0 -#define M_TPT_PDID 0xFFFFF -#define V_TPT_PDID(x) ((x) << S_TPT_PDID) -#define G_TPT_PDID(x) (((x) >> S_TPT_PDID) & M_TPT_PDID) - -#define S_TPT_PERM 28 -#define M_TPT_PERM 0xF -#define V_TPT_PERM(x) ((x) << S_TPT_PERM) -#define G_TPT_PERM(x) (((x) >> S_TPT_PERM) & M_TPT_PERM) - -#define S_TPT_REM_INV_DIS 27 -#define V_TPT_REM_INV_DIS(x) ((x) << S_TPT_REM_INV_DIS) -#define F_TPT_REM_INV_DIS V_TPT_REM_INV_DIS(1U) - -#define S_TPT_ADDR_TYPE 26 -#define V_TPT_ADDR_TYPE(x) ((x) << S_TPT_ADDR_TYPE) -#define F_TPT_ADDR_TYPE V_TPT_ADDR_TYPE(1U) - -#define S_TPT_MW_BIND_ENABLE 25 -#define 
V_TPT_MW_BIND_ENABLE(x) ((x) << S_TPT_MW_BIND_ENABLE) -#define F_TPT_MW_BIND_ENABLE V_TPT_MW_BIND_ENABLE(1U) - -#define S_TPT_PAGE_SIZE 20 -#define M_TPT_PAGE_SIZE 0x1F -#define V_TPT_PAGE_SIZE(x) ((x) << S_TPT_PAGE_SIZE) -#define G_TPT_PAGE_SIZE(x) (((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE) - -#define S_TPT_PBL_ADDR 0 -#define M_TPT_PBL_ADDR 0x1FFFFFFF -#define V_TPT_PBL_ADDR(x) ((x) << S_TPT_PBL_ADDR) -#define G_TPT_PBL_ADDR(x) (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR) - -#define S_TPT_QPID 0 -#define M_TPT_QPID 0xFFFFF -#define V_TPT_QPID(x) ((x) << S_TPT_QPID) -#define G_TPT_QPID(x) (((x) >> S_TPT_QPID) & M_TPT_QPID) - -#define S_TPT_PSTAG 0 -#define M_TPT_PSTAG 0xFFFFFF -#define V_TPT_PSTAG(x) ((x) << S_TPT_PSTAG) -#define G_TPT_PSTAG(x) (((x) >> S_TPT_PSTAG) & M_TPT_PSTAG) - -#define S_TPT_PBL_SIZE 0 -#define M_TPT_PBL_SIZE 0xFFFFF -#define V_TPT_PBL_SIZE(x) ((x) << S_TPT_PBL_SIZE) -#define G_TPT_PBL_SIZE(x) (((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE) - -/* - * CQE defs - */ -struct t3_cqe { - __be32 header; - __be32 len; - union { - struct { - __be32 stag; - __be32 msn; - } rcqe; - struct { - u32 wrid_hi; - u32 wrid_low; - } scqe; - } u; -}; - -#define S_CQE_OOO 31 -#define M_CQE_OOO 0x1 -#define G_CQE_OOO(x) ((((x) >> S_CQE_OOO)) & M_CQE_OOO) -#define V_CEQ_OOO(x) ((x)<<S_CQE_OOO) - -#define S_CQE_QPID 12 -#define M_CQE_QPID 0x7FFFF -#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID) -#define V_CQE_QPID(x) ((x)<<S_CQE_QPID) - -#define S_CQE_SWCQE 11 -#define M_CQE_SWCQE 0x1 -#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE) -#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE) - -#define S_CQE_GENBIT 10 -#define M_CQE_GENBIT 0x1 -#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT) -#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT) - -#define S_CQE_STATUS 5 -#define M_CQE_STATUS 0x1F -#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS) -#define V_CQE_STATUS(x) ((x)<<S_CQE_STATUS) - -#define S_CQE_TYPE 4 -#define M_CQE_TYPE 0x1 
-#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE) -#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE) - -#define S_CQE_OPCODE 0 -#define M_CQE_OPCODE 0xF -#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE) -#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE) - -#define SW_CQE(x) (G_CQE_SWCQE(be32_to_cpu((x).header))) -#define CQE_OOO(x) (G_CQE_OOO(be32_to_cpu((x).header))) -#define CQE_QPID(x) (G_CQE_QPID(be32_to_cpu((x).header))) -#define CQE_GENBIT(x) (G_CQE_GENBIT(be32_to_cpu((x).header))) -#define CQE_TYPE(x) (G_CQE_TYPE(be32_to_cpu((x).header))) -#define SQ_TYPE(x) (CQE_TYPE((x))) -#define RQ_TYPE(x) (!CQE_TYPE((x))) -#define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x).header))) -#define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x).header))) - -#define CQE_SEND_OPCODE(x)( \ - (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND) || \ - (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE) || \ - (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_INV) || \ - (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE_INV)) - -#define CQE_LEN(x) (be32_to_cpu((x).len)) - -/* used for RQ completion processing */ -#define CQE_WRID_STAG(x) (be32_to_cpu((x).u.rcqe.stag)) -#define CQE_WRID_MSN(x) (be32_to_cpu((x).u.rcqe.msn)) - -/* used for SQ completion processing */ -#define CQE_WRID_SQ_WPTR(x) ((x).u.scqe.wrid_hi) -#define CQE_WRID_WPTR(x) ((x).u.scqe.wrid_low) - -/* generic accessor macros */ -#define CQE_WRID_HI(x) ((x).u.scqe.wrid_hi) -#define CQE_WRID_LOW(x) ((x).u.scqe.wrid_low) - -#define TPT_ERR_SUCCESS 0x0 -#define TPT_ERR_STAG 0x1 /* STAG invalid: either the */ - /* STAG is offlimt, being 0, */ - /* or STAG_key mismatch */ -#define TPT_ERR_PDID 0x2 /* PDID mismatch */ -#define TPT_ERR_QPID 0x3 /* QPID mismatch */ -#define TPT_ERR_ACCESS 0x4 /* Invalid access right */ -#define TPT_ERR_WRAP 0x5 /* Wrap error */ -#define TPT_ERR_BOUND 0x6 /* base and bounds voilation */ -#define TPT_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */ - /* 
shared memory region */ -#define TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */ - /* shared memory region */ -#define TPT_ERR_ECC 0x9 /* ECC error detected */ -#define TPT_ERR_ECC_PSTAG 0xA /* ECC error detected when */ - /* reading PSTAG for a MW */ - /* Invalidate */ -#define TPT_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */ - /* software error */ -#define TPT_ERR_SWFLUSH 0xC /* SW FLUSHED */ -#define TPT_ERR_CRC 0x10 /* CRC error */ -#define TPT_ERR_MARKER 0x11 /* Marker error */ -#define TPT_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */ -#define TPT_ERR_OUT_OF_RQE 0x13 /* out of RQE */ -#define TPT_ERR_DDP_VERSION 0x14 /* wrong DDP version */ -#define TPT_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */ -#define TPT_ERR_OPCODE 0x16 /* invalid rdma opcode */ -#define TPT_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */ -#define TPT_ERR_MSN 0x18 /* MSN error */ -#define TPT_ERR_TBIT 0x19 /* tag bit not set correctly */ -#define TPT_ERR_MO 0x1A /* MO not 0 for TERMINATE */ - /* or READ_REQ */ -#define TPT_ERR_MSN_GAP 0x1B -#define TPT_ERR_MSN_RANGE 0x1C -#define TPT_ERR_IRD_OVERFLOW 0x1D -#define TPT_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */ - /* software error */ -#define TPT_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */ - /* mismatch) */ - -struct t3_swsq { - __u64 wr_id; - struct t3_cqe cqe; - __u32 sq_wptr; - __be32 read_len; - int opcode; - int complete; - int signaled; -}; - -struct t3_swrq { - __u64 wr_id; - __u32 pbl_addr; -}; - -/* - * A T3 WQ implements both the SQ and RQ. 
- */ -struct t3_wq { - union t3_wr *queue; /* DMA accessible memory */ - dma_addr_t dma_addr; /* DMA address for HW */ - DEFINE_DMA_UNMAP_ADDR(mapping); /* unmap kruft */ - u32 error; /* 1 once we go to ERROR */ - u32 qpid; - u32 wptr; /* idx to next available WR slot */ - u32 size_log2; /* total wq size */ - struct t3_swsq *sq; /* SW SQ */ - struct t3_swsq *oldest_read; /* tracks oldest pending read */ - u32 sq_wptr; /* sq_wptr - sq_rptr == count of */ - u32 sq_rptr; /* pending wrs */ - u32 sq_size_log2; /* sq size */ - struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids */ - u32 rq_wptr; /* rq_wptr - rq_rptr == count of */ - u32 rq_rptr; /* pending wrs */ - struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */ - u32 rq_size_log2; /* rq size */ - u32 rq_addr; /* rq adapter address */ - void __iomem *doorbell; /* kernel db */ - u64 udb; /* user db if any */ - struct cxio_rdev *rdev; -}; - -struct t3_cq { - u32 cqid; - u32 rptr; - u32 wptr; - u32 size_log2; - dma_addr_t dma_addr; - DEFINE_DMA_UNMAP_ADDR(mapping); - struct t3_cqe *queue; - struct t3_cqe *sw_queue; - u32 sw_rptr; - u32 sw_wptr; -}; - -#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \ - CQE_GENBIT(*cqe)) - -struct t3_cq_status_page { - u32 cq_err; -}; - -static inline int cxio_cq_in_error(struct t3_cq *cq) -{ - return ((struct t3_cq_status_page *) - &cq->queue[1 << cq->size_log2])->cq_err; -} - -static inline void cxio_set_cq_in_error(struct t3_cq *cq) -{ - ((struct t3_cq_status_page *) - &cq->queue[1 << cq->size_log2])->cq_err = 1; -} - -static inline void cxio_set_wq_in_error(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err |= 1; -} - -static inline void cxio_disable_wq_db(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err |= 2; -} - -static inline void cxio_enable_wq_db(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err &= ~2; -} - -static inline int cxio_wq_db_enabled(struct t3_wq *wq) -{ - return !(wq->queue->wq_in_err.err & 2); -} - -static inline struct t3_cqe 
*cxio_next_hw_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2)); - if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe)) - return cqe; - return NULL; -} - -static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2)); - return cqe; - } - return NULL; -} - -static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2)); - return cqe; - } - cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2)); - if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe)) - return cqe; - return NULL; -} - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c deleted file mode 100644 index 56a8ab6210cf..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/module.h> -#include <linux/moduleparam.h> - -#include <rdma/ib_verbs.h> - -#include "cxgb3_offload.h" -#include "iwch_provider.h" -#include <rdma/cxgb3-abi.h> -#include "iwch.h" -#include "iwch_cm.h" - -#define DRV_VERSION "1.1" - -MODULE_AUTHOR("Boyd Faulkner, Steve Wise"); -MODULE_DESCRIPTION("Chelsio T3 RDMA Driver"); -MODULE_LICENSE("Dual BSD/GPL"); - -static void open_rnic_dev(struct t3cdev *); -static void close_rnic_dev(struct t3cdev *); -static void iwch_event_handler(struct t3cdev *, u32, u32); - -struct cxgb3_client t3c_client = { - .name = "iw_cxgb3", - .add = open_rnic_dev, - .remove = close_rnic_dev, - .handlers = t3c_handlers, - .redirect = iwch_ep_redirect, - .event_handler = iwch_event_handler -}; - -static LIST_HEAD(dev_list); -static DEFINE_MUTEX(dev_mutex); - -static void disable_dbs(struct iwch_dev *rnicp) -{ - unsigned long index; - struct iwch_qp *qhp; - - xa_lock_irq(&rnicp->qps); - xa_for_each(&rnicp->qps, index, qhp) - cxio_disable_wq_db(&qhp->wq); - xa_unlock_irq(&rnicp->qps); -} - -static void enable_dbs(struct iwch_dev *rnicp, int ring_db) -{ - unsigned long index; - struct iwch_qp *qhp; - - xa_lock_irq(&rnicp->qps); - xa_for_each(&rnicp->qps, index, qhp) { - if (ring_db) - ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, - qhp->wq.qpid); - cxio_enable_wq_db(&qhp->wq); - } - xa_unlock_irq(&rnicp->qps); -} - -static void iwch_db_drop_task(struct work_struct *work) -{ - struct iwch_dev *rnicp = container_of(work, struct iwch_dev, - 
db_drop_task.work); - enable_dbs(rnicp, 1); -} - -static void rnic_init(struct iwch_dev *rnicp) -{ - pr_debug("%s iwch_dev %p\n", __func__, rnicp); - xa_init_flags(&rnicp->cqs, XA_FLAGS_LOCK_IRQ); - xa_init_flags(&rnicp->qps, XA_FLAGS_LOCK_IRQ); - xa_init_flags(&rnicp->mrs, XA_FLAGS_LOCK_IRQ); - INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task); - - rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; - rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; - rnicp->attr.max_sge_per_wr = T3_MAX_SGE; - rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE; - rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1; - rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH; - rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev); - rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; - rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; - rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK; - rnicp->attr.max_mr_size = T3_MAX_MR_SIZE; - rnicp->attr.can_resize_wq = 0; - rnicp->attr.max_rdma_reads_per_qp = 8; - rnicp->attr.max_rdma_read_resources = - rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps; - rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */ - rnicp->attr.max_rdma_read_depth = - rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps; - rnicp->attr.rq_overflow_handled = 0; - rnicp->attr.can_modify_ird = 0; - rnicp->attr.can_modify_ord = 0; - rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1; - rnicp->attr.stag0_value = 1; - rnicp->attr.zbva_support = 1; - rnicp->attr.local_invalidate_fence = 1; - rnicp->attr.cq_overflow_detection = 1; - return; -} - -static void open_rnic_dev(struct t3cdev *tdev) -{ - struct iwch_dev *rnicp; - - pr_debug("%s t3cdev %p\n", __func__, tdev); - pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION); - rnicp = ib_alloc_device(iwch_dev, ibdev); - if (!rnicp) { - pr_err("Cannot allocate ib device\n"); - return; - } - rnicp->rdev.ulp = rnicp; - rnicp->rdev.t3cdev_p = tdev; - - mutex_lock(&dev_mutex); - - if (cxio_rdev_open(&rnicp->rdev)) { - 
mutex_unlock(&dev_mutex); - pr_err("Unable to open CXIO rdev\n"); - ib_dealloc_device(&rnicp->ibdev); - return; - } - - rnic_init(rnicp); - - list_add_tail(&rnicp->entry, &dev_list); - mutex_unlock(&dev_mutex); - - if (iwch_register_device(rnicp)) { - pr_err("Unable to register device\n"); - close_rnic_dev(tdev); - } - pr_info("Initialized device %s\n", - pci_name(rnicp->rdev.rnic_info.pdev)); - return; -} - -static void close_rnic_dev(struct t3cdev *tdev) -{ - struct iwch_dev *dev, *tmp; - pr_debug("%s t3cdev %p\n", __func__, tdev); - mutex_lock(&dev_mutex); - list_for_each_entry_safe(dev, tmp, &dev_list, entry) { - if (dev->rdev.t3cdev_p == tdev) { - dev->rdev.flags = CXIO_ERROR_FATAL; - synchronize_net(); - cancel_delayed_work_sync(&dev->db_drop_task); - list_del(&dev->entry); - iwch_unregister_device(dev); - cxio_rdev_close(&dev->rdev); - WARN_ON(!xa_empty(&dev->cqs)); - WARN_ON(!xa_empty(&dev->qps)); - WARN_ON(!xa_empty(&dev->mrs)); - ib_dealloc_device(&dev->ibdev); - break; - } - } - mutex_unlock(&dev_mutex); -} - -static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id) -{ - struct cxio_rdev *rdev = tdev->ulp; - struct iwch_dev *rnicp; - struct ib_event event; - u32 portnum = port_id + 1; - int dispatch = 0; - - if (!rdev) - return; - rnicp = rdev_to_iwch_dev(rdev); - switch (evt) { - case OFFLOAD_STATUS_DOWN: { - rdev->flags = CXIO_ERROR_FATAL; - synchronize_net(); - event.event = IB_EVENT_DEVICE_FATAL; - dispatch = 1; - break; - } - case OFFLOAD_PORT_DOWN: { - event.event = IB_EVENT_PORT_ERR; - dispatch = 1; - break; - } - case OFFLOAD_PORT_UP: { - event.event = IB_EVENT_PORT_ACTIVE; - dispatch = 1; - break; - } - case OFFLOAD_DB_FULL: { - disable_dbs(rnicp); - break; - } - case OFFLOAD_DB_EMPTY: { - enable_dbs(rnicp, 1); - break; - } - case OFFLOAD_DB_DROP: { - unsigned long delay = 1000; - unsigned short r; - - disable_dbs(rnicp); - get_random_bytes(&r, 2); - delay += r & 1023; - - /* - * delay is between 1000-2023 usecs. 
- */ - schedule_delayed_work(&rnicp->db_drop_task, - usecs_to_jiffies(delay)); - break; - } - } - - if (dispatch) { - event.device = &rnicp->ibdev; - event.element.port_num = portnum; - ib_dispatch_event(&event); - } - - return; -} - -static int __init iwch_init_module(void) -{ - int err; - - err = cxio_hal_init(); - if (err) - return err; - err = iwch_cm_init(); - if (err) - return err; - cxio_register_ev_cb(iwch_ev_dispatch); - cxgb3_register_client(&t3c_client); - return 0; -} - -static void __exit iwch_exit_module(void) -{ - cxgb3_unregister_client(&t3c_client); - cxio_unregister_ev_cb(iwch_ev_dispatch); - iwch_cm_term(); - cxio_hal_exit(); -} - -module_init(iwch_init_module); -module_exit(iwch_exit_module); diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h deleted file mode 100644 index 310a937bffcf..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __IWCH_H__ -#define __IWCH_H__ - -#include <linux/mutex.h> -#include <linux/list.h> -#include <linux/spinlock.h> -#include <linux/xarray.h> -#include <linux/workqueue.h> - -#include <rdma/ib_verbs.h> - -#include "cxio_hal.h" -#include "cxgb3_offload.h" - -struct iwch_pd; -struct iwch_cq; -struct iwch_qp; -struct iwch_mr; - -struct iwch_rnic_attributes { - u32 max_qps; - u32 max_wrs; /* Max for any SQ/RQ */ - u32 max_sge_per_wr; - u32 max_sge_per_rdma_write_wr; /* for RDMA Write WR */ - u32 max_cqs; - u32 max_cqes_per_cq; - u32 max_mem_regs; - u32 max_phys_buf_entries; /* for phys buf list */ - u32 max_pds; - - /* - * The memory page sizes supported by this RNIC. - * Bit position i in bitmap indicates page of - * size (4k)^i. Phys block list mode unsupported. - */ - u32 mem_pgsizes_bitmask; - u64 max_mr_size; - u8 can_resize_wq; - - /* - * The maximum number of RDMA Reads that can be outstanding - * per QP with this RNIC as the target. - */ - u32 max_rdma_reads_per_qp; - - /* - * The maximum number of resources used for RDMA Reads - * by this RNIC with this RNIC as the target. - */ - u32 max_rdma_read_resources; - - /* - * The max depth per QP for initiation of RDMA Read - * by this RNIC. 
- */ - u32 max_rdma_read_qp_depth; - - /* - * The maximum depth for initiation of RDMA Read - * operations by this RNIC on all QPs - */ - u32 max_rdma_read_depth; - u8 rq_overflow_handled; - u32 can_modify_ird; - u32 can_modify_ord; - u32 max_mem_windows; - u32 stag0_value; - u8 zbva_support; - u8 local_invalidate_fence; - u32 cq_overflow_detection; -}; - -struct iwch_dev { - struct ib_device ibdev; - struct cxio_rdev rdev; - u32 device_cap_flags; - struct iwch_rnic_attributes attr; - struct xarray cqs; - struct xarray qps; - struct xarray mrs; - struct list_head entry; - struct delayed_work db_drop_task; -}; - -static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) -{ - return container_of(ibdev, struct iwch_dev, ibdev); -} - -static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev) -{ - return container_of(rdev, struct iwch_dev, rdev); -} - -static inline int t3b_device(const struct iwch_dev *rhp) -{ - return rhp->rdev.t3cdev_p->type == T3B; -} - -static inline int t3a_device(const struct iwch_dev *rhp) -{ - return rhp->rdev.t3cdev_p->type == T3A; -} - -static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid) -{ - return xa_load(&rhp->cqs, cqid); -} - -static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid) -{ - return xa_load(&rhp->qps, qpid); -} - -static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid) -{ - return xa_load(&rhp->mrs, mmid); -} - -extern struct cxgb3_client t3c_client; -extern cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS]; -extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb); - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c deleted file mode 100644 index 0bca72cb4d9a..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ /dev/null @@ -1,2258 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include <linux/module.h> -#include <linux/list.h> -#include <linux/slab.h> -#include <linux/workqueue.h> -#include <linux/skbuff.h> -#include <linux/timer.h> -#include <linux/notifier.h> -#include <linux/inetdevice.h> - -#include <net/neighbour.h> -#include <net/netevent.h> -#include <net/route.h> - -#include "tcb.h" -#include "cxgb3_offload.h" -#include "iwch.h" -#include "iwch_provider.h" -#include "iwch_cm.h" - -static char *states[] = { - "idle", - "listen", - "connecting", - "mpa_wait_req", - "mpa_req_sent", - "mpa_req_rcvd", - "mpa_rep_sent", - "fpdu_mode", - "aborting", - "closing", - "moribund", - "dead", - NULL, -}; - -int peer2peer = 0; -module_param(peer2peer, int, 0644); -MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); - -static int ep_timeout_secs = 60; -module_param(ep_timeout_secs, int, 0644); -MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " - "in seconds (default=60)"); - -static int mpa_rev = 1; -module_param(mpa_rev, int, 0644); -MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " - "1 is spec compliant. 
(default=1)"); - -static int markers_enabled = 0; -module_param(markers_enabled, int, 0644); -MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)"); - -static int crc_enabled = 1; -module_param(crc_enabled, int, 0644); -MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)"); - -static int rcv_win = 256 * 1024; -module_param(rcv_win, int, 0644); -MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)"); - -static int snd_win = 32 * 1024; -module_param(snd_win, int, 0644); -MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)"); - -static unsigned int nocong = 0; -module_param(nocong, uint, 0644); -MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)"); - -static unsigned int cong_flavor = 1; -module_param(cong_flavor, uint, 0644); -MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)"); - -static struct workqueue_struct *workq; - -static struct sk_buff_head rxq; - -static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); -static void ep_timeout(struct timer_list *t); -static void connect_reply_upcall(struct iwch_ep *ep, int status); - -static void start_ep_timer(struct iwch_ep *ep) -{ - pr_debug("%s ep %p\n", __func__, ep); - if (timer_pending(&ep->timer)) { - pr_debug("%s stopped / restarted timer ep %p\n", __func__, ep); - del_timer_sync(&ep->timer); - } else - get_ep(&ep->com); - ep->timer.expires = jiffies + ep_timeout_secs * HZ; - add_timer(&ep->timer); -} - -static void stop_ep_timer(struct iwch_ep *ep) -{ - pr_debug("%s ep %p\n", __func__, ep); - if (!timer_pending(&ep->timer)) { - WARN(1, "%s timer stopped when its not running! 
ep %p state %u\n", - __func__, ep, ep->com.state); - return; - } - del_timer_sync(&ep->timer); - put_ep(&ep->com); -} - -static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e) -{ - int error = 0; - struct cxio_rdev *rdev; - - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - kfree_skb(skb); - return -EIO; - } - error = l2t_send(tdev, skb, l2e); - if (error < 0) - kfree_skb(skb); - return error < 0 ? error : 0; -} - -int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) -{ - int error = 0; - struct cxio_rdev *rdev; - - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - kfree_skb(skb); - return -EIO; - } - error = cxgb3_ofld_send(tdev, skb); - if (error < 0) - kfree_skb(skb); - return error < 0 ? error : 0; -} - -static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) -{ - struct cpl_tid_release *req; - - skb = get_skb(skb, sizeof(*req), GFP_KERNEL); - if (!skb) - return; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); - skb->priority = CPL_PRIORITY_SETUP; - iwch_cxgb3_ofld_send(tdev, skb); - return; -} - -int iwch_quiesce_tid(struct iwch_ep *ep) -{ - struct cpl_set_tcb_field *req; - struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - - if (!skb) - return -ENOMEM; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); - req->reply = 0; - req->cpu_idx = 0; - req->word = htons(W_TCB_RX_QUIESCE); - req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); - req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE); - - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -int iwch_resume_tid(struct iwch_ep *ep) -{ - struct cpl_set_tcb_field *req; - struct sk_buff *skb = 
get_skb(NULL, sizeof(*req), GFP_KERNEL); - - if (!skb) - return -ENOMEM; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); - req->reply = 0; - req->cpu_idx = 0; - req->word = htons(W_TCB_RX_QUIESCE); - req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); - req->val = 0; - - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static void set_emss(struct iwch_ep *ep, u16 opt) -{ - pr_debug("%s ep %p opt %u\n", __func__, ep, opt); - ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40; - if (G_TCPOPT_TSTAMP(opt)) - ep->emss -= 12; - if (ep->emss < 128) - ep->emss = 128; - pr_debug("emss=%d\n", ep->emss); -} - -static enum iwch_ep_state state_read(struct iwch_ep_common *epc) -{ - unsigned long flags; - enum iwch_ep_state state; - - spin_lock_irqsave(&epc->lock, flags); - state = epc->state; - spin_unlock_irqrestore(&epc->lock, flags); - return state; -} - -static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) -{ - epc->state = new; -} - -static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) -{ - unsigned long flags; - - spin_lock_irqsave(&epc->lock, flags); - pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]); - __state_set(epc, new); - spin_unlock_irqrestore(&epc->lock, flags); - return; -} - -static void *alloc_ep(int size, gfp_t gfp) -{ - struct iwch_ep_common *epc; - - epc = kzalloc(size, gfp); - if (epc) { - kref_init(&epc->kref); - spin_lock_init(&epc->lock); - init_waitqueue_head(&epc->waitq); - } - pr_debug("%s alloc ep %p\n", __func__, epc); - return epc; -} - -void __free_ep(struct kref *kref) -{ - struct iwch_ep *ep; - ep = container_of(container_of(kref, struct iwch_ep_common, kref), - struct iwch_ep, com); - pr_debug("%s ep %p state %s\n", - __func__, ep, states[state_read(&ep->com)]); - if 
(test_bit(RELEASE_RESOURCES, &ep->com.flags)) { - cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); - dst_release(ep->dst); - l2t_release(ep->com.tdev, ep->l2t); - } - kfree(ep); -} - -static void release_ep_resources(struct iwch_ep *ep) -{ - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - set_bit(RELEASE_RESOURCES, &ep->com.flags); - put_ep(&ep->com); -} - -static int status2errno(int status) -{ - switch (status) { - case CPL_ERR_NONE: - return 0; - case CPL_ERR_CONN_RESET: - return -ECONNRESET; - case CPL_ERR_ARP_MISS: - return -EHOSTUNREACH; - case CPL_ERR_CONN_TIMEDOUT: - return -ETIMEDOUT; - case CPL_ERR_TCAM_FULL: - return -ENOMEM; - case CPL_ERR_CONN_EXIST: - return -EADDRINUSE; - default: - return -EIO; - } -} - -/* - * Try and reuse skbs already allocated... - */ -static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) -{ - if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) { - skb_trim(skb, 0); - skb_get(skb); - } else { - skb = alloc_skb(len, gfp); - } - return skb; -} - -static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip, - __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos) -{ - struct rtable *rt; - struct flowi4 fl4; - - rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, - peer_port, local_port, IPPROTO_TCP, - tos, 0); - if (IS_ERR(rt)) - return NULL; - return rt; -} - -static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu) -{ - int i = 0; - - while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) - ++i; - return i; -} - -static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb) -{ - pr_debug("%s t3cdev %p\n", __func__, dev); - kfree_skb(skb); -} - -/* - * Handle an ARP failure for an active open. - */ -static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb) -{ - pr_err("ARP failure during connect\n"); - kfree_skb(skb); -} - -/* - * Handle an ARP failure for a CPL_ABORT_REQ. 
Change it into a no RST variant - * and send it along. - */ -static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb) -{ - struct cpl_abort_req *req = cplhdr(skb); - - pr_debug("%s t3cdev %p\n", __func__, dev); - req->cmd = CPL_ABORT_NO_RST; - iwch_cxgb3_ofld_send(dev, skb); -} - -static int send_halfclose(struct iwch_ep *ep, gfp_t gfp) -{ - struct cpl_close_con_req *req; - struct sk_buff *skb; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), gfp); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, arp_failure_discard); - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid)); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) -{ - struct cpl_abort_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(skb, sizeof(*req), gfp); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, abort_arp_failure); - req = skb_put_zero(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); - req->cmd = CPL_ABORT_SEND_RST; - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_connect(struct iwch_ep *ep) -{ - struct cpl_act_open_req *req; - struct sk_buff *skb; - u32 opt0h, opt0l, opt2; - unsigned int mtu_idx; - int wscale; - - pr_debug("%s ep %p\n", __func__, ep); - - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - mtu_idx = 
find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst)); - wscale = compute_wscale(rcv_win); - opt0h = V_NAGLE(0) | - V_NO_CONG(nocong) | - V_KEEP_ALIVE(1) | - F_TCAM_BYPASS | - V_WND_SCALE(wscale) | - V_MSS_IDX(mtu_idx) | - V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); - opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | - V_CONG_CONTROL_FLAVOR(cong_flavor); - skb->priority = CPL_PRIORITY_SETUP; - set_arp_failure_handler(skb, act_open_req_arp_failure); - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid)); - req->local_port = ep->com.local_addr.sin_port; - req->peer_port = ep->com.remote_addr.sin_port; - req->local_ip = ep->com.local_addr.sin_addr.s_addr; - req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; - req->opt0h = htonl(opt0h); - req->opt0l = htonl(opt0l); - req->params = 0; - req->opt2 = htonl(opt2); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - int len; - - pr_debug("%s ep %p pd_len %d\n", __func__, ep, ep->plen); - - BUG_ON(skb_cloned(skb)); - - mpalen = sizeof(*mpa) + ep->plen; - if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) { - kfree_skb(skb); - skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - connect_reply_upcall(ep, -ENOMEM); - return; - } - } - skb_trim(skb, 0); - skb_reserve(skb, sizeof(*req)); - skb_put(skb, mpalen); - skb->priority = CPL_PRIORITY_DATA; - mpa = (struct mpa_message *) skb->data; - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); - mpa->flags = (crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? 
MPA_MARKERS : 0); - mpa->private_data_size = htons(ep->plen); - mpa->revision = mpa_rev; - - if (ep->plen) - memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen); - - /* - * Reference the mpa skb. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. - */ - skb_get(skb); - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - len = skb->len; - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(len); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - BUG_ON(ep->mpa_skb); - ep->mpa_skb = skb; - iwch_l2t_send(ep->com.tdev, skb, ep->l2t); - start_ep_timer(ep); - state_set(&ep->com, MPA_REQ_SENT); - return; -} - -static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - struct sk_buff *skb; - - pr_debug("%s ep %p plen %d\n", __func__, ep, plen); - - mpalen = sizeof(*mpa) + plen; - - skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - cannot alloc skb!\n", __func__); - return -ENOMEM; - } - skb_reserve(skb, sizeof(*req)); - mpa = skb_put(skb, mpalen); - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); - mpa->flags = MPA_REJECT; - mpa->revision = mpa_rev; - mpa->private_data_size = htons(plen); - if (plen) - memcpy(mpa->private_data, pdata, plen); - - /* - * Reference the mpa skb again. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. 
- */ - skb_get(skb); - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(mpalen); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - BUG_ON(ep->mpa_skb); - ep->mpa_skb = skb; - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - int len; - struct sk_buff *skb; - - pr_debug("%s ep %p plen %d\n", __func__, ep, plen); - - mpalen = sizeof(*mpa) + plen; - - skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - cannot alloc skb!\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - skb_reserve(skb, sizeof(*req)); - mpa = skb_put(skb, mpalen); - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); - mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0); - mpa->revision = mpa_rev; - mpa->private_data_size = htons(plen); - if (plen) - memcpy(mpa->private_data, pdata, plen); - - /* - * Reference the mpa skb. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. 
- */ - skb_get(skb); - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - len = skb->len; - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(len); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - ep->mpa_skb = skb; - state_set(&ep->com, MPA_REP_SENT); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_act_establish *req = cplhdr(skb); - unsigned int tid = GET_TID(req); - - pr_debug("%s ep %p tid %d\n", __func__, ep, tid); - - dst_confirm(ep->dst); - - /* setup the hwtid for this connection */ - ep->hwtid = tid; - cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid); - - ep->snd_seq = ntohl(req->snd_isn); - ep->rcv_seq = ntohl(req->rcv_isn); - - set_emss(ep, ntohs(req->tcp_opt)); - - /* dealloc the atid */ - cxgb3_free_atid(ep->com.tdev, ep->atid); - - /* start MPA negotiation */ - send_mpa_req(ep, skb); - - return 0; -} - -static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) -{ - pr_debug("%s ep %p\n", __FILE__, ep); - state_set(&ep->com, ABORTING); - send_abort(ep, skb, gfp); -} - -static void close_complete_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CLOSE; - if (ep->com.cm_id) { - pr_debug("close complete delivered ep %p cm_id %p tid %d\n", - ep, ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void peer_close_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, 
ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_DISCONNECT; - if (ep->com.cm_id) { - pr_debug("peer close delivered ep %p cm_id %p tid %d\n", - ep, ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } -} - -static void peer_abort_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CLOSE; - event.status = -ECONNRESET; - if (ep->com.cm_id) { - pr_debug("abort delivered ep %p cm_id %p tid %d\n", ep, - ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void connect_reply_upcall(struct iwch_ep *ep, int status) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p status %d\n", __func__, ep, status); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CONNECT_REPLY; - event.status = status; - memcpy(&event.local_addr, &ep->com.local_addr, - sizeof(ep->com.local_addr)); - memcpy(&event.remote_addr, &ep->com.remote_addr, - sizeof(ep->com.remote_addr)); - - if ((status == 0) || (status == -ECONNREFUSED)) { - event.private_data_len = ep->plen; - event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); - } - if (ep->com.cm_id) { - pr_debug("%s ep %p tid %d status %d\n", __func__, ep, - ep->hwtid, status); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } - if (status < 0) { - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void connect_request_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CONNECT_REQUEST; - memcpy(&event.local_addr, &ep->com.local_addr, - sizeof(ep->com.local_addr)); - memcpy(&event.remote_addr, &ep->com.remote_addr, - sizeof(ep->com.local_addr)); - 
event.private_data_len = ep->plen; - event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); - event.provider_data = ep; - /* - * Until ird/ord negotiation via MPAv2 support is added, send max - * supported values - */ - event.ird = event.ord = 8; - if (state_read(&ep->parent_ep->com) != DEAD) { - get_ep(&ep->com); - ep->parent_ep->com.cm_id->event_handler( - ep->parent_ep->com.cm_id, - &event); - } - put_ep(&ep->parent_ep->com); - ep->parent_ep = NULL; -} - -static void established_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_ESTABLISHED; - /* - * Until ird/ord negotiation via MPAv2 support is added, send max - * supported values - */ - event.ird = event.ord = 8; - if (ep->com.cm_id) { - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } -} - -static int update_rx_credits(struct iwch_ep *ep, u32 credits) -{ - struct cpl_rx_data_ack *req; - struct sk_buff *skb; - - pr_debug("%s ep %p credits %u\n", __func__, ep, credits); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("update_rx_credits - cannot alloc skb!\n"); - return 0; - } - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid)); - req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1)); - skb->priority = CPL_PRIORITY_ACK; - iwch_cxgb3_ofld_send(ep->com.tdev, skb); - return credits; -} - -static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) -{ - struct mpa_message *mpa; - u16 plen; - struct iwch_qp_attributes attrs; - enum iwch_qp_attr_mask mask; - int err; - - pr_debug("%s ep %p\n", __func__, ep); - - /* - * Stop mpa timer. If it expired, then the state has - * changed and we bail since ep_timeout already aborted - * the connection. 
- */ - stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_SENT) - return; - - /* - * If we get more than the supported amount of private data - * then we must fail this connection. - */ - if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { - err = -EINVAL; - goto err; - } - - /* - * copy the new data into our accumulation buffer. - */ - skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), - skb->len); - ep->mpa_pkt_len += skb->len; - - /* - * if we don't even have the mpa message, then bail. - */ - if (ep->mpa_pkt_len < sizeof(*mpa)) - return; - mpa = (struct mpa_message *) ep->mpa_pkt; - - /* Validate MPA header. */ - if (mpa->revision != mpa_rev) { - err = -EPROTO; - goto err; - } - if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { - err = -EPROTO; - goto err; - } - - plen = ntohs(mpa->private_data_size); - - /* - * Fail if there's too much private data. - */ - if (plen > MPA_MAX_PRIVATE_DATA) { - err = -EPROTO; - goto err; - } - - /* - * If plen does not account for pkt size - */ - if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { - err = -EPROTO; - goto err; - } - - ep->plen = (u8) plen; - - /* - * If we don't have all the pdata yet, then bail. - * We'll continue process when more data arrives. - */ - if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; - - if (mpa->flags & MPA_REJECT) { - err = -ECONNREFUSED; - goto err; - } - - /* - * If we get here we have accumulated the entire mpa - * start reply message including private data. And - * the MPA header is valid. - */ - state_set(&ep->com, FPDU_MODE); - ep->mpa_attr.initiator = 1; - ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; - ep->mpa_attr.recv_marker_enabled = markers_enabled; - ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 
1 : 0; - ep->mpa_attr.version = mpa_rev; - pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n", - __func__, - ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, - ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); - - attrs.mpa_attr = ep->mpa_attr; - attrs.max_ird = ep->ird; - attrs.max_ord = ep->ord; - attrs.llp_stream_handle = ep; - attrs.next_state = IWCH_QP_STATE_RTS; - - mask = IWCH_QP_ATTR_NEXT_STATE | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD; - - /* bind QP and TID with INIT_WR */ - err = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, mask, &attrs, 1); - if (err) - goto err; - - if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) { - iwch_post_zb_read(ep); - } - - goto out; -err: - abort_connection(ep, skb, GFP_KERNEL); -out: - connect_reply_upcall(ep, err); - return; -} - -static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) -{ - struct mpa_message *mpa; - u16 plen; - - pr_debug("%s ep %p\n", __func__, ep); - - /* - * Stop mpa timer. If it expired, then the state has - * changed and we bail since ep_timeout already aborted - * the connection. - */ - stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_WAIT) - return; - - /* - * If we get more than the supported amount of private data - * then we must fail this connection. - */ - if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - - /* - * Copy the new data into our accumulation buffer. - */ - skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), - skb->len); - ep->mpa_pkt_len += skb->len; - - /* - * If we don't even have the mpa message, then bail. - * We'll continue process when more data arrives. 
- */ - if (ep->mpa_pkt_len < sizeof(*mpa)) - return; - pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - mpa = (struct mpa_message *) ep->mpa_pkt; - - /* - * Validate MPA Header. - */ - if (mpa->revision != mpa_rev) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - plen = ntohs(mpa->private_data_size); - - /* - * Fail if there's too much private data. - */ - if (plen > MPA_MAX_PRIVATE_DATA) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - /* - * If plen does not account for pkt size - */ - if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - ep->plen = (u8) plen; - - /* - * If we don't have all the pdata yet, then bail. - */ - if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; - - /* - * If we get here we have accumulated the entire mpa - * start reply message including private data. - */ - ep->mpa_attr.initiator = 0; - ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; - ep->mpa_attr.recv_marker_enabled = markers_enabled; - ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 
1 : 0; - ep->mpa_attr.version = mpa_rev; - pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n", - __func__, - ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, - ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); - - state_set(&ep->com, MPA_REQ_RCVD); - - /* drive upcall */ - connect_request_upcall(ep); - return; -} - -static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_rx_data *hdr = cplhdr(skb); - unsigned int dlen = ntohs(hdr->len); - - pr_debug("%s ep %p dlen %u\n", __func__, ep, dlen); - - skb_pull(skb, sizeof(*hdr)); - skb_trim(skb, dlen); - - ep->rcv_seq += dlen; - BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen)); - - switch (state_read(&ep->com)) { - case MPA_REQ_SENT: - process_mpa_reply(ep, skb); - break; - case MPA_REQ_WAIT: - process_mpa_request(ep, skb); - break; - case MPA_REP_SENT: - break; - default: - pr_err("%s Unexpected streaming data. ep %p state %d tid %d\n", - __func__, ep, state_read(&ep->com), ep->hwtid); - - /* - * The ep will timeout and inform the ULP of the failure. - * See ep_timeout(). - */ - break; - } - - /* update RX credits */ - update_rx_credits(ep, dlen); - - return CPL_RET_BUF_DONE; -} - -/* - * Upcall from the adapter indicating data has been transmitted. - * For us its just the single MPA request or reply. We can now free - * the skb holding the mpa message. 
- */ -static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_wr_ack *hdr = cplhdr(skb); - unsigned int credits = ntohs(hdr->credits); - unsigned long flags; - int post_zb = 0; - - pr_debug("%s ep %p credits %u\n", __func__, ep, credits); - - if (credits == 0) { - pr_debug("%s 0 credit ack ep %p state %u\n", - __func__, ep, state_read(&ep->com)); - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - BUG_ON(credits != 1); - dst_confirm(ep->dst); - if (!ep->mpa_skb) { - pr_debug("%s rdma_init wr_ack ep %p state %u\n", - __func__, ep, ep->com.state); - if (ep->mpa_attr.initiator) { - pr_debug("%s initiator ep %p state %u\n", - __func__, ep, ep->com.state); - if (peer2peer && ep->com.state == FPDU_MODE) - post_zb = 1; - } else { - pr_debug("%s responder ep %p state %u\n", - __func__, ep, ep->com.state); - if (ep->com.state == MPA_REQ_RCVD) { - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - } - } - } else { - pr_debug("%s lsm ack ep %p state %u freeing skb\n", - __func__, ep, ep->com.state); - kfree_skb(ep->mpa_skb); - ep->mpa_skb = NULL; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (post_zb) - iwch_post_zb_read(ep); - return CPL_RET_BUF_DONE; -} - -static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - unsigned long flags; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(!ep); - - /* - * We get 2 abort replies from the HW. The first one must - * be ignored except for scribbling that we need one more. 
- */ - if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) { - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case ABORTING: - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - break; - default: - pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state); - break; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * Return whether a failed active open has allocated a TID - */ -static inline int act_open_has_tid(int status) -{ - return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && - status != CPL_ERR_ARP_MISS; -} - -static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_act_open_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p status %u errno %d\n", __func__, ep, rpl->status, - status2errno(rpl->status)); - connect_reply_upcall(ep, status2errno(rpl->status)); - state_set(&ep->com, DEAD); - if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status)) - release_tid(ep->com.tdev, GET_TID(rpl), NULL); - cxgb3_free_atid(ep->com.tdev, ep->atid); - dst_release(ep->dst); - l2t_release(ep->com.tdev, ep->l2t); - put_ep(&ep->com); - return CPL_RET_BUF_DONE; -} - -static int listen_start(struct iwch_listen_ep *ep) -{ - struct sk_buff *skb; - struct cpl_pass_open_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("t3c_listen_start failed to alloc skb!\n"); - return -ENOMEM; - } - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid)); - req->local_port = ep->com.local_addr.sin_port; - req->local_ip = ep->com.local_addr.sin_addr.s_addr; - req->peer_port = 0; - req->peer_ip = 0; - req->peer_netmask = 0; - req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS); 
- req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10)); - req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK)); - - skb->priority = 1; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_listen_ep *ep = ctx; - struct cpl_pass_open_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p status %d error %d\n", __func__, ep, - rpl->status, status2errno(rpl->status)); - ep->com.rpl_err = status2errno(rpl->status); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - - return CPL_RET_BUF_DONE; -} - -static int listen_stop(struct iwch_listen_ep *ep) -{ - struct sk_buff *skb; - struct cpl_close_listserv_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->cpu_idx = 0; - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid)); - skb->priority = 1; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb, - void *ctx) -{ - struct iwch_listen_ep *ep = ctx; - struct cpl_close_listserv_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p\n", __func__, ep); - ep->com.rpl_err = status2errno(rpl->status); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - return CPL_RET_BUF_DONE; -} - -static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) -{ - struct cpl_pass_accept_rpl *rpl; - unsigned int mtu_idx; - u32 opt0h, opt0l, opt2; - int wscale; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(skb_cloned(skb)); - skb_trim(skb, sizeof(*rpl)); - skb_get(skb); - mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst)); - wscale = compute_wscale(rcv_win); - opt0h = V_NAGLE(0) | - V_NO_CONG(nocong) | - V_KEEP_ALIVE(1) | - F_TCAM_BYPASS | - V_WND_SCALE(wscale) | - 
V_MSS_IDX(mtu_idx) | - V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); - opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | - V_CONG_CONTROL_FLAVOR(cong_flavor); - - rpl = cplhdr(skb); - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid)); - rpl->peer_ip = peer_ip; - rpl->opt0h = htonl(opt0h); - rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT); - rpl->opt2 = htonl(opt2); - rpl->rsvd = rpl->opt2; /* workaround for HW bug */ - skb->priority = CPL_PRIORITY_SETUP; - iwch_l2t_send(ep->com.tdev, skb, ep->l2t); - - return; -} - -static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip, - struct sk_buff *skb) -{ - pr_debug("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid, - peer_ip); - BUG_ON(skb_cloned(skb)); - skb_trim(skb, sizeof(struct cpl_tid_release)); - skb_get(skb); - - if (tdev->type != T3A) - release_tid(tdev, hwtid, skb); - else { - struct cpl_pass_accept_rpl *rpl; - - rpl = cplhdr(skb); - skb->priority = CPL_PRIORITY_SETUP; - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, - hwtid)); - rpl->peer_ip = peer_ip; - rpl->opt0h = htonl(F_TCAM_BYPASS); - rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); - rpl->opt2 = 0; - rpl->rsvd = rpl->opt2; - iwch_cxgb3_ofld_send(tdev, skb); - } -} - -static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *child_ep, *parent_ep = ctx; - struct cpl_pass_accept_req *req = cplhdr(skb); - unsigned int hwtid = GET_TID(req); - struct dst_entry *dst; - struct l2t_entry *l2t; - struct rtable *rt; - struct iff_mac tim; - - pr_debug("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); - - if (state_read(&parent_ep->com) != LISTEN) { - pr_err("%s - listening ep not in LISTEN\n", __func__); - goto reject; - } - - /* - * Find the netdev 
for this connection request. - */ - tim.mac_addr = req->dst_mac; - tim.vlan_tag = ntohs(req->vlan_tag); - if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) { - pr_err("%s bad dst mac %pM\n", __func__, req->dst_mac); - goto reject; - } - - /* Find output route */ - rt = find_route(tdev, - req->local_ip, - req->peer_ip, - req->local_port, - req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid))); - if (!rt) { - pr_err("%s - failed to find dst entry!\n", __func__); - goto reject; - } - dst = &rt->dst; - l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip); - if (!l2t) { - pr_err("%s - failed to allocate l2t entry!\n", __func__); - dst_release(dst); - goto reject; - } - child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); - if (!child_ep) { - pr_err("%s - failed to allocate ep entry!\n", __func__); - l2t_release(tdev, l2t); - dst_release(dst); - goto reject; - } - state_set(&child_ep->com, CONNECTING); - child_ep->com.tdev = tdev; - child_ep->com.cm_id = NULL; - child_ep->com.local_addr.sin_family = AF_INET; - child_ep->com.local_addr.sin_port = req->local_port; - child_ep->com.local_addr.sin_addr.s_addr = req->local_ip; - child_ep->com.remote_addr.sin_family = AF_INET; - child_ep->com.remote_addr.sin_port = req->peer_port; - child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip; - get_ep(&parent_ep->com); - child_ep->parent_ep = parent_ep; - child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid)); - child_ep->l2t = l2t; - child_ep->dst = dst; - child_ep->hwtid = hwtid; - timer_setup(&child_ep->timer, ep_timeout, 0); - cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid); - accept_cr(child_ep, req->peer_ip, skb); - goto out; -reject: - reject_cr(tdev, hwtid, req->peer_ip, skb); -out: - return CPL_RET_BUF_DONE; -} - -static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_pass_establish *req = cplhdr(skb); - - pr_debug("%s ep %p\n", __func__, ep); - ep->snd_seq = ntohl(req->snd_isn); - ep->rcv_seq = 
ntohl(req->rcv_isn); - - set_emss(ep, ntohs(req->tcp_opt)); - - dst_confirm(ep->dst); - state_set(&ep->com, MPA_REQ_WAIT); - start_ep_timer(ep); - - return CPL_RET_BUF_DONE; -} - -static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct iwch_qp_attributes attrs; - unsigned long flags; - int disconnect = 1; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - dst_confirm(ep->dst); - - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case MPA_REQ_WAIT: - __state_set(&ep->com, CLOSING); - break; - case MPA_REQ_SENT: - __state_set(&ep->com, CLOSING); - connect_reply_upcall(ep, -ECONNRESET); - break; - case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. Also wake up anyone waiting - * in rdma connection migration (see iwch_accept_cr()). - */ - __state_set(&ep->com, CLOSING); - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MPA_REP_SENT: - __state_set(&ep->com, CLOSING); - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case FPDU_MODE: - start_ep_timer(ep); - __state_set(&ep->com, CLOSING); - attrs.next_state = IWCH_QP_STATE_CLOSING; - iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); - peer_close_upcall(ep); - break; - case ABORTING: - disconnect = 0; - break; - case CLOSING: - __state_set(&ep->com, MORIBUND); - disconnect = 0; - break; - case MORIBUND: - stop_ep_timer(ep); - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_IDLE; - iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); - } - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - disconnect = 0; - break; - case DEAD: - disconnect = 0; - break; - default: - 
BUG_ON(1); - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (disconnect) - iwch_ep_disconnect(ep, 0, GFP_KERNEL); - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * Returns whether an ABORT_REQ_RSS message is a negative advice. - */ -static int is_neg_adv_abort(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE; -} - -static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_abort_req_rss *req = cplhdr(skb); - struct iwch_ep *ep = ctx; - struct cpl_abort_rpl *rpl; - struct sk_buff *rpl_skb; - struct iwch_qp_attributes attrs; - int ret; - int release = 0; - unsigned long flags; - - if (is_neg_adv_abort(req->status)) { - pr_debug("%s neg_adv_abort ep %p tid %d\n", __func__, ep, - ep->hwtid); - t3_l2t_send_event(ep->com.tdev, ep->l2t); - return CPL_RET_BUF_DONE; - } - - /* - * We get 2 peer aborts from the HW. The first one must - * be ignored except for scribbling that we need one more. - */ - if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) { - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - pr_debug("%s ep %p state %u\n", __func__, ep, ep->com.state); - switch (ep->com.state) { - case CONNECTING: - break; - case MPA_REQ_WAIT: - stop_ep_timer(ep); - break; - case MPA_REQ_SENT: - stop_ep_timer(ep); - connect_reply_upcall(ep, -ECONNRESET); - break; - case MPA_REP_SENT: - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. Also wake up anyone waiting - * in rdma connection migration (see iwch_accept_cr()). 
- */ - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MORIBUND: - case CLOSING: - stop_ep_timer(ep); - /*FALLTHROUGH*/ - case FPDU_MODE: - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_ERROR; - ret = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - if (ret) - pr_err("%s - qp <- error failed!\n", __func__); - } - peer_abort_upcall(ep); - break; - case ABORTING: - break; - case DEAD: - pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); - spin_unlock_irqrestore(&ep->com.lock, flags); - return CPL_RET_BUF_DONE; - default: - BUG_ON(1); - break; - } - dst_confirm(ep->dst); - if (ep->com.state != ABORTING) { - __state_set(&ep->com, DEAD); - release = 1; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - - rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); - if (!rpl_skb) { - pr_err("%s - cannot allocate skb!\n", __func__); - release = 1; - goto out; - } - rpl_skb->priority = CPL_PRIORITY_DATA; - rpl = skb_put(rpl_skb, sizeof(*rpl)); - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); - rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); - rpl->cmd = CPL_ABORT_NO_RST; - iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb); -out: - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct iwch_qp_attributes attrs; - unsigned long flags; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(!ep); - - /* The cm_id may be null if we failed to connect */ - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case CLOSING: - __state_set(&ep->com, MORIBUND); - break; - case MORIBUND: - stop_ep_timer(ep); - if ((ep->com.cm_id) && (ep->com.qp)) { - attrs.next_state = IWCH_QP_STATE_IDLE; - 
iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - } - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - break; - case ABORTING: - case DEAD: - break; - default: - BUG_ON(1); - break; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * T3A does 3 things when a TERM is received: - * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet - * 2) generate an async event on the QP with the TERMINATE opcode - * 3) post a TERMINATE opcode cqe into the associated CQ. - * - * For (1), we save the message in the qp for later consumer consumption. - * For (2), we move the QP into TERMINATE, post a QP event and disconnect. - * For (3), we toss the CQE in cxio_poll_cq(). - * - * terminate() handles case (1)... - */ -static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - - if (state_read(&ep->com) != FPDU_MODE) - return CPL_RET_BUF_DONE; - - pr_debug("%s ep %p\n", __func__, ep); - skb_pull(skb, sizeof(struct cpl_rdma_terminate)); - pr_debug("%s saving %d bytes of term msg\n", __func__, skb->len); - skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer, - skb->len); - ep->com.qp->attr.terminate_msg_len = skb->len; - ep->com.qp->attr.is_terminate_local = 0; - return CPL_RET_BUF_DONE; -} - -static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_rdma_ec_status *rep = cplhdr(skb); - struct iwch_ep *ep = ctx; - - pr_debug("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, - rep->status); - if (rep->status) { - struct iwch_qp_attributes attrs; - - pr_err("%s BAD CLOSE - Aborting tid %u\n", - __func__, ep->hwtid); - stop_ep_timer(ep); - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - abort_connection(ep, NULL, GFP_KERNEL); - } - return CPL_RET_BUF_DONE; -} 
- -static void ep_timeout(struct timer_list *t) -{ - struct iwch_ep *ep = from_timer(ep, t, timer); - struct iwch_qp_attributes attrs; - unsigned long flags; - int abort = 1; - - spin_lock_irqsave(&ep->com.lock, flags); - pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, - ep->com.state); - switch (ep->com.state) { - case MPA_REQ_SENT: - __state_set(&ep->com, ABORTING); - connect_reply_upcall(ep, -ETIMEDOUT); - break; - case MPA_REQ_WAIT: - __state_set(&ep->com, ABORTING); - break; - case CLOSING: - case MORIBUND: - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - } - __state_set(&ep->com, ABORTING); - break; - default: - WARN(1, "%s unexpected state ep %p state %u\n", - __func__, ep, ep->com.state); - abort = 0; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (abort) - abort_connection(ep, NULL, GFP_ATOMIC); - put_ep(&ep->com); -} - -int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) -{ - struct iwch_ep *ep = to_ep(cm_id); - - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - - if (state_read(&ep->com) == DEAD) { - put_ep(&ep->com); - return -ECONNRESET; - } - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); - if (mpa_rev == 0) - abort_connection(ep, NULL, GFP_KERNEL); - else { - send_mpa_reject(ep, pdata, pdata_len); - iwch_ep_disconnect(ep, 0, GFP_KERNEL); - } - put_ep(&ep->com); - return 0; -} - -int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) -{ - int err; - struct iwch_qp_attributes attrs; - enum iwch_qp_attr_mask mask; - struct iwch_ep *ep = to_ep(cm_id); - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_qp *qp = get_qhp(h, conn_param->qpn); - - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { - err = -ECONNRESET; - goto err; - } - - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); - BUG_ON(!qp); - - if 
((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || - (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { - abort_connection(ep, NULL, GFP_KERNEL); - err = -EINVAL; - goto err; - } - - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->com.qp = qp; - - ep->ird = conn_param->ird; - ep->ord = conn_param->ord; - - if (peer2peer && ep->ird == 0) - ep->ird = 1; - - pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); - - /* bind QP to EP and move to RTS */ - attrs.mpa_attr = ep->mpa_attr; - attrs.max_ird = ep->ird; - attrs.max_ord = ep->ord; - attrs.llp_stream_handle = ep; - attrs.next_state = IWCH_QP_STATE_RTS; - - /* bind QP and TID with INIT_WR */ - mask = IWCH_QP_ATTR_NEXT_STATE | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | - IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_MAX_IRD | - IWCH_QP_ATTR_MAX_ORD; - - err = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, mask, &attrs, 1); - if (err) - goto err1; - - /* if needed, wait for wr_ack */ - if (iwch_rqes_posted(qp)) { - wait_event(ep->com.waitq, ep->com.rpl_done); - err = ep->com.rpl_err; - if (err) - goto err1; - } - - err = send_mpa_reply(ep, conn_param->private_data, - conn_param->private_data_len); - if (err) - goto err1; - - - state_set(&ep->com, FPDU_MODE); - established_upcall(ep); - put_ep(&ep->com); - return 0; -err1: - ep->com.cm_id = NULL; - ep->com.qp = NULL; - cm_id->rem_ref(cm_id); -err: - put_ep(&ep->com); - return err; -} - -static int is_loopback_dst(struct iw_cm_id *cm_id) -{ - struct net_device *dev; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; - - dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr); - if (!dev) - return 0; - dev_put(dev); - return 1; -} - -int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) -{ - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_ep *ep; - struct rtable *rt; - int err = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; - struct sockaddr_in *raddr = 
(struct sockaddr_in *)&cm_id->m_remote_addr; - - if (cm_id->m_remote_addr.ss_family != PF_INET) { - err = -ENOSYS; - goto out; - } - - if (is_loopback_dst(cm_id)) { - err = -ENOSYS; - goto out; - } - - ep = alloc_ep(sizeof(*ep), GFP_KERNEL); - if (!ep) { - pr_err("%s - cannot alloc ep\n", __func__); - err = -ENOMEM; - goto out; - } - timer_setup(&ep->timer, ep_timeout, 0); - ep->plen = conn_param->private_data_len; - if (ep->plen) - memcpy(ep->mpa_pkt + sizeof(struct mpa_message), - conn_param->private_data, ep->plen); - ep->ird = conn_param->ird; - ep->ord = conn_param->ord; - - if (peer2peer && ep->ord == 0) - ep->ord = 1; - - ep->com.tdev = h->rdev.t3cdev_p; - - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->com.qp = get_qhp(h, conn_param->qpn); - BUG_ON(!ep->com.qp); - pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, - ep->com.qp, cm_id); - - /* - * Allocate an active TID to initiate a TCP connection. - */ - ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep); - if (ep->atid == -1) { - pr_err("%s - cannot alloc atid\n", __func__); - err = -ENOMEM; - goto fail2; - } - - /* find a route */ - rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, IPTOS_LOWDELAY); - if (!rt) { - pr_err("%s - cannot find route\n", __func__); - err = -EHOSTUNREACH; - goto fail3; - } - ep->dst = &rt->dst; - ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL, - &raddr->sin_addr.s_addr); - if (!ep->l2t) { - pr_err("%s - cannot alloc l2e\n", __func__); - err = -ENOMEM; - goto fail4; - } - - state_set(&ep->com, CONNECTING); - ep->tos = IPTOS_LOWDELAY; - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr, - sizeof(ep->com.remote_addr)); - - /* send connect request to rnic */ - err = send_connect(ep); - if (!err) - goto out; - - l2t_release(h->rdev.t3cdev_p, ep->l2t); -fail4: - dst_release(ep->dst); -fail3: - 
cxgb3_free_atid(ep->com.tdev, ep->atid); -fail2: - cm_id->rem_ref(cm_id); - put_ep(&ep->com); -out: - return err; -} - -int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) -{ - int err = 0; - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_listen_ep *ep; - - - might_sleep(); - - if (cm_id->m_local_addr.ss_family != PF_INET) { - err = -ENOSYS; - goto fail1; - } - - ep = alloc_ep(sizeof(*ep), GFP_KERNEL); - if (!ep) { - pr_err("%s - cannot alloc ep\n", __func__); - err = -ENOMEM; - goto fail1; - } - pr_debug("%s ep %p\n", __func__, ep); - ep->com.tdev = h->rdev.t3cdev_p; - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->backlog = backlog; - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - - /* - * Allocate a server TID. - */ - ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep); - if (ep->stid == -1) { - pr_err("%s - cannot alloc atid\n", __func__); - err = -ENOMEM; - goto fail2; - } - - state_set(&ep->com, LISTEN); - err = listen_start(ep); - if (err) - goto fail3; - - /* wait for pass_open_rpl */ - wait_event(ep->com.waitq, ep->com.rpl_done); - err = ep->com.rpl_err; - if (!err) { - cm_id->provider_data = ep; - goto out; - } -fail3: - cxgb3_free_stid(ep->com.tdev, ep->stid); -fail2: - cm_id->rem_ref(cm_id); - put_ep(&ep->com); -fail1: -out: - return err; -} - -int iwch_destroy_listen(struct iw_cm_id *cm_id) -{ - int err; - struct iwch_listen_ep *ep = to_listen_ep(cm_id); - - pr_debug("%s ep %p\n", __func__, ep); - - might_sleep(); - state_set(&ep->com, DEAD); - ep->com.rpl_done = 0; - ep->com.rpl_err = 0; - err = listen_stop(ep); - if (err) - goto done; - wait_event(ep->com.waitq, ep->com.rpl_done); - cxgb3_free_stid(ep->com.tdev, ep->stid); -done: - err = ep->com.rpl_err; - cm_id->rem_ref(cm_id); - put_ep(&ep->com); - return err; -} - -int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) -{ - int ret=0; - unsigned long flags; - int close = 0; - int fatal = 0; - struct t3cdev 
*tdev; - struct cxio_rdev *rdev; - - spin_lock_irqsave(&ep->com.lock, flags); - - pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep, - states[ep->com.state], abrupt); - - tdev = (struct t3cdev *)ep->com.tdev; - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - fatal = 1; - close_complete_upcall(ep); - ep->com.state = DEAD; - } - switch (ep->com.state) { - case MPA_REQ_WAIT: - case MPA_REQ_SENT: - case MPA_REQ_RCVD: - case MPA_REP_SENT: - case FPDU_MODE: - close = 1; - if (abrupt) - ep->com.state = ABORTING; - else { - ep->com.state = CLOSING; - start_ep_timer(ep); - } - set_bit(CLOSE_SENT, &ep->com.flags); - break; - case CLOSING: - if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { - close = 1; - if (abrupt) { - stop_ep_timer(ep); - ep->com.state = ABORTING; - } else - ep->com.state = MORIBUND; - } - break; - case MORIBUND: - case ABORTING: - case DEAD: - pr_debug("%s ignoring disconnect ep %p state %u\n", - __func__, ep, ep->com.state); - break; - default: - BUG(); - break; - } - - spin_unlock_irqrestore(&ep->com.lock, flags); - if (close) { - if (abrupt) - ret = send_abort(ep, NULL, gfp); - else - ret = send_halfclose(ep, gfp); - if (ret) - fatal = 1; - } - if (fatal) - release_ep_resources(ep); - return ret; -} - -int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, - struct l2t_entry *l2t) -{ - struct iwch_ep *ep = ctx; - - if (ep->dst != old) - return 0; - - pr_debug("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new, - l2t); - dst_hold(new); - l2t_release(ep->com.tdev, ep->l2t); - ep->l2t = l2t; - dst_release(old); - ep->dst = new; - return 1; -} - -/* - * All the CM events are handled on a work queue to have a safe context. - * These are the real handlers that are called from the work queue. 
- */ -static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = { - [CPL_ACT_ESTABLISH] = act_establish, - [CPL_ACT_OPEN_RPL] = act_open_rpl, - [CPL_RX_DATA] = rx_data, - [CPL_TX_DMA_ACK] = tx_ack, - [CPL_ABORT_RPL_RSS] = abort_rpl, - [CPL_ABORT_RPL] = abort_rpl, - [CPL_PASS_OPEN_RPL] = pass_open_rpl, - [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl, - [CPL_PASS_ACCEPT_REQ] = pass_accept_req, - [CPL_PASS_ESTABLISH] = pass_establish, - [CPL_PEER_CLOSE] = peer_close, - [CPL_ABORT_REQ_RSS] = peer_abort, - [CPL_CLOSE_CON_RPL] = close_con_rpl, - [CPL_RDMA_TERMINATE] = terminate, - [CPL_RDMA_EC_STATUS] = ec_status, -}; - -static void process_work(struct work_struct *work) -{ - struct sk_buff *skb = NULL; - void *ep; - struct t3cdev *tdev; - int ret; - - while ((skb = skb_dequeue(&rxq))) { - ep = *((void **) (skb->cb)); - tdev = *((struct t3cdev **) (skb->cb + sizeof(void *))); - ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep); - if (ret & CPL_RET_BUF_DONE) - kfree_skb(skb); - - /* - * ep was referenced in sched(), and is freed here. - */ - put_ep((struct iwch_ep_common *)ep); - } -} - -static DECLARE_WORK(skb_work, process_work); - -static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep_common *epc = ctx; - - get_ep(epc); - - /* - * Save ctx and tdev in the skb->cb area. - */ - *((void **) skb->cb) = ctx; - *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev; - - /* - * Queue the skb and schedule the worker thread. - */ - skb_queue_tail(&rxq, skb); - queue_work(workq, &skb_work); - return 0; -} - -static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_set_tcb_rpl *rpl = cplhdr(skb); - - if (rpl->status != CPL_ERR_NONE) { - pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n", - rpl->status, GET_TID(rpl)); - } - return CPL_RET_BUF_DONE; -} - -/* - * All upcalls from the T3 Core go to sched() to schedule the - * processing on a work queue. 
- */ -cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = { - [CPL_ACT_ESTABLISH] = sched, - [CPL_ACT_OPEN_RPL] = sched, - [CPL_RX_DATA] = sched, - [CPL_TX_DMA_ACK] = sched, - [CPL_ABORT_RPL_RSS] = sched, - [CPL_ABORT_RPL] = sched, - [CPL_PASS_OPEN_RPL] = sched, - [CPL_CLOSE_LISTSRV_RPL] = sched, - [CPL_PASS_ACCEPT_REQ] = sched, - [CPL_PASS_ESTABLISH] = sched, - [CPL_PEER_CLOSE] = sched, - [CPL_CLOSE_CON_RPL] = sched, - [CPL_ABORT_REQ_RSS] = sched, - [CPL_RDMA_TERMINATE] = sched, - [CPL_RDMA_EC_STATUS] = sched, - [CPL_SET_TCB_RPL] = set_tcb_rpl, -}; - -int __init iwch_cm_init(void) -{ - skb_queue_head_init(&rxq); - - workq = alloc_ordered_workqueue("iw_cxgb3", WQ_MEM_RECLAIM); - if (!workq) - return -ENOMEM; - - return 0; -} - -void __exit iwch_cm_term(void) -{ - flush_workqueue(workq); - destroy_workqueue(workq); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h deleted file mode 100644 index cc7fe644d260..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _IWCH_CM_H_ -#define _IWCH_CM_H_ - -#include <linux/inet.h> -#include <linux/wait.h> -#include <linux/spinlock.h> -#include <linux/kref.h> - -#include <rdma/ib_verbs.h> -#include <rdma/iw_cm.h> - -#include "cxgb3_offload.h" -#include "iwch_provider.h" - -#define MPA_KEY_REQ "MPA ID Req Frame" -#define MPA_KEY_REP "MPA ID Rep Frame" - -#define MPA_MAX_PRIVATE_DATA 256 -#define MPA_REV 0 /* XXX - amso1100 uses rev 0 ! */ -#define MPA_REJECT 0x20 -#define MPA_CRC 0x40 -#define MPA_MARKERS 0x80 -#define MPA_FLAGS_MASK 0xE0 - -#define put_ep(ep) { \ - pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n", \ - __func__, __LINE__, ep, kref_read(&((ep)->kref))); \ - WARN_ON(kref_read(&((ep)->kref)) < 1); \ - kref_put(&((ep)->kref), __free_ep); \ -} - -#define get_ep(ep) { \ - pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n", \ - __func__, __LINE__, ep, kref_read(&((ep)->kref))); \ - kref_get(&((ep)->kref)); \ -} - -struct mpa_message { - u8 key[16]; - u8 flags; - u8 revision; - __be16 private_data_size; - u8 private_data[0]; -}; - -struct terminate_message { - u8 layer_etype; - u8 ecode; - __be16 hdrct_rsvd; - u8 len_hdrs[0]; -}; - -#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28) - -enum iwch_layers_types { - LAYER_RDMAP = 0x00, - LAYER_DDP = 0x10, - LAYER_MPA = 0x20, - RDMAP_LOCAL_CATA = 0x00, - RDMAP_REMOTE_PROT = 0x01, - RDMAP_REMOTE_OP = 0x02, - DDP_LOCAL_CATA = 0x00, - DDP_TAGGED_ERR = 0x01, - DDP_UNTAGGED_ERR = 0x02, - DDP_LLP = 0x03 -}; - -enum 
iwch_rdma_ecodes { - RDMAP_INV_STAG = 0x00, - RDMAP_BASE_BOUNDS = 0x01, - RDMAP_ACC_VIOL = 0x02, - RDMAP_STAG_NOT_ASSOC = 0x03, - RDMAP_TO_WRAP = 0x04, - RDMAP_INV_VERS = 0x05, - RDMAP_INV_OPCODE = 0x06, - RDMAP_STREAM_CATA = 0x07, - RDMAP_GLOBAL_CATA = 0x08, - RDMAP_CANT_INV_STAG = 0x09, - RDMAP_UNSPECIFIED = 0xff -}; - -enum iwch_ddp_ecodes { - DDPT_INV_STAG = 0x00, - DDPT_BASE_BOUNDS = 0x01, - DDPT_STAG_NOT_ASSOC = 0x02, - DDPT_TO_WRAP = 0x03, - DDPT_INV_VERS = 0x04, - DDPU_INV_QN = 0x01, - DDPU_INV_MSN_NOBUF = 0x02, - DDPU_INV_MSN_RANGE = 0x03, - DDPU_INV_MO = 0x04, - DDPU_MSG_TOOBIG = 0x05, - DDPU_INV_VERS = 0x06 -}; - -enum iwch_mpa_ecodes { - MPA_CRC_ERR = 0x02, - MPA_MARKER_ERR = 0x03 -}; - -enum iwch_ep_state { - IDLE = 0, - LISTEN, - CONNECTING, - MPA_REQ_WAIT, - MPA_REQ_SENT, - MPA_REQ_RCVD, - MPA_REP_SENT, - FPDU_MODE, - ABORTING, - CLOSING, - MORIBUND, - DEAD, -}; - -enum iwch_ep_flags { - PEER_ABORT_IN_PROGRESS = 0, - ABORT_REQ_IN_PROGRESS = 1, - RELEASE_RESOURCES = 2, - CLOSE_SENT = 3, -}; - -struct iwch_ep_common { - struct iw_cm_id *cm_id; - struct iwch_qp *qp; - struct t3cdev *tdev; - enum iwch_ep_state state; - struct kref kref; - spinlock_t lock; - struct sockaddr_in local_addr; - struct sockaddr_in remote_addr; - wait_queue_head_t waitq; - int rpl_done; - int rpl_err; - unsigned long flags; -}; - -struct iwch_listen_ep { - struct iwch_ep_common com; - unsigned int stid; - int backlog; -}; - -struct iwch_ep { - struct iwch_ep_common com; - struct iwch_ep *parent_ep; - struct timer_list timer; - unsigned int atid; - u32 hwtid; - u32 snd_seq; - u32 rcv_seq; - struct l2t_entry *l2t; - struct dst_entry *dst; - struct sk_buff *mpa_skb; - struct iwch_mpa_attributes mpa_attr; - unsigned int mpa_pkt_len; - u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA]; - u8 tos; - u16 emss; - u16 plen; - u32 ird; - u32 ord; -}; - -static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id) -{ - return cm_id->provider_data; -} - -static inline struct 
iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) -{ - return cm_id->provider_data; -} - -static inline int compute_wscale(int win) -{ - int wscale = 0; - - while (wscale < 14 && (65535<<wscale) < win) - wscale++; - return wscale; -} - -/* CM prototypes */ - -int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); -int iwch_create_listen(struct iw_cm_id *cm_id, int backlog); -int iwch_destroy_listen(struct iw_cm_id *cm_id); -int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); -int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); -int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp); -int iwch_quiesce_tid(struct iwch_ep *ep); -int iwch_resume_tid(struct iwch_ep *ep); -void __free_ep(struct kref *kref); -void iwch_rearp(struct iwch_ep *ep); -int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t); - -int __init iwch_cm_init(void); -void __exit iwch_cm_term(void); -extern int peer2peer; - -#endif /* _IWCH_CM_H_ */ diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c deleted file mode 100644 index a098c0140580..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "iwch_provider.h" -#include "iwch.h" - -static int __iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, - struct iwch_qp *qhp, struct ib_wc *wc) -{ - struct t3_wq *wq = qhp ? &qhp->wq : NULL; - struct t3_cqe cqe; - u32 credit = 0; - u8 cqe_flushed; - u64 cookie; - int ret = 1; - - ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, - &credit); - if (t3a_device(chp->rhp) && credit) { - pr_debug("%s updating %d cq credits on id %d\n", __func__, - credit, chp->cq.cqid); - cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit); - } - - if (ret) { - ret = -EAGAIN; - goto out; - } - ret = 1; - - wc->wr_id = cookie; - wc->qp = qhp ? 
&qhp->ibqp : NULL; - wc->vendor_err = CQE_STATUS(cqe); - wc->wc_flags = 0; - - pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x lo 0x%x cookie 0x%llx\n", - __func__, - CQE_QPID(cqe), CQE_TYPE(cqe), - CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe), - CQE_WRID_LOW(cqe), (unsigned long long)cookie); - - if (CQE_TYPE(cqe) == 0) { - if (!CQE_STATUS(cqe)) - wc->byte_len = CQE_LEN(cqe); - else - wc->byte_len = 0; - wc->opcode = IB_WC_RECV; - if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV || - CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) { - wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe); - wc->wc_flags |= IB_WC_WITH_INVALIDATE; - } - } else { - switch (CQE_OPCODE(cqe)) { - case T3_RDMA_WRITE: - wc->opcode = IB_WC_RDMA_WRITE; - break; - case T3_READ_REQ: - wc->opcode = IB_WC_RDMA_READ; - wc->byte_len = CQE_LEN(cqe); - break; - case T3_SEND: - case T3_SEND_WITH_SE: - case T3_SEND_WITH_INV: - case T3_SEND_WITH_SE_INV: - wc->opcode = IB_WC_SEND; - break; - case T3_LOCAL_INV: - wc->opcode = IB_WC_LOCAL_INV; - break; - case T3_FAST_REGISTER: - wc->opcode = IB_WC_REG_MR; - break; - default: - pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", - CQE_OPCODE(cqe), CQE_QPID(cqe)); - ret = -EINVAL; - goto out; - } - } - - if (cqe_flushed) - wc->status = IB_WC_WR_FLUSH_ERR; - else { - - switch (CQE_STATUS(cqe)) { - case TPT_ERR_SUCCESS: - wc->status = IB_WC_SUCCESS; - break; - case TPT_ERR_STAG: - wc->status = IB_WC_LOC_ACCESS_ERR; - break; - case TPT_ERR_PDID: - wc->status = IB_WC_LOC_PROT_ERR; - break; - case TPT_ERR_QPID: - case TPT_ERR_ACCESS: - wc->status = IB_WC_LOC_ACCESS_ERR; - break; - case TPT_ERR_WRAP: - wc->status = IB_WC_GENERAL_ERR; - break; - case TPT_ERR_BOUND: - wc->status = IB_WC_LOC_LEN_ERR; - break; - case TPT_ERR_INVALIDATE_SHARED_MR: - case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - wc->status = IB_WC_MW_BIND_ERR; - break; - case TPT_ERR_CRC: - case TPT_ERR_MARKER: - case TPT_ERR_PDU_LEN_ERR: - case TPT_ERR_OUT_OF_RQE: - case 
TPT_ERR_DDP_VERSION: - case TPT_ERR_RDMA_VERSION: - case TPT_ERR_DDP_QUEUE_NUM: - case TPT_ERR_MSN: - case TPT_ERR_TBIT: - case TPT_ERR_MO: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_IRD_OVERFLOW: - case TPT_ERR_OPCODE: - wc->status = IB_WC_FATAL_ERR; - break; - case TPT_ERR_SWFLUSH: - wc->status = IB_WC_WR_FLUSH_ERR; - break; - default: - pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n", - CQE_STATUS(cqe), CQE_QPID(cqe)); - ret = -EINVAL; - } - } -out: - return ret; -} - -/* - * Get one cq entry from cxio and map it to openib. - * - * Returns: - * 0 EMPTY; - * 1 cqe returned - * -EAGAIN caller must try again - * any other -errno fatal error - */ -static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, - struct ib_wc *wc) -{ - struct iwch_qp *qhp; - struct t3_cqe *rd_cqe; - int ret; - - rd_cqe = cxio_next_cqe(&chp->cq); - - if (!rd_cqe) - return 0; - - qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); - if (qhp) { - spin_lock(&qhp->lock); - ret = __iwch_poll_cq_one(rhp, chp, qhp, wc); - spin_unlock(&qhp->lock); - } else { - ret = __iwch_poll_cq_one(rhp, chp, NULL, wc); - } - return ret; -} - -int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) -{ - struct iwch_dev *rhp; - struct iwch_cq *chp; - unsigned long flags; - int npolled; - int err = 0; - - chp = to_iwch_cq(ibcq); - rhp = chp->rhp; - - spin_lock_irqsave(&chp->lock, flags); - for (npolled = 0; npolled < num_entries; ++npolled) { - - /* - * Because T3 can post CQEs that are _not_ associated - * with a WR, we might have to poll again after removing - * one of these. 
- */ - do { - err = iwch_poll_cq_one(rhp, chp, wc + npolled); - } while (err == -EAGAIN); - if (err <= 0) - break; - } - spin_unlock_irqrestore(&chp->lock, flags); - - if (err < 0) - return err; - else { - return npolled; - } -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c deleted file mode 100644 index 9d356c1301c7..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include <linux/gfp.h> -#include <linux/mman.h> -#include <net/sock.h> -#include "iwch_provider.h" -#include "iwch.h" -#include "iwch_cm.h" -#include "cxio_hal.h" -#include "cxio_wr.h" - -static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, - struct respQ_msg_t *rsp_msg, - enum ib_event_type ib_event, - int send_term) -{ - struct ib_event event; - struct iwch_qp_attributes attrs; - struct iwch_qp *qhp; - unsigned long flag; - - xa_lock(&rnicp->qps); - qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe)); - - if (!qhp) { - pr_err("%s unaffiliated error 0x%x qpid 0x%x\n", - __func__, CQE_STATUS(rsp_msg->cqe), - CQE_QPID(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - return; - } - - if ((qhp->attr.state == IWCH_QP_STATE_ERROR) || - (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) { - pr_debug("%s AE received after RTS - qp state %d qpid 0x%x status 0x%x\n", - __func__, - qhp->attr.state, qhp->wq.qpid, - CQE_STATUS(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - return; - } - - pr_err("%s - AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", - __func__, - CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), - CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), - CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - - atomic_inc(&qhp->refcnt); - xa_unlock(&rnicp->qps); - - if (qhp->attr.state == IWCH_QP_STATE_RTS) { - attrs.next_state = IWCH_QP_STATE_TERMINATE; - iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - if (send_term) - iwch_post_terminate(qhp, rsp_msg); - } - - event.event = ib_event; - event.device = chp->ibcq.device; - if (ib_event == IB_EVENT_CQ_ERR) - event.element.cq = &chp->ibcq; - else - event.element.qp = &qhp->ibqp; - - if (qhp->ibqp.event_handler) - (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); - - spin_lock_irqsave(&chp->comp_handler_lock, flag); - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); - spin_unlock_irqrestore(&chp->comp_handler_lock, flag); - - if 
(atomic_dec_and_test(&qhp->refcnt)) - wake_up(&qhp->wait); -} - -void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) -{ - struct iwch_dev *rnicp; - struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data; - struct iwch_cq *chp; - struct iwch_qp *qhp; - u32 cqid = RSPQ_CQID(rsp_msg); - unsigned long flag; - - rnicp = (struct iwch_dev *) rdev_p->ulp; - xa_lock(&rnicp->qps); - chp = get_chp(rnicp, cqid); - qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe)); - if (!chp || !qhp) { - pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", - cqid, CQE_QPID(rsp_msg->cqe), - CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), - CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), - CQE_WRID_LOW(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - goto out; - } - iwch_qp_add_ref(&qhp->ibqp); - atomic_inc(&chp->refcnt); - xa_unlock(&rnicp->qps); - - /* - * 1) completion of our sending a TERMINATE. - * 2) incoming TERMINATE message. - */ - if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) && - (CQE_STATUS(rsp_msg->cqe) == 0)) { - if (SQ_TYPE(rsp_msg->cqe)) { - pr_debug("%s QPID 0x%x ep %p disconnecting\n", - __func__, qhp->wq.qpid, qhp->ep); - iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC); - } else { - pr_debug("%s post REQ_ERR AE QPID 0x%x\n", __func__, - qhp->wq.qpid); - post_qp_event(rnicp, chp, rsp_msg, - IB_EVENT_QP_REQ_ERR, 0); - iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC); - } - goto done; - } - - /* Bad incoming Read request */ - if (SQ_TYPE(rsp_msg->cqe) && - (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) { - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); - goto done; - } - - /* Bad incoming write */ - if (RQ_TYPE(rsp_msg->cqe) && - (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) { - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); - goto done; - } - - switch (CQE_STATUS(rsp_msg->cqe)) { - - /* Completion Events */ - case TPT_ERR_SUCCESS: - - /* - * Confirm the destination entry if this is a RECV 
completion. - */ - if (qhp->ep && SQ_TYPE(rsp_msg->cqe)) - dst_confirm(qhp->ep->dst); - spin_lock_irqsave(&chp->comp_handler_lock, flag); - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); - spin_unlock_irqrestore(&chp->comp_handler_lock, flag); - break; - - case TPT_ERR_STAG: - case TPT_ERR_PDID: - case TPT_ERR_QPID: - case TPT_ERR_ACCESS: - case TPT_ERR_WRAP: - case TPT_ERR_BOUND: - case TPT_ERR_INVALIDATE_SHARED_MR: - case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1); - break; - - /* Device Fatal Errors */ - case TPT_ERR_ECC: - case TPT_ERR_ECC_PSTAG: - case TPT_ERR_INTERNAL_ERR: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1); - break; - - /* QP Fatal Errors */ - case TPT_ERR_OUT_OF_RQE: - case TPT_ERR_PBL_ADDR_BOUND: - case TPT_ERR_CRC: - case TPT_ERR_MARKER: - case TPT_ERR_PDU_LEN_ERR: - case TPT_ERR_DDP_VERSION: - case TPT_ERR_RDMA_VERSION: - case TPT_ERR_OPCODE: - case TPT_ERR_DDP_QUEUE_NUM: - case TPT_ERR_MSN: - case TPT_ERR_TBIT: - case TPT_ERR_MO: - case TPT_ERR_MSN_GAP: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_RQE_ADDR_BOUND: - case TPT_ERR_IRD_OVERFLOW: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); - break; - - default: - pr_err("Unknown T3 status 0x%x QPID 0x%x\n", - CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid); - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); - break; - } -done: - if (atomic_dec_and_test(&chp->refcnt)) - wake_up(&chp->wait); - iwch_qp_rem_ref(&qhp->ibqp); -out: - dev_kfree_skb_irq(skb); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c deleted file mode 100644 index ce0f2741821d..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include <linux/slab.h> -#include <asm/byteorder.h> - -#include <rdma/iw_cm.h> -#include <rdma/ib_verbs.h> - -#include "cxio_hal.h" -#include "cxio_resource.h" -#include "iwch.h" -#include "iwch_provider.h" - -static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) -{ - u32 mmid; - - mhp->attr.state = 1; - mhp->attr.stag = stag; - mmid = stag >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); - return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); -} - -int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, int shift) -{ - u32 stag; - int ret; - - if (cxio_register_phys_mem(&rhp->rdev, - &stag, mhp->attr.pdid, - mhp->attr.perms, - mhp->attr.zbva, - mhp->attr.va_fbo, - mhp->attr.len, - shift - 12, - mhp->attr.pbl_size, mhp->attr.pbl_addr)) - return -ENOMEM; - - ret = iwch_finish_mem_reg(mhp, stag); - if (ret) - cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); - return ret; -} - -int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) -{ - mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev, - npages << 3); - - if (!mhp->attr.pbl_addr) - return -ENOMEM; - - mhp->attr.pbl_size = npages; - - return 0; -} - -void iwch_free_pbl(struct iwch_mr *mhp) -{ - cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, - mhp->attr.pbl_size << 3); -} - -int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset) -{ - return cxio_write_pbl(&mhp->rhp->rdev, pages, - mhp->attr.pbl_addr + (offset << 3), npages); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c deleted file mode 100644 index dcf02ec02810..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ /dev/null @@ -1,1321 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/device.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/delay.h> -#include <linux/errno.h> -#include <linux/list.h> -#include <linux/sched/mm.h> -#include <linux/spinlock.h> -#include <linux/ethtool.h> -#include <linux/rtnetlink.h> -#include <linux/inetdevice.h> -#include <linux/slab.h> - -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/byteorder.h> - -#include <rdma/iw_cm.h> -#include <rdma/ib_verbs.h> -#include <rdma/ib_smi.h> -#include <rdma/ib_umem.h> -#include <rdma/ib_user_verbs.h> -#include <rdma/uverbs_ioctl.h> - -#include "cxio_hal.h" -#include "iwch.h" -#include "iwch_provider.h" -#include "iwch_cm.h" -#include <rdma/cxgb3-abi.h> -#include "common.h" - -static void iwch_dealloc_ucontext(struct ib_ucontext *context) -{ - struct iwch_dev *rhp = to_iwch_dev(context->device); - struct iwch_ucontext *ucontext = to_iwch_ucontext(context); - struct iwch_mm_entry *mm, *tmp; - - pr_debug("%s context %p\n", __func__, context); - list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) - kfree(mm); - cxio_release_ucontext(&rhp->rdev, &ucontext->uctx); -} - -static int iwch_alloc_ucontext(struct ib_ucontext *ucontext, - struct ib_udata *udata) -{ - struct ib_device *ibdev = ucontext->device; - struct iwch_ucontext *context = to_iwch_ucontext(ucontext); - struct iwch_dev *rhp = to_iwch_dev(ibdev); - - pr_debug("%s ibdev %p\n", __func__, ibdev); - cxio_init_ucontext(&rhp->rdev, &context->uctx); - INIT_LIST_HEAD(&context->mmaps); - spin_lock_init(&context->mmap_lock); - return 0; -} - -static void iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) -{ - struct iwch_cq *chp; - - pr_debug("%s ib_cq %p\n", __func__, ib_cq); - chp = to_iwch_cq(ib_cq); - - xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid); - atomic_dec(&chp->refcnt); - wait_event(chp->wait, !atomic_read(&chp->refcnt)); - - cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); -} - -static int 
iwch_create_cq(struct ib_cq *ibcq, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) -{ - struct ib_device *ibdev = ibcq->device; - int entries = attr->cqe; - struct iwch_dev *rhp = to_iwch_dev(ibcq->device); - struct iwch_cq *chp = to_iwch_cq(ibcq); - struct iwch_create_cq_resp uresp; - struct iwch_create_cq_req ureq; - static int warned; - size_t resplen; - - pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries); - if (attr->flags) - return -EINVAL; - - if (udata) { - if (!t3a_device(rhp)) { - if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) - return -EFAULT; - - chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr; - } - } - - if (t3a_device(rhp)) { - - /* - * T3A: Add some fluff to handle extra CQEs inserted - * for various errors. - * Additional CQE possibilities: - * TERMINATE, - * incoming RDMA WRITE Failures - * incoming RDMA READ REQUEST FAILUREs - * NOTE: We cannot ensure the CQ won't overflow. - */ - entries += 16; - } - entries = roundup_pow_of_two(entries); - chp->cq.size_log2 = ilog2(entries); - - if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata)) - return -ENOMEM; - - chp->rhp = rhp; - chp->ibcq.cqe = 1 << chp->cq.size_log2; - spin_lock_init(&chp->lock); - spin_lock_init(&chp->comp_handler_lock); - atomic_set(&chp->refcnt, 1); - init_waitqueue_head(&chp->wait); - if (xa_store_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL)) { - cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); - return -ENOMEM; - } - - if (udata) { - struct iwch_mm_entry *mm; - struct iwch_ucontext *ucontext = rdma_udata_to_drv_context( - udata, struct iwch_ucontext, ibucontext); - - mm = kmalloc(sizeof(*mm), GFP_KERNEL); - if (!mm) { - iwch_destroy_cq(&chp->ibcq, udata); - return -ENOMEM; - } - uresp.cqid = chp->cq.cqid; - uresp.size_log2 = chp->cq.size_log2; - spin_lock(&ucontext->mmap_lock); - uresp.key = ucontext->key; - ucontext->key += PAGE_SIZE; - spin_unlock(&ucontext->mmap_lock); - mm->key = uresp.key; - mm->addr = 
virt_to_phys(chp->cq.queue); - if (udata->outlen < sizeof(uresp)) { - if (!warned++) - pr_warn("Warning - downlevel libcxgb3 (non-fatal)\n"); - mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * - sizeof(struct t3_cqe)); - resplen = sizeof(struct iwch_create_cq_resp_v0); - } else { - mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) * - sizeof(struct t3_cqe)); - uresp.memsize = mm->len; - uresp.reserved = 0; - resplen = sizeof(uresp); - } - if (ib_copy_to_udata(udata, &uresp, resplen)) { - kfree(mm); - iwch_destroy_cq(&chp->ibcq, udata); - return -EFAULT; - } - insert_mmap(ucontext, mm); - } - pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr %pad\n", - chp->cq.cqid, chp, (1 << chp->cq.size_log2), - &chp->cq.dma_addr); - return 0; -} - -static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) -{ - struct iwch_dev *rhp; - struct iwch_cq *chp; - enum t3_cq_opcode cq_op; - int err; - unsigned long flag; - u32 rptr; - - chp = to_iwch_cq(ibcq); - rhp = chp->rhp; - if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) - cq_op = CQ_ARM_SE; - else - cq_op = CQ_ARM_AN; - if (chp->user_rptr_addr) { - if (get_user(rptr, chp->user_rptr_addr)) - return -EFAULT; - spin_lock_irqsave(&chp->lock, flag); - chp->cq.rptr = rptr; - } else - spin_lock_irqsave(&chp->lock, flag); - pr_debug("%s rptr 0x%x\n", __func__, chp->cq.rptr); - err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0); - spin_unlock_irqrestore(&chp->lock, flag); - if (err < 0) - pr_err("Error %d rearming CQID 0x%x\n", err, chp->cq.cqid); - if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) - err = 0; - return err; -} - -static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - int len = vma->vm_end - vma->vm_start; - u32 key = vma->vm_pgoff << PAGE_SHIFT; - struct cxio_rdev *rdev_p; - int ret = 0; - struct iwch_mm_entry *mm; - struct iwch_ucontext *ucontext; - u64 addr; - - pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff, - key, len); - - if 
(vma->vm_start & (PAGE_SIZE-1)) { - return -EINVAL; - } - - rdev_p = &(to_iwch_dev(context->device)->rdev); - ucontext = to_iwch_ucontext(context); - - mm = remove_mmap(ucontext, key, len); - if (!mm) - return -EINVAL; - addr = mm->addr; - kfree(mm); - - if ((addr >= rdev_p->rnic_info.udbell_physbase) && - (addr < (rdev_p->rnic_info.udbell_physbase + - rdev_p->rnic_info.udbell_len))) { - - /* - * Map T3 DB register. - */ - if (vma->vm_flags & VM_READ) { - return -EPERM; - } - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - vma->vm_flags &= ~VM_MAYREAD; - ret = io_remap_pfn_range(vma, vma->vm_start, - addr >> PAGE_SHIFT, - len, vma->vm_page_prot); - } else { - - /* - * Map WQ or CQ contig dma memory... - */ - ret = remap_pfn_range(vma, vma->vm_start, - addr >> PAGE_SHIFT, - len, vma->vm_page_prot); - } - - return ret; -} - -static void iwch_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - - php = to_iwch_pd(pd); - rhp = php->rhp; - pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); - cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid); -} - -static int iwch_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) -{ - struct iwch_pd *php = to_iwch_pd(pd); - struct ib_device *ibdev = pd->device; - u32 pdid; - struct iwch_dev *rhp; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - rhp = (struct iwch_dev *) ibdev; - pdid = cxio_hal_get_pdid(rhp->rdev.rscp); - if (!pdid) - return -EINVAL; - - php->pdid = pdid; - php->rhp = rhp; - if (udata) { - struct iwch_alloc_pd_resp resp = {.pdid = php->pdid}; - - if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { - iwch_deallocate_pd(&php->ibpd, udata); - return -EFAULT; - } - } - pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); - return 0; -} - -static int iwch_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_mr *mhp; - u32 mmid; - - pr_debug("%s ib_mr 
%p\n", __func__, ib_mr); - - mhp = to_iwch_mr(ib_mr); - kfree(mhp->pages); - rhp = mhp->rhp; - mmid = mhp->attr.stag >> 8; - cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); - iwch_free_pbl(mhp); - xa_erase_irq(&rhp->mrs, mmid); - if (mhp->kva) - kfree((void *) (unsigned long) mhp->kva); - ib_umem_release(mhp->umem); - pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp); - kfree(mhp); - return 0; -} - -static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) -{ - const u64 total_size = 0xffffffff; - const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK; - struct iwch_pd *php = to_iwch_pd(pd); - struct iwch_dev *rhp = php->rhp; - struct iwch_mr *mhp; - __be64 *page_list; - int shift = 26, npages, ret, i; - - pr_debug("%s ib_pd %p\n", __func__, pd); - - /* - * T3 only supports 32 bits of size. - */ - if (sizeof(phys_addr_t) > 4) { - pr_warn_once("Cannot support dma_mrs on this platform\n"); - return ERR_PTR(-ENOTSUPP); - } - - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - - mhp->rhp = rhp; - - npages = (total_size + (1ULL << shift) - 1) >> shift; - if (!npages) { - ret = -EINVAL; - goto err; - } - - page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL); - if (!page_list) { - ret = -ENOMEM; - goto err; - } - - for (i = 0; i < npages; i++) - page_list[i] = cpu_to_be64((u64)i << shift); - - pr_debug("%s mask 0x%llx shift %d len %lld pbl_size %d\n", - __func__, mask, shift, total_size, npages); - - ret = iwch_alloc_pbl(mhp, npages); - if (ret) { - kfree(page_list); - goto err_pbl; - } - - ret = iwch_write_pbl(mhp, page_list, npages, 0); - kfree(page_list); - if (ret) - goto err_pbl; - - mhp->attr.pdid = php->pdid; - mhp->attr.zbva = 0; - - mhp->attr.perms = iwch_ib_to_tpt_access(acc); - mhp->attr.va_fbo = 0; - mhp->attr.page_size = shift - 12; - - mhp->attr.len = (u32) total_size; - mhp->attr.pbl_size = npages; - ret = iwch_register_mem(rhp, php, mhp, shift); - if (ret) - goto 
err_pbl; - - return &mhp->ibmr; - -err_pbl: - iwch_free_pbl(mhp); - -err: - kfree(mhp); - return ERR_PTR(ret); -} - -static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int acc, struct ib_udata *udata) -{ - __be64 *pages; - int shift, n, i; - int err = 0; - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mr *mhp; - struct iwch_reg_user_mr_resp uresp; - struct sg_dma_page_iter sg_iter; - pr_debug("%s ib_pd %p\n", __func__, pd); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - - mhp->rhp = rhp; - - mhp->umem = ib_umem_get(udata, start, length, acc, 0); - if (IS_ERR(mhp->umem)) { - err = PTR_ERR(mhp->umem); - kfree(mhp); - return ERR_PTR(err); - } - - shift = PAGE_SHIFT; - - n = ib_umem_num_pages(mhp->umem); - - err = iwch_alloc_pbl(mhp, n); - if (err) - goto err; - - pages = (__be64 *) __get_free_page(GFP_KERNEL); - if (!pages) { - err = -ENOMEM; - goto err_pbl; - } - - i = n = 0; - - for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) { - pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter)); - if (i == PAGE_SIZE / sizeof(*pages)) { - err = iwch_write_pbl(mhp, pages, i, n); - if (err) - goto pbl_done; - n += i; - i = 0; - } - } - - if (i) - err = iwch_write_pbl(mhp, pages, i, n); - -pbl_done: - free_page((unsigned long) pages); - if (err) - goto err_pbl; - - mhp->attr.pdid = php->pdid; - mhp->attr.zbva = 0; - mhp->attr.perms = iwch_ib_to_tpt_access(acc); - mhp->attr.va_fbo = virt; - mhp->attr.page_size = shift - 12; - mhp->attr.len = (u32) length; - - err = iwch_register_mem(rhp, php, mhp, shift); - if (err) - goto err_pbl; - - if (udata && !t3a_device(rhp)) { - uresp.pbl_addr = (mhp->attr.pbl_addr - - rhp->rdev.rnic_info.pbl_base) >> 3; - pr_debug("%s user resp pbl_addr 0x%x\n", __func__, - uresp.pbl_addr); - - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { - iwch_dereg_mr(&mhp->ibmr, udata); - err = 
-EFAULT; - goto err; - } - } - - return &mhp->ibmr; - -err_pbl: - iwch_free_pbl(mhp); - -err: - ib_umem_release(mhp->umem); - kfree(mhp); - return ERR_PTR(err); -} - -static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mw *mhp; - u32 mmid; - u32 stag = 0; - int ret; - - if (type != IB_MW_TYPE_1) - return ERR_PTR(-EINVAL); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid); - if (ret) { - kfree(mhp); - return ERR_PTR(ret); - } - mhp->rhp = rhp; - mhp->attr.pdid = php->pdid; - mhp->attr.type = TPT_MW; - mhp->attr.stag = stag; - mmid = (stag) >> 8; - mhp->ibmw.rkey = stag; - if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { - cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); - kfree(mhp); - return ERR_PTR(-ENOMEM); - } - pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); - return &(mhp->ibmw); -} - -static int iwch_dealloc_mw(struct ib_mw *mw) -{ - struct iwch_dev *rhp; - struct iwch_mw *mhp; - u32 mmid; - - mhp = to_iwch_mw(mw); - rhp = mhp->rhp; - mmid = (mw->rkey) >> 8; - cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); - xa_erase_irq(&rhp->mrs, mmid); - pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); - kfree(mhp); - return 0; -} - -static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mr *mhp; - u32 mmid; - u32 stag = 0; - int ret = -ENOMEM; - - if (mr_type != IB_MR_TYPE_MEM_REG || - max_num_sg > T3_MAX_FASTREG_DEPTH) - return ERR_PTR(-EINVAL); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - goto err; - - mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); - if (!mhp->pages) - goto 
pl_err; - - mhp->rhp = rhp; - ret = iwch_alloc_pbl(mhp, max_num_sg); - if (ret) - goto err1; - mhp->attr.pbl_size = max_num_sg; - ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid, - mhp->attr.pbl_size, mhp->attr.pbl_addr); - if (ret) - goto err2; - mhp->attr.pdid = php->pdid; - mhp->attr.type = TPT_NON_SHARED_MR; - mhp->attr.stag = stag; - mhp->attr.state = 1; - mmid = (stag) >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - ret = xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL); - if (ret) - goto err3; - - pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); - return &(mhp->ibmr); -err3: - cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); -err2: - iwch_free_pbl(mhp); -err1: - kfree(mhp->pages); -pl_err: - kfree(mhp); -err: - return ERR_PTR(ret); -} - -static int iwch_set_page(struct ib_mr *ibmr, u64 addr) -{ - struct iwch_mr *mhp = to_iwch_mr(ibmr); - - if (unlikely(mhp->npages == mhp->attr.pbl_size)) - return -ENOMEM; - - mhp->pages[mhp->npages++] = addr; - - return 0; -} - -static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, - int sg_nents, unsigned int *sg_offset) -{ - struct iwch_mr *mhp = to_iwch_mr(ibmr); - - mhp->npages = 0; - - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page); -} - -static int iwch_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - struct iwch_qp_attributes attrs; - struct iwch_ucontext *ucontext; - - qhp = to_iwch_qp(ib_qp); - rhp = qhp->rhp; - - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0); - wait_event(qhp->wait, !qhp->ep); - - xa_erase_irq(&rhp->qps, qhp->wq.qpid); - - atomic_dec(&qhp->refcnt); - wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); - - ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext, - ibucontext); - cxio_destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); - - pr_debug("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__, - ib_qp, qhp->wq.qpid, qhp); - kfree(qhp); - return 0; -} - -static struct ib_qp *iwch_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *attrs, - struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - struct iwch_pd *php; - struct iwch_cq *schp; - struct iwch_cq *rchp; - struct iwch_create_qp_resp uresp; - int wqsize, sqsize, rqsize; - struct iwch_ucontext *ucontext; - - pr_debug("%s ib_pd %p\n", __func__, pd); - if (attrs->qp_type != IB_QPT_RC) - return ERR_PTR(-EINVAL); - php = to_iwch_pd(pd); - rhp = php->rhp; - schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid); - rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid); - if (!schp || !rchp) - return ERR_PTR(-EINVAL); - - /* The RQT size must be # of entries + 1 rounded up to a power of two */ - rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr); - if (rqsize == attrs->cap.max_recv_wr) - rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1); - - /* T3 doesn't support RQT depth < 16 */ - if (rqsize < 16) - rqsize = 16; - - if (rqsize > T3_MAX_RQ_SIZE) - return ERR_PTR(-EINVAL); - - if (attrs->cap.max_inline_data > T3_MAX_INLINE) - return ERR_PTR(-EINVAL); - - /* - * NOTE: The SQ and total WQ sizes don't need to be - * a power of two. However, all the code assumes - * they are. EG: Q_FREECNT() and friends. - */ - sqsize = roundup_pow_of_two(attrs->cap.max_send_wr); - wqsize = roundup_pow_of_two(rqsize + sqsize); - - /* - * Kernel users need more wq space for fastreg WRs which can take - * 2 WR fragments. 
- */ - ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext, - ibucontext); - if (!ucontext && wqsize < (rqsize + (2 * sqsize))) - wqsize = roundup_pow_of_two(rqsize + - roundup_pow_of_two(attrs->cap.max_send_wr * 2)); - pr_debug("%s wqsize %d sqsize %d rqsize %d\n", __func__, - wqsize, sqsize, rqsize); - qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); - if (!qhp) - return ERR_PTR(-ENOMEM); - qhp->wq.size_log2 = ilog2(wqsize); - qhp->wq.rq_size_log2 = ilog2(rqsize); - qhp->wq.sq_size_log2 = ilog2(sqsize); - if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) { - kfree(qhp); - return ERR_PTR(-ENOMEM); - } - - attrs->cap.max_recv_wr = rqsize - 1; - attrs->cap.max_send_wr = sqsize; - attrs->cap.max_inline_data = T3_MAX_INLINE; - - qhp->rhp = rhp; - qhp->attr.pd = php->pdid; - qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid; - qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid; - qhp->attr.sq_num_entries = attrs->cap.max_send_wr; - qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; - qhp->attr.sq_max_sges = attrs->cap.max_send_sge; - qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; - qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; - qhp->attr.state = IWCH_QP_STATE_IDLE; - qhp->attr.next_state = IWCH_QP_STATE_IDLE; - - /* - * XXX - These don't get passed in from the openib user - * at create time. The CM sets them via a QP modify. - * Need to fix... I think the CM should - */ - qhp->attr.enable_rdma_read = 1; - qhp->attr.enable_rdma_write = 1; - qhp->attr.enable_bind = 1; - qhp->attr.max_ord = 1; - qhp->attr.max_ird = 1; - - spin_lock_init(&qhp->lock); - init_waitqueue_head(&qhp->wait); - atomic_set(&qhp->refcnt, 1); - - if (xa_store_irq(&rhp->qps, qhp->wq.qpid, qhp, GFP_KERNEL)) { - cxio_destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); - kfree(qhp); - return ERR_PTR(-ENOMEM); - } - - if (udata) { - - struct iwch_mm_entry *mm1, *mm2; - - mm1 = kmalloc(sizeof(*mm1), GFP_KERNEL); - if (!mm1) { - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-ENOMEM); - } - - mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL); - if (!mm2) { - kfree(mm1); - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-ENOMEM); - } - - uresp.qpid = qhp->wq.qpid; - uresp.size_log2 = qhp->wq.size_log2; - uresp.sq_size_log2 = qhp->wq.sq_size_log2; - uresp.rq_size_log2 = qhp->wq.rq_size_log2; - spin_lock(&ucontext->mmap_lock); - uresp.key = ucontext->key; - ucontext->key += PAGE_SIZE; - uresp.db_key = ucontext->key; - ucontext->key += PAGE_SIZE; - spin_unlock(&ucontext->mmap_lock); - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { - kfree(mm1); - kfree(mm2); - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-EFAULT); - } - mm1->key = uresp.key; - mm1->addr = virt_to_phys(qhp->wq.queue); - mm1->len = PAGE_ALIGN(wqsize * sizeof(union t3_wr)); - insert_mmap(ucontext, mm1); - mm2->key = uresp.db_key; - mm2->addr = qhp->wq.udb & PAGE_MASK; - mm2->len = PAGE_SIZE; - insert_mmap(ucontext, mm2); - } - qhp->ibqp.qp_num = qhp->wq.qpid; - pr_debug( - "%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr %pad size %d rq_addr 0x%x\n", - __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, - qhp->wq.qpid, qhp, &qhp->wq.dma_addr, 1 << qhp->wq.size_log2, - qhp->wq.rq_addr); - return &qhp->ibqp; -} - -static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - enum iwch_qp_attr_mask mask = 0; - struct iwch_qp_attributes attrs = {}; - - pr_debug("%s ib_qp %p\n", __func__, ibqp); - - /* iwarp does not support the RTR state */ - if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) - attr_mask &= ~IB_QP_STATE; - - /* Make sure we still have something left to do */ - if 
(!attr_mask) - return 0; - - qhp = to_iwch_qp(ibqp); - rhp = qhp->rhp; - - attrs.next_state = iwch_convert_state(attr->qp_state); - attrs.enable_rdma_read = (attr->qp_access_flags & - IB_ACCESS_REMOTE_READ) ? 1 : 0; - attrs.enable_rdma_write = (attr->qp_access_flags & - IB_ACCESS_REMOTE_WRITE) ? 1 : 0; - attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0; - - - mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0; - mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ? - (IWCH_QP_ATTR_ENABLE_RDMA_READ | - IWCH_QP_ATTR_ENABLE_RDMA_WRITE | - IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0; - - return iwch_modify_qp(rhp, qhp, mask, &attrs, 0); -} - -void iwch_qp_add_ref(struct ib_qp *qp) -{ - pr_debug("%s ib_qp %p\n", __func__, qp); - atomic_inc(&(to_iwch_qp(qp)->refcnt)); -} - -void iwch_qp_rem_ref(struct ib_qp *qp) -{ - pr_debug("%s ib_qp %p\n", __func__, qp); - if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt))) - wake_up(&(to_iwch_qp(qp)->wait)); -} - -static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn) -{ - pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn); - return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn); -} - - -static int iwch_query_pkey(struct ib_device *ibdev, - u8 port, u16 index, u16 * pkey) -{ - pr_debug("%s ibdev %p\n", __func__, ibdev); - *pkey = 0; - return 0; -} - -static int iwch_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) -{ - struct iwch_dev *dev; - - pr_debug("%s ibdev %p, port %d, index %d, gid %p\n", - __func__, ibdev, port, index, gid); - dev = to_iwch_dev(ibdev); - BUG_ON(port == 0 || port > 2); - memset(&(gid->raw[0]), 0, sizeof(gid->raw)); - memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6); - return 0; -} - -static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev) -{ - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - char *cp, *next; - unsigned fw_maj, fw_min, fw_mic; - - 
lldev->ethtool_ops->get_drvinfo(lldev, &info); - - next = info.fw_version + 1; - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_maj); - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_min); - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_mic); - - return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) | - (fw_mic & 0xffff); -} - -static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props, - struct ib_udata *uhw) -{ - - struct iwch_dev *dev; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - - if (uhw->inlen || uhw->outlen) - return -EINVAL; - - dev = to_iwch_dev(ibdev); - memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); - props->hw_ver = dev->rdev.t3cdev_p->type; - props->fw_ver = fw_vers_string_to_u64(dev); - props->device_cap_flags = dev->device_cap_flags; - props->page_size_cap = dev->attr.mem_pgsizes_bitmask; - props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; - props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; - props->max_mr_size = dev->attr.max_mr_size; - props->max_qp = dev->attr.max_qps; - props->max_qp_wr = dev->attr.max_wrs; - props->max_send_sge = dev->attr.max_sge_per_wr; - props->max_recv_sge = dev->attr.max_sge_per_wr; - props->max_sge_rd = 1; - props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; - props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; - props->max_cq = dev->attr.max_cqs; - props->max_cqe = dev->attr.max_cqes_per_cq; - props->max_mr = dev->attr.max_mem_regs; - props->max_pd = dev->attr.max_pds; - props->local_ca_ack_delay = 0; - props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH; - - return 0; -} - -static int iwch_query_port(struct ib_device *ibdev, - u8 port, struct ib_port_attr *props) -{ - pr_debug("%s ibdev %p\n", __func__, ibdev); - - props->port_cap_flags = - IB_PORT_CM_SUP | - IB_PORT_SNMP_TUNNEL_SUP | - IB_PORT_REINIT_SUP | - IB_PORT_DEVICE_MGMT_SUP | - IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; - props->gid_tbl_len = 1; 
- props->pkey_tbl_len = 1; - props->active_width = 2; - props->active_speed = IB_SPEED_DDR; - props->max_msg_sz = -1; - - return 0; -} - -static ssize_t hw_rev_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - - pr_debug("%s dev 0x%p\n", __func__, dev); - return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); -} -static DEVICE_ATTR_RO(hw_rev); - -static ssize_t hca_type_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - - pr_debug("%s dev 0x%p\n", __func__, dev); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - return sprintf(buf, "%s\n", info.driver); -} -static DEVICE_ATTR_RO(hca_type); - -static ssize_t board_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - - pr_debug("%s dev 0x%p\n", __func__, dev); - return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor, - iwch_dev->rdev.rnic_info.pdev->device); -} -static DEVICE_ATTR_RO(board_id); - -enum counters { - IPINRECEIVES, - IPINHDRERRORS, - IPINADDRERRORS, - IPINUNKNOWNPROTOS, - IPINDISCARDS, - IPINDELIVERS, - IPOUTREQUESTS, - IPOUTDISCARDS, - IPOUTNOROUTES, - IPREASMTIMEOUT, - IPREASMREQDS, - IPREASMOKS, - IPREASMFAILS, - TCPACTIVEOPENS, - TCPPASSIVEOPENS, - TCPATTEMPTFAILS, - TCPESTABRESETS, - TCPCURRESTAB, - TCPINSEGS, - TCPOUTSEGS, - TCPRETRANSSEGS, - TCPINERRS, - TCPOUTRSTS, - TCPRTOMIN, - TCPRTOMAX, - NR_COUNTERS -}; - -static const char * const names[] = { - [IPINRECEIVES] = "ipInReceives", - [IPINHDRERRORS] = "ipInHdrErrors", - [IPINADDRERRORS] = "ipInAddrErrors", - [IPINUNKNOWNPROTOS] = "ipInUnknownProtos", - [IPINDISCARDS] = "ipInDiscards", - [IPINDELIVERS] = 
"ipInDelivers", - [IPOUTREQUESTS] = "ipOutRequests", - [IPOUTDISCARDS] = "ipOutDiscards", - [IPOUTNOROUTES] = "ipOutNoRoutes", - [IPREASMTIMEOUT] = "ipReasmTimeout", - [IPREASMREQDS] = "ipReasmReqds", - [IPREASMOKS] = "ipReasmOKs", - [IPREASMFAILS] = "ipReasmFails", - [TCPACTIVEOPENS] = "tcpActiveOpens", - [TCPPASSIVEOPENS] = "tcpPassiveOpens", - [TCPATTEMPTFAILS] = "tcpAttemptFails", - [TCPESTABRESETS] = "tcpEstabResets", - [TCPCURRESTAB] = "tcpCurrEstab", - [TCPINSEGS] = "tcpInSegs", - [TCPOUTSEGS] = "tcpOutSegs", - [TCPRETRANSSEGS] = "tcpRetransSegs", - [TCPINERRS] = "tcpInErrs", - [TCPOUTRSTS] = "tcpOutRsts", - [TCPRTOMIN] = "tcpRtoMin", - [TCPRTOMAX] = "tcpRtoMax", -}; - -static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev, - u8 port_num) -{ - BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS); - - /* Our driver only supports device level stats */ - if (port_num != 0) - return NULL; - - return rdma_alloc_hw_stats_struct(names, NR_COUNTERS, - RDMA_HW_STATS_DEFAULT_LIFESPAN); -} - -static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats, - u8 port, int index) -{ - struct iwch_dev *dev; - struct tp_mib_stats m; - int ret; - - if (port != 0 || !stats) - return -ENOSYS; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - dev = to_iwch_dev(ibdev); - ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m); - if (ret) - return -ENOSYS; - - stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) + m.ipInReceive_lo; - stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo; - stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) + m.ipInAddrErrors_lo; - stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo; - stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo; - stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo; - stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + 
m.ipOutRequests_lo; - stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo; - stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo; - stats->value[IPREASMTIMEOUT] = m.ipReasmTimeout; - stats->value[IPREASMREQDS] = m.ipReasmReqds; - stats->value[IPREASMOKS] = m.ipReasmOKs; - stats->value[IPREASMFAILS] = m.ipReasmFails; - stats->value[TCPACTIVEOPENS] = m.tcpActiveOpens; - stats->value[TCPPASSIVEOPENS] = m.tcpPassiveOpens; - stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails; - stats->value[TCPESTABRESETS] = m.tcpEstabResets; - stats->value[TCPCURRESTAB] = m.tcpOutRsts; - stats->value[TCPINSEGS] = m.tcpCurrEstab; - stats->value[TCPOUTSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo; - stats->value[TCPRETRANSSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo; - stats->value[TCPINERRS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo, - stats->value[TCPOUTRSTS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo; - stats->value[TCPRTOMIN] = m.tcpRtoMin; - stats->value[TCPRTOMAX] = m.tcpRtoMax; - - return stats->num_counters; -} - -static struct attribute *iwch_class_attributes[] = { - &dev_attr_hw_rev.attr, - &dev_attr_hca_type.attr, - &dev_attr_board_id.attr, - NULL -}; - -static const struct attribute_group iwch_attr_group = { - .attrs = iwch_class_attributes, -}; - -static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) -{ - struct ib_port_attr attr; - int err; - - immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; - - err = ib_query_port(ibdev, port_num, &attr); - if (err) - return err; - - immutable->pkey_tbl_len = attr.pkey_tbl_len; - immutable->gid_tbl_len = attr.gid_tbl_len; - - return 0; -} - -static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str) -{ - struct iwch_dev *iwch_dev = to_iwch_dev(ibdev); - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - - pr_debug("%s dev 0x%p\n", __func__, 
iwch_dev); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); -} - -static const struct ib_device_ops iwch_dev_ops = { - .owner = THIS_MODULE, - .driver_id = RDMA_DRIVER_CXGB3, - .uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION, - .uverbs_no_driver_id_binding = 1, - - .alloc_hw_stats = iwch_alloc_stats, - .alloc_mr = iwch_alloc_mr, - .alloc_mw = iwch_alloc_mw, - .alloc_pd = iwch_allocate_pd, - .alloc_ucontext = iwch_alloc_ucontext, - .create_cq = iwch_create_cq, - .create_qp = iwch_create_qp, - .dealloc_mw = iwch_dealloc_mw, - .dealloc_pd = iwch_deallocate_pd, - .dealloc_ucontext = iwch_dealloc_ucontext, - .dereg_mr = iwch_dereg_mr, - .destroy_cq = iwch_destroy_cq, - .destroy_qp = iwch_destroy_qp, - .get_dev_fw_str = get_dev_fw_ver_str, - .get_dma_mr = iwch_get_dma_mr, - .get_hw_stats = iwch_get_mib, - .get_port_immutable = iwch_port_immutable, - .iw_accept = iwch_accept_cr, - .iw_add_ref = iwch_qp_add_ref, - .iw_connect = iwch_connect, - .iw_create_listen = iwch_create_listen, - .iw_destroy_listen = iwch_destroy_listen, - .iw_get_qp = iwch_get_qp, - .iw_reject = iwch_reject_cr, - .iw_rem_ref = iwch_qp_rem_ref, - .map_mr_sg = iwch_map_mr_sg, - .mmap = iwch_mmap, - .modify_qp = iwch_ib_modify_qp, - .poll_cq = iwch_poll_cq, - .post_recv = iwch_post_receive, - .post_send = iwch_post_send, - .query_device = iwch_query_device, - .query_gid = iwch_query_gid, - .query_pkey = iwch_query_pkey, - .query_port = iwch_query_port, - .reg_user_mr = iwch_reg_user_mr, - .req_notify_cq = iwch_arm_cq, - INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd), - INIT_RDMA_OBJ_SIZE(ib_cq, iwch_cq, ibcq), - INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext), -}; - -static int set_netdevs(struct ib_device *ib_dev, struct cxio_rdev *rdev) -{ - int ret; - int i; - - for (i = 0; i < rdev->port_info.nports; i++) { - ret = ib_device_set_netdev(ib_dev, rdev->port_info.lldevs[i], - i + 1); - if (ret) - return ret; - } - return 0; -} - -int 
iwch_register_device(struct iwch_dev *dev) -{ - int err; - - pr_debug("%s iwch_dev %p\n", __func__, dev); - memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); - memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); - dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | - IB_DEVICE_MEM_WINDOW | - IB_DEVICE_MEM_MGT_EXTENSIONS; - - /* cxgb3 supports STag 0. */ - dev->ibdev.local_dma_lkey = 0; - - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV); - dev->ibdev.node_type = RDMA_NODE_RNIC; - BUILD_BUG_ON(sizeof(IWCH_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); - memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); - dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; - dev->ibdev.num_comp_vectors = 1; - dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev; - - memcpy(dev->ibdev.iw_ifname, dev->rdev.t3cdev_p->lldev->name, - sizeof(dev->ibdev.iw_ifname)); - - rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); - ib_set_device_ops(&dev->ibdev, &iwch_dev_ops); - err = set_netdevs(&dev->ibdev, &dev->rdev); - if (err) - return err; - - return ib_register_device(&dev->ibdev, "cxgb3_%d"); -} - -void iwch_unregister_device(struct iwch_dev *dev) -{ - pr_debug("%s iwch_dev %p\n", __func__, dev); - ib_unregister_device(&dev->ibdev); - 
return; -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h deleted file mode 100644 index 8adbe9658935..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __IWCH_PROVIDER_H__ -#define __IWCH_PROVIDER_H__ - -#include <linux/list.h> -#include <linux/spinlock.h> -#include <rdma/ib_verbs.h> -#include <asm/types.h> -#include "t3cdev.h" -#include "iwch.h" -#include "cxio_wr.h" -#include "cxio_hal.h" - -struct iwch_pd { - struct ib_pd ibpd; - u32 pdid; - struct iwch_dev *rhp; -}; - -static inline struct iwch_pd *to_iwch_pd(struct ib_pd *ibpd) -{ - return container_of(ibpd, struct iwch_pd, ibpd); -} - -struct tpt_attributes { - u32 stag; - u32 state:1; - u32 type:2; - u32 rsvd:1; - enum tpt_mem_perm perms; - u32 remote_invaliate_disable:1; - u32 zbva:1; - u32 mw_bind_enable:1; - u32 page_size:5; - - u32 pdid; - u32 qpid; - u32 pbl_addr; - u32 len; - u64 va_fbo; - u32 pbl_size; -}; - -struct iwch_mr { - struct ib_mr ibmr; - struct ib_umem *umem; - struct iwch_dev *rhp; - u64 kva; - struct tpt_attributes attr; - u64 *pages; - u32 npages; -}; - -typedef struct iwch_mw iwch_mw_handle; - -static inline struct iwch_mr *to_iwch_mr(struct ib_mr *ibmr) -{ - return container_of(ibmr, struct iwch_mr, ibmr); -} - -struct iwch_mw { - struct ib_mw ibmw; - struct iwch_dev *rhp; - u64 kva; - struct tpt_attributes attr; -}; - -static inline struct iwch_mw *to_iwch_mw(struct ib_mw *ibmw) -{ - return container_of(ibmw, struct iwch_mw, ibmw); -} - -struct iwch_cq { - struct ib_cq ibcq; - struct iwch_dev *rhp; - struct t3_cq cq; - spinlock_t lock; - spinlock_t comp_handler_lock; - atomic_t refcnt; - wait_queue_head_t wait; - u32 __user *user_rptr_addr; -}; - -static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq) -{ - return container_of(ibcq, struct iwch_cq, ibcq); -} - -enum IWCH_QP_FLAGS { - QP_QUIESCED = 0x01 -}; - -struct iwch_mpa_attributes { - u8 initiator; - u8 recv_marker_enabled; - u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. 
*/ - u8 crc_enabled; - u8 version; /* 0 or 1 */ -}; - -struct iwch_qp_attributes { - u32 scq; - u32 rcq; - u32 sq_num_entries; - u32 rq_num_entries; - u32 sq_max_sges; - u32 sq_max_sges_rdma_write; - u32 rq_max_sges; - u32 state; - u8 enable_rdma_read; - u8 enable_rdma_write; /* enable inbound Read Resp. */ - u8 enable_bind; - u8 enable_mmid0_fastreg; /* Enable STAG0 + Fast-register */ - /* - * Next QP state. If specify the current state, only the - * QP attributes will be modified. - */ - u32 max_ord; - u32 max_ird; - u32 pd; /* IN */ - u32 next_state; - char terminate_buffer[52]; - u32 terminate_msg_len; - u8 is_terminate_local; - struct iwch_mpa_attributes mpa_attr; /* IN-OUT */ - struct iwch_ep *llp_stream_handle; - char *stream_msg_buf; /* Last stream msg. before Idle -> RTS */ - u32 stream_msg_buf_len; /* Only on Idle -> RTS */ -}; - -struct iwch_qp { - struct ib_qp ibqp; - struct iwch_dev *rhp; - struct iwch_ep *ep; - struct iwch_qp_attributes attr; - struct t3_wq wq; - spinlock_t lock; - atomic_t refcnt; - wait_queue_head_t wait; - enum IWCH_QP_FLAGS flags; -}; - -static inline int qp_quiesced(struct iwch_qp *qhp) -{ - return qhp->flags & QP_QUIESCED; -} - -static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp) -{ - return container_of(ibqp, struct iwch_qp, ibqp); -} - -void iwch_qp_add_ref(struct ib_qp *qp); -void iwch_qp_rem_ref(struct ib_qp *qp); - -struct iwch_ucontext { - struct ib_ucontext ibucontext; - struct cxio_ucontext uctx; - u32 key; - spinlock_t mmap_lock; - struct list_head mmaps; -}; - -static inline struct iwch_ucontext *to_iwch_ucontext(struct ib_ucontext *c) -{ - return container_of(c, struct iwch_ucontext, ibucontext); -} - -struct iwch_mm_entry { - struct list_head entry; - u64 addr; - u32 key; - unsigned len; -}; - -static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext, - u32 key, unsigned len) -{ - struct list_head *pos, *nxt; - struct iwch_mm_entry *mm; - - spin_lock(&ucontext->mmap_lock); - 
list_for_each_safe(pos, nxt, &ucontext->mmaps) { - - mm = list_entry(pos, struct iwch_mm_entry, entry); - if (mm->key == key && mm->len == len) { - list_del_init(&mm->entry); - spin_unlock(&ucontext->mmap_lock); - pr_debug("%s key 0x%x addr 0x%llx len %d\n", - __func__, key, - (unsigned long long)mm->addr, mm->len); - return mm; - } - } - spin_unlock(&ucontext->mmap_lock); - return NULL; -} - -static inline void insert_mmap(struct iwch_ucontext *ucontext, - struct iwch_mm_entry *mm) -{ - spin_lock(&ucontext->mmap_lock); - pr_debug("%s key 0x%x addr 0x%llx len %d\n", - __func__, mm->key, (unsigned long long)mm->addr, mm->len); - list_add_tail(&mm->entry, &ucontext->mmaps); - spin_unlock(&ucontext->mmap_lock); -} - -enum iwch_qp_attr_mask { - IWCH_QP_ATTR_NEXT_STATE = 1 << 0, - IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, - IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, - IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, - IWCH_QP_ATTR_MAX_ORD = 1 << 11, - IWCH_QP_ATTR_MAX_IRD = 1 << 12, - IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22, - IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23, - IWCH_QP_ATTR_MPA_ATTR = 1 << 24, - IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25, - IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ | - IWCH_QP_ATTR_ENABLE_RDMA_WRITE | - IWCH_QP_ATTR_MAX_ORD | - IWCH_QP_ATTR_MAX_IRD | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | - IWCH_QP_ATTR_STREAM_MSG_BUFFER | - IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE) -}; - -int iwch_modify_qp(struct iwch_dev *rhp, - struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs, - int internal); - -enum iwch_qp_state { - IWCH_QP_STATE_IDLE, - IWCH_QP_STATE_RTS, - IWCH_QP_STATE_ERROR, - IWCH_QP_STATE_TERMINATE, - IWCH_QP_STATE_CLOSING, - IWCH_QP_STATE_TOT -}; - -static inline int iwch_convert_state(enum ib_qp_state ib_state) -{ - switch (ib_state) { - case IB_QPS_RESET: - case IB_QPS_INIT: - return IWCH_QP_STATE_IDLE; - case IB_QPS_RTS: - return IWCH_QP_STATE_RTS; - case IB_QPS_SQD: - return 
IWCH_QP_STATE_CLOSING; - case IB_QPS_SQE: - return IWCH_QP_STATE_TERMINATE; - case IB_QPS_ERR: - return IWCH_QP_STATE_ERROR; - default: - return -1; - } -} - -static inline u32 iwch_ib_to_tpt_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) | - (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) | - TPT_LOCAL_READ; -} - -static inline u32 iwch_ib_to_tpt_bind_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0); -} - -enum iwch_mmid_state { - IWCH_STAG_STATE_VALID, - IWCH_STAG_STATE_INVALID -}; - -enum iwch_qp_query_flags { - IWCH_QP_QUERY_CONTEXT_NONE = 0x0, /* No ctx; Only attrs */ - IWCH_QP_QUERY_CONTEXT_GET = 0x1, /* Get ctx + attrs */ - IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2, /* Not Supported */ - - /* - * Quiesce QP context; Consumer - * will NOT replay outstanding WR - */ - IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4, - IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8, - IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */ -}; - -u16 iwch_rqes_posted(struct iwch_qp *qhp); -int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr); -int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr); -int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); -int iwch_post_zb_read(struct iwch_ep *ep); -int iwch_register_device(struct iwch_dev *dev); -void iwch_unregister_device(struct iwch_dev *dev); -void stop_read_rep_timer(struct iwch_qp *qhp); -int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, int shift); -int iwch_alloc_pbl(struct iwch_mr *mhp, int npages); -void iwch_free_pbl(struct iwch_mr *mhp); -int iwch_write_pbl(struct iwch_mr *mhp, __be64 
*pages, int npages, int offset); - -#define IWCH_NODE_DESC "cxgb3 Chelsio Communications" - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c deleted file mode 100644 index c649faad63f9..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ /dev/null @@ -1,1082 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include <linux/sched.h> -#include <linux/gfp.h> -#include "iwch_provider.h" -#include "iwch.h" -#include "iwch_cm.h" -#include "cxio_hal.h" -#include "cxio_resource.h" - -#define NO_SUPPORT -1 - -static int build_rdma_send(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - int i; - u32 plen; - - switch (wr->opcode) { - case IB_WR_SEND: - if (wr->send_flags & IB_SEND_SOLICITED) - wqe->send.rdmaop = T3_SEND_WITH_SE; - else - wqe->send.rdmaop = T3_SEND; - wqe->send.rem_stag = 0; - break; - case IB_WR_SEND_WITH_INV: - if (wr->send_flags & IB_SEND_SOLICITED) - wqe->send.rdmaop = T3_SEND_WITH_SE_INV; - else - wqe->send.rdmaop = T3_SEND_WITH_INV; - wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey); - break; - default: - return -EINVAL; - } - if (wr->num_sge > T3_MAX_SGE) - return -EINVAL; - wqe->send.reserved[0] = 0; - wqe->send.reserved[1] = 0; - wqe->send.reserved[2] = 0; - plen = 0; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) - return -EMSGSIZE; - - plen += wr->sg_list[i].length; - wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); - wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); - } - wqe->send.num_sgle = cpu_to_be32(wr->num_sge); - *flit_cnt = 4 + ((wr->num_sge) << 1); - wqe->send.plen = cpu_to_be32(plen); - return 0; -} - -static int build_rdma_write(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - int i; - u32 plen; - if (wr->num_sge > T3_MAX_SGE) - return -EINVAL; - wqe->write.rdmaop = T3_RDMA_WRITE; - wqe->write.reserved[0] = 0; - wqe->write.reserved[1] = 0; - wqe->write.reserved[2] = 0; - wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); - wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); - - if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { - plen = 4; - wqe->write.sgl[0].stag = wr->ex.imm_data; - wqe->write.sgl[0].len = cpu_to_be32(0); - wqe->write.num_sgle = cpu_to_be32(0); - 
*flit_cnt = 6; - } else { - plen = 0; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) { - return -EMSGSIZE; - } - plen += wr->sg_list[i].length; - wqe->write.sgl[i].stag = - cpu_to_be32(wr->sg_list[i].lkey); - wqe->write.sgl[i].len = - cpu_to_be32(wr->sg_list[i].length); - wqe->write.sgl[i].to = - cpu_to_be64(wr->sg_list[i].addr); - } - wqe->write.num_sgle = cpu_to_be32(wr->num_sge); - *flit_cnt = 5 + ((wr->num_sge) << 1); - } - wqe->write.plen = cpu_to_be32(plen); - return 0; -} - -static int build_rdma_read(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - if (wr->num_sge > 1) - return -EINVAL; - wqe->read.rdmaop = T3_READ_REQ; - if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) - wqe->read.local_inv = 1; - else - wqe->read.local_inv = 0; - wqe->read.reserved[0] = 0; - wqe->read.reserved[1] = 0; - wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey); - wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr); - wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); - wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length); - wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr); - *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; - return 0; -} - -static int build_memreg(union t3_wr *wqe, const struct ib_reg_wr *wr, - u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) -{ - struct iwch_mr *mhp = to_iwch_mr(wr->mr); - int i; - __be64 *p; - - if (mhp->npages > T3_MAX_FASTREG_DEPTH) - return -EINVAL; - *wr_cnt = 1; - wqe->fastreg.stag = cpu_to_be32(wr->key); - wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length); - wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32); - wqe->fastreg.va_base_lo_fbo = - cpu_to_be32(mhp->ibmr.iova & 0xffffffff); - wqe->fastreg.page_type_perms = cpu_to_be32( - V_FR_PAGE_COUNT(mhp->npages) | - V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) | - V_FR_TYPE(TPT_VATO) | - V_FR_PERMS(iwch_ib_to_tpt_access(wr->access))); - p = &wqe->fastreg.pbl_addrs[0]; - for (i = 0; i < mhp->npages; i++, p++) { - - 
/* If we need a 2nd WR, then set it up */ - if (i == T3_MAX_FASTREG_FRAG) { - *wr_cnt = 2; - wqe = (union t3_wr *)(wq->queue + - Q_PTR2IDX((wq->wptr+1), wq->size_log2)); - build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, - Q_GENBIT(wq->wptr + 1, wq->size_log2), - 0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG, - T3_EOP); - - p = &wqe->pbl_frag.pbl_addrs[0]; - } - *p = cpu_to_be64((u64)mhp->pages[i]); - } - *flit_cnt = 5 + mhp->npages; - if (*flit_cnt > 15) - *flit_cnt = 15; - return 0; -} - -static int build_inv_stag(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); - wqe->local_inv.reserved = 0; - *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3; - return 0; -} - -static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, - u32 num_sgle, u32 * pbl_addr, u8 * page_size) -{ - int i; - struct iwch_mr *mhp; - u64 offset; - for (i = 0; i < num_sgle; i++) { - - mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8); - if (!mhp) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - if (!mhp->attr.state) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - if (mhp->attr.zbva) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - - if (sg_list[i].addr < mhp->attr.va_fbo) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - if (sg_list[i].addr + ((u64) sg_list[i].length) < - sg_list[i].addr) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - if (sg_list[i].addr + ((u64) sg_list[i].length) > - mhp->attr.va_fbo + ((u64) mhp->attr.len)) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - offset = sg_list[i].addr - mhp->attr.va_fbo; - offset += mhp->attr.va_fbo & - ((1UL << (12 + mhp->attr.page_size)) - 1); - pbl_addr[i] = ((mhp->attr.pbl_addr - - rhp->rdev.rnic_info.pbl_base) >> 3) + - (offset >> (12 + mhp->attr.page_size)); - page_size[i] = mhp->attr.page_size; - } - return 0; -} - -static int build_rdma_recv(struct 
iwch_qp *qhp, union t3_wr *wqe, - const struct ib_recv_wr *wr) -{ - int i, err = 0; - u32 pbl_addr[T3_MAX_SGE]; - u8 page_size[T3_MAX_SGE]; - - err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr, - page_size); - if (err) - return err; - wqe->recv.pagesz[0] = page_size[0]; - wqe->recv.pagesz[1] = page_size[1]; - wqe->recv.pagesz[2] = page_size[2]; - wqe->recv.pagesz[3] = page_size[3]; - wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); - for (i = 0; i < wr->num_sge; i++) { - wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); - wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - - /* to in the WQE == the offset into the page */ - wqe->recv.sgl[i].to = cpu_to_be64(((u32)wr->sg_list[i].addr) & - ((1UL << (12 + page_size[i])) - 1)); - - /* pbl_addr is the adapters address in the PBL */ - wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); - } - for (; i < T3_MAX_SGE; i++) { - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = 0; - wqe->recv.sgl[i].to = 0; - wqe->recv.pbl_addr[i] = 0; - } - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].wr_id = wr->wr_id; - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].pbl_addr = 0; - return 0; -} - -static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, - const struct ib_recv_wr *wr) -{ - int i; - u32 pbl_addr; - u32 pbl_offset; - - - /* - * The T3 HW requires the PBL in the HW recv descriptor to reference - * a PBL entry. So we allocate the max needed PBL memory here and pass - * it to the uP in the recv WR. The uP will build the PBL and setup - * the HW recv descriptor. - */ - pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE); - if (!pbl_addr) - return -ENOMEM; - - /* - * Compute the 8B aligned offset. - */ - pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3; - - wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); - - for (i = 0; i < wr->num_sge; i++) { - - /* - * Use a 128MB page size. 
This and an imposed 128MB - * sge length limit allows us to require only a 2-entry HW - * PBL for each SGE. This restriction is acceptable since - * since it is not possible to allocate 128MB of contiguous - * DMA coherent memory! - */ - if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN) - return -EINVAL; - wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT; - - /* - * T3 restricts a recv to all zero-stag or all non-zero-stag. - */ - if (wr->sg_list[i].lkey != 0) - return -EINVAL; - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); - wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset); - pbl_offset += 2; - } - for (; i < T3_MAX_SGE; i++) { - wqe->recv.pagesz[i] = 0; - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = 0; - wqe->recv.sgl[i].to = 0; - wqe->recv.pbl_addr[i] = 0; - } - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].wr_id = wr->wr_id; - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].pbl_addr = pbl_addr; - return 0; -} - -int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr) -{ - int err = 0; - u8 uninitialized_var(t3_wr_flit_cnt); - enum t3_wr_opcode t3_wr_opcode = 0; - enum t3_wr_flags t3_wr_flags; - struct iwch_qp *qhp; - u32 idx; - union t3_wr *wqe; - u32 num_wrs; - unsigned long flag; - struct t3_swsq *sqp; - int wr_cnt = 1; - - qhp = to_iwch_qp(ibqp); - spin_lock_irqsave(&qhp->lock, flag); - if (qhp->attr.state > IWCH_QP_STATE_RTS) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -EINVAL; - goto out; - } - num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, - qhp->wq.sq_size_log2); - if (num_wrs == 0) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -ENOMEM; - goto out; - } - while (wr) { - if (num_wrs == 0) { - err = -ENOMEM; - break; - } - idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); - wqe = (union t3_wr *) (qhp->wq.queue + idx); - t3_wr_flags = 0; - if 
(wr->send_flags & IB_SEND_SOLICITED) - t3_wr_flags |= T3_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_SIGNALED) - t3_wr_flags |= T3_COMPLETION_FLAG; - sqp = qhp->wq.sq + - Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); - switch (wr->opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_INV: - if (wr->send_flags & IB_SEND_FENCE) - t3_wr_flags |= T3_READ_FENCE_FLAG; - t3_wr_opcode = T3_WR_SEND; - err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt); - break; - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - t3_wr_opcode = T3_WR_WRITE; - err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt); - break; - case IB_WR_RDMA_READ: - case IB_WR_RDMA_READ_WITH_INV: - t3_wr_opcode = T3_WR_READ; - t3_wr_flags = 0; /* T3 reads are always signaled */ - err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt); - if (err) - break; - sqp->read_len = wqe->read.local_len; - if (!qhp->wq.oldest_read) - qhp->wq.oldest_read = sqp; - break; - case IB_WR_REG_MR: - t3_wr_opcode = T3_WR_FASTREG; - err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt, - &wr_cnt, &qhp->wq); - break; - case IB_WR_LOCAL_INV: - if (wr->send_flags & IB_SEND_FENCE) - t3_wr_flags |= T3_LOCAL_FENCE_FLAG; - t3_wr_opcode = T3_WR_INV_STAG; - err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt); - break; - default: - pr_debug("%s post of type=%d TBD!\n", __func__, - wr->opcode); - err = -EINVAL; - } - if (err) - break; - wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; - sqp->wr_id = wr->wr_id; - sqp->opcode = wr2opcode(t3_wr_opcode); - sqp->sq_wptr = qhp->wq.sq_wptr; - sqp->complete = 0; - sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED); - - build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags, - Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, t3_wr_flit_cnt, - (wr_cnt == 1) ? 
T3_SOPEOP : T3_SOP); - pr_debug("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n", - __func__, (unsigned long long)wr->wr_id, idx, - Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2), - sqp->opcode); - wr = wr->next; - num_wrs--; - qhp->wq.wptr += wr_cnt; - ++(qhp->wq.sq_wptr); - } - spin_unlock_irqrestore(&qhp->lock, flag); - if (cxio_wq_db_enabled(&qhp->wq)) - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); - -out: - if (err) - *bad_wr = wr; - return err; -} - -int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr) -{ - int err = 0; - struct iwch_qp *qhp; - u32 idx; - union t3_wr *wqe; - u32 num_wrs; - unsigned long flag; - - qhp = to_iwch_qp(ibqp); - spin_lock_irqsave(&qhp->lock, flag); - if (qhp->attr.state > IWCH_QP_STATE_RTS) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -EINVAL; - goto out; - } - num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr, - qhp->wq.rq_size_log2) - 1; - if (!wr) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -ENOMEM; - goto out; - } - while (wr) { - if (wr->num_sge > T3_MAX_SGE) { - err = -EINVAL; - break; - } - idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); - wqe = (union t3_wr *) (qhp->wq.queue + idx); - if (num_wrs) - if (wr->sg_list[0].lkey) - err = build_rdma_recv(qhp, wqe, wr); - else - err = build_zero_stag_recv(qhp, wqe, wr); - else - err = -ENOMEM; - - if (err) - break; - - build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, - Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); - pr_debug("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x wqe %p\n", - __func__, (unsigned long long)wr->wr_id, - idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe); - ++(qhp->wq.rq_wptr); - ++(qhp->wq.wptr); - wr = wr->next; - num_wrs--; - } - spin_unlock_irqrestore(&qhp->lock, flag); - if (cxio_wq_db_enabled(&qhp->wq)) - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); - -out: - if (err) - *bad_wr = wr; - return err; -} - 
-static inline void build_term_codes(struct respQ_msg_t *rsp_msg, - u8 *layer_type, u8 *ecode) -{ - int status = TPT_ERR_INTERNAL_ERR; - int tagged = 0; - int opcode = -1; - int rqtype = 0; - int send_inv = 0; - - if (rsp_msg) { - status = CQE_STATUS(rsp_msg->cqe); - opcode = CQE_OPCODE(rsp_msg->cqe); - rqtype = RQ_TYPE(rsp_msg->cqe); - send_inv = (opcode == T3_SEND_WITH_INV) || - (opcode == T3_SEND_WITH_SE_INV); - tagged = (opcode == T3_RDMA_WRITE) || - (rqtype && (opcode == T3_READ_RESP)); - } - - switch (status) { - case TPT_ERR_STAG: - if (send_inv) { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_CANT_INV_STAG; - } else { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_INV_STAG; - } - break; - case TPT_ERR_PDID: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - if ((opcode == T3_SEND_WITH_INV) || - (opcode == T3_SEND_WITH_SE_INV)) - *ecode = RDMAP_CANT_INV_STAG; - else - *ecode = RDMAP_STAG_NOT_ASSOC; - break; - case TPT_ERR_QPID: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_STAG_NOT_ASSOC; - break; - case TPT_ERR_ACCESS: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_ACC_VIOL; - break; - case TPT_ERR_WRAP: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_TO_WRAP; - break; - case TPT_ERR_BOUND: - if (tagged) { - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_BASE_BOUNDS; - } else { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_BASE_BOUNDS; - } - break; - case TPT_ERR_INVALIDATE_SHARED_MR: - case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_CANT_INV_STAG; - break; - case TPT_ERR_ECC: - case TPT_ERR_ECC_PSTAG: - case TPT_ERR_INTERNAL_ERR: - *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA; - *ecode = 0; - break; - case TPT_ERR_OUT_OF_RQE: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MSN_NOBUF; - break; - case TPT_ERR_PBL_ADDR_BOUND: - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = 
DDPT_BASE_BOUNDS; - break; - case TPT_ERR_CRC: - *layer_type = LAYER_MPA|DDP_LLP; - *ecode = MPA_CRC_ERR; - break; - case TPT_ERR_MARKER: - *layer_type = LAYER_MPA|DDP_LLP; - *ecode = MPA_MARKER_ERR; - break; - case TPT_ERR_PDU_LEN_ERR: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_MSG_TOOBIG; - break; - case TPT_ERR_DDP_VERSION: - if (tagged) { - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_INV_VERS; - } else { - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_VERS; - } - break; - case TPT_ERR_RDMA_VERSION: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_INV_VERS; - break; - case TPT_ERR_OPCODE: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_INV_OPCODE; - break; - case TPT_ERR_DDP_QUEUE_NUM: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_QN; - break; - case TPT_ERR_MSN: - case TPT_ERR_MSN_GAP: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_IRD_OVERFLOW: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MSN_RANGE; - break; - case TPT_ERR_TBIT: - *layer_type = LAYER_DDP|DDP_LOCAL_CATA; - *ecode = 0; - break; - case TPT_ERR_MO: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MO; - break; - default: - *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; - *ecode = 0; - break; - } -} - -int iwch_post_zb_read(struct iwch_ep *ep) -{ - union t3_wr *wqe; - struct sk_buff *skb; - u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; - - pr_debug("%s enter\n", __func__); - skb = alloc_skb(40, GFP_KERNEL); - if (!skb) { - pr_err("%s cannot send zb_read!!\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, sizeof(struct t3_rdma_read_wr)); - wqe->read.rdmaop = T3_READ_REQ; - wqe->read.reserved[0] = 0; - wqe->read.reserved[1] = 0; - wqe->read.rem_stag = cpu_to_be32(1); - wqe->read.rem_to = cpu_to_be64(1); - wqe->read.local_stag = cpu_to_be32(1); - wqe->read.local_len = cpu_to_be32(0); - wqe->read.local_to = cpu_to_be64(1); - wqe->send.wrh.op_seop_flags = 
cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ)); - wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)| - V_FW_RIWR_LEN(flit_cnt)); - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb); -} - -/* - * This posts a TERMINATE with layer=RDMA, type=catastrophic. - */ -int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) -{ - union t3_wr *wqe; - struct terminate_message *term; - struct sk_buff *skb; - - pr_debug("%s %d\n", __func__, __LINE__); - skb = alloc_skb(40, GFP_ATOMIC); - if (!skb) { - pr_err("%s cannot send TERMINATE!\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, 40); - wqe->send.rdmaop = T3_TERMINATE; - - /* immediate data length */ - wqe->send.plen = htonl(4); - - /* immediate data starts here. */ - term = (struct terminate_message *)wqe->send.sgl; - build_term_codes(rsp_msg, &term->layer_etype, &term->ecode); - wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) | - V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG)); - wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)); - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); -} - -/* - * Assumes qhp lock is held. - */ -static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, - struct iwch_cq *schp) - __releases(&qhp->lock) - __acquires(&qhp->lock) -{ - int count; - int flushed; - - lockdep_assert_held(&qhp->lock); - - pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); - /* take a ref on the qhp since we must release the lock */ - atomic_inc(&qhp->refcnt); - spin_unlock(&qhp->lock); - - /* locking hierarchy: cq lock first, then qp lock. 
*/ - spin_lock(&rchp->lock); - spin_lock(&qhp->lock); - cxio_flush_hw_cq(&rchp->cq); - cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); - flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); - spin_unlock(&qhp->lock); - spin_unlock(&rchp->lock); - if (flushed) { - spin_lock(&rchp->comp_handler_lock); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock(&rchp->comp_handler_lock); - } - - /* locking hierarchy: cq lock first, then qp lock. */ - spin_lock(&schp->lock); - spin_lock(&qhp->lock); - cxio_flush_hw_cq(&schp->cq); - cxio_count_scqes(&schp->cq, &qhp->wq, &count); - flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); - spin_unlock(&qhp->lock); - spin_unlock(&schp->lock); - if (flushed) { - spin_lock(&schp->comp_handler_lock); - (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); - spin_unlock(&schp->comp_handler_lock); - } - - /* deref */ - if (atomic_dec_and_test(&qhp->refcnt)) - wake_up(&qhp->wait); - - spin_lock(&qhp->lock); -} - -static void flush_qp(struct iwch_qp *qhp) -{ - struct iwch_cq *rchp, *schp; - - rchp = get_chp(qhp->rhp, qhp->attr.rcq); - schp = get_chp(qhp->rhp, qhp->attr.scq); - - if (qhp->ibqp.uobject) { - cxio_set_wq_in_error(&qhp->wq); - cxio_set_cq_in_error(&rchp->cq); - spin_lock(&rchp->comp_handler_lock); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock(&rchp->comp_handler_lock); - if (schp != rchp) { - cxio_set_cq_in_error(&schp->cq); - spin_lock(&schp->comp_handler_lock); - (*schp->ibcq.comp_handler)(&schp->ibcq, - schp->ibcq.cq_context); - spin_unlock(&schp->comp_handler_lock); - } - return; - } - __flush_qp(qhp, rchp, schp); -} - - -/* - * Return count of RECV WRs posted - */ -u16 iwch_rqes_posted(struct iwch_qp *qhp) -{ - union t3_wr *wqe = qhp->wq.queue; - u16 count = 0; - - while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { - count++; - wqe++; - } - pr_debug("%s qhp %p count %u\n", __func__, qhp, count); - return count; -} - 
-static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs) -{ - struct t3_rdma_init_attr init_attr; - int ret; - - init_attr.tid = qhp->ep->hwtid; - init_attr.qpid = qhp->wq.qpid; - init_attr.pdid = qhp->attr.pd; - init_attr.scqid = qhp->attr.scq; - init_attr.rcqid = qhp->attr.rcq; - init_attr.rq_addr = qhp->wq.rq_addr; - init_attr.rq_size = 1 << qhp->wq.rq_size_log2; - init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE | - qhp->attr.mpa_attr.recv_marker_enabled | - (qhp->attr.mpa_attr.xmit_marker_enabled << 1) | - (qhp->attr.mpa_attr.crc_enabled << 2); - - init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE | - uP_RI_QP_RDMA_WRITE_ENABLE | - uP_RI_QP_BIND_ENABLE; - if (!qhp->ibqp.uobject) - init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE | - uP_RI_QP_FAST_REGISTER_ENABLE; - - init_attr.tcp_emss = qhp->ep->emss; - init_attr.ord = qhp->attr.max_ord; - init_attr.ird = qhp->attr.max_ird; - init_attr.qp_dma_addr = qhp->wq.dma_addr; - init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); - init_attr.rqe_count = iwch_rqes_posted(qhp); - init_attr.flags = qhp->attr.mpa_attr.initiator ? 
MPA_INITIATOR : 0; - init_attr.chan = qhp->ep->l2t->smt_idx; - if (peer2peer) { - init_attr.rtr_type = RTR_READ; - if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) - init_attr.ord = 1; - if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator) - init_attr.ird = 1; - } else - init_attr.rtr_type = 0; - init_attr.irs = qhp->ep->rcv_seq; - pr_debug("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d flags 0x%x qpcaps 0x%x\n", - __func__, - init_attr.rq_addr, init_attr.rq_size, - init_attr.flags, init_attr.qpcaps); - ret = cxio_rdma_init(&rhp->rdev, &init_attr); - pr_debug("%s ret %d\n", __func__, ret); - return ret; -} - -int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs, - int internal) -{ - int ret = 0; - struct iwch_qp_attributes newattr = qhp->attr; - unsigned long flag; - int disconnect = 0; - int terminate = 0; - int abort = 0; - int free = 0; - struct iwch_ep *ep = NULL; - - pr_debug("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__, - qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state, - (mask & IWCH_QP_ATTR_NEXT_STATE) ? 
attrs->next_state : -1); - - spin_lock_irqsave(&qhp->lock, flag); - - /* Process attr changes if in IDLE */ - if (mask & IWCH_QP_ATTR_VALID_MODIFY) { - if (qhp->attr.state != IWCH_QP_STATE_IDLE) { - ret = -EIO; - goto out; - } - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ) - newattr.enable_rdma_read = attrs->enable_rdma_read; - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE) - newattr.enable_rdma_write = attrs->enable_rdma_write; - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND) - newattr.enable_bind = attrs->enable_bind; - if (mask & IWCH_QP_ATTR_MAX_ORD) { - if (attrs->max_ord > - rhp->attr.max_rdma_read_qp_depth) { - ret = -EINVAL; - goto out; - } - newattr.max_ord = attrs->max_ord; - } - if (mask & IWCH_QP_ATTR_MAX_IRD) { - if (attrs->max_ird > - rhp->attr.max_rdma_reads_per_qp) { - ret = -EINVAL; - goto out; - } - newattr.max_ird = attrs->max_ird; - } - qhp->attr = newattr; - } - - if (!(mask & IWCH_QP_ATTR_NEXT_STATE)) - goto out; - if (qhp->attr.state == attrs->next_state) - goto out; - - switch (qhp->attr.state) { - case IWCH_QP_STATE_IDLE: - switch (attrs->next_state) { - case IWCH_QP_STATE_RTS: - if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) { - ret = -EINVAL; - goto out; - } - if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) { - ret = -EINVAL; - goto out; - } - qhp->attr.mpa_attr = attrs->mpa_attr; - qhp->attr.llp_stream_handle = attrs->llp_stream_handle; - qhp->ep = qhp->attr.llp_stream_handle; - qhp->attr.state = IWCH_QP_STATE_RTS; - - /* - * Ref the endpoint here and deref when we - * disassociate the endpoint from the QP. This - * happens in CLOSING->IDLE transition or *->ERROR - * transition. 
- */ - get_ep(&qhp->ep->com); - spin_unlock_irqrestore(&qhp->lock, flag); - ret = rdma_init(rhp, qhp, mask, attrs); - spin_lock_irqsave(&qhp->lock, flag); - if (ret) - goto err; - break; - case IWCH_QP_STATE_ERROR: - qhp->attr.state = IWCH_QP_STATE_ERROR; - flush_qp(qhp); - break; - default: - ret = -EINVAL; - goto out; - } - break; - case IWCH_QP_STATE_RTS: - switch (attrs->next_state) { - case IWCH_QP_STATE_CLOSING: - BUG_ON(kref_read(&qhp->ep->com.kref) < 2); - qhp->attr.state = IWCH_QP_STATE_CLOSING; - if (!internal) { - abort=0; - disconnect = 1; - ep = qhp->ep; - get_ep(&ep->com); - } - break; - case IWCH_QP_STATE_TERMINATE: - qhp->attr.state = IWCH_QP_STATE_TERMINATE; - if (qhp->ibqp.uobject) - cxio_set_wq_in_error(&qhp->wq); - if (!internal) - terminate = 1; - break; - case IWCH_QP_STATE_ERROR: - qhp->attr.state = IWCH_QP_STATE_ERROR; - if (!internal) { - abort=1; - disconnect = 1; - ep = qhp->ep; - get_ep(&ep->com); - } - goto err; - break; - default: - ret = -EINVAL; - goto out; - } - break; - case IWCH_QP_STATE_CLOSING: - if (!internal) { - ret = -EINVAL; - goto out; - } - switch (attrs->next_state) { - case IWCH_QP_STATE_IDLE: - flush_qp(qhp); - qhp->attr.state = IWCH_QP_STATE_IDLE; - qhp->attr.llp_stream_handle = NULL; - put_ep(&qhp->ep->com); - qhp->ep = NULL; - wake_up(&qhp->wait); - break; - case IWCH_QP_STATE_ERROR: - goto err; - default: - ret = -EINVAL; - goto err; - } - break; - case IWCH_QP_STATE_ERROR: - if (attrs->next_state != IWCH_QP_STATE_IDLE) { - ret = -EINVAL; - goto out; - } - - if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) || - !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) { - ret = -EINVAL; - goto out; - } - qhp->attr.state = IWCH_QP_STATE_IDLE; - break; - case IWCH_QP_STATE_TERMINATE: - if (!internal) { - ret = -EINVAL; - goto out; - } - goto err; - break; - default: - pr_err("%s in a bad state %d\n", __func__, qhp->attr.state); - ret = -EINVAL; - goto err; - break; - } - goto out; -err: - pr_debug("%s disassociating ep %p qpid 
0x%x\n", __func__, qhp->ep, - qhp->wq.qpid); - - /* disassociate the LLP connection */ - qhp->attr.llp_stream_handle = NULL; - ep = qhp->ep; - qhp->ep = NULL; - qhp->attr.state = IWCH_QP_STATE_ERROR; - free=1; - wake_up(&qhp->wait); - BUG_ON(!ep); - flush_qp(qhp); -out: - spin_unlock_irqrestore(&qhp->lock, flag); - - if (terminate) - iwch_post_terminate(qhp, NULL); - - /* - * If disconnect is 1, then we need to initiate a disconnect - * on the EP. This can be a normal close (RTS->CLOSING) or - * an abnormal close (RTS/CLOSING->ERROR). - */ - if (disconnect) { - iwch_ep_disconnect(ep, abort, GFP_KERNEL); - put_ep(&ep->com); - } - - /* - * If free is 1, then we've disassociated the EP from the QP - * and we need to dereference the EP. - */ - if (free) - put_ep(&ep->com); - - pr_debug("%s exit state %d\n", __func__, qhp->attr.state); - return ret; -} diff --git a/drivers/infiniband/hw/cxgb3/tcb.h b/drivers/infiniband/hw/cxgb3/tcb.h deleted file mode 100644 index c702dc199e18..000000000000 --- a/drivers/infiniband/hw/cxgb3/tcb.h +++ /dev/null @@ -1,632 +0,0 @@ -/* - * Copyright (c) 2007 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _TCB_DEFS_H -#define _TCB_DEFS_H - -#define W_TCB_T_STATE 0 -#define S_TCB_T_STATE 0 -#define M_TCB_T_STATE 0xfULL -#define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE) - -#define W_TCB_TIMER 0 -#define S_TCB_TIMER 4 -#define M_TCB_TIMER 0x1ULL -#define V_TCB_TIMER(x) ((x) << S_TCB_TIMER) - -#define W_TCB_DACK_TIMER 0 -#define S_TCB_DACK_TIMER 5 -#define M_TCB_DACK_TIMER 0x1ULL -#define V_TCB_DACK_TIMER(x) ((x) << S_TCB_DACK_TIMER) - -#define W_TCB_DEL_FLAG 0 -#define S_TCB_DEL_FLAG 6 -#define M_TCB_DEL_FLAG 0x1ULL -#define V_TCB_DEL_FLAG(x) ((x) << S_TCB_DEL_FLAG) - -#define W_TCB_L2T_IX 0 -#define S_TCB_L2T_IX 7 -#define M_TCB_L2T_IX 0x7ffULL -#define V_TCB_L2T_IX(x) ((x) << S_TCB_L2T_IX) - -#define W_TCB_SMAC_SEL 0 -#define S_TCB_SMAC_SEL 18 -#define M_TCB_SMAC_SEL 0x3ULL -#define V_TCB_SMAC_SEL(x) ((x) << S_TCB_SMAC_SEL) - -#define W_TCB_TOS 0 -#define S_TCB_TOS 20 -#define M_TCB_TOS 0x3fULL -#define V_TCB_TOS(x) ((x) << S_TCB_TOS) - -#define W_TCB_MAX_RT 0 -#define S_TCB_MAX_RT 26 -#define M_TCB_MAX_RT 0xfULL -#define V_TCB_MAX_RT(x) ((x) << S_TCB_MAX_RT) - -#define W_TCB_T_RXTSHIFT 0 -#define S_TCB_T_RXTSHIFT 30 -#define M_TCB_T_RXTSHIFT 0xfULL -#define V_TCB_T_RXTSHIFT(x) ((x) << S_TCB_T_RXTSHIFT) - -#define W_TCB_T_DUPACKS 1 -#define S_TCB_T_DUPACKS 2 -#define M_TCB_T_DUPACKS 0xfULL -#define V_TCB_T_DUPACKS(x) ((x) << S_TCB_T_DUPACKS) - -#define W_TCB_T_MAXSEG 1 -#define S_TCB_T_MAXSEG 6 -#define M_TCB_T_MAXSEG 0xfULL -#define V_TCB_T_MAXSEG(x) ((x) << 
S_TCB_T_MAXSEG) - -#define W_TCB_T_FLAGS1 1 -#define S_TCB_T_FLAGS1 10 -#define M_TCB_T_FLAGS1 0xffffffffULL -#define V_TCB_T_FLAGS1(x) ((x) << S_TCB_T_FLAGS1) - -#define W_TCB_T_MIGRATION 1 -#define S_TCB_T_MIGRATION 20 -#define M_TCB_T_MIGRATION 0x1ULL -#define V_TCB_T_MIGRATION(x) ((x) << S_TCB_T_MIGRATION) - -#define W_TCB_T_FLAGS2 2 -#define S_TCB_T_FLAGS2 10 -#define M_TCB_T_FLAGS2 0x7fULL -#define V_TCB_T_FLAGS2(x) ((x) << S_TCB_T_FLAGS2) - -#define W_TCB_SND_SCALE 2 -#define S_TCB_SND_SCALE 17 -#define M_TCB_SND_SCALE 0xfULL -#define V_TCB_SND_SCALE(x) ((x) << S_TCB_SND_SCALE) - -#define W_TCB_RCV_SCALE 2 -#define S_TCB_RCV_SCALE 21 -#define M_TCB_RCV_SCALE 0xfULL -#define V_TCB_RCV_SCALE(x) ((x) << S_TCB_RCV_SCALE) - -#define W_TCB_SND_UNA_RAW 2 -#define S_TCB_SND_UNA_RAW 25 -#define M_TCB_SND_UNA_RAW 0x7ffffffULL -#define V_TCB_SND_UNA_RAW(x) ((x) << S_TCB_SND_UNA_RAW) - -#define W_TCB_SND_NXT_RAW 3 -#define S_TCB_SND_NXT_RAW 20 -#define M_TCB_SND_NXT_RAW 0x7ffffffULL -#define V_TCB_SND_NXT_RAW(x) ((x) << S_TCB_SND_NXT_RAW) - -#define W_TCB_RCV_NXT 4 -#define S_TCB_RCV_NXT 15 -#define M_TCB_RCV_NXT 0xffffffffULL -#define V_TCB_RCV_NXT(x) ((x) << S_TCB_RCV_NXT) - -#define W_TCB_RCV_ADV 5 -#define S_TCB_RCV_ADV 15 -#define M_TCB_RCV_ADV 0xffffULL -#define V_TCB_RCV_ADV(x) ((x) << S_TCB_RCV_ADV) - -#define W_TCB_SND_MAX_RAW 5 -#define S_TCB_SND_MAX_RAW 31 -#define M_TCB_SND_MAX_RAW 0x7ffffffULL -#define V_TCB_SND_MAX_RAW(x) ((x) << S_TCB_SND_MAX_RAW) - -#define W_TCB_SND_CWND 6 -#define S_TCB_SND_CWND 26 -#define M_TCB_SND_CWND 0x7ffffffULL -#define V_TCB_SND_CWND(x) ((x) << S_TCB_SND_CWND) - -#define W_TCB_SND_SSTHRESH 7 -#define S_TCB_SND_SSTHRESH 21 -#define M_TCB_SND_SSTHRESH 0x7ffffffULL -#define V_TCB_SND_SSTHRESH(x) ((x) << S_TCB_SND_SSTHRESH) - -#define W_TCB_T_RTT_TS_RECENT_AGE 8 -#define S_TCB_T_RTT_TS_RECENT_AGE 16 -#define M_TCB_T_RTT_TS_RECENT_AGE 0xffffffffULL -#define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE) - -#define 
W_TCB_T_RTSEQ_RECENT 9 -#define S_TCB_T_RTSEQ_RECENT 16 -#define M_TCB_T_RTSEQ_RECENT 0xffffffffULL -#define V_TCB_T_RTSEQ_RECENT(x) ((x) << S_TCB_T_RTSEQ_RECENT) - -#define W_TCB_T_SRTT 10 -#define S_TCB_T_SRTT 16 -#define M_TCB_T_SRTT 0xffffULL -#define V_TCB_T_SRTT(x) ((x) << S_TCB_T_SRTT) - -#define W_TCB_T_RTTVAR 11 -#define S_TCB_T_RTTVAR 0 -#define M_TCB_T_RTTVAR 0xffffULL -#define V_TCB_T_RTTVAR(x) ((x) << S_TCB_T_RTTVAR) - -#define W_TCB_TS_LAST_ACK_SENT_RAW 11 -#define S_TCB_TS_LAST_ACK_SENT_RAW 16 -#define M_TCB_TS_LAST_ACK_SENT_RAW 0x7ffffffULL -#define V_TCB_TS_LAST_ACK_SENT_RAW(x) ((x) << S_TCB_TS_LAST_ACK_SENT_RAW) - -#define W_TCB_DIP 12 -#define S_TCB_DIP 11 -#define M_TCB_DIP 0xffffffffULL -#define V_TCB_DIP(x) ((x) << S_TCB_DIP) - -#define W_TCB_SIP 13 -#define S_TCB_SIP 11 -#define M_TCB_SIP 0xffffffffULL -#define V_TCB_SIP(x) ((x) << S_TCB_SIP) - -#define W_TCB_DP 14 -#define S_TCB_DP 11 -#define M_TCB_DP 0xffffULL -#define V_TCB_DP(x) ((x) << S_TCB_DP) - -#define W_TCB_SP 14 -#define S_TCB_SP 27 -#define M_TCB_SP 0xffffULL -#define V_TCB_SP(x) ((x) << S_TCB_SP) - -#define W_TCB_TIMESTAMP 15 -#define S_TCB_TIMESTAMP 11 -#define M_TCB_TIMESTAMP 0xffffffffULL -#define V_TCB_TIMESTAMP(x) ((x) << S_TCB_TIMESTAMP) - -#define W_TCB_TIMESTAMP_OFFSET 16 -#define S_TCB_TIMESTAMP_OFFSET 11 -#define M_TCB_TIMESTAMP_OFFSET 0xfULL -#define V_TCB_TIMESTAMP_OFFSET(x) ((x) << S_TCB_TIMESTAMP_OFFSET) - -#define W_TCB_TX_MAX 16 -#define S_TCB_TX_MAX 15 -#define M_TCB_TX_MAX 0xffffffffULL -#define V_TCB_TX_MAX(x) ((x) << S_TCB_TX_MAX) - -#define W_TCB_TX_HDR_PTR_RAW 17 -#define S_TCB_TX_HDR_PTR_RAW 15 -#define M_TCB_TX_HDR_PTR_RAW 0x1ffffULL -#define V_TCB_TX_HDR_PTR_RAW(x) ((x) << S_TCB_TX_HDR_PTR_RAW) - -#define W_TCB_TX_LAST_PTR_RAW 18 -#define S_TCB_TX_LAST_PTR_RAW 0 -#define M_TCB_TX_LAST_PTR_RAW 0x1ffffULL -#define V_TCB_TX_LAST_PTR_RAW(x) ((x) << S_TCB_TX_LAST_PTR_RAW) - -#define W_TCB_TX_COMPACT 18 -#define S_TCB_TX_COMPACT 17 -#define M_TCB_TX_COMPACT 
0x1ULL -#define V_TCB_TX_COMPACT(x) ((x) << S_TCB_TX_COMPACT) - -#define W_TCB_RX_COMPACT 18 -#define S_TCB_RX_COMPACT 18 -#define M_TCB_RX_COMPACT 0x1ULL -#define V_TCB_RX_COMPACT(x) ((x) << S_TCB_RX_COMPACT) - -#define W_TCB_RCV_WND 18 -#define S_TCB_RCV_WND 19 -#define M_TCB_RCV_WND 0x7ffffffULL -#define V_TCB_RCV_WND(x) ((x) << S_TCB_RCV_WND) - -#define W_TCB_RX_HDR_OFFSET 19 -#define S_TCB_RX_HDR_OFFSET 14 -#define M_TCB_RX_HDR_OFFSET 0x7ffffffULL -#define V_TCB_RX_HDR_OFFSET(x) ((x) << S_TCB_RX_HDR_OFFSET) - -#define W_TCB_RX_FRAG0_START_IDX_RAW 20 -#define S_TCB_RX_FRAG0_START_IDX_RAW 9 -#define M_TCB_RX_FRAG0_START_IDX_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG0_START_IDX_RAW(x) ((x) << S_TCB_RX_FRAG0_START_IDX_RAW) - -#define W_TCB_RX_FRAG1_START_IDX_OFFSET 21 -#define S_TCB_RX_FRAG1_START_IDX_OFFSET 4 -#define M_TCB_RX_FRAG1_START_IDX_OFFSET 0x7ffffffULL -#define V_TCB_RX_FRAG1_START_IDX_OFFSET(x) ((x) << S_TCB_RX_FRAG1_START_IDX_OFFSET) - -#define W_TCB_RX_FRAG0_LEN 21 -#define S_TCB_RX_FRAG0_LEN 31 -#define M_TCB_RX_FRAG0_LEN 0x7ffffffULL -#define V_TCB_RX_FRAG0_LEN(x) ((x) << S_TCB_RX_FRAG0_LEN) - -#define W_TCB_RX_FRAG1_LEN 22 -#define S_TCB_RX_FRAG1_LEN 26 -#define M_TCB_RX_FRAG1_LEN 0x7ffffffULL -#define V_TCB_RX_FRAG1_LEN(x) ((x) << S_TCB_RX_FRAG1_LEN) - -#define W_TCB_NEWRENO_RECOVER 23 -#define S_TCB_NEWRENO_RECOVER 21 -#define M_TCB_NEWRENO_RECOVER 0x7ffffffULL -#define V_TCB_NEWRENO_RECOVER(x) ((x) << S_TCB_NEWRENO_RECOVER) - -#define W_TCB_PDU_HAVE_LEN 24 -#define S_TCB_PDU_HAVE_LEN 16 -#define M_TCB_PDU_HAVE_LEN 0x1ULL -#define V_TCB_PDU_HAVE_LEN(x) ((x) << S_TCB_PDU_HAVE_LEN) - -#define W_TCB_PDU_LEN 24 -#define S_TCB_PDU_LEN 17 -#define M_TCB_PDU_LEN 0xffffULL -#define V_TCB_PDU_LEN(x) ((x) << S_TCB_PDU_LEN) - -#define W_TCB_RX_QUIESCE 25 -#define S_TCB_RX_QUIESCE 1 -#define M_TCB_RX_QUIESCE 0x1ULL -#define V_TCB_RX_QUIESCE(x) ((x) << S_TCB_RX_QUIESCE) - -#define W_TCB_RX_PTR_RAW 25 -#define S_TCB_RX_PTR_RAW 2 -#define M_TCB_RX_PTR_RAW 
0x1ffffULL -#define V_TCB_RX_PTR_RAW(x) ((x) << S_TCB_RX_PTR_RAW) - -#define W_TCB_CPU_NO 25 -#define S_TCB_CPU_NO 19 -#define M_TCB_CPU_NO 0x7fULL -#define V_TCB_CPU_NO(x) ((x) << S_TCB_CPU_NO) - -#define W_TCB_ULP_TYPE 25 -#define S_TCB_ULP_TYPE 26 -#define M_TCB_ULP_TYPE 0xfULL -#define V_TCB_ULP_TYPE(x) ((x) << S_TCB_ULP_TYPE) - -#define W_TCB_RX_FRAG1_PTR_RAW 25 -#define S_TCB_RX_FRAG1_PTR_RAW 30 -#define M_TCB_RX_FRAG1_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG1_PTR_RAW(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW) - -#define W_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 26 -#define S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 15 -#define M_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG2_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW) - -#define W_TCB_RX_FRAG2_PTR_RAW 27 -#define S_TCB_RX_FRAG2_PTR_RAW 10 -#define M_TCB_RX_FRAG2_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG2_PTR_RAW(x) ((x) << S_TCB_RX_FRAG2_PTR_RAW) - -#define W_TCB_RX_FRAG2_LEN_RAW 27 -#define S_TCB_RX_FRAG2_LEN_RAW 27 -#define M_TCB_RX_FRAG2_LEN_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG2_LEN_RAW(x) ((x) << S_TCB_RX_FRAG2_LEN_RAW) - -#define W_TCB_RX_FRAG3_PTR_RAW 28 -#define S_TCB_RX_FRAG3_PTR_RAW 22 -#define M_TCB_RX_FRAG3_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG3_PTR_RAW(x) ((x) << S_TCB_RX_FRAG3_PTR_RAW) - -#define W_TCB_RX_FRAG3_LEN_RAW 29 -#define S_TCB_RX_FRAG3_LEN_RAW 7 -#define M_TCB_RX_FRAG3_LEN_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG3_LEN_RAW(x) ((x) << S_TCB_RX_FRAG3_LEN_RAW) - -#define W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 30 -#define S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 2 -#define M_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG3_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW) - -#define W_TCB_PDU_HDR_LEN 30 -#define S_TCB_PDU_HDR_LEN 29 -#define M_TCB_PDU_HDR_LEN 0xffULL -#define V_TCB_PDU_HDR_LEN(x) ((x) << S_TCB_PDU_HDR_LEN) - -#define W_TCB_SLUSH1 31 -#define S_TCB_SLUSH1 5 -#define M_TCB_SLUSH1 0x7ffffULL -#define 
V_TCB_SLUSH1(x) ((x) << S_TCB_SLUSH1) - -#define W_TCB_ULP_RAW 31 -#define S_TCB_ULP_RAW 24 -#define M_TCB_ULP_RAW 0xffULL -#define V_TCB_ULP_RAW(x) ((x) << S_TCB_ULP_RAW) - -#define W_TCB_DDP_RDMAP_VERSION 25 -#define S_TCB_DDP_RDMAP_VERSION 30 -#define M_TCB_DDP_RDMAP_VERSION 0x1ULL -#define V_TCB_DDP_RDMAP_VERSION(x) ((x) << S_TCB_DDP_RDMAP_VERSION) - -#define W_TCB_MARKER_ENABLE_RX 25 -#define S_TCB_MARKER_ENABLE_RX 31 -#define M_TCB_MARKER_ENABLE_RX 0x1ULL -#define V_TCB_MARKER_ENABLE_RX(x) ((x) << S_TCB_MARKER_ENABLE_RX) - -#define W_TCB_MARKER_ENABLE_TX 26 -#define S_TCB_MARKER_ENABLE_TX 0 -#define M_TCB_MARKER_ENABLE_TX 0x1ULL -#define V_TCB_MARKER_ENABLE_TX(x) ((x) << S_TCB_MARKER_ENABLE_TX) - -#define W_TCB_CRC_ENABLE 26 -#define S_TCB_CRC_ENABLE 1 -#define M_TCB_CRC_ENABLE 0x1ULL -#define V_TCB_CRC_ENABLE(x) ((x) << S_TCB_CRC_ENABLE) - -#define W_TCB_IRS_ULP 26 -#define S_TCB_IRS_ULP 2 -#define M_TCB_IRS_ULP 0x1ffULL -#define V_TCB_IRS_ULP(x) ((x) << S_TCB_IRS_ULP) - -#define W_TCB_ISS_ULP 26 -#define S_TCB_ISS_ULP 11 -#define M_TCB_ISS_ULP 0x1ffULL -#define V_TCB_ISS_ULP(x) ((x) << S_TCB_ISS_ULP) - -#define W_TCB_TX_PDU_LEN 26 -#define S_TCB_TX_PDU_LEN 20 -#define M_TCB_TX_PDU_LEN 0x3fffULL -#define V_TCB_TX_PDU_LEN(x) ((x) << S_TCB_TX_PDU_LEN) - -#define W_TCB_TX_PDU_OUT 27 -#define S_TCB_TX_PDU_OUT 2 -#define M_TCB_TX_PDU_OUT 0x1ULL -#define V_TCB_TX_PDU_OUT(x) ((x) << S_TCB_TX_PDU_OUT) - -#define W_TCB_CQ_IDX_SQ 27 -#define S_TCB_CQ_IDX_SQ 3 -#define M_TCB_CQ_IDX_SQ 0xffffULL -#define V_TCB_CQ_IDX_SQ(x) ((x) << S_TCB_CQ_IDX_SQ) - -#define W_TCB_CQ_IDX_RQ 27 -#define S_TCB_CQ_IDX_RQ 19 -#define M_TCB_CQ_IDX_RQ 0xffffULL -#define V_TCB_CQ_IDX_RQ(x) ((x) << S_TCB_CQ_IDX_RQ) - -#define W_TCB_QP_ID 28 -#define S_TCB_QP_ID 3 -#define M_TCB_QP_ID 0xffffULL -#define V_TCB_QP_ID(x) ((x) << S_TCB_QP_ID) - -#define W_TCB_PD_ID 28 -#define S_TCB_PD_ID 19 -#define M_TCB_PD_ID 0xffffULL -#define V_TCB_PD_ID(x) ((x) << S_TCB_PD_ID) - -#define W_TCB_STAG 29 -#define 
S_TCB_STAG 3 -#define M_TCB_STAG 0xffffffffULL -#define V_TCB_STAG(x) ((x) << S_TCB_STAG) - -#define W_TCB_RQ_START 30 -#define S_TCB_RQ_START 3 -#define M_TCB_RQ_START 0x3ffffffULL -#define V_TCB_RQ_START(x) ((x) << S_TCB_RQ_START) - -#define W_TCB_RQ_MSN 30 -#define S_TCB_RQ_MSN 29 -#define M_TCB_RQ_MSN 0x3ffULL -#define V_TCB_RQ_MSN(x) ((x) << S_TCB_RQ_MSN) - -#define W_TCB_RQ_MAX_OFFSET 31 -#define S_TCB_RQ_MAX_OFFSET 7 -#define M_TCB_RQ_MAX_OFFSET 0xfULL -#define V_TCB_RQ_MAX_OFFSET(x) ((x) << S_TCB_RQ_MAX_OFFSET) - -#define W_TCB_RQ_WRITE_PTR 31 -#define S_TCB_RQ_WRITE_PTR 11 -#define M_TCB_RQ_WRITE_PTR 0x3ffULL -#define V_TCB_RQ_WRITE_PTR(x) ((x) << S_TCB_RQ_WRITE_PTR) - -#define W_TCB_INB_WRITE_PERM 31 -#define S_TCB_INB_WRITE_PERM 21 -#define M_TCB_INB_WRITE_PERM 0x1ULL -#define V_TCB_INB_WRITE_PERM(x) ((x) << S_TCB_INB_WRITE_PERM) - -#define W_TCB_INB_READ_PERM 31 -#define S_TCB_INB_READ_PERM 22 -#define M_TCB_INB_READ_PERM 0x1ULL -#define V_TCB_INB_READ_PERM(x) ((x) << S_TCB_INB_READ_PERM) - -#define W_TCB_ORD_L_BIT_VLD 31 -#define S_TCB_ORD_L_BIT_VLD 23 -#define M_TCB_ORD_L_BIT_VLD 0x1ULL -#define V_TCB_ORD_L_BIT_VLD(x) ((x) << S_TCB_ORD_L_BIT_VLD) - -#define W_TCB_RDMAP_OPCODE 31 -#define S_TCB_RDMAP_OPCODE 24 -#define M_TCB_RDMAP_OPCODE 0xfULL -#define V_TCB_RDMAP_OPCODE(x) ((x) << S_TCB_RDMAP_OPCODE) - -#define W_TCB_TX_FLUSH 31 -#define S_TCB_TX_FLUSH 28 -#define M_TCB_TX_FLUSH 0x1ULL -#define V_TCB_TX_FLUSH(x) ((x) << S_TCB_TX_FLUSH) - -#define W_TCB_TX_OOS_RXMT 31 -#define S_TCB_TX_OOS_RXMT 29 -#define M_TCB_TX_OOS_RXMT 0x1ULL -#define V_TCB_TX_OOS_RXMT(x) ((x) << S_TCB_TX_OOS_RXMT) - -#define W_TCB_TX_OOS_TXMT 31 -#define S_TCB_TX_OOS_TXMT 30 -#define M_TCB_TX_OOS_TXMT 0x1ULL -#define V_TCB_TX_OOS_TXMT(x) ((x) << S_TCB_TX_OOS_TXMT) - -#define W_TCB_SLUSH_AUX2 31 -#define S_TCB_SLUSH_AUX2 31 -#define M_TCB_SLUSH_AUX2 0x1ULL -#define V_TCB_SLUSH_AUX2(x) ((x) << S_TCB_SLUSH_AUX2) - -#define W_TCB_RX_FRAG1_PTR_RAW2 25 -#define S_TCB_RX_FRAG1_PTR_RAW2 
30 -#define M_TCB_RX_FRAG1_PTR_RAW2 0x1ffffULL -#define V_TCB_RX_FRAG1_PTR_RAW2(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW2) - -#define W_TCB_RX_DDP_FLAGS 26 -#define S_TCB_RX_DDP_FLAGS 15 -#define M_TCB_RX_DDP_FLAGS 0x3ffULL -#define V_TCB_RX_DDP_FLAGS(x) ((x) << S_TCB_RX_DDP_FLAGS) - -#define W_TCB_SLUSH_AUX3 26 -#define S_TCB_SLUSH_AUX3 31 -#define M_TCB_SLUSH_AUX3 0x1ffULL -#define V_TCB_SLUSH_AUX3(x) ((x) << S_TCB_SLUSH_AUX3) - -#define W_TCB_RX_DDP_BUF0_OFFSET 27 -#define S_TCB_RX_DDP_BUF0_OFFSET 8 -#define M_TCB_RX_DDP_BUF0_OFFSET 0x3fffffULL -#define V_TCB_RX_DDP_BUF0_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF0_OFFSET) - -#define W_TCB_RX_DDP_BUF0_LEN 27 -#define S_TCB_RX_DDP_BUF0_LEN 30 -#define M_TCB_RX_DDP_BUF0_LEN 0x3fffffULL -#define V_TCB_RX_DDP_BUF0_LEN(x) ((x) << S_TCB_RX_DDP_BUF0_LEN) - -#define W_TCB_RX_DDP_BUF1_OFFSET 28 -#define S_TCB_RX_DDP_BUF1_OFFSET 20 -#define M_TCB_RX_DDP_BUF1_OFFSET 0x3fffffULL -#define V_TCB_RX_DDP_BUF1_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF1_OFFSET) - -#define W_TCB_RX_DDP_BUF1_LEN 29 -#define S_TCB_RX_DDP_BUF1_LEN 10 -#define M_TCB_RX_DDP_BUF1_LEN 0x3fffffULL -#define V_TCB_RX_DDP_BUF1_LEN(x) ((x) << S_TCB_RX_DDP_BUF1_LEN) - -#define W_TCB_RX_DDP_BUF0_TAG 30 -#define S_TCB_RX_DDP_BUF0_TAG 0 -#define M_TCB_RX_DDP_BUF0_TAG 0xffffffffULL -#define V_TCB_RX_DDP_BUF0_TAG(x) ((x) << S_TCB_RX_DDP_BUF0_TAG) - -#define W_TCB_RX_DDP_BUF1_TAG 31 -#define S_TCB_RX_DDP_BUF1_TAG 0 -#define M_TCB_RX_DDP_BUF1_TAG 0xffffffffULL -#define V_TCB_RX_DDP_BUF1_TAG(x) ((x) << S_TCB_RX_DDP_BUF1_TAG) - -#define S_TF_DACK 10 -#define V_TF_DACK(x) ((x) << S_TF_DACK) - -#define S_TF_NAGLE 11 -#define V_TF_NAGLE(x) ((x) << S_TF_NAGLE) - -#define S_TF_RECV_SCALE 12 -#define V_TF_RECV_SCALE(x) ((x) << S_TF_RECV_SCALE) - -#define S_TF_RECV_TSTMP 13 -#define V_TF_RECV_TSTMP(x) ((x) << S_TF_RECV_TSTMP) - -#define S_TF_RECV_SACK 14 -#define V_TF_RECV_SACK(x) ((x) << S_TF_RECV_SACK) - -#define S_TF_TURBO 15 -#define V_TF_TURBO(x) ((x) << S_TF_TURBO) - -#define S_TF_KEEPALIVE 
16 -#define V_TF_KEEPALIVE(x) ((x) << S_TF_KEEPALIVE) - -#define S_TF_TCAM_BYPASS 17 -#define V_TF_TCAM_BYPASS(x) ((x) << S_TF_TCAM_BYPASS) - -#define S_TF_CORE_FIN 18 -#define V_TF_CORE_FIN(x) ((x) << S_TF_CORE_FIN) - -#define S_TF_CORE_MORE 19 -#define V_TF_CORE_MORE(x) ((x) << S_TF_CORE_MORE) - -#define S_TF_MIGRATING 20 -#define V_TF_MIGRATING(x) ((x) << S_TF_MIGRATING) - -#define S_TF_ACTIVE_OPEN 21 -#define V_TF_ACTIVE_OPEN(x) ((x) << S_TF_ACTIVE_OPEN) - -#define S_TF_ASK_MODE 22 -#define V_TF_ASK_MODE(x) ((x) << S_TF_ASK_MODE) - -#define S_TF_NON_OFFLOAD 23 -#define V_TF_NON_OFFLOAD(x) ((x) << S_TF_NON_OFFLOAD) - -#define S_TF_MOD_SCHD 24 -#define V_TF_MOD_SCHD(x) ((x) << S_TF_MOD_SCHD) - -#define S_TF_MOD_SCHD_REASON0 25 -#define V_TF_MOD_SCHD_REASON0(x) ((x) << S_TF_MOD_SCHD_REASON0) - -#define S_TF_MOD_SCHD_REASON1 26 -#define V_TF_MOD_SCHD_REASON1(x) ((x) << S_TF_MOD_SCHD_REASON1) - -#define S_TF_MOD_SCHD_RX 27 -#define V_TF_MOD_SCHD_RX(x) ((x) << S_TF_MOD_SCHD_RX) - -#define S_TF_CORE_PUSH 28 -#define V_TF_CORE_PUSH(x) ((x) << S_TF_CORE_PUSH) - -#define S_TF_RCV_COALESCE_ENABLE 29 -#define V_TF_RCV_COALESCE_ENABLE(x) ((x) << S_TF_RCV_COALESCE_ENABLE) - -#define S_TF_RCV_COALESCE_PUSH 30 -#define V_TF_RCV_COALESCE_PUSH(x) ((x) << S_TF_RCV_COALESCE_PUSH) - -#define S_TF_RCV_COALESCE_LAST_PSH 31 -#define V_TF_RCV_COALESCE_LAST_PSH(x) ((x) << S_TF_RCV_COALESCE_LAST_PSH) - -#define S_TF_RCV_COALESCE_HEARTBEAT 32 -#define V_TF_RCV_COALESCE_HEARTBEAT(x) ((x) << S_TF_RCV_COALESCE_HEARTBEAT) - -#define S_TF_HALF_CLOSE 33 -#define V_TF_HALF_CLOSE(x) ((x) << S_TF_HALF_CLOSE) - -#define S_TF_DACK_MSS 34 -#define V_TF_DACK_MSS(x) ((x) << S_TF_DACK_MSS) - -#define S_TF_CCTRL_SEL0 35 -#define V_TF_CCTRL_SEL0(x) ((x) << S_TF_CCTRL_SEL0) - -#define S_TF_CCTRL_SEL1 36 -#define V_TF_CCTRL_SEL1(x) ((x) << S_TF_CCTRL_SEL1) - -#define S_TF_TCP_NEWRENO_FAST_RECOVERY 37 -#define V_TF_TCP_NEWRENO_FAST_RECOVERY(x) ((x) << S_TF_TCP_NEWRENO_FAST_RECOVERY) - -#define 
S_TF_TX_PACE_AUTO 38 -#define V_TF_TX_PACE_AUTO(x) ((x) << S_TF_TX_PACE_AUTO) - -#define S_TF_PEER_FIN_HELD 39 -#define V_TF_PEER_FIN_HELD(x) ((x) << S_TF_PEER_FIN_HELD) - -#define S_TF_CORE_URG 40 -#define V_TF_CORE_URG(x) ((x) << S_TF_CORE_URG) - -#define S_TF_RDMA_ERROR 41 -#define V_TF_RDMA_ERROR(x) ((x) << S_TF_RDMA_ERROR) - -#define S_TF_SSWS_DISABLED 42 -#define V_TF_SSWS_DISABLED(x) ((x) << S_TF_SSWS_DISABLED) - -#define S_TF_DUPACK_COUNT_ODD 43 -#define V_TF_DUPACK_COUNT_ODD(x) ((x) << S_TF_DUPACK_COUNT_ODD) - -#define S_TF_TX_CHANNEL 44 -#define V_TF_TX_CHANNEL(x) ((x) << S_TF_TX_CHANNEL) - -#define S_TF_RX_CHANNEL 45 -#define V_TF_RX_CHANNEL(x) ((x) << S_TF_RX_CHANNEL) - -#define S_TF_TX_PACE_FIXED 46 -#define V_TF_TX_PACE_FIXED(x) ((x) << S_TF_TX_PACE_FIXED) - -#define S_TF_RDMA_FLM_ERROR 47 -#define V_TF_RDMA_FLM_ERROR(x) ((x) << S_TF_RDMA_FLM_ERROR) - -#define S_TF_RX_FLOW_CONTROL_DISABLE 48 -#define V_TF_RX_FLOW_CONTROL_DISABLE(x) ((x) << S_TF_RX_FLOW_CONTROL_DISABLE) - -#endif /* _TCB_DEFS_H */ diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index d373ac0fe2cb..ba83d942997c 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -305,7 +305,10 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro static int c4iw_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { + int ret = 0; pr_debug("ibdev %p\n", ibdev); + ret = ib_get_eth_speed(ibdev, port, &props->active_speed, + &props->active_width); props->port_cap_flags = IB_PORT_CM_SUP | @@ -315,11 +318,9 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port, IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; props->pkey_tbl_len = 1; - props->active_width = 2; - props->active_speed = IB_SPEED_DDR; props->max_msg_sz = -1; - return 0; + return ret; } static ssize_t hw_rev_show(struct device *dev, diff --git 
a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 90e08c0c332d..8a522e14ef62 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -46,32 +46,32 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; struct hns_roce_ah *ah = to_hr_ah(ibah); - u16 vlan_tag = 0xffff; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); + u16 vlan_id = 0xffff; bool vlan_en = false; int ret; gid_attr = ah_attr->grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan_tag, NULL); + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); if (ret) return ret; /* Get mac address */ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); - if (vlan_tag < VLAN_CFI_MASK) { + if (vlan_id < VLAN_N_VID) { vlan_en = true; - vlan_tag |= (rdma_ah_get_sl(ah_attr) & + vlan_id |= (rdma_ah_get_sl(ah_attr) & HNS_ROCE_VLAN_SL_BIT_MASK) << HNS_ROCE_VLAN_SL_SHIFT; } ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; - ah->av.vlan = vlan_tag; + ah->av.vlan_id = vlan_id; ah->av.vlan_en = vlan_en; - dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index, - ah->av.vlan); + dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index, + ah->av.vlan_id); if (rdma_ah_get_static_rate(ah_attr)) ah->av.stat_rate = IB_RATE_10_GBPS; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 96d1302abde1..940761310430 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -45,7 +45,7 @@ #define HNS_ROCE_MAX_MSG_LEN 0x80000000 -#define HNS_ROCE_ALOGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b)) +#define HNS_ROCE_ALIGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b)) #define HNS_ROCE_IB_MIN_SQ_STRIDE 6 @@ -53,8 +53,6 @@ #define BA_BYTE_LEN 8 -#define BITS_PER_BYTE 8 - /* Hardware specification only 
for v1 engine */ #define HNS_ROCE_MIN_CQE_NUM 0x40 #define HNS_ROCE_MIN_WQE_NUM 0x20 @@ -582,7 +580,7 @@ struct hns_roce_av { u8 tclass; u8 dgid[HNS_ROCE_GID_SIZE]; u8 mac[ETH_ALEN]; - u16 vlan; + u16 vlan_id; bool vlan_en; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7a89d669f8bf..7218f6d4101a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -389,7 +389,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, V2_UD_SEND_WQE_BYTE_36_VLAN_S, - le16_to_cpu(ah->av.vlan)); + ah->av.vlan_id); roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, @@ -4061,8 +4061,8 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); const struct ib_gid_attr *gid_attr = NULL; int is_roce_protocol; + u16 vlan_id = 0xffff; bool is_udp = false; - u16 vlan = 0xffff; u8 ib_port; u8 hr_port; int ret; @@ -4074,7 +4074,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, if (is_roce_protocol) { gid_attr = attr->ah_attr.grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL); + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); if (ret) return ret; @@ -4083,7 +4083,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, IB_GID_TYPE_ROCE_UDP_ENCAP); } - if (vlan < VLAN_CFI_MASK) { + if (vlan_id < VLAN_N_VID) { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); roce_set_bit(qpc_mask->byte_76_srqn_op_en, @@ -4095,7 +4095,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, } roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, - V2_QPC_BYTE_24_VLAN_ID_S, vlan); + V2_QPC_BYTE_24_VLAN_ID_S, vlan_id); roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, V2_QPC_BYTE_24_VLAN_ID_S, 0); @@ -4650,16 +4650,14 @@ static int 
hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, { struct hns_roce_cq *send_cq, *recv_cq; struct ib_device *ibdev = &hr_dev->ib_dev; - int ret; + int ret = 0; if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) { /* Modify qp to reset before destroying qp */ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); - if (ret) { + if (ret) ibdev_err(ibdev, "modify QP to Reset failed.\n"); - return ret; - } } send_cq = to_hr_cq(hr_qp->ibqp.send_cq); @@ -4715,7 +4713,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, kfree(hr_qp->rq_inl_buf.wqe_list); } - return 0; + return ret; } static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) @@ -4725,11 +4723,9 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) int ret; ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); - if (ret) { + if (ret) ibdev_err(&hr_dev->ib_dev, "Destroy qp 0x%06lx failed(%d)\n", hr_qp->qpn, ret); - return ret; - } if (hr_qp->ibqp.qp_type == IB_QPT_GSI) kfree(hr_to_hr_sqp(hr_qp)); @@ -6092,11 +6088,11 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, - hr_dev->caps.idx_ba_pg_sz); + hr_dev->caps.idx_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, - hr_dev->caps.idx_buf_pg_sz); + hr_dev->caps.idx_buf_pg_sz + PG_SHIFT_OFFSET); srq_context->idx_nxt_blk_addr = cpu_to_le32(mtts_idx[1] >> PAGE_ADDR_SHIFT); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 43219d2f7de0..76a14db7028d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -87,8 +87,8 @@ #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 #define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 #define 
HNS_ROCE_V2_SCCC_ENTRY_SZ 32 -#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ 4096 -#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ 4096 +#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE +#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 #define HNS_ROCE_INVALID_LKEY 0x100 diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index bd78ff90d998..6aa27d6ea3a6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -332,9 +332,8 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, u8 max_sq_stride = ilog2(roundup_sq_stride); /* Sanity check SQ size before proceeding */ - if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes || - ucmd->log_sq_stride > max_sq_stride || - ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { + if (ucmd->log_sq_stride > max_sq_stride || + ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { ibdev_err(&hr_dev->ib_dev, "check SQ size error!\n"); return -EINVAL; } @@ -358,13 +357,16 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, u32 max_cnt; int ret; + if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) || + hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) + return -EINVAL; + ret = check_sq_size_with_integrity(hr_dev, cap, ucmd); if (ret) { ibdev_err(&hr_dev->ib_dev, "Sanity check sq size failed\n"); return ret; } - hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; hr_qp->sq.wqe_shift = ucmd->log_sq_stride; max_cnt = max(1U, cap->max_send_sge); @@ -391,37 +393,37 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, /* Get buf size, SQ and RQ are aligned to page_szie */ if (hr_dev->caps.max_sq_sg <= 2) { - hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), PAGE_SIZE) + - HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + 
HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); hr_qp->sq.offset = 0; - hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->rq.offset = HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); } else { page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); hr_qp->sge.sge_cnt = ex_sge_num ? max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0; - hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size) + - HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift), page_size) + - HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); hr_qp->sq.offset = 0; if (ex_sge_num) { - hr_qp->sge.offset = HNS_ROCE_ALOGN_UP( + hr_qp->sge.offset = HNS_ROCE_ALIGN_UP( (hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); hr_qp->rq.offset = hr_qp->sge.offset + - HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift), page_size); } else { - hr_qp->rq.offset = HNS_ROCE_ALOGN_UP( + hr_qp->rq.offset = HNS_ROCE_ALIGN_UP( (hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); @@ -591,19 +593,19 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); hr_qp->sq.offset = 0; - size = HNS_ROCE_ALOGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, + size = HNS_ROCE_ALIGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size); if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) { hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift), (u32)hr_qp->sge.sge_cnt); hr_qp->sge.offset = size; - size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt << + size += HNS_ROCE_ALIGN_UP(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift, page_size); } hr_qp->rq.offset 
= size; - size += HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), + size += HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size); hr_qp->buff_size = size; diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 0a31d0a3d657..06871731ac43 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -98,11 +98,15 @@ static int hns_roce_fill_res_cq_entry(struct sk_buff *msg, goto err; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); - if (!table_attr) + if (!table_attr) { + ret = -EMSGSIZE; goto err; + } - if (hns_roce_fill_cq(msg, context)) + if (hns_roce_fill_cq(msg, context)) { + ret = -EMSGSIZE; goto err_cancel_table; + } nla_nest_end(msg, table_attr); kfree(context); @@ -113,7 +117,7 @@ err_cancel_table: nla_nest_cancel(msg, table_attr); err: kfree(context); - return -EMSGSIZE; + return ret; } int hns_roce_fill_res_entry(struct sk_buff *msg, diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 9591457eb768..d96041d806f6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -180,8 +180,7 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, { struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); struct hns_roce_ib_create_srq ucmd; - u32 page_shift; - u32 npages; + struct hns_roce_buf *buf; int ret; if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) @@ -191,11 +190,13 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); - npages = (ib_umem_page_count(srq->umem) + - (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / - (1 << hr_dev->caps.srqwqe_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, npages, page_shift, &srq->mtt); + buf = &srq->buf; + buf->npages = 
(ib_umem_page_count(srq->umem) + + (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / + (1 << hr_dev->caps.srqwqe_buf_pg_sz); + buf->page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, + &srq->mtt); if (ret) goto err_user_buf; @@ -212,9 +213,12 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, goto err_user_srq_mtt; } - ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(srq->idx_que.umem), - PAGE_SHIFT, &srq->idx_que.mtt); - + buf = &srq->idx_que.idx_buf; + buf->npages = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem), + 1 << hr_dev->caps.idx_buf_pg_sz); + buf->page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, + &srq->idx_que.mtt); if (ret) { dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n"); goto err_user_idx_mtt; diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 9924be8384d8..d0a043ccbe58 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \ srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \ - cong.o + cong.o restrack.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 4950df3f71b6..ac4d8d1b9a07 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -263,7 +263,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) }, .sq_sig_type = gsi->sq_sig_type, .qp_type = IB_QPT_UD, - .create_flags = mlx5_ib_create_qp_sqpn_qp1(), + .create_flags = MLX5_IB_QP_CREATE_SQPN_QP1, }; return ib_create_qp(pd, &init_attr); diff --git a/drivers/infiniband/hw/mlx5/main.c 
b/drivers/infiniband/hw/mlx5/main.c index add24b628900..0a8a1a129f17 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -67,6 +67,7 @@ #include <rdma/uverbs_std_types.h> #include <rdma/mlx5_user_ioctl_verbs.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/ib_umem_odp.h> #define UVERBS_MODULE_NAME mlx5_ib #include <rdma/uverbs_named_ioctl.h> @@ -693,21 +694,6 @@ static void get_atomic_caps_qp(struct mlx5_ib_dev *dev, get_atomic_caps(dev, atomic_size_qp, props); } -static void get_atomic_caps_dc(struct mlx5_ib_dev *dev, - struct ib_device_attr *props) -{ - u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc); - - get_atomic_caps(dev, atomic_size_qp, props); -} - -bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev) -{ - struct ib_device_attr props = {}; - - get_atomic_caps_dc(dev, &props); - return (props.atomic_cap == IB_ATOMIC_HCA) ? true : false; -} static int mlx5_query_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid) { @@ -844,8 +830,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); if (uhw->outlen && uhw->outlen < resp_len) return -EINVAL; - else - resp.response_length = resp_len; + + resp.response_length = resp_len; if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) return -EINVAL; @@ -1011,6 +997,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); props->max_pi_fast_reg_page_list_len = props->max_fast_reg_page_list_len / 2; + props->max_sgl_rd = + MLX5_CAP_GEN(mdev, max_sgl_for_optimized_performance); get_atomic_caps_qp(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); @@ -6268,6 +6256,8 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .disassociate_ucontext = mlx5_ib_disassociate_ucontext, .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, + 
.fill_res_entry = mlx5_ib_fill_res_entry, + .fill_stat_entry = mlx5_ib_fill_stat_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f61d4005c6c3..5b4c5751a98f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -247,12 +247,7 @@ struct mlx5_ib_flow_db { * These flags are intended for internal use by the mlx5_ib driver, and they * rely on the range reserved for that use in the ib_qp_create_flags enum. */ - -/* Create a UD QP whose source QP number is 1 */ -static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void) -{ - return IB_QP_CREATE_RESERVED_START; -} +#define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START struct wr_list { u16 opcode; @@ -585,6 +580,9 @@ struct mlx5_ib_dm { IB_ACCESS_REMOTE_READ |\ IB_ZERO_BASED) +#define mlx5_update_odp_stats(mr, counter_name, value) \ + atomic64_add(value, &((mr)->odp_stats.counter_name)) + struct mlx5_ib_mr { struct ib_mr ibmr; void *descs; @@ -625,6 +623,8 @@ struct mlx5_ib_mr { struct list_head elm; struct work_struct work; } odp_destroy; + struct ib_odp_counters odp_stats; + bool is_odp_implicit; struct mlx5_async_work cb_work; }; @@ -962,7 +962,10 @@ struct mlx5_ib_dev { /* serialize update of capability mask */ struct mutex cap_mask_mutex; - bool ib_active; + u8 ib_active:1; + u8 fill_delay:1; + u8 is_rep:1; + u8 lag_active:1; struct umr_common umrc; /* sync used page count stats */ @@ -971,7 +974,6 @@ struct mlx5_ib_dev { struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ struct mutex slow_path_mutex; - int fill_delay; struct ib_odp_caps odp_caps; u64 odp_max_size; struct mlx5_ib_pf_eq odp_pf_eq; @@ -994,8 +996,6 @@ struct mlx5_ib_dev { struct mlx5_sq_bfreg fp_bfreg; struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; - bool is_rep; - int 
lag_active; struct mlx5_ib_lb_state lb; u8 umr_fence; @@ -1247,7 +1247,6 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, struct ib_rwq_ind_table_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); -bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev); struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_dm_alloc_attr *attr, @@ -1346,6 +1345,10 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, u8 *native_port_num); void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); +int mlx5_ib_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res); +int mlx5_ib_fill_stat_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 199f7959aaa5..f583476f5c8b 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -658,6 +658,20 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) return 0; } +static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, + struct ib_pd *pd) +{ + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, start_addr); +} + struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -681,16 +695,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); - MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET(mkc, mkc, length64, 1); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, 0); + set_mkc_access_pd_addr_fields(mkc, acc, 0, pd); err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) @@ -1148,16 +1154,8 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET64(mkc, mkc, len, length); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, start_addr); + set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index bcfc09846697..45ee40c2f36e 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -248,6 +248,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct mlx5_mtt)) - 1; u64 idx = 0, blk_start_idx = 0; + u64 invalidations = 0; int in_block = 0; u64 addr; @@ -283,6 +284,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, blk_start_idx = idx; in_block = 1; } + + /* Count page invalidations */ 
+ invalidations += idx - blk_start_idx + 1; } else { u64 umr_offset = idx & umr_block_mask; @@ -300,6 +304,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, idx - blk_start_idx + 1, 0, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); + + mlx5_update_odp_stats(mr, invalidations, invalidations); + /* * We are now sure that the device will not access the * memory. We can safely unmap it, and mark it as dirty if @@ -378,8 +385,6 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled)) caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT; - - return; } static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, @@ -495,6 +500,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->ibmr.lkey = imr->mmkey.key; imr->ibmr.rkey = imr->mmkey.key; imr->umem = &umem_odp->umem; + imr->is_odp_implicit = true; atomic_set(&imr->num_deferred_work, 0); xa_init(&imr->implicit_children); @@ -869,6 +875,13 @@ next_mr: if (ret < 0) goto srcu_unlock; + /* + * When prefetching a page, page fault is generated + * in order to bring the page to the main memory. + * In the current flow, page faults are being counted. 
+ */ + mlx5_update_odp_stats(mr, faults, ret); + npages += ret; ret = 0; break; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8937d72ddcf6..bb3f432e2fb6 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1041,7 +1041,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_IPOIB_UD_LSO | IB_QP_CREATE_NETIF_QP | - mlx5_ib_create_qp_sqpn_qp1())) + MLX5_IB_QP_CREATE_SQPN_QP1)) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) @@ -1104,7 +1104,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, MLX5_SET(qpc, qpc, fre, 1); MLX5_SET(qpc, qpc, rlky, 1); - if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) { + if (init_attr->create_flags & MLX5_IB_QP_CREATE_SQPN_QP1) { MLX5_SET(qpc, qpc, deth_sqpn, 1); qp->flags |= MLX5_IB_QP_SQPN_QP1; } @@ -2140,7 +2140,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EINVAL; } if (init_attr->create_flags & - mlx5_ib_create_qp_sqpn_qp1()) { + MLX5_IB_QP_CREATE_SQPN_QP1) { mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n"); return -EINVAL; } @@ -5823,7 +5823,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, if (qp->flags & MLX5_IB_QP_MANAGED_RECV) qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; if (qp->flags & MLX5_IB_QP_SQPN_QP1) - qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1(); + qp_init_attr->create_flags |= MLX5_IB_QP_CREATE_SQPN_QP1; qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c new file mode 100644 index 000000000000..8f6c04f12531 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2019, Mellanox Technologies inc. 
All rights reserved. + */ + +#include <uapi/rdma/rdma_netlink.h> +#include <rdma/ib_umem_odp.h> +#include <rdma/restrack.h> +#include "mlx5_ib.h" + +static int fill_stat_mr_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_mr *ibmr = container_of(res, struct ib_mr, res); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct nlattr *table_attr; + + if (!(mr->access_flags & IB_ACCESS_ON_DEMAND)) + return 0; + + table_attr = nla_nest_start(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); + + if (!table_attr) + goto err; + + if (rdma_nl_stat_hwcounter_entry(msg, "page_faults", + atomic64_read(&mr->odp_stats.faults))) + goto err_table; + if (rdma_nl_stat_hwcounter_entry( + msg, "page_invalidations", + atomic64_read(&mr->odp_stats.invalidations))) + goto err_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} + +static int fill_res_mr_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_mr *ibmr = container_of(res, struct ib_mr, res); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct nlattr *table_attr; + + if (!(mr->access_flags & IB_ACCESS_ON_DEMAND)) + return 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + if (mr->is_odp_implicit) { + if (rdma_nl_put_driver_string(msg, "odp", "implicit")) + goto err; + } else { + if (rdma_nl_put_driver_string(msg, "odp", "explicit")) + goto err; + } + + nla_nest_end(msg, table_attr); + return 0; + +err: + nla_nest_cancel(msg, table_attr); + return -EMSGSIZE; +} + +int mlx5_ib_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (res->type == RDMA_RESTRACK_MR) + return fill_res_mr_entry(msg, res); + + return 0; +} + +int mlx5_ib_fill_stat_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (res->type == RDMA_RESTRACK_MR) + return fill_stat_mr_entry(msg, res); + + return 0; +} diff --git a/drivers/infiniband/hw/qedr/main.c 
b/drivers/infiniband/hw/qedr/main.c index 5136b835e1ba..1ff5407270d2 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -357,9 +357,10 @@ static int qedr_alloc_resources(struct qedr_dev *dev) return -ENOMEM; spin_lock_init(&dev->sgid_lock); + xa_init_flags(&dev->srqs, XA_FLAGS_LOCK_IRQ); if (IS_IWARP(dev)) { - xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ); + xa_init(&dev->qps); dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); } diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 0cfd849b13d6..8e927f6c1520 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -40,6 +40,7 @@ #include <linux/qed/qed_rdma_if.h> #include <linux/qed/qede_rdma.h> #include <linux/qed/roce_common.h> +#include <linux/completion.h> #include "qedr_hsi_rdma.h" #define QEDR_NODE_DESC "QLogic 579xx RoCE HCA" @@ -377,10 +378,20 @@ enum qedr_qp_err_bitmap { QEDR_QP_ERR_RQ_PBL_FULL = 32, }; +enum qedr_qp_create_type { + QEDR_QP_CREATE_NONE, + QEDR_QP_CREATE_USER, + QEDR_QP_CREATE_KERNEL, +}; + +enum qedr_iwarp_cm_flags { + QEDR_IWARP_CM_WAIT_FOR_CONNECT = BIT(0), + QEDR_IWARP_CM_WAIT_FOR_DISCONNECT = BIT(1), +}; + struct qedr_qp { struct ib_qp ibqp; /* must be first */ struct qedr_dev *dev; - struct qedr_iw_ep *ep; struct qedr_qp_hwq_info sq; struct qedr_qp_hwq_info rq; @@ -395,6 +406,7 @@ struct qedr_qp { u32 id; struct qedr_pd *pd; enum ib_qp_type qp_type; + enum qedr_qp_create_type create_type; struct qed_rdma_qp *qed_qp; u32 qp_id; u16 icid; @@ -437,8 +449,11 @@ struct qedr_qp { /* Relevant to qps created from user space only (applications) */ struct qedr_userq usq; struct qedr_userq urq; - atomic_t refcnt; - bool destroyed; + + /* synchronization objects used with iwarp ep */ + struct kref refcnt; + struct completion iwarp_cm_comp; + unsigned long iwarp_cm_flags; /* enum iwarp_cm_flags */ }; struct qedr_ah { @@ -531,7 +546,7 @@ struct qedr_iw_ep { struct iw_cm_id *cm_id; 
struct qedr_qp *qp; void *qed_context; - u8 during_connect; + struct kref refcnt; }; static inline diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 22881d4442b9..5e9732990be5 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -79,6 +79,27 @@ qedr_fill_sockaddr6(const struct qed_iwarp_cm_info *cm_info, } } +static void qedr_iw_free_qp(struct kref *ref) +{ + struct qedr_qp *qp = container_of(ref, struct qedr_qp, refcnt); + + kfree(qp); +} + +static void +qedr_iw_free_ep(struct kref *ref) +{ + struct qedr_iw_ep *ep = container_of(ref, struct qedr_iw_ep, refcnt); + + if (ep->qp) + kref_put(&ep->qp->refcnt, qedr_iw_free_qp); + + if (ep->cm_id) + ep->cm_id->rem_ref(ep->cm_id); + + kfree(ep); +} + static void qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) { @@ -93,6 +114,7 @@ qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) ep->dev = dev; ep->qed_context = params->ep_context; + kref_init(&ep->refcnt); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REQUEST; @@ -141,12 +163,10 @@ qedr_iw_close_event(void *context, struct qed_iwarp_cm_event_params *params) { struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; - if (ep->cm_id) { + if (ep->cm_id) qedr_iw_issue_event(context, params, IW_CM_EVENT_CLOSE); - ep->cm_id->rem_ref(ep->cm_id); - ep->cm_id = NULL; - } + kref_put(&ep->refcnt, qedr_iw_free_ep); } static void @@ -186,11 +206,13 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) struct qedr_qp *qp = ep->qp; struct iw_cm_event event; - if (qp->destroyed) { - kfree(dwork); - qedr_iw_qp_rem_ref(&qp->ibqp); - return; - } + /* The qp won't be released until we release the ep. 
+ * the ep's refcnt was increased before calling this + * function, therefore it is safe to access qp + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT, + &qp->iwarp_cm_flags)) + goto out; memset(&event, 0, sizeof(event)); event.status = dwork->status; @@ -204,7 +226,6 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) else qp_params.new_state = QED_ROCE_QP_STATE_SQD; - kfree(dwork); if (ep->cm_id) ep->cm_id->event_handler(ep->cm_id, &event); @@ -214,7 +235,10 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) dev->ops->rdma_modify_qp(dev->rdma_ctx, qp->qed_qp, &qp_params); - qedr_iw_qp_rem_ref(&qp->ibqp); + complete(&ep->qp->iwarp_cm_comp); +out: + kfree(dwork); + kref_put(&ep->refcnt, qedr_iw_free_ep); } static void @@ -224,13 +248,17 @@ qedr_iw_disconnect_event(void *context, struct qedr_discon_work *work; struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; struct qedr_dev *dev = ep->dev; - struct qedr_qp *qp = ep->qp; work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return; - qedr_iw_qp_add_ref(&qp->ibqp); + /* We can't get a close event before disconnect, but since + * we're scheduling a work queue we need to make sure close + * won't delete the ep, so we increase the refcnt + */ + kref_get(&ep->refcnt); + work->ep = ep; work->event = params->event; work->status = params->status; @@ -252,16 +280,30 @@ qedr_iw_passive_complete(void *context, if ((params->status == -ECONNREFUSED) && (!ep->qp)) { DP_DEBUG(dev, QEDR_MSG_IWARP, "PASSIVE connection refused releasing ep...\n"); - kfree(ep); + kref_put(&ep->refcnt, qedr_iw_free_ep); return; } + complete(&ep->qp->iwarp_cm_comp); qedr_iw_issue_event(context, params, IW_CM_EVENT_ESTABLISHED); if (params->status < 0) qedr_iw_close_event(context, params); } +static void +qedr_iw_active_complete(void *context, + struct qed_iwarp_cm_event_params *params) +{ + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; + + complete(&ep->qp->iwarp_cm_comp); + 
qedr_iw_issue_event(context, params, IW_CM_EVENT_CONNECT_REPLY); + + if (params->status < 0) + kref_put(&ep->refcnt, qedr_iw_free_ep); +} + static int qedr_iw_mpa_reply(void *context, struct qed_iwarp_cm_event_params *params) { @@ -288,27 +330,15 @@ qedr_iw_event_handler(void *context, struct qed_iwarp_cm_event_params *params) qedr_iw_mpa_reply(context, params); break; case QED_IWARP_EVENT_PASSIVE_COMPLETE: - ep->during_connect = 0; qedr_iw_passive_complete(context, params); break; - case QED_IWARP_EVENT_ACTIVE_COMPLETE: - ep->during_connect = 0; - qedr_iw_issue_event(context, - params, - IW_CM_EVENT_CONNECT_REPLY); - if (params->status < 0) { - struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; - - ep->cm_id->rem_ref(ep->cm_id); - ep->cm_id = NULL; - } + qedr_iw_active_complete(context, params); break; case QED_IWARP_EVENT_DISCONNECT: qedr_iw_disconnect_event(context, params); break; case QED_IWARP_EVENT_CLOSE: - ep->during_connect = 0; qedr_iw_close_event(context, params); break; case QED_IWARP_EVENT_RQ_EMPTY: @@ -476,6 +506,19 @@ qedr_addr6_resolve(struct qedr_dev *dev, return rc; } +struct qedr_qp *qedr_iw_load_qp(struct qedr_dev *dev, u32 qpn) +{ + struct qedr_qp *qp; + + xa_lock(&dev->qps); + qp = xa_load(&dev->qps, qpn); + if (qp) + kref_get(&qp->refcnt); + xa_unlock(&dev->qps); + + return qp; +} + int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { struct qedr_dev *dev = get_qedr_dev(cm_id->device); @@ -491,10 +534,6 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int rc = 0; int i; - qp = xa_load(&dev->qps, conn_param->qpn); - if (unlikely(!qp)) - return -EINVAL; - laddr = (struct sockaddr_in *)&cm_id->m_local_addr; raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; @@ -516,8 +555,15 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -ENOMEM; ep->dev = dev; + kref_init(&ep->refcnt); + + qp = 
qedr_iw_load_qp(dev, conn_param->qpn); + if (!qp) { + rc = -EINVAL; + goto err; + } + ep->qp = qp; - qp->ep = ep; cm_id->add_ref(cm_id); ep->cm_id = cm_id; @@ -580,16 +626,20 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) in_params.qp = qp->qed_qp; memcpy(in_params.local_mac_addr, dev->ndev->dev_addr, ETH_ALEN); - ep->during_connect = 1; + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + goto err; /* QP already being destroyed */ + rc = dev->ops->iwarp_connect(dev->rdma_ctx, &in_params, &out_params); - if (rc) + if (rc) { + complete(&qp->iwarp_cm_comp); goto err; + } return rc; err: - cm_id->rem_ref(cm_id); - kfree(ep); + kref_put(&ep->refcnt, qedr_iw_free_ep); return rc; } @@ -677,18 +727,17 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct qedr_dev *dev = ep->dev; struct qedr_qp *qp; struct qed_iwarp_accept_in params; - int rc; + int rc = 0; DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); - qp = xa_load(&dev->qps, conn_param->qpn); + qp = qedr_iw_load_qp(dev, conn_param->qpn); if (!qp) { DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn); return -EINVAL; } ep->qp = qp; - qp->ep = ep; cm_id->add_ref(cm_id); ep->cm_id = cm_id; @@ -700,15 +749,21 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) params.ird = conn_param->ird; params.ord = conn_param->ord; - ep->during_connect = 1; + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + goto err; /* QP already destroyed */ + rc = dev->ops->iwarp_accept(dev->rdma_ctx, ¶ms); - if (rc) + if (rc) { + complete(&qp->iwarp_cm_comp); goto err; + } return rc; + err: - ep->during_connect = 0; - cm_id->rem_ref(cm_id); + kref_put(&ep->refcnt, qedr_iw_free_ep); + return rc; } @@ -731,17 +786,14 @@ void qedr_iw_qp_add_ref(struct ib_qp *ibqp) { struct qedr_qp *qp = get_qedr_qp(ibqp); - atomic_inc(&qp->refcnt); + kref_get(&qp->refcnt); } void 
qedr_iw_qp_rem_ref(struct ib_qp *ibqp) { struct qedr_qp *qp = get_qedr_qp(ibqp); - if (atomic_dec_and_test(&qp->refcnt)) { - xa_erase_irq(&qp->dev->qps, qp->qp_id); - kfree(qp); - } + kref_put(&qp->refcnt, qedr_iw_free_qp); } struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 6f3ce86019b7..8b4240c1cc76 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -51,6 +51,7 @@ #include "verbs.h" #include <rdma/qedr-abi.h> #include "qedr_roce_cm.h" +#include "qedr_iw_cm.h" #define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm) #define RDMA_MAX_SGE_PER_SRQ (4) @@ -1193,7 +1194,10 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, struct ib_qp_init_attr *attrs) { spin_lock_init(&qp->q_lock); - atomic_set(&qp->refcnt, 1); + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + kref_init(&qp->refcnt); + init_completion(&qp->iwarp_cm_comp); + } qp->pd = pd; qp->qp_type = attrs->qp_type; qp->max_inline_data = attrs->cap.max_inline_data; @@ -1577,6 +1581,14 @@ static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) ib_umem_release(qp->urq.umem); qp->urq.umem = NULL; + + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl); + qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + kfree(qp->urq.pbl_tbl); + } } static int qedr_create_user_qp(struct qedr_dev *dev, @@ -1592,6 +1604,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev, int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; + qp->create_type = QEDR_QP_CREATE_USER; memset(&ureq, 0, sizeof(ureq)); rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); if (rc) { @@ -1805,6 +1818,7 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev, u32 n_sq_entries; memset(&in_params, 0, sizeof(in_params)); + qp->create_type = QEDR_QP_CREATE_KERNEL; /* A single work 
request may take up to QEDR_MAX_SQ_WQE_SIZE elements in * the ring. The ring should allow at least a single WR, even if the @@ -1918,7 +1932,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, qp->ibqp.qp_num = qp->qp_id; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { - rc = xa_insert_irq(&dev->qps, qp->qp_id, qp, GFP_KERNEL); + rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL); if (rc) goto err; } @@ -2437,7 +2451,7 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, return rc; } - if (udata) + if (qp->create_type == QEDR_QP_CREATE_USER) qedr_cleanup_user(dev, qp); else qedr_cleanup_kernel(dev, qp); @@ -2467,34 +2481,44 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) qedr_modify_qp(ibqp, &attr, attr_mask, NULL); } } else { - /* Wait for the connect/accept to complete */ - if (qp->ep) { - int wait_count = 1; - - while (qp->ep->during_connect) { - DP_DEBUG(dev, QEDR_MSG_QP, - "Still in during connect/accept\n"); - - msleep(100); - if (wait_count++ > 200) { - DP_NOTICE(dev, - "during connect timeout\n"); - break; - } - } - } + /* If connection establishment started the WAIT_FOR_CONNECT + * bit will be on and we need to Wait for the establishment + * to complete before destroying the qp. + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + wait_for_completion(&qp->iwarp_cm_comp); + + /* If graceful disconnect started, the WAIT_FOR_DISCONNECT + * bit will be on, and we need to wait for the disconnect to + * complete before continuing. We can use the same completion, + * iwarp_cm_comp, since this is the only place that waits for + * this completion and it is sequential. In addition, + * disconnect can't occur before the connection is fully + * established, therefore if WAIT_FOR_DISCONNECT is on it + * means WAIT_FOR_CONNECT is also on and the completion for + * CONNECT already occurred. 
+ */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT, + &qp->iwarp_cm_flags)) + wait_for_completion(&qp->iwarp_cm_comp); } if (qp->qp_type == IB_QPT_GSI) qedr_destroy_gsi_qp(dev); + /* We need to remove the entry from the xarray before we release the + * qp_id to avoid a race of the qp_id being reallocated and failing + * on xa_insert + */ + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + xa_erase(&dev->qps, qp->qp_id); + qedr_free_qp_resources(dev, qp, udata); - if (atomic_dec_and_test(&qp->refcnt) && - rdma_protocol_iwarp(&dev->ibdev, 1)) { - xa_erase_irq(&dev->qps, qp->qp_id); - kfree(qp); - } + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_iw_qp_rem_ref(&qp->ibqp); + return 0; } @@ -2673,8 +2697,8 @@ int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); - if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR)) - qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); + if (mr->type != QEDR_MR_DMA) + free_mr_info(dev, &mr->info); /* it could be user registered memory. 
*/ ib_umem_release(mr->umem); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index 8f9749d54688..86a6c054ea26 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -58,7 +58,8 @@ #define PVRDMA_ROCEV1_VERSION 17 #define PVRDMA_ROCEV2_VERSION 18 #define PVRDMA_PPN64_VERSION 19 -#define PVRDMA_VERSION PVRDMA_PPN64_VERSION +#define PVRDMA_QPHANDLE_VERSION 20 +#define PVRDMA_VERSION PVRDMA_QPHANDLE_VERSION #define PVRDMA_BOARD_ID 1 #define PVRDMA_REV_ID 1 @@ -581,6 +582,17 @@ struct pvrdma_cmd_create_qp_resp { u32 max_inline_data; }; +struct pvrdma_cmd_create_qp_resp_v2 { + struct pvrdma_cmd_resp_hdr hdr; + u32 qpn; + u32 qp_handle; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + struct pvrdma_cmd_modify_qp { struct pvrdma_cmd_hdr hdr; u32 qp_handle; @@ -663,6 +675,7 @@ union pvrdma_cmd_resp { struct pvrdma_cmd_create_cq_resp create_cq_resp; struct pvrdma_cmd_resize_cq_resp resize_cq_resp; struct pvrdma_cmd_create_qp_resp create_qp_resp; + struct pvrdma_cmd_create_qp_resp_v2 create_qp_resp_v2; struct pvrdma_cmd_query_qp_resp query_qp_resp; struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp; struct pvrdma_cmd_create_srq_resp create_srq_resp; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index bca6a58a442e..22daf2389d95 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -52,6 +52,9 @@ #include "pvrdma.h" +static void __pvrdma_destroy_qp(struct pvrdma_dev *dev, + struct pvrdma_qp *qp); + static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq, struct pvrdma_cq **recv_cq) { @@ -195,7 +198,9 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_qp *cmd = &req.create_qp; struct 
pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp; + struct pvrdma_cmd_create_qp_resp_v2 *resp_v2 = &rsp.create_qp_resp_v2; struct pvrdma_create_qp ucmd; + struct pvrdma_create_qp_resp qp_resp = {}; unsigned long flags; int ret; bool is_srq = !!init_attr->srq; @@ -260,6 +265,15 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, goto err_qp; } + /* Userspace supports qpn and qp handles? */ + if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION && + udata->outlen < sizeof(qp_resp)) { + dev_warn(&dev->pdev->dev, + "create queuepair not supported\n"); + ret = -EOPNOTSUPP; + goto err_qp; + } + if (!is_srq) { /* set qp->sq.wqe_cnt, shift, buf_size.. */ qp->rumem = ib_umem_get(udata, ucmd.rbuf_addr, @@ -379,13 +393,33 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, } /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */ - qp->qp_handle = resp->qpn; qp->port = init_attr->port_num; - qp->ibqp.qp_num = resp->qpn; + + if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION) { + qp->ibqp.qp_num = resp_v2->qpn; + qp->qp_handle = resp_v2->qp_handle; + } else { + qp->ibqp.qp_num = resp->qpn; + qp->qp_handle = resp->qpn; + } + spin_lock_irqsave(&dev->qp_tbl_lock, flags); dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp; spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); + if (udata) { + qp_resp.qpn = qp->ibqp.qp_num; + qp_resp.qp_handle = qp->qp_handle; + + if (ib_copy_to_udata(udata, &qp_resp, + min(udata->outlen, sizeof(qp_resp)))) { + dev_warn(&dev->pdev->dev, + "failed to copy back udata\n"); + __pvrdma_destroy_qp(dev, qp); + return ERR_PTR(-EINVAL); + } + } + return &qp->ibqp; err_pdir: @@ -400,27 +434,15 @@ err_qp: return ERR_PTR(ret); } -static void pvrdma_free_qp(struct pvrdma_qp *qp) +static void _pvrdma_free_qp(struct pvrdma_qp *qp) { + unsigned long flags; struct pvrdma_dev *dev = to_vdev(qp->ibqp.device); - struct pvrdma_cq *scq; - struct pvrdma_cq *rcq; - unsigned long flags, scq_flags, rcq_flags; - - /* In case cq is polling */ - get_cqs(qp, &scq, &rcq); - 
pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); - - _pvrdma_flush_cqe(qp, scq); - if (scq != rcq) - _pvrdma_flush_cqe(qp, rcq); spin_lock_irqsave(&dev->qp_tbl_lock, flags); dev->qp_tbl[qp->qp_handle] = NULL; spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); - pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); - if (refcount_dec_and_test(&qp->refcnt)) complete(&qp->free); wait_for_completion(&qp->free); @@ -435,34 +457,71 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp) atomic_dec(&dev->num_qps); } -/** - * pvrdma_destroy_qp - destroy a queue pair - * @qp: the queue pair to destroy - * @udata: user data or null for kernel object - * - * @return: 0 on success. - */ -int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) +static void pvrdma_free_qp(struct pvrdma_qp *qp) +{ + struct pvrdma_cq *scq; + struct pvrdma_cq *rcq; + unsigned long scq_flags, rcq_flags; + + /* In case cq is polling */ + get_cqs(qp, &scq, &rcq); + pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + _pvrdma_flush_cqe(qp, scq); + if (scq != rcq) + _pvrdma_flush_cqe(qp, rcq); + + /* + * We're now unlocking the CQs before clearing out the qp handle this + * should still be safe. We have destroyed the backend QP and flushed + * the CQEs so there should be no other completions for this QP. 
+ */ + pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + _pvrdma_free_qp(qp); +} + +static inline void _pvrdma_destroy_qp_work(struct pvrdma_dev *dev, + u32 qp_handle) { - struct pvrdma_qp *vqp = to_vqp(qp); union pvrdma_cmd_req req; struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp; int ret; memset(cmd, 0, sizeof(*cmd)); cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP; - cmd->qp_handle = vqp->qp_handle; + cmd->qp_handle = qp_handle; - ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0); + ret = pvrdma_cmd_post(dev, &req, NULL, 0); if (ret < 0) - dev_warn(&to_vdev(qp->device)->pdev->dev, + dev_warn(&dev->pdev->dev, "destroy queuepair failed, error: %d\n", ret); +} +/** + * pvrdma_destroy_qp - destroy a queue pair + * @qp: the queue pair to destroy + * @udata: user data or null for kernel object + * + * @return: always 0. + */ +int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) +{ + struct pvrdma_qp *vqp = to_vqp(qp); + + _pvrdma_destroy_qp_work(to_vdev(qp->device), vqp->qp_handle); pvrdma_free_qp(vqp); return 0; } +static void __pvrdma_destroy_qp(struct pvrdma_dev *dev, + struct pvrdma_qp *qp) +{ + _pvrdma_destroy_qp_work(dev, qp->qp_handle); + _pvrdma_free_qp(qp); +} + /** * pvrdma_modify_qp - modify queue pair attributes * @ibqp: the queue pair diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index fe99da0ff060..ee02c6176007 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -129,7 +129,6 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, * rvt_destory_ah - Destory an address handle * @ibah: address handle * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) - * @udata: user data or NULL for kernel object * * Return: 0 on success */ diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index a85571a4cf57..13d7f66eadab 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ 
b/drivers/infiniband/sw/rdmavt/cq.c @@ -552,7 +552,6 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) /** * rvt_driver_cq_init - Init cq resources on behalf of driver - * @rdi: rvt dev structure * * Return: 0 on success */ @@ -568,7 +567,6 @@ int rvt_driver_cq_init(void) /** * rvt_cq_exit - tear down cq reources - * @rdi: rvt dev structure */ void rvt_cq_exit(void) { diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 0b0a241c57ff..3cdf75d0c7a4 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -2563,10 +2563,9 @@ void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift) EXPORT_SYMBOL(rvt_add_retry_timer_ext); /** - * rvt_add_rnr_timer - add/start an rnr timer - * @qp - the QP - * @aeth - aeth of RNR timeout, simulated aeth for loopback - * add an rnr timer on the QP + * rvt_add_rnr_timer - add/start an rnr timer on the QP + * @qp: the QP + * @aeth: aeth of RNR timeout, simulated aeth for loopback */ void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth) { @@ -2583,7 +2582,7 @@ EXPORT_SYMBOL(rvt_add_rnr_timer); /** * rvt_stop_rc_timers - stop all timers - * @qp - the QP + * @qp: the QP * stop any pending timers */ void rvt_stop_rc_timers(struct rvt_qp *qp) @@ -2617,7 +2616,7 @@ static void rvt_stop_rnr_timer(struct rvt_qp *qp) /** * rvt_del_timers_sync - wait for any timeout routines to exit - * @qp - the QP + * @qp: the QP */ void rvt_del_timers_sync(struct rvt_qp *qp) { @@ -2626,7 +2625,7 @@ void rvt_del_timers_sync(struct rvt_qp *qp) } EXPORT_SYMBOL(rvt_del_timers_sync); -/** +/* * This is called from s_timer for missing responses. */ static void rvt_rc_timeout(struct timer_list *t) @@ -2676,12 +2675,13 @@ EXPORT_SYMBOL(rvt_rc_rnr_retry); * rvt_qp_iter_init - initial for QP iteration * @rdi: rvt devinfo * @v: u64 value + * @cb: user-defined callback * * This returns an iterator suitable for iterating QPs * in the system. 
* - * The @cb is a user defined callback and @v is a 64 - * bit value passed to and relevant for processing in the + * The @cb is a user-defined callback and @v is a 64-bit + * value passed to and relevant for processing in the * @cb. An example use case would be to alter QP processing * based on criteria not part of the rvt_qp. * @@ -2712,7 +2712,7 @@ EXPORT_SYMBOL(rvt_qp_iter_init); /** * rvt_qp_iter_next - return the next QP in iter - * @iter - the iterator + * @iter: the iterator * * Fine grained QP iterator suitable for use * with debugfs seq_file mechanisms. @@ -2775,14 +2775,14 @@ EXPORT_SYMBOL(rvt_qp_iter_next); /** * rvt_qp_iter - iterate all QPs - * @rdi - rvt devinfo - * @v - a 64 bit value - * @cb - a callback + * @rdi: rvt devinfo + * @v: a 64-bit value + * @cb: a callback * * This provides a way for iterating all QPs. * - * The @cb is a user defined callback and @v is a 64 - * bit value passed to and relevant for processing in the + * The @cb is a user-defined callback and @v is a 64-bit + * value passed to and relevant for processing in the * cb. An example use case would be to alter QP processing * based on criteria not part of the rvt_qp. * diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 18da1e1ea979..986265ad6e79 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -683,9 +683,10 @@ EXPORT_SYMBOL(rvt_unregister_device); /** * rvt_init_port - init internal data for driver port - * @rdi: rvt dev strut + * @rdi: rvt_dev_info struct * @port: rvt port * @port_index: 0 based index of ports, different from IB core port num + * @pkey_table: pkey_table for @port * * Keep track of a list of ports. No need to have a detach port. * They persist until the driver goes away. 
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index a8c11b5e1e94..0946a301a5c5 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -77,12 +77,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe) { rxe->max_inline_data = RXE_MAX_INLINE_DATA; - rxe->attr.fw_ver = RXE_FW_VER; rxe->attr.max_mr_size = RXE_MAX_MR_SIZE; rxe->attr.page_size_cap = RXE_PAGE_SIZE_CAP; - rxe->attr.vendor_id = RXE_VENDOR_ID; - rxe->attr.vendor_part_id = RXE_VENDOR_PART_ID; - rxe->attr.hw_ver = RXE_HW_VER; rxe->attr.max_qp = RXE_MAX_QP; rxe->attr.max_qp_wr = RXE_MAX_QP_WR; rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; @@ -94,22 +90,13 @@ static void rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.max_mr = RXE_MAX_MR; rxe->attr.max_pd = RXE_MAX_PD; rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM; - rxe->attr.max_ee_rd_atom = RXE_MAX_EE_RD_ATOM; rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM; rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM; - rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM; rxe->attr.atomic_cap = IB_ATOMIC_HCA; - rxe->attr.max_ee = RXE_MAX_EE; - rxe->attr.max_rdd = RXE_MAX_RDD; - rxe->attr.max_mw = RXE_MAX_MW; - rxe->attr.max_raw_ipv6_qp = RXE_MAX_RAW_IPV6_QP; - rxe->attr.max_raw_ethy_qp = RXE_MAX_RAW_ETHY_QP; rxe->attr.max_mcast_grp = RXE_MAX_MCAST_GRP; rxe->attr.max_mcast_qp_attach = RXE_MAX_MCAST_QP_ATTACH; rxe->attr.max_total_mcast_qp_attach = RXE_MAX_TOT_MCAST_QP_ATTACH; rxe->attr.max_ah = RXE_MAX_AH; - rxe->attr.max_fmr = RXE_MAX_FMR; - rxe->attr.max_map_per_fmr = RXE_MAX_MAP_PER_FMR; rxe->attr.max_srq = RXE_MAX_SRQ; rxe->attr.max_srq_wr = RXE_MAX_SRQ_WR; rxe->attr.max_srq_sge = RXE_MAX_SRQ_SGE; diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index fe5207386700..353c6668249e 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -60,12 +60,8 @@ static inline enum ib_mtu eth_mtu_int_to_enum(int 
mtu) /* default/initial rxe device parameter settings */ enum rxe_device_param { - RXE_FW_VER = 0, RXE_MAX_MR_SIZE = -1ull, RXE_PAGE_SIZE_CAP = 0xfffff000, - RXE_VENDOR_ID = 0, - RXE_VENDOR_PART_ID = 0, - RXE_HW_VER = 0, RXE_MAX_QP = 0x10000, RXE_MAX_QP_WR = 0x4000, RXE_MAX_INLINE_DATA = 400, @@ -87,21 +83,12 @@ enum rxe_device_param { RXE_MAX_MR = 256 * 1024, RXE_MAX_PD = 0x7ffc, RXE_MAX_QP_RD_ATOM = 128, - RXE_MAX_EE_RD_ATOM = 0, RXE_MAX_RES_RD_ATOM = 0x3f000, RXE_MAX_QP_INIT_RD_ATOM = 128, - RXE_MAX_EE_INIT_RD_ATOM = 0, - RXE_MAX_EE = 0, - RXE_MAX_RDD = 0, - RXE_MAX_MW = 0, - RXE_MAX_RAW_IPV6_QP = 0, - RXE_MAX_RAW_ETHY_QP = 0, RXE_MAX_MCAST_GRP = 8192, RXE_MAX_MCAST_QP_ATTACH = 56, RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000, RXE_MAX_AH = 100, - RXE_MAX_FMR = 0, - RXE_MAX_MAP_PER_FMR = 0, RXE_MAX_SRQ = 960, RXE_MAX_SRQ_WR = 0x4000, RXE_MIN_SRQ_WR = 1, diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 623129f27f5a..9dd4bd7aea92 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -106,6 +106,10 @@ static int rxe_modify_device(struct ib_device *dev, { struct rxe_dev *rxe = to_rdev(dev); + if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | + IB_DEVICE_MODIFY_NODE_DESC)) + return -EOPNOTSUPP; + if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid); @@ -1171,6 +1175,9 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); dev->dev.dma_ops = &dma_virt_ops; + dev->dev.dma_parms = &rxe->dma_parms; + rxe->dma_parms = (struct device_dma_parameters) + { .max_segment_size = SZ_2G }; dma_coerce_mask_and_coherent(&dev->dev, dma_get_required_mask(&dev->dev)); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 5c4b2239129c..95834206c80c 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ 
b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -384,6 +384,7 @@ struct rxe_port { struct rxe_dev { struct ib_device ib_dev; struct ib_device_attr attr; + struct device_dma_parameters dma_parms; int max_ucontext; int max_inline_data; struct mutex usdev_lock; diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index dba4535494ab..1ea3ed249e7b 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -70,6 +70,7 @@ struct siw_pd { struct siw_device { struct ib_device base_dev; + struct device_dma_parameters dma_parms; struct net_device *netdev; struct siw_dev_cap attrs; diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 8c1931a57f4a..3bccfef40e7e 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1373,22 +1373,8 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) rv = -EINVAL; goto error; } - if (v4) - siw_dbg_qp(qp, - "pd_len %d, laddr %pI4 %d, raddr %pI4 %d\n", - pd_len, - &((struct sockaddr_in *)(laddr))->sin_addr, - ntohs(((struct sockaddr_in *)(laddr))->sin_port), - &((struct sockaddr_in *)(raddr))->sin_addr, - ntohs(((struct sockaddr_in *)(raddr))->sin_port)); - else - siw_dbg_qp(qp, - "pd_len %d, laddr %pI6 %d, raddr %pI6 %d\n", - pd_len, - &((struct sockaddr_in6 *)(laddr))->sin6_addr, - ntohs(((struct sockaddr_in6 *)(laddr))->sin6_port), - &((struct sockaddr_in6 *)(raddr))->sin6_addr, - ntohs(((struct sockaddr_in6 *)(raddr))->sin6_port)); + siw_dbg_qp(qp, "pd_len %d, laddr %pISp, raddr %pISp\n", pd_len, laddr, + raddr); rv = sock_create(v4 ? 
AF_INET : AF_INET6, SOCK_STREAM, IPPROTO_TCP, &s); if (rv < 0) @@ -1867,14 +1853,7 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, list_add_tail(&cep->listenq, (struct list_head *)id->provider_data); cep->state = SIW_EPSTATE_LISTENING; - if (addr_family == AF_INET) - siw_dbg(id->device, "Listen at laddr %pI4 %u\n", - &(((struct sockaddr_in *)laddr)->sin_addr), - ((struct sockaddr_in *)laddr)->sin_port); - else - siw_dbg(id->device, "Listen at laddr %pI6 %u\n", - &(((struct sockaddr_in6 *)laddr)->sin6_addr), - ((struct sockaddr_in6 *)laddr)->sin6_port); + siw_dbg(id->device, "Listen at laddr %pISp\n", laddr); return 0; @@ -1935,7 +1914,7 @@ static void siw_drop_listeners(struct iw_cm_id *id) /* * siw_create_listen - Create resources for a listener's IWCM ID @id * - * Listens on the socket addresses id->local_addr and id->remote_addr. + * Listens on the socket address id->local_addr. * * If the listener's @id provides a specific local IP address, at most one * listening socket is created and associated with @id. 
@@ -1959,7 +1938,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) */ if (id->local_addr.ss_family == AF_INET) { struct in_device *in_dev = in_dev_get(dev); - struct sockaddr_in s_laddr, *s_raddr; + struct sockaddr_in s_laddr; const struct in_ifaddr *ifa; if (!in_dev) { @@ -1967,12 +1946,8 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) goto out; } memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr)); - s_raddr = (struct sockaddr_in *)&id->remote_addr; - siw_dbg(id->device, - "laddr %pI4:%d, raddr %pI4:%d\n", - &s_laddr.sin_addr, ntohs(s_laddr.sin_port), - &s_raddr->sin_addr, ntohs(s_raddr->sin_port)); + siw_dbg(id->device, "laddr %pISp\n", &s_laddr); rtnl_lock(); in_dev_for_each_ifa_rtnl(ifa, in_dev) { @@ -1992,17 +1967,13 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) } else if (id->local_addr.ss_family == AF_INET6) { struct inet6_dev *in6_dev = in6_dev_get(dev); struct inet6_ifaddr *ifp; - struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr), - *s_raddr = &to_sockaddr_in6(id->remote_addr); + struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr); if (!in6_dev) { rv = -ENODEV; goto out; } - siw_dbg(id->device, - "laddr %pI6:%d, raddr %pI6:%d\n", - &s_laddr->sin6_addr, ntohs(s_laddr->sin6_port), - &s_raddr->sin6_addr, ntohs(s_raddr->sin6_port)); + siw_dbg(id->device, "laddr %pISp\n", &s_laddr); rtnl_lock(); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 05a92f997f60..48e45a852b51 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <linux/dma-mapping.h> +#include <net/addrconf.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> #include <rdma/rdma_netlink.h> @@ -248,24 +249,6 @@ static struct ib_qp *siw_get_base_qp(struct ib_device *base_dev, int id) return NULL; } -static void siw_verbs_sq_flush(struct ib_qp 
*base_qp) -{ - struct siw_qp *qp = to_siw_qp(base_qp); - - down_write(&qp->state_lock); - siw_sq_flush(qp); - up_write(&qp->state_lock); -} - -static void siw_verbs_rq_flush(struct ib_qp *base_qp) -{ - struct siw_qp *qp = to_siw_qp(base_qp); - - down_write(&qp->state_lock); - siw_rq_flush(qp); - up_write(&qp->state_lock); -} - static const struct ib_device_ops siw_device_ops = { .owner = THIS_MODULE, .uverbs_abi_ver = SIW_ABI_VERSION, @@ -284,8 +267,6 @@ static const struct ib_device_ops siw_device_ops = { .destroy_cq = siw_destroy_cq, .destroy_qp = siw_destroy_qp, .destroy_srq = siw_destroy_srq, - .drain_rq = siw_verbs_rq_flush, - .drain_sq = siw_verbs_sq_flush, .get_dma_mr = siw_get_dma_mr, .get_port_immutable = siw_get_port_immutable, .iw_accept = siw_accept, @@ -350,15 +331,19 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sdev->netdev = netdev; if (netdev->type != ARPHRD_LOOPBACK) { - memcpy(&base_dev->node_guid, netdev->dev_addr, 6); + addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, + netdev->dev_addr); } else { /* * The loopback device does not have a HW address, * but connection mangagement lib expects gid != 0 */ - size_t gidlen = min_t(size_t, strlen(base_dev->name), 6); + size_t len = min_t(size_t, strlen(base_dev->name), 6); + char addr[6] = { }; - memcpy(&base_dev->node_guid, base_dev->name, gidlen); + memcpy(addr, base_dev->name, len); + addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, + addr); } base_dev->uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -397,6 +382,9 @@ static struct siw_device *siw_device_create(struct net_device *netdev) base_dev->phys_port_cnt = 1; base_dev->dev.parent = parent; base_dev->dev.dma_ops = &dma_virt_ops; + base_dev->dev.dma_parms = &sdev->dma_parms; + sdev->dma_parms = (struct device_dma_parameters) + { .max_segment_size = SZ_2G }; base_dev->num_comp_vectors = num_possible_cpus(); ib_set_device_ops(base_dev, &siw_device_ops); diff --git 
a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 869e02b69a01..726a5924ea13 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -687,6 +687,47 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, return bytes; } +/* Complete SQ WR's without processing */ +static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + struct siw_sqe sqe = {}; + int rv = 0; + + while (wr) { + sqe.id = wr->wr_id; + sqe.opcode = wr->opcode; + rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR); + if (rv) { + if (bad_wr) + *bad_wr = wr; + break; + } + wr = wr->next; + } + return rv; +} + +/* Complete RQ WR's without processing */ +static int siw_rq_flush_wr(struct siw_qp *qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct siw_rqe rqe = {}; + int rv = 0; + + while (wr) { + rqe.id = wr->wr_id; + rv = siw_rqe_complete(qp, &rqe, 0, 0, SIW_WC_WR_FLUSH_ERR); + if (rv) { + if (bad_wr) + *bad_wr = wr; + break; + } + wr = wr->next; + } + return rv; +} + /* * siw_post_send() * @@ -705,26 +746,54 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, unsigned long flags; int rv = 0; + if (wr && !qp->kernel_verbs) { + siw_dbg_qp(qp, "wr must be empty for user mapped sq\n"); + *bad_wr = wr; + return -EINVAL; + } + /* * Try to acquire QP state lock. Must be non-blocking * to accommodate kernel clients needs. */ if (!down_read_trylock(&qp->state_lock)) { - *bad_wr = wr; - siw_dbg_qp(qp, "QP locked, state %d\n", qp->attrs.state); - return -ENOTCONN; + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * ERROR state is final, so we can be sure + * this state will not change as long as the QP + * exists. + * + * This handles an ib_drain_sq() call with + * a concurrent request to set the QP state + * to ERROR. 
+ */ + rv = siw_sq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP locked, state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } + return rv; } if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) { + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * Immediately flush this WR to CQ, if QP + * is in ERROR state. SQ is guaranteed to + * be empty, so WR complets in-order. + * + * Typically triggered by ib_drain_sq(). + */ + rv = siw_sq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP out of state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } up_read(&qp->state_lock); - *bad_wr = wr; - siw_dbg_qp(qp, "QP out of state %d\n", qp->attrs.state); - return -ENOTCONN; - } - if (wr && !qp->kernel_verbs) { - siw_dbg_qp(qp, "wr must be empty for user mapped sq\n"); - up_read(&qp->state_lock); - *bad_wr = wr; - return -EINVAL; + return rv; } spin_lock_irqsave(&qp->sq_lock, flags); @@ -919,24 +988,54 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, *bad_wr = wr; return -EOPNOTSUPP; /* what else from errno.h? */ } + if (!qp->kernel_verbs) { + siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n"); + *bad_wr = wr; + return -EINVAL; + } + /* * Try to acquire QP state lock. Must be non-blocking * to accommodate kernel clients needs. */ if (!down_read_trylock(&qp->state_lock)) { - *bad_wr = wr; - return -ENOTCONN; - } - if (!qp->kernel_verbs) { - siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n"); - up_read(&qp->state_lock); - *bad_wr = wr; - return -EINVAL; + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * ERROR state is final, so we can be sure + * this state will not change as long as the QP + * exists. + * + * This handles an ib_drain_rq() call with + * a concurrent request to set the QP state + * to ERROR. 
+ */ + rv = siw_rq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP locked, state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } + return rv; } if (qp->attrs.state > SIW_QP_STATE_RTS) { + if (qp->attrs.state == SIW_QP_STATE_ERROR) { + /* + * Immediately flush this WR to CQ, if QP + * is in ERROR state. RQ is guaranteed to + * be empty, so WR complets in-order. + * + * Typically triggered by ib_drain_rq(). + */ + rv = siw_rq_flush_wr(qp, wr, bad_wr); + } else { + siw_dbg_qp(qp, "QP out of state %d\n", + qp->attrs.state); + *bad_wr = wr; + rv = -ENOTCONN; + } up_read(&qp->state_lock); - *bad_wr = wr; - return -EINVAL; + return rv; } /* * Serialize potentially multiple producers. diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 2e72fc5af157..3690e28cc7ea 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -646,13 +646,14 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, if (ib_conn->pi_support) { u32 sig_caps = ib_dev->attrs.sig_prot_cap; + shost->sg_prot_tablesize = shost->sg_tablesize; scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP | SHOST_DIX_GUARD_CRC); } if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) - shost->virt_boundary_mask = ~MASK_4K; + shost->virt_boundary_mask = SZ_4K - 1; if (iscsi_host_add(shost, ib_dev->dev.parent)) { mutex_unlock(&iser_conn->state_mutex); @@ -785,7 +786,7 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, * iscsi_iser_ep_connect() - Initiate iSER connection establishment * @shost: scsi_host * @dst_addr: destination address - * @non-blocking: indicate if routine can block + * @non_blocking: indicate if routine can block * * Allocate an iscsi endpoint, an iser_conn structure and bind them. * After that start RDMA connection establishment via rdma_cm. 
We diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 52ce63592dcf..029c00163442 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -96,16 +96,12 @@ #define iser_err(fmt, arg...) \ pr_err(PFX "%s: " fmt, __func__ , ## arg) -#define SHIFT_4K 12 -#define SIZE_4K (1ULL << SHIFT_4K) -#define MASK_4K (~(SIZE_4K-1)) - /* Default support is 512KB I/O size */ #define ISER_DEF_MAX_SECTORS 1024 #define ISCSI_ISER_DEF_SG_TABLESIZE \ - ((ISER_DEF_MAX_SECTORS * SECTOR_SIZE) >> SHIFT_4K) + ((ISER_DEF_MAX_SECTORS * SECTOR_SIZE) >> ilog2(SZ_4K)) /* Maximum support is 16MB I/O size */ -#define ISCSI_ISER_MAX_SG_TABLESIZE ((32768 * SECTOR_SIZE) >> SHIFT_4K) +#define ISCSI_ISER_MAX_SG_TABLESIZE ((32768 * SECTOR_SIZE) >> ilog2(SZ_4K)) #define ISER_DEF_XMIT_CMDS_DEFAULT 512 #if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT @@ -232,15 +228,16 @@ enum iser_desc_type { * @iser_header: iser header * @iscsi_header: iscsi header * @type: command/control/dataout - * @dam_addr: header buffer dma_address + * @dma_addr: header buffer dma_address * @tx_sg: sg[0] points to iser/iscsi headers * sg[1] optionally points to either of immediate data * unsolicited data-out or control * @num_sge: number sges used on this TX task + * @cqe: completion handler * @mapped: Is the task header mapped - * reg_wr: registration WR - * send_wr: send WR - * inv_wr: invalidate WR + * @reg_wr: registration WR + * @send_wr: send WR + * @inv_wr: invalidate WR */ struct iser_tx_desc { struct iser_ctrl iser_header; @@ -267,6 +264,7 @@ struct iser_tx_desc { * @data: received data segment * @dma_addr: receive buffer dma address * @rx_sg: ib_sge of receive buffer + * @cqe: completion handler * @pad: for sense data TODO: Modify to maximum sense length supported */ struct iser_rx_desc { @@ -283,9 +281,9 @@ struct iser_rx_desc { * struct iser_login_desc - iSER login descriptor * * @req: pointer to login request buffer 
- * @resp: pointer to login response buffer + * @rsp: pointer to login response buffer * @req_dma: DMA address of login request buffer - * @rsp_dma: DMA address of login response buffer + * @rsp_dma: DMA address of login response buffer * @sge: IB sge for login post recv * @cqe: completion handler */ @@ -315,12 +313,12 @@ struct iser_comp { }; /** - * struct iser_device - Memory registration operations + * struct iser_reg_ops - Memory registration operations * per-device registration schemes * * @alloc_reg_res: Allocate registration resources * @free_reg_res: Free registration resources - * @fast_reg_mem: Register memory buffers + * @reg_mem: Register memory buffers * @unreg_mem: Un-register memory buffers * @reg_desc_get: Get a registration descriptor for pool * @reg_desc_put: Get a registration descriptor to pool @@ -369,7 +367,7 @@ struct iser_device { }; /** - * struct iser_reg_resources - Fast registration recources + * struct iser_reg_resources - Fast registration resources * * @mr: memory region * @fmr_pool: pool of fmrs @@ -402,7 +400,7 @@ struct iser_fr_desc { }; /** - * struct iser_fr_pool: connection fast registration pool + * struct iser_fr_pool - connection fast registration pool * * @list: list of fastreg descriptors * @lock: protects fmr/fastreg pool @@ -427,6 +425,7 @@ struct iser_fr_pool { * @comp: iser completion context * @fr_pool: connection fast registration poool * @pi_support: Indicate device T10-PI support + * @reg_cqe: completion handler */ struct ib_conn { struct rdma_cm_id *cma_id; @@ -467,6 +466,7 @@ struct ib_conn { * @num_rx_descs: number of rx descriptors * @scsi_sg_tablesize: scsi host sg_tablesize * @pages_per_mr: maximum pages available for registration + * @snd_w_inv: connection uses remote invalidation */ struct iser_conn { struct ib_conn ib_conn; @@ -525,7 +525,7 @@ struct iser_page_vec { }; /** - * struct iser_global: iSER global context + * struct iser_global - iSER global context * * @device_list_mutex: protects device_list * 
@device_list: iser devices global list diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 5cbb4b3a0566..4a7045bb0831 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -358,6 +358,8 @@ static inline bool iser_signal_comp(u8 sig_count) /** * iser_send_command - send command PDU + * @conn: link to matching iscsi connection + * @task: SCSI command task */ int iser_send_command(struct iscsi_conn *conn, struct iscsi_task *task) @@ -429,6 +431,9 @@ send_command_error: /** * iser_send_data_out - send data out PDU + * @conn: link to matching iscsi connection + * @task: SCSI command task + * @hdr: pointer to the LLD's iSCSI message header */ int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_task *task, diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 2cc89a9b9e9b..0f74dc6d12fa 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -170,7 +170,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, dev = iser_task->iser_conn->ib_conn.device->ib_device; data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir); - if (data->dma_nents == 0) { + if (unlikely(data->dma_nents == 0)) { iser_err("dma_map_sg failed!!!\n"); return -EINVAL; } @@ -237,7 +237,7 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, int ret, plen; page_vec->npages = 0; - page_vec->fake_mr.page_size = SIZE_4K; + page_vec->fake_mr.page_size = SZ_4K; plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg, mem->dma_nents, NULL, iser_set_page); if (unlikely(plen < mem->dma_nents)) { @@ -451,7 +451,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); - n = ib_map_mr_sg(mr, mem->sg, mem->dma_nents, NULL, SIZE_4K); + n = ib_map_mr_sg(mr, mem->sg, mem->dma_nents, NULL, SZ_4K); if (unlikely(n != 
mem->dma_nents)) { iser_err("failed to map sg (%d/%d)\n", n, mem->dma_nents); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index a6548de0e218..1f4a37a3c2b3 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -58,12 +58,12 @@ static void iser_event_handler(struct ib_event_handler *handler, dev_name(&event->device->dev), event->element.port_num); } -/** +/* * iser_create_device_ib_res - creates Protection Domain (PD), Completion * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with - * the adapator. + * the adaptor. * - * returns 0 on success, -1 on failure + * Return: 0 on success, -1 on failure */ static int iser_create_device_ib_res(struct iser_device *device) { @@ -124,9 +124,9 @@ comps_err: return -1; } -/** +/* * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR, - * CQ and PD created with the device associated with the adapator. + * CQ and PD created with the device associated with the adaptor. 
*/ static void iser_free_device_ib_res(struct iser_device *device) { @@ -149,8 +149,11 @@ static void iser_free_device_ib_res(struct iser_device *device) /** * iser_alloc_fmr_pool - Creates FMR pool and page_vector + * @ib_conn: connection RDMA resources + * @cmds_max: max number of SCSI commands for this connection + * @size: max number of pages per map request * - * returns 0 on success, or errno code on failure + * Return: 0 on success, or errno code on failure */ int iser_alloc_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max, @@ -180,7 +183,7 @@ int iser_alloc_fmr_pool(struct ib_conn *ib_conn, page_vec->pages = (u64 *)(page_vec + 1); - params.page_shift = SHIFT_4K; + params.page_shift = ilog2(SZ_4K); params.max_pages_per_fmr = size; /* make the pool size twice the max number of SCSI commands * * the ML is expected to queue, watermark for unmap at 50% */ @@ -215,6 +218,7 @@ err_frpl: /** * iser_free_fmr_pool - releases the FMR pool and page vec + * @ib_conn: connection RDMA resources */ void iser_free_fmr_pool(struct ib_conn *ib_conn) { @@ -295,7 +299,11 @@ static void iser_destroy_fastreg_desc(struct iser_fr_desc *desc) /** * iser_alloc_fastreg_pool - Creates pool of fast_reg descriptors * for fast registration work requests. 
- * returns 0 on success, or errno code on failure + * @ib_conn: connection RDMA resources + * @cmds_max: max number of SCSI commands for this connection + * @size: max number of pages per map request + * + * Return: 0 on success, or errno code on failure */ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max, @@ -332,6 +340,7 @@ err: /** * iser_free_fastreg_pool - releases the pool of fast_reg descriptors + * @ib_conn: connection RDMA resources */ void iser_free_fastreg_pool(struct ib_conn *ib_conn) { @@ -355,10 +364,10 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn) fr_pool->size - i); } -/** +/* * iser_create_ib_conn_res - Queue-Pair (QP) * - * returns 0 on success, -1 on failure + * Return: 0 on success, -1 on failure */ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) { @@ -436,7 +445,7 @@ out_err: return ret; } -/** +/* * based on the resolved device node GUID see if there already allocated * device for this device. If there's no such, create one. 
*/ @@ -487,9 +496,9 @@ static void iser_device_try_release(struct iser_device *device) mutex_unlock(&ig.device_list_mutex); } -/** +/* * Called with state mutex held - **/ + */ static int iser_conn_state_comp_exch(struct iser_conn *iser_conn, enum iser_conn_state comp, enum iser_conn_state exch) @@ -561,7 +570,8 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn, } /** - * Frees all conn objects and deallocs conn descriptor + * iser_conn_release - Frees all conn objects and deallocs conn descriptor + * @iser_conn: iSER connection context */ void iser_conn_release(struct iser_conn *iser_conn) { @@ -595,7 +605,10 @@ void iser_conn_release(struct iser_conn *iser_conn) } /** - * triggers start of the disconnect procedures and wait for them to be done + * iser_conn_terminate - triggers start of the disconnect procedures and + * waits for them to be done + * @iser_conn: iSER connection context + * * Called with state mutex held */ int iser_conn_terminate(struct iser_conn *iser_conn) @@ -632,9 +645,9 @@ int iser_conn_terminate(struct iser_conn *iser_conn) return 1; } -/** +/* * Called with state mutex held - **/ + */ static void iser_connect_error(struct rdma_cm_id *cma_id) { struct iser_conn *iser_conn; @@ -670,7 +683,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, else max_num_sg = attr->max_fast_reg_page_list_len; - sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K); + sg_tablesize = DIV_ROUND_UP(max_sectors * SECTOR_SIZE, SZ_4K); if (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) sup_sg_tablesize = min_t( @@ -684,9 +697,9 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, iser_conn->scsi_sg_tablesize + reserved_mr_pages; } -/** +/* * Called with state mutex held - **/ + */ static void iser_addr_handler(struct rdma_cm_id *cma_id) { struct iser_device *device; @@ -732,9 +745,9 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) } } -/** +/* * Called with state mutex held - **/ + */ static void iser_route_handler(struct 
rdma_cm_id *cma_id) { struct rdma_conn_param conn_param; @@ -1019,7 +1032,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) ib_conn->post_recv_buf_count += count; ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, NULL); - if (ib_ret) { + if (unlikely(ib_ret)) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); ib_conn->post_recv_buf_count -= count; } else @@ -1030,9 +1043,12 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) /** - * iser_start_send - Initiate a Send DTO operation + * iser_post_send - Initiate a Send DTO operation + * @ib_conn: connection RDMA resources + * @tx_desc: iSER TX descriptor + * @signal: true to send work request as SIGNALED * - * returns 0 on success, -1 on failure + * Return: 0 on success, -1 on failure */ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, bool signal) @@ -1060,7 +1076,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, first_wr = wr; ib_ret = ib_post_send(ib_conn->qp, first_wr, NULL); - if (ib_ret) + if (unlikely(ib_ret)) iser_err("ib_post_send failed, ret:%d opcode:%d\n", ib_ret, wr->opcode); @@ -1081,7 +1097,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, ret = ib_check_mr_status(desc->rsc.sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); if (ret) { - pr_err("ib_check_mr_status failed, ret %d\n", ret); + iser_err("ib_check_mr_status failed, ret %d\n", ret); /* Not a lot we can do, return ambiguous guard error */ *sector = 0; return 0x1; @@ -1093,7 +1109,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, sector_div(sector_off, sector_size + 8); *sector = scsi_get_lba(iser_task->sc) + sector_off; - pr_err("PI error found type %d at sector %llx " + iser_err("PI error found type %d at sector %llx " "expected %x vs actual %x\n", mr_status.sig_err.err_type, (unsigned long long)*sector, diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h index 
43ac61ffef4a..6dbc08e1a6a6 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h @@ -70,7 +70,7 @@ struct opa_vnic_adapter; -/** +/* * struct __opa_vesw_info - OPA vnic virtual switch info * * Same as opa_vesw_info without bitwise attribute. @@ -96,7 +96,7 @@ struct __opa_vesw_info { u8 rsvd4[2]; } __packed; -/** +/* * struct __opa_per_veswport_info - OPA vnic per port info * * Same as opa_per_veswport_info without bitwise attribute. @@ -136,7 +136,7 @@ struct __opa_per_veswport_info { u8 rsvd3[8]; } __packed; -/** +/* * struct __opa_veswport_info - OPA vnic port info * * Same as opa_veswport_info without bitwise attribute. @@ -146,7 +146,7 @@ struct __opa_veswport_info { struct __opa_per_veswport_info vport; }; -/** +/* * struct __opa_veswport_trap - OPA vnic trap info * * Same as opa_veswport_trap without bitwise attribute. diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index b5960351bec0..b7f7a5f7bd98 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -174,9 +174,9 @@ static int srp_tmo_get(char *buffer, const struct kernel_param *kp) int tmo = *(int *)kp->arg; if (tmo >= 0) - return sprintf(buffer, "%d", tmo); + return sprintf(buffer, "%d\n", tmo); else - return sprintf(buffer, "off"); + return sprintf(buffer, "off\n"); } static int srp_tmo_set(const char *val, const struct kernel_param *kp) @@ -352,11 +352,11 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) init_completion(&ch->done); ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ? 
- (struct sockaddr *)&target->rdma_cm.src : NULL, - (struct sockaddr *)&target->rdma_cm.dst, + &target->rdma_cm.src.sa : NULL, + &target->rdma_cm.dst.sa, SRP_PATH_REC_TIMEOUT_MS); if (ret) { - pr_err("No route available from %pIS to %pIS (%d)\n", + pr_err("No route available from %pISpsc to %pISpsc (%d)\n", &target->rdma_cm.src, &target->rdma_cm.dst, ret); goto out; } @@ -366,7 +366,7 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) ret = ch->status; if (ret) { - pr_err("Resolving address %pIS failed (%d)\n", + pr_err("Resolving address %pISpsc failed (%d)\n", &target->rdma_cm.dst, ret); goto out; } @@ -552,6 +552,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; + const struct ib_device_attr *attr = &dev->dev->attrs; struct ib_qp_init_attr *init_attr; struct ib_cq *recv_cq, *send_cq; struct ib_qp *qp; @@ -583,12 +584,14 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) init_attr->cap.max_send_wr = m * target->queue_size; init_attr->cap.max_recv_wr = target->queue_size + 1; init_attr->cap.max_recv_sge = 1; - init_attr->cap.max_send_sge = SRP_MAX_SGE; + init_attr->cap.max_send_sge = min(SRP_MAX_SGE, attr->max_send_sge); init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; init_attr->qp_type = IB_QPT_RC; init_attr->send_cq = send_cq; init_attr->recv_cq = recv_cq; + ch->max_imm_sge = min(init_attr->cap.max_send_sge - 1U, 255U); + if (target->using_rdma_cm) { ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr); qp = ch->rdma_cm.cm_id->qp; @@ -1362,7 +1365,8 @@ static void srp_terminate_io(struct srp_rport *rport) } /* Calculate maximum initiator to target information unit length. 
*/ -static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data) +static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data, + uint32_t max_it_iu_size) { uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN + sizeof(struct srp_indirect_buf) + @@ -1372,6 +1376,11 @@ static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data) max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET + srp_max_imm_data); + if (max_it_iu_size) + max_iu_len = min(max_iu_len, max_it_iu_size); + + pr_debug("max_iu_len = %d\n", max_iu_len); + return max_iu_len; } @@ -1389,7 +1398,8 @@ static int srp_rport_reconnect(struct srp_rport *rport) struct srp_target_port *target = rport->lld_data; struct srp_rdma_ch *ch; uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, - srp_use_imm_data); + srp_use_imm_data, + target->max_it_iu_size); int i, j, ret = 0; bool multich = false; @@ -1838,7 +1848,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, return -EIO; if (ch->use_imm_data && - count <= SRP_MAX_IMM_SGE && + count <= ch->max_imm_sge && SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len && scmnd->sc_data_direction == DMA_TO_DEVICE) { struct srp_imm_buf *buf; @@ -2538,7 +2548,8 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id, ch->req_lim = be32_to_cpu(lrsp->req_lim_delta); ch->use_imm_data = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP; ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, - ch->use_imm_data); + ch->use_imm_data, + target->max_it_iu_size); WARN_ON_ONCE(ch->max_it_iu_len > be32_to_cpu(lrsp->max_it_iu_len)); @@ -3411,6 +3422,7 @@ enum { SRP_OPT_IP_SRC = 1 << 15, SRP_OPT_IP_DEST = 1 << 16, SRP_OPT_TARGET_CAN_QUEUE= 1 << 17, + SRP_OPT_MAX_IT_IU_SIZE = 1 << 18, }; static unsigned int srp_opt_mandatory[] = { @@ -3443,6 +3455,7 @@ static const match_table_t srp_opt_tokens = { { SRP_OPT_QUEUE_SIZE, "queue_size=%d" }, { SRP_OPT_IP_SRC, "src=%s" }, { SRP_OPT_IP_DEST, "dest=%s" }, + { SRP_OPT_MAX_IT_IU_SIZE, 
"max_it_iu_size=%d" }, { SRP_OPT_ERR, NULL } }; @@ -3736,6 +3749,14 @@ static int srp_parse_options(struct net *net, const char *buf, target->tl_retry_count = token; break; + case SRP_OPT_MAX_IT_IU_SIZE: + if (match_int(args, &token) || token < 0) { + pr_warn("bad maximum initiator to target IU size '%s'\n", p); + goto out; + } + target->max_it_iu_size = token; + break; + default: pr_warn("unknown parameter or missing value '%s' in target creation request\n", p); @@ -3887,7 +3908,9 @@ static ssize_t srp_create_target(struct device *dev, target->mr_per_cmd = mr_per_cmd; target->indirect_size = target->sg_tablesize * sizeof (struct srp_direct_buf); - max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, srp_use_imm_data); + max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, + srp_use_imm_data, + target->max_it_iu_size); INIT_WORK(&target->tl_err_work, srp_tl_err_work); INIT_WORK(&target->remove_work, srp_remove_work); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index b2861cd2087a..5359ece561ca 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -161,6 +161,7 @@ struct srp_rdma_ch { }; uint32_t max_it_iu_len; uint32_t max_ti_iu_len; + u8 max_imm_sge; bool use_imm_data; /* Everything above this point is used in the hot path of @@ -209,6 +210,7 @@ struct srp_target_port { u32 ch_count; u32 lkey; enum srp_target_state state; + uint32_t max_it_iu_size; unsigned int cmd_sg_cnt; unsigned int indirect_size; bool allow_ext_sg; @@ -245,11 +247,13 @@ struct srp_target_port { union { struct sockaddr_in ip4; struct sockaddr_in6 ip6; + struct sockaddr sa; struct sockaddr_storage ss; } src; union { struct sockaddr_in ip4; struct sockaddr_in6 ip6; + struct sockaddr sa; struct sockaddr_storage ss; } dst; bool src_specified; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index e25c70a56be6..a278e76b9e02 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ 
b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -556,34 +556,41 @@ static int srpt_refresh_port(struct srpt_port *sport) struct ib_port_attr port_attr; int ret; - memset(&port_modify, 0, sizeof(port_modify)); - port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; - port_modify.clr_port_cap_mask = 0; - - ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); - if (ret) - goto err_mod_port; - ret = ib_query_port(sport->sdev->device, sport->port, &port_attr); if (ret) - goto err_query_port; + return ret; sport->sm_lid = port_attr.sm_lid; sport->lid = port_attr.lid; ret = rdma_query_gid(sport->sdev->device, sport->port, 0, &sport->gid); if (ret) - goto err_query_port; + return ret; - sport->port_guid_wwn.priv = sport; - srpt_format_guid(sport->port_guid, sizeof(sport->port_guid), + sport->port_guid_id.wwn.priv = sport; + srpt_format_guid(sport->port_guid_id.name, + sizeof(sport->port_guid_id.name), &sport->gid.global.interface_id); - sport->port_gid_wwn.priv = sport; - snprintf(sport->port_gid, sizeof(sport->port_gid), + sport->port_gid_id.wwn.priv = sport; + snprintf(sport->port_gid_id.name, sizeof(sport->port_gid_id.name), "0x%016llx%016llx", be64_to_cpu(sport->gid.global.subnet_prefix), be64_to_cpu(sport->gid.global.interface_id)); + if (rdma_protocol_iwarp(sport->sdev->device, sport->port)) + return 0; + + memset(&port_modify, 0, sizeof(port_modify)); + port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; + port_modify.clr_port_cap_mask = 0; + + ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); + if (ret) { + pr_warn("%s-%d: enabling device management failed (%d). 
Note: this is expected if SR-IOV is enabled.\n", + dev_name(&sport->sdev->device->dev), sport->port, ret); + return 0; + } + if (!sport->mad_agent) { memset(&reg_req, 0, sizeof(reg_req)); reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; @@ -599,23 +606,14 @@ static int srpt_refresh_port(struct srpt_port *sport) srpt_mad_recv_handler, sport, 0); if (IS_ERR(sport->mad_agent)) { - ret = PTR_ERR(sport->mad_agent); + pr_err("%s-%d: MAD agent registration failed (%ld). Note: this is expected if SR-IOV is enabled.\n", + dev_name(&sport->sdev->device->dev), sport->port, + PTR_ERR(sport->mad_agent)); sport->mad_agent = NULL; - goto err_query_port; } } return 0; - -err_query_port: - - port_modify.set_port_cap_mask = 0; - port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; - ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); - -err_mod_port: - - return ret; } /** @@ -1931,41 +1929,22 @@ static int srpt_disconnect_ch(struct srpt_rdma_ch *ch) return ret; } -static bool srpt_ch_closed(struct srpt_port *sport, struct srpt_rdma_ch *ch) -{ - struct srpt_nexus *nexus; - struct srpt_rdma_ch *ch2; - bool res = true; - - rcu_read_lock(); - list_for_each_entry(nexus, &sport->nexus_list, entry) { - list_for_each_entry(ch2, &nexus->ch_list, list) { - if (ch2 == ch) { - res = false; - goto done; - } - } - } -done: - rcu_read_unlock(); - - return res; -} - /* Send DREQ and wait for DREP.
*/ static void srpt_disconnect_ch_sync(struct srpt_rdma_ch *ch) { + DECLARE_COMPLETION_ONSTACK(closed); struct srpt_port *sport = ch->sport; pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, ch->state); + ch->closed = &closed; + mutex_lock(&sport->mutex); srpt_disconnect_ch(ch); mutex_unlock(&sport->mutex); - while (wait_event_timeout(sport->ch_releaseQ, srpt_ch_closed(sport, ch), - 5 * HZ) == 0) + while (wait_for_completion_timeout(&closed, 5 * HZ) == 0) pr_info("%s(%s-%d state %d): still waiting ...\n", __func__, ch->sess_name, ch->qp->qp_num, ch->state); @@ -2045,10 +2024,17 @@ static void srpt_set_enabled(struct srpt_port *sport, bool enabled) __srpt_close_all_ch(sport); } +static void srpt_drop_sport_ref(struct srpt_port *sport) +{ + if (atomic_dec_return(&sport->refcount) == 0 && sport->freed_channels) + complete(sport->freed_channels); +} + static void srpt_free_ch(struct kref *kref) { struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref); + srpt_drop_sport_ref(ch->sport); kfree_rcu(ch, rcu); } @@ -2092,6 +2078,9 @@ static void srpt_release_channel_work(struct work_struct *w) list_del_rcu(&ch->list); mutex_unlock(&sport->mutex); + if (ch->closed) + complete(ch->closed); + srpt_destroy_ch_ib(ch); srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, @@ -2106,8 +2095,6 @@ static void srpt_release_channel_work(struct work_struct *w) kmem_cache_destroy(ch->req_buf_cache); - wake_up(&sport->ch_releaseQ); - kref_put(&ch->kref, srpt_free_ch); } @@ -2144,6 +2131,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, char i_port_id[36]; u32 it_iu_len; int i, tag_num, tag_size, ret; + struct srpt_tpg *stpg; WARN_ON_ONCE(irqs_disabled()); @@ -2296,23 +2284,38 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, be64_to_cpu(*(__be64 *)nexus->i_port_id), be64_to_cpu(*(__be64 *)(nexus->i_port_id + 8))); - pr_debug("registering session %s\n", ch->sess_name); + pr_debug("registering src addr %s or i_port_id %s\n", 
ch->sess_name, + i_port_id); tag_num = ch->rq_size; tag_size = 1; /* ib_srpt does not use se_sess->sess_cmd_map */ - if (sport->port_guid_tpg.se_tpg_wwn) - ch->sess = target_setup_session(&sport->port_guid_tpg, tag_num, + + mutex_lock(&sport->port_guid_id.mutex); + list_for_each_entry(stpg, &sport->port_guid_id.tpg_list, entry) { + if (!IS_ERR_OR_NULL(ch->sess)) + break; + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, ch->sess_name, ch, NULL); - if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess)) - ch->sess = target_setup_session(&sport->port_gid_tpg, tag_num, + } + mutex_unlock(&sport->port_guid_id.mutex); + + mutex_lock(&sport->port_gid_id.mutex); + list_for_each_entry(stpg, &sport->port_gid_id.tpg_list, entry) { + if (!IS_ERR_OR_NULL(ch->sess)) + break; + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, i_port_id, ch, NULL); - /* Retry without leading "0x" */ - if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess)) - ch->sess = target_setup_session(&sport->port_gid_tpg, tag_num, + if (!IS_ERR_OR_NULL(ch->sess)) + break; + /* Retry without leading "0x" */ + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, i_port_id + 2, ch, NULL); + } + mutex_unlock(&sport->port_gid_id.mutex); + if (IS_ERR_OR_NULL(ch->sess)) { WARN_ON_ONCE(ch->sess == NULL); ret = PTR_ERR(ch->sess); @@ -2325,6 +2328,12 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, goto destroy_ib; } + /* + * Once a session has been created destruction of srpt_rdma_ch objects + * will decrement sport->refcount. Hence increment sport->refcount now. 
+ */ + atomic_inc(&sport->refcount); + mutex_lock(&sport->mutex); if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) { @@ -2505,6 +2514,7 @@ static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id, struct srpt_device *sdev; struct srp_login_req req; const struct srp_login_req_rdma *req_rdma; + struct sa_path_rec *path_rec = cm_id->route.path_rec; char src_addr[40]; sdev = ib_get_client_data(cm_id->device, &srpt_client); @@ -2530,7 +2540,7 @@ static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id, &cm_id->route.addr.src_addr); return srpt_cm_req_recv(sdev, NULL, cm_id, cm_id->port_num, - cm_id->route.path_rec->pkey, &req, src_addr); + path_rec ? path_rec->pkey : 0, &req, src_addr); } static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch, @@ -2906,39 +2916,29 @@ static void srpt_refresh_port_work(struct work_struct *work) srpt_refresh_port(sport); } -static bool srpt_ch_list_empty(struct srpt_port *sport) -{ - struct srpt_nexus *nexus; - bool res = true; - - rcu_read_lock(); - list_for_each_entry(nexus, &sport->nexus_list, entry) - if (!list_empty(&nexus->ch_list)) - res = false; - rcu_read_unlock(); - - return res; -} - /** * srpt_release_sport - disable login and wait for associated channels * @sport: SRPT HCA port. 
*/ static int srpt_release_sport(struct srpt_port *sport) { + DECLARE_COMPLETION_ONSTACK(c); struct srpt_nexus *nexus, *next_n; struct srpt_rdma_ch *ch; WARN_ON_ONCE(irqs_disabled()); + sport->freed_channels = &c; + mutex_lock(&sport->mutex); srpt_set_enabled(sport, false); mutex_unlock(&sport->mutex); - while (wait_event_timeout(sport->ch_releaseQ, - srpt_ch_list_empty(sport), 5 * HZ) <= 0) { - pr_info("%s_%d: waiting for session unregistration ...\n", - dev_name(&sport->sdev->device->dev), sport->port); + while (atomic_read(&sport->refcount) > 0 && + wait_for_completion_timeout(&c, 5 * HZ) <= 0) { + pr_info("%s_%d: waiting for unregistration of %d sessions and configfs directories ...\n", + dev_name(&sport->sdev->device->dev), sport->port, + atomic_read(&sport->refcount)); rcu_read_lock(); list_for_each_entry(nexus, &sport->nexus_list, entry) { list_for_each_entry(ch, &nexus->ch_list, list) { @@ -2975,10 +2975,10 @@ static struct se_wwn *__srpt_lookup_wwn(const char *name) for (i = 0; i < dev->phys_port_cnt; i++) { sport = &sdev->port[i]; - if (strcmp(sport->port_guid, name) == 0) - return &sport->port_guid_wwn; - if (strcmp(sport->port_gid, name) == 0) - return &sport->port_gid_wwn; + if (strcmp(sport->port_guid_id.name, name) == 0) + return &sport->port_guid_id.wwn; + if (strcmp(sport->port_gid_id.name, name) == 0) + return &sport->port_gid_id.wwn; } } @@ -3147,7 +3147,6 @@ static void srpt_add_one(struct ib_device *device) for (i = 1; i <= sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i - 1]; INIT_LIST_HEAD(&sport->nexus_list); - init_waitqueue_head(&sport->ch_releaseQ); mutex_init(&sport->mutex); sport->sdev = sdev; sport->port = i; @@ -3156,6 +3155,10 @@ static void srpt_add_one(struct ib_device *device) sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE; sport->port_attrib.use_srq = false; INIT_WORK(&sport->work, srpt_refresh_port_work); + mutex_init(&sport->port_guid_id.mutex); + INIT_LIST_HEAD(&sport->port_guid_id.tpg_list); + 
mutex_init(&sport->port_gid_id.mutex); + INIT_LIST_HEAD(&sport->port_gid_id.tpg_list); if (srpt_refresh_port(sport)) { pr_err("MAD registration failed for %s-%d.\n", @@ -3258,14 +3261,23 @@ static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg) return tpg->se_tpg_wwn->priv; } +static struct srpt_port_id *srpt_wwn_to_sport_id(struct se_wwn *wwn) +{ + struct srpt_port *sport = wwn->priv; + + if (wwn == &sport->port_guid_id.wwn) + return &sport->port_guid_id; + if (wwn == &sport->port_gid_id.wwn) + return &sport->port_gid_id; + WARN_ON_ONCE(true); + return NULL; +} + static char *srpt_get_fabric_wwn(struct se_portal_group *tpg) { - struct srpt_port *sport = srpt_tpg_to_sport(tpg); + struct srpt_tpg *stpg = container_of(tpg, typeof(*stpg), tpg); - WARN_ON_ONCE(tpg != &sport->port_guid_tpg && - tpg != &sport->port_gid_tpg); - return tpg == &sport->port_guid_tpg ? sport->port_guid : - sport->port_gid; + return stpg->sport_id->name; } static u16 srpt_get_tag(struct se_portal_group *tpg) @@ -3722,18 +3734,27 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, const char *name) { struct srpt_port *sport = wwn->priv; - struct se_portal_group *tpg; - int res; - - WARN_ON_ONCE(wwn != &sport->port_guid_wwn && - wwn != &sport->port_gid_wwn); - tpg = wwn == &sport->port_guid_wwn ? 
&sport->port_guid_tpg : - &sport->port_gid_tpg; - res = core_tpg_register(wwn, tpg, SCSI_PROTOCOL_SRP); - if (res) + struct srpt_port_id *sport_id = srpt_wwn_to_sport_id(wwn); + struct srpt_tpg *stpg; + int res = -ENOMEM; + + stpg = kzalloc(sizeof(*stpg), GFP_KERNEL); + if (!stpg) + return ERR_PTR(res); + stpg->sport_id = sport_id; + res = core_tpg_register(wwn, &stpg->tpg, SCSI_PROTOCOL_SRP); + if (res) { + kfree(stpg); return ERR_PTR(res); + } + + mutex_lock(&sport_id->mutex); + list_add_tail(&stpg->entry, &sport_id->tpg_list); + mutex_unlock(&sport_id->mutex); - return tpg; + atomic_inc(&sport->refcount); + + return &stpg->tpg; } /** @@ -3742,10 +3763,18 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, */ static void srpt_drop_tpg(struct se_portal_group *tpg) { + struct srpt_tpg *stpg = container_of(tpg, typeof(*stpg), tpg); + struct srpt_port_id *sport_id = stpg->sport_id; struct srpt_port *sport = srpt_tpg_to_sport(tpg); + mutex_lock(&sport_id->mutex); + list_del(&stpg->entry); + mutex_unlock(&sport_id->mutex); + sport->enabled = false; core_tpg_deregister(tpg); + kfree(stpg); + srpt_drop_sport_ref(sport); } /** diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index ee9f20e9177a..2e1a69840857 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -264,6 +264,8 @@ enum rdma_ch_state { * @zw_cqe: Zero-length write CQE. * @rcu: RCU head. * @kref: kref for this channel. + * @closed: Completion object that will be signaled as soon as a new + * channel object with the same identity can be created. * @rq_size: IB receive queue size. * @max_rsp_size: Maximum size of an RSP response message in bytes. * @sq_wr_avail: number of work requests available in the send queue. 
@@ -306,6 +308,7 @@ struct srpt_rdma_ch { struct ib_cqe zw_cqe; struct rcu_head rcu; struct kref kref; + struct completion *closed; int rq_size; u32 max_rsp_size; atomic_t sq_wr_avail; @@ -361,24 +364,52 @@ struct srpt_port_attrib { }; /** + * struct srpt_tpg - information about a single "target portal group" + * @entry: Entry in @sport_id->tpg_list. + * @sport_id: Port name this TPG is associated with. + * @tpg: LIO TPG data structure. + * + * Zero or more target portal groups are associated with each port name + * (srpt_port_id). With each TPG an ACL list is associated. + */ +struct srpt_tpg { + struct list_head entry; + struct srpt_port_id *sport_id; + struct se_portal_group tpg; +}; + +/** + * struct srpt_port_id - information about an RDMA port name + * @mutex: Protects @tpg_list changes. + * @tpg_list: TPGs associated with the RDMA port name. + * @wwn: WWN associated with the RDMA port name. + * @name: ASCII representation of the port name. + * + * Multiple sysfs directories can be associated with a single RDMA port. This + * data structure represents a single (port, name) pair. + */ +struct srpt_port_id { + struct mutex mutex; + struct list_head tpg_list; + struct se_wwn wwn; + char name[64]; +}; + +/** * struct srpt_port - information associated by SRPT with a single IB port * @sdev: backpointer to the HCA information. * @mad_agent: per-port management datagram processing information. * @enabled: Whether or not this target port is enabled. - * @port_guid: ASCII representation of Port GUID - * @port_gid: ASCII representation of Port GID * @port: one-based port number. * @sm_lid: cached value of the port's sm_lid. * @lid: cached value of the port's lid. * @gid: cached value of the port's gid. - * @port_acl_lock spinlock for port_acl_list: * @work: work structure for refreshing the aforementioned cached values. - * @port_guid_tpg: TPG associated with target port GUID. - * @port_guid_wwn: WWN associated with target port GUID. 
- * @port_gid_tpg: TPG associated with target port GID. - * @port_gid_wwn: WWN associated with target port GID. + * @port_guid_id: target port GUID + * @port_gid_id: target port GID * @port_attrib: Port attributes that can be accessed through configfs. - * @ch_releaseQ: Enables waiting for removal from nexus_list. + * @refcount: Number of objects associated with this port. + * @freed_channels: Completion that will be signaled once @refcount becomes 0. * @mutex: Protects nexus_list. * @nexus_list: Nexus list. See also srpt_nexus.entry. */ @@ -386,19 +417,16 @@ struct srpt_port { struct srpt_device *sdev; struct ib_mad_agent *mad_agent; bool enabled; - u8 port_guid[24]; - u8 port_gid[64]; u8 port; u32 sm_lid; u32 lid; union ib_gid gid; struct work_struct work; - struct se_portal_group port_guid_tpg; - struct se_wwn port_guid_wwn; - struct se_portal_group port_gid_tpg; - struct se_wwn port_gid_wwn; + struct srpt_port_id port_guid_id; + struct srpt_port_id port_gid_id; struct srpt_port_attrib port_attrib; - wait_queue_head_t ch_releaseQ; + atomic_t refcount; + struct completion *freed_channels; struct mutex mutex; struct list_head nexus_list; }; |