// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2019 Mellanox Technologies. All rights reserved. */ #include #include #include "core_priv.h" #include "restrack.h" #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE) static int __counter_set_mode(struct rdma_counter_mode *curr, enum rdma_nl_counter_mode new_mode, enum rdma_nl_counter_mask new_mask) { if ((new_mode == RDMA_COUNTER_MODE_AUTO) && ((new_mask & (~ALL_AUTO_MODE_MASKS)) || (curr->mode != RDMA_COUNTER_MODE_NONE))) return -EINVAL; curr->mode = new_mode; curr->mask = new_mask; return 0; } /** * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode * * When @on is true, the @mask must be set; When @on is false, it goes * into manual mode if there's any counter, so that the user is able to * manually access them. */ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, bool on, enum rdma_nl_counter_mask mask) { struct rdma_port_counter *port_counter; int ret; port_counter = &dev->port_data[port].port_counter; mutex_lock(&port_counter->lock); if (on) { ret = __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_AUTO, mask); } else { if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) { ret = -EINVAL; goto out; } if (port_counter->num_counters) ret = __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_MANUAL, 0); else ret = __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE, 0); } out: mutex_unlock(&port_counter->lock); return ret; } static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, enum rdma_nl_counter_mode mode) { struct rdma_port_counter *port_counter; struct rdma_counter *counter; int ret; if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats) return NULL; counter = kzalloc(sizeof(*counter), GFP_KERNEL); if (!counter) return NULL; counter->device = dev; counter->port = port; counter->res.type = RDMA_RESTRACK_COUNTER; counter->stats = dev->ops.counter_alloc_stats(counter); if (!counter->stats) goto err_stats; port_counter = &dev->port_data[port].port_counter; mutex_lock(&port_counter->lock); if (mode == RDMA_COUNTER_MODE_MANUAL) { ret = __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_MANUAL, 0); if (ret) goto err_mode; } port_counter->num_counters++; mutex_unlock(&port_counter->lock); counter->mode.mode = mode; kref_init(&counter->kref); mutex_init(&counter->lock); return counter; err_mode: mutex_unlock(&port_counter->lock); kfree(counter->stats); err_stats: kfree(counter); return NULL; } static void rdma_counter_free(struct rdma_counter *counter) { struct rdma_port_counter *port_counter; port_counter = &counter->device->port_data[counter->port].port_counter; mutex_lock(&port_counter->lock); port_counter->num_counters--; if (!port_counter->num_counters && (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL)) __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE, 0); mutex_unlock(&port_counter->lock); rdma_restrack_del(&counter->res); kfree(counter->stats); kfree(counter); } static void auto_mode_init_counter(struct rdma_counter *counter, const struct ib_qp *qp, enum rdma_nl_counter_mask new_mask) { struct auto_mode_param *param = &counter->mode.param; counter->mode.mode = RDMA_COUNTER_MODE_AUTO; counter->mode.mask = new_mask; if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) param->qp_type = qp->qp_type; } static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, enum rdma_nl_counter_mask auto_mask) { struct auto_mode_param *param = &counter->mode.param; bool match = true; if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) return false; /* Ensure that counter belong to right PID */ if (!rdma_is_kernel_res(&counter->res) && !rdma_is_kernel_res(&qp->res) && (task_pid_vnr(counter->res.task) != current->pid)) return false; if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) match &= (param->qp_type == qp->qp_type); return match; } static int __rdma_counter_bind_qp(struct rdma_counter *counter, struct ib_qp *qp) { int ret; if (qp->counter) return -EINVAL; if (!qp->device->ops.counter_bind_qp) return -EOPNOTSUPP; mutex_lock(&counter->lock); ret = qp->device->ops.counter_bind_qp(counter, qp); mutex_unlock(&counter->lock); return ret; } static int __rdma_counter_unbind_qp(struct ib_qp *qp) { struct rdma_counter *counter = qp->counter; int ret; if (!qp->device->ops.counter_unbind_qp) return -EOPNOTSUPP; mutex_lock(&counter->lock); ret = qp->device->ops.counter_unbind_qp(qp); mutex_unlock(&counter->lock); return ret; } static void counter_history_stat_update(const struct rdma_counter *counter) { struct ib_device *dev = counter->device; struct rdma_port_counter *port_counter; int i; port_counter = &dev->port_data[counter->port].port_counter; if (!port_counter->hstats) return; for (i = 0; i < counter->stats->num_counters; i++) port_counter->hstats->value[i] += counter->stats->value[i]; } /** * rdma_get_counter_auto_mode - Find the counter that @qp should be bound * with in auto mode * * Return: The counter (with ref-count increased) if found */ static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, u8 port) { struct rdma_port_counter *port_counter; struct rdma_counter *counter = NULL; struct ib_device *dev = qp->device; struct rdma_restrack_entry *res; struct rdma_restrack_root *rt; unsigned long id = 0; port_counter = &dev->port_data[port].port_counter; rt = &dev->res[RDMA_RESTRACK_COUNTER]; xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { if (!rdma_is_visible_in_pid_ns(res)) continue; counter = container_of(res, struct rdma_counter, res); if ((counter->device != qp->device) || (counter->port != port)) goto next; if (auto_mode_match(qp, counter, port_counter->mode.mask)) break; next: counter = NULL; } if (counter && !kref_get_unless_zero(&counter->kref)) counter = NULL; xa_unlock(&rt->xa); return counter; } static void rdma_counter_res_add(struct rdma_counter *counter, struct ib_qp *qp) { if (rdma_is_kernel_res(&qp->res)) { rdma_restrack_set_task(&counter->res, qp->res.kern_name); rdma_restrack_kadd(&counter->res); } else { rdma_restrack_attach_task(&counter->res, qp->res.task); rdma_restrack_uadd(&counter->res); } } static void counter_release(struct kref *kref) { struct rdma_counter *counter; counter = container_of(kref, struct rdma_counter, kref); counter_history_stat_update(counter); counter->device->ops.counter_dealloc(counter); rdma_counter_free(counter); } /** * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on * the auto-mode rule */ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) { struct rdma_port_counter *port_counter; struct ib_device *dev = qp->device; struct rdma_counter *counter; int ret; if (!rdma_is_port_valid(dev, port)) return -EINVAL; port_counter = &dev->port_data[port].port_counter; if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) return 0; counter = rdma_get_counter_auto_mode(qp, port); if (counter) { ret = __rdma_counter_bind_qp(counter, qp); if (ret) { kref_put(&counter->kref, counter_release); return ret; } } else { counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO); if (!counter) return -ENOMEM; auto_mode_init_counter(counter, qp, port_counter->mode.mask); ret = __rdma_counter_bind_qp(counter, qp); if (ret) { rdma_counter_free(counter); return ret; } rdma_counter_res_add(counter, qp); } return 0; } /** * rdma_counter_unbind_qp - Unbind a qp from a counter * @force: * true - Decrease the counter ref-count anyway (e.g., qp destroy) */ int rdma_counter_unbind_qp(struct ib_qp *qp, bool force) { struct rdma_counter *counter = qp->counter; int ret; if (!counter) return -EINVAL; ret = __rdma_counter_unbind_qp(qp); if (ret && !force) return ret; kref_put(&counter->kref, counter_release); return 0; } int rdma_counter_query_stats(struct rdma_counter *counter) { struct ib_device *dev = counter->device; int ret; if (!dev->ops.counter_update_stats) return -EINVAL; mutex_lock(&counter->lock); ret = dev->ops.counter_update_stats(counter); mutex_unlock(&counter->lock); return ret; } static u64 get_running_counters_hwstat_sum(struct ib_device *dev, u8 port, u32 index) { struct rdma_restrack_entry *res; struct rdma_restrack_root *rt; struct rdma_counter *counter; unsigned long id = 0; u64 sum = 0; rt = &dev->res[RDMA_RESTRACK_COUNTER]; xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { if (!rdma_restrack_get(res)) continue; xa_unlock(&rt->xa); counter = container_of(res, struct rdma_counter, res); if ((counter->device != dev) || (counter->port != port) || rdma_counter_query_stats(counter)) goto next; sum += counter->stats->value[index]; next: xa_lock(&rt->xa); rdma_restrack_put(res); } xa_unlock(&rt->xa); return sum; } /** * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a * specific port, including the running ones and history data */ u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index) { struct rdma_port_counter *port_counter; u64 sum; port_counter = &dev->port_data[port].port_counter; if (!port_counter->hstats) return 0; sum = get_running_counters_hwstat_sum(dev, port, index); sum += port_counter->hstats->value[index]; return sum; } static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) { struct rdma_restrack_entry *res = NULL; struct ib_qp *qp = NULL; res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num); if (IS_ERR(res)) return NULL; if (!rdma_is_visible_in_pid_ns(res)) goto err; qp = container_of(res, struct ib_qp, res); if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) goto err; return qp; err: rdma_restrack_put(&qp->res); return NULL; } static int rdma_counter_bind_qp_manual(struct rdma_counter *counter, struct ib_qp *qp) { if ((counter->device != qp->device) || (counter->port != qp->port)) return -EINVAL; return __rdma_counter_bind_qp(counter, qp); } static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, u32 counter_id) { struct rdma_restrack_entry *res; struct rdma_counter *counter; res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id); if (IS_ERR(res)) return NULL; if (!rdma_is_visible_in_pid_ns(res)) { rdma_restrack_put(res); return NULL; } counter = container_of(res, struct rdma_counter, res); kref_get(&counter->kref); rdma_restrack_put(res); return counter; } /** * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id */ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, u32 qp_num, u32 counter_id) { struct rdma_counter *counter; struct ib_qp *qp; int ret; qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; counter = rdma_get_counter_by_id(dev, counter_id); if (!counter) { ret = -ENOENT; goto err; } if (counter->res.task != qp->res.task) { ret = -EINVAL; goto err_task; } ret = rdma_counter_bind_qp_manual(counter, qp); if (ret) goto err_task; rdma_restrack_put(&qp->res); return 0; err_task: kref_put(&counter->kref, counter_release); err: rdma_restrack_put(&qp->res); return ret; } /** * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it * The id of new counter is returned in @counter_id */ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, u32 qp_num, u32 *counter_id) { struct rdma_counter *counter; struct ib_qp *qp; int ret; if (!rdma_is_port_valid(dev, port)) return -EINVAL; qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { ret = -EINVAL; goto err; } counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL); if (!counter) { ret = -ENOMEM; goto err; } ret = rdma_counter_bind_qp_manual(counter, qp); if (ret) goto err_bind; if (counter_id) *counter_id = counter->id; rdma_counter_res_add(counter, qp); rdma_restrack_put(&qp->res); return ret; err_bind: rdma_counter_free(counter); err: rdma_restrack_put(&qp->res); return ret; } /** * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter */ int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port, u32 qp_num, u32 counter_id) { struct rdma_port_counter *port_counter; struct ib_qp *qp; int ret; if (!rdma_is_port_valid(dev, port)) return -EINVAL; qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { ret = -EINVAL; goto out; } port_counter = &dev->port_data[port].port_counter; if (!qp->counter || qp->counter->id != counter_id || port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) { ret = -EINVAL; goto out; } ret = rdma_counter_unbind_qp(qp, false); out: rdma_restrack_put(&qp->res); return ret; } int rdma_counter_get_mode(struct ib_device *dev, u8 port, enum rdma_nl_counter_mode *mode, enum rdma_nl_counter_mask *mask) { struct rdma_port_counter *port_counter; port_counter = &dev->port_data[port].port_counter; *mode = port_counter->mode.mode; *mask = port_counter->mode.mask; return 0; } void rdma_counter_init(struct ib_device *dev) { struct rdma_port_counter *port_counter; u32 port, i; if (!dev->port_data) return; rdma_for_each_port(dev, port) { port_counter = &dev->port_data[port].port_counter; port_counter->mode.mode = RDMA_COUNTER_MODE_NONE; mutex_init(&port_counter->lock); if (!dev->ops.alloc_hw_stats) continue; port_counter->hstats = dev->ops.alloc_hw_stats(dev, port); if (!port_counter->hstats) goto fail; } return; fail: for (i = port; i >= rdma_start_port(dev); i--) { port_counter = &dev->port_data[port].port_counter; kfree(port_counter->hstats); port_counter->hstats = NULL; mutex_destroy(&port_counter->lock); } } void rdma_counter_release(struct ib_device *dev) { struct rdma_port_counter *port_counter; u32 port; rdma_for_each_port(dev, port) { port_counter = &dev->port_data[port].port_counter; kfree(port_counter->hstats); mutex_destroy(&port_counter->lock); } }