From 00bd1439f464cfac3c60f6eabfe209b8a52e8194 Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Mon, 7 Oct 2019 16:59:32 +0300 Subject: RDMA/rw: Support threshold for registration vs scattering to local pages If there are more scatter entries than the recommended limit provided by the ib device, UMR registration is used. This will provide optimal performance when performing large RDMA READs over devices that advertise the threshold capability. With ConnectX-5 running NVMeoF RDMA with FIO single QP 128KB writes: Without use of cap: 70Gb/sec With use of cap: 84Gb/sec Link: https://lore.kernel.org/r/20191007135933.12483-3-leon@kernel.org Signed-off-by: Yamin Friedman Reviewed-by: Or Gerlitz Signed-off-by: Leon Romanovsky Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6a47ba85c54c..a465fcae992b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -445,6 +445,8 @@ struct ib_device_attr { struct ib_tm_caps tm_caps; struct ib_cq_caps cq_caps; u64 max_dm_size; + /* Max entries for sgl for optimized performance per READ */ + u32 max_sgl_rd; }; enum ib_mtu { -- cgit v1.2.3-59-g8ed1b From a3de94e3d61ec6e6c57ee066ec4d28ebc260dafa Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Wed, 16 Oct 2019 09:23:05 +0300 Subject: IB/mlx5: Introduce ODP diagnostic counters Introduce ODP diagnostic counters and count the following per MR within IB/mlx5 driver: 1) Page faults: Total number of faulted pages. 2) Page invalidations: Total number of pages invalidated by the OS during all invalidation events. The translations can be no longer valid due to either non-present pages or mapping changes. Link: https://lore.kernel.org/r/20191016062308.11886-2-leon@kernel.org Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 ++++ drivers/infiniband/hw/mlx5/odp.c | 15 +++++++++++++++ include/rdma/ib_verbs.h | 5 +++++ 3 files changed, 24 insertions(+) (limited to 'include/rdma') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index bf30d53d94dc..5aae05ebf64b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -585,6 +585,9 @@ struct mlx5_ib_dm { IB_ACCESS_REMOTE_READ |\ IB_ZERO_BASED) +#define mlx5_update_odp_stats(mr, counter_name, value) \ + atomic64_add(value, &((mr)->odp_stats.counter_name)) + struct mlx5_ib_mr { struct ib_mr ibmr; void *descs; @@ -622,6 +625,7 @@ struct mlx5_ib_mr { wait_queue_head_t q_leaf_free; struct mlx5_async_work cb_work; atomic_t num_pending_prefetch; + struct ib_odp_counters odp_stats; }; static inline bool is_odp_mr(struct mlx5_ib_mr *mr) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 95cf0249b015..3601c6ad96f9 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -224,6 +224,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct mlx5_mtt)) - 1; u64 idx = 0, blk_start_idx = 0; + u64 invalidations = 0; int in_block = 0; u64 addr; @@ -261,6 +262,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, blk_start_idx = idx; in_block = 1; } + + /* Count page invalidations */ + invalidations += idx - blk_start_idx + 1; } else { u64 umr_offset = idx & umr_block_mask; @@ -279,6 +283,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&umem_odp->umem_mutex); + + mlx5_update_odp_stats(mr, invalidations, invalidations); + /* * We are now sure that the device will not access the * memory. We can safely unmap it, and mark it as dirty if @@ -287,6 +294,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, ib_umem_odp_unmap_dma_pages(umem_odp, start, end); + if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { WRITE_ONCE(umem_odp->dying, 1); @@ -801,6 +809,13 @@ next_mr: if (ret < 0) goto srcu_unlock; + /* + * When prefetching a page, page fault is generated + * in order to bring the page to the main memory. + * In the current flow, page faults are being counted. + */ + mlx5_update_odp_stats(mr, faults, ret); + npages += ret; ret = 0; break; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a465fcae992b..1ef31b27a41c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2220,6 +2220,11 @@ struct rdma_netdev_alloc_params { struct net_device *netdev, void *param); }; +struct ib_odp_counters { + atomic64_t faults; + atomic64_t invalidations; +}; + struct ib_counters { struct ib_device *device; struct ib_uobject *uobject; -- cgit v1.2.3-59-g8ed1b From e1b95ae0b0ea4987afca73d1dc71dfc0b8ad4e49 Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Wed, 16 Oct 2019 09:23:07 +0300 Subject: RDMA/mlx5: Return ODP type per MR Provide an ODP explicit/implicit type as part of 'rdma -dd resource show mr' dump. For example: $ rdma -dd resource show mr dev mlx5_0 mrn 1 rkey 0xa99a lkey 0xa99a mrlen 50000000 pdn 9 pid 7372 comm ibv_rc_pingpong drv_odp explicit For non-ODP MRs, we won't print "drv_odp ..." at all. Link: https://lore.kernel.org/r/20191016062308.11886-4-leon@kernel.org Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 13 ++++++++++ drivers/infiniband/hw/mlx5/Makefile | 2 +- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ drivers/infiniband/hw/mlx5/odp.c | 2 ++ drivers/infiniband/hw/mlx5/restrack.c | 48 +++++++++++++++++++++++++++++++++++ include/rdma/restrack.h | 3 +++ 7 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/hw/mlx5/restrack.c (limited to 'include/rdma') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 462275480276..a7f5add714d4 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -183,6 +183,19 @@ static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, return 0; } +int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, + const char *str) +{ + if (put_driver_name_print_type(msg, name, + RDMA_NLDEV_PRINT_TYPE_UNSPEC)) + return -EMSGSIZE; + if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str)) + return -EMSGSIZE; + + return 0; +} +EXPORT_SYMBOL(rdma_nl_put_driver_string); + int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value) { return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 9924be8384d8..d0a043ccbe58 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \ srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \ - cong.o + cong.o restrack.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1106a2238b14..4a731cafbf7d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6271,6 +6271,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .disassociate_ucontext = mlx5_ib_disassociate_ucontext, .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, + .fill_res_entry = mlx5_ib_fill_res_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5aae05ebf64b..a0ca1ef16e4e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -626,6 +626,7 @@ struct mlx5_ib_mr { struct mlx5_async_work cb_work; atomic_t num_pending_prefetch; struct ib_odp_counters odp_stats; + bool is_odp_implicit; }; static inline bool is_odp_mr(struct mlx5_ib_mr *mr) @@ -1339,6 +1340,8 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, u8 *native_port_num); void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); +int mlx5_ib_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 3601c6ad96f9..2ab6e44aeaae 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -543,6 +543,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); + imr->is_odp_implicit = true; + return imr; } diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c new file mode 100644 index 000000000000..065049f52b83 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. + */ + +#include +#include +#include +#include "mlx5_ib.h" + +static int fill_res_mr_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_mr *ibmr = container_of(res, struct ib_mr, res); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct nlattr *table_attr; + + if (!(mr->access_flags & IB_ACCESS_ON_DEMAND)) + return 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + if (mr->is_odp_implicit) { + if (rdma_nl_put_driver_string(msg, "odp", "implicit")) + goto err; + } else { + if (rdma_nl_put_driver_string(msg, "odp", "explicit")) + goto err; + } + + nla_nest_end(msg, table_attr); + return 0; + +err: + nla_nest_cancel(msg, table_attr); + return -EMSGSIZE; +} + +int mlx5_ib_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (res->type == RDMA_RESTRACK_MR) + return fill_res_mr_entry(msg, res); + + return 0; +} diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 83df1ec6664e..fe9b3c507a9c 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -156,6 +156,9 @@ int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name, int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value); int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value); +int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, + const char *str); + struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, u32 id); -- cgit v1.2.3-59-g8ed1b From 4061ff7aa379fa770a82da0ed7ec4f9163034518 Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Wed, 16 Oct 2019 09:23:08 +0300 Subject: RDMA/nldev: Provide MR statistics Add RDMA nldev netlink interface for dumping MR statistics information. Output example: $ ./ibv_rc_pingpong -o -P -s 500000000 local address: LID 0x0001, QPN 0x00008a, PSN 0xf81096, GID :: $ rdma stat show mr dev mlx5_0 mrn 2 page_faults 122071 page_invalidations 0 Link: https://lore.kernel.org/r/20191016062308.11886-5-leon@kernel.org Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/nldev.c | 45 ++++++++++++++++++++++++++++++----- drivers/infiniband/hw/mlx5/main.c | 2 ++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 ++ drivers/infiniband/hw/mlx5/restrack.c | 42 ++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 7 ++++++ include/rdma/restrack.h | 2 ++ 7 files changed, 95 insertions(+), 6 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 74941bc39c98..eb35b663a742 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2605,6 +2605,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, drain_sq); SET_DEVICE_OP(dev_ops, enable_driver); SET_DEVICE_OP(dev_ops, fill_res_entry); + SET_DEVICE_OP(dev_ops, fill_stat_entry); SET_DEVICE_OP(dev_ops, get_dev_fw_str); SET_DEVICE_OP(dev_ops, get_dma_mr); SET_DEVICE_OP(dev_ops, get_hw_stats); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index a7f5add714d4..3bb208557c45 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -439,6 +439,14 @@ static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, return dev->ops.fill_res_entry(msg, res); } +static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (!dev->ops.fill_stat_entry) + return false; + return dev->ops.fill_stat_entry(msg, res); +} + static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { @@ -739,8 +747,8 @@ err: return ret; } -static int fill_stat_hwcounter_entry(struct sk_buff *msg, - const char *name, u64 value) +int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, + u64 value) { struct nlattr *entry_attr; @@ -762,6 +770,25 @@ err: nla_nest_cancel(msg, entry_attr); return -EMSGSIZE; } +EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry); + +static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_mr *mr = container_of(res, struct ib_mr, res); + struct ib_device *dev = mr->pd->device; + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) + goto err; + + if (fill_stat_entry(dev, msg, res)) + goto err; + + return 0; + +err: + return -EMSGSIZE; +} static int fill_stat_counter_hwcounters(struct sk_buff *msg, struct rdma_counter *counter) @@ -775,7 +802,7 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg, return -EMSGSIZE; for (i = 0; i < st->num_counters; i++) - if (fill_stat_hwcounter_entry(msg, st->names[i], st->value[i])) + if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i])) goto err; nla_nest_end(msg, table_attr); @@ -1897,7 +1924,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, for (i = 0; i < num_cnts; i++) { v = stats->value[i] + rdma_counter_get_hwstat_value(device, port, i); - if (fill_stat_hwcounter_entry(msg, stats->names[i], v)) { + if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) { ret = -EMSGSIZE; goto err_table; } @@ -2006,7 +2033,10 @@ static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, case RDMA_NLDEV_ATTR_RES_QP: ret = stat_get_doit_qp(skb, nlh, extack, tb); break; - + case RDMA_NLDEV_ATTR_RES_MR: + ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR, + fill_stat_mr_entry); + break; default: ret = -EINVAL; break; @@ -2030,7 +2060,10 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb, case RDMA_NLDEV_ATTR_RES_QP: ret = nldev_res_get_counter_dumpit(skb, cb); break; - + case RDMA_NLDEV_ATTR_RES_MR: + ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR, + fill_stat_mr_entry); + break; default: ret = -EINVAL; break; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4a731cafbf7d..39d54e285ae9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -67,6 +67,7 @@ #include #include #include +#include #define UVERBS_MODULE_NAME mlx5_ib #include @@ -6272,6 +6273,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, .fill_res_entry = mlx5_ib_fill_res_entry, + .fill_stat_entry = mlx5_ib_fill_stat_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a0ca1ef16e4e..e9bdb48cf1d3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1342,6 +1342,8 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); int mlx5_ib_fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res); +int mlx5_ib_fill_stat_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 065049f52b83..8f6c04f12531 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -8,6 +8,39 @@ #include #include "mlx5_ib.h" +static int fill_stat_mr_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_mr *ibmr = container_of(res, struct ib_mr, res); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct nlattr *table_attr; + + if (!(mr->access_flags & IB_ACCESS_ON_DEMAND)) + return 0; + + table_attr = nla_nest_start(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); + + if (!table_attr) + goto err; + + if (rdma_nl_stat_hwcounter_entry(msg, "page_faults", + atomic64_read(&mr->odp_stats.faults))) + goto err_table; + if (rdma_nl_stat_hwcounter_entry( + msg, "page_invalidations", + atomic64_read(&mr->odp_stats.invalidations))) + goto err_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} + static int fill_res_mr_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) { @@ -46,3 +79,12 @@ int mlx5_ib_fill_res_entry(struct sk_buff *msg, return 0; } + +int mlx5_ib_fill_stat_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (res->type == RDMA_RESTRACK_MR) + return fill_stat_mr_entry(msg, res); + + return 0; +} diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1ef31b27a41c..cca9985b4cbc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2570,6 +2570,13 @@ struct ib_device_ops { */ int (*counter_update_stats)(struct rdma_counter *counter); + /** + * Allows rdma drivers to add their own restrack attributes + * dumped via 'rdma stat' iproute2 command. + */ + int (*fill_stat_entry)(struct sk_buff *msg, + struct rdma_restrack_entry *entry); + DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_cq); DECLARE_RDMA_OBJ_SIZE(ib_pd); diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index fe9b3c507a9c..7682d1bcf789 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -158,6 +158,8 @@ int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value); int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, const char *str); +int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, + u64 value); struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, -- cgit v1.2.3-59-g8ed1b From a916051191a3080b3124e77199126a032c327303 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 20 Oct 2019 10:15:55 +0300 Subject: RDMA/cm: Use specific keyword to check define There is a specific define keyword to check if define exists or not, let's use it instead of open-coded variant. Link: https://lore.kernel.org/r/20191020071559.9743-3-leon@kernel.org Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm_msgs.h | 2 +- include/rdma/ib_cm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 3d16d614aff6..0a9e2d3fb9df 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -31,7 +31,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS THE * SOFTWARE. */ -#if !defined(CM_MSGS_H) +#ifndef CM_MSGS_H #define CM_MSGS_H #include diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 49f4f75499b3..5dc4ec986527 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -32,7 +32,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#if !defined(IB_CM_H) +#ifndef IB_CM_H #define IB_CM_H #include -- cgit v1.2.3-59-g8ed1b From 24f52149230454249ae628b922f741036e83b84c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 20 Oct 2019 10:15:56 +0300 Subject: RDMA/cm: Update copyright together with SPDX tag Add Mellanox to lust of copyright holders and replace copyright boilerplate with relevant SPDX tag. Link: https://lore.kernel.org/r/20191020071559.9743-4-leon@kernel.org Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 30 ++---------------------------- drivers/infiniband/core/cm_msgs.h | 30 ++---------------------------- drivers/infiniband/core/cma.c | 31 ++----------------------------- include/rdma/ib_cm.h | 30 ++---------------------------- 4 files changed, 8 insertions(+), 113 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index bc4abb05dbd4..7ffa16ea5fe3 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1,36 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2004-2007 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. */ #include diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 0a9e2d3fb9df..92d7260ac913 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -1,35 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004, 2011 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING the madirectory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use source and binary forms, with or - * withmodification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retathe above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHWARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS THE - * SOFTWARE. + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. */ #ifndef CM_MSGS_H #define CM_MSGS_H diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c8566a423719..8f318928f29d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1,36 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. - * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 5dc4ec986527..b01a8a8d4de9 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -1,36 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. */ #ifndef IB_CM_H #define IB_CM_H -- cgit v1.2.3-59-g8ed1b From ecdfdfdbe4d4c74029f2b416b7ee6d0aeb56364a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 25 Oct 2019 15:58:27 -0700 Subject: RDMA/core: Fix ib_dma_max_seg_size() If dev->dma_device->params == NULL then the maximum DMA segment size is 64 KB. See also the dma_get_max_seg_size() implementation. This patch fixes the following kernel warning: DMA-API: infiniband rxe0: mapping sg segment longer than device claims to support [len=126976] [max=65536] WARNING: CPU: 4 PID: 4848 at kernel/dma/debug.c:1220 debug_dma_map_sg+0x3d9/0x450 RIP: 0010:debug_dma_map_sg+0x3d9/0x450 Call Trace: srp_queuecommand+0x626/0x18d0 [ib_srp] scsi_queue_rq+0xd02/0x13e0 [scsi_mod] __blk_mq_try_issue_directly+0x2b3/0x3f0 blk_mq_request_issue_directly+0xac/0xf0 blk_insert_cloned_request+0xdf/0x170 dm_mq_queue_rq+0x43d/0x830 [dm_mod] __blk_mq_try_issue_directly+0x2b3/0x3f0 blk_mq_request_issue_directly+0xac/0xf0 blk_mq_try_issue_list_directly+0xb8/0x170 blk_mq_sched_insert_requests+0x23c/0x3b0 blk_mq_flush_plug_list+0x529/0x730 blk_flush_plug_list+0x21f/0x260 blk_mq_make_request+0x56b/0xf20 generic_make_request+0x196/0x660 submit_bio+0xae/0x290 blkdev_direct_IO+0x822/0x900 generic_file_direct_write+0x110/0x200 __generic_file_write_iter+0x124/0x2a0 blkdev_write_iter+0x168/0x270 aio_write+0x1c4/0x310 io_submit_one+0x971/0x1390 __x64_sys_io_submit+0x12a/0x390 do_syscall_64+0x6f/0x2e0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Link: https://lore.kernel.org/r/20191025225830.257535-2-bvanassche@acm.org Cc: Fixes: 0b5cb3300ae5 ("RDMA/srp: Increase max_segment_size") Signed-off-by: Bart Van Assche Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index cca9985b4cbc..0626b62ed107 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4057,9 +4057,7 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, */ static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) { - struct device_dma_parameters *p = dev->dma_device->dma_parms; - - return p ? p->max_segment_size : UINT_MAX; + return dma_get_max_seg_size(dev->dma_device); } /** -- cgit v1.2.3-59-g8ed1b From 423f52d65005e8f5067d94bd4f41d8a7d8388135 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:29 -0300 Subject: RDMA/mlx5: Use an xarray for the children of an implicit ODP Currently the child leaves are stored in the shared interval tree and every lookup for a child must be done under the interval tree rwsem. This is further complicated by dropping the rwsem during iteration (ie the odp_lookup(), odp_next() pattern), which requires a very tricky an difficult to understand locking scheme with SRCU. Instead reserve the interval tree for the exclusive use of the mmu notifier related code in umem_odp.c and give each implicit MR a xarray containing all the child MRs. Since the size of each child is 1GB of VA, a 1 level xarray will index 64G of VA, and a 2 level will index 2TB, making xarray a much better data structure choice than an interval tree. The locking properties of xarray will be used in the next patches to rework the implicit ODP locking scheme into something simpler. At this point, the xarray is locked by the implicit MR's umem_mutex, and read can also be locked by the odp_srcu. Link: https://lore.kernel.org/r/20191009160934.3143-10-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +- drivers/infiniband/hw/mlx5/odp.c | 195 +++++++++++------------------------ include/rdma/ib_umem_odp.h | 16 --- 3 files changed, 67 insertions(+), 149 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2cf91db6a36f..88769fcffb5a 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -617,10 +617,13 @@ struct mlx5_ib_mr { u64 data_iova; u64 pi_iova; + /* For ODP and implicit */ atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; - struct mlx5_async_work cb_work; atomic_t num_pending_prefetch; + struct xarray implicit_children; + + struct mlx5_async_work cb_work; }; static inline bool is_odp_mr(struct mlx5_ib_mr *mr) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 8f43af6580ce..6f7eea175c72 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -93,132 +93,54 @@ struct mlx5_pagefault { static u64 mlx5_imr_ksm_entries; -static int check_parent(struct ib_umem_odp *odp, - struct mlx5_ib_mr *parent) +void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, + struct mlx5_ib_mr *imr, int flags) { - struct mlx5_ib_mr *mr = odp->private; - - return mr && mr->parent == parent && !odp->dying; -} - -static struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr) -{ - if (WARN_ON(!mr || !is_odp_mr(mr))) - return NULL; - - return to_ib_umem_odp(mr->umem)->per_mm; -} - -static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp) -{ - struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent; - struct ib_ucontext_per_mm *per_mm = odp->per_mm; - struct rb_node *rb; - - down_read(&per_mm->umem_rwsem); - while (1) { - rb = rb_next(&odp->interval_tree.rb); - if (!rb) - goto not_found; - odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); - if (check_parent(odp, parent)) - goto end; - } -not_found: - odp = NULL; -end: - up_read(&per_mm->umem_rwsem); - return odp; -} - -static struct ib_umem_odp *odp_lookup(u64 start, u64 length, - struct mlx5_ib_mr *parent) -{ - struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent); - struct ib_umem_odp *odp; - struct rb_node *rb; - - down_read(&per_mm->umem_rwsem); - odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length); - if (!odp) - goto end; - - while (1) { - if (check_parent(odp, parent)) - goto end; - rb = rb_next(&odp->interval_tree.rb); - if (!rb) - goto not_found; - odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); - if (ib_umem_start(odp) > start + length) - goto not_found; - } -not_found: - odp = NULL; -end: - up_read(&per_mm->umem_rwsem); - return odp; -} - -void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, - size_t nentries, struct mlx5_ib_mr *mr, int flags) -{ - struct ib_pd *pd = mr->ibmr.pd; - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct ib_umem_odp *odp; - unsigned long va; - int i; + struct mlx5_klm *end = pklm + nentries; if (flags & MLX5_IB_UPD_XLT_ZAP) { - for (i = 0; i < nentries; i++, pklm++) { + for (; pklm != end; pklm++, idx++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - pklm->key = cpu_to_be32(dev->null_mkey); + pklm->key = cpu_to_be32(imr->dev->null_mkey); pklm->va = 0; } return; } /* - * The locking here is pretty subtle. Ideally the implicit children - * list would be protected by the umem_mutex, however that is not + * The locking here is pretty subtle. Ideally the implicit_children + * xarray would be protected by the umem_mutex, however that is not * possible. Instead this uses a weaker update-then-lock pattern: * * srcu_read_lock() - * + * xa_store() * mutex_lock(umem_mutex) * mlx5_ib_update_xlt() * mutex_unlock(umem_mutex) * destroy lkey * - * ie any change the children list must be followed by the locked - * update_xlt before destroying. + * ie any change the xarray must be followed by the locked update_xlt + * before destroying. * * The umem_mutex provides the acquire/release semantic needed to make - * the children list visible to a racing thread. While SRCU is not + * the xa_store() visible to a racing thread. While SRCU is not * technically required, using it gives consistent use of the SRCU - * locking around the children list. + * locking around the xarray. */ - lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex); - lockdep_assert_held(&mr->dev->odp_srcu); + lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); + lockdep_assert_held(&imr->dev->odp_srcu); - odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, - nentries * MLX5_IMR_MTT_SIZE, mr); + for (; pklm != end; pklm++, idx++) { + struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); - for (i = 0; i < nentries; i++, pklm++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - va = (offset + i) * MLX5_IMR_MTT_SIZE; - if (odp && ib_umem_start(odp) == va) { - struct mlx5_ib_mr *mtt = odp->private; - + if (mtt) { pklm->key = cpu_to_be32(mtt->ibmr.lkey); - pklm->va = cpu_to_be64(va); - odp = odp_next(odp); + pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE); } else { - pklm->key = cpu_to_be32(dev->null_mkey); + pklm->key = cpu_to_be32(imr->dev->null_mkey); pklm->va = 0; } - mlx5_ib_dbg(dev, "[%d] va %lx key %x\n", - i, va, be32_to_cpu(pklm->key)); } } @@ -320,6 +242,8 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { + xa_erase(&mr->parent->implicit_children, + ib_umem_start(umem_odp) >> MLX5_IMR_MTT_SHIFT); xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); umem_odp->dying = 1; atomic_inc(&mr->parent->num_leaf_free); @@ -464,6 +388,16 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_release; } + /* + * Once the store to either xarray completes any error unwind has to + * use synchronize_srcu(). Avoid this with xa_reserve() + */ + err = xa_err(xa_store(&imr->implicit_children, idx, mr, GFP_KERNEL)); + if (err) { + ret = ERR_PTR(err); + goto out_release; + } + xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, GFP_ATOMIC); @@ -479,7 +413,7 @@ out_umem: return ret; } -static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *imr, +static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr, u64 io_virt, size_t bcnt) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); @@ -487,39 +421,32 @@ static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *imr, unsigned long idx = io_virt >> MLX5_IMR_MTT_SHIFT; unsigned long inv_start_idx = end_idx + 1; unsigned long inv_len = 0; - struct ib_umem_odp *result = NULL; - struct ib_umem_odp *odp; + struct mlx5_ib_mr *result = NULL; int ret; mutex_lock(&odp_imr->umem_mutex); - odp = odp_lookup(idx * MLX5_IMR_MTT_SIZE, 1, imr); for (idx = idx; idx <= end_idx; idx++) { - if (unlikely(!odp)) { - struct mlx5_ib_mr *mtt; + struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); + if (unlikely(!mtt)) { mtt = implicit_get_child_mr(imr, idx); if (IS_ERR(mtt)) { - result = ERR_CAST(mtt); + result = mtt; goto out; } - odp = to_ib_umem_odp(mtt->umem); inv_start_idx = min(inv_start_idx, idx); inv_len = idx - inv_start_idx + 1; } /* Return first odp if region not covered by single one */ if (likely(!result)) - result = odp; - - odp = odp_next(odp); - if (odp && ib_umem_start(odp) != idx * MLX5_IMR_MTT_SIZE) - odp = NULL; + result = mtt; } /* - * Any time the children in the interval tree are changed we must - * perform an update of the xlt before exiting to ensure the HW and - * the tree remains synchronized. + * Any time the implicit_children are changed we must perform an + * update of the xlt before exiting to ensure the HW and the + * implicit_children remains synchronized. */ out: if (likely(!inv_len)) @@ -569,6 +496,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); + xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, mlx5_imr_ksm_entries, @@ -596,18 +524,15 @@ out_umem: void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { - struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr); - struct rb_node *node; + struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + struct mlx5_ib_mr *mtt; + unsigned long idx; - down_read(&per_mm->umem_rwsem); - for (node = rb_first_cached(&per_mm->umem_tree); node; - node = rb_next(node)) { - struct ib_umem_odp *umem_odp = - rb_entry(node, struct ib_umem_odp, interval_tree.rb); - struct mlx5_ib_mr *mr = umem_odp->private; + mutex_lock(&odp_imr->umem_mutex); + xa_for_each (&imr->implicit_children, idx, mtt) { + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mtt->umem); - if (mr->parent != imr) - continue; + xa_erase(&imr->implicit_children, idx); mutex_lock(&umem_odp->umem_mutex); ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), @@ -623,9 +548,12 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) schedule_work(&umem_odp->work); mutex_unlock(&umem_odp->umem_mutex); } - up_read(&per_mm->umem_rwsem); + mutex_unlock(&odp_imr->umem_mutex); wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); + WARN_ON(!xa_empty(&imr->implicit_children)); + /* Remove any left over reserved elements */ + xa_destroy(&imr->implicit_children); } #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) @@ -718,7 +646,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, u32 *bytes_mapped, u32 flags) { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - struct ib_umem_odp *child; + struct mlx5_ib_mr *mtt; int npages = 0; if (!odp->is_implicit_odp) { @@ -733,17 +661,18 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - io_virt < bcnt)) return -EFAULT; - child = implicit_mr_get_data(mr, io_virt, bcnt); - if (IS_ERR(child)) - return PTR_ERR(child); + mtt = implicit_mr_get_data(mr, io_virt, bcnt); + if (IS_ERR(mtt)) + return PTR_ERR(mtt); /* Fault each child mr that intersects with our interval. */ while (bcnt) { - u64 end = min_t(u64, io_virt + bcnt, ib_umem_end(child)); + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mtt->umem); + u64 end = min_t(u64, io_virt + bcnt, ib_umem_end(umem_odp)); u64 len = end - io_virt; int ret; - ret = pagefault_real_mr(child->private, child, io_virt, len, + ret = pagefault_real_mr(mtt, umem_odp, io_virt, len, bytes_mapped, flags); if (ret < 0) return ret; @@ -752,12 +681,14 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, npages += ret; if (unlikely(bcnt)) { - child = odp_next(child); + mtt = xa_load(&mr->implicit_children, + io_virt >> MLX5_IMR_MTT_SHIFT); + /* * implicit_mr_get_data sets up all the leaves, this * means they got invalidated before we got to them. */ - if (!child || ib_umem_start(child) != io_virt) { + if (!mtt) { mlx5_ib_dbg( mr->dev, "next implicit leaf removed at 0x%llx.\n", diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 253df1a1fa54..28078efc3833 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -156,22 +156,6 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, umem_call_back cb, bool blockable, void *cookie); -/* - * Find first region intersecting with address range. - * Return NULL if not found - */ -static inline struct ib_umem_odp * -rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length) -{ - struct interval_tree_node *node; - - node = interval_tree_iter_first(root, addr, addr + length - 1); - if (!node) - return NULL; - return container_of(node, struct ib_umem_odp, interval_tree); - -} - static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp, unsigned long mmu_seq) { -- cgit v1.2.3-59-g8ed1b From 5256edcb98a14b11409a2d323f56a70a8b366363 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:32 -0300 Subject: RDMA/mlx5: Rework implicit ODP destroy Use SRCU in a sensible way by removing all MRs in the implicit tree from the two xarrays (the update operation), then a synchronize, followed by a normal single threaded teardown. This is only a little unusual from the normal pattern as there can still be some work pending in the unbound wq that may also require a workqueue flush. This is tracked with a single atomic, consolidating the redundant existing atomics and wait queue. For understand-ability the entire ODP implicit create/destroy flow now largely exists in a single pair of functions within odp.c, with a few support functions for tearing down an unused child. Link: https://lore.kernel.org/r/20191009160934.3143-13-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 - drivers/infiniband/hw/mlx5/mlx5_ib.h | 9 ++- drivers/infiniband/hw/mlx5/mr.c | 21 ++--- drivers/infiniband/hw/mlx5/odp.c | 152 +++++++++++++++++++++++------------ include/rdma/ib_umem_odp.h | 2 - 5 files changed, 120 insertions(+), 66 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4692c37b057c..add24b628900 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6146,8 +6146,6 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); WARN_ON(!xa_empty(&dev->odp_mkeys)); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - srcu_barrier(&dev->odp_srcu); cleanup_srcu_struct(&dev->odp_srcu); WARN_ON(!xa_empty(&dev->sig_mrs)); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 88769fcffb5a..b8c958f62628 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -618,10 +618,13 @@ struct mlx5_ib_mr { u64 pi_iova; /* For ODP and implicit */ - atomic_t num_leaf_free; - wait_queue_head_t q_leaf_free; - atomic_t num_pending_prefetch; + atomic_t num_deferred_work; struct xarray implicit_children; + union { + struct rcu_head rcu; + struct list_head elm; + struct work_struct work; + } odp_destroy; struct mlx5_async_work cb_work; }; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index fd94838a8845..1e91f61efa8a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1317,7 +1317,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (is_odp_mr(mr)) { to_ib_umem_odp(mr->umem)->private = mr; - atomic_set(&mr->num_pending_prefetch, 0); + atomic_set(&mr->num_deferred_work, 0); err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, GFP_KERNEL)); @@ -1573,17 +1573,15 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) synchronize_srcu(&dev->odp_srcu); /* dequeue pending prefetch requests for the mr */ - if (atomic_read(&mr->num_pending_prefetch)) + if (atomic_read(&mr->num_deferred_work)) { flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&mr->num_pending_prefetch)); + WARN_ON(atomic_read(&mr->num_deferred_work)); + } /* Destroy all page mappings */ - if (!umem_odp->is_implicit_odp) - mlx5_ib_invalidate_range(umem_odp, - ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); - else - mlx5_ib_free_implicit_mr(mr); + mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); + /* * We kill the umem before the MR for ODP, * so that there will not be any invalidations in @@ -1620,6 +1618,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr); } + if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) { + mlx5_ib_free_implicit_mr(mmr); + return 0; + } + dereg_mr(to_mdev(ibmr->device), mmr); return 0; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 8bd30db87c21..5cf93d8129a9 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -144,31 +144,79 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, } } -static void mr_leaf_free_action(struct work_struct *work) +/* + * This must be called after the mr has been removed from implicit_children + * and odp_mkeys and the SRCU synchronized. NOTE: The MR does not necessarily + * have to be empty here, parallel page faults could have raced with the free + * process and added pages to it. + */ +static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) { - struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); - int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; - struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; + struct mlx5_ib_mr *imr = mr->parent; struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; int srcu_key; - mr->parent = NULL; - synchronize_srcu(&mr->dev->odp_srcu); + /* implicit_child_mr's are not allowed to have deferred work */ + WARN_ON(atomic_read(&mr->num_deferred_work)); - if (xa_load(&mr->dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key))) { + if (need_imr_xlt) { srcu_key = srcu_read_lock(&mr->dev->odp_srcu); mutex_lock(&odp_imr->umem_mutex); - mlx5_ib_update_xlt(imr, idx, 1, 0, + mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); } - ib_umem_odp_release(odp); + + mr->parent = NULL; mlx5_mr_cache_free(mr->dev, mr); + ib_umem_odp_release(odp); + atomic_dec(&imr->num_deferred_work); +} + +static void free_implicit_child_mr_work(struct work_struct *work) +{ + struct mlx5_ib_mr *mr = + container_of(work, struct mlx5_ib_mr, odp_destroy.work); + + free_implicit_child_mr(mr, true); +} + +static void free_implicit_child_mr_rcu(struct rcu_head *head) +{ + struct mlx5_ib_mr *mr = + container_of(head, struct mlx5_ib_mr, odp_destroy.rcu); + + /* Freeing a MR is a sleeping operation, so bounce to a work queue */ + INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); + queue_work(system_unbound_wq, &mr->odp_destroy.work); +} + +static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) +{ + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; + struct mlx5_ib_mr *imr = mr->parent; - if (atomic_dec_and_test(&imr->num_leaf_free)) - wake_up(&imr->q_leaf_free); + xa_lock(&imr->implicit_children); + /* + * This can race with mlx5_ib_free_implicit_mr(), the first one to + * reach the xa lock wins the race and destroys the MR. + */ + if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) != + mr) + goto out_unlock; + + __xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + atomic_inc(&imr->num_deferred_work); + call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu, + free_implicit_child_mr_rcu); + +out_unlock: + xa_unlock(&imr->implicit_children); } void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, @@ -240,15 +288,8 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, ib_umem_odp_unmap_dma_pages(umem_odp, start, end); - if (unlikely(!umem_odp->npages && mr->parent && - !umem_odp->dying)) { - xa_erase(&mr->parent->implicit_children, - ib_umem_start(umem_odp) >> MLX5_IMR_MTT_SHIFT); - xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); - umem_odp->dying = 1; - atomic_inc(&mr->parent->num_leaf_free); - schedule_work(&umem_odp->work); - } + if (unlikely(!umem_odp->npages && mr->parent)) + destroy_unused_implicit_child_mr(mr); mutex_unlock(&umem_odp->umem_mutex); } @@ -375,7 +416,6 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; mr->parent = imr; odp->private = mr; - INIT_WORK(&odp->work, mr_leaf_free_action); err = mlx5_ib_update_xlt(mr, 0, MLX5_IMR_MTT_ENTRIES, @@ -391,7 +431,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, * Once the store to either xarray completes any error unwind has to * use synchronize_srcu(). Avoid this with xa_reserve() */ - ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, GFP_KERNEL); + ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, + GFP_KERNEL); + if (likely(!ret)) + xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_ATOMIC); if (unlikely(ret)) { if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); @@ -404,9 +448,6 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_release; } - xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_ATOMIC); - mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); return mr; @@ -445,9 +486,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->ibmr.lkey = imr->mmkey.key; imr->ibmr.rkey = imr->mmkey.key; imr->umem = &umem_odp->umem; - init_waitqueue_head(&imr->q_leaf_free); - atomic_set(&imr->num_leaf_free, 0); - atomic_set(&imr->num_pending_prefetch, 0); + atomic_set(&imr->num_deferred_work, 0); xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, @@ -477,35 +516,48 @@ out_umem: void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + struct mlx5_ib_dev *dev = imr->dev; + struct list_head destroy_list; struct mlx5_ib_mr *mtt; + struct mlx5_ib_mr *tmp; unsigned long idx; - mutex_lock(&odp_imr->umem_mutex); - xa_for_each (&imr->implicit_children, idx, mtt) { - struct ib_umem_odp *umem_odp = to_ib_umem_odp(mtt->umem); + INIT_LIST_HEAD(&destroy_list); - xa_erase(&imr->implicit_children, idx); + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key)); + /* + * This stops the SRCU protected page fault path from touching either + * the imr or any children. The page fault path can only reach the + * children xarray via the imr. + */ + synchronize_srcu(&dev->odp_srcu); - mutex_lock(&umem_odp->umem_mutex); - ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); + xa_lock(&imr->implicit_children); + xa_for_each (&imr->implicit_children, idx, mtt) { + __xa_erase(&imr->implicit_children, idx); + __xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key)); + list_add(&mtt->odp_destroy.elm, &destroy_list); + } + xa_unlock(&imr->implicit_children); - if (umem_odp->dying) { - mutex_unlock(&umem_odp->umem_mutex); - continue; - } + /* Fence access to the child pointers via the pagefault thread */ + synchronize_srcu(&dev->odp_srcu); - umem_odp->dying = 1; - atomic_inc(&imr->num_leaf_free); - schedule_work(&umem_odp->work); - mutex_unlock(&umem_odp->umem_mutex); + /* + * num_deferred_work can only be incremented inside the odp_srcu, or + * under xa_lock while the child is in the xarray. Thus at this point + * it is only decreasing, and all work holding it is now on the wq. + */ + if (atomic_read(&imr->num_deferred_work)) { + flush_workqueue(system_unbound_wq); + WARN_ON(atomic_read(&imr->num_deferred_work)); } - mutex_unlock(&odp_imr->umem_mutex); - wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); - WARN_ON(!xa_empty(&imr->implicit_children)); - /* Remove any left over reserved elements */ - xa_destroy(&imr->implicit_children); + list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm) + free_implicit_child_mr(mtt, false); + + mlx5_mr_cache_free(dev, imr); + ib_umem_odp_release(odp_imr); } #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) @@ -1579,7 +1631,7 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work) u32 i; for (i = 0; i < work->num_sge; ++i) - atomic_dec(&work->frags[i].mr->num_pending_prefetch); + atomic_dec(&work->frags[i].mr->num_deferred_work); kvfree(work); } @@ -1658,7 +1710,7 @@ static bool init_prefetch_work(struct ib_pd *pd, } /* Keep the MR pointer will valid outside the SRCU */ - atomic_inc(&work->frags[i].mr->num_pending_prefetch); + atomic_inc(&work->frags[i].mr->num_deferred_work); } work->num_sge = num_sge; return true; diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 28078efc3833..09b0e4494986 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -78,9 +78,7 @@ struct ib_umem_odp { bool is_implicit_odp; struct completion notifier_completion; - int dying; unsigned int page_shift; - struct work_struct work; }; static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) -- cgit v1.2.3-59-g8ed1b From 3411f9f01b76bd88aa6e0e013847ab6479cb4f24 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:11 +0200 Subject: RDMA/core: Create mmap database and cookie helper functions Create some common API's for adding entries to a xa_mmap. Searching for an entry and freeing one. The general approach is copied from the EFA driver and improved to be more general and do more to help the drivers. Integration with the core allows a reference counted scheme with a free function so that the driver can know when its mmaps are all gone. This significant new functionality will be helpful for drivers to have the correct lifetime model for mmap objects. Link: https://lore.kernel.org/r/20191030094417.16866-3-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/ib_core_uverbs.c | 236 +++++++++++++++++++++++++++++++ drivers/infiniband/core/rdma_core.c | 1 + drivers/infiniband/core/uverbs_cmd.c | 2 + include/rdma/ib_verbs.h | 35 +++++ 5 files changed, 275 insertions(+) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f8d383ceae05..e785bebaf16e 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2642,6 +2642,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, map_mr_sg_pi); SET_DEVICE_OP(dev_ops, map_phys_fmr); SET_DEVICE_OP(dev_ops, mmap); + SET_DEVICE_OP(dev_ops, mmap_free); SET_DEVICE_OP(dev_ops, modify_ah); SET_DEVICE_OP(dev_ops, modify_cq); SET_DEVICE_OP(dev_ops, modify_device); diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c index b74d2a2fb342..aacd84a45de6 100644 --- a/drivers/infiniband/core/ib_core_uverbs.c +++ b/drivers/infiniband/core/ib_core_uverbs.c @@ -71,3 +71,239 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, return 0; } EXPORT_SYMBOL(rdma_user_mmap_io); + +/** + * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa + * + * @ucontext: associated user context + * @pgoff: The mmap offset >> PAGE_SHIFT + * + * This function is called when a user tries to mmap with an offset (returned + * by rdma_user_mmap_get_offset()) it initially received from the driver. The + * rdma_user_mmap_entry was created by the function + * rdma_user_mmap_entry_insert(). This function increases the refcnt of the + * entry so that it won't be deleted from the xarray in the meantime. + * + * Return an reference to an entry if exists or NULL if there is no + * match. rdma_user_mmap_entry_put() must be called to put the reference. + */ +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, + unsigned long pgoff) +{ + struct rdma_user_mmap_entry *entry; + + if (pgoff > U32_MAX) + return NULL; + + xa_lock(&ucontext->mmap_xa); + + entry = xa_load(&ucontext->mmap_xa, pgoff); + + /* + * If refcount is zero, entry is already being deleted, driver_removed + * indicates that the no further mmaps are possible and we waiting for + * the active VMAs to be closed. + */ + if (!entry || entry->start_pgoff != pgoff || entry->driver_removed || + !kref_get_unless_zero(&entry->ref)) + goto err; + + xa_unlock(&ucontext->mmap_xa); + + ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n", + pgoff, entry->npages); + + return entry; + +err: + xa_unlock(&ucontext->mmap_xa); + return NULL; +} +EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff); + +/** + * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa + * + * @ucontext: associated user context + * @vma: the vma being mmap'd into + * + * This function is like rdma_user_mmap_entry_get_pgoff() except that it also + * checks that the VMA is correct. + */ +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, + struct vm_area_struct *vma) +{ + struct rdma_user_mmap_entry *entry; + + if (!(vma->vm_flags & VM_SHARED)) + return NULL; + entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff); + if (!entry) + return NULL; + if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) { + rdma_user_mmap_entry_put(entry); + return NULL; + } + return entry; +} +EXPORT_SYMBOL(rdma_user_mmap_entry_get); + +static void rdma_user_mmap_entry_free(struct kref *kref) +{ + struct rdma_user_mmap_entry *entry = + container_of(kref, struct rdma_user_mmap_entry, ref); + struct ib_ucontext *ucontext = entry->ucontext; + unsigned long i; + + /* + * Erase all entries occupied by this single entry, this is deferred + * until all VMA are closed so that the mmap offsets remain unique. + */ + xa_lock(&ucontext->mmap_xa); + for (i = 0; i < entry->npages; i++) + __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i); + xa_unlock(&ucontext->mmap_xa); + + ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n", + entry->start_pgoff, entry->npages); + + if (ucontext->device->ops.mmap_free) + ucontext->device->ops.mmap_free(entry); +} + +/** + * rdma_user_mmap_entry_put() - Drop reference to the mmap entry + * + * @entry: an entry in the mmap_xa + * + * This function is called when the mapping is closed if it was + * an io mapping or when the driver is done with the entry for + * some other reason. + * Should be called after rdma_user_mmap_entry_get was called + * and entry is no longer needed. This function will erase the + * entry and free it if its refcnt reaches zero. + */ +void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry) +{ + kref_put(&entry->ref, rdma_user_mmap_entry_free); +} +EXPORT_SYMBOL(rdma_user_mmap_entry_put); + +/** + * rdma_user_mmap_entry_remove() - Drop reference to entry and + * mark it as unmmapable + * + * @entry: the entry to insert into the mmap_xa + * + * Drivers can call this to prevent userspace from creating more mappings for + * entry, however existing mmaps continue to exist and ops->mmap_free() will + * not be called until all user mmaps are destroyed. + */ +void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry) +{ + if (!entry) + return; + + entry->driver_removed = true; + kref_put(&entry->ref, rdma_user_mmap_entry_free); +} +EXPORT_SYMBOL(rdma_user_mmap_entry_remove); + +/** + * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa + * + * @ucontext: associated user context. + * @entry: the entry to insert into the mmap_xa + * @length: length of the address that will be mmapped + * + * This function should be called by drivers that use the rdma_user_mmap + * interface for implementing their mmap syscall A database of mmap offsets is + * handled in the core and helper functions are provided to insert entries + * into the database and extract entries when the user calls mmap with the + * given offset. The function allocates a unique page offset that should be + * provided to user, the user will use the offset to retrieve information such + * as address to be mapped and how. + * + * Return: 0 on success and -ENOMEM on failure + */ +int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length) +{ + struct ib_uverbs_file *ufile = ucontext->ufile; + XA_STATE(xas, &ucontext->mmap_xa, 0); + u32 xa_first, xa_last, npages; + int err; + u32 i; + + if (!entry) + return -EINVAL; + + kref_init(&entry->ref); + entry->ucontext = ucontext; + + /* + * We want the whole allocation to be done without interruption from a + * different thread. The allocation requires finding a free range and + * storing. During the xa_insert the lock could be released, possibly + * allowing another thread to choose the same range. + */ + mutex_lock(&ufile->umap_lock); + + xa_lock(&ucontext->mmap_xa); + + /* We want to find an empty range */ + npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE); + entry->npages = npages; + while (true) { + /* First find an empty index */ + xas_find_marked(&xas, U32_MAX, XA_FREE_MARK); + if (xas.xa_node == XAS_RESTART) + goto err_unlock; + + xa_first = xas.xa_index; + + /* Is there enough room to have the range? */ + if (check_add_overflow(xa_first, npages, &xa_last)) + goto err_unlock; + + /* + * Now look for the next present entry. If an entry doesn't + * exist, we found an empty range and can proceed. + */ + xas_next_entry(&xas, xa_last - 1); + if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last) + break; + } + + for (i = xa_first; i < xa_last; i++) { + err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL); + if (err) + goto err_undo; + } + + /* + * Internally the kernel uses a page offset, in libc this is a byte + * offset. Drivers should not return pgoff to userspace. + */ + entry->start_pgoff = xa_first; + xa_unlock(&ucontext->mmap_xa); + mutex_unlock(&ufile->umap_lock); + + ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n", + entry->start_pgoff, npages); + + return 0; + +err_undo: + for (; i > xa_first; i--) + __xa_erase(&ucontext->mmap_xa, i - 1); + +err_unlock: + xa_unlock(&ucontext->mmap_xa); + mutex_unlock(&ufile->umap_lock); + return -ENOMEM; +} +EXPORT_SYMBOL(rdma_user_mmap_entry_insert); diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index ccf4d069c25c..6c72773faf29 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -817,6 +817,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, rdma_restrack_del(&ucontext->res); ib_dev->ops.dealloc_ucontext(ucontext); + WARN_ON(!xa_empty(&ucontext->mmap_xa)); kfree(ucontext); ufile->ucontext = NULL; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 14a80fd9f464..06ed32c8662f 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -252,6 +252,8 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) ucontext->closing = false; ucontext->cleanup_retryable = false; + xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC); + ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) goto err_free; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0626b62ed107..8865ec28180a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1473,6 +1473,7 @@ struct ib_ucontext { * Implementation details of the RDMA core, don't use in drivers: */ struct rdma_restrack_entry res; + struct xarray mmap_xa; }; struct ib_uobject { @@ -2258,6 +2259,21 @@ struct iw_cm_conn_param; #define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct +struct rdma_user_mmap_entry { + struct kref ref; + struct ib_ucontext *ucontext; + unsigned long start_pgoff; + size_t npages; + bool driver_removed; +}; + +/* Return the offset (in bytes) the user should pass to libc's mmap() */ +static inline u64 +rdma_user_mmap_get_offset(const struct rdma_user_mmap_entry *entry) +{ + return (u64)entry->start_pgoff << PAGE_SHIFT; +} + /** * struct ib_device_ops - InfiniBand device operations * This structure defines all the InfiniBand device operations, providers will @@ -2370,6 +2386,13 @@ struct ib_device_ops { struct ib_udata *udata); void (*dealloc_ucontext)(struct ib_ucontext *context); int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); + /** + * This will be called once refcount of an entry in mmap_xa reaches + * zero. The type of the memory that was mapped may differ between + * entries and is opaque to the rdma_user_mmap interface. + * Therefore needs to be implemented by the driver in mmap_free. + */ + void (*mmap_free)(struct rdma_user_mmap_entry *entry); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata); void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); @@ -2815,6 +2838,18 @@ static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext, return -EINVAL; } #endif +int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length); +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, + unsigned long pgoff); +struct rdma_user_mmap_entry * +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, + struct vm_area_struct *vma); +void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry); + +void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry); static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) { -- cgit v1.2.3-59-g8ed1b From c043ff2cfb7f6fdd9a1cb1a7ba3800f19b70bf65 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:12 +0200 Subject: RDMA: Connect between the mmap entry and the umap_priv structure The rdma_user_mmap_io interface created a common interface for drivers to correctly map hw resources and zap them once the ucontext is destroyed enabling the drivers to safely free the hw resources. However, this meant the drivers need to delay freeing the resource to the ucontext destroy phase to ensure they were no longer mapped. The new mechanism for a common way of handling user/driver address mapping enabled notifying the driver if all umap_priv mappings were removed, and enabled freeing the hw resources when they are done with and not delay it until ucontext destroy. Since not all drivers use the mechanism, NULL can be sent to the rdma_user_mmap_io interface to continue working as before. Drivers that use the mmap_xa interface can pass the entry being mapped to the rdma_user_mmap_io function to be linked together. Link: https://lore.kernel.org/r/20191030094417.16866-4-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 4 ++- drivers/infiniband/core/ib_core_uverbs.c | 48 ++++++++++++++++++++++++------- drivers/infiniband/core/uverbs_main.c | 10 ++++++- drivers/infiniband/hw/efa/efa_verbs.c | 6 ++-- drivers/infiniband/hw/hns/hns_roce_main.c | 6 ++-- drivers/infiniband/hw/mlx4/main.c | 9 ++++-- drivers/infiniband/hw/mlx5/main.c | 8 ++++-- include/rdma/ib_verbs.h | 13 ++------- 8 files changed, 70 insertions(+), 34 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 0252da9560f4..6be180eb8cb3 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -391,9 +391,11 @@ void rdma_nl_net_exit(struct rdma_dev_net *rnet); struct rdma_umap_priv { struct vm_area_struct *vma; struct list_head list; + struct rdma_user_mmap_entry *entry; }; void rdma_umap_priv_init(struct rdma_umap_priv *priv, - struct vm_area_struct *vma); + struct vm_area_struct *vma, + struct rdma_user_mmap_entry *entry); #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c index aacd84a45de6..f509c478b469 100644 --- a/drivers/infiniband/core/ib_core_uverbs.c +++ b/drivers/infiniband/core/ib_core_uverbs.c @@ -8,23 +8,36 @@ #include "uverbs.h" #include "core_priv.h" -/* - * Each time we map IO memory into user space this keeps track of the mapping. - * When the device is hot-unplugged we 'zap' the mmaps in user space to point - * to the zero page and allow the hot unplug to proceed. +/** + * rdma_umap_priv_init() - Initialize the private data of a vma + * + * @priv: The already allocated private data + * @vma: The vm area struct that needs private data + * @entry: entry into the mmap_xa that needs to be linked with + * this vma + * + * Each time we map IO memory into user space this keeps track of the + * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space + * to point to the zero page and allow the hot unplug to proceed. * * This is necessary for cases like PCI physical hot unplug as the actual BAR * memory may vanish after this and access to it from userspace could MCE. * * RDMA drivers supporting disassociation must have their user space designed * to cope in some way with their IO pages going to the zero page. + * */ void rdma_umap_priv_init(struct rdma_umap_priv *priv, - struct vm_area_struct *vma) + struct vm_area_struct *vma, + struct rdma_user_mmap_entry *entry) { struct ib_uverbs_file *ufile = vma->vm_file->private_data; priv->vma = vma; + if (entry) { + kref_get(&entry->ref); + priv->entry = entry; + } vma->vm_private_data = priv; /* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */ @@ -34,13 +47,26 @@ void rdma_umap_priv_init(struct rdma_umap_priv *priv, } EXPORT_SYMBOL(rdma_umap_priv_init); -/* - * Map IO memory into a process. This is to be called by drivers as part of - * their mmap() functions if they wish to send something like PCI-E BAR memory - * to userspace. +/** + * rdma_user_mmap_io() - Map IO memory into a process + * + * @ucontext: associated user context + * @vma: the vma related to the current mmap call + * @pfn: pfn to map + * @size: size to map + * @prot: pgprot to use in remap call + * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL + * if mmap_entry is not used by the driver + * + * This is to be called by drivers as part of their mmap() functions if they + * wish to send something like PCI-E BAR memory to userspace. + * + * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on + * success. */ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, pgprot_t prot) + unsigned long pfn, unsigned long size, pgprot_t prot, + struct rdma_user_mmap_entry *entry) { struct ib_uverbs_file *ufile = ucontext->ufile; struct rdma_umap_priv *priv; @@ -67,7 +93,7 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, return -EAGAIN; } - rdma_umap_priv_init(priv, vma); + rdma_umap_priv_init(priv, vma, entry); return 0; } EXPORT_SYMBOL(rdma_user_mmap_io); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index b1f5334ff907..9aa7ffc1d12a 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -819,7 +819,7 @@ static void rdma_umap_open(struct vm_area_struct *vma) priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) goto out_unlock; - rdma_umap_priv_init(priv, vma); + rdma_umap_priv_init(priv, vma, opriv->entry); up_read(&ufile->hw_destroy_rwsem); return; @@ -850,6 +850,9 @@ static void rdma_umap_close(struct vm_area_struct *vma) * this point. */ mutex_lock(&ufile->umap_lock); + if (priv->entry) + rdma_user_mmap_entry_put(priv->entry); + list_del(&priv->list); mutex_unlock(&ufile->umap_lock); kfree(priv); @@ -950,6 +953,11 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); + + if (priv->entry) { + rdma_user_mmap_entry_put(priv->entry); + priv->entry = NULL; + } } mutex_unlock(&ufile->umap_lock); skip_mm: diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 4edae89e8e3c..37e3d62bbb51 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1612,11 +1612,13 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, switch (entry->mmap_flag) { case EFA_MMAP_IO_NC: err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); break; case EFA_MMAP_IO_WC: err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, - pgprot_writecombine(vma->vm_page_prot)); + pgprot_writecombine(vma->vm_page_prot), + NULL); break; case EFA_MMAP_DMA_PAGE: for (va = vma->vm_start; va < vma->vm_end; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index b5d196c119ee..803dc6f4b496 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -359,7 +359,8 @@ static int hns_roce_mmap(struct ib_ucontext *context, return rdma_user_mmap_io(context, vma, to_hr_ucontext(context)->uar.pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); /* vm_pgoff: 1 -- TPTR */ case 1: @@ -372,7 +373,8 @@ static int hns_roce_mmap(struct ib_ucontext *context, return rdma_user_mmap_io(context, vma, hr_dev->tptr_dma_addr >> PAGE_SHIFT, hr_dev->tptr_size, - vma->vm_page_prot); + vma->vm_page_prot, + NULL); default: return -EINVAL; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8d2f1e38b891..f89b129b7e3a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1146,7 +1146,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return rdma_user_mmap_io(context, vma, to_mucontext(context)->uar.pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); case 1: if (dev->dev->caps.bf_reg_size == 0) @@ -1155,7 +1156,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) context, vma, to_mucontext(context)->uar.pfn + dev->dev->caps.num_uars, - PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot)); + PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot), + NULL); case 3: { struct mlx4_clock_params params; @@ -1171,7 +1173,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) params.bar) + params.offset) >> PAGE_SHIFT, - PAGE_SIZE, pgprot_noncached(vma->vm_page_prot)); + PAGE_SIZE, pgprot_noncached(vma->vm_page_prot), + NULL); } default: diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 22db87278c40..2a510de4ae0f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2156,7 +2156,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, - prot); + prot, NULL); if (err) { mlx5_ib_err(dev, "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n", @@ -2198,7 +2198,8 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) PAGE_SHIFT) + page_idx; return rdma_user_mmap_io(context, vma, pfn, map_size, - pgprot_writecombine(vma->vm_page_prot)); + pgprot_writecombine(vma->vm_page_prot), + NULL); } static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) @@ -2236,7 +2237,8 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm PAGE_SHIFT; return rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); case MLX5_IB_MMAP_CLOCK_INFO: return mlx5_ib_mmap_clock_info_page(dev, vma, context); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8865ec28180a..416e72ea80d9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2826,18 +2826,9 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client, void ib_set_device_ops(struct ib_device *device, const struct ib_device_ops *ops); -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, pgprot_t prot); -#else -static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, - pgprot_t prot) -{ - return -EINVAL; -} -#endif + unsigned long pfn, unsigned long size, pgprot_t prot, + struct rdma_user_mmap_entry *entry); int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, struct rdma_user_mmap_entry *entry, size_t length); -- cgit v1.2.3-59-g8ed1b From 8a80cf93106045cd9e36330e153d0e606428619e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:30 +0200 Subject: RDMA/mad: Delete never implemented functions Delete never implemented and used MAD functions. Link: https://lore.kernel.org/r/20191029062745.7932-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 19 ------------------- include/rdma/ib_mad.h | 40 ---------------------------------------- 2 files changed, 59 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 9947d16edef2..8adb487eb314 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1397,25 +1397,6 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) } EXPORT_SYMBOL(ib_free_recv_mad); -struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, - u8 rmpp_version, - ib_mad_send_handler send_handler, - ib_mad_recv_handler recv_handler, - void *context) -{ - return ERR_PTR(-EINVAL); /* XXX: for now */ -} -EXPORT_SYMBOL(ib_redirect_mad_qp); - -int ib_process_mad_wc(struct ib_mad_agent *mad_agent, - struct ib_wc *wc) -{ - dev_err(&mad_agent->device->dev, - "ib_process_mad_wc() not implemented yet\n"); - return 0; -} -EXPORT_SYMBOL(ib_process_mad_wc); - static int method_in_use(struct ib_mad_mgmt_method_table **method, struct ib_mad_reg_req *mad_reg_req) { diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index eea946fcc819..4e62650e2127 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -814,46 +814,6 @@ void ib_cancel_mad(struct ib_mad_agent *mad_agent, int ib_modify_mad(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, u32 timeout_ms); -/** - * ib_redirect_mad_qp - Registers a QP for MAD services. - * @qp: Reference to a QP that requires MAD services. - * @rmpp_version: If set, indicates that the client will send - * and receive MADs that contain the RMPP header for the given version. - * If set to 0, indicates that RMPP is not used by this client. - * @send_handler: The completion callback routine invoked after a send - * request has completed. - * @recv_handler: The completion callback routine invoked for a received - * MAD. - * @context: User specified context associated with the registration. - * - * Use of this call allows clients to use MAD services, such as RMPP, - * on user-owned QPs. After calling this routine, users may send - * MADs on the specified QP by calling ib_mad_post_send. - */ -struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, - u8 rmpp_version, - ib_mad_send_handler send_handler, - ib_mad_recv_handler recv_handler, - void *context); - -/** - * ib_process_mad_wc - Processes a work completion associated with a - * MAD sent or received on a redirected QP. - * @mad_agent: Specifies the registered MAD service using the redirected QP. - * @wc: References a work completion associated with a sent or received - * MAD segment. - * - * This routine is used to complete or continue processing on a MAD request. - * If the work completion is associated with a send operation, calling - * this routine is required to continue an RMPP transfer or to wait for a - * corresponding response, if it is a request. If the work completion is - * associated with a receive operation, calling this routine is required to - * process an inbound or outbound RMPP transfer, or to match a response MAD - * with its corresponding request. - */ -int ib_process_mad_wc(struct ib_mad_agent *mad_agent, - struct ib_wc *wc); - /** * ib_create_send_mad - Allocate and initialize a data buffer and work request * for sending a MAD. -- cgit v1.2.3-59-g8ed1b From e26e7b88f6b7482cbff633c6fc9eaee3ecbd41b1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:45 +0200 Subject: RDMA: Change MAD processing function to remove extra casting and parameter All users of process_mad() converts input pointers from ib_mad_hdr to be ib_mad, update the function declaration to use ib_mad directly. Also remove not used input MAD size parameter. Link: https://lore.kernel.org/r/20191029062745.7932-17-leon@kernel.org Signed-off-by: Leon Romanovsky Tested-By: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 12 +++--- drivers/infiniband/core/sysfs.c | 6 +-- drivers/infiniband/hw/hfi1/mad.c | 13 +++--- drivers/infiniband/hw/hfi1/verbs.h | 5 +-- drivers/infiniband/hw/mlx4/mad.c | 25 +++++------- drivers/infiniband/hw/mlx4/mlx4_ib.h | 7 ++-- drivers/infiniband/hw/mlx5/mad.c | 24 +++++------ drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +-- drivers/infiniband/hw/mthca/mthca_dev.h | 12 ++---- drivers/infiniband/hw/mthca/mthca_mad.c | 70 ++++++++++++++------------------ drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 17 +++----- drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 11 ++--- drivers/infiniband/hw/qedr/verbs.c | 17 ++------ drivers/infiniband/hw/qedr/verbs.h | 7 ++-- drivers/infiniband/hw/qib/qib_mad.c | 15 +++---- drivers/infiniband/hw/qib/qib_verbs.h | 5 +-- include/rdma/ib_verbs.h | 7 ++-- 17 files changed, 104 insertions(+), 154 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 8adb487eb314..c54db13fa9b0 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -913,9 +913,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, /* No GRH for DR SMP */ ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL, - (const struct ib_mad_hdr *)smp, mad_size, - (struct ib_mad_hdr *)mad_priv->mad, - &mad_size, &out_mad_pkey_index); + (const struct ib_mad *)smp, + (struct ib_mad *)mad_priv->mad, &mad_size, + &out_mad_pkey_index); switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: @@ -2321,9 +2321,9 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) if (port_priv->device->ops.process_mad) { ret = port_priv->device->ops.process_mad( port_priv->device, 0, port_priv->port_num, wc, - &recv->grh, (const struct ib_mad_hdr *)recv->mad, - recv->mad_size, (struct ib_mad_hdr *)response->mad, - &mad_size, &resp_mad_pkey_index); + &recv->grh, (const struct ib_mad *)recv->mad, + (struct ib_mad *)response->mad, &mad_size, + &resp_mad_pkey_index); if (opa) wc->pkey_index = resp_mad_pkey_index; diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 62c756ea5668..087682e6969e 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -497,10 +497,8 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr, if (attr != IB_PMA_CLASS_PORT_INFO) in_mad->data[41] = port_num; /* PortSelect field */ - if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, - port_num, NULL, NULL, - (const struct ib_mad_hdr *)in_mad, mad_size, - (struct ib_mad_hdr *)out_mad, &mad_size, + if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, port_num, NULL, NULL, + in_mad, out_mad, &mad_size, &out_mad_pkey_index) & (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) != (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) { diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index a54746f4a0ae..a51bcd2b4391 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -4915,11 +4915,10 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port, */ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index) { - switch (in_mad->base_version) { + switch (in_mad->mad_hdr.base_version) { case OPA_MGMT_BASE_VERSION: return hfi1_process_opa_mad(ibdev, mad_flags, port, in_wc, in_grh, @@ -4928,10 +4927,8 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, out_mad_size, out_mad_pkey_index); case IB_MGMT_BASE_VERSION: - return hfi1_process_ib_mad(ibdev, mad_flags, port, - in_wc, in_grh, - (const struct ib_mad *)in_mad, - (struct ib_mad *)out_mad); + return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc, + in_grh, in_mad, out_mad); default: break; } diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index ae9582ddbc8f..b0e9bf7cd150 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -330,9 +330,8 @@ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp); void hfi1_node_desc_chg(struct hfi1_ibport *ibp); int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index); /* * The PSN_MASK and PSN_SHIFT allow for diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index c6ea4c50da1e..abe68708d6d6 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -983,13 +983,10 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { struct mlx4_ib_dev *dev = to_mdev(ibdev); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num); /* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA @@ -997,20 +994,20 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, */ if (link == IB_LINK_LAYER_INFINIBAND) { if (mlx4_is_slave(dev->dev) && - (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && - (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS || - in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT || - in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO))) - return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && + (in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS || + in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT || + in->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO))) + return iboe_process_mad(ibdev, mad_flags, port_num, + in_wc, in_grh, in, out); - return ib_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + return ib_process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, + in, out); } if (link == IB_LINK_LAYER_ETHERNET) return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + in_grh, in, out); return -EINVAL; } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index eb53bb4c0c91..0d846fea8fdc 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -786,11 +786,10 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); -int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, +int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 1c87412a162f..14e0c17de6a9 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -219,15 +219,12 @@ done: int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - u8 mgmt_class = in_mad->mad_hdr.mgmt_class; - u8 method = in_mad->mad_hdr.method; + u8 mgmt_class = in->mad_hdr.mgmt_class; + u8 method = in->mad_hdr.method; u16 slid; int err; @@ -247,13 +244,13 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, /* Don't process SMInfo queries -- the SMA can't handle them. */ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) + if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) return IB_MAD_RESULT_SUCCESS; } break; case IB_MGMT_CLASS_PERF_MGMT: if (MLX5_CAP_GEN(dev->mdev, vport_counters) && method == IB_MGMT_METHOD_GET) - return process_pma_cmd(dev, port_num, in_mad, out_mad); + return process_pma_cmd(dev, port_num, in, out); /* fallthrough */ case MLX5_IB_VENDOR_CLASS1: /* fallthrough */ @@ -267,16 +264,15 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS; } - err = mlx5_MAD_IFC(to_mdev(ibdev), - mad_flags & IB_MAD_IGNORE_MKEY, - mad_flags & IB_MAD_IGNORE_BKEY, - port_num, in_wc, in_grh, in_mad, out_mad); + err = mlx5_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc, + in_grh, in, out); if (err) return IB_MAD_RESULT_FAILURE; /* set return bit in status of directed route responses */ if (mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + out->mad_hdr.status |= cpu_to_be16(1 << 15); if (method == IB_MGMT_METHOD_TRAP_REPRESS) /* no response for trap repress */ diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 0bdb8b45ea15..933a2059886b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1192,9 +1192,8 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, unsigned int *meta_sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index bfd4eebc1182..599794c5a78f 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -576,14 +576,10 @@ enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port); int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); -int mthca_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); +int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); int mthca_create_agents(struct mthca_dev *dev); void mthca_free_agents(struct mthca_dev *dev); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 0893604d2a62..99aa8183a7f2 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -196,26 +196,19 @@ static void forward_trap(struct mthca_dev *dev, } } -int mthca_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) +int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { int err; u16 slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); u16 prev_lid = 0; struct ib_port_attr pattr; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; /* Forward locally generated traps to the SM */ - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && - slid == 0) { - forward_trap(to_mdev(ibdev), port_num, in_mad); + if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP && !slid) { + forward_trap(to_mdev(ibdev), port_num, in); return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; } @@ -225,40 +218,39 @@ int mthca_process_mad(struct ib_device *ibdev, * Only handle PMA and Mellanox vendor-specific class gets and * sets for other classes. */ - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (in->mad_hdr.method != IB_MGMT_METHOD_GET && + in->mad_hdr.method != IB_MGMT_METHOD_SET && + in->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) return IB_MAD_RESULT_SUCCESS; /* * Don't process SMInfo queries or vendor-specific * MADs -- the SMA can't handle them. */ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || - ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == + if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || + ((in->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == IB_SMP_ATTR_VENDOR_MASK)) return IB_MAD_RESULT_SUCCESS; - } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || - in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 || - in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) + } else if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || + in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 || + in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) { + if (in->mad_hdr.method != IB_MGMT_METHOD_GET && + in->mad_hdr.method != IB_MGMT_METHOD_SET) return IB_MAD_RESULT_SUCCESS; } else return IB_MAD_RESULT_SUCCESS; - if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && - in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && - in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && + if ((in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + in->mad_hdr.method == IB_MGMT_METHOD_SET && + in->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) prev_lid = ib_lid_cpu16(pattr.lid); - err = mthca_MAD_IFC(to_mdev(ibdev), - mad_flags & IB_MAD_IGNORE_MKEY, - mad_flags & IB_MAD_IGNORE_BKEY, - port_num, in_wc, in_grh, in_mad, out_mad); + err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc, + in_grh, in, out); if (err == -EBADMSG) return IB_MAD_RESULT_SUCCESS; else if (err) { @@ -266,16 +258,16 @@ int mthca_process_mad(struct ib_device *ibdev, return IB_MAD_RESULT_FAILURE; } - if (!out_mad->mad_hdr.status) { - smp_snoop(ibdev, port_num, in_mad, prev_lid); - node_desc_override(ibdev, out_mad); + if (!out->mad_hdr.status) { + smp_snoop(ibdev, port_num, in, prev_lid); + node_desc_override(ibdev, out); } /* set return bit in status of directed route responses */ - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out->mad_hdr.status |= cpu_to_be16(1 << 15); - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) + if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) /* no response for trap repress */ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 4098508b9240..2b7f00ac41b0 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -247,23 +247,18 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -int ocrdma_process_mad(struct ib_device *ibdev, - int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, +int ocrdma_process_mad(struct ib_device *ibdev, int process_mad_flags, + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index) { int status = IB_MAD_RESULT_SUCCESS; struct ocrdma_dev *dev; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { dev = get_ocrdma_dev(ibdev); - ocrdma_pma_counters(dev, out_mad); + ocrdma_pma_counters(dev, out); status |= IB_MAD_RESULT_REPLY; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 64cb82c08664..9780afcde780 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -56,12 +56,9 @@ int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int ocrdma_process_mad(struct ib_device *, - int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, +int ocrdma_process_mad(struct ib_device *dev, int process_mad_flags, + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index); #endif /* __OCRDMA_AH_H__ */ diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 8096b8fcab4e..fea95faa3b21 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -4346,19 +4346,10 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) } int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *mad_hdr, - size_t in_mad_size, struct ib_mad_hdr *out_mad, - size_t *out_mad_size, u16 *out_mad_pkey_index) + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad *out_mad, size_t *out_mad_size, + u16 *out_mad_pkey_index) { - struct qedr_dev *dev = get_qedr_dev(ibdev); - - DP_DEBUG(dev, QEDR_MSG_GSI, - "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n", - mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod, - mad_hdr->class_specific, mad_hdr->class_version, - mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status); return IB_MAD_RESULT_SUCCESS; } diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 620472116c6d..18027844eb87 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -92,10 +92,9 @@ int qedr_post_recv(struct ib_qp *, const struct ib_recv_wr *, const struct ib_recv_wr **bad_wr); int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, u8 port_num, const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, - size_t in_mad_size, struct ib_mad_hdr *out_mad, - size_t *out_mad_size, u16 *out_mad_pkey_index); + const struct ib_grh *in_grh, const struct ib_mad *in_mad, + struct ib_mad *out_mad, size_t *out_mad_size, + u16 *out_mad_pkey_index); int qedr_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index b259aaf85d4a..79bb83222e8d 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2386,24 +2386,21 @@ bail: */ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { int ret; struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - switch (in_mad->mad_hdr.mgmt_class) { + switch (in->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: case IB_MGMT_CLASS_SUBN_LID_ROUTED: - ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad); + ret = process_subn(ibdev, mad_flags, port, in, out); goto bail; case IB_MGMT_CLASS_PERF_MGMT: - ret = process_perf(ibdev, port, in_mad, out_mad); + ret = process_perf(ibdev, port, in, out); goto bail; case IB_MGMT_CLASS_CONG_MGMT: @@ -2412,7 +2409,7 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, ret = IB_MAD_RESULT_SUCCESS; goto bail; } - ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad); + ret = process_cc(ibdev, mad_flags, port, in, out); goto bail; default: diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 17bdf8acee2f..8bf414b47b96 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -245,9 +245,8 @@ void qib_sys_guid_chg(struct qib_ibport *ibp); void qib_node_desc_chg(struct qib_ibport *ibp); int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx); void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 416e72ea80d9..663fa16caa76 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2123,7 +2123,7 @@ struct ib_flow_action { atomic_t usecnt; }; -struct ib_mad_hdr; +struct ib_mad; struct ib_grh; enum ib_process_mad_flags { @@ -2301,9 +2301,8 @@ struct ib_device_ops { int (*process_mad)(struct ib_device *device, int process_mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index); int (*query_device)(struct ib_device *device, struct ib_device_attr *device_attr, struct ib_udata *udata); -- cgit v1.2.3-59-g8ed1b From 72b894b09a96b741c92562709f6629310f2b34a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Nov 2019 08:32:14 +0100 Subject: IB/umem: remove the dmasync argument to ib_umem_get The argument is always ignored, so remove it. Link: https://lore.kernel.org/r/20191113073214.9514-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Acked-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 3 +-- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 10 ++++----- drivers/infiniband/hw/cxgb4/mem.c | 2 +- drivers/infiniband/hw/efa/efa_verbs.c | 2 +- drivers/infiniband/hw/hns/hns_roce_cq.c | 2 +- drivers/infiniband/hw/hns/hns_roce_db.c | 2 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 4 ++-- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 +- drivers/infiniband/hw/mlx4/cq.c | 2 +- drivers/infiniband/hw/mlx4/doorbell.c | 2 +- drivers/infiniband/hw/mlx4/mr.c | 2 +- drivers/infiniband/hw/mlx4/qp.c | 5 ++--- drivers/infiniband/hw/mlx4/srq.c | 2 +- drivers/infiniband/hw/mlx5/cq.c | 4 ++-- drivers/infiniband/hw/mlx5/devx.c | 2 +- drivers/infiniband/hw/mlx5/doorbell.c | 2 +- drivers/infiniband/hw/mlx5/mr.c | 2 +- drivers/infiniband/hw/mlx5/qp.c | 4 ++-- drivers/infiniband/hw/mlx5/srq.c | 2 +- drivers/infiniband/hw/mthca/mthca_provider.c | 4 +--- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- drivers/infiniband/hw/qedr/verbs.c | 29 ++++++++++++--------------- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 4 ++-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 2 +- drivers/infiniband/sw/rdmavt/mr.c | 2 +- drivers/infiniband/sw/rxe/rxe_mr.c | 2 +- include/rdma/ib_umem.h | 4 ++-- 31 files changed, 54 insertions(+), 61 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 66148739b00f..7a3b99597ead 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -185,10 +185,9 @@ EXPORT_SYMBOL(ib_umem_find_best_pgsz); * @addr: userspace virtual address to start at * @size: length of region to pin * @access: IB_ACCESS_xxx flags for memory being pinned - * @dmasync: flush in-flight DMA when the memory region is written */ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, - size_t size, int access, int dmasync) + size_t size, int access) { struct ib_ucontext *context; struct ib_umem *umem; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 8afd7d93cfe4..9b6ca15a183c 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -837,7 +837,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes += (qplib_qp->sq.max_wqe * psn_sz); } bytes = PAGE_ALIGN(bytes); - umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE, 1); + umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) return PTR_ERR(umem); @@ -851,7 +851,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); bytes = PAGE_ALIGN(bytes); umem = ib_umem_get(udata, ureq.qprva, bytes, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) goto rqfail; qp->rumem = umem; @@ -1304,7 +1304,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); bytes = PAGE_ALIGN(bytes); - umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE, 1); + umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) return PTR_ERR(umem); @@ -2547,7 +2547,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->umem = ib_umem_get(udata, req.cq_va, entries * sizeof(struct cq_base), - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->umem)) { rc = PTR_ERR(cq->umem); goto fail; @@ -3512,7 +3512,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, /* The fixed portion of the rkey is the same as the lkey */ mr->ib_mr.rkey = mr->qplib_mr.rkey; - umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + umem = ib_umem_get(udata, start, length, mr_access_flags); if (IS_ERR(umem)) { dev_err(rdev_to_dev(rdev), "Failed to get umem"); rc = -EFAULT; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 35c284af574d..fe3a7e8561df 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -543,7 +543,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mhp->rhp = rhp; - mhp->umem = ib_umem_get(udata, start, length, acc, 0); + mhp->umem = ib_umem_get(udata, start, length, acc); if (IS_ERR(mhp->umem)) goto err_free_skb; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index b242ea7a3bc8..657baa63faec 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1371,7 +1371,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, goto err_out; } - mr->umem = ib_umem_get(udata, start, length, access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); ibdev_dbg(&dev->ibdev, diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index e25fa19c249c..cde2960328df 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -219,7 +219,7 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, u32 npages; *umem = ib_umem_get(udata, buf_addr, cqe * hr_dev->caps.cq_entry_sz, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index c00714c2f16a..10af6958ab69 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -31,7 +31,7 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, refcount_set(&page->refcount, 1); page->user_virt = page_addr; - page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { ret = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 6589e283cc77..9ad19170c3f9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1145,7 +1145,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(udata, start, length, access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(mr->umem)) { ret = PTR_ERR(mr->umem); goto err_free; @@ -1230,7 +1230,7 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags, } ib_umem_release(mr->umem); - mr->umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, mr_access_flags); if (IS_ERR(mr->umem)) { ret = PTR_ERR(mr->umem); mr->umem = NULL; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 9442f017db02..a6565b674801 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -745,7 +745,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } hr_qp->umem = ib_umem_get(udata, ucmd.buf_addr, - hr_qp->buff_size, 0, 0); + hr_qp->buff_size, 0); if (IS_ERR(hr_qp->umem)) { dev_err(dev, "ib_umem_get error for create qp\n"); ret = PTR_ERR(hr_qp->umem); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index a1bfa51c67fe..0cceae0fb4af 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -186,7 +186,7 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) return -EFAULT; - srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0); if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); @@ -206,7 +206,7 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, /* config index queue BA */ srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr, - srq->idx_que.buf_size, 0, 0); + srq->idx_que.buf_size, 0); if (IS_ERR(srq->idx_que.umem)) { dev_err(hr_dev->dev, "ib_umem_get error for index queue\n"); ret = PTR_ERR(srq->idx_que.umem); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index cd9ee1664a69..86375947bc67 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1763,7 +1763,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, if (length > I40IW_MAX_MR_SIZE) return ERR_PTR(-EINVAL); - region = ib_umem_get(udata, start, length, acc, 0); + region = ib_umem_get(udata, start, length, acc); if (IS_ERR(region)) return (struct ib_mr *)region; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index a7d238d312f0..306b21281fa2 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -145,7 +145,7 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata, int n; *umem = ib_umem_get(udata, buf_addr, cqe * cqe_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index 0f390351cef0..714f9df5bf39 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -64,7 +64,7 @@ int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 6ae503cfc526..dfa17bcdcdbc 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -398,7 +398,7 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start, up_read(¤t->mm->mmap_sem); } - return ib_umem_get(udata, start, length, access_flags, 0); + return ib_umem_get(udata, start, length, access_flags); } struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index bd4aa04416c6..85f57b76e446 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -916,7 +916,7 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + (qp->sq.wqe_cnt << qp->sq.wqe_shift); - qp->umem = ib_umem_get(udata, wq.buf_addr, qp->buf_size, 0, 0); + qp->umem = ib_umem_get(udata, wq.buf_addr, qp->buf_size, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; @@ -1110,8 +1110,7 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, if (err) goto err; - qp->umem = - ib_umem_get(udata, ucmd.buf_addr, qp->buf_size, 0, 0); + qp->umem = ib_umem_get(udata, ucmd.buf_addr, qp->buf_size, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 848db7264cc9..8dcf6e3d9ae2 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -110,7 +110,7 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) return -EFAULT; - srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0); if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index cc938d27f913..dd8d24ee8e1d 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -709,7 +709,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, cq->buf.umem = ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->buf.umem)) { err = PTR_ERR(cq->buf.umem); return err; @@ -1110,7 +1110,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, umem = ib_umem_get(udata, ucmd.buf_addr, (size_t)ucmd.cqe_size * entries, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) { err = PTR_ERR(umem); return err; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 2a5812a4c3bb..9d0a18cf9e5e 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2134,7 +2134,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0); + obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 8f4e5f22b84c..12737c509aa2 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -64,7 +64,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 077ca10d9ed9..60d39b9ec41c 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -764,7 +764,7 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, if (order) *order = ilog2(roundup_pow_of_two(*ncont)); } else { - u = ib_umem_get(udata, start, length, access_flags, 0); + u = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(u)) { mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); return PTR_ERR(u); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index c831b455ffa8..26fb0227a514 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -749,7 +749,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, { int err; - *umem = ib_umem_get(udata, addr, size, 0, 0); + *umem = ib_umem_get(udata, addr, size, 0); if (IS_ERR(*umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); return PTR_ERR(*umem); @@ -806,7 +806,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (!ucmd->buf_addr) return -EINVAL; - rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0, 0); + rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0); if (IS_ERR(rwq->umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); err = PTR_ERR(rwq->umem); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 4e7fde86c96b..62939df3c692 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -80,7 +80,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); - srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0); if (IS_ERR(srq->umem)) { mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size); err = PTR_ERR(srq->umem); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 23554d8bf241..33002530fee7 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -880,9 +880,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(udata, start, length, acc, - ucmd.mr_attrs & MTHCA_MR_DMASYNC); - + mr->umem = ib_umem_get(udata, start, length, acc); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e72050de5734..9bc1ca6f6f9e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -869,7 +869,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(status); - mr->umem = ib_umem_get(udata, start, len, acc, 0); + mr->umem = ib_umem_get(udata, start, len, acc); if (IS_ERR(mr->umem)) { status = -EFAULT; goto umem_err; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index fea95faa3b21..55b77d753d12 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -762,7 +762,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, struct qedr_dev *dev, struct qedr_userq *q, u64 buf_addr, size_t buf_len, bool requires_db_rec, - int access, int dmasync, + int access, int alloc_and_init) { u32 fw_pages; @@ -770,7 +770,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, q->buf_addr = buf_addr; q->buf_len = buf_len; - q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access, dmasync); + q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access); if (IS_ERR(q->umem)) { DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n", PTR_ERR(q->umem)); @@ -927,9 +927,8 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->cq_type = QEDR_CQ_TYPE_USER; rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr, - ureq.len, true, - IB_ACCESS_LOCAL_WRITE, - 1, 1); + ureq.len, true, IB_ACCESS_LOCAL_WRITE, + 1); if (rc) goto err0; @@ -1401,19 +1400,19 @@ static void qedr_free_srq_kernel_params(struct qedr_srq *srq) static int qedr_init_srq_user_params(struct ib_udata *udata, struct qedr_srq *srq, struct qedr_create_srq_ureq *ureq, - int access, int dmasync) + int access) { struct scatterlist *sg; int rc; rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr, - ureq->srq_len, false, access, dmasync, 1); + ureq->srq_len, false, access, 1); if (rc) return rc; srq->prod_umem = ib_umem_get(udata, ureq->prod_pair_addr, - sizeof(struct rdma_srq_producers), access, dmasync); + sizeof(struct rdma_srq_producers), access); if (IS_ERR(srq->prod_umem)) { qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); ib_umem_release(srq->usrq.umem); @@ -1510,7 +1509,7 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, goto err0; } - rc = qedr_init_srq_user_params(udata, srq, &ureq, 0, 0); + rc = qedr_init_srq_user_params(udata, srq, &ureq, 0); if (rc) goto err0; @@ -1751,18 +1750,16 @@ static int qedr_create_user_qp(struct qedr_dev *dev, return rc; } - /* SQ - read access only (0), dma sync not required (0) */ + /* SQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr, - ureq.sq_len, true, 0, 0, - alloc_and_init); + ureq.sq_len, true, 0, alloc_and_init); if (rc) return rc; if (!qp->srq) { - /* RQ - read access only (0), dma sync not required (0) */ + /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, - ureq.rq_len, true, - 0, 0, alloc_and_init); + ureq.rq_len, true, 0, alloc_and_init); if (rc) return rc; } @@ -2837,7 +2834,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr->type = QEDR_MR_USER; - mr->umem = ib_umem_get(udata, start, len, acc, 0); + mr->umem = ib_umem_get(udata, start, len, acc); if (IS_ERR(mr->umem)) { rc = -EFAULT; goto err0; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 7800e6930502..a26a4fd86bf4 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -136,7 +136,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } cq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->umem)) { ret = PTR_ERR(cq->umem); goto err_cq; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index f3a3d22ee8d7..c61e665ff261 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -126,7 +126,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(-EINVAL); } - umem = ib_umem_get(udata, start, length, access_flags, 0); + umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(umem)) { dev_warn(&dev->pdev->dev, "could not get umem for mem region\n"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 22daf2389d95..f15809c28f67 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -277,7 +277,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, if (!is_srq) { /* set qp->sq.wqe_cnt, shift, buf_size.. */ qp->rumem = ib_umem_get(udata, ucmd.rbuf_addr, - ucmd.rbuf_size, 0, 0); + ucmd.rbuf_size, 0); if (IS_ERR(qp->rumem)) { ret = PTR_ERR(qp->rumem); goto err_qp; @@ -289,7 +289,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, } qp->sumem = ib_umem_get(udata, ucmd.sbuf_addr, - ucmd.sbuf_size, 0, 0); + ucmd.sbuf_size, 0); if (IS_ERR(qp->sumem)) { if (!is_srq) ib_umem_release(qp->rumem); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 36cdfbdbd325..98c8be71d91d 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -146,7 +146,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, goto err_srq; } - srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0); if (IS_ERR(srq->umem)) { ret = PTR_ERR(srq->umem); goto err_srq; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index a6a39f01dca3..b9a76bf74857 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -390,7 +390,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (length == 0) return ERR_PTR(-EINVAL); - umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + umem = ib_umem_get(udata, start, length, mr_access_flags); if (IS_ERR(umem)) return (void *)umem; diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index ea6a819b7167..35a2baf2f364 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -169,7 +169,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, void *vaddr; int err; - umem = ib_umem_get(udata, start, length, access, 0); + umem = ib_umem_get(udata, start, length, access); if (IS_ERR(umem)) { pr_warn("err %d from rxe_umem_get\n", (int)PTR_ERR(umem)); diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index a91b2af64ec4..753f54e17e0a 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -70,7 +70,7 @@ static inline size_t ib_umem_num_pages(struct ib_umem *umem) #ifdef CONFIG_INFINIBAND_USER_MEM struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, - size_t size, int access, int dmasync); + size_t size, int access); void ib_umem_release(struct ib_umem *umem); int ib_umem_page_count(struct ib_umem *umem); int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, @@ -85,7 +85,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, static inline struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, size_t size, - int access, int dmasync) + int access) { return ERR_PTR(-EINVAL); } -- cgit v1.2.3-59-g8ed1b From bfcb3c5d14854f001881dc3f5cc29bf186598d9f Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Wed, 6 Nov 2019 15:08:32 +0200 Subject: IB/core: Add interfaces to get VF node and port GUIDs Provide ability to get node and port GUIDs of VFs to be symmetrical to already existing set option. Signed-off-by: Danit Goldberg Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/verbs.c | 10 ++++++++++ include/rdma/ib_verbs.h | 6 ++++++ 3 files changed, 17 insertions(+) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 99c4a55545cf..38fadbec054d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2614,6 +2614,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, get_port_immutable); SET_DEVICE_OP(dev_ops, get_vector_affinity); SET_DEVICE_OP(dev_ops, get_vf_config); + SET_DEVICE_OP(dev_ops, get_vf_guid); SET_DEVICE_OP(dev_ops, get_vf_stats); SET_DEVICE_OP(dev_ops, init_port); SET_DEVICE_OP(dev_ops, invalidate_range); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f974b6854224..7d96351c8d8c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2458,6 +2458,16 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, } EXPORT_SYMBOL(ib_set_vf_guid); +int ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid) +{ + if (!device->ops.get_vf_guid) + return -EOPNOTSUPP; + + return device->ops.get_vf_guid(device, vf, port, node_guid, port_guid); +} +EXPORT_SYMBOL(ib_get_vf_guid); /** * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection * information) and set an appropriate memory region for registration. diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6a47ba85c54c..ec7d1a1f8f31 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2448,6 +2448,9 @@ struct ib_device_ops { struct ifla_vf_info *ivf); int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); + int (*get_vf_guid)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, int type); struct ib_wq *(*create_wq)(struct ib_pd *pd, @@ -3303,6 +3306,9 @@ int ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info); int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); +int ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); -- cgit v1.2.3-59-g8ed1b