diff options
Diffstat (limited to 'net/rds')
-rw-r--r-- | net/rds/Kconfig | 6 | ||||
-rw-r--r-- | net/rds/Makefile | 2 | ||||
-rw-r--r-- | net/rds/af_rds.c | 32 | ||||
-rw-r--r-- | net/rds/bind.c | 2 | ||||
-rw-r--r-- | net/rds/cong.c | 2 | ||||
-rw-r--r-- | net/rds/connection.c | 35 | ||||
-rw-r--r-- | net/rds/ib.c | 51 | ||||
-rw-r--r-- | net/rds/ib.h | 9 | ||||
-rw-r--r-- | net/rds/ib_cm.c | 178 | ||||
-rw-r--r-- | net/rds/ib_fmr.c | 269 | ||||
-rw-r--r-- | net/rds/ib_frmr.c | 8 | ||||
-rw-r--r-- | net/rds/ib_mr.h | 14 | ||||
-rw-r--r-- | net/rds/ib_rdma.c | 28 | ||||
-rw-r--r-- | net/rds/ib_recv.c | 25 | ||||
-rw-r--r-- | net/rds/ib_ring.c | 2 | ||||
-rw-r--r-- | net/rds/ib_send.c | 9 | ||||
-rw-r--r-- | net/rds/info.c | 7 | ||||
-rw-r--r-- | net/rds/message.c | 31 | ||||
-rw-r--r-- | net/rds/rdma.c | 86 | ||||
-rw-r--r-- | net/rds/rdma_transport.c | 5 | ||||
-rw-r--r-- | net/rds/rdma_transport.h | 2 | ||||
-rw-r--r-- | net/rds/rds.h | 33 | ||||
-rw-r--r-- | net/rds/recv.c | 9 | ||||
-rw-r--r-- | net/rds/send.c | 15 | ||||
-rw-r--r-- | net/rds/tcp.c | 45 | ||||
-rw-r--r-- | net/rds/tcp.h | 9 | ||||
-rw-r--r-- | net/rds/tcp_connect.c | 8 | ||||
-rw-r--r-- | net/rds/tcp_listen.c | 56 | ||||
-rw-r--r-- | net/rds/tcp_recv.c | 2 | ||||
-rw-r--r-- | net/rds/tcp_send.c | 9 | ||||
-rw-r--r-- | net/rds/threads.c | 2 | ||||
-rw-r--r-- | net/rds/transport.c | 26 |
32 files changed, 362 insertions, 655 deletions
diff --git a/net/rds/Kconfig b/net/rds/Kconfig index c64e154bc18f..75cd696963b2 100644 --- a/net/rds/Kconfig +++ b/net/rds/Kconfig @@ -3,14 +3,14 @@ config RDS tristate "The Reliable Datagram Sockets Protocol" depends on INET - ---help--- + help The RDS (Reliable Datagram Sockets) protocol provides reliable, sequenced delivery of datagrams over Infiniband or TCP. config RDS_RDMA tristate "RDS over Infiniband" depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS - ---help--- + help Allow RDS to use Infiniband as a transport. This transport supports RDMA operations. @@ -18,7 +18,7 @@ config RDS_TCP tristate "RDS over TCP" depends on RDS depends on IPV6 || !IPV6 - ---help--- + help Allow RDS to use TCP as a transport. This transport does not support RDMA operations. diff --git a/net/rds/Makefile b/net/rds/Makefile index e647f9de104a..8fdc118e2927 100644 --- a/net/rds/Makefile +++ b/net/rds/Makefile @@ -7,7 +7,7 @@ rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \ obj-$(CONFIG_RDS_RDMA) += rds_rdma.o rds_rdma-y := rdma_transport.o \ ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \ - ib_sysctl.o ib_rdma.o ib_fmr.o ib_frmr.o + ib_sysctl.o ib_rdma.o ib_frmr.o obj-$(CONFIG_RDS_TCP) += rds_tcp.o diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 1a5bf3fa4578..3ff6995244e5 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -290,8 +290,7 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return 0; } -static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, - int len) +static int rds_cancel_sent_to(struct rds_sock *rs, sockptr_t optval, int len) { struct sockaddr_in6 sin6; struct sockaddr_in sin; @@ -308,14 +307,15 @@ static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, goto out; } else if (len < sizeof(struct sockaddr_in6)) { /* Assume IPv4 */ - if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) { + if (copy_from_sockptr(&sin, optval, + sizeof(struct sockaddr_in))) { ret = -EFAULT; goto out; } ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr); sin6.sin6_port = sin.sin_port; } else { - if (copy_from_user(&sin6, optval, + if (copy_from_sockptr(&sin6, optval, sizeof(struct sockaddr_in6))) { ret = -EFAULT; goto out; @@ -327,21 +327,20 @@ out: return ret; } -static int rds_set_bool_option(unsigned char *optvar, char __user *optval, +static int rds_set_bool_option(unsigned char *optvar, sockptr_t optval, int optlen) { int value; if (optlen < sizeof(int)) return -EINVAL; - if (get_user(value, (int __user *) optval)) + if (copy_from_sockptr(&value, optval, sizeof(int))) return -EFAULT; *optvar = !!value; return 0; } -static int rds_cong_monitor(struct rds_sock *rs, char __user *optval, - int optlen) +static int rds_cong_monitor(struct rds_sock *rs, sockptr_t optval, int optlen) { int ret; @@ -358,8 +357,7 @@ static int rds_cong_monitor(struct rds_sock *rs, char __user *optval, return ret; } -static int rds_set_transport(struct rds_sock *rs, char __user *optval, - int optlen) +static int rds_set_transport(struct rds_sock *rs, sockptr_t optval, int optlen) { int t_type; @@ -369,7 +367,7 @@ static int rds_set_transport(struct rds_sock *rs, char __user *optval, if (optlen != sizeof(int)) return -EINVAL; - if (copy_from_user(&t_type, (int __user *)optval, sizeof(t_type))) + if (copy_from_sockptr(&t_type, optval, sizeof(t_type))) return -EFAULT; if (t_type < 0 || t_type >= RDS_TRANS_COUNT) @@ -380,7 +378,7 @@ static int rds_set_transport(struct rds_sock *rs, char __user *optval, return rs->rs_transport ? 0 : -ENOPROTOOPT; } -static int rds_enable_recvtstamp(struct sock *sk, char __user *optval, +static int rds_enable_recvtstamp(struct sock *sk, sockptr_t optval, int optlen, int optname) { int val, valbool; @@ -388,7 +386,7 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval, if (optlen != sizeof(int)) return -EFAULT; - if (get_user(val, (int __user *)optval)) + if (copy_from_sockptr(&val, optval, sizeof(int))) return -EFAULT; valbool = val ? 1 : 0; @@ -404,7 +402,7 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval, return 0; } -static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval, +static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval, int optlen) { struct rds_rx_trace_so trace; @@ -413,7 +411,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval, if (optlen != sizeof(struct rds_rx_trace_so)) return -EFAULT; - if (copy_from_user(&trace, optval, sizeof(trace))) + if (copy_from_sockptr(&trace, optval, sizeof(trace))) return -EFAULT; if (trace.rx_traces > RDS_MSG_RX_DGRAM_TRACE_MAX) @@ -432,7 +430,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval, } static int rds_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct rds_sock *rs = rds_sk_to_rs(sock->sk); int ret; @@ -896,7 +894,7 @@ module_exit(rds_exit); u32 rds_gen_num; -static int rds_init(void) +static int __init rds_init(void) { int ret; diff --git a/net/rds/bind.c b/net/rds/bind.c index 5b5fb4ca8d3e..97a29172a8ee 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -104,7 +104,7 @@ static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr, return -EINVAL; last = rover; } else { - rover = max_t(u16, prandom_u32(), 2); + rover = max_t(u16, get_random_u16(), 2); last = rover - 1; } diff --git a/net/rds/cong.c b/net/rds/cong.c index ccdff09a79c8..8b689ebbd5b5 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -236,7 +236,7 @@ void rds_cong_queue_updates(struct rds_cong_map *map) * tcp_setsockopt and/or tcp_sendmsg will deadlock * when it tries to get the sock_lock()) * 2. Interrupts are masked so that we can mark the - * the port congested from both send and recv paths. + * port congested from both send and recv paths. * (See comment around declaration of rdc_cong_lock). * An attempt to get the sock_lock() here will * therefore trigger warnings. diff --git a/net/rds/connection.c b/net/rds/connection.c index ed7f2133acc2..b4cc699c5fad 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -240,12 +240,24 @@ static struct rds_connection *__rds_conn_create(struct net *net, if (loop_trans) { rds_trans_put(loop_trans); conn->c_loopback = 1; - if (is_outgoing && trans->t_prefer_loopback) { - /* "outgoing" connection - and the transport - * says it wants the connection handled by the - * loopback transport. This is what TCP does. - */ - trans = &rds_loop_transport; + if (trans->t_prefer_loopback) { + if (likely(is_outgoing)) { + /* "outgoing" connection to local address. + * Protocol says it wants the connection + * handled by the loopback transport. + * This is what TCP does. + */ + trans = &rds_loop_transport; + } else { + /* No transport currently in use + * should end up here, but if it + * does, reset/destroy the connection. + */ + kfree(conn->c_path); + kmem_cache_free(rds_conn_slab, conn); + conn = ERR_PTR(-EOPNOTSUPP); + goto out; + } } } @@ -905,6 +917,17 @@ void rds_conn_path_connect_if_down(struct rds_conn_path *cp) } EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down); +/* Check connectivity of all paths + */ +void rds_check_all_paths(struct rds_connection *conn) +{ + int i = 0; + + do { + rds_conn_path_connect_if_down(&conn->c_path[i]); + } while (++i < conn->c_npaths); +} + void rds_conn_connect_if_down(struct rds_connection *conn) { WARN_ON(conn->c_trans->t_mp_capable); diff --git a/net/rds/ib.c b/net/rds/ib.c index a792d8a3872a..9826fe7f9d00 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -30,7 +30,6 @@ * SOFTWARE. * */ -#include <linux/dmapool.h> #include <linux/kernel.h> #include <linux/in.h> #include <linux/if.h> @@ -108,7 +107,6 @@ static void rds_ib_dev_free(struct work_struct *work) rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); if (rds_ibdev->pd) ib_dealloc_pd(rds_ibdev->pd); - dma_pool_destroy(rds_ibdev->rid_hdrs_pool); list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { list_del(&i_ipaddr->list); @@ -127,19 +125,23 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) queue_work(rds_wq, &rds_ibdev->free_work); } -static void rds_ib_add_one(struct ib_device *device) +static int rds_ib_add_one(struct ib_device *device) { struct rds_ib_device *rds_ibdev; - bool has_fr, has_fmr; + int ret; /* Only handle IB (no iWARP) devices */ if (device->node_type != RDMA_NODE_IB_CA) - return; + return -EOPNOTSUPP; + + /* Device must support FRWR */ + if (!(device->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) + return -EOPNOTSUPP; rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL, ibdev_to_node(device)); if (!rds_ibdev) - return; + return -ENOMEM; spin_lock_init(&rds_ibdev->spinlock); refcount_set(&rds_ibdev->refcount, 1); @@ -151,20 +153,14 @@ static void rds_ib_add_one(struct ib_device *device) rds_ibdev->max_wrs = device->attrs.max_qp_wr; rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE); - has_fr = (device->attrs.device_cap_flags & - IB_DEVICE_MEM_MGT_EXTENSIONS); - has_fmr = (device->ops.alloc_fmr && device->ops.dealloc_fmr && - device->ops.map_phys_fmr && device->ops.unmap_fmr); - rds_ibdev->use_fastreg = (has_fr && !has_fmr); rds_ibdev->odp_capable = - !!(device->attrs.device_cap_flags & - IB_DEVICE_ON_DEMAND_PAGING) && + !!(device->attrs.kernel_cap_flags & + IBK_ON_DEMAND_PAGING) && !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps & IB_ODP_SUPPORT_WRITE) && !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps & IB_ODP_SUPPORT_READ); - rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32; rds_ibdev->max_1m_mrs = device->attrs.max_mr ? min_t(unsigned int, (device->attrs.max_mr / 2), rds_ib_mr_1m_pool_size) : rds_ib_mr_1m_pool_size; @@ -182,25 +178,22 @@ static void rds_ib_add_one(struct ib_device *device) if (!rds_ibdev->vector_load) { pr_err("RDS/IB: %s failed to allocate vector memory\n", __func__); + ret = -ENOMEM; goto put_dev; } rds_ibdev->dev = device; rds_ibdev->pd = ib_alloc_pd(device, 0); if (IS_ERR(rds_ibdev->pd)) { + ret = PTR_ERR(rds_ibdev->pd); rds_ibdev->pd = NULL; goto put_dev; } - rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name, - device->dma_device, - sizeof(struct rds_header), - L1_CACHE_BYTES, 0); - if (!rds_ibdev->rid_hdrs_pool) - goto put_dev; rds_ibdev->mr_1m_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); if (IS_ERR(rds_ibdev->mr_1m_pool)) { + ret = PTR_ERR(rds_ibdev->mr_1m_pool); rds_ibdev->mr_1m_pool = NULL; goto put_dev; } @@ -208,18 +201,16 @@ static void rds_ib_add_one(struct ib_device *device) rds_ibdev->mr_8k_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL); if (IS_ERR(rds_ibdev->mr_8k_pool)) { + ret = PTR_ERR(rds_ibdev->mr_8k_pool); rds_ibdev->mr_8k_pool = NULL; goto put_dev; } - rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_mrs = %d, max_8k_mrs = %d\n", - device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge, - rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_mrs, - rds_ibdev->max_8k_mrs); + rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, max_1m_mrs = %d, max_8k_mrs = %d\n", + device->attrs.max_mr, rds_ibdev->max_wrs, rds_ibdev->max_sge, + rds_ibdev->max_1m_mrs, rds_ibdev->max_8k_mrs); - pr_info("RDS/IB: %s: %s supported and preferred\n", - device->name, - rds_ibdev->use_fastreg ? "FRMR" : "FMR"); + pr_info("RDS/IB: %s: added\n", device->name); down_write(&rds_ib_devices_lock); list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices); @@ -227,12 +218,13 @@ static void rds_ib_add_one(struct ib_device *device) refcount_inc(&rds_ibdev->refcount); ib_set_client_data(device, &rds_ib_client, rds_ibdev); - refcount_inc(&rds_ibdev->refcount); rds_ib_nodev_connect(); + return 0; put_dev: rds_ib_dev_put(rds_ibdev); + return ret; } /* @@ -274,9 +266,6 @@ static void rds_ib_remove_one(struct ib_device *device, void *client_data) { struct rds_ib_device *rds_ibdev = client_data; - if (!rds_ibdev) - return; - rds_ib_dev_shutdown(rds_ibdev); /* stop connection attempts from getting a reference to this device. */ diff --git a/net/rds/ib.h b/net/rds/ib.h index 0296f1f7acda..2ba71102b1f1 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -246,14 +246,11 @@ struct rds_ib_device { struct list_head conn_list; struct ib_device *dev; struct ib_pd *pd; - struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */ - u8 use_fastreg:1; u8 odp_capable:1; unsigned int max_mrs; struct rds_ib_mr_pool *mr_1m_pool; struct rds_ib_mr_pool *mr_8k_pool; - unsigned int fmr_max_remaps; unsigned int max_8k_mrs; unsigned int max_1m_mrs; int max_sge; @@ -266,7 +263,6 @@ struct rds_ib_device { int *vector_load; }; -#define ibdev_to_node(ibdev) dev_to_node((ibdev)->dev.parent) #define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev) /* bits for i_ack_flags */ @@ -383,11 +379,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6); void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event); -struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, - struct dma_pool *pool, - dma_addr_t **dma_addrs, u32 num_hdrs); -void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, - dma_addr_t *dma_addrs, u32 num_hdrs); #define rds_ib_conn_error(conn, fmt...) \ __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index c71f4328d138..26b069e1999d 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -30,13 +30,13 @@ * SOFTWARE. * */ -#include <linux/dmapool.h> #include <linux/kernel.h> #include <linux/in.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/ratelimit.h> #include <net/addrconf.h> +#include <rdma/ib_cm.h> #include "rds_single_path.h" #include "rds.h" @@ -68,31 +68,6 @@ static void rds_ib_set_flow_control(struct rds_connection *conn, u32 credits) } /* - * Tune RNR behavior. Without flow control, we use a rather - * low timeout, but not the absolute minimum - this should - * be tunable. - * - * We already set the RNR retry count to 7 (which is the - * smallest infinite number :-) above. - * If flow control is off, we want to change this back to 0 - * so that we learn quickly when our credit accounting is - * buggy. - * - * Caller passes in a qp_attr pointer - don't waste stack spacv - * by allocation this twice. - */ -static void -rds_ib_tune_rnr(struct rds_ib_connection *ic, struct ib_qp_attr *attr) -{ - int ret; - - attr->min_rnr_timer = IB_RNR_TIMER_000_32; - ret = ib_modify_qp(ic->i_cm_id->qp, attr, IB_QP_MIN_RNR_TIMER); - if (ret) - printk(KERN_NOTICE "ib_modify_qp(IB_QP_MIN_RNR_TIMER): err=%d\n", -ret); -} - -/* * Connection established. * We get here for both outgoing and incoming connection. */ @@ -100,7 +75,6 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even { struct rds_ib_connection *ic = conn->c_transport_data; const union rds_ib_conn_priv *dp = NULL; - struct ib_qp_attr qp_attr; __be64 ack_seq = 0; __be32 credit = 0; u8 major = 0; @@ -168,14 +142,6 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even * the posted credit count. */ rds_ib_recv_refill(conn, 1, GFP_KERNEL); - /* Tune RNR behavior */ - rds_ib_tune_rnr(ic, &qp_attr); - - qp_attr.qp_state = IB_QPS_RTS; - err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE); - if (err) - printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err); - /* update ib_device with this local ipaddr */ err = rds_ib_update_ipaddr(ic->rds_ibdev, &conn->c_laddr); if (err) @@ -440,42 +406,87 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index) rds_ibdev->vector_load[index]--; } +static void rds_dma_hdr_free(struct ib_device *dev, struct rds_header *hdr, + dma_addr_t dma_addr, enum dma_data_direction dir) +{ + ib_dma_unmap_single(dev, dma_addr, sizeof(*hdr), dir); + kfree(hdr); +} + +static struct rds_header *rds_dma_hdr_alloc(struct ib_device *dev, + dma_addr_t *dma_addr, enum dma_data_direction dir) +{ + struct rds_header *hdr; + + hdr = kzalloc_node(sizeof(*hdr), GFP_KERNEL, ibdev_to_node(dev)); + if (!hdr) + return NULL; + + *dma_addr = ib_dma_map_single(dev, hdr, sizeof(*hdr), + DMA_BIDIRECTIONAL); + if (ib_dma_mapping_error(dev, *dma_addr)) { + kfree(hdr); + return NULL; + } + + return hdr; +} + +/* Free the DMA memory used to store struct rds_header. + * + * @dev: the RDS IB device + * @hdrs: pointer to the array storing DMA memory pointers + * @dma_addrs: pointer to the array storing DMA addresses + * @num_hdars: number of headers to free. + */ +static void rds_dma_hdrs_free(struct rds_ib_device *dev, + struct rds_header **hdrs, dma_addr_t *dma_addrs, u32 num_hdrs, + enum dma_data_direction dir) +{ + u32 i; + + for (i = 0; i < num_hdrs; i++) + rds_dma_hdr_free(dev->dev, hdrs[i], dma_addrs[i], dir); + kvfree(hdrs); + kvfree(dma_addrs); +} + + /* Allocate DMA coherent memory to be used to store struct rds_header for * sending/receiving packets. The pointers to the DMA memory and the * associated DMA addresses are stored in two arrays. * - * @ibdev: the IB device - * @pool: the DMA memory pool + * @dev: the RDS IB device * @dma_addrs: pointer to the array for storing DMA addresses * @num_hdrs: number of headers to allocate * * It returns the pointer to the array storing the DMA memory pointers. On * error, NULL pointer is returned. */ -struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, - struct dma_pool *pool, - dma_addr_t **dma_addrs, u32 num_hdrs) +static struct rds_header **rds_dma_hdrs_alloc(struct rds_ib_device *dev, + dma_addr_t **dma_addrs, u32 num_hdrs, + enum dma_data_direction dir) { struct rds_header **hdrs; dma_addr_t *hdr_daddrs; u32 i; hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL, - ibdev_to_node(ibdev)); + ibdev_to_node(dev->dev)); if (!hdrs) return NULL; hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL, - ibdev_to_node(ibdev)); + ibdev_to_node(dev->dev)); if (!hdr_daddrs) { kvfree(hdrs); return NULL; } for (i = 0; i < num_hdrs; i++) { - hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]); + hdrs[i] = rds_dma_hdr_alloc(dev->dev, &hdr_daddrs[i], dir); if (!hdrs[i]) { - rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i); + rds_dma_hdrs_free(dev, hdrs, hdr_daddrs, i, dir); return NULL; } } @@ -484,24 +495,6 @@ struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, return hdrs; } -/* Free the DMA memory used to store struct rds_header. - * - * @pool: the DMA memory pool - * @hdrs: pointer to the array storing DMA memory pointers - * @dma_addrs: pointer to the array storing DMA addresses - * @num_hdars: number of headers to free. - */ -void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, - dma_addr_t *dma_addrs, u32 num_hdrs) -{ - u32 i; - - for (i = 0; i < num_hdrs; i++) - dma_pool_free(pool, hdrs[i], dma_addrs[i]); - kvfree(hdrs); - kvfree(dma_addrs); -} - /* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. @@ -515,7 +508,6 @@ static int rds_ib_setup_qp(struct rds_connection *conn) struct rds_ib_device *rds_ibdev; unsigned long max_wrs; int ret, fr_queue_space; - struct dma_pool *pool; /* * It's normal to see a null device if an incoming connection races @@ -526,10 +518,10 @@ static int rds_ib_setup_qp(struct rds_connection *conn) return -EOPNOTSUPP; /* The fr_queue_space is currently set to 512, to add extra space on - * completion queue and send queue. This extra space is used for FRMR + * completion queue and send queue. This extra space is used for FRWR * registration and invalidation work requests */ - fr_queue_space = (rds_ibdev->use_fastreg ? RDS_IB_DEFAULT_FR_WR : 0); + fr_queue_space = RDS_IB_DEFAULT_FR_WR; /* add the conn now so that connection establishment has the dev */ rds_ib_add_conn(rds_ibdev, conn); @@ -611,25 +603,26 @@ static int rds_ib_setup_qp(struct rds_connection *conn) goto recv_cq_out; } - pool = rds_ibdev->rid_hdrs_pool; - ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + ic->i_send_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr, + DMA_TO_DEVICE); if (!ic->i_send_hdrs) { ret = -ENOMEM; rdsdebug("DMA send hdrs alloc failed\n"); goto qp_out; } - ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + ic->i_recv_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr, + DMA_FROM_DEVICE); if (!ic->i_recv_hdrs) { ret = -ENOMEM; rdsdebug("DMA recv hdrs alloc failed\n"); goto send_hdrs_dma_out; } - ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL, - &ic->i_ack_dma); + ic->i_ack = rds_dma_hdr_alloc(rds_ibdev->dev, &ic->i_ack_dma, + DMA_TO_DEVICE); if (!ic->i_ack) { ret = -ENOMEM; rdsdebug("DMA ack header alloc failed\n"); @@ -665,18 +658,19 @@ sends_out: vfree(ic->i_sends); ack_dma_out: - dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); + rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma, + DMA_TO_DEVICE); ic->i_ack = NULL; recv_hdrs_dma_out: - rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + rds_dma_hdrs_free(rds_ibdev, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr, DMA_FROM_DEVICE); ic->i_recv_hdrs = NULL; ic->i_recv_hdrs_dma = NULL; send_hdrs_dma_out: - rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + rds_dma_hdrs_free(rds_ibdev, ic->i_send_hdrs, ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr, DMA_TO_DEVICE); ic->i_send_hdrs = NULL; ic->i_send_hdrs_dma = NULL; @@ -710,7 +704,7 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6) * original size. The only way to tell the difference is by looking at * the contents, which are initialized to zero. * If the protocol version fields aren't set, this is a connection attempt - * from an older version. This could could be 3.0 or 2.0 - we can't tell. + * from an older version. This could be 3.0 or 2.0 - we can't tell. * We really should have changed this for OFED 1.3 :-( */ @@ -919,6 +913,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, event->param.conn.responder_resources, event->param.conn.initiator_depth, isv6); + rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32); /* rdma_accept() calls rdma_reject() internally if it fails */ if (rdma_accept(cm_id, &conn_param)) rds_ib_conn_error(conn, "rdma_accept failed\n"); @@ -927,7 +922,8 @@ out: if (conn) mutex_unlock(&conn->c_cm_lock); if (err) - rdma_reject(cm_id, &err, sizeof(int)); + rdma_reject(cm_id, &err, sizeof(int), + IB_CM_REJ_CONSUMER_DEFINED); return destroy; } @@ -954,9 +950,10 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6) rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, conn->c_proposed_version, UINT_MAX, UINT_MAX, isv6); - ret = rdma_connect(cm_id, &conn_param); + ret = rdma_connect_locked(cm_id, &conn_param); if (ret) - rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret); + rds_ib_conn_error(conn, "rdma_connect_locked failed (%d)\n", + ret); out: /* Beware - returning non-zero tells the rdma_cm to destroy @@ -1107,29 +1104,30 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) } if (ic->rds_ibdev) { - struct dma_pool *pool; - - pool = ic->rds_ibdev->rid_hdrs_pool; - /* then free the resources that ib callbacks use */ if (ic->i_send_hdrs) { - rds_dma_hdrs_free(pool, ic->i_send_hdrs, + rds_dma_hdrs_free(ic->rds_ibdev, + ic->i_send_hdrs, ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + ic->i_send_ring.w_nr, + DMA_TO_DEVICE); ic->i_send_hdrs = NULL; ic->i_send_hdrs_dma = NULL; } if (ic->i_recv_hdrs) { - rds_dma_hdrs_free(pool, ic->i_recv_hdrs, + rds_dma_hdrs_free(ic->rds_ibdev, + ic->i_recv_hdrs, ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + ic->i_recv_ring.w_nr, + DMA_FROM_DEVICE); ic->i_recv_hdrs = NULL; ic->i_recv_hdrs_dma = NULL; } if (ic->i_ack) { - dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); + rds_dma_hdr_free(ic->rds_ibdev->dev, ic->i_ack, + ic->i_ack_dma, DMA_TO_DEVICE); ic->i_ack = NULL; } } else { diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c deleted file mode 100644 index 93c0437e6a5f..000000000000 --- a/net/rds/ib_fmr.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (c) 2016 Oracle. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ib_mr.h" - -struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages) -{ - struct rds_ib_mr_pool *pool; - struct rds_ib_mr *ibmr = NULL; - struct rds_ib_fmr *fmr; - int err = 0; - - if (npages <= RDS_MR_8K_MSG_SIZE) - pool = rds_ibdev->mr_8k_pool; - else - pool = rds_ibdev->mr_1m_pool; - - if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) - queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); - - /* Switch pools if one of the pool is reaching upper limit */ - if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) { - if (pool->pool_type == RDS_IB_MR_8K_POOL) - pool = rds_ibdev->mr_1m_pool; - else - pool = rds_ibdev->mr_8k_pool; - } - - ibmr = rds_ib_try_reuse_ibmr(pool); - if (ibmr) - return ibmr; - - ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL, - rdsibdev_to_node(rds_ibdev)); - if (!ibmr) { - err = -ENOMEM; - goto out_no_cigar; - } - - fmr = &ibmr->u.fmr; - fmr->fmr = ib_alloc_fmr(rds_ibdev->pd, - (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_ATOMIC), - &pool->fmr_attr); - if (IS_ERR(fmr->fmr)) { - err = PTR_ERR(fmr->fmr); - fmr->fmr = NULL; - pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, err); - goto out_no_cigar; - } - - ibmr->pool = pool; - if (pool->pool_type == RDS_IB_MR_8K_POOL) - rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc); - else - rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc); - - return ibmr; - -out_no_cigar: - kfree(ibmr); - atomic_dec(&pool->item_count); - - return ERR_PTR(err); -} - -static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, - struct rds_ib_mr *ibmr, struct scatterlist *sg, - unsigned int nents) -{ - struct ib_device *dev = rds_ibdev->dev; - struct rds_ib_fmr *fmr = &ibmr->u.fmr; - struct scatterlist *scat = sg; - u64 io_addr = 0; - u64 *dma_pages; - u32 len; - int page_cnt, sg_dma_len; - int i, j; - int ret; - - sg_dma_len = ib_dma_map_sg(dev, sg, nents, DMA_BIDIRECTIONAL); - if (unlikely(!sg_dma_len)) { - pr_warn("RDS/IB: %s failed!\n", __func__); - return -EBUSY; - } - - len = 0; - page_cnt = 0; - - for (i = 0; i < sg_dma_len; ++i) { - unsigned int dma_len = sg_dma_len(&scat[i]); - u64 dma_addr = sg_dma_address(&scat[i]); - - if (dma_addr & ~PAGE_MASK) { - if (i > 0) { - ib_dma_unmap_sg(dev, sg, nents, - DMA_BIDIRECTIONAL); - return -EINVAL; - } else { - ++page_cnt; - } - } - if ((dma_addr + dma_len) & ~PAGE_MASK) { - if (i < sg_dma_len - 1) { - ib_dma_unmap_sg(dev, sg, nents, - DMA_BIDIRECTIONAL); - return -EINVAL; - } else { - ++page_cnt; - } - } - - len += dma_len; - } - - page_cnt += len >> PAGE_SHIFT; - if (page_cnt > ibmr->pool->fmr_attr.max_pages) { - ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); - return -EINVAL; - } - - dma_pages = kmalloc_array_node(sizeof(u64), page_cnt, GFP_ATOMIC, - rdsibdev_to_node(rds_ibdev)); - if (!dma_pages) { - ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); - return -ENOMEM; - } - - page_cnt = 0; - for (i = 0; i < sg_dma_len; ++i) { - unsigned int dma_len = sg_dma_len(&scat[i]); - u64 dma_addr = sg_dma_address(&scat[i]); - - for (j = 0; j < dma_len; j += PAGE_SIZE) - dma_pages[page_cnt++] = - (dma_addr & PAGE_MASK) + j; - } - - ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr); - if (ret) { - ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); - goto out; - } - - /* Success - we successfully remapped the MR, so we can - * safely tear down the old mapping. - */ - rds_ib_teardown_mr(ibmr); - - ibmr->sg = scat; - ibmr->sg_len = nents; - ibmr->sg_dma_len = sg_dma_len; - ibmr->remap_count++; - - if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL) - rds_ib_stats_inc(s_ib_rdma_mr_8k_used); - else - rds_ib_stats_inc(s_ib_rdma_mr_1m_used); - ret = 0; - -out: - kfree(dma_pages); - - return ret; -} - -struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *rds_ibdev, - struct scatterlist *sg, - unsigned long nents, - u32 *key) -{ - struct rds_ib_mr *ibmr = NULL; - struct rds_ib_fmr *fmr; - int ret; - - ibmr = rds_ib_alloc_fmr(rds_ibdev, nents); - if (IS_ERR(ibmr)) - return ibmr; - - ibmr->device = rds_ibdev; - fmr = &ibmr->u.fmr; - ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents); - if (ret == 0) - *key = fmr->fmr->rkey; - else - rds_ib_free_mr(ibmr, 0); - - return ibmr; -} - -void rds_ib_unreg_fmr(struct list_head *list, unsigned int *nfreed, - unsigned long *unpinned, unsigned int goal) -{ - struct rds_ib_mr *ibmr, *next; - struct rds_ib_fmr *fmr; - LIST_HEAD(fmr_list); - int ret = 0; - unsigned int freed = *nfreed; - - /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ - list_for_each_entry(ibmr, list, unmap_list) { - fmr = &ibmr->u.fmr; - list_add(&fmr->fmr->list, &fmr_list); - } - - ret = ib_unmap_fmr(&fmr_list); - if (ret) - pr_warn("RDS/IB: FMR invalidation failed (err=%d)\n", ret); - - /* Now we can destroy the DMA mapping and unpin any pages */ - list_for_each_entry_safe(ibmr, next, list, unmap_list) { - fmr = &ibmr->u.fmr; - *unpinned += ibmr->sg_len; - __rds_ib_teardown_mr(ibmr); - if (freed < goal || - ibmr->remap_count >= ibmr->pool->fmr_attr.max_maps) { - if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL) - rds_ib_stats_inc(s_ib_rdma_mr_8k_free); - else - rds_ib_stats_inc(s_ib_rdma_mr_1m_free); - list_del(&ibmr->unmap_list); - ib_dealloc_fmr(fmr->fmr); - kfree(ibmr); - freed++; - } - } - *nfreed = freed; -} - -void rds_ib_free_fmr_list(struct rds_ib_mr *ibmr) -{ - struct rds_ib_mr_pool *pool = ibmr->pool; - - if (ibmr->remap_count >= pool->fmr_attr.max_maps) - llist_add(&ibmr->llnode, &pool->drop_list); - else - llist_add(&ibmr->llnode, &pool->free_list); -} diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 06ecf9d2d4bf..28c1b0022178 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -76,7 +76,7 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev, frmr = &ibmr->u.frmr; frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG, - pool->fmr_attr.max_pages); + pool->max_pages); if (IS_ERR(frmr->mr)) { pr_warn("RDS/IB: %s failed to allocate MR", __func__); err = PTR_ERR(frmr->mr); @@ -131,9 +131,9 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) cpu_relax(); } - ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, + ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len, &off, PAGE_SIZE); - if (unlikely(ret != ibmr->sg_len)) + if (unlikely(ret != ibmr->sg_dma_len)) return ret < 0 ? ret : -EINVAL; if (cmpxchg(&frmr->fr_state, @@ -240,7 +240,7 @@ static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev, } frmr->dma_npages += len >> PAGE_SHIFT; - if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) { + if (frmr->dma_npages > ibmr->pool->max_pages) { ret = -EMSGSIZE; goto out_unmap; } diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 0c8252d7fe2b..ea5e9aee4959 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -43,10 +43,6 @@ #define RDS_MR_8K_SCALE (256 / (RDS_MR_8K_MSG_SIZE + 1)) #define RDS_MR_8K_POOL_SIZE (RDS_MR_8K_SCALE * (8192 / 2)) -struct rds_ib_fmr { - struct ib_fmr *fmr; -}; - enum rds_ib_fr_state { FRMR_IS_FREE, /* mr invalidated & ready for use */ FRMR_IS_INUSE, /* mr is in use or used & can be invalidated */ @@ -84,7 +80,6 @@ struct rds_ib_mr { u8 odp:1; union { - struct rds_ib_fmr fmr; struct rds_ib_frmr frmr; struct ib_mr *mr; } u; @@ -109,8 +104,7 @@ struct rds_ib_mr_pool { unsigned long max_items; unsigned long max_items_soft; unsigned long max_free_pinned; - struct ib_fmr_attr fmr_attr; - bool use_fastreg; + unsigned int max_pages; }; extern struct workqueue_struct *rds_ib_mr_wq; @@ -136,15 +130,9 @@ u32 rds_ib_get_lkey(void *trans_private); void __rds_ib_teardown_mr(struct rds_ib_mr *); void rds_ib_teardown_mr(struct rds_ib_mr *); -struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *, int); struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *); int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *, int, struct rds_ib_mr **); -struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *, - unsigned long, u32 *); struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *); -void rds_ib_unreg_fmr(struct list_head *, unsigned int *, - unsigned long *, unsigned int); -void rds_ib_free_fmr_list(struct rds_ib_mr *); struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev, struct rds_ib_connection *ic, struct scatterlist *sg, diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index b34b24e237f8..8f070ee7e742 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -181,7 +181,7 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; iinfo->rdma_mr_max = pool_1m->max_items; - iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages; + iinfo->rdma_mr_size = pool_1m->max_pages; } #if IS_ENABLED(CONFIG_IPV6) @@ -191,7 +191,7 @@ void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; iinfo6->rdma_mr_max = pool_1m->max_items; - iinfo6->rdma_mr_size = pool_1m->fmr_attr.max_pages; + iinfo6->rdma_mr_size = pool_1m->max_pages; } #endif @@ -406,10 +406,7 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, if (list_empty(&unmap_list)) goto out; - if (pool->use_fastreg) - rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal); - else - rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal); + rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal); if (!list_empty(&unmap_list)) { unsigned long flags; @@ -503,10 +500,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate) } /* Return it to the pool's free list */ - if (rds_ibdev->use_fastreg) - rds_ib_free_frmr_list(ibmr); - else - rds_ib_free_fmr_list(ibmr); + rds_ib_free_frmr_list(ibmr); atomic_add(ibmr->sg_len, &pool->free_pinned); atomic_inc(&pool->dirty_count); @@ -622,10 +616,7 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, goto out; } - if (rds_ibdev->use_fastreg) - ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret); - else - ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret); + ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret); if (IS_ERR(ibmr)) { ret = PTR_ERR(ibmr); pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret); @@ -669,19 +660,16 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev, if (pool_type == RDS_IB_MR_1M_POOL) { /* +1 allows for unaligned MRs */ - pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1; + pool->max_pages = RDS_MR_1M_MSG_SIZE + 1; pool->max_items = rds_ibdev->max_1m_mrs; } else { /* pool_type == RDS_IB_MR_8K_POOL */ - pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1; + pool->max_pages = RDS_MR_8K_MSG_SIZE + 1; pool->max_items = rds_ibdev->max_8k_mrs; } - pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4; - pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps; - pool->fmr_attr.page_shift = PAGE_SHIFT; + pool->max_free_pinned = pool->max_items * pool->max_pages / 4; pool->max_items_soft = rds_ibdev->max_mrs * 3 / 4; - pool->use_fastreg = rds_ibdev->use_fastreg; return pool; } diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 694d411dc72f..cfbf0e129cba 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -310,8 +310,8 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, struct rds_ib_connection *ic = conn->c_transport_data; struct ib_sge *sge; int ret = -ENOMEM; - gfp_t slab_mask = GFP_NOWAIT; - gfp_t page_mask = GFP_NOWAIT; + gfp_t slab_mask = gfp; + gfp_t page_mask = gfp; if (gfp & __GFP_DIRECT_RECLAIM) { slab_mask = GFP_KERNEL; @@ -363,6 +363,7 @@ static int acquire_refill(struct rds_connection *conn) static void release_refill(struct rds_connection *conn) { clear_bit(RDS_RECV_REFILL, &conn->c_flags); + smp_mb__after_atomic(); /* We don't use wait_on_bit()/wake_up_bit() because our waking is in a * hot path and finding waiters is very rare. We don't want to walk @@ -662,10 +663,16 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi seq = rds_ib_get_ack(ic); rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq); + + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, ic->i_ack_dma, + sizeof(*hdr), DMA_TO_DEVICE); rds_message_populate_header(hdr, 0, 0, 0); hdr->h_ack = cpu_to_be64(seq); hdr->h_credit = adv_credits; rds_message_make_checksum(hdr); + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, ic->i_ack_dma, + sizeof(*hdr), DMA_TO_DEVICE); + ic->i_ack_queued = jiffies; ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL); @@ -845,6 +852,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, struct rds_ib_connection *ic = conn->c_transport_data; struct rds_ib_incoming *ibinc = ic->i_ibinc; struct rds_header *ihdr, *hdr; + dma_addr_t dma_addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs]; /* XXX shut down the connection if port 0,0 are seen? */ @@ -863,6 +871,8 @@ static void rds_ib_process_recv(struct rds_connection *conn, ihdr = ic->i_recv_hdrs[recv - ic->i_recvs]; + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, dma_addr, + sizeof(*ihdr), DMA_FROM_DEVICE); /* Validate the checksum. */ if (!rds_message_verify_checksum(ihdr)) { rds_ib_conn_error(conn, "incoming message " @@ -870,7 +880,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, "forcing a reconnect\n", &conn->c_faddr); rds_stats_inc(s_recv_drop_bad_checksum); - return; + goto done; } /* Process the ACK sequence which comes with every packet */ @@ -899,7 +909,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, */ rds_ib_frag_free(ic, recv->r_frag); recv->r_frag = NULL; - return; + goto done; } /* @@ -933,7 +943,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, hdr->h_dport != ihdr->h_dport) { rds_ib_conn_error(conn, "fragment header mismatch; forcing reconnect\n"); - return; + goto done; } } @@ -965,6 +975,9 @@ static void rds_ib_process_recv(struct rds_connection *conn, rds_inc_put(&ibinc->ii_inc); } +done: + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, dma_addr, + sizeof(*ihdr), DMA_FROM_DEVICE); } void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, @@ -1020,7 +1033,7 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, rds_ib_stats_inc(s_ib_rx_ring_empty); if (rds_ib_ring_low(&ic->i_recv_ring)) { - rds_ib_recv_refill(conn, 0, GFP_NOWAIT); + rds_ib_recv_refill(conn, 0, GFP_NOWAIT | __GFP_NOWARN); rds_ib_stats_inc(s_ib_rx_refill_from_cq); } } diff --git a/net/rds/ib_ring.c b/net/rds/ib_ring.c index ff97e8eda858..006b2e441418 100644 --- a/net/rds/ib_ring.c +++ b/net/rds/ib_ring.c @@ -141,7 +141,7 @@ int rds_ib_ring_low(struct rds_ib_work_ring *ring) } /* - * returns the oldest alloced ring entry. This will be the next one + * returns the oldest allocated ring entry. This will be the next one * freed. This can't be called if there are none allocated. */ u32 rds_ib_ring_oldest(struct rds_ib_work_ring *ring) diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index dfe778220657..4190b90ff3b1 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -294,7 +294,6 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) rds_ib_ring_free(&ic->i_send_ring, completed); rds_ib_sub_signaled(ic, nr_sig); - nr_sig = 0; if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) || test_bit(0, &conn->c_map_queued)) @@ -638,6 +637,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, send->s_sge[0].length = sizeof(struct rds_header); send->s_sge[0].lkey = ic->i_pd->local_dma_lkey; + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, + ic->i_send_hdrs_dma[pos], + sizeof(struct rds_header), + DMA_TO_DEVICE); memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header)); @@ -688,6 +691,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, adv_credits = 0; rds_ib_stats_inc(s_ib_tx_credit_updates); } + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, + ic->i_send_hdrs_dma[pos], + sizeof(struct rds_header), + DMA_TO_DEVICE); if (prev) prev->s_wr.next = &send->s_wr; diff --git a/net/rds/info.c b/net/rds/info.c index 03f6fd56d237..b6b46a8214a0 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -162,7 +162,6 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, struct rds_info_lengths lens; unsigned long nr_pages = 0; unsigned long start; - unsigned long i; rds_info_func func; struct page **pages = NULL; int ret; @@ -193,7 +192,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, ret = -ENOMEM; goto out; } - ret = get_user_pages_fast(start, nr_pages, FOLL_WRITE, pages); + ret = pin_user_pages_fast(start, nr_pages, FOLL_WRITE, pages); if (ret != nr_pages) { if (ret > 0) nr_pages = ret; @@ -235,8 +234,8 @@ call_func: ret = -EFAULT; out: - for (i = 0; pages && i < nr_pages; i++) - put_page(pages[i]); + if (pages) + unpin_user_pages(pages, nr_pages); kfree(pages); return ret; diff --git a/net/rds/message.c b/net/rds/message.c index 50f13f1d4ae0..44dbc612ef54 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Oracle. All rights reserved. + * Copyright (c) 2006, 2020 Oracle and/or its affiliates. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -162,12 +162,12 @@ static void rds_message_purge(struct rds_message *rm) if (rm->rdma.op_active) rds_rdma_free_op(&rm->rdma); if (rm->rdma.op_rdma_mr) - rds_mr_put(rm->rdma.op_rdma_mr); + kref_put(&rm->rdma.op_rdma_mr->r_kref, __rds_put_mr_final); if (rm->atomic.op_active) rds_atomic_free_op(&rm->atomic); if (rm->atomic.op_rdma_mr) - rds_mr_put(rm->atomic.op_rdma_mr); + kref_put(&rm->atomic.op_rdma_mr->r_kref, __rds_put_mr_final); } void rds_message_put(struct rds_message *rm) @@ -308,26 +308,20 @@ out: /* * RDS ops use this to grab SG entries from the rm's sg pool. */ -struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents, - int *ret) +struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents) { struct scatterlist *sg_first = (struct scatterlist *) &rm[1]; struct scatterlist *sg_ret; - if (WARN_ON(!ret)) - return NULL; - if (nents <= 0) { pr_warn("rds: alloc sgs failed! nents <= 0\n"); - *ret = -EINVAL; - return NULL; + return ERR_PTR(-EINVAL); } if (rm->m_used_sgs + nents > rm->m_total_sgs) { pr_warn("rds: alloc sgs failed! total %d used %d nents %d\n", rm->m_total_sgs, rm->m_used_sgs, nents); - *ret = -ENOMEM; - return NULL; + return ERR_PTR(-ENOMEM); } sg_ret = &sg_first[rm->m_used_sgs]; @@ -343,7 +337,6 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in unsigned int i; int num_sgs = DIV_ROUND_UP(total_len, PAGE_SIZE); int extra_bytes = num_sgs * sizeof(struct scatterlist); - int ret; rm = rds_message_alloc(extra_bytes, GFP_NOWAIT); if (!rm) @@ -352,15 +345,16 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); rm->data.op_nents = DIV_ROUND_UP(total_len, PAGE_SIZE); - rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs, &ret); - if (!rm->data.op_sg) { + rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); + if (IS_ERR(rm->data.op_sg)) { + void *err = ERR_CAST(rm->data.op_sg); rds_message_put(rm); - return ERR_PTR(ret); + return err; } for (i = 0; i < rm->data.op_nents; ++i) { sg_set_page(&rm->data.op_sg[i], - virt_to_page(page_addrs[i]), + virt_to_page((void *)page_addrs[i]), PAGE_SIZE, 0); } @@ -397,7 +391,7 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter * size_t start; ssize_t copied; - copied = iov_iter_get_pages(from, &pages, PAGE_SIZE, + copied = iov_iter_get_pages2(from, &pages, PAGE_SIZE, 1, &start); if (copied < 0) { struct mmpin *mmp; @@ -411,7 +405,6 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter * goto err; } total_copied += copied; - iov_iter_advance(from, copied); length -= copied; sg_set_page(sg, pages, copied, start); rm->data.op_nents++; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 585e6b3b69ce..fba82d36593a 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2020 Oracle and/or its affiliates. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -84,7 +84,7 @@ static struct rds_mr *rds_mr_tree_walk(struct rb_root *root, u64 key, if (insert) { rb_link_node(&insert->r_rb_node, parent, p); rb_insert_color(&insert->r_rb_node, root); - refcount_inc(&insert->r_refcount); + kref_get(&insert->r_kref); } return NULL; } @@ -99,10 +99,7 @@ static void rds_destroy_mr(struct rds_mr *mr) unsigned long flags; rdsdebug("RDS: destroy mr key is %x refcnt %u\n", - mr->r_key, refcount_read(&mr->r_refcount)); - - if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state)) - return; + mr->r_key, kref_read(&mr->r_kref)); spin_lock_irqsave(&rs->rs_rdma_lock, flags); if (!RB_EMPTY_NODE(&mr->r_rb_node)) @@ -115,8 +112,10 @@ static void rds_destroy_mr(struct rds_mr *mr) mr->r_trans->free_mr(trans_private, mr->r_invalidate); } -void __rds_put_mr_final(struct rds_mr *mr) +void __rds_put_mr_final(struct kref *kref) { + struct rds_mr *mr = container_of(kref, struct rds_mr, r_kref); + rds_destroy_mr(mr); kfree(mr); } @@ -140,8 +139,7 @@ void rds_rdma_drop_keys(struct rds_sock *rs) rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); RB_CLEAR_NODE(&mr->r_rb_node); spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); - rds_destroy_mr(mr); - rds_mr_put(mr); + kref_put(&mr->r_kref, __rds_put_mr_final); spin_lock_irqsave(&rs->rs_rdma_lock, flags); } spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); @@ -242,7 +240,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, goto out; } - refcount_set(&mr->r_refcount, 1); + kref_init(&mr->r_kref); RB_CLEAR_NODE(&mr->r_rb_node); mr->r_trans = rs->rs_transport; mr->r_sock = rs; @@ -271,7 +269,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, goto out; } else { nents = ret; - sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL); + sg = kmalloc_array(nents, sizeof(*sg), GFP_KERNEL); if (!sg) { ret = -ENOMEM; goto out; @@ -343,7 +341,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, rdsdebug("RDS: get_mr key is %x\n", mr->r_key); if (mr_ret) { - refcount_inc(&mr->r_refcount); + kref_get(&mr->r_kref); *mr_ret = mr; } @@ -351,25 +349,24 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, out: kfree(pages); if (mr) - rds_mr_put(mr); + kref_put(&mr->r_kref, __rds_put_mr_final); return ret; } -int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen) +int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen) { struct rds_get_mr_args args; if (optlen != sizeof(struct rds_get_mr_args)) return -EINVAL; - if (copy_from_user(&args, (struct rds_get_mr_args __user *)optval, - sizeof(struct rds_get_mr_args))) + if (copy_from_sockptr(&args, optval, sizeof(struct rds_get_mr_args))) return -EFAULT; return __rds_rdma_map(rs, &args, NULL, NULL, NULL); } -int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen) +int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen) { struct rds_get_mr_for_dest_args args; struct rds_get_mr_args new_args; @@ -377,7 +374,7 @@ int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen) if (optlen != sizeof(struct rds_get_mr_for_dest_args)) return -EINVAL; - if (copy_from_user(&args, (struct rds_get_mr_for_dest_args __user *)optval, + if (copy_from_sockptr(&args, optval, sizeof(struct rds_get_mr_for_dest_args))) return -EFAULT; @@ -396,7 +393,7 @@ int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen) /* * Free the MR indicated by the given R_Key */ -int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen) +int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen) { struct rds_free_mr_args args; struct rds_mr *mr; @@ -405,8 +402,7 @@ int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen) if (optlen != sizeof(struct rds_free_mr_args)) return -EINVAL; - if (copy_from_user(&args, (struct rds_free_mr_args __user *)optval, - sizeof(struct rds_free_mr_args))) + if (copy_from_sockptr(&args, optval, sizeof(struct rds_free_mr_args))) return -EFAULT; /* Special case - a null cookie means flush all unused MRs */ @@ -434,13 +430,7 @@ int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen) if (!mr) return -EINVAL; - /* - * call rds_destroy_mr() ourselves so that we're sure it's done by the time - * we return. If we let rds_mr_put() do it it might not happen until - * someone else drops their ref. - */ - rds_destroy_mr(mr); - rds_mr_put(mr); + kref_put(&mr->r_kref, __rds_put_mr_final); return 0; } @@ -464,6 +454,14 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force) return; } + /* Get a reference so that the MR won't go away before calling + * sync_mr() below. + */ + kref_get(&mr->r_kref); + + /* If it is going to be freed, remove it from the tree now so + * that no other thread can find it and free it. + */ if (mr->r_use_once || force) { rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); RB_CLEAR_NODE(&mr->r_rb_node); @@ -477,12 +475,13 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force) if (mr->r_trans->sync_mr) mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE); + /* Release the reference held above. */ + kref_put(&mr->r_kref, __rds_put_mr_final); + /* If the MR was marked as invalidate, this will * trigger an async flush. */ - if (zot_me) { - rds_destroy_mr(mr); - rds_mr_put(mr); - } + if (zot_me) + kref_put(&mr->r_kref, __rds_put_mr_final); } void rds_rdma_free_op(struct rm_rdma_op *ro) @@ -490,7 +489,7 @@ void rds_rdma_free_op(struct rm_rdma_op *ro) unsigned int i; if (ro->op_odp_mr) { - rds_mr_put(ro->op_odp_mr); + kref_put(&ro->op_odp_mr->r_kref, __rds_put_mr_final); } else { for (i = 0; i < ro->op_nents; i++) { struct page *page = sg_page(&ro->op_sg[i]); @@ -566,6 +565,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args, if (args->nr_local == 0) return -EINVAL; + if (args->nr_local > UIO_MAXIOV) + return -EMSGSIZE; + iov->iov = kcalloc(args->nr_local, sizeof(struct rds_iovec), GFP_KERNEL); @@ -664,9 +666,11 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, op->op_odp_mr = NULL; WARN_ON(!nr_pages); - op->op_sg = rds_message_alloc_sgs(rm, nr_pages, &ret); - if (!op->op_sg) + op->op_sg = rds_message_alloc_sgs(rm, nr_pages); + if (IS_ERR(op->op_sg)) { + ret = PTR_ERR(op->op_sg); goto out_pages; + } if (op->op_notify || op->op_recverr) { /* We allocate an uninitialized notifier here, because @@ -730,7 +734,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, goto out_pages; } RB_CLEAR_NODE(&local_odp_mr->r_rb_node); - refcount_set(&local_odp_mr->r_refcount, 1); + kref_init(&local_odp_mr->r_kref); local_odp_mr->r_trans = rs->rs_transport; local_odp_mr->r_sock = rs; local_odp_mr->r_trans_private = @@ -738,7 +742,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, NULL, 0, rs, &local_odp_mr->r_key, NULL, iov->addr, iov->bytes, ODP_VIRTUAL); if (IS_ERR(local_odp_mr->r_trans_private)) { - ret = IS_ERR(local_odp_mr->r_trans_private); + ret = PTR_ERR(local_odp_mr->r_trans_private); rdsdebug("get_mr ret %d %p\"", ret, local_odp_mr->r_trans_private); kfree(local_odp_mr); @@ -827,7 +831,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm, if (!mr) err = -EINVAL; /* invalid r_key */ else - refcount_inc(&mr->r_refcount); + kref_get(&mr->r_kref); spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); if (mr) { @@ -905,9 +909,11 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT); rm->atomic.op_active = 1; rm->atomic.op_recverr = rs->rs_recverr; - rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1, &ret); - if (!rm->atomic.op_sg) + rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); + if (IS_ERR(rm->atomic.op_sg)) { + ret = PTR_ERR(rm->atomic.op_sg); goto err; + } /* verify 8 byte-aligned */ if (args->local_addr & 0x7) { diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 5f741e51b4ba..d36f3f6b4351 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -87,6 +87,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_ADDR_RESOLVED: rdma_set_service_type(cm_id, conn->c_tos); + rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32); /* XXX do we need to clean up if this fails? */ ret = rdma_resolve_route(cm_id, RDS_RDMA_RESOLVE_TIMEOUT_MS); @@ -290,7 +291,7 @@ static void rds_rdma_listen_stop(void) #endif } -static int rds_rdma_init(void) +static int __init rds_rdma_init(void) { int ret; @@ -306,7 +307,7 @@ out: } module_init(rds_rdma_init); -static void rds_rdma_exit(void) +static void __exit rds_rdma_exit(void) { /* stop listening first to ensure no new connections are attempted */ rds_rdma_listen_stop(); diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h index bfafd4a6d827..ca4c3a667091 100644 --- a/net/rds/rdma_transport.h +++ b/net/rds/rdma_transport.h @@ -13,7 +13,7 @@ /* Below reject reason is for legacy interoperability issue with non-linux * RDS endpoints where older version incompatibility is conveyed via value 1. - * For future version(s), proper encoded reject reason should be be used. + * For future version(s), proper encoded reject reason should be used. */ #define RDS_RDMA_REJ_INCOMPAT 1 diff --git a/net/rds/rds.h b/net/rds/rds.h index e4a603523083..d35d1fc39807 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -291,7 +291,7 @@ struct rds_incoming { struct rds_mr { struct rb_node r_rb_node; - refcount_t r_refcount; + struct kref r_kref; u32 r_key; /* A copy of the creation flags */ @@ -299,19 +299,11 @@ struct rds_mr { unsigned int r_invalidate:1; unsigned int r_write:1; - /* This is for RDS_MR_DEAD. - * It would be nice & consistent to make this part of the above - * bit field here, but we need to use test_and_set_bit. - */ - unsigned long r_state; struct rds_sock *r_sock; /* back pointer to the socket that owns us */ struct rds_transport *r_trans; void *r_trans_private; }; -/* Flags for mr->r_state */ -#define RDS_MR_DEAD 0 - static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset) { return r_key | (((u64) offset) << 32); @@ -786,6 +778,7 @@ void rds_conn_drop(struct rds_connection *conn); void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy); void rds_conn_connect_if_down(struct rds_connection *conn); void rds_conn_path_connect_if_down(struct rds_conn_path *cp); +void rds_check_all_paths(struct rds_connection *conn); void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, @@ -831,6 +824,12 @@ rds_conn_path_up(struct rds_conn_path *cp) } static inline int +rds_conn_path_down(struct rds_conn_path *cp) +{ + return atomic_read(&cp->cp_state) == RDS_CONN_DOWN; +} + +static inline int rds_conn_up(struct rds_connection *conn) { WARN_ON(conn->c_trans->t_mp_capable); @@ -852,8 +851,7 @@ rds_conn_connecting(struct rds_connection *conn) /* message.c */ struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); -struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents, - int *ret); +struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents); int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, bool zcopy); struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); @@ -926,9 +924,9 @@ int rds_send_pong(struct rds_conn_path *cp, __be16 dport); /* rdma.c */ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); -int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen); -int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen); -int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen); +int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen); +int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen); +int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen); void rds_rdma_drop_keys(struct rds_sock *rs); int rds_rdma_extra_size(struct rds_rdma_args *args, struct rds_iov_vector *iov); @@ -946,12 +944,7 @@ void rds_atomic_send_complete(struct rds_message *rm, int wc_status); int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); -void __rds_put_mr_final(struct rds_mr *mr); -static inline void rds_mr_put(struct rds_mr *mr) -{ - if (refcount_dec_and_test(&mr->r_refcount)) - __rds_put_mr_final(mr); -} +void __rds_put_mr_final(struct kref *kref); static inline bool rds_destroy_pending(struct rds_connection *conn) { diff --git a/net/rds/recv.c b/net/rds/recv.c index c8404971d5ab..5b426dc3634d 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -450,12 +450,13 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc, int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr) { struct rds_notifier *notifier; - struct rds_rdma_notify cmsg = { 0 }; /* fill holes with zero */ + struct rds_rdma_notify cmsg; unsigned int count = 0, max_messages = ~0U; unsigned long flags; LIST_HEAD(copy); int err = 0; + memset(&cmsg, 0, sizeof(cmsg)); /* fill holes with zero */ /* put_cmsg copies to user space and thus may sleep. We can't do this * with rs_lock held, so first grab as many notifications as we can stuff @@ -713,7 +714,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (rds_cmsg_recv(inc, msg, rs)) { ret = -EFAULT; - goto out; + break; } rds_recvmsg_zcookie(rs, msg); @@ -721,8 +722,6 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (msg->msg_name) { if (ipv6_addr_v4mapped(&inc->i_saddr)) { - sin = (struct sockaddr_in *)msg->msg_name; - sin->sin_family = AF_INET; sin->sin_port = inc->i_hdr.h_sport; sin->sin_addr.s_addr = @@ -730,8 +729,6 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); msg->msg_namelen = sizeof(*sin); } else { - sin6 = (struct sockaddr_in6 *)msg->msg_name; - sin6->sin6_family = AF_INET6; sin6->sin6_port = inc->i_hdr.h_sport; sin6->sin6_addr = inc->i_saddr; diff --git a/net/rds/send.c b/net/rds/send.c index 82dcd8b84fe7..0c5504068e3c 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -272,7 +272,7 @@ restart: /* Unfortunately, the way Infiniband deals with * RDMA to a bad MR key is by moving the entire - * queue pair to error state. We cold possibly + * queue pair to error state. We could possibly * recover from that, but right now we drop the * connection. * Therefore, we never retransmit messages with RDMA ops. @@ -934,7 +934,7 @@ static int rds_rm_size(struct msghdr *msg, int num_sgs, case RDS_CMSG_ZCOPY_COOKIE: zcopy_cookie = true; - /* fall through */ + fallthrough; case RDS_CMSG_RDMA_DEST: case RDS_CMSG_RDMA_MAP: @@ -1225,7 +1225,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) } /* If the socket is already bound to a link local address, * it can only send to peers on the same link. But allow - * communicating beween link local and non-link local address. + * communicating between link local and non-link local address. */ if (scope_id != rs->rs_bound_scope_id) { if (!scope_id) { @@ -1274,9 +1274,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Attach data to the rm */ if (payload_len) { - rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs, &ret); - if (!rm->data.op_sg) + rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); + if (IS_ERR(rm->data.op_sg)) { + ret = PTR_ERR(rm->data.op_sg); goto out; + } ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy); if (ret) goto out; @@ -1338,7 +1340,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) goto out; } - rds_conn_path_connect_if_down(cpath); + if (rds_conn_path_down(cpath)) + rds_check_all_paths(conn); ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); if (ret) { diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 66121bc6f34e..4444fd82b66d 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -62,8 +62,7 @@ static atomic_t rds_tcp_unloading = ATOMIC_INIT(0); static struct kmem_cache *rds_tcp_conn_slab; static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *fpos); + void *buffer, size_t *lenp, loff_t *fpos); static int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF; static int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF; @@ -90,15 +89,6 @@ static struct ctl_table rds_tcp_sysctl_table[] = { { } }; -/* doing it this way avoids calling tcp_sk() */ -void rds_tcp_nonagle(struct socket *sock) -{ - int val = 1; - - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (void *)&val, - sizeof(val)); -} - u32 rds_tcp_write_seq(struct rds_tcp_connection *tc) { /* seq# of the last byte of data in tcp send buffer */ @@ -176,10 +166,10 @@ void rds_tcp_reset_callbacks(struct socket *sock, */ atomic_set(&cp->cp_state, RDS_CONN_RESETTING); wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags)); - lock_sock(osock->sk); /* reset receive side state for rds_tcp_data_recv() for osock */ cancel_delayed_work_sync(&cp->cp_send_w); cancel_delayed_work_sync(&cp->cp_recv_w); + lock_sock(osock->sk); if (tc->t_tinc) { rds_inc_put(&tc->t_tinc->ti_inc); tc->t_tinc = NULL; @@ -323,8 +313,8 @@ out: } #endif -static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr, - __u32 scope_id) +int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr, + __u32 scope_id) { struct net_device *dev = NULL; #if IS_ENABLED(CONFIG_IPV6) @@ -497,23 +487,37 @@ struct rds_tcp_net { /* All module specific customizations to the RDS-TCP socket should be done in * rds_tcp_tune() and applied after socket creation. */ -void rds_tcp_tune(struct socket *sock) +bool rds_tcp_tune(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct rds_tcp_net *rtn; - rds_tcp_nonagle(sock); + tcp_sock_set_nodelay(sock->sk); lock_sock(sk); + /* TCP timer functions might access net namespace even after + * a process which created this net namespace terminated. + */ + if (!sk->sk_net_refcnt) { + if (!maybe_get_net(net)) { + release_sock(sk); + return false; + } + sk->sk_net_refcnt = 1; + netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); + } + rtn = net_generic(net, rds_tcp_netid); if (rtn->sndbuf_size > 0) { sk->sk_sndbuf = rtn->sndbuf_size; sk->sk_userlocks |= SOCK_SNDBUF_LOCK; } if (rtn->rcvbuf_size > 0) { - sk->sk_sndbuf = rtn->rcvbuf_size; + sk->sk_rcvbuf = rtn->rcvbuf_size; sk->sk_userlocks |= SOCK_RCVBUF_LOCK; } release_sock(sk); + return true; } static void rds_tcp_accept_worker(struct work_struct *work) @@ -676,8 +680,7 @@ static void rds_tcp_sysctl_reset(struct net *net) } static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *fpos) + void *buffer, size_t *lenp, loff_t *fpos) { struct net *net = current->nsproxy->net_ns; int err; @@ -709,7 +712,7 @@ static void rds_tcp_exit(void) } module_exit(rds_tcp_exit); -static int rds_tcp_init(void) +static int __init rds_tcp_init(void) { int ret; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 3c69361d21c7..f8b5930d7b34 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -49,8 +49,7 @@ struct rds_tcp_statistics { }; /* tcp.c */ -void rds_tcp_tune(struct socket *sock); -void rds_tcp_nonagle(struct socket *sock); +bool rds_tcp_tune(struct socket *sock); void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_restore_callbacks(struct socket *sock, @@ -60,7 +59,8 @@ u32 rds_tcp_snd_una(struct rds_tcp_connection *tc); u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq); extern struct rds_transport rds_tcp_transport; void rds_tcp_accept_work(struct sock *sk); - +int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr, + __u32 scope_id); /* tcp_connect.c */ int rds_tcp_conn_path_connect(struct rds_conn_path *cp); void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn); @@ -71,9 +71,8 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6); void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor); void rds_tcp_listen_data_ready(struct sock *sk); int rds_tcp_accept_one(struct socket *sock); -int rds_tcp_keepalive(struct socket *sock); +void rds_tcp_keepalive(struct socket *sock); void *rds_tcp_listen_sock_def_readable(struct net *net); -void rds_tcp_set_linger(struct socket *sock); /* tcp_recv.c */ int rds_tcp_recv_init(void); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 008f50fb25dd..f0c477c5d1db 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -78,6 +78,7 @@ void rds_tcp_state_change(struct sock *sk) case TCP_CLOSE_WAIT: case TCP_CLOSE: rds_conn_path_drop(cp, false); + break; default: break; } @@ -123,7 +124,10 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) if (ret < 0) goto out; - rds_tcp_tune(sock); + if (!rds_tcp_tune(sock)) { + ret = -EINVAL; + goto out; + } if (isv6) { sin6.sin6_family = AF_INET6; @@ -207,7 +211,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp) if (sock) { if (rds_destroy_pending(cp->cp_conn)) - rds_tcp_set_linger(sock); + sock_no_linger(sock->sk); sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN); lock_sock(sock->sk); rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */ diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 810a3a49e947..7edf2e69d3fe 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -38,36 +38,19 @@ #include "rds.h" #include "tcp.h" -int rds_tcp_keepalive(struct socket *sock) +void rds_tcp_keepalive(struct socket *sock) { /* values below based on xs_udp_default_timeout */ int keepidle = 5; /* send a probe 'keepidle' secs after last data */ int keepcnt = 5; /* number of unack'ed probes before declaring dead */ - int keepalive = 1; - int ret = 0; - - ret = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&keepalive, sizeof(keepalive)); - if (ret < 0) - goto bail; - - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, - (char *)&keepcnt, sizeof(keepcnt)); - if (ret < 0) - goto bail; - - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, - (char *)&keepidle, sizeof(keepidle)); - if (ret < 0) - goto bail; + sock_set_keepalive(sock->sk); + tcp_sock_set_keepcnt(sock->sk, keepcnt); + tcp_sock_set_keepidle(sock->sk, keepidle); /* KEEPINTVL is the interval between successive probes. We follow * the model in xs_tcp_finish_connecting() and re-use keepidle. */ - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, - (char *)&keepidle, sizeof(keepidle)); -bail: - return ret; + tcp_sock_set_keepintvl(sock->sk, keepidle); } /* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the @@ -111,17 +94,6 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) return NULL; } -void rds_tcp_set_linger(struct socket *sock) -{ - struct linger no_linger = { - .l_onoff = 1, - .l_linger = 0, - }; - - kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, - (char *)&no_linger, sizeof(no_linger)); -} - int rds_tcp_accept_one(struct socket *sock) { struct socket *new_sock = NULL; @@ -160,11 +132,11 @@ int rds_tcp_accept_one(struct socket *sock) new_sock->ops = sock->ops; __module_get(new_sock->ops->owner); - ret = rds_tcp_keepalive(new_sock); - if (ret < 0) + rds_tcp_keepalive(new_sock); + if (!rds_tcp_tune(new_sock)) { + ret = -EINVAL; goto out; - - rds_tcp_tune(new_sock); + } inet = inet_sk(new_sock->sk); @@ -198,6 +170,12 @@ int rds_tcp_accept_one(struct socket *sock) } #endif + if (!rds_tcp_laddr_check(sock_net(sock->sk), peer_addr, dev_if)) { + /* local address connection is only allowed via loopback */ + ret = -EOPNOTSUPP; + goto out; + } + conn = rds_conn_create(sock_net(sock->sk), my_addr, peer_addr, &rds_tcp_transport, 0, GFP_KERNEL, dev_if); @@ -241,7 +219,7 @@ rst_nsk: * be pending on it. By setting linger, we achieve the side-effect * of avoiding TIME_WAIT state on new_sock. */ - rds_tcp_set_linger(new_sock); + sock_no_linger(new_sock->sk); kernel_sock_shutdown(new_sock, SHUT_RDWR); ret = 0; out: @@ -303,7 +281,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6) } sock->sk->sk_reuse = SK_CAN_REUSE; - rds_tcp_nonagle(sock); + tcp_sock_set_nodelay(sock->sk); write_lock_bh(&sock->sk->sk_callback_lock); sock->sk->sk_user_data = sock->sk->sk_data_ready; diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 42c5ff1eda95..f4ee13da90c7 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -177,7 +177,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, goto out; } tc->t_tinc = tinc; - rdsdebug("alloced tinc %p\n", tinc); + rdsdebug("allocated tinc %p\n", tinc); rds_inc_path_init(&tinc->ti_inc, cp, &cp->cp_conn->c_faddr); tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] = diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 78a2554a4497..8c4d1d6e9249 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -38,23 +38,18 @@ #include "rds.h" #include "tcp.h" -static void rds_tcp_cork(struct socket *sock, int val) -{ - kernel_setsockopt(sock, SOL_TCP, TCP_CORK, (void *)&val, sizeof(val)); -} - void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp) { struct rds_tcp_connection *tc = cp->cp_transport_data; - rds_tcp_cork(tc->t_sock, 1); + tcp_sock_set_cork(tc->t_sock->sk, true); } void rds_tcp_xmit_path_complete(struct rds_conn_path *cp) { struct rds_tcp_connection *tc = cp->cp_transport_data; - rds_tcp_cork(tc->t_sock, 0); + tcp_sock_set_cork(tc->t_sock->sk, false); } /* the core send_sem serializes this with other xmit and shutdown */ diff --git a/net/rds/threads.c b/net/rds/threads.c index 32dc50f0a303..1f424cbfcbb4 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -208,6 +208,7 @@ void rds_send_worker(struct work_struct *work) case -ENOMEM: rds_stats_inc(s_send_delayed_retry); queue_delayed_work(rds_wq, &cp->cp_send_w, 2); + break; default: break; } @@ -232,6 +233,7 @@ void rds_recv_worker(struct work_struct *work) case -ENOMEM: rds_stats_inc(s_recv_delayed_retry); queue_delayed_work(rds_wq, &cp->cp_recv_w, 2); + break; default: break; } diff --git a/net/rds/transport.c b/net/rds/transport.c index 46f709a4b577..f8001ec80867 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -38,6 +38,12 @@ #include "rds.h" #include "loop.h" +static char * const rds_trans_modules[] = { + [RDS_TRANS_IB] = "rds_rdma", + [RDS_TRANS_GAP] = NULL, + [RDS_TRANS_TCP] = "rds_tcp", +}; + static struct rds_transport *transports[RDS_TRANS_COUNT]; static DECLARE_RWSEM(rds_trans_sem); @@ -110,18 +116,20 @@ struct rds_transport *rds_trans_get(int t_type) { struct rds_transport *ret = NULL; struct rds_transport *trans; - unsigned int i; down_read(&rds_trans_sem); - for (i = 0; i < RDS_TRANS_COUNT; i++) { - trans = transports[i]; - - if (trans && trans->t_type == t_type && - (!trans->t_owner || try_module_get(trans->t_owner))) { - ret = trans; - break; - } + trans = transports[t_type]; + if (!trans) { + up_read(&rds_trans_sem); + if (rds_trans_modules[t_type]) + request_module(rds_trans_modules[t_type]); + down_read(&rds_trans_sem); + trans = transports[t_type]; } + if (trans && trans->t_type == t_type && + (!trans->t_owner || try_module_get(trans->t_owner))) + ret = trans; + up_read(&rds_trans_sem); return ret; |