aboutsummaryrefslogtreecommitdiffstats
path: root/net/rds/rdma.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2020-01-28 16:02:33 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2020-01-28 16:02:33 -0800
commit bd2463ac7d7ec51d432f23bf0e893fb371a908cd (patch)
tree 3da32c23be83adb9d9bda7e51b51fa39f69f2447 /net/rds/rdma.c
parent Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6 (diff)
parent net: phy: add default ARCH_BCM_IPROC for MDIO_BCM_IPROC (diff)
download linux-bd2463ac7d7ec51d432f23bf0e893fb371a908cd.tar.xz
linux-bd2463ac7d7ec51d432f23bf0e893fb371a908cd.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller:

 1) Add WireGuard

 2) Add HE and TWT support to ath11k driver, from John Crispin.

 3) Add ESP in TCP encapsulation support, from Sabrina Dubroca.

 4) Add variable window congestion control to TIPC, from Jon Maloy.

 5) Add BCM84881 PHY driver, from Russell King.

 6) Start adding netlink support for ethtool operations, from Michal Kubecek.

 7) Add XDP drop and TX action support to ena driver, from Sameeh Jubran.

 8) Add new ipv4 route notifications so that mlxsw driver does not have to handle identical routes itself. From Ido Schimmel.

 9) Add BPF dynamic program extensions, from Alexei Starovoitov.

10) Support RX and TX timestamping in igc, from Vinicius Costa Gomes.

11) Add support for macsec HW offloading, from Antoine Tenart.

12) Add initial support for MPTCP protocol, from Christoph Paasch, Matthieu Baerts, Florian Westphal, Peter Krystad, and many others.

13) Add Octeontx2 PF support, from Sunil Goutham, Geetha sowjanya, Linu Cherian, and others.

* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1469 commits)
  net: phy: add default ARCH_BCM_IPROC for MDIO_BCM_IPROC
  udp: segment looped gso packets correctly
  netem: change mailing list
  qed: FW 8.42.2.0 debug features
  qed: rt init valid initialization changed
  qed: Debug feature: ilt and mdump
  qed: FW 8.42.2.0 Add fw overlay feature
  qed: FW 8.42.2.0 HSI changes
  qed: FW 8.42.2.0 iscsi/fcoe changes
  qed: Add abstraction for different hsi values per chip
  qed: FW 8.42.2.0 Additional ll2 type
  qed: Use dmae to write to widebus registers in fw_funcs
  qed: FW 8.42.2.0 Parser offsets modified
  qed: FW 8.42.2.0 Queue Manager changes
  qed: FW 8.42.2.0 Expose new registers and change windows
  qed: FW 8.42.2.0 Internal ram offsets modifications
  MAINTAINERS: Add entry for Marvell OcteonTX2 Physical Function driver
  Documentation: net: octeontx2: Add RVU HW and drivers overview
  octeontx2-pf: ethtool RSS config support
  octeontx2-pf: Add basic ethtool support
  ...
Diffstat (limited to 'net/rds/rdma.c')
-rw-r--r-- net/rds/rdma.c | 157
1 files changed, 117 insertions, 40 deletions
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 916f5ec373d8..3341eee87bf9 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -156,11 +156,13 @@ void rds_rdma_drop_keys(struct rds_sock *rs)
static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
struct page **pages, int write)
{
+ unsigned int gup_flags = FOLL_LONGTERM;
int ret;
- ret = get_user_pages_fast(user_addr, nr_pages, write ? FOLL_WRITE : 0,
- pages);
+ if (write)
+ gup_flags |= FOLL_WRITE;
+ ret = get_user_pages_fast(user_addr, nr_pages, gup_flags, pages);
if (ret >= 0 && ret < nr_pages) {
while (ret--)
put_page(pages[ret]);
@@ -175,13 +177,14 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
struct rds_conn_path *cp)
{
struct rds_mr *mr = NULL, *found;
+ struct scatterlist *sg = NULL;
unsigned int nr_pages;
struct page **pages = NULL;
- struct scatterlist *sg;
void *trans_private;
unsigned long flags;
rds_rdma_cookie_t cookie;
- unsigned int nents;
+ unsigned int nents = 0;
+ int need_odp = 0;
long i;
int ret;
@@ -195,6 +198,21 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
goto out;
}
+ /* If the combination of the addr and size requested for this memory
+ * region causes an integer overflow, return error.
+ */
+ if (((args->vec.addr + args->vec.bytes) < args->vec.addr) ||
+ PAGE_ALIGN(args->vec.addr + args->vec.bytes) <
+ (args->vec.addr + args->vec.bytes)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!can_do_mlock()) {
+ ret = -EPERM;
+ goto out;
+ }
+
nr_pages = rds_pages_in_vec(&args->vec);
if (nr_pages == 0) {
ret = -EINVAL;
@@ -248,36 +266,44 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
* the zero page.
*/
ret = rds_pin_pages(args->vec.addr, nr_pages, pages, 1);
- if (ret < 0)
- goto out;
-
- nents = ret;
- sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
- if (!sg) {
- ret = -ENOMEM;
+ if (ret == -EOPNOTSUPP) {
+ need_odp = 1;
+ } else if (ret <= 0) {
goto out;
- }
- WARN_ON(!nents);
- sg_init_table(sg, nents);
-
- /* Stick all pages into the scatterlist */
- for (i = 0 ; i < nents; i++)
- sg_set_page(&sg[i], pages[i], PAGE_SIZE, 0);
+ } else {
+ nents = ret;
+ sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
+ if (!sg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ WARN_ON(!nents);
+ sg_init_table(sg, nents);
- rdsdebug("RDS: trans_private nents is %u\n", nents);
+ /* Stick all pages into the scatterlist */
+ for (i = 0 ; i < nents; i++)
+ sg_set_page(&sg[i], pages[i], PAGE_SIZE, 0);
+ rdsdebug("RDS: trans_private nents is %u\n", nents);
+ }
/* Obtain a transport specific MR. If this succeeds, the
* s/g list is now owned by the MR.
* Note that dma_map() implies that pending writes are
* flushed to RAM, so no dma_sync is needed here. */
- trans_private = rs->rs_transport->get_mr(sg, nents, rs,
- &mr->r_key,
- cp ? cp->cp_conn : NULL);
+ trans_private = rs->rs_transport->get_mr(
+ sg, nents, rs, &mr->r_key, cp ? cp->cp_conn : NULL,
+ args->vec.addr, args->vec.bytes,
+ need_odp ? ODP_ZEROBASED : ODP_NOT_NEEDED);
if (IS_ERR(trans_private)) {
- for (i = 0 ; i < nents; i++)
- put_page(sg_page(&sg[i]));
- kfree(sg);
+ /* In ODP case, we don't GUP pages, so don't need
+ * to release anything.
+ */
+ if (!need_odp) {
+ for (i = 0 ; i < nents; i++)
+ put_page(sg_page(&sg[i]));
+ kfree(sg);
+ }
ret = PTR_ERR(trans_private);
goto out;
}
@@ -291,7 +317,11 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
* map page aligned regions. So we keep the offset, and build
* a 64bit cookie containing <R_Key, offset> and pass that
* around. */
- cookie = rds_rdma_make_cookie(mr->r_key, args->vec.addr & ~PAGE_MASK);
+ if (need_odp)
+ cookie = rds_rdma_make_cookie(mr->r_key, 0);
+ else
+ cookie = rds_rdma_make_cookie(mr->r_key,
+ args->vec.addr & ~PAGE_MASK);
if (cookie_ret)
*cookie_ret = cookie;
@@ -456,22 +486,26 @@ void rds_rdma_free_op(struct rm_rdma_op *ro)
{
unsigned int i;
- for (i = 0; i < ro->op_nents; i++) {
- struct page *page = sg_page(&ro->op_sg[i]);
-
- /* Mark page dirty if it was possibly modified, which
- * is the case for a RDMA_READ which copies from remote
- * to local memory */
- if (!ro->op_write) {
- WARN_ON(!page->mapping && irqs_disabled());
- set_page_dirty(page);
+ if (ro->op_odp_mr) {
+ rds_mr_put(ro->op_odp_mr);
+ } else {
+ for (i = 0; i < ro->op_nents; i++) {
+ struct page *page = sg_page(&ro->op_sg[i]);
+
+ /* Mark page dirty if it was possibly modified, which
+ * is the case for a RDMA_READ which copies from remote
+ * to local memory
+ */
+ if (!ro->op_write)
+ set_page_dirty(page);
+ put_page(page);
}
- put_page(page);
}
kfree(ro->op_notifier);
ro->op_notifier = NULL;
ro->op_active = 0;
+ ro->op_odp_mr = NULL;
}
void rds_atomic_free_op(struct rm_atomic_op *ao)
@@ -581,6 +615,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
struct rds_iovec *iovs;
unsigned int i, j;
int ret = 0;
+ bool odp_supported = true;
if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args))
|| rm->rdma.op_active)
@@ -602,6 +637,9 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
ret = -EINVAL;
goto out_ret;
}
+ /* odp-mr is not supported for multiple requests within one message */
+ if (args->nr_local != 1)
+ odp_supported = false;
iovs = vec->iov;
@@ -623,6 +661,8 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
op->op_silent = !!(args->flags & RDS_RDMA_SILENT);
op->op_active = 1;
op->op_recverr = rs->rs_recverr;
+ op->op_odp_mr = NULL;
+
WARN_ON(!nr_pages);
op->op_sg = rds_message_alloc_sgs(rm, nr_pages, &ret);
if (!op->op_sg)
@@ -672,10 +712,44 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
* If it's a READ operation, we need to pin the pages for writing.
*/
ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write);
- if (ret < 0)
+ if ((!odp_supported && ret <= 0) ||
+ (odp_supported && ret <= 0 && ret != -EOPNOTSUPP))
goto out_pages;
- else
- ret = 0;
+
+ if (ret == -EOPNOTSUPP) {
+ struct rds_mr *local_odp_mr;
+
+ if (!rs->rs_transport->get_mr) {
+ ret = -EOPNOTSUPP;
+ goto out_pages;
+ }
+ local_odp_mr =
+ kzalloc(sizeof(*local_odp_mr), GFP_KERNEL);
+ if (!local_odp_mr) {
+ ret = -ENOMEM;
+ goto out_pages;
+ }
+ RB_CLEAR_NODE(&local_odp_mr->r_rb_node);
+ refcount_set(&local_odp_mr->r_refcount, 1);
+ local_odp_mr->r_trans = rs->rs_transport;
+ local_odp_mr->r_sock = rs;
+ local_odp_mr->r_trans_private =
+ rs->rs_transport->get_mr(
+ NULL, 0, rs, &local_odp_mr->r_key, NULL,
+ iov->addr, iov->bytes, ODP_VIRTUAL);
+ if (IS_ERR(local_odp_mr->r_trans_private)) {
+ ret = IS_ERR(local_odp_mr->r_trans_private);
+ rdsdebug("get_mr ret %d %p\"", ret,
+ local_odp_mr->r_trans_private);
+ kfree(local_odp_mr);
+ ret = -EOPNOTSUPP;
+ goto out_pages;
+ }
+ rdsdebug("Need odp; local_odp_mr %p trans_private %p\n",
+ local_odp_mr, local_odp_mr->r_trans_private);
+ op->op_odp_mr = local_odp_mr;
+ op->op_odp_addr = iov->addr;
+ }
rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n",
nr_bytes, nr, iov->bytes, iov->addr);
@@ -691,6 +765,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
min_t(unsigned int, iov->bytes, PAGE_SIZE - offset),
offset);
+ sg_dma_len(sg) = sg->length;
rdsdebug("RDS: sg->offset %x sg->len %x iov->addr %llx iov->bytes %llu\n",
sg->offset, sg->length, iov->addr, iov->bytes);
@@ -709,6 +784,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
goto out_pages;
}
op->op_bytes = nr_bytes;
+ ret = 0;
out_pages:
kfree(pages);
@@ -755,7 +831,8 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
if (mr) {
- mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
+ mr->r_trans->sync_mr(mr->r_trans_private,
+ DMA_TO_DEVICE);
rm->rdma.op_rdma_mr = mr;
}
return err;