-rw-r--r--  drivers/staging/rdma/hfi1/Makefile        |   2
-rw-r--r--  drivers/staging/rdma/hfi1/file_ops.c      |   1
-rw-r--r--  drivers/staging/rdma/hfi1/hfi.h           |  14
-rw-r--r--  drivers/staging/rdma/hfi1/mmu_rb.c        | 304
-rw-r--r--  drivers/staging/rdma/hfi1/mmu_rb.h        |  73
-rw-r--r--  drivers/staging/rdma/hfi1/user_exp_rcv.c  | 336
6 files changed, 471 insertions(+), 259 deletions(-)
diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile
index 9b117062d52e..8dc59382ee96 100644
--- a/drivers/staging/rdma/hfi1/Makefile
+++ b/drivers/staging/rdma/hfi1/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \
eprom.o file_ops.o firmware.o \
- init.o intr.o mad.o pcie.o pio.o pio_copy.o platform.o \
+ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
verbs_txreq.o
diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c
index e4490aecf262..e460261f94b7 100644
--- a/drivers/staging/rdma/hfi1/file_ops.c
+++ b/drivers/staging/rdma/hfi1/file_ops.c
@@ -58,6 +58,7 @@
#include "user_exp_rcv.h"
#include "eprom.h"
#include "aspm.h"
+#include "mmu_rb.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h
index 572288308406..78c8e24b1970 100644
--- a/drivers/staging/rdma/hfi1/hfi.h
+++ b/drivers/staging/rdma/hfi1/hfi.h
@@ -1179,6 +1179,7 @@ struct hfi1_devdata {
#define PT_EAGER 1
#define PT_INVALID 2
+struct tid_rb_node;
struct mmu_rb_node;
/* Private data for file operations */
@@ -1189,20 +1190,17 @@ struct hfi1_filedata {
struct hfi1_user_sdma_pkt_q *pq;
/* for cpu affinity; -1 if none */
int rec_cpu_num;
- struct mmu_notifier mn;
struct rb_root tid_rb_root;
- struct mmu_rb_node **entry_to_rb;
+ struct tid_rb_node **entry_to_rb;
spinlock_t tid_lock; /* protect tid_[limit,used] counters */
u32 tid_limit;
u32 tid_used;
- spinlock_t rb_lock; /* protect tid_rb_root RB tree */
u32 *invalid_tids;
u32 invalid_tid_idx;
- spinlock_t invalid_lock; /* protect the invalid_tids array */
- int (*mmu_rb_insert)(struct hfi1_filedata *, struct rb_root *,
- struct mmu_rb_node *);
- void (*mmu_rb_remove)(struct hfi1_filedata *, struct rb_root *,
- struct mmu_rb_node *);
+ /* protect invalid_tids array and invalid_tid_idx */
+ spinlock_t invalid_lock;
+ int (*mmu_rb_insert)(struct rb_root *, struct mmu_rb_node *);
+ void (*mmu_rb_remove)(struct rb_root *, struct mmu_rb_node *);
};
extern struct list_head hfi1_dev_list;
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c
new file mode 100644
index 000000000000..779ebafd4f4d
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/mmu_rb.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/list.h>
+#include <linux/mmu_notifier.h>
+#include <linux/rbtree.h>
+
+#include "mmu_rb.h"
+#include "trace.h"
+
+struct mmu_rb_handler {
+ struct list_head list;
+ struct mmu_notifier mn;
+ struct rb_root *root;
+ spinlock_t lock; /* protect the RB tree */
+ struct mmu_rb_ops *ops;
+};
+
+static LIST_HEAD(mmu_rb_handlers);
+static DEFINE_SPINLOCK(mmu_rb_lock); /* protect mmu_rb_handlers list */
+
+static struct mmu_rb_handler *find_mmu_handler(struct rb_root *);
+static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *,
+ unsigned long);
+static inline void mmu_notifier_range_start(struct mmu_notifier *,
+ struct mm_struct *,
+ unsigned long, unsigned long);
+static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
+ unsigned long, unsigned long);
+static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
+ unsigned long, unsigned long);
+
+static struct mmu_notifier_ops mn_opts = {
+ .invalidate_page = mmu_notifier_page,
+ .invalidate_range_start = mmu_notifier_range_start,
+};
+
+int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
+{
+ struct mmu_rb_handler *handlr;
+
+ if (!ops->compare || !ops->invalidate)
+ return -EINVAL;
+
+ handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
+ if (!handlr)
+ return -ENOMEM;
+
+ handlr->root = root;
+ handlr->ops = ops;
+ INIT_HLIST_NODE(&handlr->mn.hlist);
+ spin_lock_init(&handlr->lock);
+ handlr->mn.ops = &mn_opts;
+ spin_lock(&mmu_rb_lock);
+ list_add_tail(&handlr->list, &mmu_rb_handlers);
+ spin_unlock(&mmu_rb_lock);
+
+ return mmu_notifier_register(&handlr->mn, current->mm);
+}
+
+void hfi1_mmu_rb_unregister(struct rb_root *root)
+{
+ struct mmu_rb_handler *handler = find_mmu_handler(root);
+
+ spin_lock(&mmu_rb_lock);
+ list_del(&handler->list);
+ spin_unlock(&mmu_rb_lock);
+
+ if (!RB_EMPTY_ROOT(root)) {
+ struct rb_node *node;
+ struct mmu_rb_node *rbnode;
+
+ while ((node = rb_first(root))) {
+ rbnode = rb_entry(node, struct mmu_rb_node, node);
+ if (handler->ops->remove)
+ handler->ops->remove(root, rbnode);
+ rb_erase(node, root);
+ kfree(rbnode);
+ }
+ }
+
+ if (current->mm)
+ mmu_notifier_unregister(&handler->mn, current->mm);
+ kfree(handler);
+}
+
+int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+ struct rb_node **new, *parent = NULL;
+ struct mmu_rb_handler *handler = find_mmu_handler(root);
+ struct mmu_rb_node *this;
+ int res, ret = 0;
+
+ if (!handler)
+ return -EINVAL;
+
+ new = &handler->root->rb_node;
+ spin_lock(&handler->lock);
+ while (*new) {
+ this = container_of(*new, struct mmu_rb_node, node);
+ res = handler->ops->compare(this, mnode->addr, mnode->len);
+ parent = *new;
+
+ if (res < 0) {
+ new = &((*new)->rb_left);
+ } else if (res > 0) {
+ new = &((*new)->rb_right);
+ } else {
+ ret = 1;
+ goto unlock;
+ }
+ }
+
+ if (handler->ops->insert) {
+ ret = handler->ops->insert(root, mnode);
+ if (ret)
+ goto unlock;
+ }
+
+ rb_link_node(&mnode->node, parent, new);
+ rb_insert_color(&mnode->node, root);
+unlock:
+ spin_unlock(&handler->lock);
+ return ret;
+}
+
+/* Caller must hold the handler lock */
+static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
+ unsigned long addr,
+ unsigned long len)
+{
+ struct rb_node *node = handler->root->rb_node;
+ struct mmu_rb_node *mnode;
+ int res;
+
+ while (node) {
+ mnode = container_of(node, struct mmu_rb_node, node);
+ res = handler->ops->compare(mnode, addr, len);
+
+ if (res < 0)
+ node = node->rb_left;
+ else if (res > 0)
+ node = node->rb_right;
+ else
+ return mnode;
+ }
+ return NULL;
+}
+
+static void __mmu_rb_remove(struct mmu_rb_handler *handler,
+ struct mmu_rb_node *node)
+{
+ /* Validity of handler and node pointers has been checked by caller. */
+ if (handler->ops->remove)
+ handler->ops->remove(handler->root, node);
+ rb_erase(&node->node, handler->root);
+}
+
+struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
+ unsigned long len)
+{
+ struct mmu_rb_handler *handler = find_mmu_handler(root);
+ struct mmu_rb_node *node;
+
+ if (!handler)
+ return ERR_PTR(-EINVAL);
+
+ spin_lock(&handler->lock);
+ node = __mmu_rb_search(handler, addr, len);
+ spin_unlock(&handler->lock);
+
+ return node;
+}
+
+void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
+{
+ struct mmu_rb_handler *handler = find_mmu_handler(root);
+
+ if (!handler || !node)
+ return;
+
+ spin_lock(&handler->lock);
+ __mmu_rb_remove(handler, node);
+ spin_unlock(&handler->lock);
+}
+
+static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
+{
+ struct mmu_rb_handler *handler;
+
+ spin_lock(&mmu_rb_lock);
+ list_for_each_entry(handler, &mmu_rb_handlers, list) {
+ if (handler->root == root)
+ goto unlock;
+ }
+ handler = NULL;
+unlock:
+ spin_unlock(&mmu_rb_lock);
+ return handler;
+}
+
+static inline void mmu_notifier_page(struct mmu_notifier *mn,
+ struct mm_struct *mm, unsigned long addr)
+{
+ mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE);
+}
+
+static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ mmu_notifier_mem_invalidate(mn, start, end);
+}
+
+static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
+ unsigned long start, unsigned long end)
+{
+ struct mmu_rb_handler *handler =
+ container_of(mn, struct mmu_rb_handler, mn);
+ struct rb_root *root = handler->root;
+ struct mmu_rb_node *node;
+ unsigned long addr = start;
+
+ spin_lock(&handler->lock);
+ while (addr < end) {
+ /*
+ * There is no good way to provide a reasonable length to the
+ * search function at this point. Using the remaining length in
+ * the invalidation range is not the right thing to do.
+ * We have to rely on the fact that the insertion algorithm
+ * takes care of any overlap or length restrictions by using the
+ * actual size of each node. Therefore, we can use PAGE_SIZE as an
+ * arbitrary, non-zero length.
+ */
+ node = __mmu_rb_search(handler, addr, PAGE_SIZE);
+
+ if (!node) {
+ /*
+ * Didn't find a node at this address. However, the
+ * range could be bigger than what we have registered
+ * so we have to keep looking.
+ */
+ addr += PAGE_SIZE;
+ continue;
+ }
+ if (handler->ops->invalidate(root, node))
+ __mmu_rb_remove(handler, node);
+
+ /*
+ * The next address to be looked up is computed based
+ * on the node's starting address. This is due to the
+ * fact that the range where we start might be in the
+ * middle of the node's buffer, so simply incrementing
+ * the address by the node's size would result in a
+ * bad address.
+ */
+ addr = node->addr + node->len;
+ }
+ spin_unlock(&handler->lock);
+}
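The invalidation walk above probes page by page through gaps and, on a hit, jumps to the end of the matched node. A standalone user-space sketch of that cursor logic (not part of the patch; find_range() and the sample ranges are stand-ins for __mmu_rb_search() and registered mmu_rb_node entries):

/* Sketch of the cursor logic in mmu_notifier_mem_invalidate(). */
#include <stdio.h>

#define PAGE_SIZE 4096UL

struct range { unsigned long addr, len; };

static struct range ranges[] = {
	{ 0x10000, 3 * PAGE_SIZE },	/* hypothetical registered buffers */
	{ 0x20000, 1 * PAGE_SIZE },
};

static struct range *find_range(unsigned long addr)
{
	unsigned i;

	for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++)
		if (addr >= ranges[i].addr &&
		    addr < ranges[i].addr + ranges[i].len)
			return &ranges[i];
	return NULL;
}

int main(void)
{
	unsigned long addr = 0x0f000, end = 0x22000;

	while (addr < end) {
		struct range *r = find_range(addr);

		if (!r) {
			addr += PAGE_SIZE;	/* keep probing the gap */
			continue;
		}
		printf("invalidate [%#lx, %#lx)\n", r->addr, r->addr + r->len);
		addr = r->addr + r->len;	/* jump past the whole node */
	}
	return 0;
}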
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/staging/rdma/hfi1/mmu_rb.h
new file mode 100644
index 000000000000..9fe1076ab39d
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/mmu_rb.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _HFI1_MMU_RB_H
+#define _HFI1_MMU_RB_H
+
+#include "hfi.h"
+
+struct mmu_rb_node {
+ struct rb_node node;
+ unsigned long addr;
+ unsigned long len;
+};
+
+struct mmu_rb_ops {
+ int (*compare)(struct mmu_rb_node *, unsigned long,
+ unsigned long);
+ int (*insert)(struct rb_root *, struct mmu_rb_node *);
+ void (*remove)(struct rb_root *, struct mmu_rb_node *);
+ int (*invalidate)(struct rb_root *, struct mmu_rb_node *);
+};
+
+int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops);
+void hfi1_mmu_rb_unregister(struct rb_root *);
+int hfi1_mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
+void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *);
+struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long,
+ unsigned long);
+
+#endif /* _HFI1_MMU_RB_H */
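Not part of the patch, but for orientation: a minimal sketch of a hypothetical client of this interface. Only the hfi1_mmu_rb_* entry points and the mmu_rb_ops fields come from the header above; the demo_* names are invented. The compare convention mirrors mmu_addr_cmp() in user_exp_rcv.c: negative when [addr, addr+len) lies wholly below the node, zero on overlap, positive otherwise.

/*
 * Hypothetical client of the mmu_rb API (sketch only). compare and
 * invalidate are mandatory -- hfi1_mmu_rb_register() rejects ops
 * without them; insert and remove are optional hooks.
 */
#include "mmu_rb.h"

struct demo_node {
	struct mmu_rb_node mmu;	/* embedded so container_of() can recover us */
	void *payload;
};

static int demo_compare(struct mmu_rb_node *node, unsigned long addr,
			unsigned long len)
{
	if ((addr + len) <= node->addr)
		return -1;	/* range entirely below node */
	else if (addr >= node->addr && addr < (node->addr + node->len))
		return 0;	/* any overlap counts as a match */
	else
		return 1;	/* range above node */
}

static int demo_invalidate(struct rb_root *root, struct mmu_rb_node *node)
{
	return 1;	/* non-zero tells the core to erase the node */
}

static struct mmu_rb_ops demo_ops = {
	.compare = demo_compare,
	.invalidate = demo_invalidate,
};

static struct rb_root demo_root = RB_ROOT;

static int demo_register(void)
{
	int ret = hfi1_mmu_rb_register(&demo_root, &demo_ops);

	if (ret)
		return ret;
	/* later, per pinned buffer: hfi1_mmu_rb_insert(&demo_root, &n->mmu) */
	return 0;
}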
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c
index fccae508a5d0..c9e05ddd469f 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.c
+++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c
@@ -48,6 +48,7 @@
#include "user_exp_rcv.h"
#include "trace.h"
+#include "mmu_rb.h"
struct tid_group {
struct list_head list;
@@ -57,11 +58,9 @@ struct tid_group {
u8 map;
};
-struct mmu_rb_node {
- struct rb_node rbnode;
- unsigned long virt;
+struct tid_rb_node {
+ struct mmu_rb_node mmu;
unsigned long phys;
- unsigned long len;
struct tid_group *grp;
u32 rcventry;
dma_addr_t dma_addr;
@@ -70,16 +69,6 @@ struct mmu_rb_node {
struct page *pages[0];
};
-enum mmu_call_types {
- MMU_INVALIDATE_PAGE = 0,
- MMU_INVALIDATE_RANGE = 1
-};
-
-static const char * const mmu_types[] = {
- "PAGE",
- "RANGE"
-};
-
struct tid_pageset {
u16 idx;
u16 count;
@@ -99,28 +88,21 @@ static int set_rcvarray_entry(struct file *, unsigned long, u32,
struct tid_group *, struct page **, unsigned);
static inline int mmu_addr_cmp(struct mmu_rb_node *, unsigned long,
unsigned long);
-static struct mmu_rb_node *mmu_rb_search(struct rb_root *, unsigned long);
-static int mmu_rb_insert_by_addr(struct hfi1_filedata *, struct rb_root *,
- struct mmu_rb_node *);
-static int mmu_rb_insert_by_entry(struct hfi1_filedata *, struct rb_root *,
- struct mmu_rb_node *);
-static void mmu_rb_remove_by_addr(struct hfi1_filedata *, struct rb_root *,
- struct mmu_rb_node *);
-static void mmu_rb_remove_by_entry(struct hfi1_filedata *, struct rb_root *,
- struct mmu_rb_node *);
-static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
- unsigned long, unsigned long,
- enum mmu_call_types);
-static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *,
- unsigned long);
-static inline void mmu_notifier_range_start(struct mmu_notifier *,
- struct mm_struct *,
- unsigned long, unsigned long);
+static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
+static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *);
+static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
struct tid_pageset *, unsigned, u16, struct page **,
u32 *, unsigned *, unsigned *);
static int unprogram_rcvarray(struct file *, u32, struct tid_group **);
-static void clear_tid_node(struct hfi1_filedata *, u16, struct mmu_rb_node *);
+static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *);
+
+static struct mmu_rb_ops tid_rb_ops = {
+ .compare = mmu_addr_cmp,
+ .insert = mmu_rb_insert,
+ .remove = mmu_rb_remove,
+ .invalidate = mmu_rb_invalidate
+};
static inline u32 rcventry2tidinfo(u32 rcventry)
{
@@ -167,11 +149,6 @@ static inline void tid_group_move(struct tid_group *group,
tid_group_add_tail(group, s2);
}
-static struct mmu_notifier_ops mn_opts = {
- .invalidate_page = mmu_notifier_page,
- .invalidate_range_start = mmu_notifier_range_start,
-};
-
/*
* Initialize context and file private data needed for Expected
* receive caching. This needs to be done after the context has
@@ -185,11 +162,8 @@ int hfi1_user_exp_rcv_init(struct file *fp)
unsigned tidbase;
int i, ret = 0;
- INIT_HLIST_NODE(&fd->mn.hlist);
- spin_lock_init(&fd->rb_lock);
spin_lock_init(&fd->tid_lock);
spin_lock_init(&fd->invalid_lock);
- fd->mn.ops = &mn_opts;
fd->tid_rb_root = RB_ROOT;
if (!uctxt->subctxt_cnt || !fd->subctxt) {
@@ -239,7 +213,7 @@ int hfi1_user_exp_rcv_init(struct file *fp)
* fails, continue but turn off the TID caching for
* all user contexts.
*/
- ret = mmu_notifier_register(&fd->mn, current->mm);
+ ret = hfi1_mmu_rb_register(&fd->tid_rb_root, &tid_rb_ops);
if (ret) {
dd_dev_info(dd,
"Failed MMU notifier registration %d\n",
@@ -250,11 +224,11 @@ int hfi1_user_exp_rcv_init(struct file *fp)
}
if (HFI1_CAP_IS_USET(TID_UNMAP)) {
- fd->mmu_rb_insert = mmu_rb_insert_by_entry;
- fd->mmu_rb_remove = mmu_rb_remove_by_entry;
+ fd->mmu_rb_insert = mmu_rb_insert;
+ fd->mmu_rb_remove = mmu_rb_remove;
} else {
- fd->mmu_rb_insert = mmu_rb_insert_by_addr;
- fd->mmu_rb_remove = mmu_rb_remove_by_addr;
+ fd->mmu_rb_insert = hfi1_mmu_rb_insert;
+ fd->mmu_rb_remove = hfi1_mmu_rb_remove;
}
/*
@@ -295,8 +269,8 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
* The notifier would have been removed when the process'es mm
* was freed.
*/
- if (current->mm && !HFI1_CAP_IS_USET(TID_UNMAP))
- mmu_notifier_unregister(&fd->mn, current->mm);
+ if (!HFI1_CAP_IS_USET(TID_UNMAP))
+ hfi1_mmu_rb_unregister(&fd->tid_rb_root);
kfree(fd->invalid_tids);
@@ -312,19 +286,6 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
list_del_init(&grp->list);
kfree(grp);
}
- spin_lock(&fd->rb_lock);
- if (!RB_EMPTY_ROOT(&fd->tid_rb_root)) {
- struct rb_node *node;
- struct mmu_rb_node *rbnode;
-
- while ((node = rb_first(&fd->tid_rb_root))) {
- rbnode = rb_entry(node, struct mmu_rb_node,
- rbnode);
- rb_erase(&rbnode->rbnode, &fd->tid_rb_root);
- kfree(rbnode);
- }
- }
- spin_unlock(&fd->rb_lock);
hfi1_clear_tids(uctxt);
}
@@ -866,7 +827,7 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
int ret;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct mmu_rb_node *node;
+ struct tid_rb_node *node;
struct hfi1_devdata *dd = uctxt->dd;
struct rb_root *root = &fd->tid_rb_root;
dma_addr_t phys;
@@ -890,9 +851,9 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
return -EFAULT;
}
- node->virt = vaddr;
+ node->mmu.addr = vaddr;
+ node->mmu.len = npages * PAGE_SIZE;
node->phys = page_to_phys(pages[0]);
- node->len = npages * PAGE_SIZE;
node->npages = npages;
node->rcventry = rcventry;
node->dma_addr = phys;
@@ -900,21 +861,19 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
node->freed = false;
memcpy(node->pages, pages, sizeof(struct page *) * npages);
- spin_lock(&fd->rb_lock);
- ret = fd->mmu_rb_insert(fd, root, node);
- spin_unlock(&fd->rb_lock);
+ ret = fd->mmu_rb_insert(root, &node->mmu);
if (ret) {
hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
- node->rcventry, node->virt, node->phys, ret);
+ node->rcventry, node->mmu.addr, node->phys, ret);
pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
PCI_DMA_FROMDEVICE);
kfree(node);
return -EFAULT;
}
hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
- trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry,
- npages, node->virt, node->phys, phys);
+ trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
+ node->mmu.addr, node->phys, phys);
return 0;
}
@@ -924,7 +883,7 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
- struct mmu_rb_node *node;
+ struct tid_rb_node *node;
u8 tidctrl = EXP_TID_GET(tidinfo, CTRL);
u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry;
@@ -939,14 +898,11 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
rcventry = tididx + (tidctrl - 1);
- spin_lock(&fd->rb_lock);
node = fd->entry_to_rb[rcventry];
- if (!node || node->rcventry != (uctxt->expected_base + rcventry)) {
- spin_unlock(&fd->rb_lock);
+ if (!node || node->rcventry != (uctxt->expected_base + rcventry))
return -EBADF;
- }
- fd->mmu_rb_remove(fd, &fd->tid_rb_root, node);
- spin_unlock(&fd->rb_lock);
+ fd->mmu_rb_remove(&fd->tid_rb_root, &node->mmu);
+
if (grp)
*grp = node->grp;
clear_tid_node(fd, fd->subctxt, node);
@@ -954,13 +910,13 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
}
static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt,
- struct mmu_rb_node *node)
+ struct tid_rb_node *node)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
- node->npages, node->virt, node->phys,
+ node->npages, node->mmu.addr, node->phys,
node->dma_addr);
hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0);
@@ -970,7 +926,7 @@ static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt,
*/
flush_wc();
- pci_unmap_single(dd->pcidev, node->dma_addr, node->len,
+ pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
PCI_DMA_FROMDEVICE);
hfi1_release_user_pages(node->pages, node->npages, true);
@@ -997,216 +953,96 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
list_for_each_entry_safe(grp, ptr, &set->list, list) {
list_del_init(&grp->list);
- spin_lock(&fd->rb_lock);
for (i = 0; i < grp->size; i++) {
if (grp->map & (1 << i)) {
u16 rcventry = grp->base + i;
- struct mmu_rb_node *node;
+ struct tid_rb_node *node;
node = fd->entry_to_rb[rcventry -
uctxt->expected_base];
if (!node || node->rcventry != rcventry)
continue;
- fd->mmu_rb_remove(fd, root, node);
+ fd->mmu_rb_remove(root, &node->mmu);
clear_tid_node(fd, -1, node);
}
}
- spin_unlock(&fd->rb_lock);
}
}
-static inline void mmu_notifier_page(struct mmu_notifier *mn,
- struct mm_struct *mm, unsigned long addr)
-{
- mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE,
- MMU_INVALIDATE_PAGE);
-}
-
-static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
{
- mmu_notifier_mem_invalidate(mn, start, end, MMU_INVALIDATE_RANGE);
-}
+ struct hfi1_filedata *fdata =
+ container_of(root, struct hfi1_filedata, tid_rb_root);
+ struct hfi1_ctxtdata *uctxt = fdata->uctxt;
+ struct tid_rb_node *node =
+ container_of(mnode, struct tid_rb_node, mmu);
-static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
- unsigned long start, unsigned long end,
- enum mmu_call_types type)
-{
- struct hfi1_filedata *fd = container_of(mn, struct hfi1_filedata, mn);
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct rb_root *root = &fd->tid_rb_root;
- struct mmu_rb_node *node;
- unsigned long addr = start;
+ if (node->freed)
+ return 0;
- trace_hfi1_mmu_invalidate(uctxt->ctxt, fd->subctxt, mmu_types[type],
- start, end);
+ trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+ node->rcventry, node->npages, node->dma_addr);
+ node->freed = true;
- spin_lock(&fd->rb_lock);
- while (addr < end) {
- node = mmu_rb_search(root, addr);
+ spin_lock(&fdata->invalid_lock);
+ if (fdata->invalid_tid_idx < uctxt->expected_count) {
+ fdata->invalid_tids[fdata->invalid_tid_idx] =
+ rcventry2tidinfo(node->rcventry - uctxt->expected_base);
+ fdata->invalid_tids[fdata->invalid_tid_idx] |=
+ EXP_TID_SET(LEN, node->npages);
+ if (!fdata->invalid_tid_idx) {
+ unsigned long *ev;
- if (!node) {
/*
- * Didn't find a node at this address. However, the
- * range could be bigger than what we have registered
- * so we have to keep looking.
+ * hfi1_set_uevent_bits() sets a user event flag
+ * for all processes. Because calling into the
+ * driver to process TID cache invalidations is
+ * expensive and TID cache invalidations are
+ * handled on a per-process basis, we can
+ * optimize this to set the flag only for the
+ * process in question.
*/
- addr += PAGE_SIZE;
- continue;
- }
-
- /*
- * The next address to be looked up is computed based
- * on the node's starting address. This is due to the
- * fact that the range where we start might be in the
- * middle of the node's buffer so simply incrementing
- * the address by the node's size would result is a
- * bad address.
- */
- addr = node->virt + (node->npages * PAGE_SIZE);
- if (node->freed)
- continue;
-
- trace_hfi1_exp_tid_inval(uctxt->ctxt, fd->subctxt, node->virt,
- node->rcventry, node->npages,
- node->dma_addr);
- node->freed = true;
-
- spin_lock(&fd->invalid_lock);
- if (fd->invalid_tid_idx < uctxt->expected_count) {
- fd->invalid_tids[fd->invalid_tid_idx] =
- rcventry2tidinfo(node->rcventry -
- uctxt->expected_base);
- fd->invalid_tids[fd->invalid_tid_idx] |=
- EXP_TID_SET(LEN, node->npages);
- if (!fd->invalid_tid_idx) {
- unsigned long *ev;
-
- /*
- * hfi1_set_uevent_bits() sets a user event flag
- * for all processes. Because calling into the
- * driver to process TID cache invalidations is
- * expensive and TID cache invalidations are
- * handled on a per-process basis, we can
- * optimize this to set the flag only for the
- * process in question.
- */
- ev = uctxt->dd->events +
- (((uctxt->ctxt -
- uctxt->dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
- set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
- }
- fd->invalid_tid_idx++;
+ ev = uctxt->dd->events +
+ (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+ HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
+ set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
}
- spin_unlock(&fd->invalid_lock);
+ fdata->invalid_tid_idx++;
}
- spin_unlock(&fd->rb_lock);
+ spin_unlock(&fdata->invalid_lock);
+ return 0;
}
-static inline int mmu_addr_cmp(struct mmu_rb_node *node, unsigned long addr,
- unsigned long len)
+static int mmu_addr_cmp(struct mmu_rb_node *node, unsigned long addr,
+ unsigned long len)
{
- if ((addr + len) <= node->virt)
+ if ((addr + len) <= node->addr)
return -1;
- else if (addr >= node->virt && addr < (node->virt + node->len))
+ else if (addr >= node->addr && addr < (node->addr + node->len))
return 0;
else
return 1;
}
-static inline int mmu_entry_cmp(struct mmu_rb_node *node, u32 entry)
-{
- if (entry < node->rcventry)
- return -1;
- else if (entry > node->rcventry)
- return 1;
- else
- return 0;
-}
-
-static struct mmu_rb_node *mmu_rb_search(struct rb_root *root,
- unsigned long addr)
-{
- struct rb_node *node = root->rb_node;
-
- while (node) {
- struct mmu_rb_node *mnode =
- container_of(node, struct mmu_rb_node, rbnode);
- /*
- * When searching, use at least one page length for size. The
- * MMU notifier will not give us anything less than that. We
- * also don't need anything more than a page because we are
- * guaranteed to have non-overlapping buffers in the tree.
- */
- int result = mmu_addr_cmp(mnode, addr, PAGE_SIZE);
-
- if (result < 0)
- node = node->rb_left;
- else if (result > 0)
- node = node->rb_right;
- else
- return mnode;
- }
- return NULL;
-}
-
-static int mmu_rb_insert_by_entry(struct hfi1_filedata *fdata,
- struct rb_root *root,
- struct mmu_rb_node *node)
+static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
{
+ struct hfi1_filedata *fdata =
+ container_of(root, struct hfi1_filedata, tid_rb_root);
+ struct tid_rb_node *tnode =
+ container_of(node, struct tid_rb_node, mmu);
u32 base = fdata->uctxt->expected_base;
- fdata->entry_to_rb[node->rcventry - base] = node;
+ fdata->entry_to_rb[tnode->rcventry - base] = tnode;
return 0;
}
-static int mmu_rb_insert_by_addr(struct hfi1_filedata *fdata,
- struct rb_root *root, struct mmu_rb_node *node)
-{
- struct rb_node **new = &root->rb_node, *parent = NULL;
- u32 base = fdata->uctxt->expected_base;
-
- /* Figure out where to put new node */
- while (*new) {
- struct mmu_rb_node *this =
- container_of(*new, struct mmu_rb_node, rbnode);
- int result = mmu_addr_cmp(this, node->virt, node->len);
-
- parent = *new;
- if (result < 0)
- new = &((*new)->rb_left);
- else if (result > 0)
- new = &((*new)->rb_right);
- else
- return 1;
- }
-
- /* Add new node and rebalance tree. */
- rb_link_node(&node->rbnode, parent, new);
- rb_insert_color(&node->rbnode, root);
-
- fdata->entry_to_rb[node->rcventry - base] = node;
- return 0;
-}
-
-static void mmu_rb_remove_by_entry(struct hfi1_filedata *fdata,
- struct rb_root *root,
- struct mmu_rb_node *node)
-{
- u32 base = fdata->uctxt->expected_base;
-
- fdata->entry_to_rb[node->rcventry - base] = NULL;
-}
-
-static void mmu_rb_remove_by_addr(struct hfi1_filedata *fdata,
- struct rb_root *root,
- struct mmu_rb_node *node)
+static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
{
+ struct hfi1_filedata *fdata =
+ container_of(root, struct hfi1_filedata, tid_rb_root);
+ struct tid_rb_node *tnode =
+ container_of(node, struct tid_rb_node, mmu);
u32 base = fdata->uctxt->expected_base;
- fdata->entry_to_rb[node->rcventry - base] = NULL;
- rb_erase(&node->rbnode, root);
+ fdata->entry_to_rb[tnode->rcventry - base] = NULL;
}