aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/rdma/hfi1/user_sdma.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/rdma/hfi1/user_sdma.c')
-rw-r--r-- drivers/staging/rdma/hfi1/user_sdma.c 130
1 files changed, 87 insertions, 43 deletions
diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c
index ab6b6a42000f..0014c9c0e967 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.c
+++ b/drivers/staging/rdma/hfi1/user_sdma.c
@@ -180,6 +180,8 @@ struct user_sdma_iovec {
u64 offset;
};
+#define SDMA_CACHE_NODE_EVICT 0 /* bit index (not mask) in sdma_mmu_node.flags */
+
struct sdma_mmu_node {
struct mmu_rb_node rb;
struct list_head list;
@@ -187,6 +189,7 @@ struct sdma_mmu_node {
atomic_t refcount;
struct page **pages;
unsigned npages;
+ unsigned long flags;
};
struct user_sdma_request {
@@ -278,7 +281,8 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
static void user_sdma_free_request(struct user_sdma_request *, bool);
static int pin_vector_pages(struct user_sdma_request *,
struct user_sdma_iovec *);
-static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned);
+static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned,
+ unsigned);
static int check_header_template(struct user_sdma_request *,
struct hfi1_pkt_header *, u32, u32);
static int set_txreq_header(struct user_sdma_request *,
@@ -299,7 +303,8 @@ static int defer_packet_queue(
static void activate_packet_queue(struct iowait *, int);
static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *);
-static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *,
+ struct mm_struct *);
static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
static struct mmu_rb_ops sdma_rb_ops = {
@@ -595,6 +600,13 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
goto free_req;
}
+ /* Checking P_KEY for requests from user-space */
+ if (egress_pkey_check(dd->pport, req->hdr.lrh, req->hdr.bth, sc,
+ PKEY_CHECK_INVALID)) {
+ ret = -EINVAL;
+ goto free_req;
+ }
+
/*
* Also should check the BTH.lnh. If it says the next header is GRH then
* the RXE parsing will be off and will land in the middle of the KDETH
@@ -1028,27 +1040,29 @@ static inline int num_user_pages(const struct iovec *iov)
return 1 + ((epage - spage) >> PAGE_SHIFT);
}
-/* Caller must hold pq->evict_lock */
static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
{
u32 cleared = 0;
struct sdma_mmu_node *node, *ptr;
+ struct list_head to_evict = LIST_HEAD_INIT(to_evict);
+ spin_lock(&pq->evict_lock);
list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) {
/* Make sure that no one is still using the node. */
if (!atomic_read(&node->refcount)) {
- /*
- * Need to use the page count now as the remove callback
- * will free the node.
- */
+ set_bit(SDMA_CACHE_NODE_EVICT, &node->flags);
+ list_del_init(&node->list);
+ list_add(&node->list, &to_evict);
cleared += node->npages;
- spin_unlock(&pq->evict_lock);
- hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
- spin_lock(&pq->evict_lock);
if (cleared >= npages)
break;
}
}
+ spin_unlock(&pq->evict_lock);
+
+ list_for_each_entry_safe(node, ptr, &to_evict, list)
+ hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
+
return cleared;
}
@@ -1060,11 +1074,13 @@ static int pin_vector_pages(struct user_sdma_request *req,
struct sdma_mmu_node *node = NULL;
struct mmu_rb_node *rb_node;
- rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root,
- (unsigned long)iovec->iov.iov_base,
- iovec->iov.iov_len);
- if (rb_node)
+ rb_node = hfi1_mmu_rb_extract(&pq->sdma_rb_root,
+ (unsigned long)iovec->iov.iov_base,
+ iovec->iov.iov_len);
+ if (rb_node && !IS_ERR(rb_node))
node = container_of(rb_node, struct sdma_mmu_node, rb);
+ else
+ rb_node = NULL;
if (!node) {
node = kzalloc(sizeof(*node), GFP_KERNEL);
@@ -1072,7 +1088,6 @@ static int pin_vector_pages(struct user_sdma_request *req,
return -ENOMEM;
node->rb.addr = (unsigned long)iovec->iov.iov_base;
- node->rb.len = iovec->iov.iov_len;
node->pq = pq;
atomic_set(&node->refcount, 0);
INIT_LIST_HEAD(&node->list);
@@ -1089,11 +1104,25 @@ static int pin_vector_pages(struct user_sdma_request *req,
memcpy(pages, node->pages, node->npages * sizeof(*pages));
npages -= node->npages;
+
+ /*
+ * If rb_node is NULL, it means that this is a brand new node
+ * and, therefore, not on the eviction list.
+ * If, however, the rb_node is non-NULL, it means that the
+ * node is already in the RB tree and is thus on the eviction
+ * list (nodes are unconditionally inserted in the eviction
+ * list). In that case, we have to remove the node prior to
+ * calling the eviction function in order to prevent it from
+ * freeing this node.
+ */
+ if (rb_node) {
+ spin_lock(&pq->evict_lock);
+ list_del_init(&node->list);
+ spin_unlock(&pq->evict_lock);
+ }
retry:
if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) {
- spin_lock(&pq->evict_lock);
cleared = sdma_cache_evict(pq, npages);
- spin_unlock(&pq->evict_lock);
if (cleared >= npages)
goto retry;
}
@@ -1107,49 +1136,45 @@ retry:
goto bail;
}
if (pinned != npages) {
- unpin_vector_pages(current->mm, pages, pinned);
+ unpin_vector_pages(current->mm, pages, node->npages,
+ pinned);
ret = -EFAULT;
goto bail;
}
kfree(node->pages);
+ node->rb.len = iovec->iov.iov_len;
node->pages = pages;
node->npages += pinned;
npages = node->npages;
spin_lock(&pq->evict_lock);
- if (!rb_node)
- list_add(&node->list, &pq->evict);
- else
- list_move(&node->list, &pq->evict);
+ list_add(&node->list, &pq->evict);
pq->n_locked += pinned;
spin_unlock(&pq->evict_lock);
}
iovec->pages = node->pages;
iovec->npages = npages;
- if (!rb_node) {
- ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
- if (ret) {
- spin_lock(&pq->evict_lock);
+ ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
+ if (ret) {
+ spin_lock(&pq->evict_lock);
+ if (!list_empty(&node->list))
list_del(&node->list);
- pq->n_locked -= node->npages;
- spin_unlock(&pq->evict_lock);
- ret = 0;
- goto bail;
- }
- } else {
- atomic_inc(&node->refcount);
+ pq->n_locked -= node->npages;
+ spin_unlock(&pq->evict_lock);
+ goto bail;
}
return 0;
bail:
- if (!rb_node)
- kfree(node);
+ if (rb_node)
+ unpin_vector_pages(current->mm, node->pages, 0, node->npages);
+ kfree(node);
return ret;
}
static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
- unsigned npages)
+ unsigned start, unsigned npages)
{
- hfi1_release_user_pages(mm, pages, npages, 0);
+ hfi1_release_user_pages(mm, pages + start, npages, 0);
kfree(pages);
}
@@ -1502,7 +1527,7 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
&req->pq->sdma_rb_root,
(unsigned long)req->iovs[i].iov.iov_base,
req->iovs[i].iov.iov_len);
- if (!mnode)
+ if (!mnode || IS_ERR(mnode))
continue;
node = container_of(mnode, struct sdma_mmu_node, rb);
@@ -1547,24 +1572,43 @@ static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
}
static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
- bool notifier)
+ struct mm_struct *mm)
{
struct sdma_mmu_node *node =
container_of(mnode, struct sdma_mmu_node, rb);
spin_lock(&node->pq->evict_lock);
- list_del(&node->list);
+ /*
+ * We've been called by the MMU notifier but this node has been
+ * scheduled for eviction. The eviction function will take care
+ * of freeing this node.
+ * We have to take the above lock first because we are racing
+ * against the setting of the bit in the eviction function.
+ */
+ if (mm && test_bit(SDMA_CACHE_NODE_EVICT, &node->flags)) {
+ spin_unlock(&node->pq->evict_lock);
+ return;
+ }
+
+ if (!list_empty(&node->list))
+ list_del(&node->list);
node->pq->n_locked -= node->npages;
spin_unlock(&node->pq->evict_lock);
- unpin_vector_pages(notifier ? NULL : current->mm, node->pages,
+ /*
+ * If mm is set, we are being called by the MMU notifier and we
+ * should not pass a mm_struct to unpin_vector_pages(). This is to
+ * prevent a deadlock when hfi1_release_user_pages() attempts to
+ * take the mmap_sem, which the MMU notifier has already taken.
+ */
+ unpin_vector_pages(mm ? NULL : current->mm, node->pages, 0,
node->npages);
/*
* If called by the MMU notifier, we have to adjust the pinned
* page count ourselves.
*/
- if (notifier)
- current->mm->pinned_vm -= node->npages;
+ if (mm)
+ mm->pinned_vm -= node->npages;
kfree(node);
}