diff options
Diffstat (limited to 'drivers/staging/rdma/hfi1/user_sdma.c')
-rw-r--r-- | drivers/staging/rdma/hfi1/user_sdma.c | 130 |
1 files changed, 87 insertions, 43 deletions
diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c index ab6b6a42000f..0014c9c0e967 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/staging/rdma/hfi1/user_sdma.c @@ -180,6 +180,8 @@ struct user_sdma_iovec { u64 offset; }; +#define SDMA_CACHE_NODE_EVICT BIT(0) + struct sdma_mmu_node { struct mmu_rb_node rb; struct list_head list; @@ -187,6 +189,7 @@ struct sdma_mmu_node { atomic_t refcount; struct page **pages; unsigned npages; + unsigned long flags; }; struct user_sdma_request { @@ -278,7 +281,8 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *); static void user_sdma_free_request(struct user_sdma_request *, bool); static int pin_vector_pages(struct user_sdma_request *, struct user_sdma_iovec *); -static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned); +static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned, + unsigned); static int check_header_template(struct user_sdma_request *, struct hfi1_pkt_header *, u32, u32); static int set_txreq_header(struct user_sdma_request *, @@ -299,7 +303,8 @@ static int defer_packet_queue( static void activate_packet_queue(struct iowait *, int); static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long); static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *); -static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, bool); +static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, + struct mm_struct *); static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *); static struct mmu_rb_ops sdma_rb_ops = { @@ -595,6 +600,13 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, goto free_req; } + /* Checking P_KEY for requests from user-space */ + if (egress_pkey_check(dd->pport, req->hdr.lrh, req->hdr.bth, sc, + PKEY_CHECK_INVALID)) { + ret = -EINVAL; + goto free_req; + } + /* * Also should check the BTH.lnh. If it says the next header is GRH then * the RXE parsing will be off and will land in the middle of the KDETH @@ -1028,27 +1040,29 @@ static inline int num_user_pages(const struct iovec *iov) return 1 + ((epage - spage) >> PAGE_SHIFT); } -/* Caller must hold pq->evict_lock */ static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) { u32 cleared = 0; struct sdma_mmu_node *node, *ptr; + struct list_head to_evict = LIST_HEAD_INIT(to_evict); + spin_lock(&pq->evict_lock); list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) { /* Make sure that no one is still using the node. */ if (!atomic_read(&node->refcount)) { - /* - * Need to use the page count now as the remove callback - * will free the node. - */ + set_bit(SDMA_CACHE_NODE_EVICT, &node->flags); + list_del_init(&node->list); + list_add(&node->list, &to_evict); cleared += node->npages; - spin_unlock(&pq->evict_lock); - hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb); - spin_lock(&pq->evict_lock); if (cleared >= npages) break; } } + spin_unlock(&pq->evict_lock); + + list_for_each_entry_safe(node, ptr, &to_evict, list) + hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb); + return cleared; } @@ -1060,11 +1074,13 @@ static int pin_vector_pages(struct user_sdma_request *req, struct sdma_mmu_node *node = NULL; struct mmu_rb_node *rb_node; - rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root, - (unsigned long)iovec->iov.iov_base, - iovec->iov.iov_len); - if (rb_node) + rb_node = hfi1_mmu_rb_extract(&pq->sdma_rb_root, + (unsigned long)iovec->iov.iov_base, + iovec->iov.iov_len); + if (rb_node && !IS_ERR(rb_node)) node = container_of(rb_node, struct sdma_mmu_node, rb); + else + rb_node = NULL; if (!node) { node = kzalloc(sizeof(*node), GFP_KERNEL); @@ -1072,7 +1088,6 @@ static int pin_vector_pages(struct user_sdma_request *req, return -ENOMEM; node->rb.addr = (unsigned long)iovec->iov.iov_base; - node->rb.len = iovec->iov.iov_len; node->pq = pq; atomic_set(&node->refcount, 0); INIT_LIST_HEAD(&node->list); @@ -1089,11 +1104,25 @@ static int pin_vector_pages(struct user_sdma_request *req, memcpy(pages, node->pages, node->npages * sizeof(*pages)); npages -= node->npages; + + /* + * If rb_node is NULL, it means that this is brand new node + * and, therefore not on the eviction list. + * If, however, the rb_node is non-NULL, it means that the + * node is already in RB tree and, therefore on the eviction + * list (nodes are unconditionally inserted in the eviction + * list). In that case, we have to remove the node prior to + * calling the eviction function in order to prevent it from + * freeing this node. + */ + if (rb_node) { + spin_lock(&pq->evict_lock); + list_del_init(&node->list); + spin_unlock(&pq->evict_lock); + } retry: if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) { - spin_lock(&pq->evict_lock); cleared = sdma_cache_evict(pq, npages); - spin_unlock(&pq->evict_lock); if (cleared >= npages) goto retry; } @@ -1107,49 +1136,45 @@ retry: goto bail; } if (pinned != npages) { - unpin_vector_pages(current->mm, pages, pinned); + unpin_vector_pages(current->mm, pages, node->npages, + pinned); ret = -EFAULT; goto bail; } kfree(node->pages); + node->rb.len = iovec->iov.iov_len; node->pages = pages; node->npages += pinned; npages = node->npages; spin_lock(&pq->evict_lock); - if (!rb_node) - list_add(&node->list, &pq->evict); - else - list_move(&node->list, &pq->evict); + list_add(&node->list, &pq->evict); pq->n_locked += pinned; spin_unlock(&pq->evict_lock); } iovec->pages = node->pages; iovec->npages = npages; - if (!rb_node) { - ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); - if (ret) { - spin_lock(&pq->evict_lock); + ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); + if (ret) { + spin_lock(&pq->evict_lock); + if (!list_empty(&node->list)) list_del(&node->list); - pq->n_locked -= node->npages; - spin_unlock(&pq->evict_lock); - ret = 0; - goto bail; - } - } else { - atomic_inc(&node->refcount); + pq->n_locked -= node->npages; + spin_unlock(&pq->evict_lock); + goto bail; } return 0; bail: - if (!rb_node) - kfree(node); + if (rb_node) + unpin_vector_pages(current->mm, node->pages, 0, node->npages); + kfree(node); return ret; } static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, - unsigned npages) + unsigned start, unsigned npages) { - hfi1_release_user_pages(mm, pages, npages, 0); + hfi1_release_user_pages(mm, pages + start, npages, 0); kfree(pages); } @@ -1502,7 +1527,7 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) &req->pq->sdma_rb_root, (unsigned long)req->iovs[i].iov.iov_base, req->iovs[i].iov.iov_len); - if (!mnode) + if (!mnode || IS_ERR(mnode)) continue; node = container_of(mnode, struct sdma_mmu_node, rb); @@ -1547,24 +1572,43 @@ static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) } static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode, - bool notifier) + struct mm_struct *mm) { struct sdma_mmu_node *node = container_of(mnode, struct sdma_mmu_node, rb); spin_lock(&node->pq->evict_lock); - list_del(&node->list); + /* + * We've been called by the MMU notifier but this node has been + * scheduled for eviction. The eviction function will take care + * of freeing this node. + * We have to take the above lock first because we are racing + * against the setting of the bit in the eviction function. + */ + if (mm && test_bit(SDMA_CACHE_NODE_EVICT, &node->flags)) { + spin_unlock(&node->pq->evict_lock); + return; + } + + if (!list_empty(&node->list)) + list_del(&node->list); node->pq->n_locked -= node->npages; spin_unlock(&node->pq->evict_lock); - unpin_vector_pages(notifier ? NULL : current->mm, node->pages, + /* + * If mm is set, we are being called by the MMU notifier and we + * should not pass a mm_struct to unpin_vector_page(). This is to + * prevent a deadlock when hfi1_release_user_pages() attempts to + * take the mmap_sem, which the MMU notifier has already taken. + */ + unpin_vector_pages(mm ? NULL : current->mm, node->pages, 0, node->npages); /* * If called by the MMU notifier, we have to adjust the pinned * page count ourselves. */ - if (notifier) - current->mm->pinned_vm -= node->npages; + if (mm) + mm->pinned_vm -= node->npages; kfree(node); } |