aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/rdma/hfi1/file_ops.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-03-22 15:48:44 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-03-22 15:48:44 -0700
commitb8ba4526832fcccba7f46e55ce9a8b79902bdcec (patch)
tree5f2fc306e9909c9936efc017bf2c8fde49d8c9bb /drivers/staging/rdma/hfi1/file_ops.c
parentMerge tag 'nfs-for-4.6-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs (diff)
parentMerge branches 'i40iw', 'sriov' and 'hfi1' into k.o/for-4.6 (diff)
downloadlinux-dev-b8ba4526832fcccba7f46e55ce9a8b79902bdcec.tar.xz
linux-dev-b8ba4526832fcccba7f46e55ce9a8b79902bdcec.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull more rdma updates from Doug Ledford: "Round two of 4.6 merge window patches. This is a monster pull request. I held off on the hfi1 driver updates (the hfi1 driver is intimately tied to the qib driver and the new rdmavt software library that was created to help both of them) in my first pull request. The hfi1/qib/rdmavt update is probably 90% of this pull request. The hfi1 driver is being left in staging so that it can be fixed up in regards to the API that Al and yourself didn't like. Intel has agreed to do the work, but in the meantime, this clears out 300+ patches in the backlog queue and brings my tree and their tree closer to sync. This also includes about 10 patches to the core and a few to mlx5 to create an infrastructure for configuring SRIOV ports on IB devices. That series includes one patch to the net core that we sent to netdev@ and Dave Miller with each of the three revisions to the series. We didn't get any response to the patch, so we took that as implicit approval. Finally, this series includes Intel's new iWARP driver for their x722 cards. It's not nearly the beast as the hfi1 driver. It also has a linux-next merge issue, but that has been resolved and it now passes just fine. Summary: - A few minor core fixups needed for the next patch series - The IB SRIOV series. This has bounced around for several versions. Of note is the fact that the first patch in this series effects the net core. It was directed to netdev and DaveM for each iteration of the series (three versions total). Dave did not object, but did not respond either. I've taken this as permission to move forward with the series. - The new Intel X722 iWARP driver - A huge set of updates to the Intel hfi1 driver. Of particular interest here is that we have left the driver in staging since it still has an API that people object to. Intel is working on a fix, but getting these patches in now helps keep me sane as the upstream and Intel's trees were over 300 patches apart" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (362 commits) IB/ipoib: Allow mcast packets from other VFs IB/mlx5: Implement callbacks for manipulating VFs net/mlx5_core: Implement modify HCA vport command net/mlx5_core: Add VF param when querying vport counter IB/ipoib: Add ndo operations for configuring VFs IB/core: Add interfaces to control VF attributes IB/core: Support accessing SA in virtualized environment IB/core: Add subnet prefix to port info IB/mlx5: Fix decision on using MAD_IFC net/core: Add support for configuring VF GUIDs IB/{core, ulp} Support above 32 possible device capability flags IB/core: Replace setting the zero values in ib_uverbs_ex_query_device net/mlx5_core: Introduce offload arithmetic hardware capabilities net/mlx5_core: Refactor device capability function net/mlx5_core: Fix caching ATOMIC endian mode capability ib_srpt: fix a WARN_ON() message i40iw: Replace the obsolete crypto hash interface with shash IB/hfi1: Add SDMA cache eviction algorithm IB/hfi1: Switch to using the pin query function IB/hfi1: Specify mm when releasing pages ...
Diffstat (limited to 'drivers/staging/rdma/hfi1/file_ops.c')
-rw-r--r--drivers/staging/rdma/hfi1/file_ops.c552
1 files changed, 104 insertions, 448 deletions
diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c
index 8b911e8bf0df..8396dc5fb6c1 100644
--- a/drivers/staging/rdma/hfi1/file_ops.c
+++ b/drivers/staging/rdma/hfi1/file_ops.c
@@ -1,12 +1,11 @@
/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2015 Intel Corporation.
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
@@ -18,8 +17,6 @@
*
* BSD LICENSE
*
- * Copyright(c) 2015 Intel Corporation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -60,6 +57,8 @@
#include "user_sdma.h"
#include "user_exp_rcv.h"
#include "eprom.h"
+#include "aspm.h"
+#include "mmu_rb.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -96,9 +95,6 @@ static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
static int vma_fault(struct vm_area_struct *, struct vm_fault *);
-static int exp_tid_setup(struct file *, struct hfi1_tid_info *);
-static int exp_tid_free(struct file *, struct hfi1_tid_info *);
-static void unlock_exp_tids(struct hfi1_ctxtdata *);
static const struct file_operations hfi1_file_ops = {
.owner = THIS_MODULE,
@@ -164,7 +160,6 @@ enum mmap_types {
#define dbg(fmt, ...) \
pr_info(fmt, ##__VA_ARGS__)
-
static inline int is_valid_mmap(u64 token)
{
return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
@@ -188,6 +183,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
struct hfi1_cmd cmd;
struct hfi1_user_info uinfo;
struct hfi1_tid_info tinfo;
+ unsigned long addr;
ssize_t consumed = 0, copy = 0, ret = 0;
void *dest = NULL;
__u64 user_val = 0;
@@ -219,6 +215,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
break;
case HFI1_CMD_TID_UPDATE:
case HFI1_CMD_TID_FREE:
+ case HFI1_CMD_TID_INVAL_READ:
copy = sizeof(tinfo);
dest = &tinfo;
break;
@@ -294,9 +291,8 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
sc_return_credits(uctxt->sc);
break;
case HFI1_CMD_TID_UPDATE:
- ret = exp_tid_setup(fp, &tinfo);
+ ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
if (!ret) {
- unsigned long addr;
/*
* Copy the number of tidlist entries we used
* and the length of the buffer we registered.
@@ -311,8 +307,25 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
ret = -EFAULT;
}
break;
+ case HFI1_CMD_TID_INVAL_READ:
+ ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
+ if (ret)
+ break;
+ addr = (unsigned long)cmd.addr +
+ offsetof(struct hfi1_tid_info, tidcnt);
+ if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+ sizeof(tinfo.tidcnt)))
+ ret = -EFAULT;
+ break;
case HFI1_CMD_TID_FREE:
- ret = exp_tid_free(fp, &tinfo);
+ ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
+ if (ret)
+ break;
+ addr = (unsigned long)cmd.addr +
+ offsetof(struct hfi1_tid_info, tidcnt);
+ if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+ sizeof(tinfo.tidcnt)))
+ ret = -EFAULT;
break;
case HFI1_CMD_RECV_CTRL:
ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val);
@@ -373,8 +386,10 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
break;
}
if (dd->flags & HFI1_FORCED_FREEZE) {
- /* Don't allow context reset if we are into
- * forced freeze */
+ /*
+ * Don't allow context reset if we are into
+ * forced freeze
+ */
ret = -ENODEV;
break;
}
@@ -382,8 +397,9 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
ret = sc_enable(sc);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
uctxt->ctxt);
- } else
+ } else {
ret = sc_restart(sc);
+ }
if (!ret)
sc_return_credits(sc);
break;
@@ -393,7 +409,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
case HFI1_CMD_EP_ERASE_RANGE:
case HFI1_CMD_EP_READ_RANGE:
case HFI1_CMD_EP_WRITE_RANGE:
- ret = handle_eprom_command(&cmd);
+ ret = handle_eprom_command(fp, &cmd);
break;
}
@@ -732,6 +748,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
/* drain user sdma queue */
hfi1_user_sdma_free_queues(fdata);
+ /* release the cpu */
+ hfi1_put_proc_affinity(dd, fdata->rec_cpu_num);
+
/*
* Clear any left over, unhandled events so the next process that
* gets this context doesn't get confused.
@@ -755,6 +774,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
HFI1_RCVCTRL_TIDFLOW_DIS |
HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_TAILUPD_DIS |
HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
@@ -777,14 +797,12 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
uctxt->pionowait = 0;
uctxt->event_flags = 0;
- hfi1_clear_tids(uctxt);
+ hfi1_user_exp_rcv_free(fdata);
hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
- if (uctxt->tid_pg_list)
- unlock_exp_tids(uctxt);
-
hfi1_stats.sps_ctxts--;
- dd->freectxts++;
+ if (++dd->freectxts == dd->num_user_contexts)
+ aspm_enable_all(dd);
mutex_unlock(&hfi1_mutex);
hfi1_free_ctxtdata(dd, uctxt);
done:
@@ -826,8 +844,16 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
mutex_lock(&hfi1_mutex);
/* First, lets check if we need to setup a shared context? */
- if (uinfo->subctxt_cnt)
+ if (uinfo->subctxt_cnt) {
+ struct hfi1_filedata *fd = fp->private_data;
+
ret = find_shared_ctxt(fp, uinfo);
+ if (ret < 0)
+ goto done_unlock;
+ if (ret)
+ fd->rec_cpu_num = hfi1_get_proc_affinity(
+ fd->uctxt->dd, fd->uctxt->numa_id);
+ }
/*
* We execute the following block if we couldn't find a
@@ -837,6 +863,7 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
ret = get_user_context(fp, uinfo, i_minor - 1, alg);
}
+done_unlock:
mutex_unlock(&hfi1_mutex);
done:
return ret;
@@ -962,7 +989,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt;
unsigned ctxt;
- int ret;
+ int ret, numa;
if (dd->flags & HFI1_FROZEN) {
/*
@@ -982,17 +1009,26 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
if (ctxt == dd->num_rcv_contexts)
return -EBUSY;
- uctxt = hfi1_create_ctxtdata(dd->pport, ctxt);
+ fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1);
+ if (fd->rec_cpu_num != -1)
+ numa = cpu_to_node(fd->rec_cpu_num);
+ else
+ numa = numa_node_id();
+ uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa);
if (!uctxt) {
dd_dev_err(dd,
"Unable to allocate ctxtdata memory, failing open\n");
return -ENOMEM;
}
+ hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
+ uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
+ uctxt->numa_id);
+
/*
* Allocate and enable a PIO send context.
*/
uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
- uctxt->numa_id);
+ uctxt->dd->node);
if (!uctxt->sc)
return -ENOMEM;
@@ -1026,7 +1062,12 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
INIT_LIST_HEAD(&uctxt->sdma_queues);
spin_lock_init(&uctxt->sdma_qlock);
hfi1_stats.sps_ctxts++;
- dd->freectxts--;
+ /*
+ * Disable ASPM when there are open user/PSM contexts to avoid
+ * issues with ASPM L1 exit latency
+ */
+ if (dd->freectxts-- == dd->num_user_contexts)
+ aspm_disable_all(dd);
fd->uctxt = uctxt;
return 0;
@@ -1035,22 +1076,19 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
const struct hfi1_user_info *uinfo)
{
- int ret = 0;
unsigned num_subctxts;
num_subctxts = uinfo->subctxt_cnt;
- if (num_subctxts > HFI1_MAX_SHARED_CTXTS) {
- ret = -EINVAL;
- goto bail;
- }
+ if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
+ return -EINVAL;
uctxt->subctxt_cnt = uinfo->subctxt_cnt;
uctxt->subctxt_id = uinfo->subctxt_id;
uctxt->active_slaves = 1;
uctxt->redirect_seq_cnt = 1;
set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
-bail:
- return ret;
+
+ return 0;
}
static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
@@ -1105,10 +1143,10 @@ static int user_init(struct file *fp)
* has done it.
*/
if (fd->subctxt) {
- ret = wait_event_interruptible(uctxt->wait,
- !test_bit(HFI1_CTXT_MASTER_UNINIT,
- &uctxt->event_flags));
- goto done;
+ ret = wait_event_interruptible(uctxt->wait, !test_bit(
+ HFI1_CTXT_MASTER_UNINIT,
+ &uctxt->event_flags));
+ goto expected;
}
/* initialize poll variables... */
@@ -1146,8 +1184,16 @@ static int user_init(struct file *fp)
rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+ /*
+ * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
+ * We can't rely on the correct value to be set from prior
+ * uses of the chip or ctxt. Therefore, add the rcvctrl op
+ * for both cases.
+ */
if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+ else
+ rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
/* Notify any waiting slaves */
@@ -1155,8 +1201,18 @@ static int user_init(struct file *fp)
clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
wake_up(&uctxt->wait);
}
- ret = 0;
+expected:
+ /*
+ * Expected receive has to be setup for all processes (including
+ * shared contexts). However, it has to be done after the master
+ * context has been fully configured as it depends on the
+ * eager/expected split of the RcvArray entries.
+ * Setting it up here ensures that the subcontexts will be waiting
+ * (due to the above wait_event_interruptible() until the master
+ * is setup.
+ */
+ ret = hfi1_user_exp_rcv_init(fp);
done:
return ret;
}
@@ -1226,46 +1282,6 @@ static int setup_ctxt(struct file *fp)
if (ret)
goto done;
}
- /* Setup Expected Rcv memories */
- uctxt->tid_pg_list = vzalloc(uctxt->expected_count *
- sizeof(struct page **));
- if (!uctxt->tid_pg_list) {
- ret = -ENOMEM;
- goto done;
- }
- uctxt->physshadow = vzalloc(uctxt->expected_count *
- sizeof(*uctxt->physshadow));
- if (!uctxt->physshadow) {
- ret = -ENOMEM;
- goto done;
- }
- /* allocate expected TID map and initialize the cursor */
- atomic_set(&uctxt->tidcursor, 0);
- uctxt->numtidgroups = uctxt->expected_count /
- dd->rcv_entries.group_size;
- uctxt->tidmapcnt = uctxt->numtidgroups / BITS_PER_LONG +
- !!(uctxt->numtidgroups % BITS_PER_LONG);
- uctxt->tidusemap = kzalloc_node(uctxt->tidmapcnt *
- sizeof(*uctxt->tidusemap),
- GFP_KERNEL, uctxt->numa_id);
- if (!uctxt->tidusemap) {
- ret = -ENOMEM;
- goto done;
- }
- /*
- * In case that the number of groups is not a multiple of
- * 64 (the number of groups in a tidusemap element), mark
- * the extra ones as used. This will effectively make them
- * permanently used and should never be assigned. Otherwise,
- * the code which checks how many free groups we have will
- * get completely confused about the state of the bits.
- */
- if (uctxt->numtidgroups % BITS_PER_LONG)
- uctxt->tidusemap[uctxt->tidmapcnt - 1] =
- ~((1ULL << (uctxt->numtidgroups %
- BITS_PER_LONG)) - 1);
- trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0,
- uctxt->tidusemap, uctxt->tidmapcnt);
}
ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
if (ret)
@@ -1391,8 +1407,9 @@ static unsigned int poll_next(struct file *fp,
set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt);
pollflag = 0;
- } else
+ } else {
pollflag = POLLIN | POLLRDNORM;
+ }
spin_unlock_irq(&dd->uctxt_lock);
return pollflag;
@@ -1470,8 +1487,9 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
if (uctxt->rcvhdrtail_kvaddr)
clear_rcvhdrtail(uctxt);
rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
- } else
+ } else {
rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
+ }
hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt);
/* always; new head should be equal to new tail; see above */
bail:
@@ -1504,367 +1522,6 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
return 0;
}
-#define num_user_pages(vaddr, len) \
- (1 + (((((unsigned long)(vaddr) + \
- (unsigned long)(len) - 1) & PAGE_MASK) - \
- ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
-
-/**
- * tzcnt - count the number of trailing zeros in a 64bit value
- * @value: the value to be examined
- *
- * Returns the number of trailing least significant zeros in the
- * the input value. If the value is zero, return the number of
- * bits of the value.
- */
-static inline u8 tzcnt(u64 value)
-{
- return value ? __builtin_ctzl(value) : sizeof(value) * 8;
-}
-
-static inline unsigned num_free_groups(unsigned long map, u16 *start)
-{
- unsigned free;
- u16 bitidx = *start;
-
- if (bitidx >= BITS_PER_LONG)
- return 0;
- /* "Turn off" any bits set before our bit index */
- map &= ~((1ULL << bitidx) - 1);
- free = tzcnt(map) - bitidx;
- while (!free && bitidx < BITS_PER_LONG) {
- /* Zero out the last set bit so we look at the rest */
- map &= ~(1ULL << bitidx);
- /*
- * Account for the previously checked bits and advance
- * the bit index. We don't have to check for bitidx
- * getting bigger than BITS_PER_LONG here as it would
- * mean extra instructions that we don't need. If it
- * did happen, it would push free to a negative value
- * which will break the loop.
- */
- free = tzcnt(map) - ++bitidx;
- }
- *start = bitidx;
- return free;
-}
-
-static int exp_tid_setup(struct file *fp, struct hfi1_tid_info *tinfo)
-{
- int ret = 0;
- struct hfi1_filedata *fd = fp->private_data;
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_devdata *dd = uctxt->dd;
- unsigned tid, mapped = 0, npages, ngroups, exp_groups,
- tidpairs = uctxt->expected_count / 2;
- struct page **pages;
- unsigned long vaddr, tidmap[uctxt->tidmapcnt];
- dma_addr_t *phys;
- u32 tidlist[tidpairs], pairidx = 0, tidcursor;
- u16 useidx, idx, bitidx, tidcnt = 0;
-
- vaddr = tinfo->vaddr;
-
- if (offset_in_page(vaddr)) {
- ret = -EINVAL;
- goto bail;
- }
-
- npages = num_user_pages(vaddr, tinfo->length);
- if (!npages) {
- ret = -EINVAL;
- goto bail;
- }
- if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
- npages * PAGE_SIZE)) {
- dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
- (void *)vaddr, npages);
- ret = -EFAULT;
- goto bail;
- }
-
- memset(tidmap, 0, sizeof(tidmap[0]) * uctxt->tidmapcnt);
- memset(tidlist, 0, sizeof(tidlist[0]) * tidpairs);
-
- exp_groups = uctxt->expected_count / dd->rcv_entries.group_size;
- /* which group set do we look at first? */
- tidcursor = atomic_read(&uctxt->tidcursor);
- useidx = (tidcursor >> 16) & 0xffff;
- bitidx = tidcursor & 0xffff;
-
- /*
- * Keep going until we've mapped all pages or we've exhausted all
- * RcvArray entries.
- * This iterates over the number of tidmaps + 1
- * (idx <= uctxt->tidmapcnt) so we check the bitmap which we
- * started from one more time for any free bits before the
- * starting point bit.
- */
- for (mapped = 0, idx = 0;
- mapped < npages && idx <= uctxt->tidmapcnt;) {
- u64 i, offset = 0;
- unsigned free, pinned, pmapped = 0, bits_used;
- u16 grp;
-
- /*
- * "Reserve" the needed group bits under lock so other
- * processes can't step in the middle of it. Once
- * reserved, we don't need the lock anymore since we
- * are guaranteed the groups.
- */
- spin_lock(&uctxt->exp_lock);
- if (uctxt->tidusemap[useidx] == -1ULL ||
- bitidx >= BITS_PER_LONG) {
- /* no free groups in the set, use the next */
- useidx = (useidx + 1) % uctxt->tidmapcnt;
- idx++;
- bitidx = 0;
- spin_unlock(&uctxt->exp_lock);
- continue;
- }
- ngroups = ((npages - mapped) / dd->rcv_entries.group_size) +
- !!((npages - mapped) % dd->rcv_entries.group_size);
-
- /*
- * If we've gotten here, the current set of groups does have
- * one or more free groups.
- */
- free = num_free_groups(uctxt->tidusemap[useidx], &bitidx);
- if (!free) {
- /*
- * Despite the check above, free could still come back
- * as 0 because we don't check the entire bitmap but
- * we start from bitidx.
- */
- spin_unlock(&uctxt->exp_lock);
- continue;
- }
- bits_used = min(free, ngroups);
- tidmap[useidx] |= ((1ULL << bits_used) - 1) << bitidx;
- uctxt->tidusemap[useidx] |= tidmap[useidx];
- spin_unlock(&uctxt->exp_lock);
-
- /*
- * At this point, we know where in the map we have free bits.
- * properly offset into the various "shadow" arrays and compute
- * the RcvArray entry index.
- */
- offset = ((useidx * BITS_PER_LONG) + bitidx) *
- dd->rcv_entries.group_size;
- pages = uctxt->tid_pg_list + offset;
- phys = uctxt->physshadow + offset;
- tid = uctxt->expected_base + offset;
-
- /* Calculate how many pages we can pin based on free bits */
- pinned = min((bits_used * dd->rcv_entries.group_size),
- (npages - mapped));
- /*
- * Now that we know how many free RcvArray entries we have,
- * we can pin that many user pages.
- */
- ret = hfi1_acquire_user_pages(vaddr + (mapped * PAGE_SIZE),
- pinned, true, pages);
- if (ret) {
- /*
- * We can't continue because the pages array won't be
- * initialized. This should never happen,
- * unless perhaps the user has mpin'ed the pages
- * themselves.
- */
- dd_dev_info(dd,
- "Failed to lock addr %p, %u pages: errno %d\n",
- (void *) vaddr, pinned, -ret);
- /*
- * Let go of the bits that we reserved since we are not
- * going to use them.
- */
- spin_lock(&uctxt->exp_lock);
- uctxt->tidusemap[useidx] &=
- ~(((1ULL << bits_used) - 1) << bitidx);
- spin_unlock(&uctxt->exp_lock);
- goto done;
- }
- /*
- * How many groups do we need based on how many pages we have
- * pinned?
- */
- ngroups = (pinned / dd->rcv_entries.group_size) +
- !!(pinned % dd->rcv_entries.group_size);
- /*
- * Keep programming RcvArray entries for all the <ngroups> free
- * groups.
- */
- for (i = 0, grp = 0; grp < ngroups; i++, grp++) {
- unsigned j;
- u32 pair_size = 0, tidsize;
- /*
- * This inner loop will program an entire group or the
- * array of pinned pages (which ever limit is hit
- * first).
- */
- for (j = 0; j < dd->rcv_entries.group_size &&
- pmapped < pinned; j++, pmapped++, tid++) {
- tidsize = PAGE_SIZE;
- phys[pmapped] = hfi1_map_page(dd->pcidev,
- pages[pmapped], 0,
- tidsize, PCI_DMA_FROMDEVICE);
- trace_hfi1_exp_rcv_set(uctxt->ctxt,
- fd->subctxt,
- tid, vaddr,
- phys[pmapped],
- pages[pmapped]);
- /*
- * Each RcvArray entry is programmed with one
- * page * worth of memory. This will handle
- * the 8K MTU as well as anything smaller
- * due to the fact that both entries in the
- * RcvTidPair are programmed with a page.
- * PSM currently does not handle anything
- * bigger than 8K MTU, so should we even worry
- * about 10K here?
- */
- hfi1_put_tid(dd, tid, PT_EXPECTED,
- phys[pmapped],
- ilog2(tidsize >> PAGE_SHIFT) + 1);
- pair_size += tidsize >> PAGE_SHIFT;
- EXP_TID_RESET(tidlist[pairidx], LEN, pair_size);
- if (!(tid % 2)) {
- tidlist[pairidx] |=
- EXP_TID_SET(IDX,
- (tid - uctxt->expected_base)
- / 2);
- tidlist[pairidx] |=
- EXP_TID_SET(CTRL, 1);
- tidcnt++;
- } else {
- tidlist[pairidx] |=
- EXP_TID_SET(CTRL, 2);
- pair_size = 0;
- pairidx++;
- }
- }
- /*
- * We've programmed the entire group (or as much of the
- * group as we'll use. Now, it's time to push it out...
- */
- flush_wc();
- }
- mapped += pinned;
- atomic_set(&uctxt->tidcursor,
- (((useidx & 0xffffff) << 16) |
- ((bitidx + bits_used) & 0xffffff)));
- }
- trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0, uctxt->tidusemap,
- uctxt->tidmapcnt);
-
-done:
- /* If we've mapped anything, copy relevant info to user */
- if (mapped) {
- if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
- tidlist, sizeof(tidlist[0]) * tidcnt)) {
- ret = -EFAULT;
- goto done;
- }
- /* copy TID info to user */
- if (copy_to_user((void __user *)(unsigned long)tinfo->tidmap,
- tidmap, sizeof(tidmap[0]) * uctxt->tidmapcnt))
- ret = -EFAULT;
- }
-bail:
- /*
- * Calculate mapped length. New Exp TID protocol does not "unwind" and
- * report an error if it can't map the entire buffer. It just reports
- * the length that was mapped.
- */
- tinfo->length = mapped * PAGE_SIZE;
- tinfo->tidcnt = tidcnt;
- return ret;
-}
-
-static int exp_tid_free(struct file *fp, struct hfi1_tid_info *tinfo)
-{
- struct hfi1_filedata *fd = fp->private_data;
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_devdata *dd = uctxt->dd;
- unsigned long tidmap[uctxt->tidmapcnt];
- struct page **pages;
- dma_addr_t *phys;
- u16 idx, bitidx, tid;
- int ret = 0;
-
- if (copy_from_user(&tidmap, (void __user *)(unsigned long)
- tinfo->tidmap,
- sizeof(tidmap[0]) * uctxt->tidmapcnt)) {
- ret = -EFAULT;
- goto done;
- }
- for (idx = 0; idx < uctxt->tidmapcnt; idx++) {
- unsigned long map;
-
- bitidx = 0;
- if (!tidmap[idx])
- continue;
- map = tidmap[idx];
- while ((bitidx = tzcnt(map)) < BITS_PER_LONG) {
- int i, pcount = 0;
- struct page *pshadow[dd->rcv_entries.group_size];
- unsigned offset = ((idx * BITS_PER_LONG) + bitidx) *
- dd->rcv_entries.group_size;
-
- pages = uctxt->tid_pg_list + offset;
- phys = uctxt->physshadow + offset;
- tid = uctxt->expected_base + offset;
- for (i = 0; i < dd->rcv_entries.group_size;
- i++, tid++) {
- if (pages[i]) {
- hfi1_put_tid(dd, tid, PT_INVALID,
- 0, 0);
- trace_hfi1_exp_rcv_free(uctxt->ctxt,
- fd->subctxt,
- tid, phys[i],
- pages[i]);
- pci_unmap_page(dd->pcidev, phys[i],
- PAGE_SIZE, PCI_DMA_FROMDEVICE);
- pshadow[pcount] = pages[i];
- pages[i] = NULL;
- pcount++;
- phys[i] = 0;
- }
- }
- flush_wc();
- hfi1_release_user_pages(pshadow, pcount, true);
- clear_bit(bitidx, &uctxt->tidusemap[idx]);
- map &= ~(1ULL<<bitidx);
- }
- }
- trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 1, uctxt->tidusemap,
- uctxt->tidmapcnt);
-done:
- return ret;
-}
-
-static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt)
-{
- struct hfi1_devdata *dd = uctxt->dd;
- unsigned tid;
-
- dd_dev_info(dd, "ctxt %u unlocking any locked expTID pages\n",
- uctxt->ctxt);
- for (tid = 0; tid < uctxt->expected_count; tid++) {
- struct page *p = uctxt->tid_pg_list[tid];
- dma_addr_t phys;
-
- if (!p)
- continue;
-
- phys = uctxt->physshadow[tid];
- uctxt->physshadow[tid] = 0;
- uctxt->tid_pg_list[tid] = NULL;
- pci_unmap_page(dd->pcidev, phys, PAGE_SIZE, PCI_DMA_FROMDEVICE);
- hfi1_release_user_pages(&p, 1, true);
- }
-}
-
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
u16 pkey)
{
@@ -1933,10 +1590,9 @@ static loff_t ui_lseek(struct file *filp, loff_t offset, int whence)
return filp->f_pos;
}
-
/* NOTE: assumes unsigned long is 8 bytes */
static ssize_t ui_read(struct file *filp, char __user *buf, size_t count,
- loff_t *f_pos)
+ loff_t *f_pos)
{
struct hfi1_devdata *dd = filp->private_data;
void __iomem *base = dd->kregbase;
@@ -1972,12 +1628,12 @@ static ssize_t ui_read(struct file *filp, char __user *buf, size_t count,
* them. These registers are defined as having a read value
* of 0.
*/
- else if (csr_off == ASIC_GPIO_CLEAR
- || csr_off == ASIC_GPIO_FORCE
- || csr_off == ASIC_QSFP1_CLEAR
- || csr_off == ASIC_QSFP1_FORCE
- || csr_off == ASIC_QSFP2_CLEAR
- || csr_off == ASIC_QSFP2_FORCE)
+ else if (csr_off == ASIC_GPIO_CLEAR ||
+ csr_off == ASIC_GPIO_FORCE ||
+ csr_off == ASIC_QSFP1_CLEAR ||
+ csr_off == ASIC_QSFP1_FORCE ||
+ csr_off == ASIC_QSFP2_CLEAR ||
+ csr_off == ASIC_QSFP2_FORCE)
data = 0;
else if (csr_off >= barlen) {
/*