aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/hfi1/pio_copy.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/hfi1/pio_copy.c')
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c246
1 files changed, 62 insertions, 184 deletions
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 3a1ef3056282..aa7773643107 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -165,9 +165,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
preempt_enable();
}
-/* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */
-#define USE_SHIFTS 1
-#ifdef USE_SHIFTS
/*
* Handle carry bytes using shifts and masks.
*
@@ -187,150 +184,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
#define mshift(x) (8 * (x))
/*
- * Read nbytes bytes from "from" and return them in the LSB bytes
- * of pbuf->carry. Other bytes are zeroed. Any previous value
- * pbuf->carry is lost.
- *
- * NOTES:
- * o do not read from from if nbytes is zero
- * o from may _not_ be u64 aligned
- * o nbytes must not span a QW boundary
- */
-static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
- unsigned int nbytes)
-{
- unsigned long off;
-
- if (nbytes == 0) {
- pbuf->carry.val64 = 0;
- } else {
- /* align our pointer */
- off = (unsigned long)from & 0x7;
- from = (void *)((unsigned long)from & ~0x7l);
- pbuf->carry.val64 = ((*(u64 *)from)
- << zshift(nbytes + off))/* zero upper bytes */
- >> zshift(nbytes); /* place at bottom */
- }
- pbuf->carry_bytes = nbytes;
-}
-
-/*
- * Read nbytes bytes from "from" and put them at the next significant bytes
- * of pbuf->carry. Unused bytes are zeroed. It is expected that the extra
- * read does not overfill carry.
- *
- * NOTES:
- * o from may _not_ be u64 aligned
- * o nbytes may span a QW boundary
- */
-static inline void read_extra_bytes(struct pio_buf *pbuf,
- const void *from, unsigned int nbytes)
-{
- unsigned long off = (unsigned long)from & 0x7;
- unsigned int room, xbytes;
-
- /* align our pointer */
- from = (void *)((unsigned long)from & ~0x7l);
-
- /* check count first - don't read anything if count is zero */
- while (nbytes) {
- /* find the number of bytes in this u64 */
- room = 8 - off; /* this u64 has room for this many bytes */
- xbytes = min(room, nbytes);
-
- /*
- * shift down to zero lower bytes, shift up to zero upper
- * bytes, shift back down to move into place
- */
- pbuf->carry.val64 |= (((*(u64 *)from)
- >> mshift(off))
- << zshift(xbytes))
- >> zshift(xbytes + pbuf->carry_bytes);
- off = 0;
- pbuf->carry_bytes += xbytes;
- nbytes -= xbytes;
- from += sizeof(u64);
- }
-}
-
-/*
- * Zero extra bytes from the end of pbuf->carry.
- *
- * NOTES:
- * o zbytes <= old_bytes
- */
-static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
-{
- unsigned int remaining;
-
- if (zbytes == 0) /* nothing to do */
- return;
-
- remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */
-
- /* NOTE: zshift only guaranteed to work if remaining != 0 */
- if (remaining)
- pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining))
- >> zshift(remaining);
- else
- pbuf->carry.val64 = 0;
- pbuf->carry_bytes = remaining;
-}
-
-/*
- * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
- * Put the unused part of the next 8 bytes of src into the LSB bytes of
- * pbuf->carry with the upper bytes zeroed..
- *
- * NOTES:
- * o result must keep unused bytes zeroed
- * o src must be u64 aligned
- */
-static inline void merge_write8(
- struct pio_buf *pbuf,
- void __iomem *dest,
- const void *src)
-{
- u64 new, temp;
-
- new = *(u64 *)src;
- temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
- writeq(temp, dest);
- pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
-}
-
-/*
- * Write a quad word using all bytes of carry.
- */
-static inline void carry8_write8(union mix carry, void __iomem *dest)
-{
- writeq(carry.val64, dest);
-}
-
-/*
- * Write a quad word using all the valid bytes of carry. If carry
- * has zero valid bytes, nothing is written.
- * Returns 0 on nothing written, non-zero on quad word written.
- */
-static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
-{
- if (pbuf->carry_bytes) {
- /* unused bytes are always kept zeroed, so just write */
- writeq(pbuf->carry.val64, dest);
- return 1;
- }
-
- return 0;
-}
-
-#else /* USE_SHIFTS */
-/*
- * Handle carry bytes using byte copies.
- *
- * NOTE: the value the unused portion of carry is left uninitialized.
- */
-
-/*
* Jump copy - no-loop copy for < 8 bytes.
*/
static inline void jcopy(u8 *dest, const u8 *src, u32 n)
@@ -338,18 +191,25 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
switch (n) {
case 7:
*dest++ = *src++;
+ /* fall through */
case 6:
*dest++ = *src++;
+ /* fall through */
case 5:
*dest++ = *src++;
+ /* fall through */
case 4:
*dest++ = *src++;
+ /* fall through */
case 3:
*dest++ = *src++;
+ /* fall through */
case 2:
*dest++ = *src++;
+ /* fall through */
case 1:
*dest++ = *src++;
+ /* fall through */
}
}
@@ -365,6 +225,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
unsigned int nbytes)
{
+ pbuf->carry.val64 = 0;
jcopy(&pbuf->carry.val8[0], from, nbytes);
pbuf->carry_bytes = nbytes;
}
@@ -385,40 +246,31 @@ static inline void read_extra_bytes(struct pio_buf *pbuf,
}
/*
- * Zero extra bytes from the end of pbuf->carry.
- *
- * We do not care about the value of unused bytes in carry, so just
- * reduce the byte count.
+ * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
+ * Put the unused part of the next 8 bytes of src into the LSB bytes of
+ * pbuf->carry with the upper bytes zeroed..
*
* NOTES:
- * o zbytes <= old_bytes
- */
-static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
-{
- pbuf->carry_bytes -= zbytes;
-}
-
-/*
- * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
- * Put the unused part of the next 8 bytes of src into the low bytes of
- * pbuf->carry.
+ * o result must keep unused bytes zeroed
+ * o src must be u64 aligned
*/
static inline void merge_write8(
struct pio_buf *pbuf,
- void *dest,
+ void __iomem *dest,
const void *src)
{
- u32 remainder = 8 - pbuf->carry_bytes;
+ u64 new, temp;
- jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
- writeq(pbuf->carry.val64, dest);
- jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
+ new = *(u64 *)src;
+ temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
+ writeq(temp, dest);
+ pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
}
/*
* Write a quad word using all bytes of carry.
*/
-static inline void carry8_write8(union mix carry, void *dest)
+static inline void carry8_write8(union mix carry, void __iomem *dest)
{
writeq(carry.val64, dest);
}
@@ -428,20 +280,16 @@ static inline void carry8_write8(union mix carry, void *dest)
* has zero valid bytes, nothing is written.
* Returns 0 on nothing written, non-zero on quad word written.
*/
-static inline int carry_write8(struct pio_buf *pbuf, void *dest)
+static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
{
if (pbuf->carry_bytes) {
- u64 zero = 0;
-
- jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
- 8 - pbuf->carry_bytes);
+ /* unused bytes are always kept zeroed, so just write */
writeq(pbuf->carry.val64, dest);
return 1;
}
return 0;
}
-#endif /* USE_SHIFTS */
/*
* Segmented PIO Copy - start
@@ -550,8 +398,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
void __iomem *dend; /* 8-byte data end */
- unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3;
- unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7;
+ unsigned long qw_to_write = nbytes >> 3;
+ unsigned long bytes_left = nbytes & 0x7;
/* calculate 8-byte data end */
dend = dest + (qw_to_write * sizeof(u64));
@@ -621,16 +469,46 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
dest += sizeof(u64);
}
- /* adjust carry */
- if (pbuf->carry_bytes < bytes_left) {
- /* need to read more */
- read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes);
+ pbuf->qw_written += qw_to_write;
+
+ /* handle carry and left-over bytes */
+ if (pbuf->carry_bytes + bytes_left >= 8) {
+ unsigned long nread;
+
+ /* there is enough to fill another qw - fill carry */
+ nread = 8 - pbuf->carry_bytes;
+ read_extra_bytes(pbuf, from, nread);
+
+ /*
+ * One more write - but need to make sure dest is correct.
+ * Check for wrap and the possibility the write
+ * should be in SOP space.
+ *
+ * The two checks immediately below cannot both be true, hence
+ * the else. If we have wrapped, we cannot still be within the
+ * first block. Conversely, if we are still in the first block,
+ * we cannot have wrapped. We do the wrap check first as that
+ * is more likely.
+ */
+ /* adjust if we have wrapped */
+ if (dest >= pbuf->end)
+ dest -= pbuf->size;
+ /* jump to the SOP range if within the first block */
+ else if (pbuf->qw_written < PIO_BLOCK_QWS)
+ dest += SOP_DISTANCE;
+
+ /* flush out full carry */
+ carry8_write8(pbuf->carry, dest);
+ pbuf->qw_written++;
+
+ /* now adjust and read the rest of the bytes into carry */
+ bytes_left -= nread;
+ from += nread; /* from is now not aligned */
+ read_low_bytes(pbuf, from, bytes_left);
} else {
- /* remove invalid bytes */
- zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left);
+ /* not enough to fill another qw, append the rest to carry */
+ read_extra_bytes(pbuf, from, bytes_left);
}
-
- pbuf->qw_written += qw_to_write;
}
/*