aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/sfc/io.h
diff options
context:
space:
mode:
authorBen Hutchings <bhutchings@solarflare.com>2011-02-22 17:26:10 +0000
committerBen Hutchings <bhutchings@solarflare.com>2011-03-04 17:58:42 +0000
commit65f0b417dee94f779ce9b77102b7d73c93723b39 (patch)
tree390279203a8c73a986d15be5cc30f9bb2e95c1e8 /drivers/net/sfc/io.h
parentsfc: Bump version to 3.1 (diff)
downloadlinux-dev-65f0b417dee94f779ce9b77102b7d73c93723b39.tar.xz
linux-dev-65f0b417dee94f779ce9b77102b7d73c93723b39.zip
sfc: Use write-combining to reduce TX latency
Based on work by Neil Turton <nturton@solarflare.com> and Kieran Mansley <kmansley@solarflare.com>. The BIU has now been verified to handle 3- and 4-dword writes within a single 128-bit register correctly. This means we can enable write- combining and only insert write barriers between writes to distinct registers. This has been observed to save about 0.5 us when pushing a TX descriptor to an empty TX queue. Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Diffstat (limited to 'drivers/net/sfc/io.h')
-rw-r--r--drivers/net/sfc/io.h13
1 files changed, 9 insertions, 4 deletions
diff --git a/drivers/net/sfc/io.h b/drivers/net/sfc/io.h
index dc45110b2456..d9d8c2ef1074 100644
--- a/drivers/net/sfc/io.h
+++ b/drivers/net/sfc/io.h
@@ -48,9 +48,9 @@
* replacing the low 96 bits with zero does not affect functionality.
* - If the host writes to the last dword address of such a register
* (i.e. the high 32 bits) the underlying register will always be
- * written. If the collector does not hold values for the low 96
- * bits of the register, they will be written as zero. Writing to
- * the last qword does not have this effect and must not be done.
+ * written. If the collector and the current write together do not
+ * provide values for all 128 bits of the register, the low 96 bits
+ * will be written as zero.
* - If the host writes to the address of any other part of such a
* register while the collector already holds values for some other
* register, the write is discarded and the collector maintains its
@@ -103,6 +103,7 @@ static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value,
_efx_writed(efx, value->u32[2], reg + 8);
_efx_writed(efx, value->u32[3], reg + 12);
#endif
+ wmb();
mmiowb();
spin_unlock_irqrestore(&efx->biu_lock, flags);
}
@@ -125,6 +126,7 @@ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase,
__raw_writel((__force u32)value->u32[0], membase + addr);
__raw_writel((__force u32)value->u32[1], membase + addr + 4);
#endif
+ wmb();
mmiowb();
spin_unlock_irqrestore(&efx->biu_lock, flags);
}
@@ -139,6 +141,7 @@ static inline void efx_writed(struct efx_nic *efx, efx_dword_t *value,
/* No lock required */
_efx_writed(efx, value->u32[0], reg);
+ wmb();
}
/* Read a 128-bit CSR, locking as appropriate. */
@@ -237,12 +240,14 @@ static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value,
#ifdef EFX_USE_QWORD_IO
_efx_writeq(efx, value->u64[0], reg + 0);
+ _efx_writeq(efx, value->u64[1], reg + 8);
#else
_efx_writed(efx, value->u32[0], reg + 0);
_efx_writed(efx, value->u32[1], reg + 4);
-#endif
_efx_writed(efx, value->u32[2], reg + 8);
_efx_writed(efx, value->u32[3], reg + 12);
+#endif
+ wmb();
}
#define efx_writeo_page(efx, value, reg, page) \
_efx_writeo_page(efx, value, \