aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig16
-rw-r--r--lib/Makefile5
-rw-r--r--lib/bitmap.c2
-rw-r--r--lib/dump_stack.c60
-rw-r--r--lib/kfifo.c2
-rw-r--r--lib/logic_pio.c280
-rw-r--r--lib/raid6/.gitignore1
-rw-r--r--lib/raid6/Makefile27
-rw-r--r--lib/raid6/algos.c4
-rw-r--r--lib/raid6/altivec.uc3
-rw-r--r--lib/raid6/sse2.c14
-rw-r--r--lib/raid6/test/Makefile22
-rw-r--r--lib/raid6/vpermxor.uc105
-rw-r--r--lib/sbitmap.c10
-rw-r--r--lib/test_bitmap.c4
-rw-r--r--lib/test_firmware.c1
16 files changed, 536 insertions, 20 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index e96089499371..5fe577673b98 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -55,6 +55,22 @@ config ARCH_USE_CMPXCHG_LOCKREF
config ARCH_HAS_FAST_MULTIPLIER
bool
+config INDIRECT_PIO
+ bool "Access I/O in non-MMIO mode"
+ depends on ARM64
+ help
+ On some platforms where no separate I/O space exists, there are I/O
+ hosts which can not be accessed in MMIO mode. Using the logical PIO
+ mechanism, the host-local I/O resource can be mapped into system
+ logic PIO space shared with MMIO hosts, such as PCI/PCIe, then the
+ system can access the I/O devices with the mapped-logic PIO through
+ I/O accessors.
+
+ This way has relatively little I/O performance cost. Please make
+ sure your devices really need this configure item enabled.
+
+ When in doubt, say N.
+
config CRC_CCITT
tristate "CRC-CCITT functions"
help
diff --git a/lib/Makefile b/lib/Makefile
index a90d4fcd748f..8fc0d3a9b34f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -18,7 +18,7 @@ KCOV_INSTRUMENT_debugobjects.o := n
KCOV_INSTRUMENT_dynamic_debug.o := n
lib-y := ctype.o string.o vsprintf.o cmdline.o \
- rbtree.o radix-tree.o dump_stack.o timerqueue.o\
+ rbtree.o radix-tree.o timerqueue.o\
idr.o int_sqrt.o extable.o \
sha1.o chacha20.o irq_regs.o argv_split.o \
flex_proportions.o ratelimit.o show_mem.o \
@@ -26,6 +26,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
earlycpio.o seq_buf.o siphash.o \
nmi_backtrace.o nodemask.o win_minmax.o
+lib-$(CONFIG_PRINTK) += dump_stack.o
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o
@@ -81,6 +82,8 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
+obj-y += logic_pio.o
+
obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
obj-$(CONFIG_BTREE) += btree.o
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 9e498c77ed0e..a42eff7e8c48 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -607,7 +607,7 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
/* if no digit is after '-', it's wrong*/
if (at_start && in_range)
return -EINVAL;
- if (!(a <= b) || !(used_size <= group_size))
+ if (!(a <= b) || group_size == 0 || !(used_size <= group_size))
return -EINVAL;
if (b >= nmaskbits)
return -ERANGE;
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index c5edbedd364d..5cff72f18c4a 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -10,6 +10,66 @@
#include <linux/sched/debug.h>
#include <linux/smp.h>
#include <linux/atomic.h>
+#include <linux/kexec.h>
+#include <linux/utsname.h>
+
+static char dump_stack_arch_desc_str[128];
+
+/**
+ * dump_stack_set_arch_desc - set arch-specific str to show with task dumps
+ * @fmt: printf-style format string
+ * @...: arguments for the format string
+ *
+ * The configured string will be printed right after utsname during task
+ * dumps. Usually used to add arch-specific system identifiers. If an
+ * arch wants to make use of such an ID string, it should initialize this
+ * as soon as possible during boot.
+ */
+void __init dump_stack_set_arch_desc(const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(dump_stack_arch_desc_str, sizeof(dump_stack_arch_desc_str),
+ fmt, args);
+ va_end(args);
+}
+
+/**
+ * dump_stack_print_info - print generic debug info for dump_stack()
+ * @log_lvl: log level
+ *
+ * Arch-specific dump_stack() implementations can use this function to
+ * print out the same debug information as the generic dump_stack().
+ */
+void dump_stack_print_info(const char *log_lvl)
+{
+ printk("%sCPU: %d PID: %d Comm: %.20s %s%s %s %.*s\n",
+ log_lvl, raw_smp_processor_id(), current->pid, current->comm,
+ kexec_crash_loaded() ? "Kdump: loaded " : "",
+ print_tainted(),
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
+
+ if (dump_stack_arch_desc_str[0] != '\0')
+ printk("%sHardware name: %s\n",
+ log_lvl, dump_stack_arch_desc_str);
+
+ print_worker_info(log_lvl, current);
+}
+
+/**
+ * show_regs_print_info - print generic debug info for show_regs()
+ * @log_lvl: log level
+ *
+ * show_regs() implementations can use this function to print out generic
+ * debug information.
+ */
+void show_regs_print_info(const char *log_lvl)
+{
+ dump_stack_print_info(log_lvl);
+}
static void __dump_stack(void)
{
diff --git a/lib/kfifo.c b/lib/kfifo.c
index 90ba1eb1df06..b0f757bf7213 100644
--- a/lib/kfifo.c
+++ b/lib/kfifo.c
@@ -39,7 +39,7 @@ int __kfifo_alloc(struct __kfifo *fifo, unsigned int size,
size_t esize, gfp_t gfp_mask)
{
/*
- * round down to the next power of 2, since our 'let the indices
+ * round up to the next power of 2, since our 'let the indices
* wrap' technique works only in this case.
*/
size = roundup_pow_of_two(size);
diff --git a/lib/logic_pio.c b/lib/logic_pio.c
new file mode 100644
index 000000000000..feea48fd1a0d
--- /dev/null
+++ b/lib/logic_pio.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2017 HiSilicon Limited, All Rights Reserved.
+ * Author: Gabriele Paoloni <gabriele.paoloni@huawei.com>
+ * Author: Zhichang Yuan <yuanzhichang@hisilicon.com>
+ */
+
+#define pr_fmt(fmt) "LOGIC PIO: " fmt
+
+#include <linux/of.h>
+#include <linux/io.h>
+#include <linux/logic_pio.h>
+#include <linux/mm.h>
+#include <linux/rculist.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+/* The unique hardware address list */
+static LIST_HEAD(io_range_list);
+static DEFINE_MUTEX(io_range_mutex);
+
+/* Consider a kernel general helper for this */
+#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
+
+/**
+ * logic_pio_register_range - register logical PIO range for a host
+ * @new_range: pointer to the IO range to be registered.
+ *
+ * Returns 0 on success, the error code in case of failure.
+ *
+ * Register a new IO range node in the IO range list.
+ */
+int logic_pio_register_range(struct logic_pio_hwaddr *new_range)
+{
+ struct logic_pio_hwaddr *range;
+ resource_size_t start;
+ resource_size_t end;
+ resource_size_t mmio_sz = 0;
+ resource_size_t iio_sz = MMIO_UPPER_LIMIT;
+ int ret = 0;
+
+ if (!new_range || !new_range->fwnode || !new_range->size)
+ return -EINVAL;
+
+ start = new_range->hw_start;
+ end = new_range->hw_start + new_range->size;
+
+ mutex_lock(&io_range_mutex);
+ list_for_each_entry_rcu(range, &io_range_list, list) {
+ if (range->fwnode == new_range->fwnode) {
+ /* range already there */
+ goto end_register;
+ }
+ if (range->flags == LOGIC_PIO_CPU_MMIO &&
+ new_range->flags == LOGIC_PIO_CPU_MMIO) {
+ /* for MMIO ranges we need to check for overlap */
+ if (start >= range->hw_start + range->size ||
+ end < range->hw_start) {
+ mmio_sz += range->size;
+ } else {
+ ret = -EFAULT;
+ goto end_register;
+ }
+ } else if (range->flags == LOGIC_PIO_INDIRECT &&
+ new_range->flags == LOGIC_PIO_INDIRECT) {
+ iio_sz += range->size;
+ }
+ }
+
+ /* range not registered yet, check for available space */
+ if (new_range->flags == LOGIC_PIO_CPU_MMIO) {
+ if (mmio_sz + new_range->size - 1 > MMIO_UPPER_LIMIT) {
+ /* if it's too big check if 64K space can be reserved */
+ if (mmio_sz + SZ_64K - 1 > MMIO_UPPER_LIMIT) {
+ ret = -E2BIG;
+ goto end_register;
+ }
+ new_range->size = SZ_64K;
+ pr_warn("Requested IO range too big, new size set to 64K\n");
+ }
+ new_range->io_start = mmio_sz;
+ } else if (new_range->flags == LOGIC_PIO_INDIRECT) {
+ if (iio_sz + new_range->size - 1 > IO_SPACE_LIMIT) {
+ ret = -E2BIG;
+ goto end_register;
+ }
+ new_range->io_start = iio_sz;
+ } else {
+ /* invalid flag */
+ ret = -EINVAL;
+ goto end_register;
+ }
+
+ list_add_tail_rcu(&new_range->list, &io_range_list);
+
+end_register:
+ mutex_unlock(&io_range_mutex);
+ return ret;
+}
+
+/**
+ * find_io_range_by_fwnode - find logical PIO range for given FW node
+ * @fwnode: FW node handle associated with logical PIO range
+ *
+ * Returns pointer to node on success, NULL otherwise.
+ *
+ * Traverse the io_range_list to find the registered node for @fwnode.
+ */
+struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode)
+{
+ struct logic_pio_hwaddr *range;
+
+ list_for_each_entry_rcu(range, &io_range_list, list) {
+ if (range->fwnode == fwnode)
+ return range;
+ }
+ return NULL;
+}
+
+/* Return a registered range given an input PIO token */
+static struct logic_pio_hwaddr *find_io_range(unsigned long pio)
+{
+ struct logic_pio_hwaddr *range;
+
+ list_for_each_entry_rcu(range, &io_range_list, list) {
+ if (in_range(pio, range->io_start, range->size))
+ return range;
+ }
+ pr_err("PIO entry token %lx invalid\n", pio);
+ return NULL;
+}
+
+/**
+ * logic_pio_to_hwaddr - translate logical PIO to HW address
+ * @pio: logical PIO value
+ *
+ * Returns HW address if valid, ~0 otherwise.
+ *
+ * Translate the input logical PIO to the corresponding hardware address.
+ * The input PIO should be unique in the whole logical PIO space.
+ */
+resource_size_t logic_pio_to_hwaddr(unsigned long pio)
+{
+ struct logic_pio_hwaddr *range;
+
+ range = find_io_range(pio);
+ if (range)
+ return range->hw_start + pio - range->io_start;
+
+ return (resource_size_t)~0;
+}
+
+/**
+ * logic_pio_trans_hwaddr - translate HW address to logical PIO
+ * @fwnode: FW node reference for the host
+ * @addr: Host-relative HW address
+ * @size: size to translate
+ *
+ * Returns Logical PIO value if successful, ~0UL otherwise
+ */
+unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode,
+ resource_size_t addr, resource_size_t size)
+{
+ struct logic_pio_hwaddr *range;
+
+ range = find_io_range_by_fwnode(fwnode);
+ if (!range || range->flags == LOGIC_PIO_CPU_MMIO) {
+ pr_err("IO range not found or invalid\n");
+ return ~0UL;
+ }
+ if (range->size < size) {
+ pr_err("resource size %pa cannot fit in IO range size %pa\n",
+ &size, &range->size);
+ return ~0UL;
+ }
+ return addr - range->hw_start + range->io_start;
+}
+
+unsigned long logic_pio_trans_cpuaddr(resource_size_t addr)
+{
+ struct logic_pio_hwaddr *range;
+
+ list_for_each_entry_rcu(range, &io_range_list, list) {
+ if (range->flags != LOGIC_PIO_CPU_MMIO)
+ continue;
+ if (in_range(addr, range->hw_start, range->size))
+ return addr - range->hw_start + range->io_start;
+ }
+ pr_err("addr %llx not registered in io_range_list\n",
+ (unsigned long long) addr);
+ return ~0UL;
+}
+
+#if defined(CONFIG_INDIRECT_PIO) && defined(PCI_IOBASE)
+#define BUILD_LOGIC_IO(bw, type) \
+type logic_in##bw(unsigned long addr) \
+{ \
+ type ret = (type)~0; \
+ \
+ if (addr < MMIO_UPPER_LIMIT) { \
+ ret = read##bw(PCI_IOBASE + addr); \
+ } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \
+ struct logic_pio_hwaddr *entry = find_io_range(addr); \
+ \
+ if (entry && entry->ops) \
+ ret = entry->ops->in(entry->hostdata, \
+ addr, sizeof(type)); \
+ else \
+ WARN_ON_ONCE(1); \
+ } \
+ return ret; \
+} \
+ \
+void logic_out##bw(type value, unsigned long addr) \
+{ \
+ if (addr < MMIO_UPPER_LIMIT) { \
+ write##bw(value, PCI_IOBASE + addr); \
+ } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \
+ struct logic_pio_hwaddr *entry = find_io_range(addr); \
+ \
+ if (entry && entry->ops) \
+ entry->ops->out(entry->hostdata, \
+ addr, value, sizeof(type)); \
+ else \
+ WARN_ON_ONCE(1); \
+ } \
+} \
+ \
+void logic_ins##bw(unsigned long addr, void *buffer, \
+ unsigned int count) \
+{ \
+ if (addr < MMIO_UPPER_LIMIT) { \
+ reads##bw(PCI_IOBASE + addr, buffer, count); \
+ } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \
+ struct logic_pio_hwaddr *entry = find_io_range(addr); \
+ \
+ if (entry && entry->ops) \
+ entry->ops->ins(entry->hostdata, \
+ addr, buffer, sizeof(type), count); \
+ else \
+ WARN_ON_ONCE(1); \
+ } \
+ \
+} \
+ \
+void logic_outs##bw(unsigned long addr, const void *buffer, \
+ unsigned int count) \
+{ \
+ if (addr < MMIO_UPPER_LIMIT) { \
+ writes##bw(PCI_IOBASE + addr, buffer, count); \
+ } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \
+ struct logic_pio_hwaddr *entry = find_io_range(addr); \
+ \
+ if (entry && entry->ops) \
+ entry->ops->outs(entry->hostdata, \
+ addr, buffer, sizeof(type), count); \
+ else \
+ WARN_ON_ONCE(1); \
+ } \
+}
+
+BUILD_LOGIC_IO(b, u8)
+EXPORT_SYMBOL(logic_inb);
+EXPORT_SYMBOL(logic_insb);
+EXPORT_SYMBOL(logic_outb);
+EXPORT_SYMBOL(logic_outsb);
+
+BUILD_LOGIC_IO(w, u16)
+EXPORT_SYMBOL(logic_inw);
+EXPORT_SYMBOL(logic_insw);
+EXPORT_SYMBOL(logic_outw);
+EXPORT_SYMBOL(logic_outsw);
+
+BUILD_LOGIC_IO(l, u32)
+EXPORT_SYMBOL(logic_inl);
+EXPORT_SYMBOL(logic_insl);
+EXPORT_SYMBOL(logic_outl);
+EXPORT_SYMBOL(logic_outsl);
+
+#endif /* CONFIG_INDIRECT_PIO && PCI_IOBASE */
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index f01b1cb04f91..3de0d8921286 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -4,3 +4,4 @@ int*.c
tables.c
neon?.c
s390vx?.c
+vpermxor*.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 44d6b46df051..2f8b61dfd9b0 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -5,7 +5,8 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
int8.o int16.o int32.o
raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -90,6 +91,30 @@ $(obj)/altivec8.c: UNROLL := 8
$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
CFLAGS_neon1.o += $(NEON_FLAGS)
targets += neon1.c
$(obj)/neon1.c: UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index c65aa80d67ed..5065b1e7e327 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_altivec2,
&raid6_altivec4,
&raid6_altivec8,
+ &raid6_vpermxor1,
+ &raid6_vpermxor2,
+ &raid6_vpermxor4,
+ &raid6_vpermxor8,
#endif
#if defined(CONFIG_S390)
&raid6_s390vx8,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8a1fef..d20ed0d11411 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
#include <altivec.h>
#ifdef __KERNEL__
# include <asm/cputable.h>
# include <asm/switch_to.h>
+#endif /* __KERNEL__ */
/*
* This is the C data type to use. We use a vector of
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
index 1d2276b007ee..8191e1d0d2fb 100644
--- a/lib/raid6/sse2.c
+++ b/lib/raid6/sse2.c
@@ -91,7 +91,7 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
size_t bytes, void **ptrs)
- {
+{
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;
@@ -200,9 +200,9 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
kernel_fpu_end();
}
- static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
+static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
size_t bytes, void **ptrs)
- {
+{
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;
@@ -265,7 +265,7 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
asm volatile("sfence" : : : "memory");
kernel_fpu_end();
- }
+}
const struct raid6_calls raid6_sse2x2 = {
raid6_sse22_gen_syndrome,
@@ -366,9 +366,9 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
kernel_fpu_end();
}
- static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
+static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
size_t bytes, void **ptrs)
- {
+{
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;
@@ -471,7 +471,7 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
}
asm volatile("sfence" : : : "memory");
kernel_fpu_end();
- }
+}
const struct raid6_calls raid6_sse2x4 = {
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index fabc477b1417..5d73f5cb4d8a 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -45,10 +45,12 @@ else ifeq ($(HAS_NEON),yes)
CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
else
HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
- gcc -c -x c - >&/dev/null && \
- rm ./-.o && echo yes)
+ gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
ifeq ($(HAS_ALTIVEC),yes)
- OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+ CFLAGS += -I../../../arch/powerpc/include
+ CFLAGS += -DCONFIG_ALTIVEC
+ OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
endif
endif
@@ -95,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk
altivec8.c: altivec.uc ../unroll.awk
$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
+vpermxor1.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@
+
+vpermxor2.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@
+
+vpermxor4.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@
+
+vpermxor8.c: vpermxor.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@
+
int1.c: int.uc ../unroll.awk
$(AWK) ../unroll.awk -vN=1 < int.uc > $@
@@ -117,7 +131,7 @@ tables.c: mktables
./mktables > tables.c
clean:
- rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
+ rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test
spotless: clean
rm -f *~
diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc
new file mode 100644
index 000000000000..10475dc423c1
--- /dev/null
+++ b/lib/raid6/vpermxor.uc
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2017, Matt Brown, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * vpermxor$#.c
+ *
+ * Based on H. Peter Anvin's paper - The mathematics of RAID-6
+ *
+ * $#-way unrolled portable integer math RAID-6 instruction set
+ * This file is postprocessed using unroll.awk
+ *
+ * vpermxor$#.c makes use of the vpermxor instruction to optimise the RAID6 Q
+ * syndrome calculations.
+ * This can be run on systems which have both Altivec and vpermxor instruction.
+ *
+ * This instruction was introduced in POWER8 - ISA v2.07.
+ */
+
+#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
+#include <altivec.h>
+#ifdef __KERNEL__
+#include <asm/cputable.h>
+#include <asm/ppc-opcode.h>
+#include <asm/switch_to.h>
+#endif
+
+typedef vector unsigned char unative_t;
+#define NSIZE sizeof(unative_t)
+
+static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14,
+ 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08,
+ 0x06, 0x04, 0x02,0x00};
+static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d,
+ 0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80,
+ 0x60, 0x40, 0x20, 0x00};
+
+static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes,
+ void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+ unative_t wp$$, wq$$, wd$$;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ for (d = 0; d < bytes; d += NSIZE*$#) {
+ wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+
+ for (z = z0-1; z>=0; z--) {
+ wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ /* P syndrome */
+ wp$$ = vec_xor(wp$$, wd$$);
+
+ /* Q syndrome */
+ asm(VPERMXOR(%0,%1,%2,%3):"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$));
+ wq$$ = vec_xor(wq$$, wd$$);
+ }
+ *(unative_t *)&p[d+NSIZE*$$] = wp$$;
+ *(unative_t *)&q[d+NSIZE*$$] = wq$$;
+ }
+}
+
+static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+
+ raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs);
+
+ disable_kernel_altivec();
+ preempt_enable();
+}
+
+int raid6_have_altivec_vpermxor(void);
+#if $# == 1
+int raid6_have_altivec_vpermxor(void)
+{
+ /* Check if arch has both altivec and the vpermxor instructions */
+# ifdef __KERNEL__
+ return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) &&
+ cpu_has_feature(CPU_FTR_ARCH_207S));
+# else
+ return 1;
+#endif
+
+}
+#endif
+
+const struct raid6_calls raid6_vpermxor$# = {
+ raid6_vpermxor$#_gen_syndrome,
+ NULL,
+ raid6_have_altivec_vpermxor,
+ "vpermxor$#",
+ 0
+};
+#endif
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 42b5ca0acf93..e6a9c06ec70c 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -100,7 +100,7 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
return -1;
}
- if (!test_and_set_bit(nr, word))
+ if (!test_and_set_bit_lock(nr, word))
break;
hint = nr + 1;
@@ -434,9 +434,9 @@ static void sbq_wake_up(struct sbitmap_queue *sbq)
/*
* Pairs with the memory barrier in set_current_state() to ensure the
* proper ordering of clear_bit()/waitqueue_active() in the waker and
- * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See
- * the comment on waitqueue_active(). This is __after_atomic because we
- * just did clear_bit() in the caller.
+ * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
+ * waiter. See the comment on waitqueue_active(). This is __after_atomic
+ * because we just did clear_bit_unlock() in the caller.
*/
smp_mb__after_atomic();
@@ -469,7 +469,7 @@ static void sbq_wake_up(struct sbitmap_queue *sbq)
void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
unsigned int cpu)
{
- sbitmap_clear_bit(&sbq->sb, nr);
+ sbitmap_clear_bit_unlock(&sbq->sb, nr);
sbq_wake_up(sbq);
if (likely(!sbq->round_robin && nr < sbq->sb.depth))
*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index b3f235baa05d..413367cf569e 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -255,6 +255,10 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = {
{-EINVAL, "-1", NULL, 8, 0},
{-EINVAL, "-0", NULL, 8, 0},
{-EINVAL, "10-1", NULL, 8, 0},
+ {-EINVAL, "0-31:", NULL, 8, 0},
+ {-EINVAL, "0-31:0", NULL, 8, 0},
+ {-EINVAL, "0-31:0/0", NULL, 8, 0},
+ {-EINVAL, "0-31:1/0", NULL, 8, 0},
{-EINVAL, "0-31:10/1", NULL, 8, 0},
};
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 078a61480573..cee000ac54d8 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -21,6 +21,7 @@
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/kthread.h>
+#include <linux/vmalloc.h>
#define TEST_FIRMWARE_NAME "test-firmware.bin"
#define TEST_FIRMWARE_NUM_REQS 4