diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 16 | ||||
-rw-r--r-- | lib/Makefile | 5 | ||||
-rw-r--r-- | lib/bitmap.c | 2 | ||||
-rw-r--r-- | lib/dump_stack.c | 60 | ||||
-rw-r--r-- | lib/kfifo.c | 2 | ||||
-rw-r--r-- | lib/logic_pio.c | 280 | ||||
-rw-r--r-- | lib/raid6/.gitignore | 1 | ||||
-rw-r--r-- | lib/raid6/Makefile | 27 | ||||
-rw-r--r-- | lib/raid6/algos.c | 4 | ||||
-rw-r--r-- | lib/raid6/altivec.uc | 3 | ||||
-rw-r--r-- | lib/raid6/sse2.c | 14 | ||||
-rw-r--r-- | lib/raid6/test/Makefile | 22 | ||||
-rw-r--r-- | lib/raid6/vpermxor.uc | 105 | ||||
-rw-r--r-- | lib/sbitmap.c | 10 | ||||
-rw-r--r-- | lib/test_bitmap.c | 4 | ||||
-rw-r--r-- | lib/test_firmware.c | 1 |
16 files changed, 536 insertions, 20 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index e96089499371..5fe577673b98 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -55,6 +55,22 @@ config ARCH_USE_CMPXCHG_LOCKREF config ARCH_HAS_FAST_MULTIPLIER bool +config INDIRECT_PIO + bool "Access I/O in non-MMIO mode" + depends on ARM64 + help + On some platforms where no separate I/O space exists, there are I/O + hosts which can not be accessed in MMIO mode. Using the logical PIO + mechanism, the host-local I/O resource can be mapped into system + logic PIO space shared with MMIO hosts, such as PCI/PCIe, then the + system can access the I/O devices with the mapped-logic PIO through + I/O accessors. + + This way has relatively little I/O performance cost. Please make + sure your devices really need this configure item enabled. + + When in doubt, say N. + config CRC_CCITT tristate "CRC-CCITT functions" help diff --git a/lib/Makefile b/lib/Makefile index a90d4fcd748f..8fc0d3a9b34f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -18,7 +18,7 @@ KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n lib-y := ctype.o string.o vsprintf.o cmdline.o \ - rbtree.o radix-tree.o dump_stack.o timerqueue.o\ + rbtree.o radix-tree.o timerqueue.o\ idr.o int_sqrt.o extable.o \ sha1.o chacha20.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ @@ -26,6 +26,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ earlycpio.o seq_buf.o siphash.o \ nmi_backtrace.o nodemask.o win_minmax.o +lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o @@ -81,6 +82,8 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o +obj-y += logic_pio.o + obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_BTREE) += btree.o diff --git a/lib/bitmap.c b/lib/bitmap.c index 9e498c77ed0e..a42eff7e8c48 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -607,7 +607,7 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, /* if no digit is after '-', it's wrong*/ if (at_start && in_range) return -EINVAL; - if (!(a <= b) || !(used_size <= group_size)) + if (!(a <= b) || group_size == 0 || !(used_size <= group_size)) return -EINVAL; if (b >= nmaskbits) return -ERANGE; diff --git a/lib/dump_stack.c b/lib/dump_stack.c index c5edbedd364d..5cff72f18c4a 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -10,6 +10,66 @@ #include <linux/sched/debug.h> #include <linux/smp.h> #include <linux/atomic.h> +#include <linux/kexec.h> +#include <linux/utsname.h> + +static char dump_stack_arch_desc_str[128]; + +/** + * dump_stack_set_arch_desc - set arch-specific str to show with task dumps + * @fmt: printf-style format string + * @...: arguments for the format string + * + * The configured string will be printed right after utsname during task + * dumps. Usually used to add arch-specific system identifiers. If an + * arch wants to make use of such an ID string, it should initialize this + * as soon as possible during boot. + */ +void __init dump_stack_set_arch_desc(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vsnprintf(dump_stack_arch_desc_str, sizeof(dump_stack_arch_desc_str), + fmt, args); + va_end(args); +} + +/** + * dump_stack_print_info - print generic debug info for dump_stack() + * @log_lvl: log level + * + * Arch-specific dump_stack() implementations can use this function to + * print out the same debug information as the generic dump_stack(). + */ +void dump_stack_print_info(const char *log_lvl) +{ + printk("%sCPU: %d PID: %d Comm: %.20s %s%s %s %.*s\n", + log_lvl, raw_smp_processor_id(), current->pid, current->comm, + kexec_crash_loaded() ? "Kdump: loaded " : "", + print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + + if (dump_stack_arch_desc_str[0] != '\0') + printk("%sHardware name: %s\n", + log_lvl, dump_stack_arch_desc_str); + + print_worker_info(log_lvl, current); +} + +/** + * show_regs_print_info - print generic debug info for show_regs() + * @log_lvl: log level + * + * show_regs() implementations can use this function to print out generic + * debug information. + */ +void show_regs_print_info(const char *log_lvl) +{ + dump_stack_print_info(log_lvl); +} static void __dump_stack(void) { diff --git a/lib/kfifo.c b/lib/kfifo.c index 90ba1eb1df06..b0f757bf7213 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -39,7 +39,7 @@ int __kfifo_alloc(struct __kfifo *fifo, unsigned int size, size_t esize, gfp_t gfp_mask) { /* - * round down to the next power of 2, since our 'let the indices + * round up to the next power of 2, since our 'let the indices * wrap' technique works only in this case. */ size = roundup_pow_of_two(size); diff --git a/lib/logic_pio.c b/lib/logic_pio.c new file mode 100644 index 000000000000..feea48fd1a0d --- /dev/null +++ b/lib/logic_pio.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2017 HiSilicon Limited, All Rights Reserved. + * Author: Gabriele Paoloni <gabriele.paoloni@huawei.com> + * Author: Zhichang Yuan <yuanzhichang@hisilicon.com> + */ + +#define pr_fmt(fmt) "LOGIC PIO: " fmt + +#include <linux/of.h> +#include <linux/io.h> +#include <linux/logic_pio.h> +#include <linux/mm.h> +#include <linux/rculist.h> +#include <linux/sizes.h> +#include <linux/slab.h> + +/* The unique hardware address list */ +static LIST_HEAD(io_range_list); +static DEFINE_MUTEX(io_range_mutex); + +/* Consider a kernel general helper for this */ +#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len)) + +/** + * logic_pio_register_range - register logical PIO range for a host + * @new_range: pointer to the IO range to be registered. + * + * Returns 0 on success, the error code in case of failure. + * + * Register a new IO range node in the IO range list. + */ +int logic_pio_register_range(struct logic_pio_hwaddr *new_range) +{ + struct logic_pio_hwaddr *range; + resource_size_t start; + resource_size_t end; + resource_size_t mmio_sz = 0; + resource_size_t iio_sz = MMIO_UPPER_LIMIT; + int ret = 0; + + if (!new_range || !new_range->fwnode || !new_range->size) + return -EINVAL; + + start = new_range->hw_start; + end = new_range->hw_start + new_range->size; + + mutex_lock(&io_range_mutex); + list_for_each_entry_rcu(range, &io_range_list, list) { + if (range->fwnode == new_range->fwnode) { + /* range already there */ + goto end_register; + } + if (range->flags == LOGIC_PIO_CPU_MMIO && + new_range->flags == LOGIC_PIO_CPU_MMIO) { + /* for MMIO ranges we need to check for overlap */ + if (start >= range->hw_start + range->size || + end < range->hw_start) { + mmio_sz += range->size; + } else { + ret = -EFAULT; + goto end_register; + } + } else if (range->flags == LOGIC_PIO_INDIRECT && + new_range->flags == LOGIC_PIO_INDIRECT) { + iio_sz += range->size; + } + } + + /* range not registered yet, check for available space */ + if (new_range->flags == LOGIC_PIO_CPU_MMIO) { + if (mmio_sz + new_range->size - 1 > MMIO_UPPER_LIMIT) { + /* if it's too big check if 64K space can be reserved */ + if (mmio_sz + SZ_64K - 1 > MMIO_UPPER_LIMIT) { + ret = -E2BIG; + goto end_register; + } + new_range->size = SZ_64K; + pr_warn("Requested IO range too big, new size set to 64K\n"); + } + new_range->io_start = mmio_sz; + } else if (new_range->flags == LOGIC_PIO_INDIRECT) { + if (iio_sz + new_range->size - 1 > IO_SPACE_LIMIT) { + ret = -E2BIG; + goto end_register; + } + new_range->io_start = iio_sz; + } else { + /* invalid flag */ + ret = -EINVAL; + goto end_register; + } + + list_add_tail_rcu(&new_range->list, &io_range_list); + +end_register: + mutex_unlock(&io_range_mutex); + return ret; +} + +/** + * find_io_range_by_fwnode - find logical PIO range for given FW node + * @fwnode: FW node handle associated with logical PIO range + * + * Returns pointer to node on success, NULL otherwise. + * + * Traverse the io_range_list to find the registered node for @fwnode. + */ +struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode) +{ + struct logic_pio_hwaddr *range; + + list_for_each_entry_rcu(range, &io_range_list, list) { + if (range->fwnode == fwnode) + return range; + } + return NULL; +} + +/* Return a registered range given an input PIO token */ +static struct logic_pio_hwaddr *find_io_range(unsigned long pio) +{ + struct logic_pio_hwaddr *range; + + list_for_each_entry_rcu(range, &io_range_list, list) { + if (in_range(pio, range->io_start, range->size)) + return range; + } + pr_err("PIO entry token %lx invalid\n", pio); + return NULL; +} + +/** + * logic_pio_to_hwaddr - translate logical PIO to HW address + * @pio: logical PIO value + * + * Returns HW address if valid, ~0 otherwise. + * + * Translate the input logical PIO to the corresponding hardware address. + * The input PIO should be unique in the whole logical PIO space. + */ +resource_size_t logic_pio_to_hwaddr(unsigned long pio) +{ + struct logic_pio_hwaddr *range; + + range = find_io_range(pio); + if (range) + return range->hw_start + pio - range->io_start; + + return (resource_size_t)~0; +} + +/** + * logic_pio_trans_hwaddr - translate HW address to logical PIO + * @fwnode: FW node reference for the host + * @addr: Host-relative HW address + * @size: size to translate + * + * Returns Logical PIO value if successful, ~0UL otherwise + */ +unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode, + resource_size_t addr, resource_size_t size) +{ + struct logic_pio_hwaddr *range; + + range = find_io_range_by_fwnode(fwnode); + if (!range || range->flags == LOGIC_PIO_CPU_MMIO) { + pr_err("IO range not found or invalid\n"); + return ~0UL; + } + if (range->size < size) { + pr_err("resource size %pa cannot fit in IO range size %pa\n", + &size, &range->size); + return ~0UL; + } + return addr - range->hw_start + range->io_start; +} + +unsigned long logic_pio_trans_cpuaddr(resource_size_t addr) +{ + struct logic_pio_hwaddr *range; + + list_for_each_entry_rcu(range, &io_range_list, list) { + if (range->flags != LOGIC_PIO_CPU_MMIO) + continue; + if (in_range(addr, range->hw_start, range->size)) + return addr - range->hw_start + range->io_start; + } + pr_err("addr %llx not registered in io_range_list\n", + (unsigned long long) addr); + return ~0UL; +} + +#if defined(CONFIG_INDIRECT_PIO) && defined(PCI_IOBASE) +#define BUILD_LOGIC_IO(bw, type) \ +type logic_in##bw(unsigned long addr) \ +{ \ + type ret = (type)~0; \ + \ + if (addr < MMIO_UPPER_LIMIT) { \ + ret = read##bw(PCI_IOBASE + addr); \ + } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \ + struct logic_pio_hwaddr *entry = find_io_range(addr); \ + \ + if (entry && entry->ops) \ + ret = entry->ops->in(entry->hostdata, \ + addr, sizeof(type)); \ + else \ + WARN_ON_ONCE(1); \ + } \ + return ret; \ +} \ + \ +void logic_out##bw(type value, unsigned long addr) \ +{ \ + if (addr < MMIO_UPPER_LIMIT) { \ + write##bw(value, PCI_IOBASE + addr); \ + } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \ + struct logic_pio_hwaddr *entry = find_io_range(addr); \ + \ + if (entry && entry->ops) \ + entry->ops->out(entry->hostdata, \ + addr, value, sizeof(type)); \ + else \ + WARN_ON_ONCE(1); \ + } \ +} \ + \ +void logic_ins##bw(unsigned long addr, void *buffer, \ + unsigned int count) \ +{ \ + if (addr < MMIO_UPPER_LIMIT) { \ + reads##bw(PCI_IOBASE + addr, buffer, count); \ + } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \ + struct logic_pio_hwaddr *entry = find_io_range(addr); \ + \ + if (entry && entry->ops) \ + entry->ops->ins(entry->hostdata, \ + addr, buffer, sizeof(type), count); \ + else \ + WARN_ON_ONCE(1); \ + } \ + \ +} \ + \ +void logic_outs##bw(unsigned long addr, const void *buffer, \ + unsigned int count) \ +{ \ + if (addr < MMIO_UPPER_LIMIT) { \ + writes##bw(PCI_IOBASE + addr, buffer, count); \ + } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \ + struct logic_pio_hwaddr *entry = find_io_range(addr); \ + \ + if (entry && entry->ops) \ + entry->ops->outs(entry->hostdata, \ + addr, buffer, sizeof(type), count); \ + else \ + WARN_ON_ONCE(1); \ + } \ +} + +BUILD_LOGIC_IO(b, u8) +EXPORT_SYMBOL(logic_inb); +EXPORT_SYMBOL(logic_insb); +EXPORT_SYMBOL(logic_outb); +EXPORT_SYMBOL(logic_outsb); + +BUILD_LOGIC_IO(w, u16) +EXPORT_SYMBOL(logic_inw); +EXPORT_SYMBOL(logic_insw); +EXPORT_SYMBOL(logic_outw); +EXPORT_SYMBOL(logic_outsw); + +BUILD_LOGIC_IO(l, u32) +EXPORT_SYMBOL(logic_inl); +EXPORT_SYMBOL(logic_insl); +EXPORT_SYMBOL(logic_outl); +EXPORT_SYMBOL(logic_outsl); + +#endif /* CONFIG_INDIRECT_PIO && PCI_IOBASE */ diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore index f01b1cb04f91..3de0d8921286 100644 --- a/lib/raid6/.gitignore +++ b/lib/raid6/.gitignore @@ -4,3 +4,4 @@ int*.c tables.c neon?.c s390vx?.c +vpermxor*.c diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 44d6b46df051..2f8b61dfd9b0 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -5,7 +5,8 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ int8.o int16.o int32.o raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o -raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o +raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \ + vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o @@ -90,6 +91,30 @@ $(obj)/altivec8.c: UNROLL := 8 $(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) +CFLAGS_vpermxor1.o += $(altivec_flags) +targets += vpermxor1.c +$(obj)/vpermxor1.c: UNROLL := 1 +$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_vpermxor2.o += $(altivec_flags) +targets += vpermxor2.c +$(obj)/vpermxor2.c: UNROLL := 2 +$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_vpermxor4.o += $(altivec_flags) +targets += vpermxor4.c +$(obj)/vpermxor4.c: UNROLL := 4 +$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_vpermxor8.o += $(altivec_flags) +targets += vpermxor8.c +$(obj)/vpermxor8.c: UNROLL := 8 +$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + CFLAGS_neon1.o += $(NEON_FLAGS) targets += neon1.c $(obj)/neon1.c: UNROLL := 1 diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index c65aa80d67ed..5065b1e7e327 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_altivec2, &raid6_altivec4, &raid6_altivec8, + &raid6_vpermxor1, + &raid6_vpermxor2, + &raid6_vpermxor4, + &raid6_vpermxor8, #endif #if defined(CONFIG_S390) &raid6_s390vx8, diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc index 682aae8a1fef..d20ed0d11411 100644 --- a/lib/raid6/altivec.uc +++ b/lib/raid6/altivec.uc @@ -24,10 +24,13 @@ #include <linux/raid/pq.h> +#ifdef CONFIG_ALTIVEC + #include <altivec.h> #ifdef __KERNEL__ # include <asm/cputable.h> # include <asm/switch_to.h> +#endif /* __KERNEL__ */ /* * This is the C data type to use. We use a vector of diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c index 1d2276b007ee..8191e1d0d2fb 100644 --- a/lib/raid6/sse2.c +++ b/lib/raid6/sse2.c @@ -91,7 +91,7 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) static void raid6_sse21_xor_syndrome(int disks, int start, int stop, size_t bytes, void **ptrs) - { +{ u8 **dptr = (u8 **)ptrs; u8 *p, *q; int d, z, z0; @@ -200,9 +200,9 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } - static void raid6_sse22_xor_syndrome(int disks, int start, int stop, +static void raid6_sse22_xor_syndrome(int disks, int start, int stop, size_t bytes, void **ptrs) - { +{ u8 **dptr = (u8 **)ptrs; u8 *p, *q; int d, z, z0; @@ -265,7 +265,7 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) asm volatile("sfence" : : : "memory"); kernel_fpu_end(); - } +} const struct raid6_calls raid6_sse2x2 = { raid6_sse22_gen_syndrome, @@ -366,9 +366,9 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } - static void raid6_sse24_xor_syndrome(int disks, int start, int stop, +static void raid6_sse24_xor_syndrome(int disks, int start, int stop, size_t bytes, void **ptrs) - { +{ u8 **dptr = (u8 **)ptrs; u8 *p, *q; int d, z, z0; @@ -471,7 +471,7 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) } asm volatile("sfence" : : : "memory"); kernel_fpu_end(); - } +} const struct raid6_calls raid6_sse2x4 = { diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index fabc477b1417..5d73f5cb4d8a 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -45,10 +45,12 @@ else ifeq ($(HAS_NEON),yes) CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\ - gcc -c -x c - >&/dev/null && \ - rm ./-.o && echo yes) + gcc -c -x c - >/dev/null && rm ./-.o && echo yes) ifeq ($(HAS_ALTIVEC),yes) - OBJS += altivec1.o altivec2.o altivec4.o altivec8.o + CFLAGS += -I../../../arch/powerpc/include + CFLAGS += -DCONFIG_ALTIVEC + OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \ + vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o endif endif @@ -95,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk altivec8.c: altivec.uc ../unroll.awk $(AWK) ../unroll.awk -vN=8 < altivec.uc > $@ +vpermxor1.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@ + +vpermxor2.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@ + +vpermxor4.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@ + +vpermxor8.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@ + int1.c: int.uc ../unroll.awk $(AWK) ../unroll.awk -vN=1 < int.uc > $@ @@ -117,7 +131,7 @@ tables.c: mktables ./mktables > tables.c clean: - rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test + rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test spotless: clean rm -f *~ diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc new file mode 100644 index 000000000000..10475dc423c1 --- /dev/null +++ b/lib/raid6/vpermxor.uc @@ -0,0 +1,105 @@ +/* + * Copyright 2017, Matt Brown, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * vpermxor$#.c + * + * Based on H. Peter Anvin's paper - The mathematics of RAID-6 + * + * $#-way unrolled portable integer math RAID-6 instruction set + * This file is postprocessed using unroll.awk + * + * vpermxor$#.c makes use of the vpermxor instruction to optimise the RAID6 Q + * syndrome calculations. + * This can be run on systems which have both Altivec and vpermxor instruction. + * + * This instruction was introduced in POWER8 - ISA v2.07. + */ + +#include <linux/raid/pq.h> +#ifdef CONFIG_ALTIVEC + +#include <altivec.h> +#ifdef __KERNEL__ +#include <asm/cputable.h> +#include <asm/ppc-opcode.h> +#include <asm/switch_to.h> +#endif + +typedef vector unsigned char unative_t; +#define NSIZE sizeof(unative_t) + +static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14, + 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08, + 0x06, 0x04, 0x02,0x00}; +static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d, + 0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80, + 0x60, 0x40, 0x20, 0x00}; + +static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes, + void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + unative_t wp$$, wq$$, wd$$; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + for (d = 0; d < bytes; d += NSIZE*$#) { + wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; + + for (z = z0-1; z>=0; z--) { + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + /* P syndrome */ + wp$$ = vec_xor(wp$$, wd$$); + + /* Q syndrome */ + asm(VPERMXOR(%0,%1,%2,%3):"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$)); + wq$$ = vec_xor(wq$$, wd$$); + } + *(unative_t *)&p[d+NSIZE*$$] = wp$$; + *(unative_t *)&q[d+NSIZE*$$] = wq$$; + } +} + +static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + preempt_disable(); + enable_kernel_altivec(); + + raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs); + + disable_kernel_altivec(); + preempt_enable(); +} + +int raid6_have_altivec_vpermxor(void); +#if $# == 1 +int raid6_have_altivec_vpermxor(void) +{ + /* Check if arch has both altivec and the vpermxor instructions */ +# ifdef __KERNEL__ + return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) && + cpu_has_feature(CPU_FTR_ARCH_207S)); +# else + return 1; +#endif + +} +#endif + +const struct raid6_calls raid6_vpermxor$# = { + raid6_vpermxor$#_gen_syndrome, + NULL, + raid6_have_altivec_vpermxor, + "vpermxor$#", + 0 +}; +#endif diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 42b5ca0acf93..e6a9c06ec70c 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -100,7 +100,7 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth, return -1; } - if (!test_and_set_bit(nr, word)) + if (!test_and_set_bit_lock(nr, word)) break; hint = nr + 1; @@ -434,9 +434,9 @@ static void sbq_wake_up(struct sbitmap_queue *sbq) /* * Pairs with the memory barrier in set_current_state() to ensure the * proper ordering of clear_bit()/waitqueue_active() in the waker and - * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See - * the comment on waitqueue_active(). This is __after_atomic because we - * just did clear_bit() in the caller. + * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the + * waiter. See the comment on waitqueue_active(). This is __after_atomic + * because we just did clear_bit_unlock() in the caller. */ smp_mb__after_atomic(); @@ -469,7 +469,7 @@ static void sbq_wake_up(struct sbitmap_queue *sbq) void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu) { - sbitmap_clear_bit(&sbq->sb, nr); + sbitmap_clear_bit_unlock(&sbq->sb, nr); sbq_wake_up(sbq); if (likely(!sbq->round_robin && nr < sbq->sb.depth)) *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index b3f235baa05d..413367cf569e 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -255,6 +255,10 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = { {-EINVAL, "-1", NULL, 8, 0}, {-EINVAL, "-0", NULL, 8, 0}, {-EINVAL, "10-1", NULL, 8, 0}, + {-EINVAL, "0-31:", NULL, 8, 0}, + {-EINVAL, "0-31:0", NULL, 8, 0}, + {-EINVAL, "0-31:0/0", NULL, 8, 0}, + {-EINVAL, "0-31:1/0", NULL, 8, 0}, {-EINVAL, "0-31:10/1", NULL, 8, 0}, }; diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 078a61480573..cee000ac54d8 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -21,6 +21,7 @@ #include <linux/uaccess.h> #include <linux/delay.h> #include <linux/kthread.h> +#include <linux/vmalloc.h> #define TEST_FIRMWARE_NAME "test-firmware.bin" #define TEST_FIRMWARE_NUM_REQS 4 |