From 52239484bf8aec031afa84ae08aa88224d819b93 Mon Sep 17 00:00:00 2001 From: Libin Date: Thu, 5 Sep 2013 18:57:56 +0800 Subject: x86/smpboot: Fix announce_cpu() to printk() the last "OK" properly When booting secondary CPUs, announce_cpu() is called to show which cpu has been brought up. For example: [ 0.402751] smpboot: Booting Node 0, Processors #1 #2 #3 #4 #5 OK [ 0.525667] smpboot: Booting Node 1, Processors #6 #7 #8 #9 #10 #11 OK [ 0.755592] smpboot: Booting Node 0, Processors #12 #13 #14 #15 #16 #17 OK [ 0.890495] smpboot: Booting Node 1, Processors #18 #19 #20 #21 #22 #23 But the last "OK" is lost, because 'nr_cpu_ids-1' represents the maximum possible cpu id. It should use the maximum present cpu id in case not all CPUs booted up. Signed-off-by: Libin Cc: Cc: Cc: Cc: Link: http://lkml.kernel.org/r/1378378676-18276-1-git-send-email-huawei.libin@huawei.com [ tweaked the changelog, removed unnecessary line break, tweaked the format to align the fields vertically. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index aecc98a93d1b..6cacab671f9b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -653,6 +653,7 @@ static void announce_cpu(int cpu, int apicid) { static int current_node = -1; int node = early_cpu_to_node(cpu); + int max_cpu_present = find_last_bit(cpumask_bits(cpu_present_mask), NR_CPUS); if (system_state == SYSTEM_BOOTING) { if (node != current_node) { @@ -661,7 +662,7 @@ static void announce_cpu(int cpu, int apicid) current_node = node; pr_info("Booting Node %3d, Processors ", node); } - pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : ""); + pr_cont(" #%4d%s", cpu, cpu == max_cpu_present ? " OK\n" : ""); return; } else pr_info("Booting Node %d Processor %d APIC 0x%x\n", -- cgit v1.2.3-59-g8ed1b From c0da0fa1d7ebe3c86747b10c92e0ac2be1524a8a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 7 Sep 2013 11:39:10 +0200 Subject: x86: Remove now-unused save_rest() b3af11afe06a ("x86: get rid of pt_regs argument of iopl(2)") dropped PTREGSCALL which was also the last user of save_rest. Drop that now-unused function too. Signed-off-by: Borislav Petkov Cc: Al Viro Link: http://lkml.kernel.org/r/1378546750-19727-1-git-send-email-bp@suse.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1b69951a81e2..b077f4cc225a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -487,21 +487,6 @@ ENDPROC(native_usergs_sysret64) TRACE_IRQS_OFF .endm -ENTRY(save_rest) - PARTIAL_FRAME 1 (REST_SKIP+8) - movq 5*8+16(%rsp), %r11 /* save return address */ - movq_cfi rbx, RBX+16 - movq_cfi rbp, RBP+16 - movq_cfi r12, R12+16 - movq_cfi r13, R13+16 - movq_cfi r14, R14+16 - movq_cfi r15, R15+16 - movq %r11, 8(%rsp) /* return address */ - FIXUP_TOP_OF_STACK %r11, 16 - ret - CFI_ENDPROC -END(save_rest) - /* save complete stack frame */ .pushsection .kprobes.text, "ax" ENTRY(save_paranoid) -- cgit v1.2.3-59-g8ed1b From 06c939c1f41b1c28bb17fae074ad610e22a01db8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 Sep 2013 13:26:36 +0200 Subject: perf/x86/intel: Fix Silvermont offcore masks Fengguang Wu reported: > sparse warnings: (new ones prefixed by >>) > > >> arch/x86/kernel/cpu/perf_event_intel.c:901:9: sparse: constant 0x768005ffff is so big it is long > >> arch/x86/kernel/cpu/perf_event_intel.c:902:9: sparse: constant 0x768005ffff is so big it is long > > vim +901 arch/x86/kernel/cpu/perf_event_intel.c > > 895 }, > 896 }; > 897 > 898 static struct extra_reg intel_slm_extra_regs[] __read_mostly = > 899 { > 900 /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ > > 901 INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffff, RSP_0), > > 902 INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffff, RSP_1), > 903 EVENT_EXTRA_END > 904 }; > 905 Extend those constants to 64 bits. Reported-by: fengguang.wu@intel.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20130909112636.GQ31370@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index c62d88396ad5..9db76c31b3c3 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -899,8 +899,8 @@ static __initconst const u64 atom_hw_cache_event_ids static struct extra_reg intel_slm_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ - INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffff, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffff, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1), EVENT_EXTRA_END }; -- cgit v1.2.3-59-g8ed1b From 4db30e38ec2ad937678964f227202c6b5e90508e Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 13 Sep 2013 10:57:54 -0400 Subject: tile: fix typos in comment in arch/tile/kernel/unaligned.c Signed-off-by: Chris Metcalf --- arch/tile/kernel/unaligned.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c index b425fb6a480d..b030b4e78845 100644 --- a/arch/tile/kernel/unaligned.c +++ b/arch/tile/kernel/unaligned.c @@ -551,8 +551,8 @@ static tilegx_bundle_bits jit_x1_bnezt(int ra, int broff) /* * This function generates unalign fixup JIT. * - * We fist find unalign load/store instruction's destination, source - * reguisters: ra, rb and rd. and 3 scratch registers by calling + * We first find unalign load/store instruction's destination, source + * registers: ra, rb and rd. and 3 scratch registers by calling * find_regs(...). 3 scratch clobbers should not alias with any register * used in the fault bundle. Then analyze the fault bundle to determine * if it's a load or store, operand width, branch or address increment etc. -- cgit v1.2.3-59-g8ed1b From 8629470ef89da00bd1b974df17ccbb36b3865798 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 13 Sep 2013 11:14:25 -0400 Subject: tile: use pmd_pfn() instead of casting via pte_t Signed-off-by: Chris Metcalf --- arch/tile/mm/pgtable.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 2deaddf3e01f..4fd9ec0b58ed 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -127,8 +127,7 @@ void shatter_huge_page(unsigned long addr) } /* Shatter the huge page into the preallocated L2 page table. */ - pmd_populate_kernel(&init_mm, pmd, - get_prealloc_pte(pte_pfn(*(pte_t *)pmd))); + pmd_populate_kernel(&init_mm, pmd, get_prealloc_pte(pmd_pfn(*pmd))); #ifdef __PAGETABLE_PMD_FOLDED /* Walk every pgd on the system and update the pmd there. */ -- cgit v1.2.3-59-g8ed1b From 4b12909fd137d9a236e4b5c76e0d4b44c2b7f49f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 11 Sep 2013 13:57:15 -0400 Subject: tile: remove HUGE_VMAP dead code A config option to allow a variant vmap() using huge pages that was never upstreamed had some bits of code related to it scattered around the tile architecture; the config option was removed downstream and this commit cleans up the scattered evidence of it from the upstream as well. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/page.h | 5 ++--- arch/tile/include/asm/pgtable_32.h | 12 ++---------- arch/tile/include/asm/pgtable_64.h | 4 +--- arch/tile/kernel/setup.c | 3 +-- arch/tile/mm/fault.c | 2 -- arch/tile/mm/init.c | 4 ---- 6 files changed, 6 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index 6346888f7bdc..672768008618 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -182,10 +182,9 @@ static inline __attribute_const__ int get_order(unsigned long size) #define PAGE_OFFSET (-(_AC(1, UL) << (MAX_VA_WIDTH - 1))) #define KERNEL_HIGH_VADDR _AC(0xfffffff800000000, UL) /* high 32GB */ -#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x400000000) /* 4 GB */ -#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */ +#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */ +#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */ #define _VMALLOC_START FIXADDR_TOP -#define HUGE_VMAP_BASE (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */ #define MEM_SV_START (KERNEL_HIGH_VADDR - 0x100000000) /* 256 MB */ #define MEM_MODULE_START (MEM_SV_START + (256*1024*1024)) /* 256 MB */ #define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024)) diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index 63142ab3b3dd..d26a42279036 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -55,17 +55,9 @@ #define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*LAST_PKMAP) & PGDIR_MASK) #ifdef CONFIG_HIGHMEM -# define __VMAPPING_END (PKMAP_BASE & ~(HPAGE_SIZE-1)) +# define _VMALLOC_END (PKMAP_BASE & ~(HPAGE_SIZE-1)) #else -# define __VMAPPING_END (FIXADDR_START & ~(HPAGE_SIZE-1)) -#endif - -#ifdef CONFIG_HUGEVMAP -#define HUGE_VMAP_END __VMAPPING_END -#define HUGE_VMAP_BASE (HUGE_VMAP_END - CONFIG_NR_HUGE_VMAPS * HPAGE_SIZE) -#define _VMALLOC_END HUGE_VMAP_BASE -#else -#define _VMALLOC_END __VMAPPING_END +# define _VMALLOC_END (FIXADDR_START & ~(HPAGE_SIZE-1)) #endif /* diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index 3421177f7370..2c8a9cd102d3 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -52,12 +52,10 @@ * memory allocation code). The vmalloc code puts in an internal * guard page between each allocation. */ -#define _VMALLOC_END HUGE_VMAP_BASE +#define _VMALLOC_END MEM_SV_START #define VMALLOC_END _VMALLOC_END #define VMALLOC_START _VMALLOC_START -#define HUGE_VMAP_END (HUGE_VMAP_BASE + PGDIR_SIZE) - #ifndef __ASSEMBLY__ /* We have no pud since we are a three-level page table. */ diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 4c34caea9dd3..74c91729a62a 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1268,8 +1268,7 @@ static void __init validate_va(void) if ((long)VMALLOC_START >= 0) early_panic( "Linux VMALLOC region below the 2GB line (%#lx)!\n" - "Reconfigure the kernel with fewer NR_HUGE_VMAPS\n" - "or smaller VMALLOC_RESERVE.\n", + "Reconfigure the kernel with smaller VMALLOC_RESERVE.\n", VMALLOC_START); #endif } diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 4c288f199453..6c0571216a9d 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -149,8 +149,6 @@ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address) pmd_k = vmalloc_sync_one(pgd, address); if (!pmd_k) return -1; - if (pmd_huge(*pmd_k)) - return 0; /* support TILE huge_vmap() API */ pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) return -1; diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 4e316deb92fd..0fa1acfac79a 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -827,10 +827,6 @@ void __init mem_init(void) FIXADDR_START, FIXADDR_TOP + PAGE_SIZE - 1); printk(KERN_DEBUG " PKMAP %#lx - %#lx\n", PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1); -#endif -#ifdef CONFIG_HUGEVMAP - printk(KERN_DEBUG " HUGEMAP %#lx - %#lx\n", - HUGE_VMAP_BASE, HUGE_VMAP_END - 1); #endif printk(KERN_DEBUG " VMALLOC %#lx - %#lx\n", _VMALLOC_START, _VMALLOC_END - 1); -- cgit v1.2.3-59-g8ed1b From 35fab6118f189823bd590ce2c56e66bacf674b97 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 9 Sep 2013 12:18:54 -0400 Subject: tile: remove stale arch/tile/kernel/futex_64.S This should have been removed with commit 47d632f9f8f3, but it was overlooked. Signed-off-by: Chris Metcalf --- arch/tile/kernel/futex_64.S | 55 --------------------------------------------- 1 file changed, 55 deletions(-) delete mode 100644 arch/tile/kernel/futex_64.S (limited to 'arch') diff --git a/arch/tile/kernel/futex_64.S b/arch/tile/kernel/futex_64.S deleted file mode 100644 index f465d1eda20f..000000000000 --- a/arch/tile/kernel/futex_64.S +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2011 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - * - * Atomically access user memory, but use MMU to avoid propagating - * kernel exceptions. - */ - -#include -#include -#include -#include -#include - -/* - * Provide a set of atomic memory operations supporting . - * - * r0: user address to manipulate - * r1: new value to write, or for cmpxchg, old value to compare against - * r2: (cmpxchg only) new value to write - * - * Return __get_user struct, r0 with value, r1 with error. - */ -#define FUTEX_OP(name, ...) \ -STD_ENTRY(futex_##name) \ - __VA_ARGS__; \ - { \ - move r1, zero; \ - jrp lr \ - }; \ - STD_ENDPROC(futex_##name); \ - .pushsection __ex_table,"a"; \ - .quad 1b, get_user_fault; \ - .popsection - - .pushsection .fixup,"ax" -get_user_fault: - { movei r1, -EFAULT; jrp lr } - ENDPROC(get_user_fault) - .popsection - -FUTEX_OP(cmpxchg, mtspr CMPEXCH_VALUE, r1; 1: cmpexch4 r0, r0, r2) -FUTEX_OP(set, 1: exch4 r0, r0, r1) -FUTEX_OP(add, 1: fetchadd4 r0, r0, r1) -FUTEX_OP(or, 1: fetchor4 r0, r0, r1) -FUTEX_OP(andn, nor r1, r1, zero; 1: fetchand4 r0, r0, r1) -- cgit v1.2.3-59-g8ed1b From 9d8e3f9693245415db0b7c58551a91fa9fd1f9c7 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 13 Sep 2013 13:16:46 -0700 Subject: perf/x86/intel: Mark MEM_LOAD_UOPS_MISS_RETIRED as precise on SNB On Intel SNB (SNB, SNB-EP), the event MEM_LOAD_UOPS_MISS_RETIRED supports PEBS. It was missing for the SNB PEBS event constraint table thereby preventing any measurement with PEBS for it. This patch adds the event to the PEBS table for SNB. WARNING: it should be noted that this event like a few others are subject to the erratum BT241 for Xeon E5 (SNB-EP). As such, the event may undercount when used with PEBS unless the workaround is implemented. But without this patch and just the workaround, the kernel would not allow precise sampling on this event. BT241 is documented in: http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/xeon-e5-family-spec-update.pdf Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: zheng.z.yan@intel.com Link: http://lkml.kernel.org/r/20130913201646.GA23981@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 3065c57a63c1..4ab70acd3a8d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -558,6 +558,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ EVENT_CONSTRAINT_END }; -- cgit v1.2.3-59-g8ed1b From 0f531431d3de88efb4234d6c0ce22089ec035a38 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 13 Sep 2013 17:02:29 +0300 Subject: x86/intel/lpss: Add pin control support to Intel low power subsystem x86 chips with LPSS (low power subsystem) such as Lynxpoint and Baytrail have SoC like peripheral support and controllable pins. At the moment, Baytrail needs the pinctrl-baytrail driver to let peripherals control their gpio resources, but more pincontrol functions such as pin muxing and grouping are possible to add later. Signed-off-by: Mathias Nyman Reviewed-by: Mika Westerberg Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1379080949-21734-1-git-send-email-mathias.nyman@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b32ebf92b0ce..4d5843d5f64d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -482,11 +482,12 @@ config X86_INTEL_LPSS bool "Intel Low Power Subsystem Support" depends on ACPI select COMMON_CLK + select PINCTRL ---help--- Select to build support for Intel Low Power Subsystem such as found on Intel Lynxpoint PCH. Selecting this option enables - things like clock tree (common clock framework) which are needed - by the LPSS peripheral drivers. + things like clock tree (common clock framework) and pincontrol + which are needed by the LPSS peripheral drivers. config X86_RDC321X bool "RDC R-321x SoC" -- cgit v1.2.3-59-g8ed1b From ad2a4bb378ff76f9ca4c61bb11b1b208f71e1861 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 16 Sep 2013 13:02:57 -0400 Subject: tile: double default VMALLOC space With per-cpu data as well as loaded kernel modules coming from the vmalloc arena, we get close to the line all the time and occasionally need more than we had, so just double it up by default. Signed-off-by: Chris Metcalf --- arch/tile/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 8a7cc663b3f8..d45a2c48f185 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -361,7 +361,7 @@ config CMDLINE_OVERRIDE config VMALLOC_RESERVE hex - default 0x1000000 + default 0x2000000 config HARDWALL bool "Hardwall support to allow access to user dynamic network" -- cgit v1.2.3-59-g8ed1b From 126eb08820a2c97c10ea58e73a544c2f075d59a7 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 16 Sep 2013 13:52:45 -0400 Subject: tile: improve gxio iorpc autogenerated code style Fix some whitespace style issues in some auto-generated files. Signed-off-by: Chris Metcalf --- arch/tile/gxio/iorpc_mpipe.c | 90 +++++++++++++++---------------- arch/tile/gxio/iorpc_mpipe_info.c | 15 +++--- arch/tile/gxio/iorpc_trio.c | 28 +++++----- arch/tile/gxio/iorpc_usb_host.c | 8 +-- arch/tile/gxio/usb_host.c | 8 +-- arch/tile/include/gxio/iorpc_mpipe.h | 52 +++++++++--------- arch/tile/include/gxio/iorpc_mpipe_info.h | 12 ++--- arch/tile/include/gxio/iorpc_trio.h | 28 +++++----- arch/tile/include/gxio/iorpc_usb_host.h | 8 +-- arch/tile/include/gxio/usb_host.h | 8 +-- 10 files changed, 127 insertions(+), 130 deletions(-) (limited to 'arch') diff --git a/arch/tile/gxio/iorpc_mpipe.c b/arch/tile/gxio/iorpc_mpipe.c index 4f8f3d619c4a..e19325c4c431 100644 --- a/arch/tile/gxio/iorpc_mpipe.c +++ b/arch/tile/gxio/iorpc_mpipe.c @@ -21,7 +21,7 @@ struct alloc_buffer_stacks_param { unsigned int flags; }; -int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -45,7 +45,7 @@ struct init_buffer_stack_aux_param { unsigned int buffer_size_enum; }; -int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int stack, unsigned int buffer_size_enum) @@ -80,7 +80,7 @@ struct alloc_notif_rings_param { unsigned int flags; }; -int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -102,7 +102,7 @@ struct init_notif_ring_aux_param { unsigned int ring; }; -int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring) { @@ -133,7 +133,7 @@ struct request_notif_ring_interrupt_param { unsigned int ring; }; -int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int ring) @@ -158,7 +158,7 @@ struct enable_notif_ring_interrupt_param { unsigned int ring; }; -int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context, unsigned int ring) { struct enable_notif_ring_interrupt_param temp; @@ -179,7 +179,7 @@ struct alloc_notif_groups_param { unsigned int flags; }; -int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -201,7 +201,7 @@ struct init_notif_group_param { gxio_mpipe_notif_group_bits_t bits; }; -int gxio_mpipe_init_notif_group(gxio_mpipe_context_t * context, +int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context, unsigned int group, gxio_mpipe_notif_group_bits_t bits) { @@ -223,7 +223,7 @@ struct alloc_buckets_param { unsigned int flags; }; -int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t * context, unsigned int count, +int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { struct alloc_buckets_param temp; @@ -244,7 +244,7 @@ struct init_bucket_param { MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info; }; -int gxio_mpipe_init_bucket(gxio_mpipe_context_t * context, unsigned int bucket, +int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket, MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info) { struct init_bucket_param temp; @@ -265,7 +265,7 @@ struct alloc_edma_rings_param { unsigned int flags; }; -int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -288,7 +288,7 @@ struct init_edma_ring_aux_param { unsigned int channel; }; -int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring, unsigned int channel) { @@ -315,7 +315,7 @@ int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va, EXPORT_SYMBOL(gxio_mpipe_init_edma_ring_aux); -int gxio_mpipe_commit_rules(gxio_mpipe_context_t * context, const void *blob, +int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob, size_t blob_size) { const void *params = blob; @@ -332,7 +332,7 @@ struct register_client_memory_param { unsigned int flags; }; -int gxio_mpipe_register_client_memory(gxio_mpipe_context_t * context, +int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context, unsigned int iotlb, HV_PTE pte, unsigned int flags) { @@ -355,7 +355,7 @@ struct link_open_aux_param { unsigned int flags; }; -int gxio_mpipe_link_open_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context, _gxio_mpipe_link_name_t name, unsigned int flags) { struct link_open_aux_param temp; @@ -374,7 +374,7 @@ struct link_close_aux_param { int mac; }; -int gxio_mpipe_link_close_aux(gxio_mpipe_context_t * context, int mac) +int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac) { struct link_close_aux_param temp; struct link_close_aux_param *params = &temp; @@ -393,7 +393,7 @@ struct link_set_attr_aux_param { int64_t val; }; -int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t * context, int mac, +int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac, uint32_t attr, int64_t val) { struct link_set_attr_aux_param temp; @@ -415,8 +415,8 @@ struct get_timestamp_aux_param { uint64_t cycles; }; -int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t * context, uint64_t * sec, - uint64_t * nsec, uint64_t * cycles) +int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec, + uint64_t *nsec, uint64_t *cycles) { int __result; struct get_timestamp_aux_param temp; @@ -440,7 +440,7 @@ struct set_timestamp_aux_param { uint64_t cycles; }; -int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t * context, uint64_t sec, +int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec, uint64_t nsec, uint64_t cycles) { struct set_timestamp_aux_param temp; @@ -460,8 +460,7 @@ struct adjust_timestamp_aux_param { int64_t nsec; }; -int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context, - int64_t nsec) +int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context, int64_t nsec) { struct adjust_timestamp_aux_param temp; struct adjust_timestamp_aux_param *params = &temp; @@ -475,25 +474,6 @@ int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context, EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_aux); -struct adjust_timestamp_freq_param { - int32_t ppb; -}; - -int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t * context, - int32_t ppb) -{ - struct adjust_timestamp_freq_param temp; - struct adjust_timestamp_freq_param *params = &temp; - - params->ppb = ppb; - - return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, - sizeof(*params), - GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ); -} - -EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_freq); - struct config_edma_ring_blks_param { unsigned int ering; unsigned int max_blks; @@ -501,7 +481,7 @@ struct config_edma_ring_blks_param { unsigned int db; }; -int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t * context, +int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t *context, unsigned int ering, unsigned int max_blks, unsigned int min_snf_blks, unsigned int db) { @@ -520,11 +500,29 @@ int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t * context, EXPORT_SYMBOL(gxio_mpipe_config_edma_ring_blks); +struct adjust_timestamp_freq_param { + int32_t ppb; +}; + +int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context, int32_t ppb) +{ + struct adjust_timestamp_freq_param temp; + struct adjust_timestamp_freq_param *params = &temp; + + params->ppb = ppb; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ); +} + +EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_freq); + struct arm_pollfd_param { union iorpc_pollfd pollfd; }; -int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie) +int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie) { struct arm_pollfd_param temp; struct arm_pollfd_param *params = &temp; @@ -541,7 +539,7 @@ struct close_pollfd_param { union iorpc_pollfd pollfd; }; -int gxio_mpipe_close_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie) +int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie) { struct close_pollfd_param temp; struct close_pollfd_param *params = &temp; @@ -558,7 +556,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t * context, HV_PTE *base) +int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base) { int __result; struct get_mmio_base_param temp; @@ -579,7 +577,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t * context, +int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/iorpc_mpipe_info.c b/arch/tile/gxio/iorpc_mpipe_info.c index 64883aabeb9c..77019c6e9b4a 100644 --- a/arch/tile/gxio/iorpc_mpipe_info.c +++ b/arch/tile/gxio/iorpc_mpipe_info.c @@ -15,12 +15,11 @@ /* This file is machine-generated; DO NOT EDIT! */ #include "gxio/iorpc_mpipe_info.h" - struct instance_aux_param { _gxio_mpipe_link_name_t name; }; -int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context, _gxio_mpipe_link_name_t name) { struct instance_aux_param temp; @@ -39,10 +38,10 @@ struct enumerate_aux_param { _gxio_mpipe_link_mac_t mac; }; -int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context, unsigned int idx, - _gxio_mpipe_link_name_t * name, - _gxio_mpipe_link_mac_t * mac) + _gxio_mpipe_link_name_t *name, + _gxio_mpipe_link_mac_t *mac) { int __result; struct enumerate_aux_param temp; @@ -50,7 +49,7 @@ int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context, __result = hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params), - (((uint64_t) idx << 32) | + (((uint64_t)idx << 32) | GXIO_MPIPE_INFO_OP_ENUMERATE_AUX)); *name = params->name; *mac = params->mac; @@ -64,7 +63,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context, HV_PTE *base) { int __result; @@ -86,7 +85,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/iorpc_trio.c b/arch/tile/gxio/iorpc_trio.c index da6e18e049c3..1d3cedb9aeb4 100644 --- a/arch/tile/gxio/iorpc_trio.c +++ b/arch/tile/gxio/iorpc_trio.c @@ -21,7 +21,7 @@ struct alloc_asids_param { unsigned int flags; }; -int gxio_trio_alloc_asids(gxio_trio_context_t * context, unsigned int count, +int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { struct alloc_asids_param temp; @@ -44,7 +44,7 @@ struct alloc_memory_maps_param { unsigned int flags; }; -int gxio_trio_alloc_memory_maps(gxio_trio_context_t * context, +int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -67,7 +67,7 @@ struct alloc_scatter_queues_param { unsigned int flags; }; -int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context, +int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -91,7 +91,7 @@ struct alloc_pio_regions_param { unsigned int flags; }; -int gxio_trio_alloc_pio_regions(gxio_trio_context_t * context, +int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -115,7 +115,7 @@ struct init_pio_region_aux_param { unsigned int flags; }; -int gxio_trio_init_pio_region_aux(gxio_trio_context_t * context, +int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context, unsigned int pio_region, unsigned int mac, uint32_t bus_address_hi, unsigned int flags) { @@ -145,7 +145,7 @@ struct init_memory_map_mmu_aux_param { unsigned int order_mode; }; -int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t * context, +int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context, unsigned int map, unsigned long va, uint64_t size, unsigned int asid, unsigned int mac, uint64_t bus_address, @@ -175,7 +175,7 @@ struct get_port_property_param { struct pcie_trio_ports_property trio_ports; }; -int gxio_trio_get_port_property(gxio_trio_context_t * context, +int gxio_trio_get_port_property(gxio_trio_context_t *context, struct pcie_trio_ports_property *trio_ports) { int __result; @@ -198,7 +198,7 @@ struct config_legacy_intr_param { unsigned int intx; }; -int gxio_trio_config_legacy_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int intx) { @@ -227,7 +227,7 @@ struct config_msi_intr_param { unsigned int asid; }; -int gxio_trio_config_msi_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int mem_map, uint64_t mem_map_base, uint64_t mem_map_limit, @@ -259,7 +259,7 @@ struct set_mps_mrs_param { unsigned int mac; }; -int gxio_trio_set_mps_mrs(gxio_trio_context_t * context, uint16_t mps, +int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps, uint16_t mrs, unsigned int mac) { struct set_mps_mrs_param temp; @@ -279,7 +279,7 @@ struct force_rc_link_up_param { unsigned int mac; }; -int gxio_trio_force_rc_link_up(gxio_trio_context_t * context, unsigned int mac) +int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac) { struct force_rc_link_up_param temp; struct force_rc_link_up_param *params = &temp; @@ -296,7 +296,7 @@ struct force_ep_link_up_param { unsigned int mac; }; -int gxio_trio_force_ep_link_up(gxio_trio_context_t * context, unsigned int mac) +int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac) { struct force_ep_link_up_param temp; struct force_ep_link_up_param *params = &temp; @@ -313,7 +313,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_trio_get_mmio_base(gxio_trio_context_t * context, HV_PTE *base) +int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base) { int __result; struct get_mmio_base_param temp; @@ -334,7 +334,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_trio_check_mmio_offset(gxio_trio_context_t * context, +int gxio_trio_check_mmio_offset(gxio_trio_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/iorpc_usb_host.c b/arch/tile/gxio/iorpc_usb_host.c index cf3c3cc12204..9c820073bfc0 100644 --- a/arch/tile/gxio/iorpc_usb_host.c +++ b/arch/tile/gxio/iorpc_usb_host.c @@ -19,7 +19,7 @@ struct cfg_interrupt_param { union iorpc_interrupt interrupt; }; -int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t * context, int inter_x, +int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event) { struct cfg_interrupt_param temp; @@ -41,7 +41,7 @@ struct register_client_memory_param { unsigned int flags; }; -int gxio_usb_host_register_client_memory(gxio_usb_host_context_t * context, +int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context, HV_PTE pte, unsigned int flags) { struct register_client_memory_param temp; @@ -61,7 +61,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t * context, HV_PTE *base) +int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context, HV_PTE *base) { int __result; struct get_mmio_base_param temp; @@ -82,7 +82,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t * context, +int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/usb_host.c b/arch/tile/gxio/usb_host.c index 66b002f54ecc..785afad7922e 100644 --- a/arch/tile/gxio/usb_host.c +++ b/arch/tile/gxio/usb_host.c @@ -26,7 +26,7 @@ #include #include -int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, +int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index, int is_ehci) { char file[32]; @@ -63,7 +63,7 @@ int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, EXPORT_SYMBOL_GPL(gxio_usb_host_init); -int gxio_usb_host_destroy(gxio_usb_host_context_t * context) +int gxio_usb_host_destroy(gxio_usb_host_context_t *context) { iounmap((void __force __iomem *)(context->mmio_base)); hv_dev_close(context->fd); @@ -76,14 +76,14 @@ int gxio_usb_host_destroy(gxio_usb_host_context_t * context) EXPORT_SYMBOL_GPL(gxio_usb_host_destroy); -void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t * context) +void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context) { return context->mmio_base; } EXPORT_SYMBOL_GPL(gxio_usb_host_get_reg_start); -size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t * context) +size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context) { return HV_USB_HOST_MMIO_SIZE; } diff --git a/arch/tile/include/gxio/iorpc_mpipe.h b/arch/tile/include/gxio/iorpc_mpipe.h index fdd07f88cfd7..4cda03de734f 100644 --- a/arch/tile/include/gxio/iorpc_mpipe.h +++ b/arch/tile/include/gxio/iorpc_mpipe.h @@ -56,89 +56,89 @@ #define GXIO_MPIPE_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_MPIPE_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int stack, unsigned int buffer_size_enum); -int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring); -int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int ring); -int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context, unsigned int ring); -int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_notif_group(gxio_mpipe_context_t * context, +int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context, unsigned int group, gxio_mpipe_notif_group_bits_t bits); -int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t * context, unsigned int count, +int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_bucket(gxio_mpipe_context_t * context, unsigned int bucket, +int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket, MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info); -int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring, unsigned int channel); -int gxio_mpipe_commit_rules(gxio_mpipe_context_t * context, const void *blob, +int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob, size_t blob_size); -int gxio_mpipe_register_client_memory(gxio_mpipe_context_t * context, +int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context, unsigned int iotlb, HV_PTE pte, unsigned int flags); -int gxio_mpipe_link_open_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context, _gxio_mpipe_link_name_t name, unsigned int flags); -int gxio_mpipe_link_close_aux(gxio_mpipe_context_t * context, int mac); +int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac); -int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t * context, int mac, +int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac, uint32_t attr, int64_t val); -int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t * context, uint64_t * sec, - uint64_t * nsec, uint64_t * cycles); +int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec, + uint64_t *nsec, uint64_t *cycles); -int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t * context, uint64_t sec, +int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec, uint64_t nsec, uint64_t cycles); -int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context, int64_t nsec); -int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t * context, +int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context, int32_t ppb); -int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie); +int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie); -int gxio_mpipe_close_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie); +int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie); -int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t * context, HV_PTE *base); +int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base); -int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t * context, +int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_MPIPE_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_mpipe_info.h b/arch/tile/include/gxio/iorpc_mpipe_info.h index 476c5e5ca22c..f0b04284468b 100644 --- a/arch/tile/include/gxio/iorpc_mpipe_info.h +++ b/arch/tile/include/gxio/iorpc_mpipe_info.h @@ -33,18 +33,18 @@ #define GXIO_MPIPE_INFO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context, _gxio_mpipe_link_name_t name); -int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context, unsigned int idx, - _gxio_mpipe_link_name_t * name, - _gxio_mpipe_link_mac_t * mac); + _gxio_mpipe_link_name_t *name, + _gxio_mpipe_link_mac_t *mac); -int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context, HV_PTE *base); -int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_MPIPE_INFO_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h index d95b96fd6c93..376a4f771167 100644 --- a/arch/tile/include/gxio/iorpc_trio.h +++ b/arch/tile/include/gxio/iorpc_trio.h @@ -46,59 +46,59 @@ #define GXIO_TRIO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_TRIO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_trio_alloc_asids(gxio_trio_context_t * context, unsigned int count, +int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_alloc_memory_maps(gxio_trio_context_t * context, +int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context, +int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_alloc_pio_regions(gxio_trio_context_t * context, +int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_init_pio_region_aux(gxio_trio_context_t * context, +int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context, unsigned int pio_region, unsigned int mac, uint32_t bus_address_hi, unsigned int flags); -int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t * context, +int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context, unsigned int map, unsigned long va, uint64_t size, unsigned int asid, unsigned int mac, uint64_t bus_address, unsigned int node, unsigned int order_mode); -int gxio_trio_get_port_property(gxio_trio_context_t * context, +int gxio_trio_get_port_property(gxio_trio_context_t *context, struct pcie_trio_ports_property *trio_ports); -int gxio_trio_config_legacy_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int intx); -int gxio_trio_config_msi_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int mem_map, uint64_t mem_map_base, uint64_t mem_map_limit, unsigned int asid); -int gxio_trio_set_mps_mrs(gxio_trio_context_t * context, uint16_t mps, +int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps, uint16_t mrs, unsigned int mac); -int gxio_trio_force_rc_link_up(gxio_trio_context_t * context, unsigned int mac); +int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac); -int gxio_trio_force_ep_link_up(gxio_trio_context_t * context, unsigned int mac); +int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac); -int gxio_trio_get_mmio_base(gxio_trio_context_t * context, HV_PTE *base); +int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base); -int gxio_trio_check_mmio_offset(gxio_trio_context_t * context, +int gxio_trio_check_mmio_offset(gxio_trio_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_TRIO_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_usb_host.h b/arch/tile/include/gxio/iorpc_usb_host.h index 8622e7d126ad..79962a97de8e 100644 --- a/arch/tile/include/gxio/iorpc_usb_host.h +++ b/arch/tile/include/gxio/iorpc_usb_host.h @@ -31,16 +31,16 @@ #define GXIO_USB_HOST_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_USB_HOST_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t * context, int inter_x, +int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event); -int gxio_usb_host_register_client_memory(gxio_usb_host_context_t * context, +int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context, HV_PTE pte, unsigned int flags); -int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t * context, +int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context, HV_PTE *base); -int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t * context, +int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_USB_HOST_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/usb_host.h b/arch/tile/include/gxio/usb_host.h index 5eedec0e988e..93c9636d2dd7 100644 --- a/arch/tile/include/gxio/usb_host.h +++ b/arch/tile/include/gxio/usb_host.h @@ -53,7 +53,7 @@ typedef struct { * @return Zero if the context was successfully initialized, else a * GXIO_ERR_xxx error code. */ -extern int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, +extern int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index, int is_ehci); /* Destroy a USB context. @@ -68,20 +68,20 @@ extern int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, * @return Zero if the context was successfully destroyed, else a * GXIO_ERR_xxx error code. */ -extern int gxio_usb_host_destroy(gxio_usb_host_context_t * context); +extern int gxio_usb_host_destroy(gxio_usb_host_context_t *context); /* Retrieve the address of the shim's MMIO registers. * * @param context Pointer to a properly initialized gxio_usb_host_context_t. * @return The address of the shim's MMIO registers. */ -extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t * context); +extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context); /* Retrieve the length of the shim's MMIO registers. * * @param context Pointer to a properly initialized gxio_usb_host_context_t. * @return The length of the shim's MMIO registers. */ -extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t * context); +extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context); #endif /* _GXIO_USB_H_ */ -- cgit v1.2.3-59-g8ed1b From 88e2692a4dff0d1f80e9761ade3db2ea59206c1e Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 16 Sep 2013 14:18:21 -0400 Subject: tile: header updates from upstream The hardware architecture descriptor headers have been updated, in particular to reflect some larger MMIO fields on the mPIPE shims for controlling the network hardware, from the recent Gx72 release. Signed-off-by: Chris Metcalf --- arch/tile/include/arch/mpipe.h | 24 ++++++++++---- arch/tile/include/arch/mpipe_constants.h | 6 ++-- arch/tile/include/arch/mpipe_shm.h | 54 +++++++++++++++++++------------- arch/tile/include/arch/trio_constants.h | 10 +++--- 4 files changed, 59 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/tile/include/arch/mpipe.h b/arch/tile/include/arch/mpipe.h index 8a33912fd6cc..904538e754d8 100644 --- a/arch/tile/include/arch/mpipe.h +++ b/arch/tile/include/arch/mpipe.h @@ -176,7 +176,18 @@ typedef union */ uint_reg_t stack_idx : 5; /* Reserved. */ - uint_reg_t __reserved_2 : 5; + uint_reg_t __reserved_2 : 3; + /* + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. This field is ignored on writes. + */ + uint_reg_t inst : 2; /* * Reads as one to indicate that this is a hardware managed buffer. * Ignored on writes since all buffers on a given stack are the same size. @@ -205,7 +216,8 @@ typedef union uint_reg_t c : 2; uint_reg_t size : 3; uint_reg_t hwb : 1; - uint_reg_t __reserved_2 : 5; + uint_reg_t inst : 2; + uint_reg_t __reserved_2 : 3; uint_reg_t stack_idx : 5; uint_reg_t __reserved_1 : 6; int_reg_t va : 35; @@ -231,9 +243,9 @@ typedef union /* Reserved. */ uint_reg_t __reserved_0 : 3; /* eDMA ring being accessed */ - uint_reg_t ring : 5; + uint_reg_t ring : 6; /* Reserved. */ - uint_reg_t __reserved_1 : 18; + uint_reg_t __reserved_1 : 17; /* * This field of the address selects the region (address space) to be * accessed. For the egress DMA post region, this field must be 5. @@ -250,8 +262,8 @@ typedef union uint_reg_t svc_dom : 5; uint_reg_t __reserved_2 : 6; uint_reg_t region : 3; - uint_reg_t __reserved_1 : 18; - uint_reg_t ring : 5; + uint_reg_t __reserved_1 : 17; + uint_reg_t ring : 6; uint_reg_t __reserved_0 : 3; #endif }; diff --git a/arch/tile/include/arch/mpipe_constants.h b/arch/tile/include/arch/mpipe_constants.h index 410a0400e055..84022ac5fe82 100644 --- a/arch/tile/include/arch/mpipe_constants.h +++ b/arch/tile/include/arch/mpipe_constants.h @@ -16,13 +16,13 @@ #ifndef __ARCH_MPIPE_CONSTANTS_H__ #define __ARCH_MPIPE_CONSTANTS_H__ -#define MPIPE_NUM_CLASSIFIERS 10 +#define MPIPE_NUM_CLASSIFIERS 16 #define MPIPE_CLS_MHZ 1200 -#define MPIPE_NUM_EDMA_RINGS 32 +#define MPIPE_NUM_EDMA_RINGS 64 #define MPIPE_NUM_SGMII_MACS 16 -#define MPIPE_NUM_XAUI_MACS 4 +#define MPIPE_NUM_XAUI_MACS 16 #define MPIPE_NUM_LOOPBACK_CHANNELS 4 #define MPIPE_NUM_NON_LB_CHANNELS 28 diff --git a/arch/tile/include/arch/mpipe_shm.h b/arch/tile/include/arch/mpipe_shm.h index f2e9e122818d..13b3c4300e50 100644 --- a/arch/tile/include/arch/mpipe_shm.h +++ b/arch/tile/include/arch/mpipe_shm.h @@ -44,8 +44,14 @@ typedef union * descriptors toggles each time the ring tail pointer wraps. */ uint_reg_t gen : 1; + /** + * For devices with EDMA reorder support, this field allows the + * descriptor to select the egress FIFO. The associated DMA ring must + * have ALLOW_EFIFO_SEL enabled. + */ + uint_reg_t efifo_sel : 6; /** Reserved. Must be zero. */ - uint_reg_t r0 : 7; + uint_reg_t r0 : 1; /** Checksum generation enabled for this transfer. */ uint_reg_t csum : 1; /** @@ -110,7 +116,8 @@ typedef union uint_reg_t notif : 1; uint_reg_t ns : 1; uint_reg_t csum : 1; - uint_reg_t r0 : 7; + uint_reg_t r0 : 1; + uint_reg_t efifo_sel : 6; uint_reg_t gen : 1; #endif @@ -126,14 +133,16 @@ typedef union /** Reserved. */ uint_reg_t __reserved_1 : 3; /** - * Instance ID. For devices that support more than one mPIPE instance, - * this field indicates the buffer owner. If the INST field does not - * match the mPIPE's instance number when a packet is egressed, buffers - * with HWB set will be returned to the other mPIPE instance. + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. */ - uint_reg_t inst : 1; - /** Reserved. */ - uint_reg_t __reserved_2 : 1; + uint_reg_t inst : 2; /** * Always set to one by hardware in iDMA packet descriptors. For eDMA, * indicates whether the buffer will be released to the buffer stack @@ -166,8 +175,7 @@ typedef union uint_reg_t c : 2; uint_reg_t size : 3; uint_reg_t hwb : 1; - uint_reg_t __reserved_2 : 1; - uint_reg_t inst : 1; + uint_reg_t inst : 2; uint_reg_t __reserved_1 : 3; uint_reg_t stack_idx : 5; uint_reg_t __reserved_0 : 6; @@ -408,7 +416,10 @@ typedef union /** * Sequence number applied when packet is distributed. Classifier * selects which sequence number is to be applied by writing the 13-bit - * SQN-selector into this field. + * SQN-selector into this field. For devices that support EXT_SQN (as + * indicated in IDMA_INFO.EXT_SQN_SUPPORT), the GP_SQN can be extended to + * 32-bits via the IDMA_CTL.EXT_SQN register. In this case the + * PACKET_SQN will be reduced to 32 bits. */ uint_reg_t gp_sqn : 16; /** @@ -451,14 +462,16 @@ typedef union /** Reserved. */ uint_reg_t __reserved_5 : 3; /** - * Instance ID. For devices that support more than one mPIPE instance, - * this field indicates the buffer owner. If the INST field does not - * match the mPIPE's instance number when a packet is egressed, buffers - * with HWB set will be returned to the other mPIPE instance. + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. */ - uint_reg_t inst : 1; - /** Reserved. */ - uint_reg_t __reserved_6 : 1; + uint_reg_t inst : 2; /** * Always set to one by hardware in iDMA packet descriptors. For eDMA, * indicates whether the buffer will be released to the buffer stack @@ -491,8 +504,7 @@ typedef union uint_reg_t c : 2; uint_reg_t size : 3; uint_reg_t hwb : 1; - uint_reg_t __reserved_6 : 1; - uint_reg_t inst : 1; + uint_reg_t inst : 2; uint_reg_t __reserved_5 : 3; uint_reg_t stack_idx : 5; uint_reg_t __reserved_4 : 6; diff --git a/arch/tile/include/arch/trio_constants.h b/arch/tile/include/arch/trio_constants.h index 628b045436b8..85647e91a458 100644 --- a/arch/tile/include/arch/trio_constants.h +++ b/arch/tile/include/arch/trio_constants.h @@ -16,21 +16,21 @@ #ifndef __ARCH_TRIO_CONSTANTS_H__ #define __ARCH_TRIO_CONSTANTS_H__ -#define TRIO_NUM_ASIDS 16 +#define TRIO_NUM_ASIDS 32 #define TRIO_NUM_TLBS_PER_ASID 16 #define TRIO_NUM_TPIO_REGIONS 8 #define TRIO_LOG2_NUM_TPIO_REGIONS 3 -#define TRIO_NUM_MAP_MEM_REGIONS 16 -#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 4 +#define TRIO_NUM_MAP_MEM_REGIONS 32 +#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 5 #define TRIO_NUM_MAP_SQ_REGIONS 8 #define TRIO_LOG2_NUM_MAP_SQ_REGIONS 3 #define TRIO_LOG2_NUM_SQ_FIFO_ENTRIES 6 -#define TRIO_NUM_PUSH_DMA_RINGS 32 +#define TRIO_NUM_PUSH_DMA_RINGS 64 -#define TRIO_NUM_PULL_DMA_RINGS 32 +#define TRIO_NUM_PULL_DMA_RINGS 64 #endif /* __ARCH_TRIO_CONSTANTS_H__ */ -- cgit v1.2.3-59-g8ed1b From e823acc0a9e36a5645cdf0c57fa5f738b51bb999 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 16 Sep 2013 14:41:21 -0400 Subject: tile: remove stray blank space The compat sys_llseek() definition addition added a bogus space on an otherwise-blank line. Signed-off-by: Chris Metcalf --- arch/tile/kernel/compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index ed378416b86a..49120843ff96 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -84,7 +84,7 @@ COMPAT_SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned int, offset_high, { return sys_llseek(fd, offset_high, offset_low, result, origin); } - + /* Provide the compat syscall number to call mapping. */ #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), -- cgit v1.2.3-59-g8ed1b From 7c37b617b4e539269b53c625c8d814bf4590a695 Mon Sep 17 00:00:00 2001 From: Gwenhael Goavec-Merou Date: Fri, 16 Aug 2013 08:45:35 +0200 Subject: ARM: imx27.dtsi: fix CSPI PER clock id CSPI PER clock is per2clk (per2_gate id 60) instead of cspiX_ipg_gate. Signed-off-by: Gwenhael Goavec-Merou Acked-by: Sascha Hauer Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx27.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index c037c223619a..b7a1c6d950b9 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -187,7 +187,7 @@ compatible = "fsl,imx27-cspi"; reg = <0x1000e000 0x1000>; interrupts = <16>; - clocks = <&clks 53>, <&clks 53>; + clocks = <&clks 53>, <&clks 60>; clock-names = "ipg", "per"; status = "disabled"; }; @@ -198,7 +198,7 @@ compatible = "fsl,imx27-cspi"; reg = <0x1000f000 0x1000>; interrupts = <15>; - clocks = <&clks 52>, <&clks 52>; + clocks = <&clks 52>, <&clks 60>; clock-names = "ipg", "per"; status = "disabled"; }; @@ -309,7 +309,7 @@ compatible = "fsl,imx27-cspi"; reg = <0x10017000 0x1000>; interrupts = <6>; - clocks = <&clks 51>, <&clks 51>; + clocks = <&clks 51>, <&clks 60>; clock-names = "ipg", "per"; status = "disabled"; }; -- cgit v1.2.3-59-g8ed1b From bdb1b5f2ddd8a32fd0cd78bb68fc76c30266b27c Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 4 Sep 2013 20:49:04 +0800 Subject: ARM: imx: initialize clk_init_data.flags for clk-fixup-mux The clk_init_data.flags of clk-fixup-mux is left there without initialization. It may hold some random data and cause clock framework interpret the clock in an unexpected way. At least on imx6sl, the following division by zero error with sched_clock is seen because of it. Division by zero in kernel. CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.11.0-rc3+ #19 Backtrace: [<80011af0>] (dump_backtrace+0x0/0x10c) from [<80011c90>] (show_stack+0x18/0x1c) r6:3b9aca00 r5:00000020 r4:00000000 r3:00000000 [<80011c78>] (show_stack+0x0/0x1c) from [<8055e02c>] (dump_stack+0x78/0x94) [<8055dfb4>] (dump_stack+0x0/0x94) from [<80011924>] (__div0+0x18/0x20) r4:00000000 r3:00000000 [<8001190c>] (__div0+0x0/0x20) from [<8026c408>] (Ldiv0_64+0x8/0x18) [<8006330c>] (clocks_calc_mult_shift+0x0/0xf8) from [<8072f604>] (setup_sched_clock+0x88/0x1f0) [<8072f57c>] (setup_sched_clock+0x0/0x1f0) from [<8071ad48>] (mxc_timer_init+0xe8/0x17c) [<8071ac60>] (mxc_timer_init+0x0/0x17c) from [<807290b0>] (imx6sl_clocks_init+0x1db8/0x1dc0) r8:807a9ca4 r7:00000000 r6:80777564 r5:8100c1f4 r4:c0820000 [<807272f8>] (imx6sl_clocks_init+0x0/0x1dc0) from [<807420ac>] (of_clk_init+0x40/0x6c) [<8074206c>] (of_clk_init+0x0/0x6c) from [<807290cc>] (imx6sl_timer_init+0x14/0x18) r5:807a8e80 r4:ffffffff [<807290b8>] (imx6sl_timer_init+0x0/0x18) from [<80716e1c>] (time_init+0x24/0x34) [<80716df8>] (time_init+0x0/0x34) from [<80713738>] (start_kernel+0x1b0/0x310) [<80713588>] (start_kernel+0x0/0x310) from [<80008074>] (0x80008074) r7:80770b08 r6:80754cd4 r5:8076c8c4 r4:10c53c7d sched_clock: 32 bits at 0 Hz, resolution 0ns, wraps every 0ms Fix the bug by initializing init.flags as zero. Signed-off-by: Shawn Guo --- arch/arm/mach-imx/clk-fixup-mux.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-imx/clk-fixup-mux.c b/arch/arm/mach-imx/clk-fixup-mux.c index deb4b8093b30..0d40b35c557c 100644 --- a/arch/arm/mach-imx/clk-fixup-mux.c +++ b/arch/arm/mach-imx/clk-fixup-mux.c @@ -90,6 +90,7 @@ struct clk *imx_clk_fixup_mux(const char *name, void __iomem *reg, init.ops = &clk_fixup_mux_ops; init.parent_names = parents; init.num_parents = num_parents; + init.flags = 0; fixup_mux->mux.reg = reg; fixup_mux->mux.shift = shift; -- cgit v1.2.3-59-g8ed1b From 5d5248a6d110d9ec58c1c1525d338e28357a25c2 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 5 Sep 2013 16:02:57 -0300 Subject: ARM: mach-imx: clk-imx51-imx53: Fix 'spdif1_pred' clock registration Since commit beb2d1c1ba (ARM i.MX5: Add S/PDIF clocks), the following clock error appears on mx51: TrustZone Interrupt Controller (TZIC) initialized i.MX51 clk 180: register failed with -17 i.MX5 clk 180: register failed with -17 sched_clock: 32 bits at 24MHz, resolution 41ns, wraps every 178956ms CPU identified as i.MX51, silicon rev 3.0 ... Clock 180 corresponds to 'spdif1_podf' and this clock is getting registered twice. Fix it, by properly registering the 'spdif1_pred' clock, which should not reference 'spdif1_podf'. Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/mach-imx/clk-imx51-imx53.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/clk-imx51-imx53.c b/arch/arm/mach-imx/clk-imx51-imx53.c index 1a56a3319997..d9094b9a5185 100644 --- a/arch/arm/mach-imx/clk-imx51-imx53.c +++ b/arch/arm/mach-imx/clk-imx51-imx53.c @@ -397,7 +397,7 @@ int __init mx51_clocks_init(unsigned long rate_ckil, unsigned long rate_osc, mx51_spdif_xtal_sel, ARRAY_SIZE(mx51_spdif_xtal_sel)); clk[spdif1_sel] = imx_clk_mux("spdif1_sel", MXC_CCM_CSCMR2, 2, 2, spdif_sel, ARRAY_SIZE(spdif_sel)); - clk[spdif1_pred] = imx_clk_divider("spdif1_podf", "spdif1_sel", MXC_CCM_CDCDR, 16, 3); + clk[spdif1_pred] = imx_clk_divider("spdif1_pred", "spdif1_sel", MXC_CCM_CDCDR, 16, 3); clk[spdif1_podf] = imx_clk_divider("spdif1_podf", "spdif1_pred", MXC_CCM_CDCDR, 9, 6); clk[spdif1_com_sel] = imx_clk_mux("spdif1_com_sel", MXC_CCM_CSCMR2, 5, 1, mx51_spdif1_com_sel, ARRAY_SIZE(mx51_spdif1_com_sel)); -- cgit v1.2.3-59-g8ed1b From 6a030ee36a3c3ed5f08a330e88060a6b71e6c7e3 Mon Sep 17 00:00:00 2001 From: "Arnaud Patard (Rtp)" Date: Sat, 7 Sep 2013 15:23:14 +0200 Subject: ARM: imx51.dtsi: fix PATA device clock Commit 718a350 (ARM: i.MX51: Add PATA support) adds pata support to the imx51.dtsi file and is using clock 161. The problem is that the right clock is 172, according to commit 5d530bb (ARM: i.MX5: Add PATA and SRTC clocks). Using the clock 172 makes things work again (and kills a nasty system freeze). Tested-by: Steev Klimaszewski Signed-off-by: Arnaud Patard Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx51.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx51.dtsi b/arch/arm/boot/dts/imx51.dtsi index a85abb424c34..54cee6517902 100644 --- a/arch/arm/boot/dts/imx51.dtsi +++ b/arch/arm/boot/dts/imx51.dtsi @@ -474,7 +474,7 @@ compatible = "fsl,imx51-pata", "fsl,imx27-pata"; reg = <0x83fe0000 0x4000>; interrupts = <70>; - clocks = <&clks 161>; + clocks = <&clks 172>; status = "disabled"; }; -- cgit v1.2.3-59-g8ed1b From 9779f0e1d75c99c79000ed9e303287fc23da3e9e Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Mon, 16 Sep 2013 09:29:03 +0800 Subject: ARM: imx: i.mx6d/q: disable the double linefill feature of PL310 The L2 cache controller(PL310) version on the i.MX6D/Q is r3p1-50rel0 The L2 cache controller(PL310) version on the i.MX6DL/SOLO/SL is r3p2 But according to ARM PL310 errata: 752271 ID: 752271: Double linefill feature can cause data corruption Fault Status: Present in: r3p0, r3p1, r3p1-50rel0. Fixed in r3p2 Workaround: The only workaround to this erratum is to disable the double linefill feature. This is the default behavior. without this patch, you will meet the following error when run the memtester application at: http://pyropus.ca/software/memtester/ FAILURE: 0x00100000 != 0x00200000 at offset 0x01365664. FAILURE: 0x00100000 != 0x00200000 at offset 0x01365668. FAILURE: 0x00100000 != 0x00200000 at offset 0x0136566c. FAILURE: 0x00100000 != 0x00200000 at offset 0x01365670. FAILURE: 0x00100000 != 0x00200000 at offset 0x01365674. FAILURE: 0x00100000 != 0x00200000 at offset 0x01365678. Signed-off-by: Jason Liu Signed-off-by: Shawn Guo --- arch/arm/mach-imx/system.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-imx/system.c b/arch/arm/mach-imx/system.c index 64ff37ea72b1..80c177c36c5f 100644 --- a/arch/arm/mach-imx/system.c +++ b/arch/arm/mach-imx/system.c @@ -117,6 +117,17 @@ void __init imx_init_l2cache(void) /* Configure the L2 PREFETCH and POWER registers */ val = readl_relaxed(l2x0_base + L2X0_PREFETCH_CTRL); val |= 0x70800000; + /* + * The L2 cache controller(PL310) version on the i.MX6D/Q is r3p1-50rel0 + * The L2 cache controller(PL310) version on the i.MX6DL/SOLO/SL is r3p2 + * But according to ARM PL310 errata: 752271 + * ID: 752271: Double linefill feature can cause data corruption + * Fault Status: Present in: r3p0, r3p1, r3p1-50rel0. Fixed in r3p2 + * Workaround: The only workaround to this erratum is to disable the + * double linefill feature. This is the default behavior. + */ + if (cpu_is_imx6q()) + val &= ~(1 << 30 | 1 << 23); writel_relaxed(val, l2x0_base + L2X0_PREFETCH_CTRL); val = L2X0_DYNAMIC_CLK_GATING_EN | L2X0_STNDBY_MODE_EN; writel_relaxed(val, l2x0_base + L2X0_POWER_CTRL); -- cgit v1.2.3-59-g8ed1b From 538bcbe251d621aa19c46babafd01ede8fb6ddde Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Mon, 16 Sep 2013 17:02:45 +0800 Subject: ARM: dts: imx6q: fix the wrong offset of the Pad Mux register The patch "0b7a76a ARM: dts: imx6q{dl}: add DTE pads for uart" adds the DTE pads for uart. For PAD_EIM_D29, the offset of the Pad Mux register should be 0x0c8, not 0x0c4. This patch fixes it. Signed-off-by: Huang Shijie Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6q-pinfunc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6q-pinfunc.h b/arch/arm/boot/dts/imx6q-pinfunc.h index c0e38a45e4bb..9bbe82bdee41 100644 --- a/arch/arm/boot/dts/imx6q-pinfunc.h +++ b/arch/arm/boot/dts/imx6q-pinfunc.h @@ -207,8 +207,8 @@ #define MX6QDL_PAD_EIM_D29__ECSPI4_SS0 0x0c8 0x3dc 0x824 0x2 0x1 #define MX6QDL_PAD_EIM_D29__UART2_RTS_B 0x0c8 0x3dc 0x924 0x4 0x1 #define MX6QDL_PAD_EIM_D29__UART2_CTS_B 0x0c8 0x3dc 0x000 0x4 0x0 -#define MX6QDL_PAD_EIM_D29__UART2_DTE_RTS_B 0x0c4 0x3dc 0x000 0x4 0x0 -#define MX6QDL_PAD_EIM_D29__UART2_DTE_CTS_B 0x0c4 0x3dc 0x924 0x4 0x1 +#define MX6QDL_PAD_EIM_D29__UART2_DTE_RTS_B 0x0c8 0x3dc 0x000 0x4 0x0 +#define MX6QDL_PAD_EIM_D29__UART2_DTE_CTS_B 0x0c8 0x3dc 0x924 0x4 0x1 #define MX6QDL_PAD_EIM_D29__GPIO3_IO29 0x0c8 0x3dc 0x000 0x5 0x0 #define MX6QDL_PAD_EIM_D29__IPU2_CSI1_VSYNC 0x0c8 0x3dc 0x8e4 0x6 0x0 #define MX6QDL_PAD_EIM_D29__IPU1_DI0_PIN14 0x0c8 0x3dc 0x000 0x7 0x0 -- cgit v1.2.3-59-g8ed1b From 3261107ebfd8f6bba57cfcdb89385779fd149a00 Mon Sep 17 00:00:00 2001 From: Bruce Rogers Date: Mon, 9 Sep 2013 09:40:20 -0600 Subject: KVM: x86 emulator: emulate RETF imm Opcode CA This gets used by a DOS based NetWare guest. Signed-off-by: Bruce Rogers Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2bc1e81045b0..ddc3f3d2afdb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2025,6 +2025,17 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) return rc; } +static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt) +{ + int rc; + + rc = em_ret_far(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + rsp_increment(ctxt, ctxt->src.val); + return X86EMUL_CONTINUE; +} + static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) { /* Save real source value, then compare EAX against destination. */ @@ -3763,7 +3774,8 @@ static const struct opcode opcode_table[256] = { G(ByteOp, group11), G(0, group11), /* 0xC8 - 0xCF */ I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave), - N, I(ImplicitOps | Stack, em_ret_far), + I(ImplicitOps | Stack | SrcImmU16, em_ret_far_imm), + I(ImplicitOps | Stack, em_ret_far), D(ImplicitOps), DI(SrcImmByte, intn), D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), /* 0xD0 - 0xD7 */ -- cgit v1.2.3-59-g8ed1b From ba6a3541545542721ce821d1e7e5ce35752e6fdf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 9 Sep 2013 13:52:33 +0200 Subject: KVM: mmu: allow page tables to be in read-only slots Page tables in a read-only memory slot will currently cause a triple fault because the page walker uses gfn_to_hva and it fails on such a slot. OVMF uses such a page table; however, real hardware seems to be fine with that as long as the accessed/dirty bits are set. Save whether the slot is readonly, and later check it when updating the accessed and dirty bits. Reviewed-by: Xiao Guangrong Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/paging_tmpl.h | 20 +++++++++++++++++++- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 14 +++++++++----- 3 files changed, 29 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 043330159179..ad75d77999d0 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -99,6 +99,7 @@ struct guest_walker { pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; + bool pte_writable[PT_MAX_FULL_LEVELS]; unsigned pt_access; unsigned pte_access; gfn_t gfn; @@ -235,6 +236,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, if (pte == orig_pte) continue; + /* + * If the slot is read-only, simply do not process the accessed + * and dirty bits. This is the correct thing to do if the slot + * is ROM, and page tables in read-as-ROM/write-as-MMIO slots + * are only supported if the accessed and dirty bits are already + * set in the ROM (so that MMIO writes are never needed). + * + * Note that NPT does not allow this at all and faults, since + * it always wants nested page table entries for the guest + * page tables to be writable. And EPT works but will simply + * overwrite the read-only memory to set the accessed and dirty + * bits. + */ + if (unlikely(!walker->pte_writable[level - 1])) + continue; + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); if (ret) return ret; @@ -309,7 +326,8 @@ retry_walk: goto error; real_gfn = gpa_to_gfn(real_gfn); - host_addr = gfn_to_hva(vcpu->kvm, real_gfn); + host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, + &walker->pte_writable[walker->level - 1]); if (unlikely(kvm_is_error_hva(host_addr))) goto error; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ca645a01d37a..0fbbc7aa02cb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -533,6 +533,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bf040c4e02b3..979bff485fb0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1058,11 +1058,15 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) EXPORT_SYMBOL_GPL(gfn_to_hva); /* - * The hva returned by this function is only allowed to be read. - * It should pair with kvm_read_hva() or kvm_read_hva_atomic(). + * If writable is set to false, the hva returned by this function is only + * allowed to be read. */ -static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn) +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) { + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + if (writable) + *writable = !memslot_is_readonly(slot); + return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); } @@ -1430,7 +1434,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int r; unsigned long addr; - addr = gfn_to_hva_read(kvm, gfn); + addr = gfn_to_hva_prot(kvm, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; r = kvm_read_hva(data, (void __user *)addr + offset, len); @@ -1468,7 +1472,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, gfn_t gfn = gpa >> PAGE_SHIFT; int offset = offset_in_page(gpa); - addr = gfn_to_hva_read(kvm, gfn); + addr = gfn_to_hva_prot(kvm, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; pagefault_disable(); -- cgit v1.2.3-59-g8ed1b From 72f857950f6f19cba42a9ded078bbc99f10aa667 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 2 Sep 2013 15:25:28 +0300 Subject: KVM: nEPT: reset PDPTR register cache on nested vmentry emulation After nested vmentry stale cache can be used to reload L2 PDPTR pointers which will cause L2 guest to fail. Fix it by invalidating cache on nested vmentry emulation. https://bugzilla.kernel.org/show_bug.cgi?id=60830 Signed-off-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1f1da43ff2a2..48735936860c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7766,6 +7766,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); + __clear_bit(VCPU_EXREG_PDPTR, + (unsigned long *)&vcpu->arch.regs_avail); + __clear_bit(VCPU_EXREG_PDPTR, + (unsigned long *)&vcpu->arch.regs_dirty); } kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); -- cgit v1.2.3-59-g8ed1b From c5f66596313734cd7e95cc748d643d3b9ba2ca81 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 17 Sep 2013 13:58:12 +0200 Subject: MIPS: Provide nice way to access boot CPU's data. boot_cpu_data is used the same as current_cpu_data but returns the CPU data for CPU 0. This means it doesn't have to use smp_processor_id() thus no need to disable preemption. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/cpu-info.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h index 41401d8eb7d1..21c8e29c8f91 100644 --- a/arch/mips/include/asm/cpu-info.h +++ b/arch/mips/include/asm/cpu-info.h @@ -84,6 +84,7 @@ struct cpuinfo_mips { extern struct cpuinfo_mips cpu_data[]; #define current_cpu_data cpu_data[smp_processor_id()] #define raw_current_cpu_data cpu_data[raw_smp_processor_id()] +#define boot_cpu_data cpu_data[0] extern void cpu_probe(void); extern void cpu_report(void); -- cgit v1.2.3-59-g8ed1b From b11247637f798f5eb0af2100acde3711d809e8fd Mon Sep 17 00:00:00 2001 From: Robert Nelson Date: Wed, 7 Aug 2013 09:14:15 -0500 Subject: ARM: dts: omap3-beagle-xm: fix string error in compatible property The beagle and beagle-xm entries were inside the same double quote. Split them to have two distinct entries. Signed-off-by: Robert Nelson Signed-off-by: Benoit Cousson --- arch/arm/boot/dts/omap3-beagle-xm.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts index afdb16417d4e..0c514dc8460c 100644 --- a/arch/arm/boot/dts/omap3-beagle-xm.dts +++ b/arch/arm/boot/dts/omap3-beagle-xm.dts @@ -11,7 +11,7 @@ / { model = "TI OMAP3 BeagleBoard xM"; - compatible = "ti,omap3-beagle-xm, ti,omap3-beagle", "ti,omap3"; + compatible = "ti,omap3-beagle-xm", "ti,omap3-beagle", "ti,omap3"; cpus { cpu@0 { -- cgit v1.2.3-59-g8ed1b From 2ba3549352277514a8e4790adff77a783ee1b9e2 Mon Sep 17 00:00:00 2001 From: Koen Kooi Date: Mon, 9 Sep 2013 16:29:21 +0200 Subject: ARM: dts: am335x-bone*: add DT for BeagleBone Black The BeagleBone Black is basically a regular BeagleBone with eMMC and HDMI added, so create a common dtsi both can use. IMPORTANT: booting the existing am335x-bone.dts will blow up the HDMI transceiver after a dozen boots with an uSD card inserted because LDO will be at 3.3V instead of 1.8. MMC support for AM335x still isn't in, so only the LDO change has been added. Signed-off-by: Koen Kooi Tested-by: Tom Rini Tested-by: Matt Porter Acked-by: Kevin Hilman Tested-by: Kevin Hilman Tested-by: Joel Fernandes Signed-off-by: Benoit Cousson --- arch/arm/boot/dts/Makefile | 1 + arch/arm/boot/dts/am335x-bone-common.dtsi | 262 ++++++++++++++++++++++++++++++ arch/arm/boot/dts/am335x-bone.dts | 256 +---------------------------- arch/arm/boot/dts/am335x-boneblack.dts | 17 ++ 4 files changed, 281 insertions(+), 255 deletions(-) create mode 100644 arch/arm/boot/dts/am335x-bone-common.dtsi create mode 100644 arch/arm/boot/dts/am335x-boneblack.dts (limited to 'arch') diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index cc0f1fb61753..e95af3f5433b 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -183,6 +183,7 @@ dtb-$(CONFIG_ARCH_OMAP2PLUS) += omap2420-h4.dtb \ am335x-evm.dtb \ am335x-evmsk.dtb \ am335x-bone.dtb \ + am335x-boneblack.dtb \ am3517-evm.dtb \ am3517_mt_ventoux.dtb \ am43x-epos-evm.dtb diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi new file mode 100644 index 000000000000..2f66deda9f5c --- /dev/null +++ b/arch/arm/boot/dts/am335x-bone-common.dtsi @@ -0,0 +1,262 @@ +/* + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/ { + model = "TI AM335x BeagleBone"; + compatible = "ti,am335x-bone", "ti,am33xx"; + + cpus { + cpu@0 { + cpu0-supply = <&dcdc2_reg>; + }; + }; + + memory { + device_type = "memory"; + reg = <0x80000000 0x10000000>; /* 256 MB */ + }; + + am33xx_pinmux: pinmux@44e10800 { + pinctrl-names = "default"; + pinctrl-0 = <&clkout2_pin>; + + user_leds_s0: user_leds_s0 { + pinctrl-single,pins = < + 0x54 (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a5.gpio1_21 */ + 0x58 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a6.gpio1_22 */ + 0x5c (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a7.gpio1_23 */ + 0x60 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a8.gpio1_24 */ + >; + }; + + i2c0_pins: pinmux_i2c0_pins { + pinctrl-single,pins = < + 0x188 (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_sda.i2c0_sda */ + 0x18c (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_scl.i2c0_scl */ + >; + }; + + uart0_pins: pinmux_uart0_pins { + pinctrl-single,pins = < + 0x170 (PIN_INPUT_PULLUP | MUX_MODE0) /* uart0_rxd.uart0_rxd */ + 0x174 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* uart0_txd.uart0_txd */ + >; + }; + + clkout2_pin: pinmux_clkout2_pin { + pinctrl-single,pins = < + 0x1b4 (PIN_OUTPUT_PULLDOWN | MUX_MODE3) /* xdma_event_intr1.clkout2 */ + >; + }; + + cpsw_default: cpsw_default { + pinctrl-single,pins = < + /* Slave 1 */ + 0x110 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxerr.mii1_rxerr */ + 0x114 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txen.mii1_txen */ + 0x118 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxdv.mii1_rxdv */ + 0x11c (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd3.mii1_txd3 */ + 0x120 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd2.mii1_txd2 */ + 0x124 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd1.mii1_txd1 */ + 0x128 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd0.mii1_txd0 */ + 0x12c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_txclk.mii1_txclk */ + 0x130 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxclk.mii1_rxclk */ + 0x134 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd3.mii1_rxd3 */ + 0x138 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd2.mii1_rxd2 */ + 0x13c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd1.mii1_rxd1 */ + 0x140 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd0.mii1_rxd0 */ + >; + }; + + cpsw_sleep: cpsw_sleep { + pinctrl-single,pins = < + /* Slave 1 reset value */ + 0x110 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x114 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x118 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x11c (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x120 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x124 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x128 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x12c (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x130 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x134 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x138 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x13c (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x140 (PIN_INPUT_PULLDOWN | MUX_MODE7) + >; + }; + + davinci_mdio_default: davinci_mdio_default { + pinctrl-single,pins = < + /* MDIO */ + 0x148 (PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE0) /* mdio_data.mdio_data */ + 0x14c (PIN_OUTPUT_PULLUP | MUX_MODE0) /* mdio_clk.mdio_clk */ + >; + }; + + davinci_mdio_sleep: davinci_mdio_sleep { + pinctrl-single,pins = < + /* MDIO reset value */ + 0x148 (PIN_INPUT_PULLDOWN | MUX_MODE7) + 0x14c (PIN_INPUT_PULLDOWN | MUX_MODE7) + >; + }; + }; + + ocp { + uart0: serial@44e09000 { + pinctrl-names = "default"; + pinctrl-0 = <&uart0_pins>; + + status = "okay"; + }; + + musb: usb@47400000 { + status = "okay"; + + control@44e10000 { + status = "okay"; + }; + + usb-phy@47401300 { + status = "okay"; + }; + + usb-phy@47401b00 { + status = "okay"; + }; + + usb@47401000 { + status = "okay"; + }; + + usb@47401800 { + status = "okay"; + dr_mode = "host"; + }; + + dma-controller@07402000 { + status = "okay"; + }; + }; + + i2c0: i2c@44e0b000 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c0_pins>; + + status = "okay"; + clock-frequency = <400000>; + + tps: tps@24 { + reg = <0x24>; + }; + + }; + }; + + leds { + pinctrl-names = "default"; + pinctrl-0 = <&user_leds_s0>; + + compatible = "gpio-leds"; + + led@2 { + label = "beaglebone:green:heartbeat"; + gpios = <&gpio1 21 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "heartbeat"; + default-state = "off"; + }; + + led@3 { + label = "beaglebone:green:mmc0"; + gpios = <&gpio1 22 GPIO_ACTIVE_HIGH>; + linux,default-trigger = "mmc0"; + default-state = "off"; + }; + + led@4 { + label = "beaglebone:green:usr2"; + gpios = <&gpio1 23 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + led@5 { + label = "beaglebone:green:usr3"; + gpios = <&gpio1 24 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + }; +}; + +/include/ "tps65217.dtsi" + +&tps { + regulators { + dcdc1_reg: regulator@0 { + regulator-always-on; + }; + + dcdc2_reg: regulator@1 { + /* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */ + regulator-name = "vdd_mpu"; + regulator-min-microvolt = <925000>; + regulator-max-microvolt = <1325000>; + regulator-boot-on; + regulator-always-on; + }; + + dcdc3_reg: regulator@2 { + /* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */ + regulator-name = "vdd_core"; + regulator-min-microvolt = <925000>; + regulator-max-microvolt = <1150000>; + regulator-boot-on; + regulator-always-on; + }; + + ldo1_reg: regulator@3 { + regulator-always-on; + }; + + ldo2_reg: regulator@4 { + regulator-always-on; + }; + + ldo3_reg: regulator@5 { + regulator-always-on; + }; + + ldo4_reg: regulator@6 { + regulator-always-on; + }; + }; +}; + +&cpsw_emac0 { + phy_id = <&davinci_mdio>, <0>; + phy-mode = "mii"; +}; + +&cpsw_emac1 { + phy_id = <&davinci_mdio>, <1>; + phy-mode = "mii"; +}; + +&mac { + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&cpsw_default>; + pinctrl-1 = <&cpsw_sleep>; + +}; + +&davinci_mdio { + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&davinci_mdio_default>; + pinctrl-1 = <&davinci_mdio_sleep>; +}; diff --git a/arch/arm/boot/dts/am335x-bone.dts b/arch/arm/boot/dts/am335x-bone.dts index d318987d44a1..7993c489982c 100644 --- a/arch/arm/boot/dts/am335x-bone.dts +++ b/arch/arm/boot/dts/am335x-bone.dts @@ -8,258 +8,4 @@ /dts-v1/; #include "am33xx.dtsi" - -/ { - model = "TI AM335x BeagleBone"; - compatible = "ti,am335x-bone", "ti,am33xx"; - - cpus { - cpu@0 { - cpu0-supply = <&dcdc2_reg>; - }; - }; - - memory { - device_type = "memory"; - reg = <0x80000000 0x10000000>; /* 256 MB */ - }; - - am33xx_pinmux: pinmux@44e10800 { - pinctrl-names = "default"; - pinctrl-0 = <&clkout2_pin>; - - user_leds_s0: user_leds_s0 { - pinctrl-single,pins = < - 0x54 (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a5.gpio1_21 */ - 0x58 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a6.gpio1_22 */ - 0x5c (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a7.gpio1_23 */ - 0x60 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_a8.gpio1_24 */ - >; - }; - - i2c0_pins: pinmux_i2c0_pins { - pinctrl-single,pins = < - 0x188 (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_sda.i2c0_sda */ - 0x18c (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c0_scl.i2c0_scl */ - >; - }; - - uart0_pins: pinmux_uart0_pins { - pinctrl-single,pins = < - 0x170 (PIN_INPUT_PULLUP | MUX_MODE0) /* uart0_rxd.uart0_rxd */ - 0x174 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* uart0_txd.uart0_txd */ - >; - }; - - clkout2_pin: pinmux_clkout2_pin { - pinctrl-single,pins = < - 0x1b4 (PIN_OUTPUT_PULLDOWN | MUX_MODE3) /* xdma_event_intr1.clkout2 */ - >; - }; - - cpsw_default: cpsw_default { - pinctrl-single,pins = < - /* Slave 1 */ - 0x110 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxerr.mii1_rxerr */ - 0x114 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txen.mii1_txen */ - 0x118 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxdv.mii1_rxdv */ - 0x11c (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd3.mii1_txd3 */ - 0x120 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd2.mii1_txd2 */ - 0x124 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd1.mii1_txd1 */ - 0x128 (PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mii1_txd0.mii1_txd0 */ - 0x12c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_txclk.mii1_txclk */ - 0x130 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxclk.mii1_rxclk */ - 0x134 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd3.mii1_rxd3 */ - 0x138 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd2.mii1_rxd2 */ - 0x13c (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd1.mii1_rxd1 */ - 0x140 (PIN_INPUT_PULLUP | MUX_MODE0) /* mii1_rxd0.mii1_rxd0 */ - >; - }; - - cpsw_sleep: cpsw_sleep { - pinctrl-single,pins = < - /* Slave 1 reset value */ - 0x110 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x114 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x118 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x11c (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x120 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x124 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x128 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x12c (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x130 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x134 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x138 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x13c (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x140 (PIN_INPUT_PULLDOWN | MUX_MODE7) - >; - }; - - davinci_mdio_default: davinci_mdio_default { - pinctrl-single,pins = < - /* MDIO */ - 0x148 (PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE0) /* mdio_data.mdio_data */ - 0x14c (PIN_OUTPUT_PULLUP | MUX_MODE0) /* mdio_clk.mdio_clk */ - >; - }; - - davinci_mdio_sleep: davinci_mdio_sleep { - pinctrl-single,pins = < - /* MDIO reset value */ - 0x148 (PIN_INPUT_PULLDOWN | MUX_MODE7) - 0x14c (PIN_INPUT_PULLDOWN | MUX_MODE7) - >; - }; - }; - - ocp { - uart0: serial@44e09000 { - pinctrl-names = "default"; - pinctrl-0 = <&uart0_pins>; - - status = "okay"; - }; - - musb: usb@47400000 { - status = "okay"; - - control@44e10000 { - status = "okay"; - }; - - usb-phy@47401300 { - status = "okay"; - }; - - usb-phy@47401b00 { - status = "okay"; - }; - - usb@47401000 { - status = "okay"; - }; - - usb@47401800 { - status = "okay"; - dr_mode = "host"; - }; - - dma-controller@07402000 { - status = "okay"; - }; - }; - - i2c0: i2c@44e0b000 { - pinctrl-names = "default"; - pinctrl-0 = <&i2c0_pins>; - - status = "okay"; - clock-frequency = <400000>; - - tps: tps@24 { - reg = <0x24>; - }; - - }; - }; - - leds { - pinctrl-names = "default"; - pinctrl-0 = <&user_leds_s0>; - - compatible = "gpio-leds"; - - led@2 { - label = "beaglebone:green:heartbeat"; - gpios = <&gpio1 21 GPIO_ACTIVE_HIGH>; - linux,default-trigger = "heartbeat"; - default-state = "off"; - }; - - led@3 { - label = "beaglebone:green:mmc0"; - gpios = <&gpio1 22 GPIO_ACTIVE_HIGH>; - linux,default-trigger = "mmc0"; - default-state = "off"; - }; - - led@4 { - label = "beaglebone:green:usr2"; - gpios = <&gpio1 23 GPIO_ACTIVE_HIGH>; - default-state = "off"; - }; - - led@5 { - label = "beaglebone:green:usr3"; - gpios = <&gpio1 24 GPIO_ACTIVE_HIGH>; - default-state = "off"; - }; - }; -}; - -/include/ "tps65217.dtsi" - -&tps { - regulators { - dcdc1_reg: regulator@0 { - regulator-always-on; - }; - - dcdc2_reg: regulator@1 { - /* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */ - regulator-name = "vdd_mpu"; - regulator-min-microvolt = <925000>; - regulator-max-microvolt = <1325000>; - regulator-boot-on; - regulator-always-on; - }; - - dcdc3_reg: regulator@2 { - /* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */ - regulator-name = "vdd_core"; - regulator-min-microvolt = <925000>; - regulator-max-microvolt = <1150000>; - regulator-boot-on; - regulator-always-on; - }; - - ldo1_reg: regulator@3 { - regulator-always-on; - }; - - ldo2_reg: regulator@4 { - regulator-always-on; - }; - - ldo3_reg: regulator@5 { - regulator-always-on; - }; - - ldo4_reg: regulator@6 { - regulator-always-on; - }; - }; -}; - -&cpsw_emac0 { - phy_id = <&davinci_mdio>, <0>; - phy-mode = "mii"; -}; - -&cpsw_emac1 { - phy_id = <&davinci_mdio>, <1>; - phy-mode = "mii"; -}; - -&mac { - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&cpsw_default>; - pinctrl-1 = <&cpsw_sleep>; - -}; - -&davinci_mdio { - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&davinci_mdio_default>; - pinctrl-1 = <&davinci_mdio_sleep>; -}; +#include "am335x-bone-common.dtsi" diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts new file mode 100644 index 000000000000..197cadf72d2c --- /dev/null +++ b/arch/arm/boot/dts/am335x-boneblack.dts @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +#include "am33xx.dtsi" +#include "am335x-bone-common.dtsi" + +&ldo3_reg { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; +}; -- cgit v1.2.3-59-g8ed1b From 6f61ee232a338190e0c8fd59f169604c9b90f748 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 21 Aug 2013 20:01:30 +0530 Subject: ARM: dts: OMAP5: fix reg property size USB3 block has a 64KiB space, another 64KiB is used for the wrapper. Without this change, resource_size() will get confused and driver won't probe because size will be negative. Signed-off-by: Felipe Balbi Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Benoit Cousson --- arch/arm/boot/dts/omap5.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 07be2cd7b318..359498b5a509 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -637,7 +637,7 @@ omap_dwc3@4a020000 { compatible = "ti,dwc3"; ti,hwmods = "usb_otg_ss"; - reg = <0x4a020000 0x1000>; + reg = <0x4a020000 0x10000>; interrupts = ; #address-cells = <1>; #size-cells = <1>; @@ -645,7 +645,7 @@ ranges; dwc3@4a030000 { compatible = "snps,dwc3"; - reg = <0x4a030000 0x1000>; + reg = <0x4a030000 0x10000>; interrupts = ; usb-phy = <&usb2_phy>, <&usb3_phy>; tx-fifo-resize; -- cgit v1.2.3-59-g8ed1b From b6731f78c2c03e5d8732c809b86719cbbf5c7794 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 21 Aug 2013 20:01:31 +0530 Subject: ARM: dts: OMAP5: fix ocp2scp DTS data Fix the DTS data for ocp2scp node by adding the missing reg property. Signed-off-by: Felipe Balbi Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Benoit Cousson --- arch/arm/boot/dts/omap5.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 359498b5a509..7cdea1bfea09 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -652,10 +652,11 @@ }; }; - ocp2scp { + ocp2scp@4a080000 { compatible = "ti,omap-ocp2scp"; #address-cells = <1>; #size-cells = <1>; + reg = <0x4a080000 0x20>; ranges; ti,hwmods = "ocp2scp1"; usb2_phy: usb2phy@4a084000 { -- cgit v1.2.3-59-g8ed1b From ff522058bd717506b2fa066fa564657f2b86477e Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 17 Sep 2013 12:44:31 +0200 Subject: MIPS: Fix accessing to per-cpu data when flushing the cache This fixes the following issue BUG: using smp_processor_id() in preemptible [00000000] code: kjournald/1761 caller is blast_dcache32+0x30/0x254 Call Trace: [<8047f02c>] dump_stack+0x8/0x34 [<802e7e40>] debug_smp_processor_id+0xe0/0xf0 [<80114d94>] blast_dcache32+0x30/0x254 [<80118484>] r4k_dma_cache_wback_inv+0x200/0x288 [<80110ff0>] mips_dma_map_sg+0x108/0x180 [<80355098>] ide_dma_prepare+0xf0/0x1b8 [<8034eaa4>] do_rw_taskfile+0x1e8/0x33c [<8035951c>] ide_do_rw_disk+0x298/0x3e4 [<8034a3c4>] do_ide_request+0x2e0/0x704 [<802bb0dc>] __blk_run_queue+0x44/0x64 [<802be000>] queue_unplugged.isra.36+0x1c/0x54 [<802beb94>] blk_flush_plug_list+0x18c/0x24c [<802bec6c>] blk_finish_plug+0x18/0x48 [<8026554c>] journal_commit_transaction+0x3b8/0x151c [<80269648>] kjournald+0xec/0x238 [<8014ac00>] kthread+0xb8/0xc0 [<8010268c>] ret_from_kernel_thread+0x14/0x1c Caches in most systems are identical - but not always, so we can't avoid the use of smp_call_function() by just looking at the boot CPU's data, have to fiddle with preemption instead. Signed-off-by: Ralf Baechle Cc: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5835 --- arch/mips/mm/c-r4k.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index f749f687ee87..40dced23e768 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -601,6 +602,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) /* Catch bad driver code */ BUG_ON(size == 0); + preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) r4k_blast_scache(); @@ -621,6 +623,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) R4600_HIT_CACHEOP_WAR_IMPL; blast_dcache_range(addr, addr + size); } + preempt_enable(); bc_wback_inv(addr, size); __sync(); @@ -631,6 +634,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) /* Catch bad driver code */ BUG_ON(size == 0); + preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) r4k_blast_scache(); @@ -655,6 +659,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) R4600_HIT_CACHEOP_WAR_IMPL; blast_inv_dcache_range(addr, addr + size); } + preempt_enable(); bc_inv(addr, size); __sync(); -- cgit v1.2.3-59-g8ed1b From 6cefc8ee768402db087adb4193c7919564e1ac1b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 16 Sep 2013 14:22:04 +0200 Subject: ARM: ux500: disable outer cache debug This fixes a multiplatform regression on the Ux500. When compiling the Ux500 platforms in multiplatform configurations both PL310_ERRATA_588369 and PL310_ERRATA_727915 would crash the platform when trying to launch the init process. The Ux500 cannot access the debug registers of the PL310, it will just crash if you try this. So disable this by setting the debug callback to NULL when initializing the l2x0 on this platform. Cc: Lee Jones Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson --- arch/arm/mach-ux500/cache-l2x0.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-ux500/cache-l2x0.c b/arch/arm/mach-ux500/cache-l2x0.c index 82ccf1d98735..264f894c0e3d 100644 --- a/arch/arm/mach-ux500/cache-l2x0.c +++ b/arch/arm/mach-ux500/cache-l2x0.c @@ -69,6 +69,7 @@ static int __init ux500_l2x0_init(void) * some SMI service available. */ outer_cache.disable = NULL; + outer_cache.set_debug = NULL; return 0; } -- cgit v1.2.3-59-g8ed1b From a0396b9bd5a4a7baf598b60d2ca53c605c440a42 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Mon, 16 Sep 2013 09:01:24 -0700 Subject: ARM: multi_v7_defconfig: enable ARM_ATAG_DTB_COMPAT Without this, legacy platforms that can boot with a multiplatform kernel but that need the DTB to be appended, won't have a way to pass firmware-set bootargs to the kernel. This is needed to boot multi_v7_defconfig on snowball, for instance. Signed-off-by: Olof Johansson --- arch/arm/configs/multi_v7_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 6e572c64cf5a..aba4ec728651 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -46,6 +46,7 @@ CONFIG_ARCH_ZYNQ=y CONFIG_SMP=y CONFIG_HIGHPTE=y CONFIG_ARM_APPENDED_DTB=y +CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_NET=y CONFIG_UNIX=y CONFIG_INET=y -- cgit v1.2.3-59-g8ed1b From 0be9c7a89f75072e091cd079d76194aec8d1fb09 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 15 Sep 2013 11:07:23 +0300 Subject: KVM: VMX: set "blocked by NMI" flag if EPT violation happens during IRET from NMI Set "blocked by NMI" flag if EPT violation happens during IRET from NMI otherwise NMI can be called recursively causing stack corruption. Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 48735936860c..a1216de9ffda 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5339,6 +5339,15 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) return 0; } + /* + * EPT violation happened while executing iret from NMI, + * "blocked by NMI" bit has to be set before next VM entry. + * There are errata that may cause this bit to not be set: + * AAK134, BY25. + */ + if (exit_qualification & INTR_INFO_UNBLOCK_NMI) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); trace_kvm_page_fault(gpa, exit_qualification); -- cgit v1.2.3-59-g8ed1b From 69f24d1784b631b81a54eb57c49bf46536dd2382 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 17 Sep 2013 10:25:47 +0200 Subject: MIPS: Optimize current_cpu_type() for better code. o Move current_cpu_type() to a separate header file o #ifdefing on supported CPU types lets modern GCC know that certain code in callers may be discarded ideally turning current_cpu_type() into a function returning a constant. o Use current_cpu_type() rather than direct access to struct cpuinfo_mips. Signed-off-by: Ralf Baechle Cc: Steven J. Hill Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5833/ --- arch/mips/cavium-octeon/csrc-octeon.c | 1 + arch/mips/dec/prom/init.c | 1 + arch/mips/include/asm/cpu-features.h | 6 - arch/mips/include/asm/cpu-type.h | 203 +++++++++++++++++++++ .../include/asm/mach-ip22/cpu-feature-overrides.h | 2 + .../include/asm/mach-ip27/cpu-feature-overrides.h | 2 + .../include/asm/mach-ip28/cpu-feature-overrides.h | 2 + arch/mips/kernel/cpu-probe.c | 3 +- arch/mips/kernel/idle.c | 3 +- arch/mips/kernel/time.c | 1 + arch/mips/kernel/traps.c | 3 +- arch/mips/mm/c-octeon.c | 6 +- arch/mips/mm/c-r4k.c | 14 +- arch/mips/mm/dma-default.c | 1 + arch/mips/mm/page.c | 1 + arch/mips/mm/sc-mips.c | 3 +- arch/mips/mm/tlb-r4k.c | 1 + arch/mips/mm/tlbex.c | 1 + arch/mips/oprofile/common.c | 1 + 19 files changed, 236 insertions(+), 19 deletions(-) create mode 100644 arch/mips/include/asm/cpu-type.h (limited to 'arch') diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c index 02193953eb9e..b752c4ed0b79 100644 --- a/arch/mips/cavium-octeon/csrc-octeon.c +++ b/arch/mips/cavium-octeon/csrc-octeon.c @@ -12,6 +12,7 @@ #include #include +#include #include #include diff --git a/arch/mips/dec/prom/init.c b/arch/mips/dec/prom/init.c index ab169046e442..468f665de7bb 100644 --- a/arch/mips/dec/prom/init.c +++ b/arch/mips/dec/prom/init.c @@ -13,6 +13,7 @@ #include #include +#include #include #include diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index fa44f3ec5302..51680d15ca8e 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -13,12 +13,6 @@ #include #include -#ifndef current_cpu_type -#define current_cpu_type() current_cpu_data.cputype -#endif - -#define boot_cpu_type() cpu_data[0].cputype - /* * SMP assumption: Options of CPU 0 are a superset of all processors. * This is true for all known MIPS systems. diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h new file mode 100644 index 000000000000..4a402cc60c03 --- /dev/null +++ b/arch/mips/include/asm/cpu-type.h @@ -0,0 +1,203 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003, 2004 Ralf Baechle + * Copyright (C) 2004 Maciej W. Rozycki + */ +#ifndef __ASM_CPU_TYPE_H +#define __ASM_CPU_TYPE_H + +#include +#include + +static inline int __pure __get_cpu_type(const int cpu_type) +{ + switch (cpu_type) { +#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2E) || \ + defined(CONFIG_SYS_HAS_CPU_LOONGSON2F) + case CPU_LOONGSON2: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_LOONGSON1B + case CPU_LOONGSON1: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R1 + case CPU_4KC: + case CPU_ALCHEMY: + case CPU_BMIPS3300: + case CPU_BMIPS4350: + case CPU_PR4450: + case CPU_BMIPS32: + case CPU_JZRISC: +#endif + +#if defined(CONFIG_SYS_HAS_CPU_MIPS32_R1) || \ + defined(CONFIG_SYS_HAS_CPU_MIPS32_R2) + case CPU_4KEC: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R2 + case CPU_4KSC: + case CPU_24K: + case CPU_34K: + case CPU_1004K: + case CPU_74K: + case CPU_M14KC: + case CPU_M14KEC: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS64_R1 + case CPU_5KC: + case CPU_5KE: + case CPU_20KC: + case CPU_25KF: + case CPU_SB1: + case CPU_SB1A: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS64_R2 + /* + * All MIPS64 R2 processors have their own special symbols. That is, + * there currently is no pure R2 core + */ +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R3000 + case CPU_R2000: + case CPU_R3000: + case CPU_R3000A: + case CPU_R3041: + case CPU_R3051: + case CPU_R3052: + case CPU_R3081: + case CPU_R3081E: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_TX39XX + case CPU_TX3912: + case CPU_TX3922: + case CPU_TX3927: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_VR41XX + case CPU_VR41XX: + case CPU_VR4111: + case CPU_VR4121: + case CPU_VR4122: + case CPU_VR4131: + case CPU_VR4133: + case CPU_VR4181: + case CPU_VR4181A: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R4300 + case CPU_R4300: + case CPU_R4310: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R4X00 + case CPU_R4000PC: + case CPU_R4000SC: + case CPU_R4000MC: + case CPU_R4200: + case CPU_R4400PC: + case CPU_R4400SC: + case CPU_R4400MC: + case CPU_R4600: + case CPU_R4700: + case CPU_R4640: + case CPU_R4650: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_TX49XX + case CPU_TX49XX: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R5000 + case CPU_R5000: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R5432 + case CPU_R5432: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R5500 + case CPU_R5500: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R6000 + case CPU_R6000: + case CPU_R6000A: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_NEVADA + case CPU_NEVADA: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R8000 + case CPU_R8000: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R10000 + case CPU_R10000: + case CPU_R12000: + case CPU_R14000: +#endif +#ifdef CONFIG_SYS_HAS_CPU_RM7000 + case CPU_RM7000: + case CPU_SR71000: +#endif +#ifdef CONFIG_SYS_HAS_CPU_RM9000 + case CPU_RM9000: +#endif +#ifdef CONFIG_SYS_HAS_CPU_SB1 + case CPU_SB1: + case CPU_SB1A: +#endif +#ifdef CONFIG_SYS_HAS_CPU_CAVIUM_OCTEON + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_BMIPS4380 + case CPU_BMIPS4380: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_BMIPS5000 + case CPU_BMIPS5000: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_XLP + case CPU_XLP: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_XLR + case CPU_XLR: +#endif + break; + default: + unreachable(); + } + + return cpu_type; +} + +static inline int __pure current_cpu_type(void) +{ + const int cpu_type = current_cpu_data.cputype; + + return __get_cpu_type(cpu_type); +} + +static inline int __pure boot_cpu_type(void) +{ + const int cpu_type = cpu_data[0].cputype; + + return __get_cpu_type(cpu_type); +} + +#endif /* __ASM_CPU_TYPE_H */ diff --git a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h index f4caacd25552..1bcb6421205e 100644 --- a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h @@ -8,6 +8,8 @@ #ifndef __ASM_MACH_IP22_CPU_FEATURE_OVERRIDES_H #define __ASM_MACH_IP22_CPU_FEATURE_OVERRIDES_H +#include + /* * IP22 with a variety of processors so we can't use defaults for everything. */ diff --git a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h index 1d2b6ff60d33..d6111aa2e886 100644 --- a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h @@ -8,6 +8,8 @@ #ifndef __ASM_MACH_IP27_CPU_FEATURE_OVERRIDES_H #define __ASM_MACH_IP27_CPU_FEATURE_OVERRIDES_H +#include + /* * IP27 only comes with R10000 family processors all using the same config */ diff --git a/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h index 65e9c856390d..4cec06d133db 100644 --- a/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h @@ -9,6 +9,8 @@ #ifndef __ASM_MACH_IP28_CPU_FEATURE_OVERRIDES_H #define __ASM_MACH_IP28_CPU_FEATURE_OVERRIDES_H +#include + /* * IP28 only comes with R10000 family processors all using the same config */ diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 37663c7862a5..9ef2b049d3c0 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -55,7 +56,7 @@ static inline void check_errata(void) { struct cpuinfo_mips *c = ¤t_cpu_data; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_34K: /* * Erratum "RPS May Cause Incorrect Instruction Execution" diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 42f8875d2444..f7991d95bff9 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -136,7 +137,7 @@ void __init check_wait(void) return; } - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R3081: case CPU_R3081E: cpu_wait = r3081_wait; diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index 364d26ae4215..dcb8e5d3bb8a 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index aec3408edd4b..524841f02803 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -622,7 +623,7 @@ static int simulate_rdhwr(struct pt_regs *regs, int rd, int rt) regs->regs[rt] = read_c0_count(); return 0; case 3: /* Count register resolution */ - switch (current_cpu_data.cputype) { + switch (current_cpu_type()) { case CPU_20KC: case CPU_25KF: regs->regs[rt] = 1; diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c index 729e7702b1de..c8efdb5b6ee0 100644 --- a/arch/mips/mm/c-octeon.c +++ b/arch/mips/mm/c-octeon.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -186,9 +187,10 @@ static void probe_octeon(void) unsigned long dcache_size; unsigned int config1; struct cpuinfo_mips *c = ¤t_cpu_data; + int cputype = current_cpu_type(); config1 = read_c0_config1(); - switch (c->cputype) { + switch (cputype) { case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: c->icache.linesz = 2 << ((config1 >> 19) & 7); @@ -199,7 +201,7 @@ static void probe_octeon(void) c->icache.sets * c->icache.ways * c->icache.linesz; c->icache.waybit = ffs(icache_size / c->icache.ways) - 1; c->dcache.linesz = 128; - if (c->cputype == CPU_CAVIUM_OCTEON_PLUS) + if (cputype == CPU_CAVIUM_OCTEON_PLUS) c->dcache.sets = 2; /* CN5XXX has two Dcache sets */ else c->dcache.sets = 1; /* CN3XXX has one Dcache set */ diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 40dced23e768..73ca8c52e83f 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -814,7 +815,7 @@ static void probe_pcache(void) unsigned long config1; unsigned int lsize; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R4600: /* QED style two way caches? */ case CPU_R4700: case CPU_R5000: @@ -1050,7 +1051,7 @@ static void probe_pcache(void) * normally they'd suffer from aliases but magic in the hardware deals * with that for us so we don't need to take care ourselves. */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_20KC: case CPU_25KF: case CPU_SB1: @@ -1070,7 +1071,7 @@ static void probe_pcache(void) case CPU_34K: case CPU_74K: case CPU_1004K: - if (c->cputype == CPU_74K) + if (current_cpu_type() == CPU_74K) alias_74k_erratum(c); if ((read_c0_config7() & (1 << 16))) { /* effectively physically indexed dcache, @@ -1083,7 +1084,7 @@ static void probe_pcache(void) c->dcache.flags |= MIPS_CACHE_ALIASES; } - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_20KC: /* * Some older 20Kc chips doesn't have the 'VI' bit in @@ -1212,7 +1213,7 @@ static void setup_scache(void) * processors don't have a S-cache that would be relevant to the * Linux memory management. */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R4000SC: case CPU_R4000MC: case CPU_R4400SC: @@ -1389,9 +1390,8 @@ static void r4k_cache_error_setup(void) { extern char __weak except_vec2_generic; extern char __weak except_vec2_sb1; - struct cpuinfo_mips *c = ¤t_cpu_data; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_SB1: case CPU_SB1A: set_uncached_handler(0x100, &except_vec2_sb1, 0x80); diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 664e523653d0..f25a7e9f8cbc 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -18,6 +18,7 @@ #include #include +#include #include #include diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c index 218c2109a55d..cbd81d17793a 100644 --- a/arch/mips/mm/page.c +++ b/arch/mips/mm/page.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index 5d01392e3518..08d05aee8788 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -71,7 +72,7 @@ static inline int mips_sc_is_activated(struct cpuinfo_mips *c) unsigned int tmp; /* Check the bypass bit (L2B) */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_34K: case CPU_74K: case CPU_1004K: diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 00b26a67a06d..bb3a5f643e97 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 821b45175dc1..9bb3a9363b06 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index 5e5424753b56..4d1736fc1955 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "op_impl.h" -- cgit v1.2.3-59-g8ed1b From 851320e3f33503f557135a7fef6da66a2f7eec55 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 13 Sep 2013 12:09:53 -0700 Subject: ARM: dts: Fix muxing and regulator for wl12xx on the SDIO bus for pandaboard Commit b42b9181 (ARM: OMAP2+: Remove board-omap4panda.c) removed legacy booting in favor of device tree based booting for pandaboard. That caused the WLAN to stop working as the related .dts entries fell through the cracks. The legacy muxing was setting pulls for GPIO 48 and 49, so let's keep that behaviour for now to avoid further regressions for BT and FM. Also input logic was enabled for MMC CLK line, but I've verified that the input logic we don't need enabled for CLK line as it's not bidirectional. Also, we want to use non-removable instead of ti,non-removable as the ti,non-removable also sets no_regulator_off_init which is really not what we want as then wl12xx won't get powered up and down which is needed for resetting it. Note that looks like the WLAN interface fails to come up after a warm reset, but that most likely was also happening with the legacy booting and needs a separate fix. Cc: Paolo Pisati Cc: Rajendra Nayak Cc: Luciano Coelho Signed-off-by: Tony Lindgren Signed-off-by: Benoit Cousson --- arch/arm/boot/dts/omap4-panda-common.dtsi | 46 ++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi index faa95b5b242e..814ab67c8c29 100644 --- a/arch/arm/boot/dts/omap4-panda-common.dtsi +++ b/arch/arm/boot/dts/omap4-panda-common.dtsi @@ -107,6 +107,19 @@ */ clock-frequency = <19200000>; }; + + /* regulator for wl12xx on sdio5 */ + wl12xx_vmmc: wl12xx_vmmc { + pinctrl-names = "default"; + pinctrl-0 = <&wl12xx_gpio>; + compatible = "regulator-fixed"; + regulator-name = "vwl1271"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + gpio = <&gpio2 11 0>; + startup-delay-us = <70000>; + enable-active-high; + }; }; &omap4_pmx_wkup { @@ -235,6 +248,33 @@ 0x1c (PIN_OUTPUT | MUX_MODE3) /* gpio_wk8 */ >; }; + + /* + * wl12xx GPIO outputs for WLAN_EN, BT_EN, FM_EN, BT_WAKEUP + * REVISIT: Are the pull-ups needed for GPIO 48 and 49? + */ + wl12xx_gpio: pinmux_wl12xx_gpio { + pinctrl-single,pins = < + 0x26 (PIN_OUTPUT | MUX_MODE3) /* gpmc_a19.gpio_43 */ + 0x2c (PIN_OUTPUT | MUX_MODE3) /* gpmc_a22.gpio_46 */ + 0x30 (PIN_OUTPUT_PULLUP | MUX_MODE3) /* gpmc_a24.gpio_48 */ + 0x32 (PIN_OUTPUT_PULLUP | MUX_MODE3) /* gpmc_a25.gpio_49 */ + >; + }; + + /* wl12xx GPIO inputs and SDIO pins */ + wl12xx_pins: pinmux_wl12xx_pins { + pinctrl-single,pins = < + 0x38 (PIN_INPUT | MUX_MODE3) /* gpmc_ncs2.gpio_52 */ + 0x3a (PIN_INPUT | MUX_MODE3) /* gpmc_ncs3.gpio_53 */ + 0x108 (PIN_OUTPUT | MUX_MODE0) /* sdmmc5_clk.sdmmc5_clk */ + 0x10a (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_cmd.sdmmc5_cmd */ + 0x10c (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat0.sdmmc5_dat0 */ + 0x10e (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat1.sdmmc5_dat1 */ + 0x110 (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat2.sdmmc5_dat2 */ + 0x112 (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc5_dat3.sdmmc5_dat3 */ + >; + }; }; &i2c1 { @@ -314,8 +354,12 @@ }; &mmc5 { - ti,non-removable; + pinctrl-names = "default"; + pinctrl-0 = <&wl12xx_pins>; + vmmc-supply = <&wl12xx_vmmc>; + non-removable; bus-width = <4>; + cap-power-off-card; }; &emif1 { -- cgit v1.2.3-59-g8ed1b From 775d2418f309052641d94c896f73dc779cf7ba1b Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 13 Sep 2013 12:09:57 -0700 Subject: ARM: dts: Fix muxing and regulator for wl12xx on the SDIO bus for blaze Commit 76787b3b (ARM: OMAP2+: Remove board-4430sdp.c) removed legacy booting in favor of device tree based booting for 4430sdp. That caused the WLAN to stop working as the related .dts entries fell through the cracks. I don't have the "1283 PG 2.21 connectivity device" on my 4430sdp, but the earlier version of this patch was tested by Luciano Coelho. This version has left out the input logic for MMC CLK line compared to the earlier version as that is not bidirectional, and should be safe to do. Cc: Rajendra Nayak Cc: Luciano Coelho Cc: Ruslan Bilovol Signed-off-by: Tony Lindgren Signed-off-by: Benoit Cousson --- arch/arm/boot/dts/omap4-sdp.dts | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts index 7951b4ea500a..4f78380ecdb8 100644 --- a/arch/arm/boot/dts/omap4-sdp.dts +++ b/arch/arm/boot/dts/omap4-sdp.dts @@ -140,6 +140,19 @@ "DMic", "Digital Mic", "Digital Mic", "Digital Mic1 Bias"; }; + + /* regulator for wl12xx on sdio5 */ + wl12xx_vmmc: wl12xx_vmmc { + pinctrl-names = "default"; + pinctrl-0 = <&wl12xx_gpio>; + compatible = "regulator-fixed"; + regulator-name = "vwl1271"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + gpio = <&gpio2 22 0>; + startup-delay-us = <70000>; + enable-active-high; + }; }; &omap4_pmx_wkup { @@ -295,6 +308,26 @@ 0xf0 (PIN_INPUT_PULLUP | MUX_MODE0) /* i2c4_sda */ >; }; + + /* wl12xx GPIO output for WLAN_EN */ + wl12xx_gpio: pinmux_wl12xx_gpio { + pinctrl-single,pins = < + 0x3c (PIN_OUTPUT | MUX_MODE3) /* gpmc_nwp.gpio_54 */ + >; + }; + + /* wl12xx GPIO inputs and SDIO pins */ + wl12xx_pins: pinmux_wl12xx_pins { + pinctrl-single,pins = < + 0x3a (PIN_INPUT | MUX_MODE3) /* gpmc_ncs3.gpio_53 */ + 0x108 (PIN_OUTPUT | MUX_MODE3) /* sdmmc5_clk.sdmmc5_clk */ + 0x10a (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_cmd.sdmmc5_cmd */ + 0x10c (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat0.sdmmc5_dat0 */ + 0x10e (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat1.sdmmc5_dat1 */ + 0x110 (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat2.sdmmc5_dat2 */ + 0x112 (PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc5_dat3.sdmmc5_dat3 */ + >; + }; }; &i2c1 { @@ -420,8 +453,12 @@ }; &mmc5 { + pinctrl-names = "default"; + pinctrl-0 = <&wl12xx_pins>; + vmmc-supply = <&wl12xx_vmmc>; + non-removable; bus-width = <4>; - ti,non-removable; + cap-power-off-card; }; &emif1 { -- cgit v1.2.3-59-g8ed1b From c89efa731e70a12908a0c155518ebbfd2d6ca605 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 17 Sep 2013 19:16:38 +0100 Subject: ARM: 7836/1: add __get_user_unaligned/__put_user_unaligned BTRFS is now relying on those since v3.12-rc1. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/include/asm/uaccess.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 7e1f76027f66..72abdc541f38 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -19,6 +19,13 @@ #include #include +#if __LINUX_ARM_ARCH__ < 6 +#include +#else +#define __get_user_unaligned __get_user +#define __put_user_unaligned __put_user +#endif + #define VERIFY_READ 0 #define VERIFY_WRITE 1 -- cgit v1.2.3-59-g8ed1b From 65399f03266e138506417920952e1c8ed022ec47 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Tue, 10 Sep 2013 17:35:23 +0200 Subject: ARM: dts: igep00x0: Add pinmux configuration for MCBSP2 Add pinmux configuration for MCBSP2 connected to the TDM interface. With this configuration the Headset modules works as expected. Signed-off-by: Enric Balletbo i Serra Acked-by: Javier Martinez Canillas Signed-off-by: Benoit Cousson --- arch/arm/boot/dts/omap3-igep.dtsi | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap3-igep.dtsi b/arch/arm/boot/dts/omap3-igep.dtsi index bc48b114eae6..2326d11462a5 100644 --- a/arch/arm/boot/dts/omap3-igep.dtsi +++ b/arch/arm/boot/dts/omap3-igep.dtsi @@ -48,6 +48,15 @@ >; }; + mcbsp2_pins: pinmux_mcbsp2_pins { + pinctrl-single,pins = < + 0x10c (PIN_INPUT | MUX_MODE0) /* mcbsp2_fsx.mcbsp2_fsx */ + 0x10e (PIN_INPUT | MUX_MODE0) /* mcbsp2_clkx.mcbsp2_clkx */ + 0x110 (PIN_INPUT | MUX_MODE0) /* mcbsp2_dr.mcbsp2.dr */ + 0x112 (PIN_OUTPUT | MUX_MODE0) /* mcbsp2_dx.mcbsp2_dx */ + >; + }; + mmc1_pins: pinmux_mmc1_pins { pinctrl-single,pins = < 0x114 (PIN_INPUT_PULLUP | MUX_MODE0) /* sdmmc1_clk.sdmmc1_clk */ @@ -93,6 +102,11 @@ clock-frequency = <400000>; }; +&mcbsp2 { + pinctrl-names = "default"; + pinctrl-0 = <&mcbsp2_pins>; +}; + &mmc1 { pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins>; -- cgit v1.2.3-59-g8ed1b From 700870119f49084da004ab588ea2b799689efaf7 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Thu, 18 Apr 2013 07:51:34 -0700 Subject: x86, efi: Don't map Boot Services on i386 Add patch to fix 32bit EFI service mapping (rhbz 726701) Multiple people are reporting hitting the following WARNING on i386, WARNING: at arch/x86/mm/ioremap.c:102 __ioremap_caller+0x3d3/0x440() Modules linked in: Pid: 0, comm: swapper Not tainted 3.9.0-rc7+ #95 Call Trace: [] warn_slowpath_common+0x5f/0x80 [] ? __ioremap_caller+0x3d3/0x440 [] ? __ioremap_caller+0x3d3/0x440 [] warn_slowpath_null+0x1d/0x20 [] __ioremap_caller+0x3d3/0x440 [] ? get_usage_chars+0xfb/0x110 [] ? vprintk_emit+0x147/0x480 [] ? efi_enter_virtual_mode+0x1e4/0x3de [] ioremap_cache+0x1a/0x20 [] ? efi_enter_virtual_mode+0x1e4/0x3de [] efi_enter_virtual_mode+0x1e4/0x3de [] start_kernel+0x286/0x2f4 [] ? repair_env_string+0x51/0x51 [] i386_start_kernel+0x12c/0x12f Due to the workaround described in commit 916f676f8 ("x86, efi: Retain boot service code until after switching to virtual mode") EFI Boot Service regions are mapped for a period during boot. Unfortunately, with the limited size of the i386 direct kernel map it's possible that some of the Boot Service regions will not be directly accessible, which causes them to be ioremap()'d, triggering the above warning as the regions are marked as E820_RAM in the e820 memmap. There are currently only two situations where we need to map EFI Boot Service regions, 1. To workaround the firmware bug described in 916f676f8 2. To access the ACPI BGRT image but since we haven't seen an i386 implementation that requires either, this simple fix should suffice for now. [ Added to changelog - Matt ] Reported-by: Bryan O'Donoghue Acked-by: Tom Zanussi Acked-by: Darren Hart Cc: Josh Triplett Cc: Matthew Garrett Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Signed-off-by: Josh Boyer Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 90f6ed127096..c7e22ab29a5a 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -912,10 +912,13 @@ void __init efi_enter_virtual_mode(void) for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; - if (!(md->attribute & EFI_MEMORY_RUNTIME) && - md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; + if (!(md->attribute & EFI_MEMORY_RUNTIME)) { +#ifdef CONFIG_X86_64 + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) +#endif + continue; + } size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; -- cgit v1.2.3-59-g8ed1b From 9c9b415c50bc298ac61412dff856eae2f54889ee Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 12 Sep 2013 13:47:32 +0200 Subject: MIPS: Reimplement get_cycles(). This essentially reverts commit efb9ca08b5a2374b29938cdcab417ce4feb14b54 (kernel.org) / 58020a106879a8b372068741c81f0015c9b0b96dbv [[MIPS] Change get_cycles to always return 0.] Most users of get_cycles() invoke it as a timing interface. That's why in modern kernels it was never very much missed for. /dev/random however uses get_cycles() in the how the jitter in the interrupt timing contains some useful entropy. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/timex.h | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h index 6529704aa73a..c5424757da65 100644 --- a/arch/mips/include/asm/timex.h +++ b/arch/mips/include/asm/timex.h @@ -10,7 +10,9 @@ #ifdef __KERNEL__ +#include #include +#include /* * This is the clock rate of the i8253 PIT. A MIPS system may not have @@ -33,9 +35,38 @@ typedef unsigned int cycles_t; +/* + * On R4000/R4400 before version 5.0 an erratum exists such that if the + * cycle counter is read in the exact moment that it is matching the + * compare register, no interrupt will be generated. + * + * There is a suggested workaround and also the erratum can't strike if + * the compare interrupt isn't being used as the clock source device. + * However for now the implementaton of this function doesn't get these + * fine details right. + */ static inline cycles_t get_cycles(void) { - return 0; + switch (boot_cpu_type()) { + case CPU_R4400PC: + case CPU_R4400SC: + case CPU_R4400MC: + if ((read_c0_prid() & 0xff) >= 0x0050) + return read_c0_count(); + break; + + case CPU_R4000PC: + case CPU_R4000SC: + case CPU_R4000MC: + break; + + default: + if (cpu_has_counter) + return read_c0_count(); + break; + } + + return 0; /* no usable counter */ } #endif /* __KERNEL__ */ -- cgit v1.2.3-59-g8ed1b From 258e1e73793650f74ca5626b9c02d8631d5fbdf2 Mon Sep 17 00:00:00 2001 From: Leonid Yegoshin Date: Tue, 10 Sep 2013 19:36:04 -0500 Subject: MIPS: Fix VGA_MAP_MEM macro. Use the CKSEG1ADDR macro when calculating VGA_MAP_MEM. [ralf@linux-mips.org: Include Signed-off-by: Steven J. Hill Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5814/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/vga.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/vga.h b/arch/mips/include/asm/vga.h index f4cff7e4fa8a..f82c83749a08 100644 --- a/arch/mips/include/asm/vga.h +++ b/arch/mips/include/asm/vga.h @@ -6,6 +6,7 @@ #ifndef _ASM_VGA_H #define _ASM_VGA_H +#include #include /* @@ -13,7 +14,7 @@ * access the videoram directly without any black magic. */ -#define VGA_MAP_MEM(x, s) (0xb0000000L + (unsigned long)(x)) +#define VGA_MAP_MEM(x, s) CKSEG1ADDR(0x10000000L + (unsigned long)(x)) #define vga_readb(x) (*(x)) #define vga_writeb(x, y) (*(y) = (x)) -- cgit v1.2.3-59-g8ed1b From c8a5b7bc75342542ce5e3556780e8a03357ff686 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 18 Sep 2013 15:08:52 +0200 Subject: ARM: u300: hide submenus Right now the U300 submenus are showcased for everyone even if we're not on v5 multiplatforms. Hide this in the multiplatform configuration properly. Cc: arm@kernel.org Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson --- arch/arm/mach-u300/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-u300/Kconfig b/arch/arm/mach-u300/Kconfig index a85adcd00882..a1659863bfd5 100644 --- a/arch/arm/mach-u300/Kconfig +++ b/arch/arm/mach-u300/Kconfig @@ -1,7 +1,3 @@ -menu "ST-Ericsson AB U300/U335 Platform" - -comment "ST-Ericsson Mobile Platform Products" - config ARCH_U300 bool "ST-Ericsson U300 Series" if ARCH_MULTI_V5 depends on MMU @@ -25,7 +21,9 @@ config ARCH_U300 help Support for ST-Ericsson U300 series mobile platforms. -comment "ST-Ericsson U300/U335 Feature Selections" +if ARCH_U300 + +menu "ST-Ericsson AB U300/U335 Platform" config MACH_U300 depends on ARCH_U300 @@ -53,3 +51,5 @@ config MACH_U300_SPIDUMMY SPI framework and ARM PL022 support. endmenu + +endif -- cgit v1.2.3-59-g8ed1b From d26b17edafc45187c30cae134a5e5429d58ad676 Mon Sep 17 00:00:00 2001 From: Andrea Adami Date: Mon, 16 Sep 2013 00:04:42 +0200 Subject: ARM: sa1100: collie.c: fall back to jedec_probe flash detection Zaurus collie contains 2 LH28F640BFHE-PTTL90 (64M 4Mx16) and at the moment cfi will not detect the collie NOR. In the meanwhile we can revert to the jedec-probe map which has been fixed with following commit: mtd: jedec_probe: fix LH28F640BF definition fe2f4c8e0bf2756b670ee78fa9772613a2ea8495 Somehow this is unsatisfactory because the flash is mounted READ ONLY (as from factory, with a RO cramfs) Signed-off-by: Andrea Adami Signed-off-by: Olof Johansson --- arch/arm/mach-sa1100/collie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index 612a45689770..7fb96ebdc0fb 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -289,7 +289,7 @@ static void collie_flash_exit(void) } static struct flash_platform_data collie_flash_data = { - .map_name = "cfi_probe", + .map_name = "jedec_probe", .init = collie_flash_init, .set_vpp = collie_set_vpp, .exit = collie_flash_exit, -- cgit v1.2.3-59-g8ed1b From b42b4f3af8b1ac2f7b09781d523bf918f366d8d6 Mon Sep 17 00:00:00 2001 From: Wu Zhangjin Date: Wed, 10 Nov 2010 00:25:53 +0800 Subject: MIPS: Remove useless comment about kprobe from arch/mips/Makefile The commit c1bf207d6ee1eb72e9c10365edbdc7c9ff7fb9b0 (kernel.org) rsp. 58e9ad32a48dce37ffeea912f55bd1c94b85ad7f (lmo) [MIPS: kprobe: Add support] introduced a useless comment. Signed-off-by: Wu Zhangjin Cc: linux-mips@linux-mips.org Cc: David Daney Patchwork: https://patchwork.linux-mips.org/patch/1765/ Signed-off-by: Ralf Baechle --- arch/mips/Makefile | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 75a36ad11ff5..ca8f8340d75f 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -288,9 +288,6 @@ endif vmlinux.32: vmlinux $(OBJCOPY) -O $(32bit-bfd) $(OBJCOPYFLAGS) $< $@ - -#obj-$(CONFIG_KPROBES) += kprobes.o - # # The 64-bit ELF tools are pretty broken so at this time we generate 64-bit # ELF files from 32-bit files by conversion. -- cgit v1.2.3-59-g8ed1b From 8ff374b9c296b96484d5e63b45b22d0862ffee8f Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 17 Sep 2013 16:58:10 +0100 Subject: MIPS: Cleanup CP0 PRId and CP1 FPIR register access masks Replace hardcoded CP0 PRId and CP1 FPIR register access masks throughout. The change does not touch places that use shifted or partial masks. Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5838/ Signed-off-by: Ralf Baechle --- arch/mips/alchemy/common/usb.c | 3 ++- arch/mips/bcm63xx/cpu.c | 4 +-- arch/mips/include/asm/cpu.h | 38 ++++++++++++++++++++------- arch/mips/include/asm/mach-au1x00/au1000.h | 4 ++- arch/mips/kernel/cpu-probe.c | 42 ++++++++++++++++-------------- arch/mips/mm/c-r4k.c | 11 ++++---- arch/mips/mti-malta/malta-time.c | 5 ++-- arch/mips/mti-sead3/sead3-time.c | 3 ++- arch/mips/netlogic/xlr/fmn-config.c | 3 ++- arch/mips/sibyte/bcm1480/setup.c | 3 ++- arch/mips/sibyte/sb1250/setup.c | 3 ++- arch/mips/sni/setup.c | 3 ++- 12 files changed, 77 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/mips/alchemy/common/usb.c b/arch/mips/alchemy/common/usb.c index fcc695626117..2adc7edda49c 100644 --- a/arch/mips/alchemy/common/usb.c +++ b/arch/mips/alchemy/common/usb.c @@ -14,6 +14,7 @@ #include #include #include +#include #include /* control register offsets */ @@ -358,7 +359,7 @@ static inline int au1200_coherency_bug(void) { #if defined(CONFIG_DMA_COHERENT) /* Au1200 AB USB does not support coherent memory */ - if (!(read_c0_prid() & 0xff)) { + if (!(read_c0_prid() & PRID_REV_MASK)) { printk(KERN_INFO "Au1200 USB: this is chip revision AB !!\n"); printk(KERN_INFO "Au1200 USB: update your board or re-configure" " the kernel\n"); diff --git a/arch/mips/bcm63xx/cpu.c b/arch/mips/bcm63xx/cpu.c index 7e17374a9ae8..b713cd64b087 100644 --- a/arch/mips/bcm63xx/cpu.c +++ b/arch/mips/bcm63xx/cpu.c @@ -306,14 +306,14 @@ void __init bcm63xx_cpu_init(void) switch (c->cputype) { case CPU_BMIPS3300: - if ((read_c0_prid() & 0xff00) != PRID_IMP_BMIPS3300_ALT) + if ((read_c0_prid() & PRID_IMP_MASK) != PRID_IMP_BMIPS3300_ALT) __cpu_name[cpu] = "Broadcom BCM6338"; /* fall-through */ case CPU_BMIPS32: chipid_reg = BCM_6345_PERF_BASE; break; case CPU_BMIPS4350: - switch ((read_c0_prid() & 0xff)) { + switch ((read_c0_prid() & PRID_REV_MASK)) { case 0x04: chipid_reg = BCM_3368_PERF_BASE; break; diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 71b9f1998be7..d2035e16502a 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -3,15 +3,14 @@ * various MIPS cpu types. * * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - * Copyright (C) 2004 Maciej W. Rozycki + * Copyright (C) 2004, 2013 Maciej W. Rozycki */ #ifndef _ASM_CPU_H #define _ASM_CPU_H -/* Assigned Company values for bits 23:16 of the PRId Register - (CP0 register 15, select 0). As of the MIPS32 and MIPS64 specs from - MTI, the PRId register is defined in this (backwards compatible) - way: +/* + As of the MIPS32 and MIPS64 specs from MTI, the PRId register (CP0 + register 15, select 0) is defined in this (backwards compatible) way: +----------------+----------------+----------------+----------------+ | Company Options| Company ID | Processor ID | Revision | @@ -23,6 +22,14 @@ spec. */ +#define PRID_OPT_MASK 0xff000000 + +/* + * Assigned Company values for bits 23:16 of the PRId register. + */ + +#define PRID_COMP_MASK 0xff0000 + #define PRID_COMP_LEGACY 0x000000 #define PRID_COMP_MIPS 0x010000 #define PRID_COMP_BROADCOM 0x020000 @@ -38,10 +45,17 @@ #define PRID_COMP_INGENIC 0xd00000 /* - * Assigned values for the product ID register. In order to detect a - * certain CPU type exactly eventually additional registers may need to - * be examined. These are valid when 23:16 == PRID_COMP_LEGACY + * Assigned Processor ID (implementation) values for bits 15:8 of the PRId + * register. In order to detect a certain CPU type exactly eventually + * additional registers may need to be examined. */ + +#define PRID_IMP_MASK 0xff00 + +/* + * These are valid when 23:16 == PRID_COMP_LEGACY + */ + #define PRID_IMP_R2000 0x0100 #define PRID_IMP_AU1_REV1 0x0100 #define PRID_IMP_AU1_REV2 0x0200 @@ -182,11 +196,15 @@ #define PRID_IMP_NETLOGIC_XLP2XX 0x1200 /* - * Definitions for 7:0 on legacy processors + * Particular Revision values for bits 7:0 of the PRId register. */ #define PRID_REV_MASK 0x00ff +/* + * Definitions for 7:0 on legacy processors + */ + #define PRID_REV_TX4927 0x0022 #define PRID_REV_TX4937 0x0030 #define PRID_REV_R4400 0x0040 @@ -227,6 +245,8 @@ * 31 16 15 8 7 0 */ +#define FPIR_IMP_MASK 0xff00 + #define FPIR_IMP_NONE 0x0000 enum cpu_type_enum { diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h index 3e11a468cdf8..54f9e84db8ac 100644 --- a/arch/mips/include/asm/mach-au1x00/au1000.h +++ b/arch/mips/include/asm/mach-au1x00/au1000.h @@ -43,6 +43,8 @@ #include #include +#include + /* cpu pipeline flush */ void static inline au_sync(void) { @@ -140,7 +142,7 @@ static inline int au1xxx_cpu_needs_config_od(void) static inline int alchemy_get_cputype(void) { - switch (read_c0_prid() & 0xffff0000) { + switch (read_c0_prid() & (PRID_OPT_MASK | PRID_COMP_MASK)) { case 0x00030000: return ALCHEMY_CPU_AU1000; break; diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 9ef2b049d3c0..9be68091bdf2 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -123,7 +123,7 @@ static inline unsigned long cpu_get_fpu_id(void) */ static inline int __cpu_has_fpu(void) { - return ((cpu_get_fpu_id() & 0xff00) != FPIR_IMP_NONE); + return ((cpu_get_fpu_id() & FPIR_IMP_MASK) != FPIR_IMP_NONE); } static inline void cpu_probe_vmbits(struct cpuinfo_mips *c) @@ -323,7 +323,7 @@ static void decode_configs(struct cpuinfo_mips *c) static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) { - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_R2000: c->cputype = CPU_R2000; __cpu_name[cpu] = "R2000"; @@ -334,7 +334,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) c->tlbsize = 64; break; case PRID_IMP_R3000: - if ((c->processor_id & 0xff) == PRID_REV_R3000A) { + if ((c->processor_id & PRID_REV_MASK) == PRID_REV_R3000A) { if (cpu_has_confreg()) { c->cputype = CPU_R3081E; __cpu_name[cpu] = "R3081"; @@ -354,7 +354,8 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) break; case PRID_IMP_R4000: if (read_c0_config() & CONF_SC) { - if ((c->processor_id & 0xff) >= PRID_REV_R4400) { + if ((c->processor_id & PRID_REV_MASK) >= + PRID_REV_R4400) { c->cputype = CPU_R4400PC; __cpu_name[cpu] = "R4400PC"; } else { @@ -362,7 +363,8 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "R4000PC"; } } else { - if ((c->processor_id & 0xff) >= PRID_REV_R4400) { + if ((c->processor_id & PRID_REV_MASK) >= + PRID_REV_R4400) { c->cputype = CPU_R4400SC; __cpu_name[cpu] = "R4400SC"; } else { @@ -455,7 +457,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "TX3927"; c->tlbsize = 64; } else { - switch (c->processor_id & 0xff) { + switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_TX3912: c->cputype = CPU_TX3912; __cpu_name[cpu] = "TX3912"; @@ -641,7 +643,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_4KC: c->cputype = CPU_4KC; __cpu_name[cpu] = "MIPS 4Kc"; @@ -712,7 +714,7 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_alchemy(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_AU1_REV1: case PRID_IMP_AU1_REV2: c->cputype = CPU_ALCHEMY; @@ -731,7 +733,7 @@ static inline void cpu_probe_alchemy(struct cpuinfo_mips *c, unsigned int cpu) break; case 4: __cpu_name[cpu] = "Au1200"; - if ((c->processor_id & 0xff) == 2) + if ((c->processor_id & PRID_REV_MASK) == 2) __cpu_name[cpu] = "Au1250"; break; case 5: @@ -749,12 +751,12 @@ static inline void cpu_probe_sibyte(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_SB1: c->cputype = CPU_SB1; __cpu_name[cpu] = "SiByte SB1"; /* FPU in pass1 is known to have issues. */ - if ((c->processor_id & 0xff) < 0x02) + if ((c->processor_id & PRID_REV_MASK) < 0x02) c->options &= ~(MIPS_CPU_FPU | MIPS_CPU_32FPR); break; case PRID_IMP_SB1A: @@ -767,7 +769,7 @@ static inline void cpu_probe_sibyte(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_sandcraft(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_SR71000: c->cputype = CPU_SR71000; __cpu_name[cpu] = "Sandcraft SR71000"; @@ -780,7 +782,7 @@ static inline void cpu_probe_sandcraft(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_nxp(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_PR4450: c->cputype = CPU_PR4450; __cpu_name[cpu] = "Philips PR4450"; @@ -792,7 +794,7 @@ static inline void cpu_probe_nxp(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_BMIPS32_REV4: case PRID_IMP_BMIPS32_REV8: c->cputype = CPU_BMIPS32; @@ -807,7 +809,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) set_elf_platform(cpu, "bmips3300"); break; case PRID_IMP_BMIPS43XX: { - int rev = c->processor_id & 0xff; + int rev = c->processor_id & PRID_REV_MASK; if (rev >= PRID_REV_BMIPS4380_LO && rev <= PRID_REV_BMIPS4380_HI) { @@ -833,7 +835,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_cavium(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_CAVIUM_CN38XX: case PRID_IMP_CAVIUM_CN31XX: case PRID_IMP_CAVIUM_CN30XX: @@ -876,7 +878,7 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) decode_configs(c); /* JZRISC does not implement the CP0 counter. */ c->options &= ~MIPS_CPU_COUNTER; - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_JZRISC: c->cputype = CPU_JZRISC; __cpu_name[cpu] = "Ingenic JZRISC"; @@ -891,7 +893,7 @@ static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu) { decode_configs(c); - if ((c->processor_id & 0xff00) == PRID_IMP_NETLOGIC_AU13XX) { + if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_NETLOGIC_AU13XX) { c->cputype = CPU_ALCHEMY; __cpu_name[cpu] = "Au1300"; /* following stuff is not for Alchemy */ @@ -906,7 +908,7 @@ static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu) MIPS_CPU_EJTAG | MIPS_CPU_LLSC); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_NETLOGIC_XLP2XX: c->cputype = CPU_XLP; __cpu_name[cpu] = "Broadcom XLPII"; @@ -985,7 +987,7 @@ void cpu_probe(void) c->cputype = CPU_UNKNOWN; c->processor_id = read_c0_prid(); - switch (c->processor_id & 0xff0000) { + switch (c->processor_id & PRID_COMP_MASK) { case PRID_COMP_LEGACY: cpu_probe_legacy(c, cpu); break; diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 73ca8c52e83f..ae500ca76580 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -792,12 +792,12 @@ static inline void alias_74k_erratum(struct cpuinfo_mips *c) * aliases. In this case it is better to treat the cache as always * having aliases. */ - if ((c->processor_id & 0xff) <= PRID_REV_ENCODE_332(2, 4, 0)) + if ((c->processor_id & PRID_REV_MASK) <= PRID_REV_ENCODE_332(2, 4, 0)) c->dcache.flags |= MIPS_CACHE_VTAG; - if ((c->processor_id & 0xff) == PRID_REV_ENCODE_332(2, 4, 0)) + if ((c->processor_id & PRID_REV_MASK) == PRID_REV_ENCODE_332(2, 4, 0)) write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); - if (((c->processor_id & 0xff00) == PRID_IMP_1074K) && - ((c->processor_id & 0xff) <= PRID_REV_ENCODE_332(1, 1, 0))) { + if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_1074K && + (c->processor_id & PRID_REV_MASK) <= PRID_REV_ENCODE_332(1, 1, 0)) { c->dcache.flags |= MIPS_CACHE_VTAG; write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); } @@ -1031,7 +1031,8 @@ static void probe_pcache(void) * presumably no vendor is shipping his hardware in the "bad" * configuration. */ - if ((prid & 0xff00) == PRID_IMP_R4000 && (prid & 0xff) < 0x40 && + if ((prid & PRID_IMP_MASK) == PRID_IMP_R4000 && + (prid & PRID_REV_MASK) < PRID_REV_R4400 && !(config & CONF_SC) && c->icache.linesz != 16 && PAGE_SIZE <= 0x8000) panic("Improper R4000SC processor configuration detected"); diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 53aad4a35375..a18af5fce67e 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -76,7 +77,7 @@ static void __init estimate_frequencies(void) #endif #if defined (CONFIG_KVM_GUEST) && defined (CONFIG_KVM_HOST_FREQ) - unsigned int prid = read_c0_prid() & 0xffff00; + unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK); /* * XXXKYMA: hardwire the CPU frequency to Host Freq/4 @@ -169,7 +170,7 @@ unsigned int get_c0_compare_int(void) void __init plat_time_init(void) { - unsigned int prid = read_c0_prid() & 0xffff00; + unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK); unsigned int freq; estimate_frequencies(); diff --git a/arch/mips/mti-sead3/sead3-time.c b/arch/mips/mti-sead3/sead3-time.c index a43ea3cc0a3b..552d26c34386 100644 --- a/arch/mips/mti-sead3/sead3-time.c +++ b/arch/mips/mti-sead3/sead3-time.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -34,7 +35,7 @@ static void __iomem *status_reg = (void __iomem *)0xbf000410; */ static unsigned int __init estimate_cpu_frequency(void) { - unsigned int prid = read_c0_prid() & 0xffff00; + unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK); unsigned int tick = 0; unsigned int freq; unsigned int orig; diff --git a/arch/mips/netlogic/xlr/fmn-config.c b/arch/mips/netlogic/xlr/fmn-config.c index ed3bf0e3f309..c7622c6e5f67 100644 --- a/arch/mips/netlogic/xlr/fmn-config.c +++ b/arch/mips/netlogic/xlr/fmn-config.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -187,7 +188,7 @@ void xlr_board_info_setup(void) int processor_id, num_core; num_core = hweight32(nlm_current_node()->coremask); - processor_id = read_c0_prid() & 0xff00; + processor_id = read_c0_prid() & PRID_IMP_MASK; setup_cpu_fmninfo(cpu, num_core); switch (processor_id) { diff --git a/arch/mips/sibyte/bcm1480/setup.c b/arch/mips/sibyte/bcm1480/setup.c index 05ed92c92b69..8e2e04f77870 100644 --- a/arch/mips/sibyte/bcm1480/setup.c +++ b/arch/mips/sibyte/bcm1480/setup.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -119,7 +120,7 @@ void __init bcm1480_setup(void) uint64_t sys_rev; int plldiv; - sb1_pass = read_c0_prid() & 0xff; + sb1_pass = read_c0_prid() & PRID_REV_MASK; sys_rev = __raw_readq(IOADDR(A_SCD_SYSTEM_REVISION)); soc_type = SYS_SOC_TYPE(sys_rev); part_type = G_SYS_PART(sys_rev); diff --git a/arch/mips/sibyte/sb1250/setup.c b/arch/mips/sibyte/sb1250/setup.c index a14bd4cb0bc0..3c02b2a77ae9 100644 --- a/arch/mips/sibyte/sb1250/setup.c +++ b/arch/mips/sibyte/sb1250/setup.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -182,7 +183,7 @@ void __init sb1250_setup(void) int plldiv; int bad_config = 0; - sb1_pass = read_c0_prid() & 0xff; + sb1_pass = read_c0_prid() & PRID_REV_MASK; sys_rev = __raw_readq(IOADDR(A_SCD_SYSTEM_REVISION)); soc_type = SYS_SOC_TYPE(sys_rev); soc_pass = G_SYS_REVISION(sys_rev); diff --git a/arch/mips/sni/setup.c b/arch/mips/sni/setup.c index 5b09b3544edd..efad85c8c823 100644 --- a/arch/mips/sni/setup.c +++ b/arch/mips/sni/setup.c @@ -25,6 +25,7 @@ #endif #include +#include #include #include #include @@ -173,7 +174,7 @@ void __init plat_mem_setup(void) system_type = "RM300-Cxx"; break; case SNI_BRD_PCI_DESKTOP: - switch (read_c0_prid() & 0xff00) { + switch (read_c0_prid() & PRID_IMP_MASK) { case PRID_IMP_R4600: case PRID_IMP_R4700: system_type = "RM200-C20"; -- cgit v1.2.3-59-g8ed1b From 9213ad77070ea75fc3a5e43e3d9e9c4146e4930a Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 18 Sep 2013 19:08:15 +0100 Subject: MIPS: 74K/1074K: Correct erratum workaround. Make sure 74K revision numbers are not applied to the 1074K. Also catch invalid usage. Signed-off-by: Maciej W. Rozycki Cc: Steven J. Hill Cc: Leonid Yegoshin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5857/ Signed-off-by: Ralf Baechle --- arch/mips/mm/c-r4k.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index ae500ca76580..627883bc6d5f 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -786,20 +786,30 @@ static inline void rm7k_erratum31(void) static inline void alias_74k_erratum(struct cpuinfo_mips *c) { + unsigned int imp = c->processor_id & PRID_IMP_MASK; + unsigned int rev = c->processor_id & PRID_REV_MASK; + /* * Early versions of the 74K do not update the cache tags on a * vtag miss/ptag hit which can occur in the case of KSEG0/KUSEG * aliases. In this case it is better to treat the cache as always * having aliases. */ - if ((c->processor_id & PRID_REV_MASK) <= PRID_REV_ENCODE_332(2, 4, 0)) - c->dcache.flags |= MIPS_CACHE_VTAG; - if ((c->processor_id & PRID_REV_MASK) == PRID_REV_ENCODE_332(2, 4, 0)) - write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); - if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_1074K && - (c->processor_id & PRID_REV_MASK) <= PRID_REV_ENCODE_332(1, 1, 0)) { - c->dcache.flags |= MIPS_CACHE_VTAG; - write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + switch (imp) { + case PRID_IMP_74K: + if (rev <= PRID_REV_ENCODE_332(2, 4, 0)) + c->dcache.flags |= MIPS_CACHE_VTAG; + if (rev == PRID_REV_ENCODE_332(2, 4, 0)) + write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + break; + case PRID_IMP_1074K: + if (rev <= PRID_REV_ENCODE_332(1, 1, 0)) { + c->dcache.flags |= MIPS_CACHE_VTAG; + write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + } + break; + default: + BUG(); } } -- cgit v1.2.3-59-g8ed1b From 2cfeed314207f808077edb2f1ba41ba1ebbe3e69 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 18 Sep 2013 12:01:58 -0700 Subject: ARM: OMAP4: Fix clock_get error for GPMC during boot Looks like we still have the legacy clock alias name for omap4 GPMC (General Purpose Memory Controller), so let's fix it for the device tree naming. There's no need to keep the legacy naming as omap4 is DT only nowadays. Without this fix we get the following error while booting: [ 0.440399] omap-gpmc 50000000.gpmc: error: clk_get Reported-by: Olof Johansson Cc: stable@vger.kernel.org # v3.11 Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/cclock44xx_data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/cclock44xx_data.c b/arch/arm/mach-omap2/cclock44xx_data.c index 1d5b5290d2af..b237950eb8a3 100644 --- a/arch/arm/mach-omap2/cclock44xx_data.c +++ b/arch/arm/mach-omap2/cclock44xx_data.c @@ -1632,7 +1632,7 @@ static struct omap_clk omap44xx_clks[] = { CLK(NULL, "auxclk5_src_ck", &auxclk5_src_ck), CLK(NULL, "auxclk5_ck", &auxclk5_ck), CLK(NULL, "auxclkreq5_ck", &auxclkreq5_ck), - CLK("omap-gpmc", "fck", &dummy_ck), + CLK("50000000.gpmc", "fck", &dummy_ck), CLK("omap_i2c.1", "ick", &dummy_ck), CLK("omap_i2c.2", "ick", &dummy_ck), CLK("omap_i2c.3", "ick", &dummy_ck), -- cgit v1.2.3-59-g8ed1b From 4cf9cf8967f0316722c6cb4ebf8093b74a8e1dd6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 18 Sep 2013 12:01:58 -0700 Subject: ARM: OMAP: fix return value check in omap_device_build_from_dt() In case of error, the function omap_device_alloc() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Signed-off-by: Wei Yongjun Acked-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index f99f68e1e85b..b69dd9abb50a 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -158,7 +158,7 @@ static int omap_device_build_from_dt(struct platform_device *pdev) } od = omap_device_alloc(pdev, hwmods, oh_cnt); - if (!od) { + if (IS_ERR(od)) { dev_err(&pdev->dev, "Cannot allocate omap_device for :%s\n", oh_name); ret = PTR_ERR(od); -- cgit v1.2.3-59-g8ed1b From f70bf2a3fdc1d8c53d1c3b1d84a72d71a17606a1 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 18 Sep 2013 12:01:59 -0700 Subject: ARM: mach-omap2: gpmc: Fix warning when CONFIG_ARM_LPAE=y When CONFIG_ARM_LPAE=y the following build warning is generated: arch/arm/mach-omap2/gpmc.c:1495:4: warning: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'resource_size_t' [-Wformat] According to Documentation/printk-formats.txt '%pa' can be used to properly print 'resource_size_t'. Reported-by: Kevin Hilman Signed-off-by: Fabio Estevam Acked-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/gpmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c index 9f4795aff48a..579697adaae7 100644 --- a/arch/arm/mach-omap2/gpmc.c +++ b/arch/arm/mach-omap2/gpmc.c @@ -1491,8 +1491,8 @@ static int gpmc_probe_generic_child(struct platform_device *pdev, */ ret = gpmc_cs_remap(cs, res.start); if (ret < 0) { - dev_err(&pdev->dev, "cannot remap GPMC CS %d to 0x%x\n", - cs, res.start); + dev_err(&pdev->dev, "cannot remap GPMC CS %d to %pa\n", + cs, &res.start); goto err; } -- cgit v1.2.3-59-g8ed1b From 783502719cdf066fcb91f37c77728916730ae60f Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Wed, 18 Sep 2013 12:02:00 -0700 Subject: ARM: OMAP4: cpuidle: fix: call cpu_cluster_pm_exit conditionally We call cpu_cluster_pm_enter for dev->cpu == 0 only, but cpu_cluster_pm_exit called without that check. Because of that unhandled page fault may happen: [ 3.803405] Unable to handle kernel paging request at virtual address 00002500 [ 3.810974] pgd = c0004000 [ 3.813812] [00002500] *pgd=00000000 [ 3.817596] Internal error: Oops: 5 [#1] SMP ARM [ 3.822418] Modules linked in: [ 3.825653] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.11.0-rc6+ #21 [ 3.832397] task: ed86ef40 ti: ed896000 task.ti: ed896000 [ 3.838073] PC is at irq_notifier+0x234/0x25c [ 3.842651] LR is at irq_notifier+0x218/0x25c [ 3.847229] pc : [] lr : [] psr: 80000193 [ 3.847229] sp : ed897ee8 ip : 00000005 fp : 00000001 [ 3.859283] r10: c0b395f0 r9 : c0b30594 r8 : c0b8c2ac [ 3.864776] r7 : ffffffff r6 : 00000000 r5 : 00000005 r4 : 00000000 [ 3.871643] r3 : 00002500 r2 : 00000000 r1 : 00000005 r0 : 44302244 [ 3.878479] Flags: Nzcv IRQs off FIQs on Mode SVC_32 ISA ARM Segment kernel [ 3.886260] Control: 10c5387d Table: 8000404a DAC: 00000015 [ 3.892272] Process swapper/1 (pid: 0, stack limit = 0xed896240) [ 3.898590] Stack: (0xed897ee8 to 0xed898000) [ 3.903167] 7ee0: c0979c3a 00000001 ed897ef8 ed896000 c0014f7c 00000000 [ 3.911743] 7f00: 00000005 00000000 ffffffff c0b8c2ac c0b395f0 c077c04c c0c94b48 c0b3953c [ 3.920318] 7f20: c0bcd928 00000002 c0b39524 c00cfad8 00000000 ffffffff 00000000 c00cfb10 [ 3.928924] 7f40: c14e62c0 c002c1c8 c002c0ac c14e62c0 00000002 e251c37d 00000000 c0b39548 [ 3.937499] 7f60: c0b395f0 c05a1bc4 e251c37d 00000000 00000005 c05a3870 edc90380 edc90380 [ 3.946105] 7f80: edc90394 c14e62c0 c0b39548 00000002 c0784064 c05a3c78 c0b395e0 c14e62c0 [ 3.954681] 7fa0: 00000002 c0b39548 c0bc9db8 00000000 00000001 c05a1dc0 ed896000 00000015 [ 3.963287] 7fc0: c0bc9db8 ed896000 8000406a c0b30594 c0784064 c000e504 00000746 c007a528 [ 3.971862] 7fe0: 00000001 0000001d 600001d3 c0bcc004 00000000 800086c4 ee0aa6a7 d2aabaa9 [ 3.980499] [] (irq_notifier+0x234/0x25c) from [] (notifier_call_chain+0x38/0x68) [ 3.990173] [] (notifier_call_chain+0x38/0x68) from [] (cpu_pm_notify+0x20/0x38) [ 3.999786] [] (cpu_pm_notify+0x20/0x38) from [] (cpu_cluster_pm_exit+0x20/0x50) [ 4.009399] [] (cpu_cluster_pm_exit+0x20/0x50) from [] (omap_enter_idle_coupled+0x11c/0x14c) [ 4.020111] [] (omap_enter_idle_coupled+0x11c/0x14c) from [] (cpuidle_enter_state+0x40/0xec) [ 4.030822] [] (cpuidle_enter_state+0x40/0xec) from [] (cpuidle_enter_state_coupled+0x1f4/0x240) [ 4.041870] [] (cpuidle_enter_state_coupled+0x1f4/0x240) from [] (cpuidle_idle_call+0x150/0x228) [ 4.052947] [] (cpuidle_idle_call+0x150/0x228) from [] (arch_cpu_idle+0x8/0x38) [ 4.062499] [] (arch_cpu_idle+0x8/0x38) from [] (cpu_startup_entry+0x178/0x1e4) [ 4.071990] [] (cpu_startup_entry+0x178/0x1e4) from [<800086c4>] (0x800086c4) [ 4.080383] Code: e5922288 03a03b0a 13a03c25 e0823003 (e5932000) [ 4.086791] ---[ end trace d83954a84a6fa69e ]--- It is supposed that sar_base is initialized in irq_save_context, which is called on CPU_CLUSTER_PM_ENTER notification. If this notification has been missed and CPU_CLUSTER_PM_EXIT is received sar_base is NULL. Fix it by calling CPU_CLUSTER_PM_{ENTER,EXIT} under the same condition. Signed-off-by: Vladimir Murzin Acked-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/cpuidle44xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index c443f2e97e10..4c8982ae9529 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -143,7 +143,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, * Call idle CPU cluster PM exit notifier chain * to restore GIC and wakeupgen context. */ - if ((cx->mpu_state == PWRDM_POWER_RET) && + if (dev->cpu == 0 && (cx->mpu_state == PWRDM_POWER_RET) && (cx->mpu_logic_state == PWRDM_POWER_OFF)) cpu_cluster_pm_exit(); -- cgit v1.2.3-59-g8ed1b From b6b2485214d2b6af534ae433b588c0bb76fe78af Mon Sep 17 00:00:00 2001 From: Anoop Thomas Mathew Date: Wed, 18 Sep 2013 12:02:00 -0700 Subject: ARM: OMAP4 SMP: Corrected a typo fucntions to functions Corrected the functions spelling mistake in the OMAP4 SMP source file. Signed-off-by: Anoop Thomas Mathew Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap-smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index 8708b2a9da45..891211093295 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -1,5 +1,5 @@ /* - * OMAP4 SMP source file. It contains platform specific fucntions + * OMAP4 SMP source file. It contains platform specific functions * needed for the linux smp kernel. * * Copyright (C) 2009 Texas Instruments, Inc. -- cgit v1.2.3-59-g8ed1b From e942cc06e2d183975dd47d8da9e569316cb870ea Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Wed, 18 Sep 2013 12:02:01 -0700 Subject: ARM: OMAP2+: mux: fix trivial typo in name Fix trivial typo in name. Signed-off-by: Phil Carmody Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/mux34xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/mux34xx.c b/arch/arm/mach-omap2/mux34xx.c index c53609f46294..be271f1d585b 100644 --- a/arch/arm/mach-omap2/mux34xx.c +++ b/arch/arm/mach-omap2/mux34xx.c @@ -620,7 +620,7 @@ static struct omap_mux __initdata omap3_muxmodes[] = { "uart1_rts", "ssi1_flag_tx", NULL, NULL, "gpio_149", NULL, NULL, "safe_mode"), _OMAP3_MUXENTRY(UART1_RX, 151, - "uart1_rx", "ss1_wake_tx", "mcbsp1_clkr", "mcspi4_clk", + "uart1_rx", "ssi1_wake_tx", "mcbsp1_clkr", "mcspi4_clk", "gpio_151", NULL, NULL, "safe_mode"), _OMAP3_MUXENTRY(UART1_TX, 148, "uart1_tx", "ssi1_dat_tx", NULL, NULL, -- cgit v1.2.3-59-g8ed1b From 3244aae5757d1117113a78ddb9f97845d5a4e49a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 16 Sep 2013 14:22:32 +0200 Subject: ARM: multi_v7: add HREFv60 to multi_v7 defconfig This is just a standard board for the Ux500, include it in the v7 multiplatform defconfig. Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson --- arch/arm/configs/multi_v7_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index aba4ec728651..f3935b46df29 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -36,6 +36,7 @@ CONFIG_ARCH_TEGRA_114_SOC=y CONFIG_TEGRA_PCI=y CONFIG_TEGRA_EMC_SCALING_ENABLE=y CONFIG_ARCH_U8500=y +CONFIG_MACH_HREFV60=y CONFIG_MACH_SNOWBALL=y CONFIG_MACH_UX500_DT=y CONFIG_ARCH_VEXPRESS=y -- cgit v1.2.3-59-g8ed1b From 4c2924b725fb35d3acc6367d500458d70cdd09a9 Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Mon, 13 Dec 2010 21:48:10 +0100 Subject: MIPS: PCI: Use pci_resource_to_user to map pci memory space properly [ralf@linux-mips.org: This only matters to Alchemy platforms. On other platforms fixup_bigphys_addr is just an identidy mapping.] Signed-off-by: Wolfgang Grandegger Cc: tiejun.chen Cc: Linux-MIPS Patchwork: https://patchwork.linux-mips.org/patch/1868/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/pci.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index f194c08bd057..12d6842962be 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -83,6 +83,18 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); +#define HAVE_ARCH_PCI_RESOURCE_TO_USER + +static inline void pci_resource_to_user(const struct pci_dev *dev, int bar, + const struct resource *rsrc, resource_size_t *start, + resource_size_t *end) +{ + phys_t size = resource_size(rsrc); + + *start = fixup_bigphys_addr(rsrc->start, size); + *end = rsrc->start + size; +} + /* * Dynamic DMA mapping stuff. * MIPS has everything mapped statically. -- cgit v1.2.3-59-g8ed1b From b494b48dacb4dd848d76bc8871e55716486f0253 Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Tue, 10 Sep 2013 18:59:47 +0100 Subject: cpufreq: imx6q-cpufreq: assign cpu_dev correctly to cpu0 device Commit cdc58d602d2e657602a90c190cbf745886c95977 "cpufreq: imx6q-cpufreq: remove device tree parsing for cpu nodes" assumed the pdev->dev is set to cpu0 device in the platform code. But it actually points to the virtual cpufreq-cpu0 platform device which is not present in the device tree. Most of the information needed by cpufreq is stored in cpu0 DT node. So cpu_dev must point to cpu0 device. This patch fixes the wrong assignment to cpu_dev. Reported-by: Guennadi Liakhovetski Tested-by: Shawn Guo Signed-off-by: Sudeep KarkadaNagesha Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-imx/mach-imx6q.c | 9 +++++++-- drivers/cpufreq/imx6q-cpufreq.c | 7 ++++++- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 85a1b51346c8..90372a21087f 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -233,10 +233,15 @@ put_node: of_node_put(np); } -static void __init imx6q_opp_init(struct device *cpu_dev) +static void __init imx6q_opp_init(void) { struct device_node *np; + struct device *cpu_dev = get_cpu_device(0); + if (!cpu_dev) { + pr_warn("failed to get cpu0 device\n"); + return; + } np = of_node_get(cpu_dev->of_node); if (!np) { pr_warn("failed to find cpu0 node\n"); @@ -268,7 +273,7 @@ static void __init imx6q_init_late(void) imx6q_cpuidle_init(); if (IS_ENABLED(CONFIG_ARM_IMX6Q_CPUFREQ)) { - imx6q_opp_init(&imx6q_cpufreq_pdev.dev); + imx6q_opp_init(); platform_device_register(&imx6q_cpufreq_pdev); } } diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 3e396543aea4..c3fd2a101ca0 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -202,7 +203,11 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) unsigned long min_volt, max_volt; int num, ret; - cpu_dev = &pdev->dev; + cpu_dev = get_cpu_device(0); + if (!cpu_dev) { + pr_err("failed to get cpu0 device\n"); + return -ENODEV; + } np = of_node_get(cpu_dev->of_node); if (!np) { -- cgit v1.2.3-59-g8ed1b From 3d10a887de7ffe704687c9a77abaac9fbe5e9208 Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Tue, 10 Sep 2013 18:59:48 +0100 Subject: ARM: i.MX: change dev_id to cpu0 while registering cpu clock Currently all clkdev registration use "cpufreq-cpu0.0" as dev_id for cpu clock which refers to virtual platform device. It needs to be "cpu0" instead which is actual cpu0 device id. This patch changes the dev_id from "cpufreq-cpu0.0" to "cpu0". Reported-by: Guennadi Liakhovetski Tested-by: Shawn Guo Signed-off-by: Sudeep KarkadaNagesha Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-imx/clk-imx27.c | 2 +- arch/arm/mach-imx/clk-imx51-imx53.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/clk-imx27.c b/arch/arm/mach-imx/clk-imx27.c index c3cfa4116dc0..c6b40f386786 100644 --- a/arch/arm/mach-imx/clk-imx27.c +++ b/arch/arm/mach-imx/clk-imx27.c @@ -285,7 +285,7 @@ int __init mx27_clocks_init(unsigned long fref) clk_register_clkdev(clk[ata_ahb_gate], "ata", NULL); clk_register_clkdev(clk[rtc_ipg_gate], NULL, "imx21-rtc"); clk_register_clkdev(clk[scc_ipg_gate], "scc", NULL); - clk_register_clkdev(clk[cpu_div], NULL, "cpufreq-cpu0.0"); + clk_register_clkdev(clk[cpu_div], NULL, "cpu0"); clk_register_clkdev(clk[emi_ahb_gate], "emi_ahb" , NULL); mxc_timer_init(MX27_IO_ADDRESS(MX27_GPT1_BASE_ADDR), MX27_INT_GPT1); diff --git a/arch/arm/mach-imx/clk-imx51-imx53.c b/arch/arm/mach-imx/clk-imx51-imx53.c index 1a56a3319997..de1964c7b8bc 100644 --- a/arch/arm/mach-imx/clk-imx51-imx53.c +++ b/arch/arm/mach-imx/clk-imx51-imx53.c @@ -328,7 +328,7 @@ static void __init mx5_clocks_common_init(unsigned long rate_ckil, clk_register_clkdev(clk[ssi2_ipg_gate], NULL, "imx-ssi.1"); clk_register_clkdev(clk[ssi3_ipg_gate], NULL, "imx-ssi.2"); clk_register_clkdev(clk[sdma_gate], NULL, "imx35-sdma"); - clk_register_clkdev(clk[cpu_podf], NULL, "cpufreq-cpu0.0"); + clk_register_clkdev(clk[cpu_podf], NULL, "cpu0"); clk_register_clkdev(clk[iim_gate], "iim", NULL); clk_register_clkdev(clk[dummy], NULL, "imx2-wdt.0"); clk_register_clkdev(clk[dummy], NULL, "imx2-wdt.1"); -- cgit v1.2.3-59-g8ed1b From e4a6a29d1250022a885123cc0a04bd176b508854 Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Tue, 10 Sep 2013 18:59:49 +0100 Subject: ARM: shmobile: change dev_id to cpu0 while registering cpu clock Currently all clkdev registration use "cpufreq-cpu0.0" as dev_id for cpu clock which refers to virtual platform device. It needs to be "cpu0" instead which is actual cpu0 device id. This patch changes the dev_id from "cpufreq-cpu0.0" to "cpu0". Reported-and-tested-by: Guennadi Liakhovetski Cc: Shawn Guo Cc: Magnus Damm Signed-off-by: Sudeep KarkadaNagesha Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-shmobile/clock-r8a73a4.c | 2 +- arch/arm/mach-shmobile/clock-sh73a0.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-shmobile/clock-r8a73a4.c b/arch/arm/mach-shmobile/clock-r8a73a4.c index 8ea5ef6c79cc..5bd2e851e3c7 100644 --- a/arch/arm/mach-shmobile/clock-r8a73a4.c +++ b/arch/arm/mach-shmobile/clock-r8a73a4.c @@ -555,7 +555,7 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("pll2h", &pll2h_clk), /* CPU clock */ - CLKDEV_DEV_ID("cpufreq-cpu0", &z_clk), + CLKDEV_DEV_ID("cpu0", &z_clk), /* DIV6 */ CLKDEV_CON_ID("zb", &div6_clks[DIV6_ZB]), diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c index 1942eaef5181..c92c023f0d27 100644 --- a/arch/arm/mach-shmobile/clock-sh73a0.c +++ b/arch/arm/mach-shmobile/clock-sh73a0.c @@ -616,7 +616,7 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("smp_twd", &twd_clk), /* smp_twd */ /* DIV4 clocks */ - CLKDEV_DEV_ID("cpufreq-cpu0", &div4_clks[DIV4_Z]), + CLKDEV_DEV_ID("cpu0", &div4_clks[DIV4_Z]), /* DIV6 clocks */ CLKDEV_CON_ID("vck1_clk", &div6_clks[DIV6_VCK1]), -- cgit v1.2.3-59-g8ed1b From 2f9ee82c2a1af01966cedaa9cb144acb6fca9932 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 19 Sep 2013 11:09:48 +0200 Subject: MIPS: Add MIPS R5 config5 register. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mipsregs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index fed1c3e9b486..e0331414c7d6 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -603,6 +603,13 @@ #define MIPS_CONF4_MMUEXTDEF (_ULCAST_(3) << 14) #define MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT (_ULCAST_(1) << 14) +#define MIPS_CONF5_NF (_ULCAST_(1) << 0) +#define MIPS_CONF5_UFR (_ULCAST_(1) << 2) +#define MIPS_CONF5_MSAEN (_ULCAST_(1) << 27) +#define MIPS_CONF5_EVA (_ULCAST_(1) << 28) +#define MIPS_CONF5_CV (_ULCAST_(1) << 29) +#define MIPS_CONF5_K (_ULCAST_(1) << 30) + #define MIPS_CONF6_SYND (_ULCAST_(1) << 13) #define MIPS_CONF7_WII (_ULCAST_(1) << 31) -- cgit v1.2.3-59-g8ed1b From 8b8a7634315be747959b0165e38138879f93cf6c Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 19 Sep 2013 11:15:49 +0200 Subject: MIPS: Disable usermode switching of the FR bit for MIPS R5 CPUs. Currently the kernel will always use the FR=0 register model for O32. If an O32 application did enable FR=1 mode, some data from another application might be leaked in the extra registers becoming visible. Iow, this patch is meant to make the kernel MIPS R5 tolerant but leaves proper MIPS R5 support to a future patchset. Signed-off-by: Ralf Baechle --- arch/mips/kernel/cpu-probe.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 9be68091bdf2..5465dc183e5a 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -291,6 +291,17 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c) return config4 & MIPS_CONF_M; } +static inline unsigned int decode_config5(struct cpuinfo_mips *c) +{ + unsigned int config5; + + config5 = read_c0_config5(); + config5 &= ~MIPS_CONF5_UFR; + write_c0_config5(config5); + + return config5 & MIPS_CONF_M; +} + static void decode_configs(struct cpuinfo_mips *c) { int ok; @@ -311,6 +322,8 @@ static void decode_configs(struct cpuinfo_mips *c) ok = decode_config3(c); if (ok) ok = decode_config4(c); + if (ok) + ok = decode_config5(c); mips_probe_watch_registers(c); -- cgit v1.2.3-59-g8ed1b From 88f02518d8ae846fc11001d7765ecee343cb5349 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Thu, 19 Sep 2013 10:27:52 +0100 Subject: MIPS: PCI: pci-bcm1480: Include missing vt.h header It's needed for the MAX_NR_CONSOLES macro. Fixes the following build problem on a randconfig: arch/mips/pci/pci-bcm1480.c: In function 'bcm1480_pcibios_init': arch/mips/pci/pci-bcm1480.c:261:36: error: 'MAX_NR_CONSOLES' undeclared (first use in this function) arch/mips/pci/pci-bcm1480.c:261:36: note: each undeclared identifier is reported only once for each function it appears in make[1]: *** [arch/mips/pci/pci-bcm1480.o] Error 1 Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5858/ Signed-off-by: Ralf Baechle --- arch/mips/pci/pci-bcm1480.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/mips/pci/pci-bcm1480.c b/arch/mips/pci/pci-bcm1480.c index 44dd5aa2e36f..5ec2a7bae02c 100644 --- a/arch/mips/pci/pci-bcm1480.c +++ b/arch/mips/pci/pci-bcm1480.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3-59-g8ed1b From 66b10574b86046dbe10c4326e43964a79e0b3a64 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 19 Sep 2013 18:39:08 +0530 Subject: MIPS: Fix invalid symbolic link file Commit 3b29aa5ba204c [MIPS: add symlink] created a symlink file in include/dt-bindings. Even though commit diff is fine, the symlink is invalid and ls -lb shows a newline character at the end of the filename: lrwxrwxrwx 1 maddy maddy 35 Sep 19 18:11 dt-bindings -> ../../../../../include/dt-bindings\n Signed-off-by: Madhavan Srinivasan Cc: steven.hill@imgtec.com Cc: mmarek@suse.cz Cc: swarren@nvidia.com Cc: linux-mips@linux-mips.org Cc: linux-kbuild@vger.kernel.org Cc: james.hogan@imgtec.com Patchwork: https://patchwork.linux-mips.org/patch/5859/ Signed-off-by: Ralf Baechle --- arch/mips/boot/dts/include/dt-bindings | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/boot/dts/include/dt-bindings b/arch/mips/boot/dts/include/dt-bindings index 68ae3887b3e5..08c00e4972fa 120000 --- a/arch/mips/boot/dts/include/dt-bindings +++ b/arch/mips/boot/dts/include/dt-bindings @@ -1 +1 @@ -../../../../../include/dt-bindings +../../../../../include/dt-bindings \ No newline at end of file -- cgit v1.2.3-59-g8ed1b From 73c4427c6ca3b32fa0441791e9c6eadceff7242f Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 17 Sep 2013 14:48:13 +0800 Subject: perf/x86/intel/uncore: Don't use smp_processor_id() in validate_group() uncore_validate_group() can't call smp_processor_id() because it is in preemptible context. Pass NUMA_NO_NODE to the allocator instead. Signed-off-by: Yan, Zheng Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379400493-11505-1-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 8ed44589b0e4..4118f9f68315 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2706,14 +2706,14 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) box->hrtimer.function = uncore_pmu_hrtimer; } -struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu) +static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node) { struct intel_uncore_box *box; int i, size; size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); - box = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); + box = kzalloc_node(size, GFP_KERNEL, node); if (!box) return NULL; @@ -3031,7 +3031,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu, struct intel_uncore_box *fake_box; int ret = -EINVAL, n; - fake_box = uncore_alloc_box(pmu->type, smp_processor_id()); + fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE); if (!fake_box) return -ENOMEM; @@ -3294,7 +3294,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id } type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; - box = uncore_alloc_box(type, 0); + box = uncore_alloc_box(type, NUMA_NO_NODE); if (!box) return -ENOMEM; @@ -3499,7 +3499,7 @@ static int uncore_cpu_prepare(int cpu, int phys_id) if (pmu->func_id < 0) pmu->func_id = j; - box = uncore_alloc_box(type, cpu); + box = uncore_alloc_box(type, cpu_to_node(cpu)); if (!box) return -ENOMEM; -- cgit v1.2.3-59-g8ed1b From fa7315871046b9a4c48627905691dbde57e51033 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Sep 2013 10:16:42 +0200 Subject: perf: Fix capabilities bitfield compatibility in 'struct perf_event_mmap_page' Solve the problems around the broken definition of perf_event_mmap_page:: cap_usr_time and cap_usr_rdpmc fields which used to overlap, partially fixed by: 860f085b74e9 ("perf: Fix broken union in 'struct perf_event_mmap_page'") The problem with the fix (merged in v3.12-rc1 and not yet released officially), noticed by Vince Weaver is that the new behavior is not detectable by new user-space, and that due to the reuse of the field names it's easy to mis-compile a binary if old headers are used on a new kernel or new headers are used on an old kernel. To solve all that make this change explicit, detectable and self-contained, by iterating the ABI the following way: - Always clear bit 0, and rename it to usrpage->cap_bit0, to at least not confuse old user-space binaries. RDPMC will be marked as unavailable to old binaries but that's within the ABI, this is a capability bit. - Rename bit 1 to ->cap_bit0_is_deprecated and always set it to 1, so new libraries can reliably detect that bit 0 is deprecated and perma-zero without having to check the kernel version. - Use bits 2, 3, 4 for the newly defined, correct functionality: cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */ cap_user_time : 1, /* The time_* fields are used */ cap_user_time_zero : 1, /* The time_zero field is used */ - Rename all the bitfield names in perf_event.h to be different from the old names, to make sure it's not possible to mis-compile it accidentally with old assumptions. The 'size' field can then be used in the future to add new fields and it will act as a natural ABI version indicator as well. Also adjust tools/perf/ userspace for the new definitions, noticed by Adrian Hunter. Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra Also-Fixed-by: Adrian Hunter Link: http://lkml.kernel.org/n/tip-zr03yxjrpXesOzzupszqglbv@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 10 +++++----- include/uapi/linux/perf_event.h | 14 +++++++++----- kernel/events/core.c | 21 +++++++++++++++++++++ tools/perf/arch/x86/util/tsc.c | 6 +++--- 4 files changed, 38 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8355c84b9729..a9c606bb4945 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1883,9 +1883,9 @@ static struct pmu pmu = { void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { - userpg->cap_usr_time = 0; - userpg->cap_usr_time_zero = 0; - userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; + userpg->cap_user_time = 0; + userpg->cap_user_time_zero = 0; + userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; userpg->pmc_width = x86_pmu.cntval_bits; if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) @@ -1894,13 +1894,13 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) return; - userpg->cap_usr_time = 1; + userpg->cap_user_time = 1; userpg->time_mult = this_cpu_read(cyc2ns); userpg->time_shift = CYC2NS_SCALE_FACTOR; userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; if (sched_clock_stable && !check_tsc_disabled()) { - userpg->cap_usr_time_zero = 1; + userpg->cap_user_time_zero = 1; userpg->time_zero = this_cpu_read(cyc2ns_offset); } } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 7f6d584c267b..009a655a5d35 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -380,10 +380,13 @@ struct perf_event_mmap_page { union { __u64 capabilities; struct { - __u64 cap_usr_time : 1, - cap_usr_rdpmc : 1, - cap_usr_time_zero : 1, - cap_____res : 61; + __u64 cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */ + cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */ + + cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */ + cap_user_time : 1, /* The time_* fields are used */ + cap_user_time_zero : 1, /* The time_zero field is used */ + cap_____res : 59; }; }; @@ -442,12 +445,13 @@ struct perf_event_mmap_page { * ((rem * time_mult) >> time_shift); */ __u64 time_zero; + __u32 size; /* Header size up to __reserved[] fields. */ /* * Hole for extension of the self monitor capabilities */ - __u64 __reserved[119]; /* align to 1k */ + __u8 __reserved[118*8+4]; /* align to 1k. */ /* * Control data for the mmap() data buffer. diff --git a/kernel/events/core.c b/kernel/events/core.c index dd236b66ca3a..cb4238e85b38 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3660,6 +3660,26 @@ static void calc_timer_values(struct perf_event *event, *running = ctx_time - event->tstamp_running; } +static void perf_event_init_userpage(struct perf_event *event) +{ + struct perf_event_mmap_page *userpg; + struct ring_buffer *rb; + + rcu_read_lock(); + rb = rcu_dereference(event->rb); + if (!rb) + goto unlock; + + userpg = rb->user_page; + + /* Allow new userspace to detect that bit 0 is deprecated */ + userpg->cap_bit0_is_deprecated = 1; + userpg->size = offsetof(struct perf_event_mmap_page, __reserved); + +unlock: + rcu_read_unlock(); +} + void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { } @@ -4044,6 +4064,7 @@ again: ring_buffer_attach(event, rb); rcu_assign_pointer(event->rb, rb); + perf_event_init_userpage(event); perf_event_update_userpage(event); unlock: diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index 9570c2b0f83c..b2519e49424f 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -32,7 +32,7 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, struct perf_tsc_conversion *tc) { - bool cap_usr_time_zero; + bool cap_user_time_zero; u32 seq; int i = 0; @@ -42,7 +42,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, tc->time_mult = pc->time_mult; tc->time_shift = pc->time_shift; tc->time_zero = pc->time_zero; - cap_usr_time_zero = pc->cap_usr_time_zero; + cap_user_time_zero = pc->cap_user_time_zero; rmb(); if (pc->lock == seq && !(seq & 1)) break; @@ -52,7 +52,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, } } - if (!cap_usr_time_zero) + if (!cap_user_time_zero) return -EOPNOTSUPP; return 0; -- cgit v1.2.3-59-g8ed1b From 59f67e16e6b79697241c3fd030e3da300377893e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 16 Sep 2013 15:18:28 +0100 Subject: arm64: Make do_bad_area() function static This function is only called from arch/arm64/mm/fault.c. Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 6d6acf153bff..c23751b06120 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -130,7 +130,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, force_sig_info(sig, &si, tsk); } -void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) +static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->active_mm; -- cgit v1.2.3-59-g8ed1b From 6ca68e802612c87c31aa83d50c37ed0d88774a46 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 17 Sep 2013 18:49:46 +0100 Subject: arm64: Correctly report LR and SP for compat tasks When a task crashes and we print debugging information, ensure that compat tasks show the actual AArch32 LR and SP registers rather than the AArch64 ones. Signed-off-by: Catalin Marinas --- arch/arm64/kernel/process.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 57fb55c44c90..7ae8a1f00c3c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -143,15 +143,26 @@ void machine_restart(char *cmd) void __show_regs(struct pt_regs *regs) { - int i; + int i, top_reg; + u64 lr, sp; + + if (compat_user_mode(regs)) { + lr = regs->compat_lr; + sp = regs->compat_sp; + top_reg = 12; + } else { + lr = regs->regs[30]; + sp = regs->sp; + top_reg = 29; + } show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); - print_symbol("LR is at %s\n", regs->regs[30]); + print_symbol("LR is at %s\n", lr); printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", - regs->pc, regs->regs[30], regs->pstate); - printk("sp : %016llx\n", regs->sp); - for (i = 29; i >= 0; i--) { + regs->pc, lr, regs->pstate); + printk("sp : %016llx\n", sp); + for (i = top_reg; i >= 0; i--) { printk("x%-2d: %016llx ", i, regs->regs[i]); if (i % 2 == 0) printk("\n"); -- cgit v1.2.3-59-g8ed1b From 25804e6a96681d5d2142058948e218999e4f547c Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 18 Sep 2013 16:14:28 +0100 Subject: arm64: Widen hwcap to be 64 bit Under arm64 elf_hwcap is a 32 bit quantity, but it is stored in a 64 bit auxiliary ELF field and glibc reads hwcap as 64 bit. This patch widens elf_hwcap to be 64 bit. Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/hwcap.h | 2 +- arch/arm64/kernel/setup.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 6d4482fa35bc..e2950b098e76 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -43,6 +43,6 @@ COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) -extern unsigned int elf_hwcap; +extern unsigned long elf_hwcap; #endif #endif diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 12ad8f3d0cfd..055cfb80e05c 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -57,7 +57,7 @@ unsigned int processor_id; EXPORT_SYMBOL(processor_id); -unsigned int elf_hwcap __read_mostly; +unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); static const char *cpu_name; -- cgit v1.2.3-59-g8ed1b From d95bc2501da97e2884b957c48df37c258a34db8d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 19 Sep 2013 10:32:20 +0100 Subject: ARM: 7839/1: entry: fix tracing of ARM-private syscalls Commit 377747c40657 ("ARM: entry: allow ARM-private syscalls to be restarted") reworked the low-level syscall dispatcher to allow restarting of ARM-private syscalls. Unfortunately, this relocated the label used to dispatch a private syscall from the trace path, so that the invocation would be bypassed altogether! This causes applications to fail under strace as soon as they rely on a private syscall (e.g. set_tls): set_tls(0xb6fad4c0, 0xb6fadb98, 0xb6fb1050, 0xb6fad4c0, 0xb6fb1050) = -1 ENOSYS (Function not implemented) This patch fixes the label so that we correctly dispatch private syscalls from the trace path. Reported-by: Jason Gunthorpe Tested-by: Jason Gunthorpe Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/entry-common.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 74ad15d1a065..bc6bd9683ba4 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -442,10 +442,10 @@ local_restart: ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine add r1, sp, #S_OFF - cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) +2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back bcs arm_syscall -2: mov why, #0 @ no longer a real syscall + mov why, #0 @ no longer a real syscall b sys_ni_syscall @ not private func #if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI) -- cgit v1.2.3-59-g8ed1b From c4a30c3b2997bbc7d81cd0f5fde43599700834f5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 22 Sep 2013 11:08:50 +0100 Subject: ARM: only allow kernel mode neon with AEABI This prevents the linker erroring with: arm-linux-ld: error: arch/arm/lib/xor-neon.o uses VFP instructions, whereas arch/arm/lib/built-in.o does not arm-linux-ld: failed to merge target specific data of file arch/arm/lib/xor-neon.o This is due to the non-neon files being marked as containing FPA data/ instructions (even though they do not) being mixed with files which contain VFP, which is an incompatible floating point format. Signed-off-by: Russell King --- arch/arm/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3f7714d8d2d2..1ad6fb6c094d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2217,8 +2217,7 @@ config NEON config KERNEL_MODE_NEON bool "Support for NEON in kernel mode" - default n - depends on NEON + depends on NEON && AEABI help Say Y to include support for NEON in kernel mode. -- cgit v1.2.3-59-g8ed1b From 40190c85f427dcfdbab5dbef4ffd2510d649da1f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 21 Sep 2013 11:23:50 +0100 Subject: ARM: 7837/3: fix Thumb-2 bug in AES assembler code Patch 638591c enabled building the AES assembler code in Thumb2 mode. However, this code used arithmetic involving PC rather than adr{l} instructions to generate PC-relative references to the lookup tables, and this needs to take into account the different PC offset when running in Thumb mode. Signed-off-by: Ard Biesheuvel Acked-by: Nicolas Pitre Cc: stable@vger.kernel.org Signed-off-by: Russell King --- arch/arm/crypto/aes-armv4.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S index 19d6cd6f29f9..3a14ea8fe97e 100644 --- a/arch/arm/crypto/aes-armv4.S +++ b/arch/arm/crypto/aes-armv4.S @@ -148,7 +148,7 @@ AES_Te: @ const AES_KEY *key) { .align 5 ENTRY(AES_encrypt) - sub r3,pc,#8 @ AES_encrypt + adr r3,AES_encrypt stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp mov r11,r2 @@ -381,7 +381,7 @@ _armv4_AES_encrypt: .align 5 ENTRY(private_AES_set_encrypt_key) _armv4_AES_set_encrypt_key: - sub r3,pc,#8 @ AES_set_encrypt_key + adr r3,_armv4_AES_set_encrypt_key teq r0,#0 moveq r0,#-1 beq .Labrt @@ -843,7 +843,7 @@ AES_Td: @ const AES_KEY *key) { .align 5 ENTRY(AES_decrypt) - sub r3,pc,#8 @ AES_decrypt + adr r3,AES_decrypt stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp mov r11,r2 -- cgit v1.2.3-59-g8ed1b From cf3b425dd8d99e01214515a6754f9e69ecc6dce8 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 22 Sep 2013 16:19:13 +0800 Subject: perf/x86/intel: Add model number for Avoton Silvermont Signed-off-by: Yan, Zheng Cc: a.p.zijlstra@chello.nl Cc: eranian@google.com Cc: ak@linux.intel.com Link: http://lkml.kernel.org/r/1379837953-17755-1-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9db76c31b3c3..f31a1655d1ff 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2325,6 +2325,7 @@ __init int intel_pmu_init(void) break; case 55: /* Atom 22nm "Silvermont" */ + case 77: /* Avoton "Silvermont" */ memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, -- cgit v1.2.3-59-g8ed1b From 4f0acd31c31f03ba42494c8baf6c0465150e2621 Mon Sep 17 00:00:00 2001 From: Masoud Sharbiani Date: Fri, 20 Sep 2013 15:59:07 -0700 Subject: x86/reboot: Add quirk to make Dell C6100 use reboot=pci automatically Dell PowerEdge C6100 machines fail to completely reboot about 20% of the time. Signed-off-by: Masoud Sharbiani Signed-off-by: Vinson Lee Cc: Robin Holt Cc: Russell King Cc: Guan Xuetao Cc: Link: http://lkml.kernel.org/r/1379717947-18042-1-git-send-email-vlee@freedesktop.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 563ed91e6faa..5f4ad2714109 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -358,6 +358,22 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), }, }, + { /* Handle problems with rebooting on the Dell PowerEdge C6100. */ + .callback = set_pci_reboot, + .ident = "Dell PowerEdge C6100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "C6100"), + }, + }, + { /* Some C6100 machines were shipped with vendor being 'Dell'. */ + .callback = set_pci_reboot, + .ident = "Dell PowerEdge C6100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "C6100"), + }, + }, { } }; -- cgit v1.2.3-59-g8ed1b From becee6b8c7b2b4adc9a3e0bec633abecd591b9ef Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 22 Sep 2013 22:04:27 +0100 Subject: MIPS: cpu-features.h: s/MIPS53/MIPS64/ No support for MIPS53 processors yet. Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5876/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/cpu-features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 51680d15ca8e..d445d060e346 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -187,7 +187,7 @@ /* * MIPS32, MIPS64, VR5500, IDT32332, IDT32334 and maybe a few other - * pre-MIPS32/MIPS53 processors have CLO, CLZ. The IDT RC64574 is 64-bit and + * pre-MIPS32/MIPS64 processors have CLO, CLZ. The IDT RC64574 is 64-bit and * has CLO and CLZ but not DCLO nor DCLZ. For 64-bit kernels * cpu_has_clo_clz also indicates the availability of DCLO and DCLZ. */ -- cgit v1.2.3-59-g8ed1b From a945928ea2709bc0e8e8165d33aed855a0110279 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 Sep 2013 22:29:44 -0400 Subject: xen: Do not enable spinlocks before jump_label_init() has executed xen_init_spinlocks() currently calls static_key_slow_inc() before jump_label_init() is invoked. When CONFIG_JUMP_LABEL is set (which usually is the case) the effect of this static_key_slow_inc() is deferred until after jump_label_init(). This is different from when CONFIG_JUMP_LABEL is not set, in which case the key is set immediately. Thus, depending on the value of config option, we may observe different behavior. In addition, when we come to __jump_label_transform() from jump_label_init(), the key (paravirt_ticketlocks_enabled) is already enabled. On processors where ideal_nop is not the same as default_nop this will cause a BUG() since it is expected that before a key is enabled the latter is replaced by the former during initialization. To address this problem we need to move static_key_slow_inc(¶virt_ticketlocks_enabled) so that it is called after jump_label_init(). We also need to make sure that this is done before other cpus start to boot. early_initcall appears to be a good place to do so. (Note that we cannot move whole xen_init_spinlocks() there since pv_lock_ops need to be set before alternative_instructions() runs.) Signed-off-by: Konrad Rzeszutek Wilk [v2: Added extra comments in the code] Signed-off-by: Boris Ostrovsky Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Steven Rostedt --- arch/x86/xen/spinlock.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 253f63fceea1..be6b86078957 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -259,6 +259,14 @@ void xen_uninit_lock_cpu(int cpu) } +/* + * Our init of PV spinlocks is split in two init functions due to us + * using paravirt patching and jump labels patching and having to do + * all of this before SMP code is invoked. + * + * The paravirt patching needs to be done _before_ the alternative asm code + * is started, otherwise we would not patch the core kernel code. + */ void __init xen_init_spinlocks(void) { @@ -267,12 +275,26 @@ void __init xen_init_spinlocks(void) return; } - static_key_slow_inc(¶virt_ticketlocks_enabled); - pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); pv_lock_ops.unlock_kick = xen_unlock_kick; } +/* + * While the jump_label init code needs to happend _after_ the jump labels are + * enabled and before SMP is started. Hence we use pre-SMP initcall level + * init. We cannot do it in xen_init_spinlocks as that is done before + * jump labels are activated. + */ +static __init int xen_init_spinlocks_jump(void) +{ + if (!xen_pvspin) + return 0; + + static_key_slow_inc(¶virt_ticketlocks_enabled); + return 0; +} +early_initcall(xen_init_spinlocks_jump); + static __init int xen_parse_nopvspin(char *arg) { xen_pvspin = false; -- cgit v1.2.3-59-g8ed1b From ede2033c405bfbd6b74b113cf69c6f54cef35902 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 6 Sep 2013 16:49:18 -0500 Subject: openrisc: clean-up prom.h Clean-up some copy/paste declarations that are not necessary. All the functions either don't exist or are already declared in other headers. This is needed in preparation of of_irq.h clean-up. Signed-off-by: Rob Herring Cc: Jonas Bonn Cc: linux@lists.openrisc.net --- arch/openrisc/include/asm/prom.h | 44 ---------------------------------------- 1 file changed, 44 deletions(-) (limited to 'arch') diff --git a/arch/openrisc/include/asm/prom.h b/arch/openrisc/include/asm/prom.h index eb59bfe23e85..93c9980e1b6b 100644 --- a/arch/openrisc/include/asm/prom.h +++ b/arch/openrisc/include/asm/prom.h @@ -14,53 +14,9 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ - -#include /* linux/of.h gets to determine #include ordering */ - #ifndef _ASM_OPENRISC_PROM_H #define _ASM_OPENRISC_PROM_H -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ -#include -#include -#include -#include -#include -#include -#include -#include -#include #define HAVE_ARCH_DEVTREE_FIXUPS -/* Other Prototypes */ -extern int early_uartlite_console(void); - -/* Parse the ibm,dma-window property of an OF node into the busno, phys and - * size parameters. - */ -void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, - unsigned long *busno, unsigned long *phys, unsigned long *size); - -extern void kdump_move_device_tree(void); - -/* Get the MAC address */ -extern const void *of_get_mac_address(struct device_node *np); - -/** - * of_irq_map_pci - Resolve the interrupt for a PCI device - * @pdev: the device whose interrupt is to be resolved - * @out_irq: structure of_irq filled by this function - * - * This function resolves the PCI interrupt for a given PCI device. If a - * device-node exists for a given pci_dev, it will use normal OF tree - * walking. If not, it will implement standard swizzling and walk up the - * PCI tree until an device-node is found, at which point it will finish - * resolving using the OF tree walking. - */ -struct pci_dev; -extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq); - -#endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ #endif /* _ASM_OPENRISC_PROM_H */ -- cgit v1.2.3-59-g8ed1b From 0366a1c70b89efed4f9d590216bb004a16effbed Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 23 Sep 2013 14:29:11 +1000 Subject: powerpc/irq: Run softirqs off the top of the irq stack Nowadays, irq_exit() calls __do_softirq() pretty much directly instead of calling do_softirq() which switches to the decicated softirq stack. This has lead to observed stack overflows on powerpc since we call irq_enter() and irq_exit() outside of the scope that switches to the irq stack. This fixes it by moving the stack switching up a level, making irq_enter() and irq_exit() run off the irq stack. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/irq.h | 4 +- arch/powerpc/kernel/irq.c | 104 ++++++++++++++++++++--------------------- arch/powerpc/kernel/misc_32.S | 9 ++-- arch/powerpc/kernel/misc_64.S | 10 ++-- 4 files changed, 62 insertions(+), 65 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 0e40843a1c6e..41f13cec8a8f 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -69,9 +69,9 @@ extern struct thread_info *softirq_ctx[NR_CPUS]; extern void irq_ctx_init(void); extern void call_do_softirq(struct thread_info *tp); -extern int call_handle_irq(int irq, void *p1, - struct thread_info *tp, void *func); +extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp); extern void do_IRQ(struct pt_regs *regs); +extern void __do_irq(struct pt_regs *regs); int irq_choose_cpu(const struct cpumask *mask); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c69440cef7af..2234a1276a77 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -441,50 +441,6 @@ void migrate_irqs(void) } #endif -static inline void handle_one_irq(unsigned int irq) -{ - struct thread_info *curtp, *irqtp; - unsigned long saved_sp_limit; - struct irq_desc *desc; - - desc = irq_to_desc(irq); - if (!desc) - return; - - /* Switch to the irq stack to handle this */ - curtp = current_thread_info(); - irqtp = hardirq_ctx[smp_processor_id()]; - - if (curtp == irqtp) { - /* We're already on the irq stack, just handle it */ - desc->handle_irq(irq, desc); - return; - } - - saved_sp_limit = current->thread.ksp_limit; - - irqtp->task = curtp->task; - irqtp->flags = 0; - - /* Copy the softirq bits in preempt_count so that the - * softirq checks work in the hardirq context. */ - irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) | - (curtp->preempt_count & SOFTIRQ_MASK); - - current->thread.ksp_limit = (unsigned long)irqtp + - _ALIGN_UP(sizeof(struct thread_info), 16); - - call_handle_irq(irq, desc, irqtp, desc->handle_irq); - current->thread.ksp_limit = saved_sp_limit; - irqtp->task = NULL; - - /* Set any flag that may have been set on the - * alternate stack - */ - if (irqtp->flags) - set_bits(irqtp->flags, &curtp->flags); -} - static inline void check_stack_overflow(void) { #ifdef CONFIG_DEBUG_STACKOVERFLOW @@ -501,9 +457,9 @@ static inline void check_stack_overflow(void) #endif } -void do_IRQ(struct pt_regs *regs) +void __do_irq(struct pt_regs *regs) { - struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; unsigned int irq; irq_enter(); @@ -519,18 +475,64 @@ void do_IRQ(struct pt_regs *regs) */ irq = ppc_md.get_irq(); - /* We can hard enable interrupts now */ + /* We can hard enable interrupts now to allow perf interrupts */ may_hard_irq_enable(); /* And finally process it */ - if (irq != NO_IRQ) - handle_one_irq(irq); - else + if (unlikely(irq == NO_IRQ)) __get_cpu_var(irq_stat).spurious_irqs++; + else { + desc = irq_to_desc(irq); + if (likely(desc)) + desc->handle_irq(irq, desc); + } trace_irq_exit(regs); irq_exit(); +} + +void do_IRQ(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct thread_info *curtp, *irqtp; + unsigned long saved_sp_limit; + + /* Switch to the irq stack to handle this */ + curtp = current_thread_info(); + irqtp = hardirq_ctx[raw_smp_processor_id()]; + + /* Already there ? */ + if (unlikely(curtp == irqtp)) { + __do_irq(regs); + set_irq_regs(old_regs); + return; + } + + /* Adjust the stack limit */ + saved_sp_limit = current->thread.ksp_limit; + current->thread.ksp_limit = (unsigned long)irqtp + + _ALIGN_UP(sizeof(struct thread_info), 16); + + + /* Prepare the thread_info in the irq stack */ + irqtp->task = curtp->task; + irqtp->flags = 0; + + /* Copy the preempt_count so that the [soft]irq checks work. */ + irqtp->preempt_count = curtp->preempt_count; + + /* Switch stack and call */ + call_do_irq(regs, irqtp); + + /* Restore stack limit */ + current->thread.ksp_limit = saved_sp_limit; + irqtp->task = NULL; + + /* Copy back updates to the thread_info */ + if (irqtp->flags) + set_bits(irqtp->flags, &curtp->flags); + set_irq_regs(old_regs); } @@ -592,12 +594,10 @@ void irq_ctx_init(void) memset((void *)softirq_ctx[i], 0, THREAD_SIZE); tp = softirq_ctx[i]; tp->cpu = i; - tp->preempt_count = 0; memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); tp = hardirq_ctx[i]; tp->cpu = i; - tp->preempt_count = HARDIRQ_OFFSET; } } diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 777d999f563b..7da3882a3622 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -47,13 +47,12 @@ _GLOBAL(call_do_softirq) mtlr r0 blr -_GLOBAL(call_handle_irq) +_GLOBAL(call_do_irq) mflr r0 stw r0,4(r1) - mtctr r6 - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) - mr r1,r5 - bctrl + stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) + mr r1,r4 + bl __do_irq lwz r1,0(r1) lwz r0,4(r1) mtlr r0 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 971d7e78aff2..e59caf874d05 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -40,14 +40,12 @@ _GLOBAL(call_do_softirq) mtlr r0 blr -_GLOBAL(call_handle_irq) - ld r8,0(r6) +_GLOBAL(call_do_irq) mflr r0 std r0,16(r1) - mtctr r8 - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) - mr r1,r5 - bctrl + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) + mr r1,r4 + bl .__do_irq ld r1,0(r1) ld r0,16(r1) mtlr r0 -- cgit v1.2.3-59-g8ed1b From cbc9565ee82694dec31d8137dec975b83175183b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 24 Sep 2013 15:17:21 +1000 Subject: powerpc: Remove ksp_limit on ppc64 We've been keeping that field in thread_struct for a while, it contains the "limit" of the current stack pointer and is meant to be used for detecting stack overflows. It has a few problems however: - First, it was never actually *used* on 64-bit. Set and updated but not actually exploited - When switching stack to/from irq and softirq stacks, it's update is racy unless we hard disable interrupts, which is costly. This is fine on 32-bit as we don't soft-disable there but not on 64-bit. Thus rather than fixing 2 in order to implement 1 in some hypothetical future, let's remove the code completely from 64-bit. In order to avoid a clutter of ifdef's, we remove the updates from C code completely during interrupt stack switching, and instead maintain it from the asm helper that is used to do the stack switching in the first place. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/processor.h | 4 +--- arch/powerpc/kernel/asm-offsets.c | 3 ++- arch/powerpc/kernel/irq.c | 12 ------------ arch/powerpc/kernel/misc_32.S | 16 ++++++++++++++++ arch/powerpc/kernel/process.c | 3 ++- arch/powerpc/lib/sstep.c | 3 ++- 6 files changed, 23 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index e378cccfca55..ce4de5aed7b5 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -149,8 +149,6 @@ typedef struct { struct thread_struct { unsigned long ksp; /* Kernel stack pointer */ - unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ - #ifdef CONFIG_PPC64 unsigned long ksp_vsid; #endif @@ -162,6 +160,7 @@ struct thread_struct { #endif #ifdef CONFIG_PPC32 void *pgdir; /* root of page-table tree */ + unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ #endif #ifdef CONFIG_PPC_ADV_DEBUG_REGS /* @@ -321,7 +320,6 @@ struct thread_struct { #else #define INIT_THREAD { \ .ksp = INIT_SP, \ - .ksp_limit = INIT_SP_LIMIT, \ .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \ .fs = KERNEL_DS, \ .fpr = {{0}}, \ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d8958be5f31a..502c7a4e73f7 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -80,10 +80,11 @@ int main(void) DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); + DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16)); + DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); #endif /* CONFIG_PPC64 */ DEFINE(KSP, offsetof(struct thread_struct, ksp)); - DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); #ifdef CONFIG_BOOKE DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0])); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2234a1276a77..57d286a78f86 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -496,7 +496,6 @@ void do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); struct thread_info *curtp, *irqtp; - unsigned long saved_sp_limit; /* Switch to the irq stack to handle this */ curtp = current_thread_info(); @@ -509,12 +508,6 @@ void do_IRQ(struct pt_regs *regs) return; } - /* Adjust the stack limit */ - saved_sp_limit = current->thread.ksp_limit; - current->thread.ksp_limit = (unsigned long)irqtp + - _ALIGN_UP(sizeof(struct thread_info), 16); - - /* Prepare the thread_info in the irq stack */ irqtp->task = curtp->task; irqtp->flags = 0; @@ -526,7 +519,6 @@ void do_IRQ(struct pt_regs *regs) call_do_irq(regs, irqtp); /* Restore stack limit */ - current->thread.ksp_limit = saved_sp_limit; irqtp->task = NULL; /* Copy back updates to the thread_info */ @@ -604,16 +596,12 @@ void irq_ctx_init(void) static inline void do_softirq_onstack(void) { struct thread_info *curtp, *irqtp; - unsigned long saved_sp_limit = current->thread.ksp_limit; curtp = current_thread_info(); irqtp = softirq_ctx[smp_processor_id()]; irqtp->task = curtp->task; irqtp->flags = 0; - current->thread.ksp_limit = (unsigned long)irqtp + - _ALIGN_UP(sizeof(struct thread_info), 16); call_do_softirq(irqtp); - current->thread.ksp_limit = saved_sp_limit; irqtp->task = NULL; /* Set any flag that may have been set on the diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 7da3882a3622..2b0ad9845363 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -36,25 +36,41 @@ .text +/* + * We store the saved ksp_limit in the unused part + * of the STACK_FRAME_OVERHEAD + */ _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) + lwz r10,THREAD+KSP_LIMIT(r2) + addi r11,r3,THREAD_INFO_GAP stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) mr r1,r3 + stw r10,8(r1) + stw r11,THREAD+KSP_LIMIT(r2) bl __do_softirq + lwz r10,8(r1) lwz r1,0(r1) lwz r0,4(r1) + stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr _GLOBAL(call_do_irq) mflr r0 stw r0,4(r1) + lwz r10,THREAD+KSP_LIMIT(r2) + addi r11,r3,THREAD_INFO_GAP stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) mr r1,r4 + stw r10,8(r1) + stw r11,THREAD+KSP_LIMIT(r2) bl __do_irq + lwz r10,8(r1) lwz r1,0(r1) lwz r0,4(r1) + stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6f428da53e20..96d2fdf3aa9e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1000,9 +1000,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; p->thread.ksp = sp; +#ifdef CONFIG_PPC32 p->thread.ksp_limit = (unsigned long)task_stack_page(p) + _ALIGN_UP(sizeof(struct thread_info), 16); - +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT p->thread.ptrace_bps[0] = NULL; #endif diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index a7ee978fb860..b1faa1593c90 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1505,6 +1505,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) */ if ((ra == 1) && !(regs->msr & MSR_PR) \ && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { +#ifdef CONFIG_PPC32 /* * Check if we will touch kernel sack overflow */ @@ -1513,7 +1514,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) err = -EINVAL; break; } - +#endif /* CONFIG_PPC32 */ /* * Check if we already set since that means we'll * lose the previous value. -- cgit v1.2.3-59-g8ed1b From 0c9fa29149d3726e14262aeb0c8461a948cc9d56 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 24 Sep 2013 16:10:38 +1000 Subject: powerpc/zImage: make the "OF" wrapper support ePAPR boot This makes the "OF" zImage wrapper (zImage.pseries, zImage.pmac, zImage.maple) work if booted via a flat device-tree (ePAPR boot mode), and thus potentially usable with kexec. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/boot/Makefile | 4 ++-- arch/powerpc/boot/epapr-wrapper.c | 9 +++++++++ arch/powerpc/boot/epapr.c | 4 ++-- arch/powerpc/boot/of.c | 16 +++++++++++++++- arch/powerpc/boot/wrapper | 9 +++++---- 5 files changed, 33 insertions(+), 9 deletions(-) create mode 100644 arch/powerpc/boot/epapr-wrapper.c (limited to 'arch') diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 6a15c968d214..15ca2255f438 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -74,7 +74,7 @@ src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c -src-plat-y := of.c +src-plat-y := of.c epapr.c src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \ treeboot-walnut.c cuboot-acadia.c \ cuboot-kilauea.c simpleboot.c \ @@ -97,7 +97,7 @@ src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c cuboot-mpc7448hpc2.c \ prpmc2800.c src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c -src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c +src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c src-wlib := $(sort $(src-wlib-y)) src-plat := $(sort $(src-plat-y)) diff --git a/arch/powerpc/boot/epapr-wrapper.c b/arch/powerpc/boot/epapr-wrapper.c new file mode 100644 index 000000000000..c10191006673 --- /dev/null +++ b/arch/powerpc/boot/epapr-wrapper.c @@ -0,0 +1,9 @@ +extern void epapr_platform_init(unsigned long r3, unsigned long r4, + unsigned long r5, unsigned long r6, + unsigned long r7); + +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + epapr_platform_init(r3, r4, r5, r6, r7); +} diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c index 06c1961bd124..02e91aa2194a 100644 --- a/arch/powerpc/boot/epapr.c +++ b/arch/powerpc/boot/epapr.c @@ -48,8 +48,8 @@ static void platform_fixups(void) fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size); } -void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7) +void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) { epapr_magic = r6; ima_size = r7; diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c index 61d9899aa0d0..62e2f43ec1df 100644 --- a/arch/powerpc/boot/of.c +++ b/arch/powerpc/boot/of.c @@ -26,6 +26,9 @@ static unsigned long claim_base; +void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7); + static void *of_try_claim(unsigned long size) { unsigned long addr = 0; @@ -61,7 +64,7 @@ static void of_image_hdr(const void *hdr) } } -void platform_init(unsigned long a1, unsigned long a2, void *promptr) +static void of_platform_init(unsigned long a1, unsigned long a2, void *promptr) { platform_ops.image_hdr = of_image_hdr; platform_ops.malloc = of_try_claim; @@ -81,3 +84,14 @@ void platform_init(unsigned long a1, unsigned long a2, void *promptr) loader_info.initrd_size = a2; } } + +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + /* Detect OF vs. ePAPR boot */ + if (r5) + of_platform_init(r3, r4, (void *)r5); + else + epapr_platform_init(r3, r4, r5, r6, r7); +} + diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 6761c746048d..cd7af841ba05 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -148,18 +148,18 @@ make_space=y case "$platform" in pseries) - platformo=$object/of.o + platformo="$object/of.o $object/epapr.o" link_address='0x4000000' ;; maple) - platformo=$object/of.o + platformo="$object/of.o $object/epapr.o" link_address='0x400000' ;; pmac|chrp) - platformo=$object/of.o + platformo="$object/of.o $object/epapr.o" ;; coff) - platformo="$object/crt0.o $object/of.o" + platformo="$object/crt0.o $object/of.o $object/epapr.o" lds=$object/zImage.coff.lds link_address='0x500000' pie= @@ -253,6 +253,7 @@ treeboot-iss4xx-mpic) platformo="$object/treeboot-iss4xx.o" ;; epapr) + platformo="$object/epapr.o $object/epapr-wrapper.o" link_address='0x20000000' pie=-pie ;; -- cgit v1.2.3-59-g8ed1b From dbe78b40118636f2d5d276144239dd4bfd5f04f9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 25 Sep 2013 14:02:50 +1000 Subject: powerpc/pseries: Do not start secondaries in Open Firmware Starting secondary CPUs early on from Open Firmware and placing them in a holding spin loop slows down the boot process significantly under some hypervisors such as KVM. This is also unnecessary when RTAS supports querying the CPU state So let's not do it. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 21 +++++++++++++++++++++ arch/powerpc/platforms/pseries/smp.c | 26 ++++++++++++++++---------- 2 files changed, 37 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 12e656ffe60e..5fe2842e8bab 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -196,6 +196,8 @@ static int __initdata mem_reserve_cnt; static cell_t __initdata regbuf[1024]; +static bool rtas_has_query_cpu_stopped; + /* * Error results ... some OF calls will return "-1" on error, some @@ -1574,6 +1576,11 @@ static void __init prom_instantiate_rtas(void) prom_setprop(rtas_node, "/rtas", "linux,rtas-entry", &val, sizeof(val)); + /* Check if it supports "query-cpu-stopped-state" */ + if (prom_getprop(rtas_node, "query-cpu-stopped-state", + &val, sizeof(val)) != PROM_ERROR) + rtas_has_query_cpu_stopped = true; + #if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__) /* PowerVN takeover hack */ prom_rtas_data = base; @@ -1815,6 +1822,18 @@ static void __init prom_hold_cpus(void) = (void *) LOW_ADDR(__secondary_hold_acknowledge); unsigned long secondary_hold = LOW_ADDR(__secondary_hold); + /* + * On pseries, if RTAS supports "query-cpu-stopped-state", + * we skip this stage, the CPUs will be started by the + * kernel using RTAS. + */ + if ((of_platform == PLATFORM_PSERIES || + of_platform == PLATFORM_PSERIES_LPAR) && + rtas_has_query_cpu_stopped) { + prom_printf("prom_hold_cpus: skipped\n"); + return; + } + prom_debug("prom_hold_cpus: start...\n"); prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); @@ -3011,6 +3030,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * On non-powermacs, put all CPUs in spin-loops. * * PowerMacs use a different mechanism to spin CPUs + * + * (This must be done after instanciating RTAS) */ if (of_platform != PLATFORM_POWERMAC && of_platform != PLATFORM_OPAL) diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 1c1771a40250..24f58cb0a543 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -233,18 +233,24 @@ static void __init smp_init_pseries(void) alloc_bootmem_cpumask_var(&of_spin_mask); - /* Mark threads which are still spinning in hold loops. */ - if (cpu_has_feature(CPU_FTR_SMT)) { - for_each_present_cpu(i) { - if (cpu_thread_in_core(i) == 0) - cpumask_set_cpu(i, of_spin_mask); - } - } else { - cpumask_copy(of_spin_mask, cpu_present_mask); + /* + * Mark threads which are still spinning in hold loops + * + * We know prom_init will not have started them if RTAS supports + * query-cpu-stopped-state. + */ + if (rtas_token("query-cpu-stopped-state") == RTAS_UNKNOWN_SERVICE) { + if (cpu_has_feature(CPU_FTR_SMT)) { + for_each_present_cpu(i) { + if (cpu_thread_in_core(i) == 0) + cpumask_set_cpu(i, of_spin_mask); + } + } else + cpumask_copy(of_spin_mask, cpu_present_mask); + + cpumask_clear_cpu(boot_cpuid, of_spin_mask); } - cpumask_clear_cpu(boot_cpuid, of_spin_mask); - /* Non-lpar has additional take/give timebase */ if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { smp_ops->give_timebase = rtas_give_timebase; -- cgit v1.2.3-59-g8ed1b From 7a20c2fad61aa3624e83c671d36dbd36b2661476 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 24 Sep 2013 20:13:44 -0400 Subject: x86/reboot: Fix apparent cut-n-paste mistake in Dell reboot workaround This seems to have been copied from the Optiplex 990 entry above, but somoene forgot to change the ident text. Signed-off-by: Dave Jones Link: http://lkml.kernel.org/r/20130925001344.GA13554@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 5f4ad2714109..e643e744e4d8 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -352,7 +352,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { }, { /* Handle problems with rebooting on the Precision M6600. */ .callback = set_pci_reboot, - .ident = "Dell OptiPlex 990", + .ident = "Dell Precision M6600", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), -- cgit v1.2.3-59-g8ed1b From 0160676bba69523e8b0ac83f306cce7d342ed7c8 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 13 Sep 2013 15:13:30 +0100 Subject: xen/p2m: check MFN is in range before using the m2p table On hosts with more than 168 GB of memory, a 32-bit guest may attempt to grant map an MFN that is error cannot lookup in its mapping of the m2p table. There is an m2p lookup as part of m2p_add_override() and m2p_remove_override(). The lookup falls off the end of the mapped portion of the m2p and (because the mapping is at the highest virtual address) wraps around and the lookup causes a fault on what appears to be a user space address. do_page_fault() (thinking it's a fault to a userspace address), tries to lock mm->mmap_sem. If the gntdev device is used for the grant map, m2p_add_override() is called from from gnttab_mmap() with mm->mmap_sem already locked. do_page_fault() then deadlocks. The deadlock would most commonly occur when a 64-bit guest is started and xenconsoled attempts to grant map its console ring. Introduce mfn_to_pfn_no_overrides() which checks the MFN is within the mapped portion of the m2p table before accessing the table and use this in m2p_add_override(), m2p_remove_override(), and mfn_to_pfn() (which already had the correct range check). All faults caused by accessing the non-existant parts of the m2p are thus within the kernel address space and exception_fixup() is called without trying to lock mm->mmap_sem. This means that for MFNs that are outside the mapped range of the m2p then mfn_to_pfn() will always look in the m2p overrides. This is correct because it must be a foreign MFN (and the PFN in the m2p in this case is only relevant for the other domain). Signed-off-by: David Vrabel Cc: Stefano Stabellini Cc: Jan Beulich -- v3: check for auto_translated_physmap in mfn_to_pfn_no_overrides() v2: in mfn_to_pfn() look in m2p_overrides if the MFN is out of range as it's probably foreign. Signed-off-by: Konrad Rzeszutek Wilk Acked-by: Stefano Stabellini --- arch/x86/include/asm/xen/page.h | 31 ++++++++++++++++++++----------- arch/x86/xen/p2m.c | 10 ++++------ 2 files changed, 24 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 6aef9fbc09b7..b913915e8e63 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -79,30 +79,38 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY; } -static inline unsigned long mfn_to_pfn(unsigned long mfn) +static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn) { unsigned long pfn; - int ret = 0; + int ret; if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; - if (unlikely(mfn >= machine_to_phys_nr)) { - pfn = ~0; - goto try_override; - } - pfn = 0; + if (unlikely(mfn >= machine_to_phys_nr)) + return ~0; + /* * The array access can fail (e.g., device space beyond end of RAM). * In such cases it doesn't matter what we return (we return garbage), * but we must handle the fault without crashing! */ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); -try_override: - /* ret might be < 0 if there are no entries in the m2p for mfn */ if (ret < 0) - pfn = ~0; - else if (get_phys_to_machine(pfn) != mfn) + return ~0; + + return pfn; +} + +static inline unsigned long mfn_to_pfn(unsigned long mfn) +{ + unsigned long pfn; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return mfn; + + pfn = mfn_to_pfn_no_overrides(mfn); + if (get_phys_to_machine(pfn) != mfn) { /* * If this appears to be a foreign mfn (because the pfn * doesn't map back to the mfn), then check the local override @@ -111,6 +119,7 @@ try_override: * m2p_find_override_pfn returns ~0 if it doesn't find anything. */ pfn = m2p_find_override_pfn(mfn, ~0); + } /* * pfn is ~0 if there are no entries in the m2p for mfn or if the diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 8b901e8d782d..a61c7d5811be 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -879,7 +879,6 @@ int m2p_add_override(unsigned long mfn, struct page *page, unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; - int ret = 0; pfn = page_to_pfn(page); if (!PageHighMem(page)) { @@ -926,8 +925,8 @@ int m2p_add_override(unsigned long mfn, struct page *page, * frontend pages while they are being shared with the backend, * because mfn_to_pfn (that ends up being called by GUPF) will * return the backend pfn rather than the frontend pfn. */ - ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); - if (ret == 0 && get_phys_to_machine(pfn) == mfn) + pfn = mfn_to_pfn_no_overrides(mfn); + if (get_phys_to_machine(pfn) == mfn) set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); return 0; @@ -942,7 +941,6 @@ int m2p_remove_override(struct page *page, unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; - int ret = 0; pfn = page_to_pfn(page); mfn = get_phys_to_machine(pfn); @@ -1029,8 +1027,8 @@ int m2p_remove_override(struct page *page, * the original pfn causes mfn_to_pfn(mfn) to return the frontend * pfn again. */ mfn &= ~FOREIGN_FRAME_BIT; - ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); - if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && + pfn = mfn_to_pfn_no_overrides(mfn); + if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && m2p_find_override(mfn) == NULL) set_phys_to_machine(pfn, mfn); -- cgit v1.2.3-59-g8ed1b From 55c25c2f14496badefd780a9f179442756216b67 Mon Sep 17 00:00:00 2001 From: Jayachandran C Date: Wed, 25 Sep 2013 18:31:05 +0530 Subject: MIPS: mm: Move some checks out of 'for' loop in DMA operations The check cpu_needs_post_dma_flush() in mips_dma_sync_sg_for_cpu() and the check !plat_device_is_coherent() in mips_dma_sync_sg_for_device() can be moved outside the for loop. As a side effect, this also avoids a GCC bug that caused kernel compile to fail with the error: arch/mips/mm/dma-default.c: In function 'mips_dma_sync_sg_for_cpu': arch/mips/mm/dma-default.c:316:1: internal compiler error: in add_insn_before, at emit-rtl.c:3852 This gcc failure is seen in Code Sourcery toolchains [e.g. gcc version 4.7.2 (Sourcery CodeBench Lite 2012.09-99)] after commit "MIPS: Optimize current_cpu_type() for better code." Signed-off-by: Jayachandran C Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5907/ Reviewed-by: Markos Chandras Tested-by: Markos Chandras Signed-off-by: Ralf Baechle --- arch/mips/mm/dma-default.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index f25a7e9f8cbc..5f8b95512580 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -308,12 +308,10 @@ static void mips_dma_sync_sg_for_cpu(struct device *dev, { int i; - /* Make sure that gcc doesn't leave the empty loop body. */ - for (i = 0; i < nelems; i++, sg++) { - if (cpu_needs_post_dma_flush(dev)) + if (cpu_needs_post_dma_flush(dev)) + for (i = 0; i < nelems; i++, sg++) __dma_sync(sg_page(sg), sg->offset, sg->length, direction); - } } static void mips_dma_sync_sg_for_device(struct device *dev, @@ -321,12 +319,10 @@ static void mips_dma_sync_sg_for_device(struct device *dev, { int i; - /* Make sure that gcc doesn't leave the empty loop body. */ - for (i = 0; i < nelems; i++, sg++) { - if (!plat_device_is_coherent(dev)) + if (!plat_device_is_coherent(dev)) + for (i = 0; i < nelems; i++, sg++) __dma_sync(sg_page(sg), sg->offset, sg->length, direction); - } } int mips_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -- cgit v1.2.3-59-g8ed1b From accd1e823ed1d5980106dd522a4c535084400830 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 29 Sep 2010 19:27:12 -0500 Subject: x86/microcode/AMD: Fix patch level reporting for family 15h On AMD family 14h, applying microcode patch on the a core (core0) would also affect the other core (core1) in the same compute unit. The driver would skip applying the patch on core1, but it still need to update kernel structures to reflect the proper patch level. The current logic is not updating the struct ucode_cpu_info.cpu_sig.rev of the skipped core. This causes the /sys/devices/system/cpu/cpu1/microcode/version to report incorrect patch level as shown below: $ grep . cpu?/microcode/version cpu0/microcode/version:0x600063d cpu1/microcode/version:0x6000626 cpu2/microcode/version:0x600063d cpu3/microcode/version:0x6000626 cpu4/microcode/version:0x600063d Signed-off-by: Suravee Suthikulpanit Acked-by: Borislav Petkov Cc: Cc: Cc: Link: http://lkml.kernel.org/r/1285806432-1995-1-git-send-email-suravee.suthikulpanit@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 7123b5df479d..af99f71aeb7f 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -216,6 +216,7 @@ int apply_microcode_amd(int cpu) /* need to apply patch? */ if (rev >= mc_amd->hdr.patch_id) { c->microcode = rev; + uci->cpu_sig.rev = rev; return 0; } -- cgit v1.2.3-59-g8ed1b From 0c06a5d4b13cd66c833805a0d1db76b977944aac Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 10 Sep 2013 00:54:17 +0200 Subject: arm: Fix build error with context tracking calls ad65782fba50 (context_tracking: Optimize main APIs off case with static key) converted context tracking main APIs to inline function and left ARM asm callers behind. This can be easily fixed by making ARM calling the post static keys context tracking function. We just need to replicate the static key checks there. We'll remove these later when ARM will support the context tracking static keys. Reported-by: Guenter Roeck Reported-by: Russell King Signed-off-by: Frederic Weisbecker Tested-by: Kevin Hilman Cc: Nicolas Pitre Cc: Anil Kumar Cc: Tony Lindgren Cc: Benoit Cousson Cc: Guenter Roeck Cc: Russell King Cc: Kevin Hilman --- arch/arm/kernel/entry-header.S | 8 ++++---- kernel/context_tracking.c | 12 ++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index de23a9beed13..39f89fbd5111 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -329,10 +329,10 @@ #ifdef CONFIG_CONTEXT_TRACKING .if \save stmdb sp!, {r0-r3, ip, lr} - bl user_exit + bl context_tracking_user_exit ldmia sp!, {r0-r3, ip, lr} .else - bl user_exit + bl context_tracking_user_exit .endif #endif .endm @@ -341,10 +341,10 @@ #ifdef CONFIG_CONTEXT_TRACKING .if \save stmdb sp!, {r0-r3, ip, lr} - bl user_enter + bl context_tracking_user_enter ldmia sp!, {r0-r3, ip, lr} .else - bl user_enter + bl context_tracking_user_enter .endif #endif .endm diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 247091bf0587..859c8dfd78a1 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -50,6 +50,15 @@ void context_tracking_user_enter(void) { unsigned long flags; + /* + * Repeat the user_enter() check here because some archs may be calling + * this from asm and if no CPU needs context tracking, they shouldn't + * go further. Repeat the check here until they support the static key + * check. + */ + if (!static_key_false(&context_tracking_enabled)) + return; + /* * Some contexts may involve an exception occuring in an irq, * leading to that nesting: @@ -151,6 +160,9 @@ void context_tracking_user_exit(void) { unsigned long flags; + if (!static_key_false(&context_tracking_enabled)) + return; + if (in_interrupt()) return; -- cgit v1.2.3-59-g8ed1b From 083986e8248d978b6c961d3da6beb0c921c68220 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 28 Sep 2013 11:23:59 +0200 Subject: mutex: replace CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX with simple ifdef Linus suggested to replace #ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX #define arch_mutex_cpu_relax() cpu_relax() #endif with just a simple #ifndef arch_mutex_cpu_relax # define arch_mutex_cpu_relax() cpu_relax() #endif to get rid of CONFIG_HAVE_CPU_RELAX_SIMPLE. So architectures can simply define arch_mutex_cpu_relax if they want an architecture specific function instead of having to add a select statement in their Kconfig in addition. Suggested-by: Linus Torvalds Signed-off-by: Heiko Carstens --- arch/Kconfig | 3 --- arch/s390/Kconfig | 1 - arch/s390/include/asm/mutex.h | 2 -- arch/s390/include/asm/processor.h | 2 ++ include/linux/mutex.h | 6 +++--- 5 files changed, 5 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 1feb169274fe..af2cc6eabcc7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -286,9 +286,6 @@ config HAVE_PERF_USER_STACK_DUMP config HAVE_ARCH_JUMP_LABEL bool -config HAVE_ARCH_MUTEX_CPU_RELAX - bool - config HAVE_RCU_TABLE_FREE bool diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index dcc6ac2d8026..d3fa84070b82 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -102,7 +102,6 @@ config S390 select GENERIC_TIME_VSYSCALL_OLD select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 - select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h index 688271f5f2e4..458c1f7fbc18 100644 --- a/arch/s390/include/asm/mutex.h +++ b/arch/s390/include/asm/mutex.h @@ -7,5 +7,3 @@ */ #include - -#define arch_mutex_cpu_relax() barrier() diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 0eb37505cab1..ca7821f07260 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -198,6 +198,8 @@ static inline void cpu_relax(void) barrier(); } +#define arch_mutex_cpu_relax() barrier() + static inline void psw_set_key(unsigned int key) { asm volatile("spka 0(%0)" : : "d" (key)); diff --git a/include/linux/mutex.h b/include/linux/mutex.h index ccd4260834c5..bab49da8a0f0 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -15,8 +15,8 @@ #include #include #include - #include +#include /* * Simple, straightforward mutexes with strict semantics: @@ -175,8 +175,8 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); -#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX -#define arch_mutex_cpu_relax() cpu_relax() +#ifndef arch_mutex_cpu_relax +# define arch_mutex_cpu_relax() cpu_relax() #endif #endif -- cgit v1.2.3-59-g8ed1b From efc1d23b3d9af8cbff9f26677d67fb9c1b9cb792 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 5 Sep 2013 13:26:17 +0200 Subject: s390: enable ARCH_USE_CMPXCHG_LOCKREF Enable ARCH_USE_CMPXCHG_LOCKREF since it shows performance improvements with Linus' simple stat() test case of up to 50% on a 30 cpu system. Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 + arch/s390/include/asm/spinlock.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d3fa84070b82..7143793859fa 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -93,6 +93,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS2 diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 701fe8c59e1f..83e5d216105e 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -44,6 +44,11 @@ extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); extern int arch_spin_trylock_retry(arch_spinlock_t *); extern void arch_spin_relax(arch_spinlock_t *lock); +static inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.owner_cpu == 0; +} + static inline void arch_spin_lock(arch_spinlock_t *lp) { int old; -- cgit v1.2.3-59-g8ed1b From 8a3da6c7d0031fcb6a0d17f9c7a68b0e01f52855 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 28 Sep 2013 15:48:48 +0200 Subject: perf/x86: Fix PMU detection printout when no PMU is detected Ran into this cryptic PMU bootup log recently: [ 0.124047] Performance Events: [ 0.125000] smpboot: ... Turns out we print this if no PMU is detected. Fall back to the right condition so that the following is printed: [ 0.122381] Performance Events: no PMU driver, software events only. Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/n/tip-u2fwaUffakjp0qkpRfqljgsn@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index a9c606bb4945..897783b3302a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1506,7 +1506,7 @@ static int __init init_hw_perf_events(void) err = amd_pmu_init(); break; default: - return 0; + err = -ENOTSUPP; } if (err != 0) { pr_cont("no PMU driver, software events only.\n"); -- cgit v1.2.3-59-g8ed1b From cf154b7e22e9f6714f0a806c023c329b890132a2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 20 Sep 2013 09:57:37 +0200 Subject: ARM: pull in from asm-generic Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index d3db39860b9c..6577b8aeb711 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -24,6 +24,7 @@ generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h generic-y += siginfo.h +generic-y += simd.h generic-y += sizes.h generic-y += socket.h generic-y += sockios.h -- cgit v1.2.3-59-g8ed1b From 5ce26f3b5ae8fafdd28375a004f7b8e924e9bacb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 15 Sep 2013 17:10:43 +0200 Subject: ARM: move AES typedefs and function prototypes to separate header Put the struct definitions for AES keys and the asm function prototypes in a separate header and export the asm functions from the module. This allows other drivers to use them directly. Signed-off-by: Ard Biesheuvel --- arch/arm/crypto/aes_glue.c | 22 ++++++---------------- arch/arm/crypto/aes_glue.h | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 16 deletions(-) create mode 100644 arch/arm/crypto/aes_glue.h (limited to 'arch') diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c index 59f7877ead6a..3003fa1f6fb4 100644 --- a/arch/arm/crypto/aes_glue.c +++ b/arch/arm/crypto/aes_glue.c @@ -6,22 +6,12 @@ #include #include -#define AES_MAXNR 14 +#include "aes_glue.h" -typedef struct { - unsigned int rd_key[4 *(AES_MAXNR + 1)]; - int rounds; -} AES_KEY; - -struct AES_CTX { - AES_KEY enc_key; - AES_KEY dec_key; -}; - -asmlinkage void AES_encrypt(const u8 *in, u8 *out, AES_KEY *ctx); -asmlinkage void AES_decrypt(const u8 *in, u8 *out, AES_KEY *ctx); -asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); -asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); +EXPORT_SYMBOL(AES_encrypt); +EXPORT_SYMBOL(AES_decrypt); +EXPORT_SYMBOL(private_AES_set_encrypt_key); +EXPORT_SYMBOL(private_AES_set_decrypt_key); static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { @@ -81,7 +71,7 @@ static struct crypto_alg aes_alg = { .cipher = { .cia_min_keysize = AES_MIN_KEY_SIZE, .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, + .cia_setkey = aes_set_key, .cia_encrypt = aes_encrypt, .cia_decrypt = aes_decrypt } diff --git a/arch/arm/crypto/aes_glue.h b/arch/arm/crypto/aes_glue.h new file mode 100644 index 000000000000..cca3e51eb606 --- /dev/null +++ b/arch/arm/crypto/aes_glue.h @@ -0,0 +1,19 @@ + +#define AES_MAXNR 14 + +struct AES_KEY { + unsigned int rd_key[4 * (AES_MAXNR + 1)]; + int rounds; +}; + +struct AES_CTX { + struct AES_KEY enc_key; + struct AES_KEY dec_key; +}; + +asmlinkage void AES_encrypt(const u8 *in, u8 *out, struct AES_KEY *ctx); +asmlinkage void AES_decrypt(const u8 *in, u8 *out, struct AES_KEY *ctx); +asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, + const int bits, struct AES_KEY *key); +asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, + const int bits, struct AES_KEY *key); -- cgit v1.2.3-59-g8ed1b From e4e7f10bfc4069925e99cc4b428c3434e30b6c3f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 16 Sep 2013 18:31:38 +0200 Subject: ARM: add support for bit sliced AES using NEON instructions Bit sliced AES gives around 45% speedup on Cortex-A15 for encryption and around 25% for decryption. This implementation of the AES algorithm does not rely on any lookup tables so it is believed to be invulnerable to cache timing attacks. This algorithm processes up to 8 blocks in parallel in constant time. This means that it is not usable by chaining modes that are strictly sequential in nature, such as CBC encryption. CBC decryption, however, can benefit from this implementation and runs about 25% faster. The other chaining modes implemented in this module, XTS and CTR, can execute fully in parallel in both directions. The core code has been adopted from the OpenSSL project (in collaboration with the original author, on cc). For ease of maintenance, this version is identical to the upstream OpenSSL code, i.e., all modifications that were required to make it suitable for inclusion into the kernel have been made upstream. The original can be found here: http://git.openssl.org/gitweb/?p=openssl.git;a=commit;h=6f6a6130 Note to integrators: While this implementation is significantly faster than the existing table based ones (generic or ARM asm), especially in CTR mode, the effects on power efficiency are unclear as of yet. This code does fundamentally more work, by calculating values that the table based code obtains by a simple lookup; only by doing all of that work in a SIMD fashion, it manages to perform better. Cc: Andy Polyakov Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/crypto/Makefile | 14 +- arch/arm/crypto/aesbs-core.S_shipped | 2544 ++++++++++++++++++++++++++++++++++ arch/arm/crypto/aesbs-glue.c | 434 ++++++ arch/arm/crypto/bsaes-armv7.pl | 2467 +++++++++++++++++++++++++++++++++ crypto/Kconfig | 16 + 5 files changed, 5473 insertions(+), 2 deletions(-) create mode 100644 arch/arm/crypto/aesbs-core.S_shipped create mode 100644 arch/arm/crypto/aesbs-glue.c create mode 100644 arch/arm/crypto/bsaes-armv7.pl (limited to 'arch') diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index a2c83851bc90..81cda39860c5 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -3,7 +3,17 @@ # obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o +obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o -aes-arm-y := aes-armv4.o aes_glue.o -sha1-arm-y := sha1-armv4-large.o sha1_glue.o +aes-arm-y := aes-armv4.o aes_glue.o +aes-arm-bs-y := aesbs-core.o aesbs-glue.o +sha1-arm-y := sha1-armv4-large.o sha1_glue.o + +quiet_cmd_perl = PERL $@ + cmd_perl = $(PERL) $(<) > $(@) + +$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl + $(call cmd,perl) + +.PRECIOUS: $(obj)/aesbs-core.S diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped new file mode 100644 index 000000000000..64205d453260 --- /dev/null +++ b/arch/arm/crypto/aesbs-core.S_shipped @@ -0,0 +1,2544 @@ + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel +@ . Permission to use under GPL terms is +@ granted. +@ ==================================================================== + +@ Bit-sliced AES for ARM NEON +@ +@ February 2012. +@ +@ This implementation is direct adaptation of bsaes-x86_64 module for +@ ARM NEON. Except that this module is endian-neutral [in sense that +@ it can be compiled for either endianness] by courtesy of vld1.8's +@ neutrality. Initial version doesn't implement interface to OpenSSL, +@ only low-level primitives and unsupported entry points, just enough +@ to collect performance results, which for Cortex-A8 core are: +@ +@ encrypt 19.5 cycles per byte processed with 128-bit key +@ decrypt 22.1 cycles per byte processed with 128-bit key +@ key conv. 440 cycles per 128-bit key/0.18 of 8x block +@ +@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, +@ which is [much] worse than anticipated (for further details see +@ http://www.openssl.org/~appro/Snapdragon-S4.html). +@ +@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code +@ manages in 20.0 cycles]. +@ +@ When comparing to x86_64 results keep in mind that NEON unit is +@ [mostly] single-issue and thus can't [fully] benefit from +@ instruction-level parallelism. And when comparing to aes-armv4 +@ results keep in mind key schedule conversion overhead (see +@ bsaes-x86_64.pl for further details)... +@ +@ + +@ April-August 2013 +@ +@ Add CBC, CTR and XTS subroutines, adapt for kernel use. +@ +@ + +#ifndef __KERNEL__ +# include "arm_arch.h" + +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +# define VFP_ABI_FRAME 0x40 +#else +# define VFP_ABI_PUSH +# define VFP_ABI_POP +# define VFP_ABI_FRAME 0 +# define BSAES_ASM_EXTENDED_KEY +# define XTS_CHAIN_TWEAK +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + +#ifdef __thumb__ +# define adrl adr +#endif + +#if __ARM_ARCH__>=7 +.text +.syntax unified @ ARMv7-capable assembler is expected to handle this +#ifdef __thumb2__ +.thumb +#else +.code 32 +#endif + +.fpu neon + +.type _bsaes_decrypt8,%function +.align 4 +_bsaes_decrypt8: + adr r6,_bsaes_decrypt8 + vldmia r4!, {q9} @ round 0 key + add r6,r6,#.LM0ISR-_bsaes_decrypt8 + + vldmia r6!, {q8} @ .LM0ISR + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Ldec_sbox +.align 4 +.Ldec_loop: + vldmia r4!, {q8-q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Ldec_sbox: + veor q1, q1, q4 + veor q3, q3, q4 + + veor q4, q4, q7 + veor q1, q1, q6 + veor q2, q2, q7 + veor q6, q6, q4 + + veor q0, q0, q1 + veor q2, q2, q5 + veor q7, q7, q6 + veor q3, q3, q0 + veor q5, q5, q0 + veor q1, q1, q3 + veor q11, q3, q0 + veor q10, q7, q4 + veor q9, q1, q6 + veor q13, q4, q0 + vmov q8, q10 + veor q12, q5, q2 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q6, q2 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q3, q7 + veor q12, q1, q5 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q4, q6 + veor q9, q9, q14 + vand q13, q0, q2 + vand q14, q7, q1 + vorr q15, q3, q5 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q5, q2 + veor q8, q1, q6 + veor q10, q15, q14 + vand q10, q10, q5 + veor q5, q5, q1 + vand q11, q1, q15 + vand q5, q5, q14 + veor q1, q11, q10 + veor q5, q5, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q2 + veor q12, q12, q8 + veor q2, q2, q6 + vand q8, q8, q15 + vand q6, q6, q13 + vand q12, q12, q14 + vand q2, q2, q9 + veor q8, q8, q12 + veor q2, q2, q6 + veor q12, q12, q11 + veor q6, q6, q10 + veor q5, q5, q12 + veor q2, q2, q12 + veor q1, q1, q8 + veor q6, q6, q8 + + veor q12, q3, q0 + veor q8, q7, q4 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q4 + vand q8, q8, q15 + vand q4, q4, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q4 + veor q12, q12, q11 + veor q4, q4, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q3 + veor q3, q3, q7 + vand q11, q7, q15 + vand q3, q3, q14 + veor q7, q11, q10 + veor q3, q3, q11 + veor q3, q3, q12 + veor q0, q0, q12 + veor q7, q7, q8 + veor q4, q4, q8 + veor q1, q1, q7 + veor q6, q6, q5 + + veor q4, q4, q1 + veor q2, q2, q7 + veor q5, q5, q7 + veor q4, q4, q2 + veor q7, q7, q0 + veor q4, q4, q5 + veor q3, q3, q6 + veor q6, q6, q1 + veor q3, q3, q4 + + veor q4, q4, q0 + veor q7, q7, q3 + subs r5,r5,#1 + bcc .Ldec_done + @ multiplication by 0x05-0x00-0x04-0x00 + vext.8 q8, q0, q0, #8 + vext.8 q14, q3, q3, #8 + vext.8 q15, q5, q5, #8 + veor q8, q8, q0 + vext.8 q9, q1, q1, #8 + veor q14, q14, q3 + vext.8 q10, q6, q6, #8 + veor q15, q15, q5 + vext.8 q11, q4, q4, #8 + veor q9, q9, q1 + vext.8 q12, q2, q2, #8 + veor q10, q10, q6 + vext.8 q13, q7, q7, #8 + veor q11, q11, q4 + veor q12, q12, q2 + veor q13, q13, q7 + + veor q0, q0, q14 + veor q1, q1, q14 + veor q6, q6, q8 + veor q2, q2, q10 + veor q4, q4, q9 + veor q1, q1, q15 + veor q6, q6, q15 + veor q2, q2, q14 + veor q7, q7, q11 + veor q4, q4, q14 + veor q3, q3, q12 + veor q2, q2, q15 + veor q7, q7, q15 + veor q5, q5, q13 + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + vext.8 q10, q6, q6, #12 + veor q1, q1, q9 + vext.8 q11, q4, q4, #12 + veor q6, q6, q10 + vext.8 q12, q2, q2, #12 + veor q4, q4, q11 + vext.8 q13, q7, q7, #12 + veor q2, q2, q12 + vext.8 q14, q3, q3, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q3, q3, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q2 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q2, q2, #8 + veor q12, q12, q4 + vext.8 q9, q7, q7, #8 + veor q15, q15, q3 + vext.8 q2, q4, q4, #8 + veor q11, q11, q6 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q4, q3, q3, #8 + veor q11, q11, q5 + vext.8 q3, q6, q6, #8 + veor q5, q9, q13 + veor q11, q11, q2 + veor q7, q7, q15 + veor q6, q4, q14 + veor q4, q8, q12 + veor q2, q3, q10 + vmov q3, q11 + @ vmov q5, q9 + vldmia r6, {q12} @ .LISR + ite eq @ Thumb2 thing, sanity check in ARM + addeq r6,r6,#0x10 + bne .Ldec_loop + vldmia r6, {q12} @ .LISRM0 + b .Ldec_loop +.align 4 +.Ldec_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q3, #1 + vshr.u64 q11, q2, #1 + veor q10, q10, q5 + veor q11, q11, q7 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q4 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q4, q4, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q2, #2 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q3, q3, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q4 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q4, q4, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q4, #4 + vshr.u64 q11, q6, #4 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q4, q4, q10 + veor q6, q6, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q6, q6, q8 + veor q4, q4, q8 + veor q2, q2, q8 + veor q7, q7, q8 + veor q3, q3, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_decrypt8,.-_bsaes_decrypt8 + +.type _bsaes_const,%object +.align 6 +_bsaes_const: +.LM0ISR: @ InvShiftRows constants + .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISR: + .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LISRM0: + .quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LM0SR: @ ShiftRows constants + .quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSR: + .quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: + .quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0: + .quad 0x02060a0e03070b0f, 0x0004080c0105090d +.LREVM0SR: + .quad 0x090d01050c000408, 0x03070b0f060a0e02 +.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by " +.align 6 +.size _bsaes_const,.-_bsaes_const + +.type _bsaes_encrypt8,%function +.align 4 +_bsaes_encrypt8: + adr r6,_bsaes_encrypt8 + vldmia r4!, {q9} @ round 0 key + sub r6,r6,#_bsaes_encrypt8-.LM0SR + + vldmia r6!, {q8} @ .LM0SR +_bsaes_encrypt8_alt: + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 +_bsaes_encrypt8_bitslice: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Lenc_sbox +.align 4 +.Lenc_loop: + vldmia r4!, {q8-q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Lenc_sbox: + veor q2, q2, q1 + veor q5, q5, q6 + veor q3, q3, q0 + veor q6, q6, q2 + veor q5, q5, q0 + + veor q6, q6, q3 + veor q3, q3, q7 + veor q7, q7, q5 + veor q3, q3, q4 + veor q4, q4, q5 + + veor q2, q2, q7 + veor q3, q3, q1 + veor q1, q1, q5 + veor q11, q7, q4 + veor q10, q1, q2 + veor q9, q5, q3 + veor q13, q2, q4 + vmov q8, q10 + veor q12, q6, q0 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q3, q0 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q7, q1 + veor q12, q5, q6 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q2, q3 + veor q9, q9, q14 + vand q13, q4, q0 + vand q14, q1, q5 + vorr q15, q7, q6 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q6, q0 + veor q8, q5, q3 + veor q10, q15, q14 + vand q10, q10, q6 + veor q6, q6, q5 + vand q11, q5, q15 + vand q6, q6, q14 + veor q5, q11, q10 + veor q6, q6, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q3 + vand q8, q8, q15 + vand q3, q3, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q3 + veor q12, q12, q11 + veor q3, q3, q10 + veor q6, q6, q12 + veor q0, q0, q12 + veor q5, q5, q8 + veor q3, q3, q8 + + veor q12, q7, q4 + veor q8, q1, q2 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q4 + veor q12, q12, q8 + veor q4, q4, q2 + vand q8, q8, q15 + vand q2, q2, q13 + vand q12, q12, q14 + vand q4, q4, q9 + veor q8, q8, q12 + veor q4, q4, q2 + veor q12, q12, q11 + veor q2, q2, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q7 + veor q7, q7, q1 + vand q11, q1, q15 + vand q7, q7, q14 + veor q1, q11, q10 + veor q7, q7, q11 + veor q7, q7, q12 + veor q4, q4, q12 + veor q1, q1, q8 + veor q2, q2, q8 + veor q7, q7, q0 + veor q1, q1, q6 + veor q6, q6, q0 + veor q4, q4, q7 + veor q0, q0, q1 + + veor q1, q1, q5 + veor q5, q5, q2 + veor q2, q2, q3 + veor q3, q3, q5 + veor q4, q4, q5 + + veor q6, q6, q3 + subs r5,r5,#1 + bcc .Lenc_done + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + vext.8 q10, q4, q4, #12 + veor q1, q1, q9 + vext.8 q11, q6, q6, #12 + veor q4, q4, q10 + vext.8 q12, q3, q3, #12 + veor q6, q6, q11 + vext.8 q13, q7, q7, #12 + veor q3, q3, q12 + vext.8 q14, q2, q2, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q2, q2, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q3 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q3, q3, #8 + veor q12, q12, q6 + vext.8 q9, q7, q7, #8 + veor q15, q15, q2 + vext.8 q3, q6, q6, #8 + veor q11, q11, q4 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q6, q2, q2, #8 + veor q11, q11, q5 + vext.8 q2, q4, q4, #8 + veor q5, q9, q13 + veor q4, q8, q12 + veor q3, q3, q11 + veor q7, q7, q15 + veor q6, q6, q14 + @ vmov q4, q8 + veor q2, q2, q10 + @ vmov q5, q9 + vldmia r6, {q12} @ .LSR + ite eq @ Thumb2 thing, samity check in ARM + addeq r6,r6,#0x10 + bne .Lenc_loop + vldmia r6, {q12} @ .LSRM0 + b .Lenc_loop +.align 4 +.Lenc_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q3, #1 + veor q10, q10, q5 + veor q11, q11, q7 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q3, q3, q11 + vshr.u64 q10, q4, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q6 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q6, q6, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q4, q4, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q3, #2 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q3, q3, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q6 + veor q11, q11, q4 + vand q10, q10, q9 + vand q11, q11, q9 + veor q6, q6, q10 + vshl.u64 q10, q10, #2 + veor q4, q4, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q6, #4 + vshr.u64 q11, q4, #4 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q4, q4, q8 + veor q6, q6, q8 + veor q3, q3, q8 + veor q7, q7, q8 + veor q2, q2, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_encrypt8,.-_bsaes_encrypt8 +.type _bsaes_key_convert,%function +.align 4 +_bsaes_key_convert: + adr r6,_bsaes_key_convert + vld1.8 {q7}, [r4]! @ load round 0 key + sub r6,r6,#_bsaes_key_convert-.LM0 + vld1.8 {q15}, [r4]! @ load round 1 key + + vmov.i8 q8, #0x01 @ bit masks + vmov.i8 q9, #0x02 + vmov.i8 q10, #0x04 + vmov.i8 q11, #0x08 + vmov.i8 q12, #0x10 + vmov.i8 q13, #0x20 + vldmia r6, {q14} @ .LM0 + +#ifdef __ARMEL__ + vrev32.8 q7, q7 + vrev32.8 q15, q15 +#endif + sub r5,r5,#1 + vstmia r12!, {q7} @ save round 0 key + b .Lkey_loop + +.align 4 +.Lkey_loop: + vtbl.8 d14,{q15},d28 + vtbl.8 d15,{q15},d29 + vmov.i8 q6, #0x40 + vmov.i8 q15, #0x80 + + vtst.8 q0, q7, q8 + vtst.8 q1, q7, q9 + vtst.8 q2, q7, q10 + vtst.8 q3, q7, q11 + vtst.8 q4, q7, q12 + vtst.8 q5, q7, q13 + vtst.8 q6, q7, q6 + vtst.8 q7, q7, q15 + vld1.8 {q15}, [r4]! @ load next round key + vmvn q0, q0 @ "pnot" + vmvn q1, q1 + vmvn q5, q5 + vmvn q6, q6 +#ifdef __ARMEL__ + vrev32.8 q15, q15 +#endif + subs r5,r5,#1 + vstmia r12!,{q0-q7} @ write bit-sliced round key + bne .Lkey_loop + + vmov.i8 q7,#0x63 @ compose .L63 + @ don't save last round key + bx lr +.size _bsaes_key_convert,.-_bsaes_key_convert +.extern AES_cbc_encrypt +.extern AES_decrypt + +.global bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,%function +.align 5 +bsaes_cbc_encrypt: +#ifndef __KERNEL__ + cmp r2, #128 +#ifndef __thumb__ + blo AES_cbc_encrypt +#else + bhs 1f + b AES_cbc_encrypt +1: +#endif +#endif + + @ it is up to the caller to make sure we are called with enc == 0 + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ IV is 1st arg on the stack + mov r2, r2, lsr#4 @ len in 16 byte blocks + sub sp, #0x10 @ scratch space to carry over the IV + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ sifze of bit-slices key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + vldmia sp, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia sp, {q7} +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r3, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 +0: +#endif + + vld1.8 {q15}, [r8] @ load IV + b .Lcbc_dec_loop + +.align 4 +.Lcbc_dec_loop: + subs r2, r2, #0x8 + bmi .Lcbc_dec_loop_finish + + vld1.8 {q0-q1}, [r0]! @ load input + vld1.8 {q2-q3}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + vld1.8 {q4-q5}, [r0]! + mov r5, r10 + vld1.8 {q6-q7}, [r0] + sub r0, r0, #0x60 + vstmia r9, {q15} @ put aside IV + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q14-q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + veor q5, q5, q14 + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + vst1.8 {q5}, [r1]! + + b .Lcbc_dec_loop + +.Lcbc_dec_loop_finish: + adds r2, r2, #8 + beq .Lcbc_dec_done + + vld1.8 {q0}, [r0]! @ load input + cmp r2, #2 + blo .Lcbc_dec_one + vld1.8 {q1}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + mov r5, r10 + vstmia r9, {q15} @ put aside IV + beq .Lcbc_dec_two + vld1.8 {q2}, [r0]! + cmp r2, #4 + blo .Lcbc_dec_three + vld1.8 {q3}, [r0]! + beq .Lcbc_dec_four + vld1.8 {q4}, [r0]! + cmp r2, #6 + blo .Lcbc_dec_five + vld1.8 {q5}, [r0]! + beq .Lcbc_dec_six + vld1.8 {q6}, [r0]! + sub r0, r0, #0x70 + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_six: + sub r0, r0, #0x60 + bl _bsaes_decrypt8 + vldmia r9,{q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_five: + sub r0, r0, #0x50 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0-q1}, [r1]! @ write output + veor q2, q2, q11 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_four: + sub r0, r0, #0x40 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_three: + sub r0, r0, #0x30 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_two: + sub r0, r0, #0x20 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! @ reload input + veor q1, q1, q8 + vst1.8 {q0-q1}, [r1]! @ write output + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_one: + sub r0, r0, #0x10 + mov r10, r1 @ save original out pointer + mov r1, r9 @ use the iv scratch space as out buffer + mov r2, r3 + vmov q4,q15 @ just in case ensure that IV + vmov q5,q0 @ and input are preserved + bl AES_decrypt + vld1.8 {q0}, [r9,:64] @ load result + veor q0, q0, q4 @ ^= IV + vmov q15, q5 @ q5 holds input + vst1.8 {q0}, [r10] @ write output + +.Lcbc_dec_done: +#ifndef BSAES_ASM_EXTENDED_KEY + vmov.i32 q0, #0 + vmov.i32 q1, #0 +.Lcbc_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r9 + bne .Lcbc_dec_bzero +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + vst1.8 {q15}, [r8] @ return IV + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt +.extern AES_encrypt +.global bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,%function +.align 5 +bsaes_ctr32_encrypt_blocks: + cmp r2, #8 @ use plain AES for + blo .Lctr_enc_short @ small sizes + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ ctr is 1st arg on the stack + sub sp, sp, #0x10 @ scratch space to carry over the ctr + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ size of bit-sliced key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + + vld1.8 {q0}, [r8] @ load counter + add r8, r6, #.LREVM0SR-.LM0 @ borrow r8 + vldmia sp, {q4} @ load round0 key +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + +.align 2 +0: add r12, r3, #248 + vld1.8 {q0}, [r8] @ load counter + adrl r8, .LREVM0SR @ borrow r8 + vldmia r12, {q4} @ load round0 key + sub sp, #0x10 @ place for adjusted round0 key +#endif + + vmov.i32 q8,#1 @ compose 1<<96 + veor q9,q9,q9 + vrev32.8 q0,q0 + vext.8 q8,q9,q8,#4 + vrev32.8 q4,q4 + vadd.u32 q9,q8,q8 @ compose 2<<96 + vstmia sp, {q4} @ save adjusted round0 key + b .Lctr_enc_loop + +.align 4 +.Lctr_enc_loop: + vadd.u32 q10, q8, q9 @ compose 3<<96 + vadd.u32 q1, q0, q8 @ +1 + vadd.u32 q2, q0, q9 @ +2 + vadd.u32 q3, q0, q10 @ +3 + vadd.u32 q4, q1, q10 + vadd.u32 q5, q2, q10 + vadd.u32 q6, q3, q10 + vadd.u32 q7, q4, q10 + vadd.u32 q10, q5, q10 @ next counter + + @ Borrow prologue from _bsaes_encrypt8 to use the opportunity + @ to flip byte order in 32-bit counter + + vldmia sp, {q9} @ load round0 key +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x10 @ pass next round key +#else + add r4, r3, #264 +#endif + vldmia r8, {q8} @ .LREVM0SR + mov r5, r10 @ pass rounds + vstmia r9, {q10} @ save next counter + sub r6, r8, #.LREVM0SR-.LSR @ pass constants + + bl _bsaes_encrypt8_alt + + subs r2, r2, #8 + blo .Lctr_enc_loop_done + + vld1.8 {q8-q9}, [r0]! @ load input + vld1.8 {q10-q11}, [r0]! + veor q0, q8 + veor q1, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q10 + veor q6, q11 + vld1.8 {q14-q15}, [r0]! + veor q3, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q7, q13 + veor q2, q14 + vst1.8 {q4}, [r1]! + veor q5, q15 + vst1.8 {q6}, [r1]! + vmov.i32 q8, #1 @ compose 1<<96 + vst1.8 {q3}, [r1]! + veor q9, q9, q9 + vst1.8 {q7}, [r1]! + vext.8 q8, q9, q8, #4 + vst1.8 {q2}, [r1]! + vadd.u32 q9,q8,q8 @ compose 2<<96 + vst1.8 {q5}, [r1]! + vldmia r9, {q0} @ load counter + + bne .Lctr_enc_loop + b .Lctr_enc_done + +.align 4 +.Lctr_enc_loop_done: + add r2, r2, #8 + vld1.8 {q8}, [r0]! @ load input + veor q0, q8 + vst1.8 {q0}, [r1]! @ write output + cmp r2, #2 + blo .Lctr_enc_done + vld1.8 {q9}, [r0]! + veor q1, q9 + vst1.8 {q1}, [r1]! + beq .Lctr_enc_done + vld1.8 {q10}, [r0]! + veor q4, q10 + vst1.8 {q4}, [r1]! + cmp r2, #4 + blo .Lctr_enc_done + vld1.8 {q11}, [r0]! + veor q6, q11 + vst1.8 {q6}, [r1]! + beq .Lctr_enc_done + vld1.8 {q12}, [r0]! + veor q3, q12 + vst1.8 {q3}, [r1]! + cmp r2, #6 + blo .Lctr_enc_done + vld1.8 {q13}, [r0]! + veor q7, q13 + vst1.8 {q7}, [r1]! + beq .Lctr_enc_done + vld1.8 {q14}, [r0] + veor q2, q14 + vst1.8 {q2}, [r1]! + +.Lctr_enc_done: + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifndef BSAES_ASM_EXTENDED_KEY +.Lctr_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r9 + bne .Lctr_enc_bzero +#else + vstmia sp, {q0-q1} +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.align 4 +.Lctr_enc_short: + ldr ip, [sp] @ ctr pointer is passed on stack + stmdb sp!, {r4-r8, lr} + + mov r4, r0 @ copy arguments + mov r5, r1 + mov r6, r2 + mov r7, r3 + ldr r8, [ip, #12] @ load counter LSW + vld1.8 {q1}, [ip] @ load whole counter value +#ifdef __ARMEL__ + rev r8, r8 +#endif + sub sp, sp, #0x10 + vst1.8 {q1}, [sp,:64] @ copy counter value + sub sp, sp, #0x10 + +.Lctr_enc_short_loop: + add r0, sp, #0x10 @ input counter value + mov r1, sp @ output on the stack + mov r2, r7 @ key + + bl AES_encrypt + + vld1.8 {q0}, [r4]! @ load input + vld1.8 {q1}, [sp,:64] @ load encrypted counter + add r8, r8, #1 +#ifdef __ARMEL__ + rev r0, r8 + str r0, [sp, #0x1c] @ next counter value +#else + str r8, [sp, #0x1c] @ next counter value +#endif + veor q0,q0,q1 + vst1.8 {q0}, [r5]! @ store output + subs r6, r6, #1 + bne .Lctr_enc_short_loop + + vmov.i32 q0, #0 + vmov.i32 q1, #0 + vstmia sp!, {q0-q1} + + ldmia sp!, {r4-r8, pc} +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,%function +.align 4 +bsaes_xts_encrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future r3 + + mov r7, r0 + mov r8, r1 + mov r9, r2 + mov r10, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0,sp @ pointer to initial tweak +#endif + + ldr r1, [r10, #240] @ get # of rounds + mov r3, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key + @ add r12, #96 @ size of bit-sliced key schedule + sub r12, #48 @ place for tweak[9] + + @ populate the key schedule + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + veor q7, q7, q15 @ fix up last round key + vstmia r12, {q7} @ save last round key +#else + ldr r12, [r10, #244] + eors r12, #1 + beq 0f + + str r12, [r10, #244] + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + add r12, r10, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7, q7, q15 @ fix up last round key + vstmia r12, {q7} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + + vld1.8 {q8}, [r0] @ initial tweak + adr r2, .Lxts_magic + + subs r9, #0x80 + blo .Lxts_enc_short + b .Lxts_enc_loop + +.align 4 +.Lxts_enc_loop: + vldmia r2, {q5} @ load XTS magic + vshr.s64 q6, q8, #63 + mov r0, sp + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q9, #63 + veor q9, q9, q6 + vand q7, q7, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q10, #63 + veor q10, q10, q7 + vand q6, q6, q5 + vld1.8 {q0}, [r7]! + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q11, #63 + veor q11, q11, q6 + vand q7, q7, q5 + vld1.8 {q1}, [r7]! + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q12, #63 + veor q12, q12, q7 + vand q6, q6, q5 + vld1.8 {q2}, [r7]! + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q13, #63 + veor q13, q13, q6 + vand q7, q7, q5 + vld1.8 {q3}, [r7]! + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q14, #63 + veor q14, q14, q7 + vand q6, q6, q5 + vld1.8 {q4}, [r7]! + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q15, #63 + veor q15, q15, q6 + vand q7, q7, q5 + vld1.8 {q5}, [r7]! + veor q4, q4, q12 + vadd.u64 q8, q15, q15 + vst1.64 {q15}, [r0,:128]! + vswp d15,d14 + veor q8, q8, q7 + vst1.64 {q8}, [r0,:128] @ next round tweak + + vld1.8 {q6-q7}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + veor q7, q7, q15 + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vld1.64 {q14-q15}, [r0,:128]! + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q2, q14 + vst1.8 {q10-q11}, [r8]! + veor q13, q5, q15 + vst1.8 {q12-q13}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + + subs r9, #0x80 + bpl .Lxts_enc_loop + +.Lxts_enc_short: + adds r9, #0x70 + bmi .Lxts_enc_done + + vldmia r2, {q5} @ load XTS magic + vshr.s64 q7, q8, #63 + mov r0, sp + vand q7, q7, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q9, #63 + veor q9, q9, q7 + vand q6, q6, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q10, #63 + veor q10, q10, q6 + vand q7, q7, q5 + vld1.8 {q0}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_1 + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q11, #63 + veor q11, q11, q7 + vand q6, q6, q5 + vld1.8 {q1}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_2 + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q12, #63 + veor q12, q12, q6 + vand q7, q7, q5 + vld1.8 {q2}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_3 + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q13, #63 + veor q13, q13, q7 + vand q6, q6, q5 + vld1.8 {q3}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_4 + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q14, #63 + veor q14, q14, q6 + vand q7, q7, q5 + vld1.8 {q4}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_5 + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q15, #63 + veor q15, q15, q7 + vand q6, q6, q5 + vld1.8 {q5}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_6 + veor q4, q4, q12 + sub r9, #0x10 + vst1.64 {q15}, [r0,:128] @ next round tweak + + vld1.8 {q6}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vld1.64 {q14}, [r0,:128]! + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q2, q14 + vst1.8 {q10-q11}, [r8]! + vst1.8 {q12}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_6: + vst1.64 {q14}, [r0,:128] @ next round tweak + + veor q4, q4, q12 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q5, q5, q13 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + vst1.8 {q10-q11}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done + +@ put this in range for both ARM and Thumb mode adr instructions +.align 5 +.Lxts_magic: + .quad 1, 0x87 + +.align 5 +.Lxts_enc_5: + vst1.64 {q13}, [r0,:128] @ next round tweak + + veor q3, q3, q11 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q4, q4, q12 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + vst1.8 {q10}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_4: + vst1.64 {q12}, [r0,:128] @ next round tweak + + veor q2, q2, q10 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q3, q3, q11 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vst1.8 {q8-q9}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_3: + vst1.64 {q11}, [r0,:128] @ next round tweak + + veor q1, q1, q9 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q2, q2, q10 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + vst1.8 {q8}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_2: + vst1.64 {q10}, [r0,:128] @ next round tweak + + veor q0, q0, q8 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q1, q1, q9 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + vst1.8 {q0-q1}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_1: + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_encrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r8]! + mov r3, r4 + + vmov q8, q9 @ next round tweak + +.Lxts_enc_done: +#ifndef XTS_CHAIN_TWEAK + adds r9, #0x10 + beq .Lxts_enc_ret + sub r6, r8, #0x10 + +.Lxts_enc_steal: + ldrb r0, [r7], #1 + ldrb r1, [r8, #-0x10] + strb r0, [r8, #-0x10] + strb r1, [r8], #1 + + subs r9, #1 + bhi .Lxts_enc_steal + + vld1.8 {q0}, [r6] + mov r0, sp + veor q0, q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_encrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r6] + mov r3, r4 +#endif + +.Lxts_enc_ret: + bic r0, r3, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_enc_bzero + + mov sp, r3 +#ifdef XTS_CHAIN_TWEAK + vst1.8 {q8}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,%function +.align 4 +bsaes_xts_decrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future r3 + + mov r7, r0 + mov r8, r1 + mov r9, r2 + mov r10, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0, sp @ pointer to initial tweak +#endif + + ldr r1, [r10, #240] @ get # of rounds + mov r3, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key + @ add r12, #96 @ size of bit-sliced key schedule + sub r12, #48 @ place for tweak[9] + + @ populate the key schedule + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + add r4, sp, #0x90 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} +#else + ldr r12, [r10, #244] + eors r12, #1 + beq 0f + + str r12, [r10, #244] + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + add r12, r10, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r10, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + vld1.8 {q8}, [r0] @ initial tweak + adr r2, .Lxts_magic + + tst r9, #0xf @ if not multiple of 16 + it ne @ Thumb2 thing, sanity check in ARM + subne r9, #0x10 @ subtract another 16 bytes + subs r9, #0x80 + + blo .Lxts_dec_short + b .Lxts_dec_loop + +.align 4 +.Lxts_dec_loop: + vldmia r2, {q5} @ load XTS magic + vshr.s64 q6, q8, #63 + mov r0, sp + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q9, #63 + veor q9, q9, q6 + vand q7, q7, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q10, #63 + veor q10, q10, q7 + vand q6, q6, q5 + vld1.8 {q0}, [r7]! + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q11, #63 + veor q11, q11, q6 + vand q7, q7, q5 + vld1.8 {q1}, [r7]! + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q12, #63 + veor q12, q12, q7 + vand q6, q6, q5 + vld1.8 {q2}, [r7]! + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q13, #63 + veor q13, q13, q6 + vand q7, q7, q5 + vld1.8 {q3}, [r7]! + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q14, #63 + veor q14, q14, q7 + vand q6, q6, q5 + vld1.8 {q4}, [r7]! + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q15, #63 + veor q15, q15, q6 + vand q7, q7, q5 + vld1.8 {q5}, [r7]! + veor q4, q4, q12 + vadd.u64 q8, q15, q15 + vst1.64 {q15}, [r0,:128]! + vswp d15,d14 + veor q8, q8, q7 + vst1.64 {q8}, [r0,:128] @ next round tweak + + vld1.8 {q6-q7}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + veor q7, q7, q15 + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vld1.64 {q14-q15}, [r0,:128]! + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q3, q14 + vst1.8 {q10-q11}, [r8]! + veor q13, q5, q15 + vst1.8 {q12-q13}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + + subs r9, #0x80 + bpl .Lxts_dec_loop + +.Lxts_dec_short: + adds r9, #0x70 + bmi .Lxts_dec_done + + vldmia r2, {q5} @ load XTS magic + vshr.s64 q7, q8, #63 + mov r0, sp + vand q7, q7, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q9, #63 + veor q9, q9, q7 + vand q6, q6, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q10, #63 + veor q10, q10, q6 + vand q7, q7, q5 + vld1.8 {q0}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_1 + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q11, #63 + veor q11, q11, q7 + vand q6, q6, q5 + vld1.8 {q1}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_2 + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q12, #63 + veor q12, q12, q6 + vand q7, q7, q5 + vld1.8 {q2}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_3 + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q13, #63 + veor q13, q13, q7 + vand q6, q6, q5 + vld1.8 {q3}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_4 + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q14, #63 + veor q14, q14, q6 + vand q7, q7, q5 + vld1.8 {q4}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_5 + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q15, #63 + veor q15, q15, q7 + vand q6, q6, q5 + vld1.8 {q5}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_6 + veor q4, q4, q12 + sub r9, #0x10 + vst1.64 {q15}, [r0,:128] @ next round tweak + + vld1.8 {q6}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vld1.64 {q14}, [r0,:128]! + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q3, q14 + vst1.8 {q10-q11}, [r8]! + vst1.8 {q12}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_6: + vst1.64 {q14}, [r0,:128] @ next round tweak + + veor q4, q4, q12 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q5, q5, q13 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + vst1.8 {q10-q11}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_5: + vst1.64 {q13}, [r0,:128] @ next round tweak + + veor q3, q3, q11 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q4, q4, q12 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + vst1.8 {q10}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_4: + vst1.64 {q12}, [r0,:128] @ next round tweak + + veor q2, q2, q10 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q3, q3, q11 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vst1.8 {q8-q9}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_3: + vst1.64 {q11}, [r0,:128] @ next round tweak + + veor q1, q1, q9 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q2, q2, q10 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + vst1.8 {q8}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_2: + vst1.64 {q10}, [r0,:128] @ next round tweak + + veor q0, q0, q8 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q1, q1, q9 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + vst1.8 {q0-q1}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_1: + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + mov r5, r2 @ preserve magic + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r8]! + mov r3, r4 + mov r2, r5 + + vmov q8, q9 @ next round tweak + +.Lxts_dec_done: +#ifndef XTS_CHAIN_TWEAK + adds r9, #0x10 + beq .Lxts_dec_ret + + @ calculate one round of extra tweak for the stolen ciphertext + vldmia r2, {q5} + vshr.s64 q6, q8, #63 + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vswp d13,d12 + veor q9, q9, q6 + + @ perform the final decryption with the last tweak value + vld1.8 {q0}, [r7]! + mov r0, sp + veor q0, q0, q9 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q9 + vst1.8 {q0}, [r8] + + mov r6, r8 +.Lxts_dec_steal: + ldrb r1, [r8] + ldrb r0, [r7], #1 + strb r1, [r8, #0x10] + strb r0, [r8], #1 + + subs r9, #1 + bhi .Lxts_dec_steal + + vld1.8 {q0}, [r6] + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r6] + mov r3, r4 +#endif + +.Lxts_dec_ret: + bic r0, r3, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_dec_bzero + + mov sp, r3 +#ifdef XTS_CHAIN_TWEAK + vst1.8 {q8}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +#endif diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c new file mode 100644 index 000000000000..4522366da759 --- /dev/null +++ b/arch/arm/crypto/aesbs-glue.c @@ -0,0 +1,434 @@ +/* + * linux/arch/arm/crypto/aesbs-glue.c - glue code for NEON bit sliced AES + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include "aes_glue.h" + +#define BIT_SLICED_KEY_MAXSIZE (128 * (AES_MAXNR - 1) + 2 * AES_BLOCK_SIZE) + +struct BS_KEY { + struct AES_KEY rk; + int converted; + u8 __aligned(8) bs[BIT_SLICED_KEY_MAXSIZE]; +} __aligned(8); + +asmlinkage void bsaes_enc_key_convert(u8 out[], struct AES_KEY const *in); +asmlinkage void bsaes_dec_key_convert(u8 out[], struct AES_KEY const *in); + +asmlinkage void bsaes_cbc_encrypt(u8 const in[], u8 out[], u32 bytes, + struct BS_KEY *key, u8 iv[]); + +asmlinkage void bsaes_ctr32_encrypt_blocks(u8 const in[], u8 out[], u32 blocks, + struct BS_KEY *key, u8 const iv[]); + +asmlinkage void bsaes_xts_encrypt(u8 const in[], u8 out[], u32 bytes, + struct BS_KEY *key, u8 tweak[]); + +asmlinkage void bsaes_xts_decrypt(u8 const in[], u8 out[], u32 bytes, + struct BS_KEY *key, u8 tweak[]); + +struct aesbs_cbc_ctx { + struct AES_KEY enc; + struct BS_KEY dec; +}; + +struct aesbs_ctr_ctx { + struct BS_KEY enc; +}; + +struct aesbs_xts_ctx { + struct BS_KEY enc; + struct BS_KEY dec; + struct AES_KEY twkey; +}; + +static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + int bits = key_len * 8; + + if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + ctx->dec.rk = ctx->enc; + private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk); + ctx->dec.converted = 0; + return 0; +} + +static int aesbs_ctr_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_ctr_ctx *ctx = crypto_tfm_ctx(tfm); + int bits = key_len * 8; + + if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + ctx->enc.converted = 0; + return 0; +} + +static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm); + int bits = key_len * 4; + + if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + ctx->dec.rk = ctx->enc.rk; + private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk); + private_AES_set_encrypt_key(in_key + key_len / 2, bits, &ctx->twkey); + ctx->enc.converted = ctx->dec.converted = 0; + return 0; +} + +static int aesbs_cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while (walk.nbytes) { + u32 blocks = walk.nbytes / AES_BLOCK_SIZE; + u8 *src = walk.src.virt.addr; + + if (walk.dst.virt.addr == walk.src.virt.addr) { + u8 *iv = walk.iv; + + do { + crypto_xor(src, iv, AES_BLOCK_SIZE); + AES_encrypt(src, src, &ctx->enc); + iv = src; + src += AES_BLOCK_SIZE; + } while (--blocks); + memcpy(walk.iv, iv, AES_BLOCK_SIZE); + } else { + u8 *dst = walk.dst.virt.addr; + + do { + crypto_xor(walk.iv, src, AES_BLOCK_SIZE); + AES_encrypt(walk.iv, dst, &ctx->enc); + memcpy(walk.iv, dst, AES_BLOCK_SIZE); + src += AES_BLOCK_SIZE; + dst += AES_BLOCK_SIZE; + } while (--blocks); + } + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static int aesbs_cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + + while ((walk.nbytes / AES_BLOCK_SIZE) >= 8) { + kernel_neon_begin(); + bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, &ctx->dec, walk.iv); + kernel_neon_end(); + err = blkcipher_walk_done(desc, &walk, 0); + } + while (walk.nbytes) { + u32 blocks = walk.nbytes / AES_BLOCK_SIZE; + u8 *dst = walk.dst.virt.addr; + u8 *src = walk.src.virt.addr; + u8 bk[2][AES_BLOCK_SIZE]; + u8 *iv = walk.iv; + + do { + if (walk.dst.virt.addr == walk.src.virt.addr) + memcpy(bk[blocks & 1], src, AES_BLOCK_SIZE); + + AES_decrypt(src, dst, &ctx->dec.rk); + crypto_xor(dst, iv, AES_BLOCK_SIZE); + + if (walk.dst.virt.addr == walk.src.virt.addr) + iv = bk[blocks & 1]; + else + iv = src; + + dst += AES_BLOCK_SIZE; + src += AES_BLOCK_SIZE; + } while (--blocks); + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static void inc_be128_ctr(__be32 ctr[], u32 addend) +{ + int i; + + for (i = 3; i >= 0; i--, addend = 1) { + u32 n = be32_to_cpu(ctr[i]) + addend; + + ctr[i] = cpu_to_be32(n); + if (n >= addend) + break; + } +} + +static int aesbs_ctr_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct aesbs_ctr_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + u32 blocks; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + + while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + __be32 *ctr = (__be32 *)walk.iv; + u32 headroom = UINT_MAX - be32_to_cpu(ctr[3]); + + /* avoid 32 bit counter overflow in the NEON code */ + if (unlikely(headroom < blocks)) { + blocks = headroom + 1; + tail = walk.nbytes - blocks * AES_BLOCK_SIZE; + } + kernel_neon_begin(); + bsaes_ctr32_encrypt_blocks(walk.src.virt.addr, + walk.dst.virt.addr, blocks, + &ctx->enc, walk.iv); + kernel_neon_end(); + inc_be128_ctr(ctr, blocks); + + nbytes -= blocks * AES_BLOCK_SIZE; + if (nbytes && nbytes == tail && nbytes <= AES_BLOCK_SIZE) + break; + + err = blkcipher_walk_done(desc, &walk, tail); + } + if (walk.nbytes) { + u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + u8 ks[AES_BLOCK_SIZE]; + + AES_encrypt(walk.iv, ks, &ctx->enc.rk); + if (tdst != tsrc) + memcpy(tdst, tsrc, nbytes); + crypto_xor(tdst, ks, nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static int aesbs_xts_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + + /* generate the initial tweak */ + AES_encrypt(walk.iv, walk.iv, &ctx->twkey); + + while (walk.nbytes) { + kernel_neon_begin(); + bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, &ctx->enc, walk.iv); + kernel_neon_end(); + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static int aesbs_xts_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + + /* generate the initial tweak */ + AES_encrypt(walk.iv, walk.iv, &ctx->twkey); + + while (walk.nbytes) { + kernel_neon_begin(); + bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, &ctx->dec, walk.iv); + kernel_neon_end(); + err = blkcipher_walk_done(desc, &walk, 0); + } + return err; +} + +static struct crypto_alg aesbs_algs[] = { { + .cra_name = "__cbc-aes-neonbs", + .cra_driver_name = "__driver-cbc-aes-neonbs", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aesbs_cbc_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_cbc_set_key, + .encrypt = aesbs_cbc_encrypt, + .decrypt = aesbs_cbc_decrypt, + }, +}, { + .cra_name = "__ctr-aes-neonbs", + .cra_driver_name = "__driver-ctr-aes-neonbs", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aesbs_ctr_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_ctr_set_key, + .encrypt = aesbs_ctr_encrypt, + .decrypt = aesbs_ctr_encrypt, + }, +}, { + .cra_name = "__xts-aes-neonbs", + .cra_driver_name = "__driver-xts-aes-neonbs", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aesbs_xts_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_xts_set_key, + .encrypt = aesbs_xts_encrypt, + .decrypt = aesbs_xts_decrypt, + }, +}, { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-neonbs", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = __ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-neonbs", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-neonbs", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +} }; + +static int __init aesbs_mod_init(void) +{ + if (!cpu_has_neon()) + return -ENODEV; + + return crypto_register_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs)); +} + +static void __exit aesbs_mod_exit(void) +{ + crypto_unregister_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs)); +} + +module_init(aesbs_mod_init); +module_exit(aesbs_mod_exit); + +MODULE_DESCRIPTION("Bit sliced AES in CBC/CTR/XTS modes using NEON"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL"); diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl new file mode 100644 index 000000000000..f3d96d932573 --- /dev/null +++ b/arch/arm/crypto/bsaes-armv7.pl @@ -0,0 +1,2467 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# +# Specific modes and adaptation for Linux kernel by Ard Biesheuvel +# . Permission to use under GPL terms is +# granted. +# ==================================================================== + +# Bit-sliced AES for ARM NEON +# +# February 2012. +# +# This implementation is direct adaptation of bsaes-x86_64 module for +# ARM NEON. Except that this module is endian-neutral [in sense that +# it can be compiled for either endianness] by courtesy of vld1.8's +# neutrality. Initial version doesn't implement interface to OpenSSL, +# only low-level primitives and unsupported entry points, just enough +# to collect performance results, which for Cortex-A8 core are: +# +# encrypt 19.5 cycles per byte processed with 128-bit key +# decrypt 22.1 cycles per byte processed with 128-bit key +# key conv. 440 cycles per 128-bit key/0.18 of 8x block +# +# Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, +# which is [much] worse than anticipated (for further details see +# http://www.openssl.org/~appro/Snapdragon-S4.html). +# +# Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code +# manages in 20.0 cycles]. +# +# When comparing to x86_64 results keep in mind that NEON unit is +# [mostly] single-issue and thus can't [fully] benefit from +# instruction-level parallelism. And when comparing to aes-armv4 +# results keep in mind key schedule conversion overhead (see +# bsaes-x86_64.pl for further details)... +# +# + +# April-August 2013 +# +# Add CBC, CTR and XTS subroutines, adapt for kernel use. +# +# + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +my ($inp,$out,$len,$key)=("r0","r1","r2","r3"); +my @XMM=map("q$_",(0..15)); + +{ +my ($key,$rounds,$const)=("r4","r5","r6"); + +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } + +sub Sbox { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb +my @b=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; + &InBasisChange (@b); + &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s); + &OutBasisChange (@b[7,1,4,2,6,5,0,3]); +} + +sub InBasisChange { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb +my @b=@_[0..7]; +$code.=<<___; + veor @b[2], @b[2], @b[1] + veor @b[5], @b[5], @b[6] + veor @b[3], @b[3], @b[0] + veor @b[6], @b[6], @b[2] + veor @b[5], @b[5], @b[0] + + veor @b[6], @b[6], @b[3] + veor @b[3], @b[3], @b[7] + veor @b[7], @b[7], @b[5] + veor @b[3], @b[3], @b[4] + veor @b[4], @b[4], @b[5] + + veor @b[2], @b[2], @b[7] + veor @b[3], @b[3], @b[1] + veor @b[1], @b[1], @b[5] +___ +} + +sub OutBasisChange { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb +my @b=@_[0..7]; +$code.=<<___; + veor @b[0], @b[0], @b[6] + veor @b[1], @b[1], @b[4] + veor @b[4], @b[4], @b[6] + veor @b[2], @b[2], @b[0] + veor @b[6], @b[6], @b[1] + + veor @b[1], @b[1], @b[5] + veor @b[5], @b[5], @b[3] + veor @b[3], @b[3], @b[7] + veor @b[7], @b[7], @b[5] + veor @b[2], @b[2], @b[5] + + veor @b[4], @b[4], @b[7] +___ +} + +sub InvSbox { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb +my @b=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; + &InvInBasisChange (@b); + &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s); + &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]); +} + +sub InvInBasisChange { # OutBasisChange in reverse (with twist) +my @b=@_[5,1,2,6,3,7,0,4]; +$code.=<<___ + veor @b[1], @b[1], @b[7] + veor @b[4], @b[4], @b[7] + + veor @b[7], @b[7], @b[5] + veor @b[1], @b[1], @b[3] + veor @b[2], @b[2], @b[5] + veor @b[3], @b[3], @b[7] + + veor @b[6], @b[6], @b[1] + veor @b[2], @b[2], @b[0] + veor @b[5], @b[5], @b[3] + veor @b[4], @b[4], @b[6] + veor @b[0], @b[0], @b[6] + veor @b[1], @b[1], @b[4] +___ +} + +sub InvOutBasisChange { # InBasisChange in reverse +my @b=@_[2,5,7,3,6,1,0,4]; +$code.=<<___; + veor @b[1], @b[1], @b[5] + veor @b[2], @b[2], @b[7] + + veor @b[3], @b[3], @b[1] + veor @b[4], @b[4], @b[5] + veor @b[7], @b[7], @b[5] + veor @b[3], @b[3], @b[4] + veor @b[5], @b[5], @b[0] + veor @b[3], @b[3], @b[7] + veor @b[6], @b[6], @b[2] + veor @b[2], @b[2], @b[1] + veor @b[6], @b[6], @b[3] + + veor @b[3], @b[3], @b[0] + veor @b[5], @b[5], @b[6] +___ +} + +sub Mul_GF4 { +#;************************************************************* +#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) * +#;************************************************************* +my ($x0,$x1,$y0,$y1,$t0,$t1)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + vand $t0, $t0, $x0 + veor $x0, $x0, $x1 + vand $t1, $x1, $y0 + vand $x0, $x0, $y1 + veor $x1, $t1, $t0 + veor $x0, $x0, $t1 +___ +} + +sub Mul_GF4_N { # not used, see next subroutine +# multiply and scale by N +my ($x0,$x1,$y0,$y1,$t0)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + vand $t0, $t0, $x0 + veor $x0, $x0, $x1 + vand $x1, $x1, $y0 + vand $x0, $x0, $y1 + veor $x1, $x1, $x0 + veor $x0, $x0, $t0 +___ +} + +sub Mul_GF4_N_GF4 { +# interleaved Mul_GF4_N and Mul_GF4 +my ($x0,$x1,$y0,$y1,$t0, + $x2,$x3,$y2,$y3,$t1)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + veor $t1, $y2, $y3 + vand $t0, $t0, $x0 + vand $t1, $t1, $x2 + veor $x0, $x0, $x1 + veor $x2, $x2, $x3 + vand $x1, $x1, $y0 + vand $x3, $x3, $y2 + vand $x0, $x0, $y1 + vand $x2, $x2, $y3 + veor $x1, $x1, $x0 + veor $x2, $x2, $x3 + veor $x0, $x0, $t0 + veor $x3, $x3, $t1 +___ +} +sub Mul_GF16_2 { +my @x=@_[0..7]; +my @y=@_[8..11]; +my @t=@_[12..15]; +$code.=<<___; + veor @t[0], @x[0], @x[2] + veor @t[1], @x[1], @x[3] +___ + &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2..3]); +$code.=<<___; + veor @y[0], @y[0], @y[2] + veor @y[1], @y[1], @y[3] +___ + Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], + @x[2], @x[3], @y[2], @y[3], @t[2]); +$code.=<<___; + veor @x[0], @x[0], @t[0] + veor @x[2], @x[2], @t[0] + veor @x[1], @x[1], @t[1] + veor @x[3], @x[3], @t[1] + + veor @t[0], @x[4], @x[6] + veor @t[1], @x[5], @x[7] +___ + &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], + @x[6], @x[7], @y[2], @y[3], @t[2]); +$code.=<<___; + veor @y[0], @y[0], @y[2] + veor @y[1], @y[1], @y[3] +___ + &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[2..3]); +$code.=<<___; + veor @x[4], @x[4], @t[0] + veor @x[6], @x[6], @t[0] + veor @x[5], @x[5], @t[1] + veor @x[7], @x[7], @t[1] +___ +} +sub Inv_GF256 { +#;******************************************************************** +#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) * +#;******************************************************************** +my @x=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; +# direct optimizations from hardware +$code.=<<___; + veor @t[3], @x[4], @x[6] + veor @t[2], @x[5], @x[7] + veor @t[1], @x[1], @x[3] + veor @s[1], @x[7], @x[6] + vmov @t[0], @t[2] + veor @s[0], @x[0], @x[2] + + vorr @t[2], @t[2], @t[1] + veor @s[3], @t[3], @t[0] + vand @s[2], @t[3], @s[0] + vorr @t[3], @t[3], @s[0] + veor @s[0], @s[0], @t[1] + vand @t[0], @t[0], @t[1] + veor @t[1], @x[3], @x[2] + vand @s[3], @s[3], @s[0] + vand @s[1], @s[1], @t[1] + veor @t[1], @x[4], @x[5] + veor @s[0], @x[1], @x[0] + veor @t[3], @t[3], @s[1] + veor @t[2], @t[2], @s[1] + vand @s[1], @t[1], @s[0] + vorr @t[1], @t[1], @s[0] + veor @t[3], @t[3], @s[3] + veor @t[0], @t[0], @s[1] + veor @t[2], @t[2], @s[2] + veor @t[1], @t[1], @s[3] + veor @t[0], @t[0], @s[2] + vand @s[0], @x[7], @x[3] + veor @t[1], @t[1], @s[2] + vand @s[1], @x[6], @x[2] + vand @s[2], @x[5], @x[1] + vorr @s[3], @x[4], @x[0] + veor @t[3], @t[3], @s[0] + veor @t[1], @t[1], @s[2] + veor @t[0], @t[0], @s[3] + veor @t[2], @t[2], @s[1] + + @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 + + @ new smaller inversion + + vand @s[2], @t[3], @t[1] + vmov @s[0], @t[0] + + veor @s[1], @t[2], @s[2] + veor @s[3], @t[0], @s[2] + veor @s[2], @t[0], @s[2] @ @s[2]=@s[3] + + vbsl @s[1], @t[1], @t[0] + vbsl @s[3], @t[3], @t[2] + veor @t[3], @t[3], @t[2] + + vbsl @s[0], @s[1], @s[2] + vbsl @t[0], @s[2], @s[1] + + vand @s[2], @s[0], @s[3] + veor @t[1], @t[1], @t[0] + + veor @s[2], @s[2], @t[3] +___ +# output in s3, s2, s1, t1 + +# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3 + +# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 + &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]); + +### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb +} + +# AES linear components + +sub ShiftRows { +my @x=@_[0..7]; +my @t=@_[8..11]; +my $mask=pop; +$code.=<<___; + vldmia $key!, {@t[0]-@t[3]} + veor @t[0], @t[0], @x[0] + veor @t[1], @t[1], @x[1] + vtbl.8 `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)` + vldmia $key!, {@t[0]} + veor @t[2], @t[2], @x[2] + vtbl.8 `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)` + vldmia $key!, {@t[1]} + veor @t[3], @t[3], @x[3] + vtbl.8 `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)` + vldmia $key!, {@t[2]} + vtbl.8 `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)` + vldmia $key!, {@t[3]} + veor @t[0], @t[0], @x[4] + veor @t[1], @t[1], @x[5] + vtbl.8 `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)` + veor @t[2], @t[2], @x[6] + vtbl.8 `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)` + veor @t[3], @t[3], @x[7] + vtbl.8 `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)` + vtbl.8 `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)` +___ +} + +sub MixColumns { +# modified to emit output in order suitable for feeding back to aesenc[last] +my @x=@_[0..7]; +my @t=@_[8..15]; +my $inv=@_[16]; # optional +$code.=<<___; + vext.8 @t[0], @x[0], @x[0], #12 @ x0 <<< 32 + vext.8 @t[1], @x[1], @x[1], #12 + veor @x[0], @x[0], @t[0] @ x0 ^ (x0 <<< 32) + vext.8 @t[2], @x[2], @x[2], #12 + veor @x[1], @x[1], @t[1] + vext.8 @t[3], @x[3], @x[3], #12 + veor @x[2], @x[2], @t[2] + vext.8 @t[4], @x[4], @x[4], #12 + veor @x[3], @x[3], @t[3] + vext.8 @t[5], @x[5], @x[5], #12 + veor @x[4], @x[4], @t[4] + vext.8 @t[6], @x[6], @x[6], #12 + veor @x[5], @x[5], @t[5] + vext.8 @t[7], @x[7], @x[7], #12 + veor @x[6], @x[6], @t[6] + + veor @t[1], @t[1], @x[0] + veor @x[7], @x[7], @t[7] + vext.8 @x[0], @x[0], @x[0], #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor @t[2], @t[2], @x[1] + veor @t[0], @t[0], @x[7] + veor @t[1], @t[1], @x[7] + vext.8 @x[1], @x[1], @x[1], #8 + veor @t[5], @t[5], @x[4] + veor @x[0], @x[0], @t[0] + veor @t[6], @t[6], @x[5] + veor @x[1], @x[1], @t[1] + vext.8 @t[0], @x[4], @x[4], #8 + veor @t[4], @t[4], @x[3] + vext.8 @t[1], @x[5], @x[5], #8 + veor @t[7], @t[7], @x[6] + vext.8 @x[4], @x[3], @x[3], #8 + veor @t[3], @t[3], @x[2] + vext.8 @x[5], @x[7], @x[7], #8 + veor @t[4], @t[4], @x[7] + vext.8 @x[3], @x[6], @x[6], #8 + veor @t[3], @t[3], @x[7] + vext.8 @x[6], @x[2], @x[2], #8 + veor @x[7], @t[1], @t[5] +___ +$code.=<<___ if (!$inv); + veor @x[2], @t[0], @t[4] + veor @x[4], @x[4], @t[3] + veor @x[5], @x[5], @t[7] + veor @x[3], @x[3], @t[6] + @ vmov @x[2], @t[0] + veor @x[6], @x[6], @t[2] + @ vmov @x[7], @t[1] +___ +$code.=<<___ if ($inv); + veor @t[3], @t[3], @x[4] + veor @x[5], @x[5], @t[7] + veor @x[2], @x[3], @t[6] + veor @x[3], @t[0], @t[4] + veor @x[4], @x[6], @t[2] + vmov @x[6], @t[3] + @ vmov @x[7], @t[1] +___ +} + +sub InvMixColumns_orig { +my @x=@_[0..7]; +my @t=@_[8..15]; + +$code.=<<___; + @ multiplication by 0x0e + vext.8 @t[7], @x[7], @x[7], #12 + vmov @t[2], @x[2] + veor @x[2], @x[2], @x[5] @ 2 5 + veor @x[7], @x[7], @x[5] @ 7 5 + vext.8 @t[0], @x[0], @x[0], #12 + vmov @t[5], @x[5] + veor @x[5], @x[5], @x[0] @ 5 0 [1] + veor @x[0], @x[0], @x[1] @ 0 1 + vext.8 @t[1], @x[1], @x[1], #12 + veor @x[1], @x[1], @x[2] @ 1 25 + veor @x[0], @x[0], @x[6] @ 01 6 [2] + vext.8 @t[3], @x[3], @x[3], #12 + veor @x[1], @x[1], @x[3] @ 125 3 [4] + veor @x[2], @x[2], @x[0] @ 25 016 [3] + veor @x[3], @x[3], @x[7] @ 3 75 + veor @x[7], @x[7], @x[6] @ 75 6 [0] + vext.8 @t[6], @x[6], @x[6], #12 + vmov @t[4], @x[4] + veor @x[6], @x[6], @x[4] @ 6 4 + veor @x[4], @x[4], @x[3] @ 4 375 [6] + veor @x[3], @x[3], @x[7] @ 375 756=36 + veor @x[6], @x[6], @t[5] @ 64 5 [7] + veor @x[3], @x[3], @t[2] @ 36 2 + vext.8 @t[5], @t[5], @t[5], #12 + veor @x[3], @x[3], @t[4] @ 362 4 [5] +___ + my @y = @x[7,5,0,2,1,3,4,6]; +$code.=<<___; + @ multiplication by 0x0b + veor @y[1], @y[1], @y[0] + veor @y[0], @y[0], @t[0] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[1], @y[1], @t[1] + veor @y[0], @y[0], @t[5] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[1], @y[1], @t[6] + veor @y[0], @y[0], @t[7] + veor @t[7], @t[7], @t[6] @ clobber t[7] + + veor @y[3], @y[3], @t[0] + veor @y[1], @y[1], @y[0] + vext.8 @t[0], @t[0], @t[0], #12 + veor @y[2], @y[2], @t[1] + veor @y[4], @y[4], @t[1] + vext.8 @t[1], @t[1], @t[1], #12 + veor @y[2], @y[2], @t[2] + veor @y[3], @y[3], @t[2] + veor @y[5], @y[5], @t[2] + veor @y[2], @y[2], @t[7] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[3], @y[3], @t[3] + veor @y[6], @y[6], @t[3] + veor @y[4], @y[4], @t[3] + veor @y[7], @y[7], @t[4] + vext.8 @t[3], @t[3], @t[3], #12 + veor @y[5], @y[5], @t[4] + veor @y[7], @y[7], @t[7] + veor @t[7], @t[7], @t[5] @ clobber t[7] even more + veor @y[3], @y[3], @t[5] + veor @y[4], @y[4], @t[4] + + veor @y[5], @y[5], @t[7] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[6], @y[6], @t[7] + veor @y[4], @y[4], @t[7] + + veor @t[7], @t[7], @t[5] + vext.8 @t[5], @t[5], @t[5], #12 + + @ multiplication by 0x0d + veor @y[4], @y[4], @y[7] + veor @t[7], @t[7], @t[6] @ restore t[7] + veor @y[7], @y[7], @t[4] + vext.8 @t[6], @t[6], @t[6], #12 + veor @y[2], @y[2], @t[0] + veor @y[7], @y[7], @t[5] + vext.8 @t[7], @t[7], @t[7], #12 + veor @y[2], @y[2], @t[2] + + veor @y[3], @y[3], @y[1] + veor @y[1], @y[1], @t[1] + veor @y[0], @y[0], @t[0] + veor @y[3], @y[3], @t[0] + veor @y[1], @y[1], @t[5] + veor @y[0], @y[0], @t[5] + vext.8 @t[0], @t[0], @t[0], #12 + veor @y[1], @y[1], @t[7] + veor @y[0], @y[0], @t[6] + veor @y[3], @y[3], @y[1] + veor @y[4], @y[4], @t[1] + vext.8 @t[1], @t[1], @t[1], #12 + + veor @y[7], @y[7], @t[7] + veor @y[4], @y[4], @t[2] + veor @y[5], @y[5], @t[2] + veor @y[2], @y[2], @t[6] + veor @t[6], @t[6], @t[3] @ clobber t[6] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[4], @y[4], @y[7] + veor @y[3], @y[3], @t[6] + + veor @y[6], @y[6], @t[6] + veor @y[5], @y[5], @t[5] + vext.8 @t[5], @t[5], @t[5], #12 + veor @y[6], @y[6], @t[4] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[5], @y[5], @t[6] + veor @y[6], @y[6], @t[7] + vext.8 @t[7], @t[7], @t[7], #12 + veor @t[6], @t[6], @t[3] @ restore t[6] + vext.8 @t[3], @t[3], @t[3], #12 + + @ multiplication by 0x09 + veor @y[4], @y[4], @y[1] + veor @t[1], @t[1], @y[1] @ t[1]=y[1] + veor @t[0], @t[0], @t[5] @ clobber t[0] + vext.8 @t[6], @t[6], @t[6], #12 + veor @t[1], @t[1], @t[5] + veor @y[3], @y[3], @t[0] + veor @t[0], @t[0], @y[0] @ t[0]=y[0] + veor @t[1], @t[1], @t[6] + veor @t[6], @t[6], @t[7] @ clobber t[6] + veor @y[4], @y[4], @t[1] + veor @y[7], @y[7], @t[4] + veor @y[6], @y[6], @t[3] + veor @y[5], @y[5], @t[2] + veor @t[4], @t[4], @y[4] @ t[4]=y[4] + veor @t[3], @t[3], @y[3] @ t[3]=y[3] + veor @t[5], @t[5], @y[5] @ t[5]=y[5] + veor @t[2], @t[2], @y[2] @ t[2]=y[2] + veor @t[3], @t[3], @t[7] + veor @XMM[5], @t[5], @t[6] + veor @XMM[6], @t[6], @y[6] @ t[6]=y[6] + veor @XMM[2], @t[2], @t[6] + veor @XMM[7], @t[7], @y[7] @ t[7]=y[7] + + vmov @XMM[0], @t[0] + vmov @XMM[1], @t[1] + @ vmov @XMM[2], @t[2] + vmov @XMM[3], @t[3] + vmov @XMM[4], @t[4] + @ vmov @XMM[5], @t[5] + @ vmov @XMM[6], @t[6] + @ vmov @XMM[7], @t[7] +___ +} + +sub InvMixColumns { +my @x=@_[0..7]; +my @t=@_[8..15]; + +# Thanks to Jussi Kivilinna for providing pointer to +# +# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | +# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | +# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | +# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | + +$code.=<<___; + @ multiplication by 0x05-0x00-0x04-0x00 + vext.8 @t[0], @x[0], @x[0], #8 + vext.8 @t[6], @x[6], @x[6], #8 + vext.8 @t[7], @x[7], @x[7], #8 + veor @t[0], @t[0], @x[0] + vext.8 @t[1], @x[1], @x[1], #8 + veor @t[6], @t[6], @x[6] + vext.8 @t[2], @x[2], @x[2], #8 + veor @t[7], @t[7], @x[7] + vext.8 @t[3], @x[3], @x[3], #8 + veor @t[1], @t[1], @x[1] + vext.8 @t[4], @x[4], @x[4], #8 + veor @t[2], @t[2], @x[2] + vext.8 @t[5], @x[5], @x[5], #8 + veor @t[3], @t[3], @x[3] + veor @t[4], @t[4], @x[4] + veor @t[5], @t[5], @x[5] + + veor @x[0], @x[0], @t[6] + veor @x[1], @x[1], @t[6] + veor @x[2], @x[2], @t[0] + veor @x[4], @x[4], @t[2] + veor @x[3], @x[3], @t[1] + veor @x[1], @x[1], @t[7] + veor @x[2], @x[2], @t[7] + veor @x[4], @x[4], @t[6] + veor @x[5], @x[5], @t[3] + veor @x[3], @x[3], @t[6] + veor @x[6], @x[6], @t[4] + veor @x[4], @x[4], @t[7] + veor @x[5], @x[5], @t[7] + veor @x[7], @x[7], @t[5] +___ + &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6 +} + +sub swapmove { +my ($a,$b,$n,$mask,$t)=@_; +$code.=<<___; + vshr.u64 $t, $b, #$n + veor $t, $t, $a + vand $t, $t, $mask + veor $a, $a, $t + vshl.u64 $t, $t, #$n + veor $b, $b, $t +___ +} +sub swapmove2x { +my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_; +$code.=<<___; + vshr.u64 $t0, $b0, #$n + vshr.u64 $t1, $b1, #$n + veor $t0, $t0, $a0 + veor $t1, $t1, $a1 + vand $t0, $t0, $mask + vand $t1, $t1, $mask + veor $a0, $a0, $t0 + vshl.u64 $t0, $t0, #$n + veor $a1, $a1, $t1 + vshl.u64 $t1, $t1, #$n + veor $b0, $b0, $t0 + veor $b1, $b1, $t1 +___ +} + +sub bitslice { +my @x=reverse(@_[0..7]); +my ($t0,$t1,$t2,$t3)=@_[8..11]; +$code.=<<___; + vmov.i8 $t0,#0x55 @ compose .LBS0 + vmov.i8 $t1,#0x33 @ compose .LBS1 +___ + &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3); + &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); +$code.=<<___; + vmov.i8 $t0,#0x0f @ compose .LBS2 +___ + &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3); + &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); + + &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3); + &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3); +} + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" + +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +# define VFP_ABI_FRAME 0x40 +#else +# define VFP_ABI_PUSH +# define VFP_ABI_POP +# define VFP_ABI_FRAME 0 +# define BSAES_ASM_EXTENDED_KEY +# define XTS_CHAIN_TWEAK +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + +#ifdef __thumb__ +# define adrl adr +#endif + +#if __ARM_ARCH__>=7 +.text +.syntax unified @ ARMv7-capable assembler is expected to handle this +#ifdef __thumb2__ +.thumb +#else +.code 32 +#endif + +.fpu neon + +.type _bsaes_decrypt8,%function +.align 4 +_bsaes_decrypt8: + adr $const,_bsaes_decrypt8 + vldmia $key!, {@XMM[9]} @ round 0 key + add $const,$const,#.LM0ISR-_bsaes_decrypt8 + + vldmia $const!, {@XMM[8]} @ .LM0ISR + veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key + veor @XMM[11], @XMM[1], @XMM[9] + vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` + veor @XMM[12], @XMM[2], @XMM[9] + vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` + veor @XMM[13], @XMM[3], @XMM[9] + vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` + veor @XMM[14], @XMM[4], @XMM[9] + vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` + veor @XMM[15], @XMM[5], @XMM[9] + vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` + veor @XMM[10], @XMM[6], @XMM[9] + vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` + veor @XMM[11], @XMM[7], @XMM[9] + vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` + vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` +___ + &bitslice (@XMM[0..7, 8..11]); +$code.=<<___; + sub $rounds,$rounds,#1 + b .Ldec_sbox +.align 4 +.Ldec_loop: +___ + &ShiftRows (@XMM[0..7, 8..12]); +$code.=".Ldec_sbox:\n"; + &InvSbox (@XMM[0..7, 8..15]); +$code.=<<___; + subs $rounds,$rounds,#1 + bcc .Ldec_done +___ + &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]); +$code.=<<___; + vldmia $const, {@XMM[12]} @ .LISR + ite eq @ Thumb2 thing, sanity check in ARM + addeq $const,$const,#0x10 + bne .Ldec_loop + vldmia $const, {@XMM[12]} @ .LISRM0 + b .Ldec_loop +.align 4 +.Ldec_done: +___ + &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]); +$code.=<<___; + vldmia $key, {@XMM[8]} @ last round key + veor @XMM[6], @XMM[6], @XMM[8] + veor @XMM[4], @XMM[4], @XMM[8] + veor @XMM[2], @XMM[2], @XMM[8] + veor @XMM[7], @XMM[7], @XMM[8] + veor @XMM[3], @XMM[3], @XMM[8] + veor @XMM[5], @XMM[5], @XMM[8] + veor @XMM[0], @XMM[0], @XMM[8] + veor @XMM[1], @XMM[1], @XMM[8] + bx lr +.size _bsaes_decrypt8,.-_bsaes_decrypt8 + +.type _bsaes_const,%object +.align 6 +_bsaes_const: +.LM0ISR: @ InvShiftRows constants + .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISR: + .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LISRM0: + .quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LM0SR: @ ShiftRows constants + .quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSR: + .quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: + .quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0: + .quad 0x02060a0e03070b0f, 0x0004080c0105090d +.LREVM0SR: + .quad 0x090d01050c000408, 0x03070b0f060a0e02 +.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by " +.align 6 +.size _bsaes_const,.-_bsaes_const + +.type _bsaes_encrypt8,%function +.align 4 +_bsaes_encrypt8: + adr $const,_bsaes_encrypt8 + vldmia $key!, {@XMM[9]} @ round 0 key + sub $const,$const,#_bsaes_encrypt8-.LM0SR + + vldmia $const!, {@XMM[8]} @ .LM0SR +_bsaes_encrypt8_alt: + veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key + veor @XMM[11], @XMM[1], @XMM[9] + vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` + veor @XMM[12], @XMM[2], @XMM[9] + vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` + veor @XMM[13], @XMM[3], @XMM[9] + vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` + veor @XMM[14], @XMM[4], @XMM[9] + vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` + veor @XMM[15], @XMM[5], @XMM[9] + vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` + veor @XMM[10], @XMM[6], @XMM[9] + vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` + veor @XMM[11], @XMM[7], @XMM[9] + vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` + vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` +_bsaes_encrypt8_bitslice: +___ + &bitslice (@XMM[0..7, 8..11]); +$code.=<<___; + sub $rounds,$rounds,#1 + b .Lenc_sbox +.align 4 +.Lenc_loop: +___ + &ShiftRows (@XMM[0..7, 8..12]); +$code.=".Lenc_sbox:\n"; + &Sbox (@XMM[0..7, 8..15]); +$code.=<<___; + subs $rounds,$rounds,#1 + bcc .Lenc_done +___ + &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]); +$code.=<<___; + vldmia $const, {@XMM[12]} @ .LSR + ite eq @ Thumb2 thing, samity check in ARM + addeq $const,$const,#0x10 + bne .Lenc_loop + vldmia $const, {@XMM[12]} @ .LSRM0 + b .Lenc_loop +.align 4 +.Lenc_done: +___ + # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb + &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]); +$code.=<<___; + vldmia $key, {@XMM[8]} @ last round key + veor @XMM[4], @XMM[4], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[8] + veor @XMM[3], @XMM[3], @XMM[8] + veor @XMM[7], @XMM[7], @XMM[8] + veor @XMM[2], @XMM[2], @XMM[8] + veor @XMM[5], @XMM[5], @XMM[8] + veor @XMM[0], @XMM[0], @XMM[8] + veor @XMM[1], @XMM[1], @XMM[8] + bx lr +.size _bsaes_encrypt8,.-_bsaes_encrypt8 +___ +} +{ +my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6"); + +sub bitslice_key { +my @x=reverse(@_[0..7]); +my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12]; + + &swapmove (@x[0,1],1,$bs0,$t2,$t3); +$code.=<<___; + @ &swapmove(@x[2,3],1,$t0,$t2,$t3); + vmov @x[2], @x[0] + vmov @x[3], @x[1] +___ + #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); + + &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3); +$code.=<<___; + @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); + vmov @x[4], @x[0] + vmov @x[6], @x[2] + vmov @x[5], @x[1] + vmov @x[7], @x[3] +___ + &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3); + &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3); +} + +$code.=<<___; +.type _bsaes_key_convert,%function +.align 4 +_bsaes_key_convert: + adr $const,_bsaes_key_convert + vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key + sub $const,$const,#_bsaes_key_convert-.LM0 + vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key + + vmov.i8 @XMM[8], #0x01 @ bit masks + vmov.i8 @XMM[9], #0x02 + vmov.i8 @XMM[10], #0x04 + vmov.i8 @XMM[11], #0x08 + vmov.i8 @XMM[12], #0x10 + vmov.i8 @XMM[13], #0x20 + vldmia $const, {@XMM[14]} @ .LM0 + +#ifdef __ARMEL__ + vrev32.8 @XMM[7], @XMM[7] + vrev32.8 @XMM[15], @XMM[15] +#endif + sub $rounds,$rounds,#1 + vstmia $out!, {@XMM[7]} @ save round 0 key + b .Lkey_loop + +.align 4 +.Lkey_loop: + vtbl.8 `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])` + vtbl.8 `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])` + vmov.i8 @XMM[6], #0x40 + vmov.i8 @XMM[15], #0x80 + + vtst.8 @XMM[0], @XMM[7], @XMM[8] + vtst.8 @XMM[1], @XMM[7], @XMM[9] + vtst.8 @XMM[2], @XMM[7], @XMM[10] + vtst.8 @XMM[3], @XMM[7], @XMM[11] + vtst.8 @XMM[4], @XMM[7], @XMM[12] + vtst.8 @XMM[5], @XMM[7], @XMM[13] + vtst.8 @XMM[6], @XMM[7], @XMM[6] + vtst.8 @XMM[7], @XMM[7], @XMM[15] + vld1.8 {@XMM[15]}, [$inp]! @ load next round key + vmvn @XMM[0], @XMM[0] @ "pnot" + vmvn @XMM[1], @XMM[1] + vmvn @XMM[5], @XMM[5] + vmvn @XMM[6], @XMM[6] +#ifdef __ARMEL__ + vrev32.8 @XMM[15], @XMM[15] +#endif + subs $rounds,$rounds,#1 + vstmia $out!,{@XMM[0]-@XMM[7]} @ write bit-sliced round key + bne .Lkey_loop + + vmov.i8 @XMM[7],#0x63 @ compose .L63 + @ don't save last round key + bx lr +.size _bsaes_key_convert,.-_bsaes_key_convert +___ +} + +if (0) { # following four functions are unsupported interface + # used for benchmarking... +$code.=<<___; +.globl bsaes_enc_key_convert +.type bsaes_enc_key_convert,%function +.align 4 +bsaes_enc_key_convert: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + + ldr r5,[$inp,#240] @ pass rounds + mov r4,$inp @ pass key + mov r12,$out @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_enc_key_convert,.-bsaes_enc_key_convert + +.globl bsaes_encrypt_128 +.type bsaes_encrypt_128,%function +.align 4 +bsaes_encrypt_128: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so +.Lenc128_loop: + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! + mov r4,$key @ pass the key + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! + mov r5,#10 @ pass rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + + bl _bsaes_encrypt8 + + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + subs $len,$len,#0x80 + vst1.8 {@XMM[5]}, [$out]! + bhi .Lenc128_loop + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_encrypt_128,.-bsaes_encrypt_128 + +.globl bsaes_dec_key_convert +.type bsaes_dec_key_convert,%function +.align 4 +bsaes_dec_key_convert: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + + ldr r5,[$inp,#240] @ pass rounds + mov r4,$inp @ pass key + mov r12,$out @ pass key schedule + bl _bsaes_key_convert + vldmia $out, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia $out, {@XMM[7]} + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_dec_key_convert,.-bsaes_dec_key_convert + +.globl bsaes_decrypt_128 +.type bsaes_decrypt_128,%function +.align 4 +bsaes_decrypt_128: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so +.Ldec128_loop: + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! + mov r4,$key @ pass the key + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! + mov r5,#10 @ pass rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + + bl _bsaes_decrypt8 + + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + subs $len,$len,#0x80 + vst1.8 {@XMM[5]}, [$out]! + bhi .Ldec128_loop + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_decrypt_128,.-bsaes_decrypt_128 +___ +} +{ +my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10)); +my ($keysched)=("sp"); + +$code.=<<___; +.extern AES_cbc_encrypt +.extern AES_decrypt + +.global bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,%function +.align 5 +bsaes_cbc_encrypt: +#ifndef __KERNEL__ + cmp $len, #128 +#ifndef __thumb__ + blo AES_cbc_encrypt +#else + bhs 1f + b AES_cbc_encrypt +1: +#endif +#endif + + @ it is up to the caller to make sure we are called with enc == 0 + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr $ivp, [ip] @ IV is 1st arg on the stack + mov $len, $len, lsr#4 @ len in 16 byte blocks + sub sp, #0x10 @ scratch space to carry over the IV + mov $fp, sp @ save sp + + ldr $rounds, [$key, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + add r12, #`128-32` @ sifze of bit-slices key schedule + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 @ sp is $keysched + bl _bsaes_key_convert + vldmia $keysched, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia $keysched, {@XMM[7]} +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, $key, #248 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} + +.align 2 +0: +#endif + + vld1.8 {@XMM[15]}, [$ivp] @ load IV + b .Lcbc_dec_loop + +.align 4 +.Lcbc_dec_loop: + subs $len, $len, #0x8 + bmi .Lcbc_dec_loop_finish + + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, $keysched @ pass the key +#else + add r4, $key, #248 +#endif + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! + mov r5, $rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp] + sub $inp, $inp, #0x60 + vstmia $fp, {@XMM[15]} @ put aside IV + + bl _bsaes_decrypt8 + + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[3], @XMM[3], @XMM[13] + vst1.8 {@XMM[6]}, [$out]! + veor @XMM[5], @XMM[5], @XMM[14] + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + vst1.8 {@XMM[5]}, [$out]! + + b .Lcbc_dec_loop + +.Lcbc_dec_loop_finish: + adds $len, $len, #8 + beq .Lcbc_dec_done + + vld1.8 {@XMM[0]}, [$inp]! @ load input + cmp $len, #2 + blo .Lcbc_dec_one + vld1.8 {@XMM[1]}, [$inp]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, $keysched @ pass the key +#else + add r4, $key, #248 +#endif + mov r5, $rounds + vstmia $fp, {@XMM[15]} @ put aside IV + beq .Lcbc_dec_two + vld1.8 {@XMM[2]}, [$inp]! + cmp $len, #4 + blo .Lcbc_dec_three + vld1.8 {@XMM[3]}, [$inp]! + beq .Lcbc_dec_four + vld1.8 {@XMM[4]}, [$inp]! + cmp $len, #6 + blo .Lcbc_dec_five + vld1.8 {@XMM[5]}, [$inp]! + beq .Lcbc_dec_six + vld1.8 {@XMM[6]}, [$inp]! + sub $inp, $inp, #0x70 + + bl _bsaes_decrypt8 + + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[3], @XMM[3], @XMM[13] + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_six: + sub $inp, $inp, #0x60 + bl _bsaes_decrypt8 + vldmia $fp,{@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_five: + sub $inp, $inp, #0x50 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[2], @XMM[2], @XMM[11] + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_four: + sub $inp, $inp, #0x40 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_three: + sub $inp, $inp, #0x30 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_two: + sub $inp, $inp, #0x20 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[15]}, [$inp]! @ reload input + veor @XMM[1], @XMM[1], @XMM[8] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_one: + sub $inp, $inp, #0x10 + mov $rounds, $out @ save original out pointer + mov $out, $fp @ use the iv scratch space as out buffer + mov r2, $key + vmov @XMM[4],@XMM[15] @ just in case ensure that IV + vmov @XMM[5],@XMM[0] @ and input are preserved + bl AES_decrypt + vld1.8 {@XMM[0]}, [$fp,:64] @ load result + veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV + vmov @XMM[15], @XMM[5] @ @XMM[5] holds input + vst1.8 {@XMM[0]}, [$rounds] @ write output + +.Lcbc_dec_done: +#ifndef BSAES_ASM_EXTENDED_KEY + vmov.i32 q0, #0 + vmov.i32 q1, #0 +.Lcbc_dec_bzero: @ wipe key schedule [if any] + vstmia $keysched!, {q0-q1} + cmp $keysched, $fp + bne .Lcbc_dec_bzero +#endif + + mov sp, $fp + add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb + vst1.8 {@XMM[15]}, [$ivp] @ return IV + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt +___ +} +{ +my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10))); +my $const = "r6"; # shared with _bsaes_encrypt8_alt +my $keysched = "sp"; + +$code.=<<___; +.extern AES_encrypt +.global bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,%function +.align 5 +bsaes_ctr32_encrypt_blocks: + cmp $len, #8 @ use plain AES for + blo .Lctr_enc_short @ small sizes + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr $ctr, [ip] @ ctr is 1st arg on the stack + sub sp, sp, #0x10 @ scratch space to carry over the ctr + mov $fp, sp @ save sp + + ldr $rounds, [$key, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + add r12, #`128-32` @ size of bit-sliced key schedule + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 @ sp is $keysched + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + + vld1.8 {@XMM[0]}, [$ctr] @ load counter + add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr + vldmia $keysched, {@XMM[4]} @ load round0 key +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + +.align 2 +0: add r12, $key, #248 + vld1.8 {@XMM[0]}, [$ctr] @ load counter + adrl $ctr, .LREVM0SR @ borrow $ctr + vldmia r12, {@XMM[4]} @ load round0 key + sub sp, #0x10 @ place for adjusted round0 key +#endif + + vmov.i32 @XMM[8],#1 @ compose 1<<96 + veor @XMM[9],@XMM[9],@XMM[9] + vrev32.8 @XMM[0],@XMM[0] + vext.8 @XMM[8],@XMM[9],@XMM[8],#4 + vrev32.8 @XMM[4],@XMM[4] + vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 + vstmia $keysched, {@XMM[4]} @ save adjusted round0 key + b .Lctr_enc_loop + +.align 4 +.Lctr_enc_loop: + vadd.u32 @XMM[10], @XMM[8], @XMM[9] @ compose 3<<96 + vadd.u32 @XMM[1], @XMM[0], @XMM[8] @ +1 + vadd.u32 @XMM[2], @XMM[0], @XMM[9] @ +2 + vadd.u32 @XMM[3], @XMM[0], @XMM[10] @ +3 + vadd.u32 @XMM[4], @XMM[1], @XMM[10] + vadd.u32 @XMM[5], @XMM[2], @XMM[10] + vadd.u32 @XMM[6], @XMM[3], @XMM[10] + vadd.u32 @XMM[7], @XMM[4], @XMM[10] + vadd.u32 @XMM[10], @XMM[5], @XMM[10] @ next counter + + @ Borrow prologue from _bsaes_encrypt8 to use the opportunity + @ to flip byte order in 32-bit counter + + vldmia $keysched, {@XMM[9]} @ load round0 key +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, $keysched, #0x10 @ pass next round key +#else + add r4, $key, #`248+16` +#endif + vldmia $ctr, {@XMM[8]} @ .LREVM0SR + mov r5, $rounds @ pass rounds + vstmia $fp, {@XMM[10]} @ save next counter + sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants + + bl _bsaes_encrypt8_alt + + subs $len, $len, #8 + blo .Lctr_enc_loop_done + + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[0], @XMM[8] + veor @XMM[1], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[10] + veor @XMM[6], @XMM[11] + vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! + veor @XMM[3], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[7], @XMM[13] + veor @XMM[2], @XMM[14] + vst1.8 {@XMM[4]}, [$out]! + veor @XMM[5], @XMM[15] + vst1.8 {@XMM[6]}, [$out]! + vmov.i32 @XMM[8], #1 @ compose 1<<96 + vst1.8 {@XMM[3]}, [$out]! + veor @XMM[9], @XMM[9], @XMM[9] + vst1.8 {@XMM[7]}, [$out]! + vext.8 @XMM[8], @XMM[9], @XMM[8], #4 + vst1.8 {@XMM[2]}, [$out]! + vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 + vst1.8 {@XMM[5]}, [$out]! + vldmia $fp, {@XMM[0]} @ load counter + + bne .Lctr_enc_loop + b .Lctr_enc_done + +.align 4 +.Lctr_enc_loop_done: + add $len, $len, #8 + vld1.8 {@XMM[8]}, [$inp]! @ load input + veor @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! @ write output + cmp $len, #2 + blo .Lctr_enc_done + vld1.8 {@XMM[9]}, [$inp]! + veor @XMM[1], @XMM[9] + vst1.8 {@XMM[1]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[10]}, [$inp]! + veor @XMM[4], @XMM[10] + vst1.8 {@XMM[4]}, [$out]! + cmp $len, #4 + blo .Lctr_enc_done + vld1.8 {@XMM[11]}, [$inp]! + veor @XMM[6], @XMM[11] + vst1.8 {@XMM[6]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[12]}, [$inp]! + veor @XMM[3], @XMM[12] + vst1.8 {@XMM[3]}, [$out]! + cmp $len, #6 + blo .Lctr_enc_done + vld1.8 {@XMM[13]}, [$inp]! + veor @XMM[7], @XMM[13] + vst1.8 {@XMM[7]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[14]}, [$inp] + veor @XMM[2], @XMM[14] + vst1.8 {@XMM[2]}, [$out]! + +.Lctr_enc_done: + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifndef BSAES_ASM_EXTENDED_KEY +.Lctr_enc_bzero: @ wipe key schedule [if any] + vstmia $keysched!, {q0-q1} + cmp $keysched, $fp + bne .Lctr_enc_bzero +#else + vstmia $keysched, {q0-q1} +#endif + + mov sp, $fp + add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.align 4 +.Lctr_enc_short: + ldr ip, [sp] @ ctr pointer is passed on stack + stmdb sp!, {r4-r8, lr} + + mov r4, $inp @ copy arguments + mov r5, $out + mov r6, $len + mov r7, $key + ldr r8, [ip, #12] @ load counter LSW + vld1.8 {@XMM[1]}, [ip] @ load whole counter value +#ifdef __ARMEL__ + rev r8, r8 +#endif + sub sp, sp, #0x10 + vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value + sub sp, sp, #0x10 + +.Lctr_enc_short_loop: + add r0, sp, #0x10 @ input counter value + mov r1, sp @ output on the stack + mov r2, r7 @ key + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [r4]! @ load input + vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter + add r8, r8, #1 +#ifdef __ARMEL__ + rev r0, r8 + str r0, [sp, #0x1c] @ next counter value +#else + str r8, [sp, #0x1c] @ next counter value +#endif + veor @XMM[0],@XMM[0],@XMM[1] + vst1.8 {@XMM[0]}, [r5]! @ store output + subs r6, r6, #1 + bne .Lctr_enc_short_loop + + vmov.i32 q0, #0 + vmov.i32 q1, #0 + vstmia sp!, {q0-q1} + + ldmia sp!, {r4-r8, pc} +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +___ +} +{ +###################################################################### +# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len, +# const AES_KEY *key1, const AES_KEY *key2, +# const unsigned char iv[16]); +# +my ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3))); +my $const="r6"; # returned by _bsaes_key_convert +my $twmask=@XMM[5]; +my @T=@XMM[6..7]; + +$code.=<<___; +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,%function +.align 4 +bsaes_xts_encrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future $fp + + mov $inp, r0 + mov $out, r1 + mov $len, r2 + mov $key, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0,sp @ pointer to initial tweak +#endif + + ldr $rounds, [$key, #240] @ get # of rounds + mov $fp, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + @ add r12, #`128-32` @ size of bit-sliced key schedule + sub r12, #`32+16` @ place for tweak[9] + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + + vld1.8 {@XMM[8]}, [r0] @ initial tweak + adr $magic, .Lxts_magic + + subs $len, #0x80 + blo .Lxts_enc_short + b .Lxts_enc_loop + +.align 4 +.Lxts_enc_loop: + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + vadd.u64 @XMM[8], @XMM[15], @XMM[15] + vst1.64 {@XMM[15]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + veor @XMM[8], @XMM[8], @T[0] + vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + veor @XMM[7], @XMM[7], @XMM[15] + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[2], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + veor @XMM[13], @XMM[5], @XMM[15] + vst1.8 {@XMM[12]-@XMM[13]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + subs $len, #0x80 + bpl .Lxts_enc_loop + +.Lxts_enc_short: + adds $len, #0x70 + bmi .Lxts_enc_done + + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! + subs $len, #0x10 + bmi .Lxts_enc_`$i-9` +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + sub $len, #0x10 + vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vld1.64 {@XMM[14]}, [r0,:128]! + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[2], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + vst1.8 {@XMM[12]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_6: + vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak + + veor @XMM[4], @XMM[4], @XMM[12] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[5], @XMM[5], @XMM[13] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done + +@ put this in range for both ARM and Thumb mode adr instructions +.align 5 +.Lxts_magic: + .quad 1, 0x87 + +.align 5 +.Lxts_enc_5: + vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak + + veor @XMM[3], @XMM[3], @XMM[11] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[4], @XMM[4], @XMM[12] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + vst1.8 {@XMM[10]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_4: + vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak + + veor @XMM[2], @XMM[2], @XMM[10] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[3], @XMM[3], @XMM[11] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_3: + vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak + + veor @XMM[1], @XMM[1], @XMM[9] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[2], @XMM[2], @XMM[10] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + vld1.64 {@XMM[10]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + vst1.8 {@XMM[8]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_2: + vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak + + veor @XMM[0], @XMM[0], @XMM[8] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[1], @XMM[1], @XMM[9] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_1: + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! + mov $fp, r4 + + vmov @XMM[8], @XMM[9] @ next round tweak + +.Lxts_enc_done: +#ifndef XTS_CHAIN_TWEAK + adds $len, #0x10 + beq .Lxts_enc_ret + sub r6, $out, #0x10 + +.Lxts_enc_steal: + ldrb r0, [$inp], #1 + ldrb r1, [$out, #-0x10] + strb r0, [$out, #-0x10] + strb r1, [$out], #1 + + subs $len, #1 + bhi .Lxts_enc_steal + + vld1.8 {@XMM[0]}, [r6] + mov r0, sp + veor @XMM[0], @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [r6] + mov $fp, r4 +#endif + +.Lxts_enc_ret: + bic r0, $fp, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_enc_bzero + + mov sp, $fp +#ifdef XTS_CHAIN_TWEAK + vst1.8 {@XMM[8]}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,%function +.align 4 +bsaes_xts_decrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future $fp + + mov $inp, r0 + mov $out, r1 + mov $len, r2 + mov $key, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0, sp @ pointer to initial tweak +#endif + + ldr $rounds, [$key, #240] @ get # of rounds + mov $fp, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + @ add r12, #`128-32` @ size of bit-sliced key schedule + sub r12, #`32+16` @ place for tweak[9] + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + add r4, sp, #0x90 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, $key, #248 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + vld1.8 {@XMM[8]}, [r0] @ initial tweak + adr $magic, .Lxts_magic + + tst $len, #0xf @ if not multiple of 16 + it ne @ Thumb2 thing, sanity check in ARM + subne $len, #0x10 @ subtract another 16 bytes + subs $len, #0x80 + + blo .Lxts_dec_short + b .Lxts_dec_loop + +.align 4 +.Lxts_dec_loop: + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + vadd.u64 @XMM[8], @XMM[15], @XMM[15] + vst1.64 {@XMM[15]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + veor @XMM[8], @XMM[8], @T[0] + vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + veor @XMM[7], @XMM[7], @XMM[15] + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[3], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + veor @XMM[13], @XMM[5], @XMM[15] + vst1.8 {@XMM[12]-@XMM[13]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + subs $len, #0x80 + bpl .Lxts_dec_loop + +.Lxts_dec_short: + adds $len, #0x70 + bmi .Lxts_dec_done + + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! + subs $len, #0x10 + bmi .Lxts_dec_`$i-9` +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + sub $len, #0x10 + vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vld1.64 {@XMM[14]}, [r0,:128]! + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[3], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + vst1.8 {@XMM[12]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_6: + vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak + + veor @XMM[4], @XMM[4], @XMM[12] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[5], @XMM[5], @XMM[13] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_5: + vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak + + veor @XMM[3], @XMM[3], @XMM[11] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[4], @XMM[4], @XMM[12] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + vst1.8 {@XMM[10]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_4: + vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak + + veor @XMM[2], @XMM[2], @XMM[10] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[3], @XMM[3], @XMM[11] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_3: + vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak + + veor @XMM[1], @XMM[1], @XMM[9] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[2], @XMM[2], @XMM[10] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + vld1.64 {@XMM[10]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + vst1.8 {@XMM[8]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_2: + vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak + + veor @XMM[0], @XMM[0], @XMM[8] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[1], @XMM[1], @XMM[9] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_1: + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + mov r5, $magic @ preserve magic + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! + mov $fp, r4 + mov $magic, r5 + + vmov @XMM[8], @XMM[9] @ next round tweak + +.Lxts_dec_done: +#ifndef XTS_CHAIN_TWEAK + adds $len, #0x10 + beq .Lxts_dec_ret + + @ calculate one round of extra tweak for the stolen ciphertext + vldmia $magic, {$twmask} + vshr.s64 @XMM[6], @XMM[8], #63 + vand @XMM[6], @XMM[6], $twmask + vadd.u64 @XMM[9], @XMM[8], @XMM[8] + vswp `&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")` + veor @XMM[9], @XMM[9], @XMM[6] + + @ perform the final decryption with the last tweak value + vld1.8 {@XMM[0]}, [$inp]! + mov r0, sp + veor @XMM[0], @XMM[0], @XMM[9] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[9] + vst1.8 {@XMM[0]}, [$out] + + mov r6, $out +.Lxts_dec_steal: + ldrb r1, [$out] + ldrb r0, [$inp], #1 + strb r1, [$out, #0x10] + strb r0, [$out], #1 + + subs $len, #1 + bhi .Lxts_dec_steal + + vld1.8 {@XMM[0]}, [r6] + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [r6] + mov $fp, r4 +#endif + +.Lxts_dec_ret: + bic r0, $fp, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_dec_bzero + + mov sp, $fp +#ifdef XTS_CHAIN_TWEAK + vst1.8 {@XMM[8]}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +___ +} +$code.=<<___; +#endif +___ + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; + +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + +print $code; + +close STDOUT; diff --git a/crypto/Kconfig b/crypto/Kconfig index 69ce573f1224..71f337aefa39 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -776,6 +776,22 @@ config CRYPTO_AES_ARM See for more information. +config CRYPTO_AES_ARM_BS + tristate "Bit sliced AES using NEON instructions" + depends on ARM && KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AES_ARM + select CRYPTO_ABLK_HELPER + help + Use a faster and more secure NEON based implementation of AES in CBC, + CTR and XTS modes + + Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode + and for XTS mode encryption, CBC and XTS mode decryption speedup is + around 25%. (CBC encryption speed is not affected by this driver.) + This implementation does not rely on any lookup tables so it is + believed to be invulnerable to cache timing attacks. + config CRYPTO_ANUBIS tristate "Anubis cipher algorithm" select CRYPTO_ALGAPI -- cgit v1.2.3-59-g8ed1b