Diffstat (limited to 'arch/arm64/include/asm')
-rw-r--r--  arch/arm64/include/asm/Kbuild             |   2
-rw-r--r--  arch/arm64/include/asm/acpi.h             |  19
-rw-r--r--  arch/arm64/include/asm/asm-prototypes.h   |  26
-rw-r--r--  arch/arm64/include/asm/assembler.h        |  90
-rw-r--r--  arch/arm64/include/asm/atomic_ll_sc.h     |  63
-rw-r--r--  arch/arm64/include/asm/atomic_lse.h       |  48
-rw-r--r--  arch/arm64/include/asm/barrier.h          |   4
-rw-r--r--  arch/arm64/include/asm/cmpxchg.h          | 116
-rw-r--r--  arch/arm64/include/asm/cpucaps.h          |   8
-rw-r--r--  arch/arm64/include/asm/cpufeature.h       | 124
-rw-r--r--  arch/arm64/include/asm/cputype.h          |   2
-rw-r--r--  arch/arm64/include/asm/elf.h              |   4
-rw-r--r--  arch/arm64/include/asm/esr.h              |  17
-rw-r--r--  arch/arm64/include/asm/ftrace.h           |  14
-rw-r--r--  arch/arm64/include/asm/image.h            |  59
-rw-r--r--  arch/arm64/include/asm/insn.h             |   8
-rw-r--r--  arch/arm64/include/asm/io.h               |  32
-rw-r--r--  arch/arm64/include/asm/kexec.h            |  19
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h          |   9
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h          |   7
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h      |  35
-rw-r--r--  arch/arm64/include/asm/kvm_host.h         |  15
-rw-r--r--  arch/arm64/include/asm/kvm_hyp.h          |   8
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h          |  48
-rw-r--r--  arch/arm64/include/asm/memory.h           |  30
-rw-r--r--  arch/arm64/include/asm/mmu_context.h      |   5
-rw-r--r--  arch/arm64/include/asm/module.h           |  44
-rw-r--r--  arch/arm64/include/asm/neon-intrinsics.h  |  39
-rw-r--r--  arch/arm64/include/asm/percpu.h           | 390
-rw-r--r--  arch/arm64/include/asm/perf_event.h       | 170
-rw-r--r--  arch/arm64/include/asm/pgtable-hwdef.h    |  16
-rw-r--r--  arch/arm64/include/asm/pgtable.h          |  31
-rw-r--r--  arch/arm64/include/asm/pointer_auth.h     |  97
-rw-r--r--  arch/arm64/include/asm/preempt.h          |  89
-rw-r--r--  arch/arm64/include/asm/processor.h        |  42
-rw-r--r--  arch/arm64/include/asm/smp.h              |  15
-rw-r--r--  arch/arm64/include/asm/stackprotector.h   |   3
-rw-r--r--  arch/arm64/include/asm/stage2_pgtable.h   |  16
-rw-r--r--  arch/arm64/include/asm/sysreg.h           | 113
-rw-r--r--  arch/arm64/include/asm/thread_info.h      |  13
-rw-r--r--  arch/arm64/include/asm/tlbflush.h         |  19
-rw-r--r--  arch/arm64/include/asm/uaccess.h          |   3
-rw-r--r--  arch/arm64/include/asm/xor.h              |  73
43 files changed, 1448 insertions(+), 537 deletions(-)
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 6cd5d77b6b44..1e17ea5c372b 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -14,7 +14,6 @@ generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += msi.h
-generic-y += preempt.h
generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += rwsem.h
@@ -27,4 +26,3 @@ generic-y += trace_clock.h
generic-y += unaligned.h
generic-y += user.h
generic-y += vga.h
-generic-y += xor.h
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 709208dfdc8b..2def77ec14be 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -22,12 +22,23 @@
#include <asm/tlbflush.h>
/* Macros for consistency checks of the GICC subtable of MADT */
-#define ACPI_MADT_GICC_LENGTH \
- (acpi_gbl_FADT.header.revision < 6 ? 76 : 80)
+
+/*
+ * MADT GICC minimum length refers to the MADT GICC structure table length as
+ * defined in the earliest ACPI version supported on arm64, i.e. ACPI 5.1.
+ *
+ * The efficiency_class member was added to
+ * struct acpi_madt_generic_interrupt to represent the MADT GICC structure
+ * "Processor Power Efficiency Class" field, which was introduced in ACPI 6.0;
+ * its offset is therefore used to delimit the MADT GICC structure minimum
+ * length appropriately.
+ */
+#define ACPI_MADT_GICC_MIN_LENGTH ACPI_OFFSET( \
+ struct acpi_madt_generic_interrupt, efficiency_class)
#define BAD_MADT_GICC_ENTRY(entry, end) \
- (!(entry) || (entry)->header.length != ACPI_MADT_GICC_LENGTH || \
- (unsigned long)(entry) + ACPI_MADT_GICC_LENGTH > (end))
+ (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \
+ (unsigned long)(entry) + (entry)->header.length > (end))
/* Basic configuration for ACPI */
#ifdef CONFIG_ACPI
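The relaxed check above accepts any GICC entry that is at least the ACPI 5.1 length and lies entirely within the table, so longer ACPI 6.x entries still pass. A minimal sketch of a caller (illustrative only, not part of this patch; the function, variable names and error handling are made up):

#include <linux/acpi.h>
#include <linux/errno.h>

/*
 * Illustrative only: walk the MADT and reject malformed GICC entries using
 * the BAD_MADT_GICC_ENTRY() macro defined above.
 */
static int count_valid_gicc(struct acpi_table_madt *madt, unsigned long madt_end)
{
	unsigned long entry = (unsigned long)madt + sizeof(*madt);
	int count = 0;

	while (entry < madt_end) {
		struct acpi_subtable_header *hdr = (void *)entry;

		if (!hdr->length)
			return -EINVAL;		/* corrupt table, avoid looping forever */

		if (hdr->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) {
			struct acpi_madt_generic_interrupt *gicc = (void *)entry;

			if (BAD_MADT_GICC_ENTRY(gicc, madt_end))
				return -EINVAL;
			count++;
		}
		entry += hdr->length;
	}

	return count;
}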
diff --git a/arch/arm64/include/asm/asm-prototypes.h b/arch/arm64/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..2173ad32d550
--- /dev/null
+++ b/arch/arm64/include/asm/asm-prototypes.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PROTOTYPES_H
+#define __ASM_PROTOTYPES_H
+/*
+ * CONFIG_MODVERSIONS requires a C declaration to generate the appropriate CRC
+ * for each symbol. Since commit:
+ *
+ * 4efca4ed05cbdfd1 ("kbuild: modversions for EXPORT_SYMBOL() for asm")
+ *
+ * ... kbuild will automatically pick these up from <asm/asm-prototypes.h> and
+ * feed this to genksyms when building assembly files.
+ */
+#include <linux/arm-smccc.h>
+
+#include <asm/ftrace.h>
+#include <asm/page.h>
+#include <asm/string.h>
+#include <asm/uaccess.h>
+
+#include <asm-generic/asm-prototypes.h>
+
+long long __ashlti3(long long a, int b);
+long long __ashrti3(long long a, int b);
+long long __lshrti3(long long a, int b);
+
+#endif /* __ASM_PROTOTYPES_H */
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 6142402c2eb4..4feb6119c3c9 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -23,6 +23,8 @@
#ifndef __ASM_ASSEMBLER_H
#define __ASM_ASSEMBLER_H
+#include <asm-generic/export.h>
+
#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
@@ -123,6 +125,19 @@
.endm
/*
+ * Speculation barrier
+ */
+ .macro sb
+alternative_if_not ARM64_HAS_SB
+ dsb nsh
+ isb
+alternative_else
+ SB_BARRIER_INSN
+ nop
+alternative_endif
+ .endm
+
+/*
* Sanitise a 64-bit bounded index wrt speculation, returning zero if out
* of bounds.
*/
@@ -342,11 +357,10 @@ alternative_endif
.endm
/*
- * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ * tcr_set_t0sz - update TCR.T0SZ so that we can load the ID map
*/
- .macro tcr_set_idmap_t0sz, valreg, tmpreg
- ldr_l \tmpreg, idmap_t0sz
- bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+ .macro tcr_set_t0sz, valreg, t0sz
+ bfi \valreg, \t0sz, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
.endm
/*
@@ -377,27 +391,33 @@ alternative_endif
* size: size of the region
* Corrupts: kaddr, size, tmp1, tmp2
*/
+ .macro __dcache_op_workaround_clean_cache, op, kaddr
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+ dc \op, \kaddr
+alternative_else
+ dc civac, \kaddr
+alternative_endif
+ .endm
+
.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
dcache_line_size \tmp1, \tmp2
add \size, \kaddr, \size
sub \tmp2, \tmp1, #1
bic \kaddr, \kaddr, \tmp2
9998:
- .if (\op == cvau || \op == cvac)
-alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
- dc \op, \kaddr
-alternative_else
- dc civac, \kaddr
-alternative_endif
- .elseif (\op == cvap)
-alternative_if ARM64_HAS_DCPOP
- sys 3, c7, c12, 1, \kaddr // dc cvap
-alternative_else
- dc cvac, \kaddr
-alternative_endif
+ .ifc \op, cvau
+ __dcache_op_workaround_clean_cache \op, \kaddr
+ .else
+ .ifc \op, cvac
+ __dcache_op_workaround_clean_cache \op, \kaddr
+ .else
+ .ifc \op, cvap
+ sys 3, c7, c12, 1, \kaddr // dc cvap
.else
dc \op, \kaddr
.endif
+ .endif
+ .endif
add \kaddr, \kaddr, \tmp1
cmp \kaddr, \size
b.lo 9998b
@@ -477,6 +497,13 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
#else
#define NOKPROBE(x)
#endif
+
+#ifdef CONFIG_KASAN
+#define EXPORT_SYMBOL_NOKASAN(name)
+#else
+#define EXPORT_SYMBOL_NOKASAN(name) EXPORT_SYMBOL(name)
+#endif
+
/*
* Emit a 64-bit absolute little endian symbol reference in a way that
* ensures that it will be resolved at build time, even when building a
@@ -516,6 +543,29 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
.endm
/*
+ * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD.
+ * orr is used as it can cover the immediate value (and is idempotent).
+ * In future this may be nop'ed out when dealing with 52-bit kernel VAs.
+ * ttbr: Value of ttbr to set, modified.
+ */
+ .macro offset_ttbr1, ttbr
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
+ orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
+#endif
+ .endm
+
+/*
+ * Perform the reverse of offset_ttbr1.
+ * bic is used as it can cover the immediate value and, in future, won't need
+ * to be nop'ed out when dealing with 52-bit kernel VAs.
+ */
+ .macro restore_ttbr1, ttbr
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
+ bic \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
+#endif
+ .endm
+
+/*
* Arrange a physical address in a TTBR register, taking care of 52-bit
* addresses.
*
@@ -672,11 +722,9 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
.macro if_will_cond_yield_neon
#ifdef CONFIG_PREEMPT
get_thread_info x0
- ldr w1, [x0, #TSK_TI_PREEMPT]
- ldr x0, [x0, #TSK_TI_FLAGS]
- cmp w1, #PREEMPT_DISABLE_OFFSET
- csel x0, x0, xzr, eq
- tbnz x0, #TIF_NEED_RESCHED, .Lyield_\@ // needs rescheduling?
+ ldr x0, [x0, #TSK_TI_PREEMPT]
+ sub x0, x0, #PREEMPT_DISABLE_OFFSET
+ cbz x0, .Lyield_\@
/* fall through to endif_yield_neon */
.subsection 1
.Lyield_\@ :
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index f5a2d09afb38..af7b99005453 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -248,48 +248,57 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
}
__LL_SC_EXPORT(atomic64_dec_if_positive);
-#define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl) \
-__LL_SC_INLINE unsigned long \
-__LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
- unsigned long old, \
- unsigned long new)) \
+#define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl) \
+__LL_SC_INLINE u##sz \
+__LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr, \
+ unsigned long old, \
+ u##sz new)) \
{ \
- unsigned long tmp, oldval; \
+ unsigned long tmp; \
+ u##sz oldval; \
+ \
+ /* \
+ * Sub-word sizes require explicit casting so that the compare \
+ * part of the cmpxchg doesn't end up interpreting non-zero \
+ * upper bits of the register containing "old". \
+ */ \
+ if (sz < 32) \
+ old = (u##sz)old; \
\
asm volatile( \
" prfm pstl1strm, %[v]\n" \
- "1: ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n" \
+ "1: ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \
" eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
" cbnz %" #w "[tmp], 2f\n" \
- " st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \
+ " st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \
" cbnz %w[tmp], 1b\n" \
" " #mb "\n" \
"2:" \
: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \
- [v] "+Q" (*(unsigned long *)ptr) \
- : [old] "Lr" (old), [new] "r" (new) \
+ [v] "+Q" (*(u##sz *)ptr) \
+ : [old] "Kr" (old), [new] "r" (new) \
: cl); \
\
return oldval; \
} \
-__LL_SC_EXPORT(__cmpxchg_case_##name);
+__LL_SC_EXPORT(__cmpxchg_case_##name##sz);
-__CMPXCHG_CASE(w, b, 1, , , , )
-__CMPXCHG_CASE(w, h, 2, , , , )
-__CMPXCHG_CASE(w, , 4, , , , )
-__CMPXCHG_CASE( , , 8, , , , )
-__CMPXCHG_CASE(w, b, acq_1, , a, , "memory")
-__CMPXCHG_CASE(w, h, acq_2, , a, , "memory")
-__CMPXCHG_CASE(w, , acq_4, , a, , "memory")
-__CMPXCHG_CASE( , , acq_8, , a, , "memory")
-__CMPXCHG_CASE(w, b, rel_1, , , l, "memory")
-__CMPXCHG_CASE(w, h, rel_2, , , l, "memory")
-__CMPXCHG_CASE(w, , rel_4, , , l, "memory")
-__CMPXCHG_CASE( , , rel_8, , , l, "memory")
-__CMPXCHG_CASE(w, b, mb_1, dmb ish, , l, "memory")
-__CMPXCHG_CASE(w, h, mb_2, dmb ish, , l, "memory")
-__CMPXCHG_CASE(w, , mb_4, dmb ish, , l, "memory")
-__CMPXCHG_CASE( , , mb_8, dmb ish, , l, "memory")
+__CMPXCHG_CASE(w, b, , 8, , , , )
+__CMPXCHG_CASE(w, h, , 16, , , , )
+__CMPXCHG_CASE(w, , , 32, , , , )
+__CMPXCHG_CASE( , , , 64, , , , )
+__CMPXCHG_CASE(w, b, acq_, 8, , a, , "memory")
+__CMPXCHG_CASE(w, h, acq_, 16, , a, , "memory")
+__CMPXCHG_CASE(w, , acq_, 32, , a, , "memory")
+__CMPXCHG_CASE( , , acq_, 64, , a, , "memory")
+__CMPXCHG_CASE(w, b, rel_, 8, , , l, "memory")
+__CMPXCHG_CASE(w, h, rel_, 16, , , l, "memory")
+__CMPXCHG_CASE(w, , rel_, 32, , , l, "memory")
+__CMPXCHG_CASE( , , rel_, 64, , , l, "memory")
+__CMPXCHG_CASE(w, b, mb_, 8, dmb ish, , l, "memory")
+__CMPXCHG_CASE(w, h, mb_, 16, dmb ish, , l, "memory")
+__CMPXCHG_CASE(w, , mb_, 32, dmb ish, , l, "memory")
+__CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory")
#undef __CMPXCHG_CASE
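The explicit narrowing of "old" for sub-word sizes matters because the exclusive-load compare is performed on the full W register; a standalone user-space sketch of the failure it prevents (illustrative only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative only: if "old" carries stale upper bits, comparing the full
 * value against an 8-bit location never matches even when the low byte does,
 * which is exactly what the (u##sz)old cast above avoids.
 */
int main(void)
{
	unsigned long old = 0x1ff;	/* upper bits are garbage for a u8 */
	uint8_t mem = 0xff;

	printf("full-width compare matches: %d\n", mem == old);		/* 0 */
	printf("narrowed compare matches:   %d\n", mem == (uint8_t)old);	/* 1 */
	return 0;
}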
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index f9b0b09153e0..a424355240c5 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -446,22 +446,22 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
#define __LL_SC_CMPXCHG(op) __LL_SC_CALL(__cmpxchg_case_##op)
-#define __CMPXCHG_CASE(w, sz, name, mb, cl...) \
-static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \
- unsigned long old, \
- unsigned long new) \
+#define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...) \
+static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr, \
+ u##sz old, \
+ u##sz new) \
{ \
register unsigned long x0 asm ("x0") = (unsigned long)ptr; \
- register unsigned long x1 asm ("x1") = old; \
- register unsigned long x2 asm ("x2") = new; \
+ register u##sz x1 asm ("x1") = old; \
+ register u##sz x2 asm ("x2") = new; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
- __LL_SC_CMPXCHG(name) \
+ __LL_SC_CMPXCHG(name##sz) \
__nops(2), \
/* LSE atomics */ \
" mov " #w "30, %" #w "[old]\n" \
- " cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n" \
+ " cas" #mb #sfx "\t" #w "30, %" #w "[new], %[v]\n" \
" mov %" #w "[ret], " #w "30") \
: [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr) \
: [old] "r" (x1), [new] "r" (x2) \
@@ -470,22 +470,22 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \
return x0; \
}
-__CMPXCHG_CASE(w, b, 1, )
-__CMPXCHG_CASE(w, h, 2, )
-__CMPXCHG_CASE(w, , 4, )
-__CMPXCHG_CASE(x, , 8, )
-__CMPXCHG_CASE(w, b, acq_1, a, "memory")
-__CMPXCHG_CASE(w, h, acq_2, a, "memory")
-__CMPXCHG_CASE(w, , acq_4, a, "memory")
-__CMPXCHG_CASE(x, , acq_8, a, "memory")
-__CMPXCHG_CASE(w, b, rel_1, l, "memory")
-__CMPXCHG_CASE(w, h, rel_2, l, "memory")
-__CMPXCHG_CASE(w, , rel_4, l, "memory")
-__CMPXCHG_CASE(x, , rel_8, l, "memory")
-__CMPXCHG_CASE(w, b, mb_1, al, "memory")
-__CMPXCHG_CASE(w, h, mb_2, al, "memory")
-__CMPXCHG_CASE(w, , mb_4, al, "memory")
-__CMPXCHG_CASE(x, , mb_8, al, "memory")
+__CMPXCHG_CASE(w, b, , 8, )
+__CMPXCHG_CASE(w, h, , 16, )
+__CMPXCHG_CASE(w, , , 32, )
+__CMPXCHG_CASE(x, , , 64, )
+__CMPXCHG_CASE(w, b, acq_, 8, a, "memory")
+__CMPXCHG_CASE(w, h, acq_, 16, a, "memory")
+__CMPXCHG_CASE(w, , acq_, 32, a, "memory")
+__CMPXCHG_CASE(x, , acq_, 64, a, "memory")
+__CMPXCHG_CASE(w, b, rel_, 8, l, "memory")
+__CMPXCHG_CASE(w, h, rel_, 16, l, "memory")
+__CMPXCHG_CASE(w, , rel_, 32, l, "memory")
+__CMPXCHG_CASE(x, , rel_, 64, l, "memory")
+__CMPXCHG_CASE(w, b, mb_, 8, al, "memory")
+__CMPXCHG_CASE(w, h, mb_, 16, al, "memory")
+__CMPXCHG_CASE(w, , mb_, 32, al, "memory")
+__CMPXCHG_CASE(x, , mb_, 64, al, "memory")
#undef __LL_SC_CMPXCHG
#undef __CMPXCHG_CASE
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 822a9192c551..f66bb04fdf2d 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -34,6 +34,10 @@
#define psb_csync() asm volatile("hint #17" : : : "memory")
#define csdb() asm volatile("hint #20" : : : "memory")
+#define spec_bar() asm volatile(ALTERNATIVE("dsb nsh\nisb\n", \
+ SB_BARRIER_INSN"nop\n", \
+ ARM64_HAS_SB))
+
#define mb() dsb(sy)
#define rmb() dsb(ld)
#define wmb() dsb(st)
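spec_bar() is the C-level counterpart of the assembler 'sb' macro: it emits the SB instruction when ARM64_HAS_SB is detected and falls back to DSB NSH plus ISB otherwise. A hedged sketch of a typical use after a bounds check (illustrative only; the function is made up):

#include <asm/barrier.h>

/* Illustrative only: stop speculation past the bounds check before the load. */
static inline unsigned long read_entry(unsigned long *table,
				       unsigned long nr, unsigned long idx)
{
	if (idx >= nr)
		return 0;

	spec_bar();	/* no speculative table[idx] load with an out-of-range idx */
	return table[idx];
}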
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 3b0938281541..3f9376f1c409 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -30,46 +30,46 @@
* barrier case is generated as release+dmb for the former and
* acquire+release for the latter.
*/
-#define __XCHG_CASE(w, sz, name, mb, nop_lse, acq, acq_lse, rel, cl) \
-static inline unsigned long __xchg_case_##name(unsigned long x, \
- volatile void *ptr) \
-{ \
- unsigned long ret, tmp; \
- \
- asm volatile(ARM64_LSE_ATOMIC_INSN( \
- /* LL/SC */ \
- " prfm pstl1strm, %2\n" \
- "1: ld" #acq "xr" #sz "\t%" #w "0, %2\n" \
- " st" #rel "xr" #sz "\t%w1, %" #w "3, %2\n" \
- " cbnz %w1, 1b\n" \
- " " #mb, \
- /* LSE atomics */ \
- " swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \
- __nops(3) \
- " " #nop_lse) \
- : "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned long *)ptr) \
- : "r" (x) \
- : cl); \
- \
- return ret; \
+#define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl) \
+static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr) \
+{ \
+ u##sz ret; \
+ unsigned long tmp; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " prfm pstl1strm, %2\n" \
+ "1: ld" #acq "xr" #sfx "\t%" #w "0, %2\n" \
+ " st" #rel "xr" #sfx "\t%w1, %" #w "3, %2\n" \
+ " cbnz %w1, 1b\n" \
+ " " #mb, \
+ /* LSE atomics */ \
+ " swp" #acq_lse #rel #sfx "\t%" #w "3, %" #w "0, %2\n" \
+ __nops(3) \
+ " " #nop_lse) \
+ : "=&r" (ret), "=&r" (tmp), "+Q" (*(u##sz *)ptr) \
+ : "r" (x) \
+ : cl); \
+ \
+ return ret; \
}
-__XCHG_CASE(w, b, 1, , , , , , )
-__XCHG_CASE(w, h, 2, , , , , , )
-__XCHG_CASE(w, , 4, , , , , , )
-__XCHG_CASE( , , 8, , , , , , )
-__XCHG_CASE(w, b, acq_1, , , a, a, , "memory")
-__XCHG_CASE(w, h, acq_2, , , a, a, , "memory")
-__XCHG_CASE(w, , acq_4, , , a, a, , "memory")
-__XCHG_CASE( , , acq_8, , , a, a, , "memory")
-__XCHG_CASE(w, b, rel_1, , , , , l, "memory")
-__XCHG_CASE(w, h, rel_2, , , , , l, "memory")
-__XCHG_CASE(w, , rel_4, , , , , l, "memory")
-__XCHG_CASE( , , rel_8, , , , , l, "memory")
-__XCHG_CASE(w, b, mb_1, dmb ish, nop, , a, l, "memory")
-__XCHG_CASE(w, h, mb_2, dmb ish, nop, , a, l, "memory")
-__XCHG_CASE(w, , mb_4, dmb ish, nop, , a, l, "memory")
-__XCHG_CASE( , , mb_8, dmb ish, nop, , a, l, "memory")
+__XCHG_CASE(w, b, , 8, , , , , , )
+__XCHG_CASE(w, h, , 16, , , , , , )
+__XCHG_CASE(w, , , 32, , , , , , )
+__XCHG_CASE( , , , 64, , , , , , )
+__XCHG_CASE(w, b, acq_, 8, , , a, a, , "memory")
+__XCHG_CASE(w, h, acq_, 16, , , a, a, , "memory")
+__XCHG_CASE(w, , acq_, 32, , , a, a, , "memory")
+__XCHG_CASE( , , acq_, 64, , , a, a, , "memory")
+__XCHG_CASE(w, b, rel_, 8, , , , , l, "memory")
+__XCHG_CASE(w, h, rel_, 16, , , , , l, "memory")
+__XCHG_CASE(w, , rel_, 32, , , , , l, "memory")
+__XCHG_CASE( , , rel_, 64, , , , , l, "memory")
+__XCHG_CASE(w, b, mb_, 8, dmb ish, nop, , a, l, "memory")
+__XCHG_CASE(w, h, mb_, 16, dmb ish, nop, , a, l, "memory")
+__XCHG_CASE(w, , mb_, 32, dmb ish, nop, , a, l, "memory")
+__XCHG_CASE( , , mb_, 64, dmb ish, nop, , a, l, "memory")
#undef __XCHG_CASE
@@ -80,13 +80,13 @@ static inline unsigned long __xchg##sfx(unsigned long x, \
{ \
switch (size) { \
case 1: \
- return __xchg_case##sfx##_1(x, ptr); \
+ return __xchg_case##sfx##_8(x, ptr); \
case 2: \
- return __xchg_case##sfx##_2(x, ptr); \
+ return __xchg_case##sfx##_16(x, ptr); \
case 4: \
- return __xchg_case##sfx##_4(x, ptr); \
+ return __xchg_case##sfx##_32(x, ptr); \
case 8: \
- return __xchg_case##sfx##_8(x, ptr); \
+ return __xchg_case##sfx##_64(x, ptr); \
default: \
BUILD_BUG(); \
} \
@@ -123,13 +123,13 @@ static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
{ \
switch (size) { \
case 1: \
- return __cmpxchg_case##sfx##_1(ptr, (u8)old, new); \
+ return __cmpxchg_case##sfx##_8(ptr, old, new); \
case 2: \
- return __cmpxchg_case##sfx##_2(ptr, (u16)old, new); \
+ return __cmpxchg_case##sfx##_16(ptr, old, new); \
case 4: \
- return __cmpxchg_case##sfx##_4(ptr, old, new); \
+ return __cmpxchg_case##sfx##_32(ptr, old, new); \
case 8: \
- return __cmpxchg_case##sfx##_8(ptr, old, new); \
+ return __cmpxchg_case##sfx##_64(ptr, old, new); \
default: \
BUILD_BUG(); \
} \
@@ -197,16 +197,16 @@ __CMPXCHG_GEN(_mb)
__ret; \
})
-#define __CMPWAIT_CASE(w, sz, name) \
-static inline void __cmpwait_case_##name(volatile void *ptr, \
- unsigned long val) \
+#define __CMPWAIT_CASE(w, sfx, sz) \
+static inline void __cmpwait_case_##sz(volatile void *ptr, \
+ unsigned long val) \
{ \
unsigned long tmp; \
\
asm volatile( \
" sevl\n" \
" wfe\n" \
- " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \
+ " ldxr" #sfx "\t%" #w "[tmp], %[v]\n" \
" eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \
" cbnz %" #w "[tmp], 1f\n" \
" wfe\n" \
@@ -215,10 +215,10 @@ static inline void __cmpwait_case_##name(volatile void *ptr, \
: [val] "r" (val)); \
}
-__CMPWAIT_CASE(w, b, 1);
-__CMPWAIT_CASE(w, h, 2);
-__CMPWAIT_CASE(w, , 4);
-__CMPWAIT_CASE( , , 8);
+__CMPWAIT_CASE(w, b, 8);
+__CMPWAIT_CASE(w, h, 16);
+__CMPWAIT_CASE(w, , 32);
+__CMPWAIT_CASE( , , 64);
#undef __CMPWAIT_CASE
@@ -229,13 +229,13 @@ static inline void __cmpwait##sfx(volatile void *ptr, \
{ \
switch (size) { \
case 1: \
- return __cmpwait_case##sfx##_1(ptr, (u8)val); \
+ return __cmpwait_case##sfx##_8(ptr, (u8)val); \
case 2: \
- return __cmpwait_case##sfx##_2(ptr, (u16)val); \
+ return __cmpwait_case##sfx##_16(ptr, (u16)val); \
case 4: \
- return __cmpwait_case##sfx##_4(ptr, val); \
+ return __cmpwait_case##sfx##_32(ptr, val); \
case 8: \
- return __cmpwait_case##sfx##_8(ptr, val); \
+ return __cmpwait_case##sfx##_64(ptr, val); \
default: \
BUILD_BUG(); \
} \
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 6e2d254c09eb..82e9099834ae 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -54,7 +54,13 @@
#define ARM64_HAS_CRC32 33
#define ARM64_SSBS 34
#define ARM64_WORKAROUND_1188873 35
+#define ARM64_HAS_SB 36
+#define ARM64_WORKAROUND_1165522 37
+#define ARM64_HAS_ADDRESS_AUTH_ARCH 38
+#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39
+#define ARM64_HAS_GENERIC_AUTH_ARCH 40
+#define ARM64_HAS_GENERIC_AUTH_IMP_DEF 41
-#define ARM64_NCAPS 36
+#define ARM64_NCAPS 42
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 7e2ec64aa414..dfcfba725d72 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -321,19 +321,20 @@ struct arm64_cpu_capabilities {
bool sign;
unsigned long hwcap;
};
- /*
- * A list of "matches/cpu_enable" pair for the same
- * "capability" of the same "type" as described by the parent.
- * Only matches(), cpu_enable() and fields relevant to these
- * methods are significant in the list. The cpu_enable is
- * invoked only if the corresponding entry "matches()".
- * However, if a cpu_enable() method is associated
- * with multiple matches(), care should be taken that either
- * the match criteria are mutually exclusive, or that the
- * method is robust against being called multiple times.
- */
- const struct arm64_cpu_capabilities *match_list;
};
+
+ /*
+ * An optional list of "matches/cpu_enable" pair for the same
+ * "capability" of the same "type" as described by the parent.
+ * Only matches(), cpu_enable() and fields relevant to these
+ * methods are significant in the list. The cpu_enable is
+ * invoked only if the corresponding entry "matches()".
+ * However, if a cpu_enable() method is associated
+ * with multiple matches(), care should be taken that either
+ * the match criteria are mutually exclusive, or that the
+ * method is robust against being called multiple times.
+ */
+ const struct arm64_cpu_capabilities *match_list;
};
static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
@@ -353,10 +354,46 @@ cpucap_late_cpu_permitted(const struct arm64_cpu_capabilities *cap)
return !!(cap->type & ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU);
}
+/*
+ * Generic helper for handling capabilities with multiple (match, enable) pairs
+ * of callbacks, sharing the same capability bit.
+ * Iterate over each entry to see if at least one matches.
+ */
+static inline bool
+cpucap_multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ const struct arm64_cpu_capabilities *caps;
+
+ for (caps = entry->match_list; caps->matches; caps++)
+ if (caps->matches(caps, scope))
+ return true;
+
+ return false;
+}
+
+/*
+ * Take appropriate action for all matching entries in the shared capability
+ * entry.
+ */
+static inline void
+cpucap_multi_entry_cap_cpu_enable(const struct arm64_cpu_capabilities *entry)
+{
+ const struct arm64_cpu_capabilities *caps;
+
+ for (caps = entry->match_list; caps->matches; caps++)
+ if (caps->matches(caps, SCOPE_LOCAL_CPU) &&
+ caps->cpu_enable)
+ caps->cpu_enable(caps);
+}
+
extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
extern struct static_key_false arm64_const_caps_ready;
+#define for_each_available_cap(cap) \
+ for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS)
+
bool this_cpu_has_cap(unsigned int cap);
static inline bool cpu_have_feature(unsigned int num)
@@ -473,7 +510,6 @@ static inline bool id_aa64pfr0_sve(u64 pfr0)
void __init setup_cpu_features(void);
void check_local_cpu_capabilities(void);
-
u64 read_sanitised_ftr_reg(u32 id);
static inline bool cpu_supports_mixed_endian_el0(void)
@@ -486,11 +522,59 @@ static inline bool system_supports_32bit_el0(void)
return cpus_have_const_cap(ARM64_HAS_32BIT_EL0);
}
+static inline bool system_supports_4kb_granule(void)
+{
+ u64 mmfr0;
+ u32 val;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ val = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_TGRAN4_SHIFT);
+
+ return val == ID_AA64MMFR0_TGRAN4_SUPPORTED;
+}
+
+static inline bool system_supports_64kb_granule(void)
+{
+ u64 mmfr0;
+ u32 val;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ val = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_TGRAN64_SHIFT);
+
+ return val == ID_AA64MMFR0_TGRAN64_SUPPORTED;
+}
+
+static inline bool system_supports_16kb_granule(void)
+{
+ u64 mmfr0;
+ u32 val;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ val = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_TGRAN16_SHIFT);
+
+ return val == ID_AA64MMFR0_TGRAN16_SUPPORTED;
+}
+
static inline bool system_supports_mixed_endian_el0(void)
{
return id_aa64mmfr0_mixed_endian_el0(read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1));
}
+static inline bool system_supports_mixed_endian(void)
+{
+ u64 mmfr0;
+ u32 val;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ val = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_BIGENDEL_SHIFT);
+
+ return val == 0x1;
+}
+
static inline bool system_supports_fpsimd(void)
{
return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
@@ -514,6 +598,20 @@ static inline bool system_supports_cnp(void)
cpus_have_const_cap(ARM64_HAS_CNP);
}
+static inline bool system_supports_address_auth(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) &&
+ (cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH_ARCH) ||
+ cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH_IMP_DEF));
+}
+
+static inline bool system_supports_generic_auth(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) &&
+ (cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH_ARCH) ||
+ cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF));
+}
+
#define ARM64_SSBD_UNKNOWN -1
#define ARM64_SSBD_FORCE_DISABLE 0
#define ARM64_SSBD_KERNEL 1
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 12f93e4d2452..951ed1a4e5c9 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -151,6 +151,8 @@ struct midr_range {
.rv_max = MIDR_CPU_VAR_REV(v_max, r_max), \
}
+#define MIDR_REV_RANGE(m, v, r_min, r_max) MIDR_RANGE(m, v, r_min, v, r_max)
+#define MIDR_REV(m, v, r) MIDR_RANGE(m, v, r, v, r)
#define MIDR_ALL_VERSIONS(m) MIDR_RANGE(m, 0, 0, 0xf, 0xf)
static inline bool is_midr_in_range(u32 midr, struct midr_range const *range)
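MIDR_REV_RANGE() covers a revision range within one variant and MIDR_REV() a single exact revision. A short sketch of how they compose into a midr_range list (illustrative only; the part and revisions are hypothetical, not taken from a real erratum):

#include <asm/cputype.h>

/* Illustrative only: a made-up erratum affecting r0p0..r1p0 plus exactly r3p1. */
static const struct midr_range example_midr_list[] = {
	MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 1, 0),	/* r0p0 .. r1p0 */
	MIDR_REV(MIDR_CORTEX_A76, 3, 1),		/* exactly r3p1 */
	{},
};

/* is_midr_in_range_list(read_cpuid_id(), example_midr_list) would then match. */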
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 433b9554c6a1..6adc1a90e7e6 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -117,7 +117,11 @@
* 64-bit, this is above 4GB to leave the entire 32-bit address
* space open for things that want to use the area for 32-bit pointers.
*/
+#ifdef CONFIG_ARM64_FORCE_52BIT
#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3)
+#else
+#define ELF_ET_DYN_BASE (2 * DEFAULT_MAP_WINDOW_64 / 3)
+#endif /* CONFIG_ARM64_FORCE_52BIT */
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 676de2ec1762..52233f00d53d 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -29,23 +29,24 @@
#define ESR_ELx_EC_CP14_MR (0x05)
#define ESR_ELx_EC_CP14_LS (0x06)
#define ESR_ELx_EC_FP_ASIMD (0x07)
-#define ESR_ELx_EC_CP10_ID (0x08)
-/* Unallocated EC: 0x09 - 0x0B */
+#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */
+#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */
+/* Unallocated EC: 0x0A - 0x0B */
#define ESR_ELx_EC_CP14_64 (0x0C)
/* Unallocated EC: 0x0d */
#define ESR_ELx_EC_ILL (0x0E)
/* Unallocated EC: 0x0F - 0x10 */
#define ESR_ELx_EC_SVC32 (0x11)
-#define ESR_ELx_EC_HVC32 (0x12)
-#define ESR_ELx_EC_SMC32 (0x13)
+#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */
+#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */
/* Unallocated EC: 0x14 */
#define ESR_ELx_EC_SVC64 (0x15)
-#define ESR_ELx_EC_HVC64 (0x16)
-#define ESR_ELx_EC_SMC64 (0x17)
+#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */
+#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */
#define ESR_ELx_EC_SYS64 (0x18)
#define ESR_ELx_EC_SVE (0x19)
/* Unallocated EC: 0x1A - 0x1E */
-#define ESR_ELx_EC_IMP_DEF (0x1f)
+#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
#define ESR_ELx_EC_IABT_LOW (0x20)
#define ESR_ELx_EC_IABT_CUR (0x21)
#define ESR_ELx_EC_PC_ALIGN (0x22)
@@ -68,7 +69,7 @@
/* Unallocated EC: 0x36 - 0x37 */
#define ESR_ELx_EC_BKPT32 (0x38)
/* Unallocated EC: 0x39 */
-#define ESR_ELx_EC_VECTOR32 (0x3A)
+#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */
/* Unallocted EC: 0x3B */
#define ESR_ELx_EC_BRK64 (0x3C)
/* Unallocated EC: 0x3D - 0x3F */
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index caa955f10e19..15a6587e12f9 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -13,6 +13,7 @@
#include <asm/insn.h>
+#define HAVE_FUNCTION_GRAPH_FP_TEST
#define MCOUNT_ADDR ((unsigned long)_mcount)
#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
@@ -56,6 +57,19 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
{
return is_compat_task();
}
+
+#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
+
+static inline bool arch_syscall_match_sym_name(const char *sym,
+ const char *name)
+{
+ /*
+ * Since all syscall functions have __arm64_ prefix, we must skip it.
+ * However, as we described above, we decided to ignore compat
+ * syscalls, so we don't care about __arm64_compat_ prefix here.
+ */
+ return !strcmp(sym + 8, name);
+}
#endif /* ifndef __ASSEMBLY__ */
#endif /* __ASM_FTRACE_H */
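The sym + 8 in arch_syscall_match_sym_name() skips exactly the eight characters of the "__arm64_" prefix. A standalone sketch of the comparison (illustrative only):

#include <stdio.h>
#include <string.h>

/* Illustrative only: mirrors arch_syscall_match_sym_name() above. */
static int match_sym_name(const char *sym, const char *name)
{
	return !strcmp(sym + 8, name);	/* 8 == strlen("__arm64_") */
}

int main(void)
{
	printf("%d\n", match_sym_name("__arm64_sys_openat", "sys_openat"));	/* 1 */
	printf("%d\n", match_sym_name("__arm64_sys_openat", "sys_read"));	/* 0 */
	return 0;
}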
diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h
new file mode 100644
index 000000000000..e2c27a2278e9
--- /dev/null
+++ b/arch/arm64/include/asm/image.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_IMAGE_H
+#define __ASM_IMAGE_H
+
+#define ARM64_IMAGE_MAGIC "ARM\x64"
+
+#define ARM64_IMAGE_FLAG_BE_SHIFT 0
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT (ARM64_IMAGE_FLAG_BE_SHIFT + 1)
+#define ARM64_IMAGE_FLAG_PHYS_BASE_SHIFT \
+ (ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT + 2)
+#define ARM64_IMAGE_FLAG_BE_MASK 0x1
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_MASK 0x3
+#define ARM64_IMAGE_FLAG_PHYS_BASE_MASK 0x1
+
+#define ARM64_IMAGE_FLAG_LE 0
+#define ARM64_IMAGE_FLAG_BE 1
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_4K 1
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_16K 2
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_64K 3
+#define ARM64_IMAGE_FLAG_PHYS_BASE 1
+
+#ifndef __ASSEMBLY__
+
+#define arm64_image_flag_field(flags, field) \
+ (((flags) >> field##_SHIFT) & field##_MASK)
+
+/*
+ * struct arm64_image_header - arm64 kernel image header
+ * See Documentation/arm64/booting.txt for details
+ *
+ * @code0: Executable code, or
+ * @mz_header alternatively used for part of MZ header
+ * @code1: Executable code
+ * @text_offset: Image load offset
+ * @image_size: Effective Image size
+ * @flags: kernel flags
+ * @reserved: reserved
+ * @magic: Magic number
+ * @reserved5: reserved, or
+ * @pe_header: alternatively used for PE COFF offset
+ */
+
+struct arm64_image_header {
+ __le32 code0;
+ __le32 code1;
+ __le64 text_offset;
+ __le64 image_size;
+ __le64 flags;
+ __le64 res2;
+ __le64 res3;
+ __le64 res4;
+ __le32 magic;
+ __le32 res5;
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_IMAGE_H */
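arm64_image_flag_field() pulls one of the fields laid out by the *_SHIFT/*_MASK definitions out of the header flags word. A small sketch of decoding the page-size field (illustrative only; the helper name is made up):

#include <linux/types.h>
#include <asm/image.h>

/* Illustrative only: decode the page-size field from an image header's flags. */
static bool image_uses_4k_pages(u64 flags)
{
	u64 page_size = arm64_image_flag_field(flags, ARM64_IMAGE_FLAG_PAGE_SIZE);

	return page_size == ARM64_IMAGE_FLAG_PAGE_SIZE_4K;
}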
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index c6802dea6cab..9c01f04db64d 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -261,6 +261,11 @@ enum aarch64_insn_prfm_policy {
AARCH64_INSN_PRFM_POLICY_STRM,
};
+enum aarch64_insn_adr_type {
+ AARCH64_INSN_ADR_TYPE_ADRP,
+ AARCH64_INSN_ADR_TYPE_ADR,
+};
+
#define __AARCH64_INSN_FUNCS(abbr, mask, val) \
static __always_inline bool aarch64_insn_is_##abbr(u32 code) \
{ return (code & (mask)) == (val); } \
@@ -393,6 +398,9 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
enum aarch64_insn_register src,
int imm, enum aarch64_insn_variant variant,
enum aarch64_insn_adsb_type type);
+u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_register reg,
+ enum aarch64_insn_adr_type type);
u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
enum aarch64_insn_register src,
int immr, int imms,
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 9f8b915af3a7..ee723835c1f4 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -104,7 +104,23 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
}
/* IO barriers */
-#define __iormb() rmb()
+#define __iormb(v) \
+({ \
+ unsigned long tmp; \
+ \
+ rmb(); \
+ \
+ /* \
+ * Create a dummy control dependency from the IO read to any \
+ * later instructions. This ensures that a subsequent call to \
+ * udelay() will be ordered due to the ISB in get_cycles(). \
+ */ \
+ asm volatile("eor %0, %1, %1\n" \
+ "cbnz %0, ." \
+ : "=r" (tmp) : "r" ((unsigned long)(v)) \
+ : "memory"); \
+})
+
#define __iowmb() wmb()
#define mmiowb() do { } while (0)
@@ -129,10 +145,10 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
* following Normal memory access. Writes are ordered relative to any prior
* Normal memory access.
*/
-#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; })
-#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; })
-#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; })
-#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(); __v; })
+#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(__v); __v; })
+#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(__v); __v; })
+#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
+#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
#define writeb(v,c) ({ __iowmb(); writeb_relaxed((v),(c)); })
#define writew(v,c) ({ __iowmb(); writew_relaxed((v),(c)); })
@@ -183,9 +199,9 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
/*
* io{read,write}{16,32,64}be() macros
*/
-#define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; })
-#define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; })
-#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(); __v; })
+#define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(__v); __v; })
+#define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(__v); __v; })
+#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(__v); __v; })
#define iowrite16be(v,p) ({ __iowmb(); __raw_writew((__force __u16)cpu_to_be16(v), p); })
#define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); })
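Passing the read value into __iormb(v) builds a control dependency from the MMIO read to later instructions, so a following udelay() (which starts with get_cycles() and its ISB) cannot begin before the read has completed. A hedged sketch of the pattern this protects (illustrative only; the device, register offset and bit are hypothetical):

#include <linux/io.h>
#include <linux/bits.h>
#include <linux/delay.h>

/*
 * Illustrative only: with the change above, readl() orders the MMIO read
 * before the delay via the control dependency in __iormb(__v).
 */
static void settle_after_reset(void __iomem *base)
{
	u32 status = readl(base + 0x04);	/* hypothetical status register */

	if (status & BIT(0))			/* hypothetical "busy" bit */
		udelay(10);			/* not started before the read completes */
}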
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index e17f0529a882..67e4cb75d1fd 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -93,6 +93,25 @@ static inline void crash_prepare_suspend(void) {}
static inline void crash_post_resume(void) {}
#endif
+#ifdef CONFIG_KEXEC_FILE
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+ void *dtb;
+ unsigned long dtb_mem;
+};
+
+extern const struct kexec_file_ops kexec_image_ops;
+
+struct kimage;
+
+extern int arch_kimage_file_post_load_cleanup(struct kimage *image);
+extern int load_other_segments(struct kimage *image,
+ unsigned long kernel_load_addr, unsigned long kernel_size,
+ char *initrd, unsigned long initrd_len,
+ char *cmdline);
+#endif
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 6f602af5263c..7f9d2bfcf82e 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -24,6 +24,8 @@
/* Hyp Configuration Register (HCR) bits */
#define HCR_FWB (UL(1) << 46)
+#define HCR_API (UL(1) << 41)
+#define HCR_APK (UL(1) << 40)
#define HCR_TEA (UL(1) << 37)
#define HCR_TERR (UL(1) << 36)
#define HCR_TLOR (UL(1) << 35)
@@ -87,6 +89,7 @@
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
HCR_FMO | HCR_IMO)
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
+#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
/* TCR_EL2 Registers bits */
@@ -104,7 +107,7 @@
TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
/* VTCR_EL2 Registers bits */
-#define VTCR_EL2_RES1 (1 << 31)
+#define VTCR_EL2_RES1 (1U << 31)
#define VTCR_EL2_HD (1 << 22)
#define VTCR_EL2_HA (1 << 21)
#define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT
@@ -320,10 +323,6 @@
#define PAR_TO_HPFAR(par) \
(((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
-#define kvm_arm_exception_type \
- {0, "IRQ" }, \
- {1, "TRAP" }
-
#define ECN(x) { ESR_ELx_EC_##x, #x }
#define kvm_arm_exception_class \
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index aea01a09eb94..f5b79e995f40 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -25,6 +25,7 @@
#define ARM_EXIT_WITH_SERROR_BIT 31
#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
+#define ARM_EXCEPTION_IS_TRAP(x) (ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_TRAP)
#define ARM_SERROR_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_SERROR_BIT))
#define ARM_EXCEPTION_IRQ 0
@@ -34,6 +35,12 @@
/* The hyp-stub will return this for any kvm_call_hyp() call */
#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR
+#define kvm_arm_exception_type \
+ {ARM_EXCEPTION_IRQ, "IRQ" }, \
+ {ARM_EXCEPTION_EL1_SERROR, "SERROR" }, \
+ {ARM_EXCEPTION_TRAP, "TRAP" }, \
+ {ARM_EXCEPTION_HYP_GONE, "HYP_GONE" }
+
#ifndef __ASSEMBLY__
#include <linux/mm.h>
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 21247870def7..506386a3edde 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -24,6 +24,7 @@
#include <linux/kvm_host.h>
+#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_hyp.h>
@@ -147,14 +148,6 @@ static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
return true;
}
-static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
-{
- if (vcpu_mode_is_32bit(vcpu))
- kvm_skip_instr32(vcpu, is_wide_instr);
- else
- *vcpu_pc(vcpu) += 4;
-}
-
static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
{
*vcpu_cpsr(vcpu) |= PSR_AA32_T_BIT;
@@ -424,4 +417,30 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
return data; /* Leave LE untouched */
}
+static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ kvm_skip_instr32(vcpu, is_wide_instr);
+ else
+ *vcpu_pc(vcpu) += 4;
+
+ /* advance the singlestep state machine */
+ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
+}
+
+/*
+ * Skip an instruction which has been emulated at hyp while most guest sysregs
+ * are live.
+ */
+static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu)
+{
+ *vcpu_pc(vcpu) = read_sysreg_el2(elr);
+ vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(spsr);
+
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+
+ write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, spsr);
+ write_sysreg_el2(*vcpu_pc(vcpu), elr);
+}
+
#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 52fbc823ff8c..7732d0ba4e60 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -319,7 +319,7 @@ struct kvm_vcpu_arch {
*/
#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
-u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
+u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
/*
@@ -360,7 +360,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
@@ -422,7 +422,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
}
}
-static inline bool kvm_arch_check_sve_has_vhe(void)
+static inline bool kvm_arch_requires_vhe(void)
{
/*
* The Arm architecture specifies that implementation of SVE
@@ -430,9 +430,13 @@ static inline bool kvm_arch_check_sve_has_vhe(void)
* relies on this when SVE is present:
*/
if (system_supports_sve())
- return has_vhe();
- else
return true;
+
+ /* Some implementations have defects that confine them to VHE */
+ if (cpus_have_cap(ARM64_WORKAROUND_1165522))
+ return true;
+
+ return false;
}
static inline void kvm_arch_hardware_unsetup(void) {}
@@ -445,7 +449,6 @@ void kvm_arm_init_debug(void);
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
-bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 23aca66767f9..a80a7ef57325 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -20,6 +20,7 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
+#include <asm/alternative.h>
#include <asm/sysreg.h>
#define __hyp_text __section(.hyp.text) notrace
@@ -163,6 +164,13 @@ static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm)
{
write_sysreg(kvm->arch.vtcr, vtcr_el2);
write_sysreg(kvm->arch.vttbr, vttbr_el2);
+
+ /*
+ * ARM erratum 1165522 requires the actual execution of the above
+ * before we can switch to the EL1/EL0 translation regime used by
+ * the guest.
+ */
+ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522));
}
#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 658657367f2f..8af4b1befa42 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -184,6 +184,17 @@ void kvm_clear_hyp_idmap(void);
#define kvm_mk_pgd(pudp) \
__pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE)
+#define kvm_set_pud(pudp, pud) set_pud(pudp, pud)
+
+#define kvm_pfn_pte(pfn, prot) pfn_pte(pfn, prot)
+#define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot)
+#define kvm_pfn_pud(pfn, prot) pfn_pud(pfn, prot)
+
+#define kvm_pud_pfn(pud) pud_pfn(pud)
+
+#define kvm_pmd_mkhuge(pmd) pmd_mkhuge(pmd)
+#define kvm_pud_mkhuge(pud) pud_mkhuge(pud)
+
static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
{
pte_val(pte) |= PTE_S2_RDWR;
@@ -196,6 +207,12 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
return pmd;
}
+static inline pud_t kvm_s2pud_mkwrite(pud_t pud)
+{
+ pud_val(pud) |= PUD_S2_RDWR;
+ return pud;
+}
+
static inline pte_t kvm_s2pte_mkexec(pte_t pte)
{
pte_val(pte) &= ~PTE_S2_XN;
@@ -208,6 +225,12 @@ static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
return pmd;
}
+static inline pud_t kvm_s2pud_mkexec(pud_t pud)
+{
+ pud_val(pud) &= ~PUD_S2_XN;
+ return pud;
+}
+
static inline void kvm_set_s2pte_readonly(pte_t *ptep)
{
pteval_t old_pteval, pteval;
@@ -246,6 +269,31 @@ static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
}
+static inline void kvm_set_s2pud_readonly(pud_t *pudp)
+{
+ kvm_set_s2pte_readonly((pte_t *)pudp);
+}
+
+static inline bool kvm_s2pud_readonly(pud_t *pudp)
+{
+ return kvm_s2pte_readonly((pte_t *)pudp);
+}
+
+static inline bool kvm_s2pud_exec(pud_t *pudp)
+{
+ return !(READ_ONCE(pud_val(*pudp)) & PUD_S2_XN);
+}
+
+static inline pud_t kvm_s2pud_mkyoung(pud_t pud)
+{
+ return pud_mkyoung(pud);
+}
+
+static inline bool kvm_s2pud_young(pud_t pud)
+{
+ return pud_young(pud);
+}
+
#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
#ifdef __PAGETABLE_PMD_FOLDED
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index b96442960aea..0385752bd079 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -35,15 +35,6 @@
#define PCI_IO_SIZE SZ_16M
/*
- * Log2 of the upper bound of the size of a struct page. Used for sizing
- * the vmemmap region only, does not affect actual memory footprint.
- * We don't use sizeof(struct page) directly since taking its size here
- * requires its definition to be available at this point in the inclusion
- * chain, and it may not be a power of 2 in the first place.
- */
-#define STRUCT_PAGE_MAX_SHIFT 6
-
-/*
* VMEMMAP_SIZE - allows the whole linear region to be covered by
* a struct page array
*/
@@ -62,8 +53,11 @@
#define PAGE_OFFSET (UL(0xffffffffffffffff) - \
(UL(1) << (VA_BITS - 1)) + 1)
#define KIMAGE_VADDR (MODULES_END)
+#define BPF_JIT_REGION_START (VA_START + KASAN_SHADOW_SIZE)
+#define BPF_JIT_REGION_SIZE (SZ_128M)
+#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
-#define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE)
+#define MODULES_VADDR (BPF_JIT_REGION_END)
#define MODULES_VSIZE (SZ_128M)
#define VMEMMAP_START (PAGE_OFFSET - VMEMMAP_SIZE)
#define PCI_IO_END (VMEMMAP_START - SZ_2M)
@@ -73,15 +67,26 @@
#define KERNEL_START _text
#define KERNEL_END _end
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
+#define MAX_USER_VA_BITS 52
+#else
+#define MAX_USER_VA_BITS VA_BITS
+#endif
+
/*
* KASAN requires 1/8th of the kernel virtual address space for the shadow
* region. KASAN can bloat the stack significantly, so double the (minimum)
- * stack size when KASAN is in use.
+ * stack size when KASAN is in use, and then double it again if KASAN_EXTRA is
+ * on.
*/
#ifdef CONFIG_KASAN
#define KASAN_SHADOW_SCALE_SHIFT 3
#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT))
+#ifdef CONFIG_KASAN_EXTRA
+#define KASAN_THREAD_SHIFT 2
+#else
#define KASAN_THREAD_SHIFT 1
+#endif /* CONFIG_KASAN_EXTRA */
#else
#define KASAN_SHADOW_SIZE (0)
#define KASAN_THREAD_SHIFT 0
@@ -196,6 +201,9 @@ static inline unsigned long kaslr_offset(void)
return kimage_vaddr - KIMAGE_VADDR;
}
+/* the actual size of a user virtual address */
+extern u64 vabits_user;
+
/*
* Allow all memory at the discovery stage. We will clip it later.
*/
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 1e58bf58c22b..2da3e478fd8f 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -35,6 +35,8 @@
#include <asm/sysreg.h>
#include <asm/tlbflush.h>
+extern bool rodata_full;
+
static inline void contextidr_thread_switch(struct task_struct *next)
{
if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR))
@@ -72,6 +74,9 @@ extern u64 idmap_ptrs_per_pgd;
static inline bool __cpu_uses_extended_idmap(void)
{
+ if (IS_ENABLED(CONFIG_ARM64_USER_VA_BITS_52))
+ return false;
+
return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS));
}
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 97d0ef12e2ff..905e1bb0e7bd 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -22,7 +22,7 @@
#ifdef CONFIG_ARM64_MODULE_PLTS
struct mod_plt_sec {
- struct elf64_shdr *plt;
+ int plt_shndx;
int plt_num_entries;
int plt_max_entries;
};
@@ -36,10 +36,12 @@ struct mod_arch_specific {
};
#endif
-u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
+u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
+ void *loc, const Elf64_Rela *rela,
Elf64_Sym *sym);
-u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val);
+u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
+ void *loc, u64 val);
#ifdef CONFIG_RANDOMIZE_BASE
extern u64 module_alloc_base;
@@ -56,39 +58,19 @@ struct plt_entry {
* is exactly what we are dealing with here, we are free to use x16
* as a scratch register in the PLT veneers.
*/
- __le32 mov0; /* movn x16, #0x.... */
- __le32 mov1; /* movk x16, #0x...., lsl #16 */
- __le32 mov2; /* movk x16, #0x...., lsl #32 */
+ __le32 adrp; /* adrp x16, .... */
+ __le32 add; /* add x16, x16, #0x.... */
__le32 br; /* br x16 */
};
-static inline struct plt_entry get_plt_entry(u64 val)
+static inline bool is_forbidden_offset_for_adrp(void *place)
{
- /*
- * MOVK/MOVN/MOVZ opcode:
- * +--------+------------+--------+-----------+-------------+---------+
- * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
- * +--------+------------+--------+-----------+-------------+---------+
- *
- * Rd := 0x10 (x16)
- * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
- * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
- * sf := 1 (64-bit variant)
- */
- return (struct plt_entry){
- cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5),
- cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
- cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
- cpu_to_le32(0xd61f0200)
- };
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
+ cpus_have_const_cap(ARM64_WORKAROUND_843419) &&
+ ((u64)place & 0xfff) >= 0xff8;
}
-static inline bool plt_entries_equal(const struct plt_entry *a,
- const struct plt_entry *b)
-{
- return a->mov0 == b->mov0 &&
- a->mov1 == b->mov1 &&
- a->mov2 == b->mov2;
-}
+struct plt_entry get_plt_entry(u64 dst, void *pc);
+bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b);
#endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/include/asm/neon-intrinsics.h b/arch/arm64/include/asm/neon-intrinsics.h
new file mode 100644
index 000000000000..2ba6c6b9541f
--- /dev/null
+++ b/arch/arm64/include/asm/neon-intrinsics.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2018 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_NEON_INTRINSICS_H
+#define __ASM_NEON_INTRINSICS_H
+
+#include <asm-generic/int-ll64.h>
+
+/*
+ * In the kernel, u64/s64 are [un]signed long long, not [un]signed long.
+ * So by redefining these macros to the former, we can force gcc-stdint.h
+ * to define uint64_t / int64_t in a compatible manner.
+ */
+
+#ifdef __INT64_TYPE__
+#undef __INT64_TYPE__
+#define __INT64_TYPE__ long long
+#endif
+
+#ifdef __UINT64_TYPE__
+#undef __UINT64_TYPE__
+#define __UINT64_TYPE__ unsigned long long
+#endif
+
+/*
+ * genksyms chokes on the ARM NEON intrinsics system header, but we
+ * don't export anything it defines anyway, so just disregard when
+ * genksyms executes.
+ */
+#ifndef __GENKSYMS__
+#include <arm_neon.h>
+#endif
+
+#endif /* __ASM_NEON_INTRINSICS_H */
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 21a81b59a0cc..6b81dd8cee01 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -48,263 +48,193 @@ static inline unsigned long __my_cpu_offset(void)
}
#define __my_cpu_offset __my_cpu_offset()
-#define PERCPU_OP(op, asm_op) \
-static inline unsigned long __percpu_##op(void *ptr, \
- unsigned long val, int size) \
+#define PERCPU_RW_OPS(sz) \
+static inline unsigned long __percpu_read_##sz(void *ptr) \
{ \
- unsigned long loop, ret; \
+ return READ_ONCE(*(u##sz *)ptr); \
+} \
\
- switch (size) { \
- case 1: \
- asm ("//__per_cpu_" #op "_1\n" \
- "1: ldxrb %w[ret], %[ptr]\n" \
- #asm_op " %w[ret], %w[ret], %w[val]\n" \
- " stxrb %w[loop], %w[ret], %[ptr]\n" \
- " cbnz %w[loop], 1b" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u8 *)ptr) \
- : [val] "Ir" (val)); \
- break; \
- case 2: \
- asm ("//__per_cpu_" #op "_2\n" \
- "1: ldxrh %w[ret], %[ptr]\n" \
- #asm_op " %w[ret], %w[ret], %w[val]\n" \
- " stxrh %w[loop], %w[ret], %[ptr]\n" \
- " cbnz %w[loop], 1b" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u16 *)ptr) \
- : [val] "Ir" (val)); \
- break; \
- case 4: \
- asm ("//__per_cpu_" #op "_4\n" \
- "1: ldxr %w[ret], %[ptr]\n" \
- #asm_op " %w[ret], %w[ret], %w[val]\n" \
- " stxr %w[loop], %w[ret], %[ptr]\n" \
- " cbnz %w[loop], 1b" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u32 *)ptr) \
- : [val] "Ir" (val)); \
- break; \
- case 8: \
- asm ("//__per_cpu_" #op "_8\n" \
- "1: ldxr %[ret], %[ptr]\n" \
- #asm_op " %[ret], %[ret], %[val]\n" \
- " stxr %w[loop], %[ret], %[ptr]\n" \
- " cbnz %w[loop], 1b" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u64 *)ptr) \
- : [val] "Ir" (val)); \
- break; \
- default: \
- ret = 0; \
- BUILD_BUG(); \
- } \
- \
- return ret; \
-}
-
-PERCPU_OP(add, add)
-PERCPU_OP(and, and)
-PERCPU_OP(or, orr)
-#undef PERCPU_OP
-
-static inline unsigned long __percpu_read(void *ptr, int size)
-{
- unsigned long ret;
-
- switch (size) {
- case 1:
- ret = READ_ONCE(*(u8 *)ptr);
- break;
- case 2:
- ret = READ_ONCE(*(u16 *)ptr);
- break;
- case 4:
- ret = READ_ONCE(*(u32 *)ptr);
- break;
- case 8:
- ret = READ_ONCE(*(u64 *)ptr);
- break;
- default:
- ret = 0;
- BUILD_BUG();
- }
-
- return ret;
+static inline void __percpu_write_##sz(void *ptr, unsigned long val) \
+{ \
+ WRITE_ONCE(*(u##sz *)ptr, (u##sz)val); \
}
-static inline void __percpu_write(void *ptr, unsigned long val, int size)
-{
- switch (size) {
- case 1:
- WRITE_ONCE(*(u8 *)ptr, (u8)val);
- break;
- case 2:
- WRITE_ONCE(*(u16 *)ptr, (u16)val);
- break;
- case 4:
- WRITE_ONCE(*(u32 *)ptr, (u32)val);
- break;
- case 8:
- WRITE_ONCE(*(u64 *)ptr, (u64)val);
- break;
- default:
- BUILD_BUG();
- }
+#define __PERCPU_OP_CASE(w, sfx, name, sz, op_llsc, op_lse) \
+static inline void \
+__percpu_##name##_case_##sz(void *ptr, unsigned long val) \
+{ \
+ unsigned int loop; \
+ u##sz tmp; \
+ \
+ asm volatile (ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ "1: ldxr" #sfx "\t%" #w "[tmp], %[ptr]\n" \
+ #op_llsc "\t%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \
+ " stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n" \
+ " cbnz %w[loop], 1b", \
+ /* LSE atomics */ \
+ #op_lse "\t%" #w "[val], %[ptr]\n" \
+ __nops(3)) \
+ : [loop] "=&r" (loop), [tmp] "=&r" (tmp), \
+ [ptr] "+Q"(*(u##sz *)ptr) \
+ : [val] "r" ((u##sz)(val))); \
}
-static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
- int size)
-{
- unsigned long ret, loop;
-
- switch (size) {
- case 1:
- asm ("//__percpu_xchg_1\n"
- "1: ldxrb %w[ret], %[ptr]\n"
- " stxrb %w[loop], %w[val], %[ptr]\n"
- " cbnz %w[loop], 1b"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u8 *)ptr)
- : [val] "r" (val));
- break;
- case 2:
- asm ("//__percpu_xchg_2\n"
- "1: ldxrh %w[ret], %[ptr]\n"
- " stxrh %w[loop], %w[val], %[ptr]\n"
- " cbnz %w[loop], 1b"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u16 *)ptr)
- : [val] "r" (val));
- break;
- case 4:
- asm ("//__percpu_xchg_4\n"
- "1: ldxr %w[ret], %[ptr]\n"
- " stxr %w[loop], %w[val], %[ptr]\n"
- " cbnz %w[loop], 1b"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u32 *)ptr)
- : [val] "r" (val));
- break;
- case 8:
- asm ("//__percpu_xchg_8\n"
- "1: ldxr %[ret], %[ptr]\n"
- " stxr %w[loop], %[val], %[ptr]\n"
- " cbnz %w[loop], 1b"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u64 *)ptr)
- : [val] "r" (val));
- break;
- default:
- ret = 0;
- BUILD_BUG();
- }
-
- return ret;
+#define __PERCPU_RET_OP_CASE(w, sfx, name, sz, op_llsc, op_lse) \
+static inline u##sz \
+__percpu_##name##_return_case_##sz(void *ptr, unsigned long val) \
+{ \
+ unsigned int loop; \
+ u##sz ret; \
+ \
+ asm volatile (ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ "1: ldxr" #sfx "\t%" #w "[ret], %[ptr]\n" \
+ #op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n" \
+ " stxr" #sfx "\t%w[loop], %" #w "[ret], %[ptr]\n" \
+ " cbnz %w[loop], 1b", \
+ /* LSE atomics */ \
+ #op_lse "\t%" #w "[val], %" #w "[ret], %[ptr]\n" \
+ #op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n" \
+ __nops(2)) \
+ : [loop] "=&r" (loop), [ret] "=&r" (ret), \
+ [ptr] "+Q"(*(u##sz *)ptr) \
+ : [val] "r" ((u##sz)(val))); \
+ \
+ return ret; \
}
-/* this_cpu_cmpxchg */
-#define _protect_cmpxchg_local(pcp, o, n) \
-({ \
- typeof(*raw_cpu_ptr(&(pcp))) __ret; \
- preempt_disable(); \
- __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \
- preempt_enable(); \
- __ret; \
-})
-
-#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
-#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
-#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
-#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define PERCPU_OP(name, op_llsc, op_lse) \
+ __PERCPU_OP_CASE(w, b, name, 8, op_llsc, op_lse) \
+ __PERCPU_OP_CASE(w, h, name, 16, op_llsc, op_lse) \
+ __PERCPU_OP_CASE(w, , name, 32, op_llsc, op_lse) \
+ __PERCPU_OP_CASE( , , name, 64, op_llsc, op_lse)
+
+#define PERCPU_RET_OP(name, op_llsc, op_lse) \
+ __PERCPU_RET_OP_CASE(w, b, name, 8, op_llsc, op_lse) \
+ __PERCPU_RET_OP_CASE(w, h, name, 16, op_llsc, op_lse) \
+ __PERCPU_RET_OP_CASE(w, , name, 32, op_llsc, op_lse) \
+ __PERCPU_RET_OP_CASE( , , name, 64, op_llsc, op_lse)
+
+PERCPU_RW_OPS(8)
+PERCPU_RW_OPS(16)
+PERCPU_RW_OPS(32)
+PERCPU_RW_OPS(64)
+PERCPU_OP(add, add, stadd)
+PERCPU_OP(andnot, bic, stclr)
+PERCPU_OP(or, orr, stset)
+PERCPU_RET_OP(add, add, ldadd)
+
+#undef PERCPU_RW_OPS
+#undef __PERCPU_OP_CASE
+#undef __PERCPU_RET_OP_CASE
+#undef PERCPU_OP
+#undef PERCPU_RET_OP
+/*
+ * It would be nice to avoid the conditional call into the scheduler when
+ * re-enabling preemption for preemptible kernels, but doing that in a way
+ * which builds inside a module would mean messing directly with the preempt
+ * count. If you do this, peterz and tglx will hunt you down.
+ */
#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \
({ \
int __ret; \
- preempt_disable(); \
+ preempt_disable_notrace(); \
__ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \
raw_cpu_ptr(&(ptr2)), \
o1, o2, n1, n2); \
- preempt_enable(); \
+ preempt_enable_notrace(); \
__ret; \
})
-#define _percpu_read(pcp) \
+#define _pcp_protect(op, pcp, ...) \
({ \
- typeof(pcp) __retval; \
preempt_disable_notrace(); \
- __retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), \
- sizeof(pcp)); \
+ op(raw_cpu_ptr(&(pcp)), __VA_ARGS__); \
preempt_enable_notrace(); \
- __retval; \
})
-#define _percpu_write(pcp, val) \
-do { \
+#define _pcp_protect_return(op, pcp, args...) \
+({ \
+ typeof(pcp) __retval; \
preempt_disable_notrace(); \
- __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), \
- sizeof(pcp)); \
+ __retval = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##args); \
preempt_enable_notrace(); \
-} while(0) \
-
-#define _pcp_protect(operation, pcp, val) \
-({ \
- typeof(pcp) __retval; \
- preempt_disable(); \
- __retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)), \
- (val), sizeof(pcp)); \
- preempt_enable(); \
- __retval; \
+ __retval; \
})
-#define _percpu_add(pcp, val) \
- _pcp_protect(__percpu_add, pcp, val)
-
-#define _percpu_add_return(pcp, val) _percpu_add(pcp, val)
-
-#define _percpu_and(pcp, val) \
- _pcp_protect(__percpu_and, pcp, val)
-
-#define _percpu_or(pcp, val) \
- _pcp_protect(__percpu_or, pcp, val)
-
-#define _percpu_xchg(pcp, val) (typeof(pcp)) \
- _pcp_protect(__percpu_xchg, pcp, (unsigned long)(val))
-
-#define this_cpu_add_1(pcp, val) _percpu_add(pcp, val)
-#define this_cpu_add_2(pcp, val) _percpu_add(pcp, val)
-#define this_cpu_add_4(pcp, val) _percpu_add(pcp, val)
-#define this_cpu_add_8(pcp, val) _percpu_add(pcp, val)
-
-#define this_cpu_add_return_1(pcp, val) _percpu_add_return(pcp, val)
-#define this_cpu_add_return_2(pcp, val) _percpu_add_return(pcp, val)
-#define this_cpu_add_return_4(pcp, val) _percpu_add_return(pcp, val)
-#define this_cpu_add_return_8(pcp, val) _percpu_add_return(pcp, val)
-
-#define this_cpu_and_1(pcp, val) _percpu_and(pcp, val)
-#define this_cpu_and_2(pcp, val) _percpu_and(pcp, val)
-#define this_cpu_and_4(pcp, val) _percpu_and(pcp, val)
-#define this_cpu_and_8(pcp, val) _percpu_and(pcp, val)
-
-#define this_cpu_or_1(pcp, val) _percpu_or(pcp, val)
-#define this_cpu_or_2(pcp, val) _percpu_or(pcp, val)
-#define this_cpu_or_4(pcp, val) _percpu_or(pcp, val)
-#define this_cpu_or_8(pcp, val) _percpu_or(pcp, val)
-
-#define this_cpu_read_1(pcp) _percpu_read(pcp)
-#define this_cpu_read_2(pcp) _percpu_read(pcp)
-#define this_cpu_read_4(pcp) _percpu_read(pcp)
-#define this_cpu_read_8(pcp) _percpu_read(pcp)
-
-#define this_cpu_write_1(pcp, val) _percpu_write(pcp, val)
-#define this_cpu_write_2(pcp, val) _percpu_write(pcp, val)
-#define this_cpu_write_4(pcp, val) _percpu_write(pcp, val)
-#define this_cpu_write_8(pcp, val) _percpu_write(pcp, val)
-
-#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val)
-#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val)
-#define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val)
-#define this_cpu_xchg_8(pcp, val) _percpu_xchg(pcp, val)
+#define this_cpu_read_1(pcp) \
+ _pcp_protect_return(__percpu_read_8, pcp)
+#define this_cpu_read_2(pcp) \
+ _pcp_protect_return(__percpu_read_16, pcp)
+#define this_cpu_read_4(pcp) \
+ _pcp_protect_return(__percpu_read_32, pcp)
+#define this_cpu_read_8(pcp) \
+ _pcp_protect_return(__percpu_read_64, pcp)
+
+#define this_cpu_write_1(pcp, val) \
+ _pcp_protect(__percpu_write_8, pcp, (unsigned long)val)
+#define this_cpu_write_2(pcp, val) \
+ _pcp_protect(__percpu_write_16, pcp, (unsigned long)val)
+#define this_cpu_write_4(pcp, val) \
+ _pcp_protect(__percpu_write_32, pcp, (unsigned long)val)
+#define this_cpu_write_8(pcp, val) \
+ _pcp_protect(__percpu_write_64, pcp, (unsigned long)val)
+
+#define this_cpu_add_1(pcp, val) \
+ _pcp_protect(__percpu_add_case_8, pcp, val)
+#define this_cpu_add_2(pcp, val) \
+ _pcp_protect(__percpu_add_case_16, pcp, val)
+#define this_cpu_add_4(pcp, val) \
+ _pcp_protect(__percpu_add_case_32, pcp, val)
+#define this_cpu_add_8(pcp, val) \
+ _pcp_protect(__percpu_add_case_64, pcp, val)
+
+#define this_cpu_add_return_1(pcp, val) \
+ _pcp_protect_return(__percpu_add_return_case_8, pcp, val)
+#define this_cpu_add_return_2(pcp, val) \
+ _pcp_protect_return(__percpu_add_return_case_16, pcp, val)
+#define this_cpu_add_return_4(pcp, val) \
+ _pcp_protect_return(__percpu_add_return_case_32, pcp, val)
+#define this_cpu_add_return_8(pcp, val) \
+ _pcp_protect_return(__percpu_add_return_case_64, pcp, val)
+
+#define this_cpu_and_1(pcp, val) \
+ _pcp_protect(__percpu_andnot_case_8, pcp, ~val)
+#define this_cpu_and_2(pcp, val) \
+ _pcp_protect(__percpu_andnot_case_16, pcp, ~val)
+#define this_cpu_and_4(pcp, val) \
+ _pcp_protect(__percpu_andnot_case_32, pcp, ~val)
+#define this_cpu_and_8(pcp, val) \
+ _pcp_protect(__percpu_andnot_case_64, pcp, ~val)
+
+#define this_cpu_or_1(pcp, val) \
+ _pcp_protect(__percpu_or_case_8, pcp, val)
+#define this_cpu_or_2(pcp, val) \
+ _pcp_protect(__percpu_or_case_16, pcp, val)
+#define this_cpu_or_4(pcp, val) \
+ _pcp_protect(__percpu_or_case_32, pcp, val)
+#define this_cpu_or_8(pcp, val) \
+ _pcp_protect(__percpu_or_case_64, pcp, val)
+
+#define this_cpu_xchg_1(pcp, val) \
+ _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_2(pcp, val) \
+ _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_4(pcp, val) \
+ _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_8(pcp, val) \
+ _pcp_protect_return(xchg_relaxed, pcp, val)
+
+#define this_cpu_cmpxchg_1(pcp, o, n) \
+ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_2(pcp, o, n) \
+ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_4(pcp, o, n) \
+ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_8(pcp, o, n) \
+ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
#include <asm-generic/percpu.h>
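
Illustrative aside (not part of the patch; all names below are hypothetical): the reworked percpu.h stamps out one helper per operand width with token-pasting macros, then uses _pcp_protect()/_pcp_protect_return() to bracket each helper with preempt_disable_notrace()/preempt_enable_notrace(). A minimal stand-alone C sketch of the same pattern, with an empty critical section standing in for preemption control:

	#include <stdint.h>
	#include <stdio.h>

	/* Stand-ins for preempt_disable_notrace()/preempt_enable_notrace(). */
	static void enter_critical(void) { /* e.g. disable preemption */ }
	static void leave_critical(void) { /* e.g. re-enable preemption */ }

	/* One macro generates an add helper per width, like __PERCPU_OP_CASE. */
	#define GEN_ADD_CASE(sz)					\
	static void demo_add_case_##sz(void *ptr, unsigned long val)	\
	{								\
		*(uint##sz##_t *)ptr += (uint##sz##_t)val;		\
	}

	GEN_ADD_CASE(8)
	GEN_ADD_CASE(16)
	GEN_ADD_CASE(32)
	GEN_ADD_CASE(64)

	/* Analogue of _pcp_protect(): run a generated helper in a critical section. */
	#define demo_protect(op, var, val)				\
	do {								\
		enter_critical();					\
		op(&(var), (val));					\
		leave_critical();					\
	} while (0)

	int main(void)
	{
		uint32_t counter = 40;

		demo_protect(demo_add_case_32, counter, 2);
		printf("%u\n", counter);	/* prints 42 */
		return 0;
	}

The kernel versions additionally pick between the LL/SC and LSE encodings at runtime via ARM64_LSE_ATOMIC_INSN, which this sketch does not attempt to model.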
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index f9ccc36d3dc3..c593761ba61c 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -24,6 +24,160 @@
#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1)
/*
+ * Common architectural and microarchitectural event numbers.
+ */
+#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x01
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x02
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x05
+#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x06
+#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x07
+#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08
+#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x09
+#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x0A
+#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x0B
+#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x0C
+#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x0D
+#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x0E
+#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x0F
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
+#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12
+#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x14
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x15
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x16
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x17
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x18
+#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19
+#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x1A
+#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B
+#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x1C
+#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D
+#define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20
+#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x22
+#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x23
+#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x24
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x25
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x26
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x27
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x28
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x29
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x2A
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x2B
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x2C
+#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x2E
+#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x30
+#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x31
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x32
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x33
+#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x34
+#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x35
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x36
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x37
+#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x38
+
+/* Statistical profiling extension microarchitectural events */
+#define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000
+#define ARMV8_SPE_PERFCTR_SAMPLE_FEED 0x4001
+#define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002
+#define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003
+
+/* ARMv8 recommended implementation defined event types */
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x42
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x43
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x44
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x45
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x46
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x47
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x48
+
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x4C
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x4D
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x4E
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x4F
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x50
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x51
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x52
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x53
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x56
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x57
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x58
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x5C
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x5D
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x5E
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x5F
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x60
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x61
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x62
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x63
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x64
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x65
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x66
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x67
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x68
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x69
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x6A
+
+#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x6C
+#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x6D
+#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x6E
+#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x6F
+#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x70
+#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x71
+#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x72
+#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x73
+#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x74
+#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x75
+#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x76
+#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x77
+#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x78
+#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x79
+#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x7A
+
+#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x7C
+#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x7D
+#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x7E
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x81
+#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x82
+#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x83
+#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x84
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x86
+#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x87
+#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x88
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x8A
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x8B
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x8C
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x8D
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x8E
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x8F
+#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x90
+#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x91
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0xA0
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0xA1
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0xA2
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0xA3
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0xA6
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0xA7
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0xA8
+
+/*
* Per-CPU PMCR: config reg
*/
#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */
@@ -50,21 +204,11 @@
#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */
/*
- * PMUv3 event types: required events
- */
-#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
-#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
-#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12
-
-/*
* Event filters for PMUv3
*/
-#define ARMV8_PMU_EXCLUDE_EL1 (1 << 31)
-#define ARMV8_PMU_EXCLUDE_EL0 (1 << 30)
-#define ARMV8_PMU_INCLUDE_EL2 (1 << 27)
+#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31)
+#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30)
+#define ARMV8_PMU_INCLUDE_EL2 (1U << 27)
/*
* PMUSERENR: user enable reg
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 1d7d8da2ef9b..22bb3ae514f5 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -80,7 +80,7 @@
#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
+#define PTRS_PER_PGD (1 << (MAX_USER_VA_BITS - PGDIR_SHIFT))
/*
* Section address mask and size definitions.
@@ -193,6 +193,10 @@
#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
#define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */
+#define PUD_S2_RDONLY (_AT(pudval_t, 1) << 6) /* HAP[2:1] */
+#define PUD_S2_RDWR (_AT(pudval_t, 3) << 6) /* HAP[2:1] */
+#define PUD_S2_XN (_AT(pudval_t, 2) << 53) /* XN[1:0] */
+
/*
* Memory Attribute override for Stage-2 (MemAttr[3:0])
*/
@@ -224,6 +228,8 @@
#define TCR_TxSZ_WIDTH 6
#define TCR_T0SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET)
+#define TCR_EPD0_SHIFT 7
+#define TCR_EPD0_MASK (UL(1) << TCR_EPD0_SHIFT)
#define TCR_IRGN0_SHIFT 8
#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT)
#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT)
@@ -231,6 +237,8 @@
#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT)
#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT)
+#define TCR_EPD1_SHIFT 23
+#define TCR_EPD1_MASK (UL(1) << TCR_EPD1_SHIFT)
#define TCR_IRGN1_SHIFT 24
#define TCR_IRGN1_MASK (UL(3) << TCR_IRGN1_SHIFT)
#define TCR_IRGN1_NC (UL(0) << TCR_IRGN1_SHIFT)
@@ -306,4 +314,10 @@
#define TTBR_BADDR_MASK_52 (((UL(1) << 46) - 1) << 2)
#endif
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
+/* Must be at least 64-byte aligned to prevent corruption of the TTBR */
+#define TTBR1_BADDR_4852_OFFSET (((UL(1) << (52 - PGDIR_SHIFT)) - \
+ (UL(1) << (48 - PGDIR_SHIFT))) * 8)
+#endif
+
#endif
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 50b1ef8584c0..de70c1eabf33 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -22,6 +22,7 @@
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable-prot.h>
+#include <asm/tlbflush.h>
/*
* VMALLOC range.
@@ -314,6 +315,11 @@ static inline pte_t pud_pte(pud_t pud)
return __pte(pud_val(pud));
}
+static inline pud_t pte_pud(pte_t pte)
+{
+ return __pud(pte_val(pte));
+}
+
static inline pmd_t pud_pmd(pud_t pud)
{
return __pmd(pud_val(pud));
@@ -381,8 +387,12 @@ static inline int pmd_protnone(pmd_t pmd)
#define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
+#define pud_young(pud) pte_young(pud_pte(pud))
+#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud)))
#define pud_write(pud) pte_write(pud_pte(pud))
+#define pud_mkhuge(pud) (__pud(pud_val(pud) & ~PUD_TABLE_BIT))
+
#define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud))
#define __phys_to_pud_val(phys) __phys_to_pte_val(phys)
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
@@ -685,6 +695,27 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
return __ptep_test_and_clear_young(ptep);
}
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
+{
+ int young = ptep_test_and_clear_young(vma, address, ptep);
+
+ if (young) {
+ /*
+ * We can elide the trailing DSB here since the worst that can
+ * happen is that a CPU continues to use the young entry in its
+ * TLB and we mistakenly reclaim the associated page. The
+ * window for such an event is bounded by the next
+ * context-switch, which provides a DSB to complete the TLB
+ * invalidation.
+ */
+ flush_tlb_page_nosync(vma, address);
+ }
+
+ return young;
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
new file mode 100644
index 000000000000..15d49515efdd
--- /dev/null
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __ASM_POINTER_AUTH_H
+#define __ASM_POINTER_AUTH_H
+
+#include <linux/bitops.h>
+#include <linux/random.h>
+
+#include <asm/cpufeature.h>
+#include <asm/memory.h>
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+/*
+ * Each key is a 128-bit quantity which is split across a pair of 64-bit
+ * registers (Lo and Hi).
+ */
+struct ptrauth_key {
+ unsigned long lo, hi;
+};
+
+/*
+ * We give each process its own keys, which are shared by all threads. The keys
+ * are inherited upon fork(), and reinitialised upon exec*().
+ */
+struct ptrauth_keys {
+ struct ptrauth_key apia;
+ struct ptrauth_key apib;
+ struct ptrauth_key apda;
+ struct ptrauth_key apdb;
+ struct ptrauth_key apga;
+};
+
+static inline void ptrauth_keys_init(struct ptrauth_keys *keys)
+{
+ if (system_supports_address_auth()) {
+ get_random_bytes(&keys->apia, sizeof(keys->apia));
+ get_random_bytes(&keys->apib, sizeof(keys->apib));
+ get_random_bytes(&keys->apda, sizeof(keys->apda));
+ get_random_bytes(&keys->apdb, sizeof(keys->apdb));
+ }
+
+ if (system_supports_generic_auth())
+ get_random_bytes(&keys->apga, sizeof(keys->apga));
+}
+
+#define __ptrauth_key_install(k, v) \
+do { \
+ struct ptrauth_key __pki_v = (v); \
+ write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1); \
+ write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1); \
+} while (0)
+
+static inline void ptrauth_keys_switch(struct ptrauth_keys *keys)
+{
+ if (system_supports_address_auth()) {
+ __ptrauth_key_install(APIA, keys->apia);
+ __ptrauth_key_install(APIB, keys->apib);
+ __ptrauth_key_install(APDA, keys->apda);
+ __ptrauth_key_install(APDB, keys->apdb);
+ }
+
+ if (system_supports_generic_auth())
+ __ptrauth_key_install(APGA, keys->apga);
+}
+
+extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg);
+
+/*
+ * The EL0 pointer bits used by a pointer authentication code.
+ * This is dependent on TBI0 being enabled, or bits 63:56 would also apply.
+ */
+#define ptrauth_user_pac_mask() GENMASK(54, vabits_user)
+
+/* Only valid for EL0 TTBR0 instruction pointers */
+static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
+{
+ return ptr & ~ptrauth_user_pac_mask();
+}
+
+#define ptrauth_thread_init_user(tsk) \
+do { \
+ struct task_struct *__ptiu_tsk = (tsk); \
+ ptrauth_keys_init(&__ptiu_tsk->thread.keys_user); \
+ ptrauth_keys_switch(&__ptiu_tsk->thread.keys_user); \
+} while (0)
+
+#define ptrauth_thread_switch(tsk) \
+ ptrauth_keys_switch(&(tsk)->thread.keys_user)
+
+#else /* CONFIG_ARM64_PTR_AUTH */
+#define ptrauth_prctl_reset_keys(tsk, arg) (-EINVAL)
+#define ptrauth_strip_insn_pac(lr) (lr)
+#define ptrauth_thread_init_user(tsk)
+#define ptrauth_thread_switch(tsk)
+#endif /* CONFIG_ARM64_PTR_AUTH */
+
+#endif /* __ASM_POINTER_AUTH_H */
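
Illustrative aside (not part of the patch): ptrauth_user_pac_mask() covers bits 54 down to vabits_user, so stripping a PAC from an EL0 instruction pointer is a single mask operation, as ptrauth_strip_insn_pac() does. A stand-alone sketch, assuming a hypothetical vabits_user of 48 and a 64-bit unsigned long:

	#include <stdint.h>
	#include <stdio.h>

	/* GENMASK(h, l) as used above, spelled out for a user-space build. */
	#define DEMO_GENMASK(h, l) \
		(((~0UL) << (l)) & (~0UL >> (63 - (h))))

	int main(void)
	{
		/* Assume a 48-bit user VA space (vabits_user == 48). */
		unsigned long pac_mask   = DEMO_GENMASK(54, 48);
		/* A user return address whose upper bits carry a PAC. */
		unsigned long signed_ptr = 0x007a0000deadbeefUL;
		unsigned long stripped   = signed_ptr & ~pac_mask;

		printf("mask     = 0x%016lx\n", pac_mask);	/* 0x007f000000000000 */
		printf("stripped = 0x%016lx\n", stripped);	/* 0x00000000deadbeef */
		return 0;
	}

Bits 63:56 are left alone because TBI0 is assumed enabled, as the comment in the header notes.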
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
new file mode 100644
index 000000000000..d49951647014
--- /dev/null
+++ b/arch/arm64/include/asm/preempt.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <linux/thread_info.h>
+
+#define PREEMPT_NEED_RESCHED BIT(32)
+#define PREEMPT_ENABLED (PREEMPT_NEED_RESCHED)
+
+static inline int preempt_count(void)
+{
+ return READ_ONCE(current_thread_info()->preempt.count);
+}
+
+static inline void preempt_count_set(u64 pc)
+{
+ /* Preserve existing value of PREEMPT_NEED_RESCHED */
+ WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+#define init_task_preempt_count(p) do { \
+ task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \
+} while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+ task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
+} while (0)
+
+static inline void set_preempt_need_resched(void)
+{
+ current_thread_info()->preempt.need_resched = 0;
+}
+
+static inline void clear_preempt_need_resched(void)
+{
+ current_thread_info()->preempt.need_resched = 1;
+}
+
+static inline bool test_preempt_need_resched(void)
+{
+ return !current_thread_info()->preempt.need_resched;
+}
+
+static inline void __preempt_count_add(int val)
+{
+ u32 pc = READ_ONCE(current_thread_info()->preempt.count);
+ pc += val;
+ WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+static inline void __preempt_count_sub(int val)
+{
+ u32 pc = READ_ONCE(current_thread_info()->preempt.count);
+ pc -= val;
+ WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+static inline bool __preempt_count_dec_and_test(void)
+{
+ struct thread_info *ti = current_thread_info();
+ u64 pc = READ_ONCE(ti->preempt_count);
+
+ /* Update only the count field, leaving need_resched unchanged */
+ WRITE_ONCE(ti->preempt.count, --pc);
+
+ /*
+ * If we wrote back all zeroes, then we're preemptible and in
+ * need of a reschedule. Otherwise, we need to reload the
+ * preempt_count in case the need_resched flag was cleared by an
+ * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
+ * pair.
+ */
+ return !pc || !READ_ONCE(ti->preempt_count);
+}
+
+static inline bool should_resched(int preempt_offset)
+{
+ u64 pc = READ_ONCE(current_thread_info()->preempt_count);
+ return pc == preempt_offset;
+}
+
+#ifdef CONFIG_PREEMPT
+void preempt_schedule(void);
+#define __preempt_schedule() preempt_schedule()
+void preempt_schedule_notrace(void);
+#define __preempt_schedule_notrace() preempt_schedule_notrace()
+#endif /* CONFIG_PREEMPT */
+
+#endif /* __ASM_PREEMPT_H */
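
Illustrative aside (not part of the patch; little-endian layout assumed, and the interrupt race covered by the second READ_ONCE() in __preempt_count_dec_and_test() is deliberately omitted): because need_resched uses an inverted encoding (0 means a reschedule is wanted) and occupies the upper half of the 64-bit preempt_count, testing the whole word against zero checks "count dropped to zero" and "resched pending" at once. A minimal user-space model:

	#include <stdint.h>
	#include <stdio.h>

	/* Little-endian view of the union added to struct thread_info. */
	struct demo_thread_info {
		union {
			uint64_t preempt_count;
			struct {
				uint32_t count;
				uint32_t need_resched;	/* 0 => resched wanted */
			} preempt;
		};
	};

	/*
	 * Simplified mirror of __preempt_count_dec_and_test(): decrement,
	 * write back only the count half, test the full 64-bit value.
	 */
	static int demo_dec_and_test(struct demo_thread_info *ti)
	{
		uint64_t pc = ti->preempt_count;

		ti->preempt.count = (uint32_t)--pc;
		return !pc;
	}

	int main(void)
	{
		struct demo_thread_info ti = { 0 };

		ti.preempt.count = 1;		/* one level of preempt_disable() */
		ti.preempt.need_resched = 0;	/* a reschedule has been requested */

		/* Dropping the last count with a pending resched tests true. */
		printf("%d\n", demo_dec_and_test(&ti));	/* prints 1 */
		return 0;
	}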
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 3e2091708b8e..f1a7ab18faf3 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -19,10 +19,16 @@
#ifndef __ASM_PROCESSOR_H
#define __ASM_PROCESSOR_H
-#define TASK_SIZE_64 (UL(1) << VA_BITS)
+#define KERNEL_DS UL(-1)
+#define USER_DS ((UL(1) << MAX_USER_VA_BITS) - 1)
-#define KERNEL_DS UL(-1)
-#define USER_DS (TASK_SIZE_64 - 1)
+/*
+ * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
+ * no point in shifting all network buffers by 2 bytes just to make some IP
+ * header fields appear aligned in memory, potentially sacrificing some DMA
+ * performance on some platforms.
+ */
+#define NET_IP_ALIGN 0
#ifndef __ASSEMBLY__
#ifdef __KERNEL__
@@ -38,6 +44,7 @@
#include <asm/hw_breakpoint.h>
#include <asm/lse.h>
#include <asm/pgtable-hwdef.h>
+#include <asm/pointer_auth.h>
#include <asm/ptrace.h>
#include <asm/types.h>
@@ -45,19 +52,31 @@
* TASK_SIZE - the maximum size of a user space task.
* TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
*/
+
+#define DEFAULT_MAP_WINDOW_64 (UL(1) << VA_BITS)
+#define TASK_SIZE_64 (UL(1) << vabits_user)
+
#ifdef CONFIG_COMPAT
#define TASK_SIZE_32 UL(0x100000000)
#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
TASK_SIZE_32 : TASK_SIZE_64)
#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
TASK_SIZE_32 : TASK_SIZE_64)
+#define DEFAULT_MAP_WINDOW (test_thread_flag(TIF_32BIT) ? \
+ TASK_SIZE_32 : DEFAULT_MAP_WINDOW_64)
#else
#define TASK_SIZE TASK_SIZE_64
+#define DEFAULT_MAP_WINDOW DEFAULT_MAP_WINDOW_64
#endif /* CONFIG_COMPAT */
+#ifdef CONFIG_ARM64_FORCE_52BIT
+#define STACK_TOP_MAX TASK_SIZE_64
#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4))
+#else
+#define STACK_TOP_MAX DEFAULT_MAP_WINDOW_64
+#define TASK_UNMAPPED_BASE (PAGE_ALIGN(DEFAULT_MAP_WINDOW / 4))
+#endif /* CONFIG_ARM64_FORCE_52BIT */
-#define STACK_TOP_MAX TASK_SIZE_64
#ifdef CONFIG_COMPAT
#define AARCH32_VECTORS_BASE 0xffff0000
#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \
@@ -66,6 +85,15 @@
#define STACK_TOP STACK_TOP_MAX
#endif /* CONFIG_COMPAT */
+#ifndef CONFIG_ARM64_FORCE_52BIT
+#define arch_get_mmap_end(addr) ((addr > DEFAULT_MAP_WINDOW) ? TASK_SIZE :\
+ DEFAULT_MAP_WINDOW)
+
+#define arch_get_mmap_base(addr, base) ((addr > DEFAULT_MAP_WINDOW) ? \
+ base + TASK_SIZE - DEFAULT_MAP_WINDOW :\
+ base)
+#endif /* CONFIG_ARM64_FORCE_52BIT */
+
extern phys_addr_t arm64_dma_phys_limit;
#define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1)
@@ -119,6 +147,9 @@ struct thread_struct {
unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
+#ifdef CONFIG_ARM64_PTR_AUTH
+ struct ptrauth_keys keys_user;
+#endif
};
static inline void arch_thread_struct_whitelist(unsigned long *offset,
@@ -262,6 +293,9 @@ extern void __init minsigstksz_setup(void);
#define SVE_SET_VL(arg) sve_set_current_vl(arg)
#define SVE_GET_VL() sve_get_current_vl()
+/* PR_PAC_RESET_KEYS prctl */
+#define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg)
+
/*
* For CONFIG_GCC_PLUGIN_STACKLEAK
*
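
Illustrative aside (not part of the patch): with the arch_get_mmap_end()/arch_get_mmap_base() definitions above, a task on a 52-bit-VA kernel keeps receiving addresses below DEFAULT_MAP_WINDOW unless it passes an mmap() hint above that window. A user-space sketch of the opt-in; the hint only changes behaviour on such a kernel, elsewhere the call simply maps at a lower address:

	#include <stdio.h>
	#include <sys/mman.h>

	int main(void)
	{
		/* A hint above the 48-bit default map window (1UL << 48). */
		void *hint = (void *)(1UL << 48);
		void *p = mmap(hint, 4096, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		printf("mapped at %p\n", p);
		munmap(p, 4096);
		return 0;
	}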
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index f82b447bd34f..1895561839a9 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -17,15 +17,20 @@
#define __ASM_SMP_H
/* Values for secondary_data.status */
+#define CPU_STUCK_REASON_SHIFT (8)
+#define CPU_BOOT_STATUS_MASK ((1U << CPU_STUCK_REASON_SHIFT) - 1)
-#define CPU_MMU_OFF (-1)
-#define CPU_BOOT_SUCCESS (0)
+#define CPU_MMU_OFF (-1)
+#define CPU_BOOT_SUCCESS (0)
/* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */
-#define CPU_KILL_ME (1)
+#define CPU_KILL_ME (1)
/* The cpu couldn't die gracefully and is looping in the kernel */
-#define CPU_STUCK_IN_KERNEL (2)
+#define CPU_STUCK_IN_KERNEL (2)
/* Fatal system error detected by secondary CPU, crash the system */
-#define CPU_PANIC_KERNEL (3)
+#define CPU_PANIC_KERNEL (3)
+
+#define CPU_STUCK_REASON_52_BIT_VA (1U << CPU_STUCK_REASON_SHIFT)
+#define CPU_STUCK_REASON_NO_GRAN (2U << CPU_STUCK_REASON_SHIFT)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h
index 58d15be11c4d..5884a2b02827 100644
--- a/arch/arm64/include/asm/stackprotector.h
+++ b/arch/arm64/include/asm/stackprotector.h
@@ -34,7 +34,8 @@ static __always_inline void boot_init_stack_canary(void)
canary &= CANARY_MASK;
current->stack_canary = canary;
- __stack_chk_guard = current->stack_canary;
+ if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK))
+ __stack_chk_guard = current->stack_canary;
}
#endif /* _ASM_STACKPROTECTOR_H */
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
index d352f6df8d2c..5412fa40825e 100644
--- a/arch/arm64/include/asm/stage2_pgtable.h
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -30,16 +30,14 @@
#define pt_levels_pgdir_shift(lvls) ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls))
/*
- * The hardware supports concatenation of up to 16 tables at stage2 entry level
- * and we use the feature whenever possible.
+ * The hardware supports concatenation of up to 16 tables at stage2 entry
+ * level and we use the feature whenever possible, which means we resolve 4
+ * additional bits of address at the entry level.
*
- * Now, the minimum number of bits resolved at any level is (PAGE_SHIFT - 3).
- * On arm64, the smallest PAGE_SIZE supported is 4k, which means
- * (PAGE_SHIFT - 3) > 4 holds for all page sizes.
- * This implies, the total number of page table levels at stage2 expected
- * by the hardware is actually the number of levels required for (IPA_SHIFT - 4)
- * in normal translations(e.g, stage1), since we cannot have another level in
- * the range (IPA_SHIFT, IPA_SHIFT - 4).
+ * This implies that the total number of page table levels required for
+ * IPA_SHIFT at stage2 expected by the hardware can be calculated using
+ * the same logic used for the (non-collapsible) stage1 page tables but for
+ * (IPA_SHIFT - 4).
*/
#define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr)
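
Illustrative aside (not part of the patch): ARM64_HW_PGTABLE_LEVELS(va_bits), defined in pgtable-hwdef.h (not shown in this diff) as ((va_bits - 4) / (PAGE_SHIFT - 3)), gives the worked numbers behind the comment above. Assuming 4K pages and a hypothetical 40-bit IPA, stage 1 logic would need 4 levels, but stage 2 needs only 3 once the 16-way concatenated entry level resolves 4 extra bits:

	#include <stdio.h>

	/* Same arithmetic as ARM64_HW_PGTABLE_LEVELS(), assuming 4K pages. */
	#define DEMO_PAGE_SHIFT 12
	#define DEMO_HW_PGTABLE_LEVELS(va_bits) \
		(((va_bits) - 4) / (DEMO_PAGE_SHIFT - 3))

	int main(void)
	{
		int ipa = 40;	/* hypothetical guest physical address size */

		printf("stage 1 levels: %d\n", DEMO_HW_PGTABLE_LEVELS(ipa));		/* 4 */
		printf("stage 2 levels: %d\n", DEMO_HW_PGTABLE_LEVELS(ipa - 4));	/* 3 */
		return 0;
	}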
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 0c909c4a932f..72dc4c011014 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -20,6 +20,7 @@
#ifndef __ASM_SYSREG_H
#define __ASM_SYSREG_H
+#include <linux/const.h>
#include <linux/stringify.h>
/*
@@ -104,6 +105,11 @@
#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift))
#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift))
+#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
+ __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
+
+#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)
+
#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2)
#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2)
@@ -183,6 +189,19 @@
#define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1)
#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2)
+#define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0)
+#define SYS_APIAKEYHI_EL1 sys_reg(3, 0, 2, 1, 1)
+#define SYS_APIBKEYLO_EL1 sys_reg(3, 0, 2, 1, 2)
+#define SYS_APIBKEYHI_EL1 sys_reg(3, 0, 2, 1, 3)
+
+#define SYS_APDAKEYLO_EL1 sys_reg(3, 0, 2, 2, 0)
+#define SYS_APDAKEYHI_EL1 sys_reg(3, 0, 2, 2, 1)
+#define SYS_APDBKEYLO_EL1 sys_reg(3, 0, 2, 2, 2)
+#define SYS_APDBKEYHI_EL1 sys_reg(3, 0, 2, 2, 3)
+
+#define SYS_APGAKEYLO_EL1 sys_reg(3, 0, 2, 3, 0)
+#define SYS_APGAKEYHI_EL1 sys_reg(3, 0, 2, 3, 1)
+
#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
#define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0)
@@ -431,27 +450,31 @@
#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7)
/* Common SCTLR_ELx flags. */
-#define SCTLR_ELx_DSSBS (1UL << 44)
-#define SCTLR_ELx_EE (1 << 25)
-#define SCTLR_ELx_IESB (1 << 21)
-#define SCTLR_ELx_WXN (1 << 19)
-#define SCTLR_ELx_I (1 << 12)
-#define SCTLR_ELx_SA (1 << 3)
-#define SCTLR_ELx_C (1 << 2)
-#define SCTLR_ELx_A (1 << 1)
-#define SCTLR_ELx_M 1
+#define SCTLR_ELx_DSSBS (_BITUL(44))
+#define SCTLR_ELx_ENIA (_BITUL(31))
+#define SCTLR_ELx_ENIB (_BITUL(30))
+#define SCTLR_ELx_ENDA (_BITUL(27))
+#define SCTLR_ELx_EE (_BITUL(25))
+#define SCTLR_ELx_IESB (_BITUL(21))
+#define SCTLR_ELx_WXN (_BITUL(19))
+#define SCTLR_ELx_ENDB (_BITUL(13))
+#define SCTLR_ELx_I (_BITUL(12))
+#define SCTLR_ELx_SA (_BITUL(3))
+#define SCTLR_ELx_C (_BITUL(2))
+#define SCTLR_ELx_A (_BITUL(1))
+#define SCTLR_ELx_M (_BITUL(0))
#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB)
/* SCTLR_EL2 specific flags. */
-#define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \
- (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \
- (1 << 29))
-#define SCTLR_EL2_RES0 ((1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | \
- (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \
- (1 << 17) | (1 << 20) | (1 << 24) | (1 << 26) | \
- (1 << 27) | (1 << 30) | (1 << 31) | \
+#define SCTLR_EL2_RES1 ((_BITUL(4)) | (_BITUL(5)) | (_BITUL(11)) | (_BITUL(16)) | \
+ (_BITUL(18)) | (_BITUL(22)) | (_BITUL(23)) | (_BITUL(28)) | \
+ (_BITUL(29)))
+#define SCTLR_EL2_RES0 ((_BITUL(6)) | (_BITUL(7)) | (_BITUL(8)) | (_BITUL(9)) | \
+ (_BITUL(10)) | (_BITUL(13)) | (_BITUL(14)) | (_BITUL(15)) | \
+ (_BITUL(17)) | (_BITUL(20)) | (_BITUL(24)) | (_BITUL(26)) | \
+ (_BITUL(27)) | (_BITUL(30)) | (_BITUL(31)) | \
(0xffffefffUL << 32))
#ifdef CONFIG_CPU_BIG_ENDIAN
@@ -468,28 +491,28 @@
SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \
SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0)
-#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff
+#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffffUL
#error "Inconsistent SCTLR_EL2 set/clear bits"
#endif
/* SCTLR_EL1 specific flags. */
-#define SCTLR_EL1_UCI (1 << 26)
-#define SCTLR_EL1_E0E (1 << 24)
-#define SCTLR_EL1_SPAN (1 << 23)
-#define SCTLR_EL1_NTWE (1 << 18)
-#define SCTLR_EL1_NTWI (1 << 16)
-#define SCTLR_EL1_UCT (1 << 15)
-#define SCTLR_EL1_DZE (1 << 14)
-#define SCTLR_EL1_UMA (1 << 9)
-#define SCTLR_EL1_SED (1 << 8)
-#define SCTLR_EL1_ITD (1 << 7)
-#define SCTLR_EL1_CP15BEN (1 << 5)
-#define SCTLR_EL1_SA0 (1 << 4)
-
-#define SCTLR_EL1_RES1 ((1 << 11) | (1 << 20) | (1 << 22) | (1 << 28) | \
- (1 << 29))
-#define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \
- (1 << 27) | (1 << 30) | (1 << 31) | \
+#define SCTLR_EL1_UCI (_BITUL(26))
+#define SCTLR_EL1_E0E (_BITUL(24))
+#define SCTLR_EL1_SPAN (_BITUL(23))
+#define SCTLR_EL1_NTWE (_BITUL(18))
+#define SCTLR_EL1_NTWI (_BITUL(16))
+#define SCTLR_EL1_UCT (_BITUL(15))
+#define SCTLR_EL1_DZE (_BITUL(14))
+#define SCTLR_EL1_UMA (_BITUL(9))
+#define SCTLR_EL1_SED (_BITUL(8))
+#define SCTLR_EL1_ITD (_BITUL(7))
+#define SCTLR_EL1_CP15BEN (_BITUL(5))
+#define SCTLR_EL1_SA0 (_BITUL(4))
+
+#define SCTLR_EL1_RES1 ((_BITUL(11)) | (_BITUL(20)) | (_BITUL(22)) | (_BITUL(28)) | \
+ (_BITUL(29)))
+#define SCTLR_EL1_RES0 ((_BITUL(6)) | (_BITUL(10)) | (_BITUL(13)) | (_BITUL(17)) | \
+ (_BITUL(27)) | (_BITUL(30)) | (_BITUL(31)) | \
(0xffffefffUL << 32))
#ifdef CONFIG_CPU_BIG_ENDIAN
@@ -509,7 +532,7 @@
SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\
SCTLR_ELx_DSSBS | SCTLR_EL1_NTWI | SCTLR_EL1_RES0)
-#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff
+#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffffUL
#error "Inconsistent SCTLR_EL1 set/clear bits"
#endif
@@ -528,11 +551,25 @@
#define ID_AA64ISAR0_AES_SHIFT 4
/* id_aa64isar1 */
+#define ID_AA64ISAR1_SB_SHIFT 36
+#define ID_AA64ISAR1_GPI_SHIFT 28
+#define ID_AA64ISAR1_GPA_SHIFT 24
#define ID_AA64ISAR1_LRCPC_SHIFT 20
#define ID_AA64ISAR1_FCMA_SHIFT 16
#define ID_AA64ISAR1_JSCVT_SHIFT 12
+#define ID_AA64ISAR1_API_SHIFT 8
+#define ID_AA64ISAR1_APA_SHIFT 4
#define ID_AA64ISAR1_DPB_SHIFT 0
+#define ID_AA64ISAR1_APA_NI 0x0
+#define ID_AA64ISAR1_APA_ARCHITECTED 0x1
+#define ID_AA64ISAR1_API_NI 0x0
+#define ID_AA64ISAR1_API_IMP_DEF 0x1
+#define ID_AA64ISAR1_GPA_NI 0x0
+#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1
+#define ID_AA64ISAR1_GPI_NI 0x0
+#define ID_AA64ISAR1_GPI_IMP_DEF 0x1
+
/* id_aa64pfr0 */
#define ID_AA64PFR0_CSV3_SHIFT 60
#define ID_AA64PFR0_CSV2_SHIFT 56
@@ -676,13 +713,13 @@
#define ZCR_ELx_LEN_SIZE 9
#define ZCR_ELx_LEN_MASK 0x1ff
-#define CPACR_EL1_ZEN_EL1EN (1 << 16) /* enable EL1 access */
-#define CPACR_EL1_ZEN_EL0EN (1 << 17) /* enable EL0 access, if EL1EN set */
+#define CPACR_EL1_ZEN_EL1EN (_BITUL(16)) /* enable EL1 access */
+#define CPACR_EL1_ZEN_EL0EN (_BITUL(17)) /* enable EL0 access, if EL1EN set */
#define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
-#define SYS_MPIDR_SAFE_VAL (1UL << 31)
+#define SYS_MPIDR_SAFE_VAL (_BITUL(31))
#ifdef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index cb2c10a8f0a8..bbca68b54732 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -42,7 +42,18 @@ struct thread_info {
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
u64 ttbr0; /* saved TTBR0_EL1 */
#endif
- int preempt_count; /* 0 => preemptable, <0 => bug */
+ union {
+ u64 preempt_count; /* 0 => preemptible, <0 => bug */
+ struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ u32 need_resched;
+ u32 count;
+#else
+ u32 count;
+ u32 need_resched;
+#endif
+ } preempt;
+ };
};
#define thread_saved_pc(tsk) \
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index c3c0387aee18..3a1870228946 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -21,6 +21,7 @@
#ifndef __ASSEMBLY__
+#include <linux/mm_types.h>
#include <linux/sched.h>
#include <asm/cputype.h>
#include <asm/mmu.h>
@@ -41,14 +42,14 @@
ALTERNATIVE("nop\n nop", \
"dsb ish\n tlbi " #op, \
ARM64_WORKAROUND_REPEAT_TLBI, \
- CONFIG_QCOM_FALKOR_ERRATUM_1009) \
+ CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
: : )
#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n" \
ALTERNATIVE("nop\n nop", \
"dsb ish\n tlbi " #op ", %0", \
ARM64_WORKAROUND_REPEAT_TLBI, \
- CONFIG_QCOM_FALKOR_ERRATUM_1009) \
+ CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
: : "r" (arg))
#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)
@@ -164,14 +165,20 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
dsb(ish);
}
-static inline void flush_tlb_page(struct vm_area_struct *vma,
- unsigned long uaddr)
+static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
+ unsigned long uaddr)
{
unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
dsb(ishst);
__tlbi(vale1is, addr);
__tlbi_user(vale1is, addr);
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long uaddr)
+{
+ flush_tlb_page_nosync(vma, uaddr);
dsb(ish);
}
@@ -179,7 +186,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
* This is meant to avoid soft lock-ups on large TLB flushing ranges and not
* necessarily a performance improvement.
*/
-#define MAX_TLBI_OPS 1024UL
+#define MAX_TLBI_OPS PTRS_PER_PTE
static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
@@ -188,7 +195,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long asid = ASID(vma->vm_mm);
unsigned long addr;
- if ((end - start) > (MAX_TLBI_OPS * stride)) {
+ if ((end - start) >= (MAX_TLBI_OPS * stride)) {
flush_tlb_mm(vma->vm_mm);
return;
}
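
Worked numbers for the MAX_TLBI_OPS change above: PTRS_PER_PTE is 512 with 4K pages and 8192 with 64K pages, so __flush_tlb_range() now falls back to flush_tlb_mm() once (end - start) >= 512 * stride (2MiB for a base-page stride) or >= 8192 * stride (512MiB) respectively, scaling the cut-off with the span covered by one last-level table instead of using the previous fixed limit of 1024 operations.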
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 07c34087bd5e..fad33f5fde47 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -45,8 +45,7 @@ static inline void set_fs(mm_segment_t fs)
* Prevent a mispredicted conditional call to set_fs from forwarding
* the wrong address limit to access_ok under speculation.
*/
- dsb(nsh);
- isb();
+ spec_bar();
/* On user-mode return, check fs is correct */
set_thread_flag(TIF_FSCHECK);
diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h
new file mode 100644
index 000000000000..856386ad076c
--- /dev/null
+++ b/arch/arm64/include/asm/xor.h
@@ -0,0 +1,73 @@
+/*
+ * arch/arm64/include/asm/xor.h
+ *
+ * Authors: Jackie Liu <liuyun01@kylinos.cn>
+ * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+extern struct xor_block_template const xor_block_inner_neon;
+
+static void
+xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ kernel_neon_begin();
+ xor_block_inner_neon.do_2(bytes, p1, p2);
+ kernel_neon_end();
+}
+
+static void
+xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ kernel_neon_begin();
+ xor_block_inner_neon.do_3(bytes, p1, p2, p3);
+ kernel_neon_end();
+}
+
+static void
+xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ kernel_neon_begin();
+ xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4);
+ kernel_neon_end();
+}
+
+static void
+xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ kernel_neon_begin();
+ xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5);
+ kernel_neon_end();
+}
+
+static struct xor_block_template xor_block_arm64 = {
+ .name = "arm64_neon",
+ .do_2 = xor_neon_2,
+ .do_3 = xor_neon_3,
+ .do_4 = xor_neon_4,
+ .do_5 = xor_neon_5
+};
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+ do { \
+ xor_speed(&xor_block_8regs); \
+ xor_speed(&xor_block_32regs); \
+ if (cpu_has_neon()) { \
+ xor_speed(&xor_block_arm64);\
+ } \
+ } while (0)
+
+#endif /* CONFIG_KERNEL_MODE_NEON */
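
Illustrative aside (not part of the patch): each do_N() template method xors N-1 source buffers into the first, and xor_speed() benchmarks every registered template so the fastest can be chosen for RAID parity computation; the NEON variant above simply wraps the inner implementation in kernel_neon_begin()/kernel_neon_end(). The reference semantics of do_2(), in plain C with hypothetical names:

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	/* p1[i] ^= p2[i] over "bytes" bytes; real templates unroll this loop. */
	static void demo_xor_2(unsigned long bytes, unsigned long *p1,
			       const unsigned long *p2)
	{
		size_t i, words = bytes / sizeof(unsigned long);

		for (i = 0; i < words; i++)
			p1[i] ^= p2[i];
	}

	int main(void)
	{
		unsigned long a[4] = { 1, 2, 3, 4 };
		unsigned long b[4] = { 4, 3, 2, 1 };

		demo_xor_2(sizeof(a), a, b);
		printf("%lu %lu %lu %lu\n", a[0], a[1], a[2], a[3]);	/* 5 1 1 5 */
		return 0;
	}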