diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2019-07-10 23:24:10 -0700 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2019-07-10 23:24:10 -0700 |
commit | 597473720f4dc69749542bfcfed4a927a43d935e (patch) | |
tree | 711bf773910fb93d1dd9120c633adc807685e0d8 /arch/arm64/include | |
parent | Input: atmel_mxt_ts - fix leak in mxt_update_cfg() (diff) | |
parent | Input: gpio_keys_polled - allow specifying name of input device (diff) | |
download | linux-dev-597473720f4dc69749542bfcfed4a927a43d935e.tar.xz linux-dev-597473720f4dc69749542bfcfed4a927a43d935e.zip |
Merge branch 'next' into for-linus
Prepare input updates for 5.3 merge window.
Diffstat (limited to 'arch/arm64/include')
78 files changed, 2472 insertions, 1030 deletions
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 6cd5d77b6b44..1e17ea5c372b 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -14,7 +14,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += msi.h -generic-y += preempt.h generic-y += qrwlock.h generic-y += qspinlock.h generic-y += rwsem.h @@ -27,4 +26,3 @@ generic-y += trace_clock.h generic-y += unaligned.h generic-y += user.h generic-y += vga.h -generic-y += xor.h diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 709208dfdc8b..7628efbe6c12 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -18,16 +18,28 @@ #include <asm/cputype.h> #include <asm/io.h> +#include <asm/ptrace.h> #include <asm/smp_plat.h> #include <asm/tlbflush.h> /* Macros for consistency checks of the GICC subtable of MADT */ -#define ACPI_MADT_GICC_LENGTH \ - (acpi_gbl_FADT.header.revision < 6 ? 76 : 80) + +/* + * MADT GICC minimum length refers to the MADT GICC structure table length as + * defined in the earliest ACPI version supported on arm64, ie ACPI 5.1. + * + * The efficiency_class member was added to the + * struct acpi_madt_generic_interrupt to represent the MADT GICC structure + * "Processor Power Efficiency Class" field, added in ACPI 6.0 whose offset + * is therefore used to delimit the MADT GICC structure minimum length + * appropriately. + */ +#define ACPI_MADT_GICC_MIN_LENGTH ACPI_OFFSET( \ + struct acpi_madt_generic_interrupt, efficiency_class) #define BAD_MADT_GICC_ENTRY(entry, end) \ - (!(entry) || (entry)->header.length != ACPI_MADT_GICC_LENGTH || \ - (unsigned long)(entry) + ACPI_MADT_GICC_LENGTH > (end)) + (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \ + (unsigned long)(entry) + (entry)->header.length > (end)) /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI @@ -99,9 +111,10 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu) static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); - +int apei_claim_sea(struct pt_regs *regs); #else static inline void acpi_init_cpus(void) { } +static inline int apei_claim_sea(struct pt_regs *regs) { return -ENOENT; } #endif /* CONFIG_ACPI */ #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 4b650ec1d7dd..b9f8d787eea9 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -14,8 +14,6 @@ #include <linux/stddef.h> #include <linux/stringify.h> -extern int alternatives_applied; - struct alt_instr { s32 orig_offset; /* offset to original instruction */ s32 alt_offset; /* offset to replacement instruction */ @@ -27,7 +25,9 @@ struct alt_instr { typedef void (*alternative_cb_t)(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); +void __init apply_boot_alternatives(void); void __init apply_alternatives_all(void); +bool alternative_is_applied(u16 cpufeature); #ifdef CONFIG_MODULES void apply_alternatives_module(void *start, size_t length); diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index e278f94df0c9..14b41ddc68ba 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -22,6 +22,7 @@ #ifndef __ASSEMBLY__ +#include <linux/irqchip/arm-gic-common.h> #include <linux/stringify.h> #include <asm/barrier.h> #include <asm/cacheflush.h> @@ -114,6 +115,21 @@ static inline void gic_write_bpr1(u32 val) write_sysreg_s(val, SYS_ICC_BPR1_EL1); } +static inline u32 gic_read_pmr(void) +{ + return read_sysreg_s(SYS_ICC_PMR_EL1); +} + +static inline void gic_write_pmr(u32 val) +{ + write_sysreg_s(val, SYS_ICC_PMR_EL1); +} + +static inline u32 gic_read_rpr(void) +{ + return read_sysreg_s(SYS_ICC_RPR_EL1); +} + #define gic_read_typer(c) readq_relaxed(c) #define gic_write_irouter(v, c) writeq_relaxed(v, c) #define gic_read_lpir(c) readq_relaxed(c) @@ -140,5 +156,21 @@ static inline void gic_write_bpr1(u32 val) #define gits_write_vpendbaser(v, c) writeq_relaxed(v, c) #define gits_read_vpendbaser(c) readq_relaxed(c) +static inline bool gic_prio_masking_enabled(void) +{ + return system_uses_irq_prio_masking(); +} + +static inline void gic_pmr_mask_irqs(void) +{ + BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF); + gic_write_pmr(GIC_PRIO_IRQOFF); +} + +static inline void gic_arch_enable_irqs(void) +{ + asm volatile ("msr daifclr, #2" : : : "memory"); +} + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ARCH_GICV3_H */ diff --git a/arch/arm64/include/asm/asm-prototypes.h b/arch/arm64/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..1c9a3a0c5fa5 --- /dev/null +++ b/arch/arm64/include/asm/asm-prototypes.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_PROTOTYPES_H +#define __ASM_PROTOTYPES_H +/* + * CONFIG_MODVERSIONS requires a C declaration to generate the appropriate CRC + * for each symbol. Since commit: + * + * 4efca4ed05cbdfd1 ("kbuild: modversions for EXPORT_SYMBOL() for asm") + * + * ... kbuild will automatically pick these up from <asm/asm-prototypes.h> and + * feed this to genksyms when building assembly files. + */ +#include <linux/arm-smccc.h> + +#include <asm/ftrace.h> +#include <asm/page.h> +#include <asm/string.h> +#include <asm/uaccess.h> + +#include <asm-generic/asm-prototypes.h> + +long long __ashlti3(long long a, int b); +long long __ashrti3(long long a, int b); +long long __lshrti3(long long a, int b); + +#endif /* __ASM_PROTOTYPES_H */ diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index 4128bec033f6..f74909ba29bd 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -24,7 +24,7 @@ .endm .macro __uaccess_ttbr0_enable, tmp1, tmp2 - get_thread_info \tmp1 + get_current_task \tmp1 ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 mrs \tmp2, ttbr1_el1 extr \tmp2, \tmp2, \tmp1, #48 diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 6142402c2eb4..c5308d01e228 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -23,8 +23,11 @@ #ifndef __ASM_ASSEMBLER_H #define __ASM_ASSEMBLER_H +#include <asm-generic/export.h> + #include <asm/asm-offsets.h> #include <asm/cpufeature.h> +#include <asm/cputype.h> #include <asm/debug-monitors.h> #include <asm/page.h> #include <asm/pgtable-hwdef.h> @@ -60,16 +63,8 @@ .endm /* - * Enable and disable interrupts. + * Save/restore interrupts. */ - .macro disable_irq - msr daifset, #2 - .endm - - .macro enable_irq - msr daifclr, #2 - .endm - .macro save_and_disable_irq, flags mrs \flags, daif msr daifset, #2 @@ -123,6 +118,19 @@ .endm /* + * Speculation barrier + */ + .macro sb +alternative_if_not ARM64_HAS_SB + dsb nsh + isb +alternative_else + SB_BARRIER_INSN + nop +alternative_endif + .endm + +/* * Sanitise a 64-bit bounded index wrt speculation, returning zero if out * of bounds. */ @@ -342,11 +350,10 @@ alternative_endif .endm /* - * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map + * tcr_set_t0sz - update TCR.T0SZ so that we can load the ID map */ - .macro tcr_set_idmap_t0sz, valreg, tmpreg - ldr_l \tmpreg, idmap_t0sz - bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH + .macro tcr_set_t0sz, valreg, t0sz + bfi \valreg, \t0sz, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH .endm /* @@ -377,27 +384,33 @@ alternative_endif * size: size of the region * Corrupts: kaddr, size, tmp1, tmp2 */ + .macro __dcache_op_workaround_clean_cache, op, kaddr +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc \op, \kaddr +alternative_else + dc civac, \kaddr +alternative_endif + .endm + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 dcache_line_size \tmp1, \tmp2 add \size, \kaddr, \size sub \tmp2, \tmp1, #1 bic \kaddr, \kaddr, \tmp2 9998: - .if (\op == cvau || \op == cvac) -alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE - dc \op, \kaddr -alternative_else - dc civac, \kaddr -alternative_endif - .elseif (\op == cvap) -alternative_if ARM64_HAS_DCPOP - sys 3, c7, c12, 1, \kaddr // dc cvap -alternative_else - dc cvac, \kaddr -alternative_endif + .ifc \op, cvau + __dcache_op_workaround_clean_cache \op, \kaddr + .else + .ifc \op, cvac + __dcache_op_workaround_clean_cache \op, \kaddr + .else + .ifc \op, cvap + sys 3, c7, c12, 1, \kaddr // dc cvap .else dc \op, \kaddr .endif + .endif + .endif add \kaddr, \kaddr, \tmp1 cmp \kaddr, \size b.lo 9998b @@ -477,6 +490,13 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU #else #define NOKPROBE(x) #endif + +#ifdef CONFIG_KASAN +#define EXPORT_SYMBOL_NOKASAN(name) +#else +#define EXPORT_SYMBOL_NOKASAN(name) EXPORT_SYMBOL(name) +#endif + /* * Emit a 64-bit absolute little endian symbol reference in a way that * ensures that it will be resolved at build time, even when building a @@ -509,13 +529,36 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .endm /* - * Return the current thread_info. + * Return the current task_struct. */ - .macro get_thread_info, rd + .macro get_current_task, rd mrs \rd, sp_el0 .endm /* + * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD. + * orr is used as it can cover the immediate value (and is idempotent). + * In future this may be nop'ed out when dealing with 52-bit kernel VAs. + * ttbr: Value of ttbr to set, modified. + */ + .macro offset_ttbr1, ttbr +#ifdef CONFIG_ARM64_USER_VA_BITS_52 + orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET +#endif + .endm + +/* + * Perform the reverse of offset_ttbr1. + * bic is used as it can cover the immediate value and, in future, won't need + * to be nop'ed out when dealing with 52-bit kernel VAs. + */ + .macro restore_ttbr1, ttbr +#ifdef CONFIG_ARM64_USER_VA_BITS_52 + bic \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET +#endif + .endm + +/* * Arrange a physical address in a TTBR register, taking care of 52-bit * addresses. * @@ -554,6 +597,25 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU #endif .endm +/* + * tcr_clear_errata_bits - Clear TCR bits that trigger an errata on this CPU. + */ + .macro tcr_clear_errata_bits, tcr, tmp1, tmp2 +#ifdef CONFIG_FUJITSU_ERRATUM_010001 + mrs \tmp1, midr_el1 + + mov_q \tmp2, MIDR_FUJITSU_ERRATUM_010001_MASK + and \tmp1, \tmp1, \tmp2 + mov_q \tmp2, MIDR_FUJITSU_ERRATUM_010001 + cmp \tmp1, \tmp2 + b.ne 10f + + mov_q \tmp2, TCR_CLEAR_FUJITSU_ERRATUM_010001 + bic \tcr, \tcr, \tmp2 +10: +#endif /* CONFIG_FUJITSU_ERRATUM_010001 */ + .endm + /** * Errata workaround prior to disable MMU. Insert an ISB immediately prior * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0. @@ -671,12 +733,10 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .macro if_will_cond_yield_neon #ifdef CONFIG_PREEMPT - get_thread_info x0 - ldr w1, [x0, #TSK_TI_PREEMPT] - ldr x0, [x0, #TSK_TI_FLAGS] - cmp w1, #PREEMPT_DISABLE_OFFSET - csel x0, x0, xzr, eq - tbnz x0, #TIF_NEED_RESCHED, .Lyield_\@ // needs rescheduling? + get_current_task x0 + ldr x0, [x0, #TSK_TI_PREEMPT] + sub x0, x0, #PREEMPT_DISABLE_OFFSET + cbz x0, .Lyield_\@ /* fall through to endif_yield_neon */ .subsection 1 .Lyield_\@ : diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 9bca54dda75c..1f4e9ee641c9 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -42,124 +42,131 @@ #define ATOMIC_INIT(i) { (i) } -#define atomic_read(v) READ_ONCE((v)->counter) -#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) - -#define atomic_add_return_relaxed atomic_add_return_relaxed -#define atomic_add_return_acquire atomic_add_return_acquire -#define atomic_add_return_release atomic_add_return_release -#define atomic_add_return atomic_add_return - -#define atomic_sub_return_relaxed atomic_sub_return_relaxed -#define atomic_sub_return_acquire atomic_sub_return_acquire -#define atomic_sub_return_release atomic_sub_return_release -#define atomic_sub_return atomic_sub_return - -#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed -#define atomic_fetch_add_acquire atomic_fetch_add_acquire -#define atomic_fetch_add_release atomic_fetch_add_release -#define atomic_fetch_add atomic_fetch_add - -#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed -#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire -#define atomic_fetch_sub_release atomic_fetch_sub_release -#define atomic_fetch_sub atomic_fetch_sub - -#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed -#define atomic_fetch_and_acquire atomic_fetch_and_acquire -#define atomic_fetch_and_release atomic_fetch_and_release -#define atomic_fetch_and atomic_fetch_and - -#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed -#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire -#define atomic_fetch_andnot_release atomic_fetch_andnot_release -#define atomic_fetch_andnot atomic_fetch_andnot - -#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed -#define atomic_fetch_or_acquire atomic_fetch_or_acquire -#define atomic_fetch_or_release atomic_fetch_or_release -#define atomic_fetch_or atomic_fetch_or - -#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed -#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire -#define atomic_fetch_xor_release atomic_fetch_xor_release -#define atomic_fetch_xor atomic_fetch_xor - -#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) -#define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new)) -#define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new)) -#define atomic_xchg(v, new) xchg(&((v)->counter), (new)) - -#define atomic_cmpxchg_relaxed(v, old, new) \ - cmpxchg_relaxed(&((v)->counter), (old), (new)) -#define atomic_cmpxchg_acquire(v, old, new) \ - cmpxchg_acquire(&((v)->counter), (old), (new)) -#define atomic_cmpxchg_release(v, old, new) \ - cmpxchg_release(&((v)->counter), (old), (new)) -#define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new)) - -#define atomic_andnot atomic_andnot +#define arch_atomic_read(v) READ_ONCE((v)->counter) +#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) + +#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed +#define arch_atomic_add_return_acquire arch_atomic_add_return_acquire +#define arch_atomic_add_return_release arch_atomic_add_return_release +#define arch_atomic_add_return arch_atomic_add_return + +#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed +#define arch_atomic_sub_return_acquire arch_atomic_sub_return_acquire +#define arch_atomic_sub_return_release arch_atomic_sub_return_release +#define arch_atomic_sub_return arch_atomic_sub_return + +#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed +#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add_acquire +#define arch_atomic_fetch_add_release arch_atomic_fetch_add_release +#define arch_atomic_fetch_add arch_atomic_fetch_add + +#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed +#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub_acquire +#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub_release +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + +#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed +#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and_acquire +#define arch_atomic_fetch_and_release arch_atomic_fetch_and_release +#define arch_atomic_fetch_and arch_atomic_fetch_and + +#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed +#define arch_atomic_fetch_andnot_acquire arch_atomic_fetch_andnot_acquire +#define arch_atomic_fetch_andnot_release arch_atomic_fetch_andnot_release +#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot + +#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed +#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or_acquire +#define arch_atomic_fetch_or_release arch_atomic_fetch_or_release +#define arch_atomic_fetch_or arch_atomic_fetch_or + +#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed +#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor_acquire +#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + +#define arch_atomic_xchg_relaxed(v, new) \ + arch_xchg_relaxed(&((v)->counter), (new)) +#define arch_atomic_xchg_acquire(v, new) \ + arch_xchg_acquire(&((v)->counter), (new)) +#define arch_atomic_xchg_release(v, new) \ + arch_xchg_release(&((v)->counter), (new)) +#define arch_atomic_xchg(v, new) \ + arch_xchg(&((v)->counter), (new)) + +#define arch_atomic_cmpxchg_relaxed(v, old, new) \ + arch_cmpxchg_relaxed(&((v)->counter), (old), (new)) +#define arch_atomic_cmpxchg_acquire(v, old, new) \ + arch_cmpxchg_acquire(&((v)->counter), (old), (new)) +#define arch_atomic_cmpxchg_release(v, old, new) \ + arch_cmpxchg_release(&((v)->counter), (old), (new)) +#define arch_atomic_cmpxchg(v, old, new) \ + arch_cmpxchg(&((v)->counter), (old), (new)) + +#define arch_atomic_andnot arch_atomic_andnot /* - * 64-bit atomic operations. + * 64-bit arch_atomic operations. */ -#define ATOMIC64_INIT ATOMIC_INIT -#define atomic64_read atomic_read -#define atomic64_set atomic_set - -#define atomic64_add_return_relaxed atomic64_add_return_relaxed -#define atomic64_add_return_acquire atomic64_add_return_acquire -#define atomic64_add_return_release atomic64_add_return_release -#define atomic64_add_return atomic64_add_return - -#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed -#define atomic64_sub_return_acquire atomic64_sub_return_acquire -#define atomic64_sub_return_release atomic64_sub_return_release -#define atomic64_sub_return atomic64_sub_return - -#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed -#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire -#define atomic64_fetch_add_release atomic64_fetch_add_release -#define atomic64_fetch_add atomic64_fetch_add - -#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed -#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire -#define atomic64_fetch_sub_release atomic64_fetch_sub_release -#define atomic64_fetch_sub atomic64_fetch_sub - -#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed -#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire -#define atomic64_fetch_and_release atomic64_fetch_and_release -#define atomic64_fetch_and atomic64_fetch_and - -#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed -#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire -#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release -#define atomic64_fetch_andnot atomic64_fetch_andnot - -#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed -#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire -#define atomic64_fetch_or_release atomic64_fetch_or_release -#define atomic64_fetch_or atomic64_fetch_or - -#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed -#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire -#define atomic64_fetch_xor_release atomic64_fetch_xor_release -#define atomic64_fetch_xor atomic64_fetch_xor - -#define atomic64_xchg_relaxed atomic_xchg_relaxed -#define atomic64_xchg_acquire atomic_xchg_acquire -#define atomic64_xchg_release atomic_xchg_release -#define atomic64_xchg atomic_xchg - -#define atomic64_cmpxchg_relaxed atomic_cmpxchg_relaxed -#define atomic64_cmpxchg_acquire atomic_cmpxchg_acquire -#define atomic64_cmpxchg_release atomic_cmpxchg_release -#define atomic64_cmpxchg atomic_cmpxchg - -#define atomic64_andnot atomic64_andnot - -#define atomic64_dec_if_positive atomic64_dec_if_positive +#define ATOMIC64_INIT ATOMIC_INIT +#define arch_atomic64_read arch_atomic_read +#define arch_atomic64_set arch_atomic_set + +#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed +#define arch_atomic64_add_return_acquire arch_atomic64_add_return_acquire +#define arch_atomic64_add_return_release arch_atomic64_add_return_release +#define arch_atomic64_add_return arch_atomic64_add_return + +#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed +#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return_acquire +#define arch_atomic64_sub_return_release arch_atomic64_sub_return_release +#define arch_atomic64_sub_return arch_atomic64_sub_return + +#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed +#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add_acquire +#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add_release +#define arch_atomic64_fetch_add arch_atomic64_fetch_add + +#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed +#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub_acquire +#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub_release +#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub + +#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed +#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and_acquire +#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and_release +#define arch_atomic64_fetch_and arch_atomic64_fetch_and + +#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed +#define arch_atomic64_fetch_andnot_acquire arch_atomic64_fetch_andnot_acquire +#define arch_atomic64_fetch_andnot_release arch_atomic64_fetch_andnot_release +#define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot + +#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed +#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or_acquire +#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or_release +#define arch_atomic64_fetch_or arch_atomic64_fetch_or + +#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed +#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor_acquire +#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor + +#define arch_atomic64_xchg_relaxed arch_atomic_xchg_relaxed +#define arch_atomic64_xchg_acquire arch_atomic_xchg_acquire +#define arch_atomic64_xchg_release arch_atomic_xchg_release +#define arch_atomic64_xchg arch_atomic_xchg + +#define arch_atomic64_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed +#define arch_atomic64_cmpxchg_acquire arch_atomic_cmpxchg_acquire +#define arch_atomic64_cmpxchg_release arch_atomic_cmpxchg_release +#define arch_atomic64_cmpxchg arch_atomic_cmpxchg + +#define arch_atomic64_andnot arch_atomic64_andnot + +#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive + +#include <asm-generic/atomic-instrumented.h> #endif #endif diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index f5a2d09afb38..e321293e0c89 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -39,7 +39,7 @@ #define ATOMIC_OP(op, asm_op) \ __LL_SC_INLINE void \ -__LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \ +__LL_SC_PREFIX(arch_atomic_##op(int i, atomic_t *v)) \ { \ unsigned long tmp; \ int result; \ @@ -53,11 +53,11 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "Ir" (i)); \ } \ -__LL_SC_EXPORT(atomic_##op); +__LL_SC_EXPORT(arch_atomic_##op); #define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \ __LL_SC_INLINE int \ -__LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \ +__LL_SC_PREFIX(arch_atomic_##op##_return##name(int i, atomic_t *v)) \ { \ unsigned long tmp; \ int result; \ @@ -75,11 +75,11 @@ __LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \ \ return result; \ } \ -__LL_SC_EXPORT(atomic_##op##_return##name); +__LL_SC_EXPORT(arch_atomic_##op##_return##name); #define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ __LL_SC_INLINE int \ -__LL_SC_PREFIX(atomic_fetch_##op##name(int i, atomic_t *v)) \ +__LL_SC_PREFIX(arch_atomic_fetch_##op##name(int i, atomic_t *v)) \ { \ unsigned long tmp; \ int val, result; \ @@ -97,7 +97,7 @@ __LL_SC_PREFIX(atomic_fetch_##op##name(int i, atomic_t *v)) \ \ return result; \ } \ -__LL_SC_EXPORT(atomic_fetch_##op##name); +__LL_SC_EXPORT(arch_atomic_fetch_##op##name); #define ATOMIC_OPS(...) \ ATOMIC_OP(__VA_ARGS__) \ @@ -133,7 +133,7 @@ ATOMIC_OPS(xor, eor) #define ATOMIC64_OP(op, asm_op) \ __LL_SC_INLINE void \ -__LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \ +__LL_SC_PREFIX(arch_atomic64_##op(long i, atomic64_t *v)) \ { \ long result; \ unsigned long tmp; \ @@ -147,11 +147,11 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "Ir" (i)); \ } \ -__LL_SC_EXPORT(atomic64_##op); +__LL_SC_EXPORT(arch_atomic64_##op); #define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \ __LL_SC_INLINE long \ -__LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \ +__LL_SC_PREFIX(arch_atomic64_##op##_return##name(long i, atomic64_t *v))\ { \ long result; \ unsigned long tmp; \ @@ -169,11 +169,11 @@ __LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \ \ return result; \ } \ -__LL_SC_EXPORT(atomic64_##op##_return##name); +__LL_SC_EXPORT(arch_atomic64_##op##_return##name); #define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ __LL_SC_INLINE long \ -__LL_SC_PREFIX(atomic64_fetch_##op##name(long i, atomic64_t *v)) \ +__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(long i, atomic64_t *v)) \ { \ long result, val; \ unsigned long tmp; \ @@ -191,7 +191,7 @@ __LL_SC_PREFIX(atomic64_fetch_##op##name(long i, atomic64_t *v)) \ \ return result; \ } \ -__LL_SC_EXPORT(atomic64_fetch_##op##name); +__LL_SC_EXPORT(arch_atomic64_fetch_##op##name); #define ATOMIC64_OPS(...) \ ATOMIC64_OP(__VA_ARGS__) \ @@ -226,7 +226,7 @@ ATOMIC64_OPS(xor, eor) #undef ATOMIC64_OP __LL_SC_INLINE long -__LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) +__LL_SC_PREFIX(arch_atomic64_dec_if_positive(atomic64_t *v)) { long result; unsigned long tmp; @@ -246,50 +246,59 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) return result; } -__LL_SC_EXPORT(atomic64_dec_if_positive); +__LL_SC_EXPORT(arch_atomic64_dec_if_positive); -#define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl) \ -__LL_SC_INLINE unsigned long \ -__LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ - unsigned long old, \ - unsigned long new)) \ +#define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl) \ +__LL_SC_INLINE u##sz \ +__LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr, \ + unsigned long old, \ + u##sz new)) \ { \ - unsigned long tmp, oldval; \ + unsigned long tmp; \ + u##sz oldval; \ + \ + /* \ + * Sub-word sizes require explicit casting so that the compare \ + * part of the cmpxchg doesn't end up interpreting non-zero \ + * upper bits of the register containing "old". \ + */ \ + if (sz < 32) \ + old = (u##sz)old; \ \ asm volatile( \ " prfm pstl1strm, %[v]\n" \ - "1: ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n" \ + "1: ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \ " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \ " cbnz %" #w "[tmp], 2f\n" \ - " st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \ + " st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \ " cbnz %w[tmp], 1b\n" \ " " #mb "\n" \ "2:" \ : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \ - [v] "+Q" (*(unsigned long *)ptr) \ - : [old] "Lr" (old), [new] "r" (new) \ + [v] "+Q" (*(u##sz *)ptr) \ + : [old] "Kr" (old), [new] "r" (new) \ : cl); \ \ return oldval; \ } \ -__LL_SC_EXPORT(__cmpxchg_case_##name); +__LL_SC_EXPORT(__cmpxchg_case_##name##sz); -__CMPXCHG_CASE(w, b, 1, , , , ) -__CMPXCHG_CASE(w, h, 2, , , , ) -__CMPXCHG_CASE(w, , 4, , , , ) -__CMPXCHG_CASE( , , 8, , , , ) -__CMPXCHG_CASE(w, b, acq_1, , a, , "memory") -__CMPXCHG_CASE(w, h, acq_2, , a, , "memory") -__CMPXCHG_CASE(w, , acq_4, , a, , "memory") -__CMPXCHG_CASE( , , acq_8, , a, , "memory") -__CMPXCHG_CASE(w, b, rel_1, , , l, "memory") -__CMPXCHG_CASE(w, h, rel_2, , , l, "memory") -__CMPXCHG_CASE(w, , rel_4, , , l, "memory") -__CMPXCHG_CASE( , , rel_8, , , l, "memory") -__CMPXCHG_CASE(w, b, mb_1, dmb ish, , l, "memory") -__CMPXCHG_CASE(w, h, mb_2, dmb ish, , l, "memory") -__CMPXCHG_CASE(w, , mb_4, dmb ish, , l, "memory") -__CMPXCHG_CASE( , , mb_8, dmb ish, , l, "memory") +__CMPXCHG_CASE(w, b, , 8, , , , ) +__CMPXCHG_CASE(w, h, , 16, , , , ) +__CMPXCHG_CASE(w, , , 32, , , , ) +__CMPXCHG_CASE( , , , 64, , , , ) +__CMPXCHG_CASE(w, b, acq_, 8, , a, , "memory") +__CMPXCHG_CASE(w, h, acq_, 16, , a, , "memory") +__CMPXCHG_CASE(w, , acq_, 32, , a, , "memory") +__CMPXCHG_CASE( , , acq_, 64, , a, , "memory") +__CMPXCHG_CASE(w, b, rel_, 8, , , l, "memory") +__CMPXCHG_CASE(w, h, rel_, 16, , , l, "memory") +__CMPXCHG_CASE(w, , rel_, 32, , , l, "memory") +__CMPXCHG_CASE( , , rel_, 64, , , l, "memory") +__CMPXCHG_CASE(w, b, mb_, 8, dmb ish, , l, "memory") +__CMPXCHG_CASE(w, h, mb_, 16, dmb ish, , l, "memory") +__CMPXCHG_CASE(w, , mb_, 32, dmb ish, , l, "memory") +__CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory") #undef __CMPXCHG_CASE diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index f9b0b09153e0..9256a3921e4b 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -25,9 +25,9 @@ #error "please don't include this file directly" #endif -#define __LL_SC_ATOMIC(op) __LL_SC_CALL(atomic_##op) +#define __LL_SC_ATOMIC(op) __LL_SC_CALL(arch_atomic_##op) #define ATOMIC_OP(op, asm_op) \ -static inline void atomic_##op(int i, atomic_t *v) \ +static inline void arch_atomic_##op(int i, atomic_t *v) \ { \ register int w0 asm ("w0") = i; \ register atomic_t *x1 asm ("x1") = v; \ @@ -47,7 +47,7 @@ ATOMIC_OP(add, stadd) #undef ATOMIC_OP #define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \ -static inline int atomic_fetch_##op##name(int i, atomic_t *v) \ +static inline int arch_atomic_fetch_##op##name(int i, atomic_t *v) \ { \ register int w0 asm ("w0") = i; \ register atomic_t *x1 asm ("x1") = v; \ @@ -79,7 +79,7 @@ ATOMIC_FETCH_OPS(add, ldadd) #undef ATOMIC_FETCH_OPS #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ -static inline int atomic_add_return##name(int i, atomic_t *v) \ +static inline int arch_atomic_add_return##name(int i, atomic_t *v) \ { \ register int w0 asm ("w0") = i; \ register atomic_t *x1 asm ("x1") = v; \ @@ -105,7 +105,7 @@ ATOMIC_OP_ADD_RETURN( , al, "memory") #undef ATOMIC_OP_ADD_RETURN -static inline void atomic_and(int i, atomic_t *v) +static inline void arch_atomic_and(int i, atomic_t *v) { register int w0 asm ("w0") = i; register atomic_t *x1 asm ("x1") = v; @@ -123,7 +123,7 @@ static inline void atomic_and(int i, atomic_t *v) } #define ATOMIC_FETCH_OP_AND(name, mb, cl...) \ -static inline int atomic_fetch_and##name(int i, atomic_t *v) \ +static inline int arch_atomic_fetch_and##name(int i, atomic_t *v) \ { \ register int w0 asm ("w0") = i; \ register atomic_t *x1 asm ("x1") = v; \ @@ -149,7 +149,7 @@ ATOMIC_FETCH_OP_AND( , al, "memory") #undef ATOMIC_FETCH_OP_AND -static inline void atomic_sub(int i, atomic_t *v) +static inline void arch_atomic_sub(int i, atomic_t *v) { register int w0 asm ("w0") = i; register atomic_t *x1 asm ("x1") = v; @@ -167,7 +167,7 @@ static inline void atomic_sub(int i, atomic_t *v) } #define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \ -static inline int atomic_sub_return##name(int i, atomic_t *v) \ +static inline int arch_atomic_sub_return##name(int i, atomic_t *v) \ { \ register int w0 asm ("w0") = i; \ register atomic_t *x1 asm ("x1") = v; \ @@ -195,7 +195,7 @@ ATOMIC_OP_SUB_RETURN( , al, "memory") #undef ATOMIC_OP_SUB_RETURN #define ATOMIC_FETCH_OP_SUB(name, mb, cl...) \ -static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ +static inline int arch_atomic_fetch_sub##name(int i, atomic_t *v) \ { \ register int w0 asm ("w0") = i; \ register atomic_t *x1 asm ("x1") = v; \ @@ -222,9 +222,9 @@ ATOMIC_FETCH_OP_SUB( , al, "memory") #undef ATOMIC_FETCH_OP_SUB #undef __LL_SC_ATOMIC -#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) +#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(arch_atomic64_##op) #define ATOMIC64_OP(op, asm_op) \ -static inline void atomic64_##op(long i, atomic64_t *v) \ +static inline void arch_atomic64_##op(long i, atomic64_t *v) \ { \ register long x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ @@ -244,7 +244,7 @@ ATOMIC64_OP(add, stadd) #undef ATOMIC64_OP #define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \ -static inline long atomic64_fetch_##op##name(long i, atomic64_t *v) \ +static inline long arch_atomic64_fetch_##op##name(long i, atomic64_t *v)\ { \ register long x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ @@ -276,7 +276,7 @@ ATOMIC64_FETCH_OPS(add, ldadd) #undef ATOMIC64_FETCH_OPS #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ -static inline long atomic64_add_return##name(long i, atomic64_t *v) \ +static inline long arch_atomic64_add_return##name(long i, atomic64_t *v)\ { \ register long x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ @@ -302,7 +302,7 @@ ATOMIC64_OP_ADD_RETURN( , al, "memory") #undef ATOMIC64_OP_ADD_RETURN -static inline void atomic64_and(long i, atomic64_t *v) +static inline void arch_atomic64_and(long i, atomic64_t *v) { register long x0 asm ("x0") = i; register atomic64_t *x1 asm ("x1") = v; @@ -320,7 +320,7 @@ static inline void atomic64_and(long i, atomic64_t *v) } #define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \ -static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ +static inline long arch_atomic64_fetch_and##name(long i, atomic64_t *v) \ { \ register long x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ @@ -346,7 +346,7 @@ ATOMIC64_FETCH_OP_AND( , al, "memory") #undef ATOMIC64_FETCH_OP_AND -static inline void atomic64_sub(long i, atomic64_t *v) +static inline void arch_atomic64_sub(long i, atomic64_t *v) { register long x0 asm ("x0") = i; register atomic64_t *x1 asm ("x1") = v; @@ -364,7 +364,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) } #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \ -static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ +static inline long arch_atomic64_sub_return##name(long i, atomic64_t *v)\ { \ register long x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ @@ -392,7 +392,7 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") #undef ATOMIC64_OP_SUB_RETURN #define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \ -static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ +static inline long arch_atomic64_fetch_sub##name(long i, atomic64_t *v) \ { \ register long x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ @@ -418,7 +418,7 @@ ATOMIC64_FETCH_OP_SUB( , al, "memory") #undef ATOMIC64_FETCH_OP_SUB -static inline long atomic64_dec_if_positive(atomic64_t *v) +static inline long arch_atomic64_dec_if_positive(atomic64_t *v) { register long x0 asm ("x0") = (long)v; @@ -446,22 +446,22 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) #define __LL_SC_CMPXCHG(op) __LL_SC_CALL(__cmpxchg_case_##op) -#define __CMPXCHG_CASE(w, sz, name, mb, cl...) \ -static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \ - unsigned long old, \ - unsigned long new) \ +#define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...) \ +static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr, \ + u##sz old, \ + u##sz new) \ { \ register unsigned long x0 asm ("x0") = (unsigned long)ptr; \ - register unsigned long x1 asm ("x1") = old; \ - register unsigned long x2 asm ("x2") = new; \ + register u##sz x1 asm ("x1") = old; \ + register u##sz x2 asm ("x2") = new; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - __LL_SC_CMPXCHG(name) \ + __LL_SC_CMPXCHG(name##sz) \ __nops(2), \ /* LSE atomics */ \ " mov " #w "30, %" #w "[old]\n" \ - " cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n" \ + " cas" #mb #sfx "\t" #w "30, %" #w "[new], %[v]\n" \ " mov %" #w "[ret], " #w "30") \ : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr) \ : [old] "r" (x1), [new] "r" (x2) \ @@ -470,22 +470,22 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \ return x0; \ } -__CMPXCHG_CASE(w, b, 1, ) -__CMPXCHG_CASE(w, h, 2, ) -__CMPXCHG_CASE(w, , 4, ) -__CMPXCHG_CASE(x, , 8, ) -__CMPXCHG_CASE(w, b, acq_1, a, "memory") -__CMPXCHG_CASE(w, h, acq_2, a, "memory") -__CMPXCHG_CASE(w, , acq_4, a, "memory") -__CMPXCHG_CASE(x, , acq_8, a, "memory") -__CMPXCHG_CASE(w, b, rel_1, l, "memory") -__CMPXCHG_CASE(w, h, rel_2, l, "memory") -__CMPXCHG_CASE(w, , rel_4, l, "memory") -__CMPXCHG_CASE(x, , rel_8, l, "memory") -__CMPXCHG_CASE(w, b, mb_1, al, "memory") -__CMPXCHG_CASE(w, h, mb_2, al, "memory") -__CMPXCHG_CASE(w, , mb_4, al, "memory") -__CMPXCHG_CASE(x, , mb_8, al, "memory") +__CMPXCHG_CASE(w, b, , 8, ) +__CMPXCHG_CASE(w, h, , 16, ) +__CMPXCHG_CASE(w, , , 32, ) +__CMPXCHG_CASE(x, , , 64, ) +__CMPXCHG_CASE(w, b, acq_, 8, a, "memory") +__CMPXCHG_CASE(w, h, acq_, 16, a, "memory") +__CMPXCHG_CASE(w, , acq_, 32, a, "memory") +__CMPXCHG_CASE(x, , acq_, 64, a, "memory") +__CMPXCHG_CASE(w, b, rel_, 8, l, "memory") +__CMPXCHG_CASE(w, h, rel_, 16, l, "memory") +__CMPXCHG_CASE(w, , rel_, 32, l, "memory") +__CMPXCHG_CASE(x, , rel_, 64, l, "memory") +__CMPXCHG_CASE(w, b, mb_, 8, al, "memory") +__CMPXCHG_CASE(w, h, mb_, 16, al, "memory") +__CMPXCHG_CASE(w, , mb_, 32, al, "memory") +__CMPXCHG_CASE(x, , mb_, 64, al, "memory") #undef __LL_SC_CMPXCHG #undef __CMPXCHG_CASE diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 822a9192c551..f66bb04fdf2d 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -34,6 +34,10 @@ #define psb_csync() asm volatile("hint #17" : : : "memory") #define csdb() asm volatile("hint #20" : : : "memory") +#define spec_bar() asm volatile(ALTERNATIVE("dsb nsh\nisb\n", \ + SB_BARRIER_INSN"nop\n", \ + ARM64_HAS_SB)) + #define mb() dsb(sy) #define rmb() dsb(ld) #define wmb() dsb(st) diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index ed693c5bcec0..2945fe6cd863 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -16,10 +16,12 @@ * 0x400: for dynamic BRK instruction * 0x401: for compile time BRK instruction * 0x800: kernel-mode BUG() and WARN() traps + * 0x9xx: tag-based KASAN trap (allowed values 0x900 - 0x9ff) */ #define FAULT_BRK_IMM 0x100 #define KGDB_DYN_DBG_BRK_IMM 0x400 #define KGDB_COMPILED_DBG_BRK_IMM 0x401 #define BUG_BRK_IMM 0x800 +#define KASAN_BRK_IMM 0x900 #endif diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 13dd42c3ad4e..926434f413fa 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -58,6 +58,10 @@ */ #define ARCH_DMA_MINALIGN (128) +#ifdef CONFIG_KASAN_SW_TAGS +#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) +#endif + #ifndef __ASSEMBLY__ #include <linux/bitops.h> diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 3b0938281541..e6ea0f42e097 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -30,46 +30,46 @@ * barrier case is generated as release+dmb for the former and * acquire+release for the latter. */ -#define __XCHG_CASE(w, sz, name, mb, nop_lse, acq, acq_lse, rel, cl) \ -static inline unsigned long __xchg_case_##name(unsigned long x, \ - volatile void *ptr) \ -{ \ - unsigned long ret, tmp; \ - \ - asm volatile(ARM64_LSE_ATOMIC_INSN( \ - /* LL/SC */ \ - " prfm pstl1strm, %2\n" \ - "1: ld" #acq "xr" #sz "\t%" #w "0, %2\n" \ - " st" #rel "xr" #sz "\t%w1, %" #w "3, %2\n" \ - " cbnz %w1, 1b\n" \ - " " #mb, \ - /* LSE atomics */ \ - " swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \ - __nops(3) \ - " " #nop_lse) \ - : "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned long *)ptr) \ - : "r" (x) \ - : cl); \ - \ - return ret; \ +#define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl) \ +static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr) \ +{ \ + u##sz ret; \ + unsigned long tmp; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " prfm pstl1strm, %2\n" \ + "1: ld" #acq "xr" #sfx "\t%" #w "0, %2\n" \ + " st" #rel "xr" #sfx "\t%w1, %" #w "3, %2\n" \ + " cbnz %w1, 1b\n" \ + " " #mb, \ + /* LSE atomics */ \ + " swp" #acq_lse #rel #sfx "\t%" #w "3, %" #w "0, %2\n" \ + __nops(3) \ + " " #nop_lse) \ + : "=&r" (ret), "=&r" (tmp), "+Q" (*(u##sz *)ptr) \ + : "r" (x) \ + : cl); \ + \ + return ret; \ } -__XCHG_CASE(w, b, 1, , , , , , ) -__XCHG_CASE(w, h, 2, , , , , , ) -__XCHG_CASE(w, , 4, , , , , , ) -__XCHG_CASE( , , 8, , , , , , ) -__XCHG_CASE(w, b, acq_1, , , a, a, , "memory") -__XCHG_CASE(w, h, acq_2, , , a, a, , "memory") -__XCHG_CASE(w, , acq_4, , , a, a, , "memory") -__XCHG_CASE( , , acq_8, , , a, a, , "memory") -__XCHG_CASE(w, b, rel_1, , , , , l, "memory") -__XCHG_CASE(w, h, rel_2, , , , , l, "memory") -__XCHG_CASE(w, , rel_4, , , , , l, "memory") -__XCHG_CASE( , , rel_8, , , , , l, "memory") -__XCHG_CASE(w, b, mb_1, dmb ish, nop, , a, l, "memory") -__XCHG_CASE(w, h, mb_2, dmb ish, nop, , a, l, "memory") -__XCHG_CASE(w, , mb_4, dmb ish, nop, , a, l, "memory") -__XCHG_CASE( , , mb_8, dmb ish, nop, , a, l, "memory") +__XCHG_CASE(w, b, , 8, , , , , , ) +__XCHG_CASE(w, h, , 16, , , , , , ) +__XCHG_CASE(w, , , 32, , , , , , ) +__XCHG_CASE( , , , 64, , , , , , ) +__XCHG_CASE(w, b, acq_, 8, , , a, a, , "memory") +__XCHG_CASE(w, h, acq_, 16, , , a, a, , "memory") +__XCHG_CASE(w, , acq_, 32, , , a, a, , "memory") +__XCHG_CASE( , , acq_, 64, , , a, a, , "memory") +__XCHG_CASE(w, b, rel_, 8, , , , , l, "memory") +__XCHG_CASE(w, h, rel_, 16, , , , , l, "memory") +__XCHG_CASE(w, , rel_, 32, , , , , l, "memory") +__XCHG_CASE( , , rel_, 64, , , , , l, "memory") +__XCHG_CASE(w, b, mb_, 8, dmb ish, nop, , a, l, "memory") +__XCHG_CASE(w, h, mb_, 16, dmb ish, nop, , a, l, "memory") +__XCHG_CASE(w, , mb_, 32, dmb ish, nop, , a, l, "memory") +__XCHG_CASE( , , mb_, 64, dmb ish, nop, , a, l, "memory") #undef __XCHG_CASE @@ -80,13 +80,13 @@ static inline unsigned long __xchg##sfx(unsigned long x, \ { \ switch (size) { \ case 1: \ - return __xchg_case##sfx##_1(x, ptr); \ + return __xchg_case##sfx##_8(x, ptr); \ case 2: \ - return __xchg_case##sfx##_2(x, ptr); \ + return __xchg_case##sfx##_16(x, ptr); \ case 4: \ - return __xchg_case##sfx##_4(x, ptr); \ + return __xchg_case##sfx##_32(x, ptr); \ case 8: \ - return __xchg_case##sfx##_8(x, ptr); \ + return __xchg_case##sfx##_64(x, ptr); \ default: \ BUILD_BUG(); \ } \ @@ -110,10 +110,10 @@ __XCHG_GEN(_mb) }) /* xchg */ -#define xchg_relaxed(...) __xchg_wrapper( , __VA_ARGS__) -#define xchg_acquire(...) __xchg_wrapper(_acq, __VA_ARGS__) -#define xchg_release(...) __xchg_wrapper(_rel, __VA_ARGS__) -#define xchg(...) __xchg_wrapper( _mb, __VA_ARGS__) +#define arch_xchg_relaxed(...) __xchg_wrapper( , __VA_ARGS__) +#define arch_xchg_acquire(...) __xchg_wrapper(_acq, __VA_ARGS__) +#define arch_xchg_release(...) __xchg_wrapper(_rel, __VA_ARGS__) +#define arch_xchg(...) __xchg_wrapper( _mb, __VA_ARGS__) #define __CMPXCHG_GEN(sfx) \ static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \ @@ -123,13 +123,13 @@ static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \ { \ switch (size) { \ case 1: \ - return __cmpxchg_case##sfx##_1(ptr, (u8)old, new); \ + return __cmpxchg_case##sfx##_8(ptr, old, new); \ case 2: \ - return __cmpxchg_case##sfx##_2(ptr, (u16)old, new); \ + return __cmpxchg_case##sfx##_16(ptr, old, new); \ case 4: \ - return __cmpxchg_case##sfx##_4(ptr, old, new); \ + return __cmpxchg_case##sfx##_32(ptr, old, new); \ case 8: \ - return __cmpxchg_case##sfx##_8(ptr, old, new); \ + return __cmpxchg_case##sfx##_64(ptr, old, new); \ default: \ BUILD_BUG(); \ } \ @@ -154,18 +154,18 @@ __CMPXCHG_GEN(_mb) }) /* cmpxchg */ -#define cmpxchg_relaxed(...) __cmpxchg_wrapper( , __VA_ARGS__) -#define cmpxchg_acquire(...) __cmpxchg_wrapper(_acq, __VA_ARGS__) -#define cmpxchg_release(...) __cmpxchg_wrapper(_rel, __VA_ARGS__) -#define cmpxchg(...) __cmpxchg_wrapper( _mb, __VA_ARGS__) -#define cmpxchg_local cmpxchg_relaxed +#define arch_cmpxchg_relaxed(...) __cmpxchg_wrapper( , __VA_ARGS__) +#define arch_cmpxchg_acquire(...) __cmpxchg_wrapper(_acq, __VA_ARGS__) +#define arch_cmpxchg_release(...) __cmpxchg_wrapper(_rel, __VA_ARGS__) +#define arch_cmpxchg(...) __cmpxchg_wrapper( _mb, __VA_ARGS__) +#define arch_cmpxchg_local arch_cmpxchg_relaxed /* cmpxchg64 */ -#define cmpxchg64_relaxed cmpxchg_relaxed -#define cmpxchg64_acquire cmpxchg_acquire -#define cmpxchg64_release cmpxchg_release -#define cmpxchg64 cmpxchg -#define cmpxchg64_local cmpxchg_local +#define arch_cmpxchg64_relaxed arch_cmpxchg_relaxed +#define arch_cmpxchg64_acquire arch_cmpxchg_acquire +#define arch_cmpxchg64_release arch_cmpxchg_release +#define arch_cmpxchg64 arch_cmpxchg +#define arch_cmpxchg64_local arch_cmpxchg_local /* cmpxchg_double */ #define system_has_cmpxchg_double() 1 @@ -177,36 +177,36 @@ __CMPXCHG_GEN(_mb) VM_BUG_ON((unsigned long *)(ptr2) - (unsigned long *)(ptr1) != 1); \ }) -#define cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \ -({\ - int __ret;\ - __cmpxchg_double_check(ptr1, ptr2); \ - __ret = !__cmpxchg_double_mb((unsigned long)(o1), (unsigned long)(o2), \ - (unsigned long)(n1), (unsigned long)(n2), \ - ptr1); \ - __ret; \ +#define arch_cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \ +({ \ + int __ret; \ + __cmpxchg_double_check(ptr1, ptr2); \ + __ret = !__cmpxchg_double_mb((unsigned long)(o1), (unsigned long)(o2), \ + (unsigned long)(n1), (unsigned long)(n2), \ + ptr1); \ + __ret; \ }) -#define cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \ -({\ - int __ret;\ - __cmpxchg_double_check(ptr1, ptr2); \ - __ret = !__cmpxchg_double((unsigned long)(o1), (unsigned long)(o2), \ - (unsigned long)(n1), (unsigned long)(n2), \ - ptr1); \ - __ret; \ +#define arch_cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \ +({ \ + int __ret; \ + __cmpxchg_double_check(ptr1, ptr2); \ + __ret = !__cmpxchg_double((unsigned long)(o1), (unsigned long)(o2), \ + (unsigned long)(n1), (unsigned long)(n2), \ + ptr1); \ + __ret; \ }) -#define __CMPWAIT_CASE(w, sz, name) \ -static inline void __cmpwait_case_##name(volatile void *ptr, \ - unsigned long val) \ +#define __CMPWAIT_CASE(w, sfx, sz) \ +static inline void __cmpwait_case_##sz(volatile void *ptr, \ + unsigned long val) \ { \ unsigned long tmp; \ \ asm volatile( \ " sevl\n" \ " wfe\n" \ - " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ + " ldxr" #sfx "\t%" #w "[tmp], %[v]\n" \ " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ " cbnz %" #w "[tmp], 1f\n" \ " wfe\n" \ @@ -215,10 +215,10 @@ static inline void __cmpwait_case_##name(volatile void *ptr, \ : [val] "r" (val)); \ } -__CMPWAIT_CASE(w, b, 1); -__CMPWAIT_CASE(w, h, 2); -__CMPWAIT_CASE(w, , 4); -__CMPWAIT_CASE( , , 8); +__CMPWAIT_CASE(w, b, 8); +__CMPWAIT_CASE(w, h, 16); +__CMPWAIT_CASE(w, , 32); +__CMPWAIT_CASE( , , 64); #undef __CMPWAIT_CASE @@ -229,13 +229,13 @@ static inline void __cmpwait##sfx(volatile void *ptr, \ { \ switch (size) { \ case 1: \ - return __cmpwait_case##sfx##_1(ptr, (u8)val); \ + return __cmpwait_case##sfx##_8(ptr, (u8)val); \ case 2: \ - return __cmpwait_case##sfx##_2(ptr, (u16)val); \ + return __cmpwait_case##sfx##_16(ptr, (u16)val); \ case 4: \ - return __cmpwait_case##sfx##_4(ptr, val); \ + return __cmpwait_case##sfx##_32(ptr, val); \ case 8: \ - return __cmpwait_case##sfx##_8(ptr, val); \ + return __cmpwait_case##sfx##_64(ptr, val); \ default: \ BUILD_BUG(); \ } \ diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 6e2d254c09eb..f6a76e43f39e 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -54,7 +54,14 @@ #define ARM64_HAS_CRC32 33 #define ARM64_SSBS 34 #define ARM64_WORKAROUND_1188873 35 +#define ARM64_HAS_SB 36 +#define ARM64_WORKAROUND_1165522 37 +#define ARM64_HAS_ADDRESS_AUTH_ARCH 38 +#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39 +#define ARM64_HAS_GENERIC_AUTH_ARCH 40 +#define ARM64_HAS_GENERIC_AUTH_IMP_DEF 41 +#define ARM64_HAS_IRQ_PRIO_MASKING 42 -#define ARM64_NCAPS 36 +#define ARM64_NCAPS 43 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 7e2ec64aa414..e505e1fbd2b9 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -321,19 +321,20 @@ struct arm64_cpu_capabilities { bool sign; unsigned long hwcap; }; - /* - * A list of "matches/cpu_enable" pair for the same - * "capability" of the same "type" as described by the parent. - * Only matches(), cpu_enable() and fields relevant to these - * methods are significant in the list. The cpu_enable is - * invoked only if the corresponding entry "matches()". - * However, if a cpu_enable() method is associated - * with multiple matches(), care should be taken that either - * the match criteria are mutually exclusive, or that the - * method is robust against being called multiple times. - */ - const struct arm64_cpu_capabilities *match_list; }; + + /* + * An optional list of "matches/cpu_enable" pair for the same + * "capability" of the same "type" as described by the parent. + * Only matches(), cpu_enable() and fields relevant to these + * methods are significant in the list. The cpu_enable is + * invoked only if the corresponding entry "matches()". + * However, if a cpu_enable() method is associated + * with multiple matches(), care should be taken that either + * the match criteria are mutually exclusive, or that the + * method is robust against being called multiple times. + */ + const struct arm64_cpu_capabilities *match_list; }; static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) @@ -353,10 +354,50 @@ cpucap_late_cpu_permitted(const struct arm64_cpu_capabilities *cap) return !!(cap->type & ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU); } +/* + * Generic helper for handling capabilties with multiple (match,enable) pairs + * of call backs, sharing the same capability bit. + * Iterate over each entry to see if at least one matches. + */ +static inline bool +cpucap_multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry, + int scope) +{ + const struct arm64_cpu_capabilities *caps; + + for (caps = entry->match_list; caps->matches; caps++) + if (caps->matches(caps, scope)) + return true; + + return false; +} + +/* + * Take appropriate action for all matching entries in the shared capability + * entry. + */ +static inline void +cpucap_multi_entry_cap_cpu_enable(const struct arm64_cpu_capabilities *entry) +{ + const struct arm64_cpu_capabilities *caps; + + for (caps = entry->match_list; caps->matches; caps++) + if (caps->matches(caps, SCOPE_LOCAL_CPU) && + caps->cpu_enable) + caps->cpu_enable(caps); +} + extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; +/* ARM64 CAPS + alternative_cb */ +#define ARM64_NPATCHABLE (ARM64_NCAPS + 1) +extern DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); + +#define for_each_available_cap(cap) \ + for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS) + bool this_cpu_has_cap(unsigned int cap); static inline bool cpu_have_feature(unsigned int num) @@ -473,7 +514,6 @@ static inline bool id_aa64pfr0_sve(u64 pfr0) void __init setup_cpu_features(void); void check_local_cpu_capabilities(void); - u64 read_sanitised_ftr_reg(u32 id); static inline bool cpu_supports_mixed_endian_el0(void) @@ -486,11 +526,59 @@ static inline bool system_supports_32bit_el0(void) return cpus_have_const_cap(ARM64_HAS_32BIT_EL0); } +static inline bool system_supports_4kb_granule(void) +{ + u64 mmfr0; + u32 val; + + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + val = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_TGRAN4_SHIFT); + + return val == ID_AA64MMFR0_TGRAN4_SUPPORTED; +} + +static inline bool system_supports_64kb_granule(void) +{ + u64 mmfr0; + u32 val; + + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + val = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_TGRAN64_SHIFT); + + return val == ID_AA64MMFR0_TGRAN64_SUPPORTED; +} + +static inline bool system_supports_16kb_granule(void) +{ + u64 mmfr0; + u32 val; + + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + val = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_TGRAN16_SHIFT); + + return val == ID_AA64MMFR0_TGRAN16_SUPPORTED; +} + static inline bool system_supports_mixed_endian_el0(void) { return id_aa64mmfr0_mixed_endian_el0(read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1)); } +static inline bool system_supports_mixed_endian(void) +{ + u64 mmfr0; + u32 val; + + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + val = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_BIGENDEL_SHIFT); + + return val == 0x1; +} + static inline bool system_supports_fpsimd(void) { return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD); @@ -514,6 +602,26 @@ static inline bool system_supports_cnp(void) cpus_have_const_cap(ARM64_HAS_CNP); } +static inline bool system_supports_address_auth(void) +{ + return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) && + (cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH_ARCH) || + cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH_IMP_DEF)); +} + +static inline bool system_supports_generic_auth(void) +{ + return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) && + (cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH_ARCH) || + cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF)); +} + +static inline bool system_uses_irq_prio_masking(void) +{ + return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && + cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); +} + #define ARM64_SSBD_UNKNOWN -1 #define ARM64_SSBD_FORCE_DISABLE 0 #define ARM64_SSBD_KERNEL 1 diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 12f93e4d2452..5f1437099b99 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -76,6 +76,8 @@ #define ARM_CPU_IMP_BRCM 0x42 #define ARM_CPU_IMP_QCOM 0x51 #define ARM_CPU_IMP_NVIDIA 0x4E +#define ARM_CPU_IMP_FUJITSU 0x46 +#define ARM_CPU_IMP_HISI 0x48 #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -104,6 +106,10 @@ #define NVIDIA_CPU_PART_DENVER 0x003 #define NVIDIA_CPU_PART_CARMEL 0x004 +#define FUJITSU_CPU_PART_A64FX 0x001 + +#define HISI_CPU_PART_TSV110 0xD01 + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -122,6 +128,13 @@ #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) +#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) +#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) + +/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ +#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX +#define MIDR_FUJITSU_ERRATUM_010001_MASK (~MIDR_CPU_VAR_REV(1, 0)) +#define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_NFD1 | TCR_NFD0) #ifndef __ASSEMBLY__ @@ -151,6 +164,8 @@ struct midr_range { .rv_max = MIDR_CPU_VAR_REV(v_max, r_max), \ } +#define MIDR_REV_RANGE(m, v, r_min, r_max) MIDR_RANGE(m, v, r_min, v, r_max) +#define MIDR_REV(m, v, r) MIDR_RANGE(m, v, r, v, r) #define MIDR_ALL_VERSIONS(m) MIDR_RANGE(m, 0, 0, 0xf, 0xf) static inline bool is_midr_in_range(u32 midr, struct midr_range const *range) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 8d91f2233135..db452aa9e651 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -18,8 +18,11 @@ #include <linux/irqflags.h> +#include <asm/cpufeature.h> + #define DAIF_PROCCTX 0 #define DAIF_PROCCTX_NOIRQ PSR_I_BIT +#define DAIF_ERRCTX (PSR_I_BIT | PSR_A_BIT) /* mask/save/unmask/restore all exceptions, including interrupts. */ static inline void local_daif_mask(void) @@ -36,31 +39,61 @@ static inline unsigned long local_daif_save(void) { unsigned long flags; - flags = arch_local_save_flags(); + flags = read_sysreg(daif); + + if (system_uses_irq_prio_masking()) { + /* If IRQs are masked with PMR, reflect it in the flags */ + if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF) + flags |= PSR_I_BIT; + } local_daif_mask(); return flags; } -static inline void local_daif_unmask(void) -{ - trace_hardirqs_on(); - asm volatile( - "msr daifclr, #0xf // local_daif_unmask" - : - : - : "memory"); -} - static inline void local_daif_restore(unsigned long flags) { - if (!arch_irqs_disabled_flags(flags)) + bool irq_disabled = flags & PSR_I_BIT; + + if (!irq_disabled) { trace_hardirqs_on(); - arch_local_irq_restore(flags); + if (system_uses_irq_prio_masking()) + arch_local_irq_enable(); + } else if (!(flags & PSR_A_BIT)) { + /* + * If interrupts are disabled but we can take + * asynchronous errors, we can take NMIs + */ + if (system_uses_irq_prio_masking()) { + flags &= ~PSR_I_BIT; + /* + * There has been concern that the write to daif + * might be reordered before this write to PMR. + * From the ARM ARM DDI 0487D.a, section D1.7.1 + * "Accessing PSTATE fields": + * Writes to the PSTATE fields have side-effects on + * various aspects of the PE operation. All of these + * side-effects are guaranteed: + * - Not to be visible to earlier instructions in + * the execution stream. + * - To be visible to later instructions in the + * execution stream + * + * Also, writes to PMR are self-synchronizing, so no + * interrupts with a lower priority than PMR is signaled + * to the PE after the write. + * + * So we don't need additional synchronization here. + */ + arch_local_irq_disable(); + } + } + + write_sysreg(flags, daif); - if (arch_irqs_disabled_flags(flags)) + if (irq_disabled) trace_hardirqs_off(); } diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h index 3dd3d664c5c5..4658c937e173 100644 --- a/arch/arm64/include/asm/device.h +++ b/arch/arm64/include/asm/device.h @@ -20,9 +20,6 @@ struct dev_archdata { #ifdef CONFIG_IOMMU_API void *iommu; /* private IOMMU data */ #endif -#ifdef CONFIG_XEN - const struct dma_map_ops *dev_dma_ops; -#endif }; struct pdev_archdata { diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index c41f3fb1446c..de98191e4c7d 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -24,26 +24,11 @@ #include <xen/xen.h> #include <asm/xen/hypervisor.h> -extern const struct dma_map_ops dummy_dma_ops; - static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - /* - * We expect no ISA devices, and all other DMA masters are expected to - * have someone call arch_setup_dma_ops at device creation time. - */ - return &dummy_dma_ops; + return NULL; } -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent); -#define arch_setup_dma_ops arch_setup_dma_ops - -#ifdef CONFIG_IOMMU_DMA -void arch_teardown_dma_ops(struct device *dev); -#define arch_teardown_dma_ops arch_teardown_dma_ops -#endif - /* * Do not use this function in a driver, it is only provided for * arch/arm/mm/xen.c, which is used by arm64 as well. diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 7ed320895d1f..c9e9a6978e73 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -44,6 +44,17 @@ efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) +/* + * Even when Linux uses IRQ priorities for IRQ disabling, EFI does not. + * And EFI shouldn't really play around with priority masking as it is not aware + * which priorities the OS has assigned to its interrupts. + */ +#define arch_efi_save_flags(state_flags) \ + ((void)((state_flags) = read_sysreg(daif))) + +#define arch_efi_restore_flags(state_flags) write_sysreg(state_flags, daif) + + /* arch specific definitions used by the stub code */ /* diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 433b9554c6a1..6adc1a90e7e6 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -117,7 +117,11 @@ * 64-bit, this is above 4GB to leave the entire 32-bit address * space open for things that want to use the area for 32-bit pointers. */ +#ifdef CONFIG_ARM64_FORCE_52BIT #define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) +#else +#define ELF_ET_DYN_BASE (2 * DEFAULT_MAP_WINDOW_64 / 3) +#endif /* CONFIG_ARM64_FORCE_52BIT */ #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 676de2ec1762..52233f00d53d 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -29,23 +29,24 @@ #define ESR_ELx_EC_CP14_MR (0x05) #define ESR_ELx_EC_CP14_LS (0x06) #define ESR_ELx_EC_FP_ASIMD (0x07) -#define ESR_ELx_EC_CP10_ID (0x08) -/* Unallocated EC: 0x09 - 0x0B */ +#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */ +#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */ +/* Unallocated EC: 0x0A - 0x0B */ #define ESR_ELx_EC_CP14_64 (0x0C) /* Unallocated EC: 0x0d */ #define ESR_ELx_EC_ILL (0x0E) /* Unallocated EC: 0x0F - 0x10 */ #define ESR_ELx_EC_SVC32 (0x11) -#define ESR_ELx_EC_HVC32 (0x12) -#define ESR_ELx_EC_SMC32 (0x13) +#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */ +#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */ /* Unallocated EC: 0x14 */ #define ESR_ELx_EC_SVC64 (0x15) -#define ESR_ELx_EC_HVC64 (0x16) -#define ESR_ELx_EC_SMC64 (0x17) +#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */ +#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */ #define ESR_ELx_EC_SYS64 (0x18) #define ESR_ELx_EC_SVE (0x19) /* Unallocated EC: 0x1A - 0x1E */ -#define ESR_ELx_EC_IMP_DEF (0x1f) +#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ #define ESR_ELx_EC_IABT_LOW (0x20) #define ESR_ELx_EC_IABT_CUR (0x21) #define ESR_ELx_EC_PC_ALIGN (0x22) @@ -68,7 +69,7 @@ /* Unallocated EC: 0x36 - 0x37 */ #define ESR_ELx_EC_BKPT32 (0x38) /* Unallocated EC: 0x39 */ -#define ESR_ELx_EC_VECTOR32 (0x3A) +#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */ /* Unallocted EC: 0x3B */ #define ESR_ELx_EC_BRK64 (0x3C) /* Unallocated EC: 0x3D - 0x3F */ diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index ec1e6d6fa14c..f987b8a8f325 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -55,7 +55,11 @@ enum fixed_addresses { #ifdef CONFIG_ACPI_APEI_GHES /* Used for GHES mapping from assorted contexts */ FIX_APEI_GHES_IRQ, - FIX_APEI_GHES_NMI, + FIX_APEI_GHES_SEA, +#ifdef CONFIG_ARM_SDE_INTERFACE + FIX_APEI_GHES_SDEI_NORMAL, + FIX_APEI_GHES_SDEI_CRITICAL, +#endif #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index fac54fb050d0..15a6587e12f9 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -13,6 +13,7 @@ #include <asm/insn.h> +#define HAVE_FUNCTION_GRAPH_FP_TEST #define MCOUNT_ADDR ((unsigned long)_mcount) #define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 07fe2479d310..c7e1a7837706 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -30,8 +30,8 @@ do { \ " prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ -"2: stlxr %w3, %w0, %2\n" \ -" cbnz %w3, 1b\n" \ +"2: stlxr %w0, %w3, %2\n" \ +" cbnz %w0, 1b\n" \ " dmb ish\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ @@ -57,23 +57,23 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mov %w0, %w4", + __futex_atomic_op("mov %w3, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %w0, %w1, %w4", + __futex_atomic_op("add %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("orr %w0, %w1, %w4", + __futex_atomic_op("orr %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("and %w0, %w1, %w4", + __futex_atomic_op("and %w3, %w1, %w4", ret, oldval, uaddr, tmp, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("eor %w0, %w1, %w4", + __futex_atomic_op("eor %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; default: @@ -96,7 +96,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, u32 val, tmp; u32 __user *uaddr; - if (!access_ok(VERIFY_WRITE, _uaddr, sizeof(u32))) + if (!access_ok(_uaddr, sizeof(u32))) return -EFAULT; uaddr = __uaccess_mask_ptr(_uaddr); diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 1473fc2f7ab7..89691c86640a 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -17,8 +17,12 @@ #define __ASM_HARDIRQ_H #include <linux/cache.h> +#include <linux/percpu.h> #include <linux/threads.h> +#include <asm/barrier.h> #include <asm/irq.h> +#include <asm/kvm_arm.h> +#include <asm/sysreg.h> #define NR_IPI 7 @@ -37,6 +41,33 @@ u64 smp_irq_stat_cpu(unsigned int cpu); #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 +struct nmi_ctx { + u64 hcr; +}; + +DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts); + +#define arch_nmi_enter() \ + do { \ + if (is_kernel_in_hyp_mode()) { \ + struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ + nmi_ctx->hcr = read_sysreg(hcr_el2); \ + if (!(nmi_ctx->hcr & HCR_TGE)) { \ + write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \ + isb(); \ + } \ + } \ + } while (0) + +#define arch_nmi_exit() \ + do { \ + if (is_kernel_in_hyp_mode()) { \ + struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ + if (!(nmi_ctx->hcr & HCR_TGE)) \ + write_sysreg(nmi_ctx->hcr, hcr_el2); \ + } \ + } while (0) + static inline void ack_bad_irq(unsigned int irq) { extern unsigned long irq_err_count; diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index fb6609875455..c6a07a3b433e 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -20,6 +20,11 @@ #include <asm/page.h> +#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION +#define arch_hugetlb_migration_supported arch_hugetlb_migration_supported +extern bool arch_hugetlb_migration_supported(struct hstate *h); +#endif + #define __HAVE_ARCH_HUGE_PTEP_GET static inline pte_t huge_ptep_get(pte_t *ptep) { diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h new file mode 100644 index 000000000000..e2c27a2278e9 --- /dev/null +++ b/arch/arm64/include/asm/image.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_IMAGE_H +#define __ASM_IMAGE_H + +#define ARM64_IMAGE_MAGIC "ARM\x64" + +#define ARM64_IMAGE_FLAG_BE_SHIFT 0 +#define ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT (ARM64_IMAGE_FLAG_BE_SHIFT + 1) +#define ARM64_IMAGE_FLAG_PHYS_BASE_SHIFT \ + (ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT + 2) +#define ARM64_IMAGE_FLAG_BE_MASK 0x1 +#define ARM64_IMAGE_FLAG_PAGE_SIZE_MASK 0x3 +#define ARM64_IMAGE_FLAG_PHYS_BASE_MASK 0x1 + +#define ARM64_IMAGE_FLAG_LE 0 +#define ARM64_IMAGE_FLAG_BE 1 +#define ARM64_IMAGE_FLAG_PAGE_SIZE_4K 1 +#define ARM64_IMAGE_FLAG_PAGE_SIZE_16K 2 +#define ARM64_IMAGE_FLAG_PAGE_SIZE_64K 3 +#define ARM64_IMAGE_FLAG_PHYS_BASE 1 + +#ifndef __ASSEMBLY__ + +#define arm64_image_flag_field(flags, field) \ + (((flags) >> field##_SHIFT) & field##_MASK) + +/* + * struct arm64_image_header - arm64 kernel image header + * See Documentation/arm64/booting.txt for details + * + * @code0: Executable code, or + * @mz_header alternatively used for part of MZ header + * @code1: Executable code + * @text_offset: Image load offset + * @image_size: Effective Image size + * @flags: kernel flags + * @reserved: reserved + * @magic: Magic number + * @reserved5: reserved, or + * @pe_header: alternatively used for PE COFF offset + */ + +struct arm64_image_header { + __le32 code0; + __le32 code1; + __le64 text_offset; + __le64 image_size; + __le64 flags; + __le64 res2; + __le64 res3; + __le64 res4; + __le32 magic; + __le32 res5; +}; + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_IMAGE_H */ diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index c6802dea6cab..9c01f04db64d 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -261,6 +261,11 @@ enum aarch64_insn_prfm_policy { AARCH64_INSN_PRFM_POLICY_STRM, }; +enum aarch64_insn_adr_type { + AARCH64_INSN_ADR_TYPE_ADRP, + AARCH64_INSN_ADR_TYPE_ADR, +}; + #define __AARCH64_INSN_FUNCS(abbr, mask, val) \ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ { return (code & (mask)) == (val); } \ @@ -393,6 +398,9 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, enum aarch64_insn_register src, int imm, enum aarch64_insn_variant variant, enum aarch64_insn_adsb_type type); +u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr, + enum aarch64_insn_register reg, + enum aarch64_insn_adr_type type); u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst, enum aarch64_insn_register src, int immr, int imms, diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 9f8b915af3a7..8bb7210ac286 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -104,7 +104,24 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) } /* IO barriers */ -#define __iormb() rmb() +#define __iormb(v) \ +({ \ + unsigned long tmp; \ + \ + rmb(); \ + \ + /* \ + * Create a dummy control dependency from the IO read to any \ + * later instructions. This ensures that a subsequent call to \ + * udelay() will be ordered due to the ISB in get_cycles(). \ + */ \ + asm volatile("eor %0, %1, %1\n" \ + "cbnz %0, ." \ + : "=r" (tmp) : "r" ((unsigned long)(v)) \ + : "memory"); \ +}) + +#define __io_par(v) __iormb(v) #define __iowmb() wmb() #define mmiowb() do { } while (0) @@ -129,10 +146,10 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * following Normal memory access. Writes are ordered relative to any prior * Normal memory access. */ -#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) -#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) -#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; }) -#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(); __v; }) +#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(__v); __v; }) +#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(__v); __v; }) +#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; }) +#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; }) #define writeb(v,c) ({ __iowmb(); writeb_relaxed((v),(c)); }) #define writew(v,c) ({ __iowmb(); writew_relaxed((v),(c)); }) @@ -183,9 +200,9 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); /* * io{read,write}{16,32,64}be() macros */ -#define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; }) -#define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; }) -#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(); __v; }) +#define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(__v); __v; }) +#define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(__v); __v; }) +#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(__v); __v; }) #define iowrite16be(v,p) ({ __iowmb(); __raw_writew((__force __u16)cpu_to_be16(v), p); }) #define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); }) diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 24692edf1a69..43d8366c1e87 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -18,7 +18,9 @@ #ifdef __KERNEL__ +#include <asm/alternative.h> #include <asm/ptrace.h> +#include <asm/sysreg.h> /* * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and @@ -36,33 +38,27 @@ /* * CPU interrupt mask handling. */ -static inline unsigned long arch_local_irq_save(void) -{ - unsigned long flags; - asm volatile( - "mrs %0, daif // arch_local_irq_save\n" - "msr daifset, #2" - : "=r" (flags) - : - : "memory"); - return flags; -} - static inline void arch_local_irq_enable(void) { - asm volatile( - "msr daifclr, #2 // arch_local_irq_enable" - : + asm volatile(ALTERNATIVE( + "msr daifclr, #2 // arch_local_irq_enable\n" + "nop", + "msr_s " __stringify(SYS_ICC_PMR_EL1) ",%0\n" + "dsb sy", + ARM64_HAS_IRQ_PRIO_MASKING) : + : "r" ((unsigned long) GIC_PRIO_IRQON) : "memory"); } static inline void arch_local_irq_disable(void) { - asm volatile( - "msr daifset, #2 // arch_local_irq_disable" - : + asm volatile(ALTERNATIVE( + "msr daifset, #2 // arch_local_irq_disable", + "msr_s " __stringify(SYS_ICC_PMR_EL1) ", %0", + ARM64_HAS_IRQ_PRIO_MASKING) : + : "r" ((unsigned long) GIC_PRIO_IRQOFF) : "memory"); } @@ -71,12 +67,44 @@ static inline void arch_local_irq_disable(void) */ static inline unsigned long arch_local_save_flags(void) { + unsigned long daif_bits; unsigned long flags; - asm volatile( - "mrs %0, daif // arch_local_save_flags" - : "=r" (flags) - : + + daif_bits = read_sysreg(daif); + + /* + * The asm is logically equivalent to: + * + * if (system_uses_irq_prio_masking()) + * flags = (daif_bits & PSR_I_BIT) ? + * GIC_PRIO_IRQOFF : + * read_sysreg_s(SYS_ICC_PMR_EL1); + * else + * flags = daif_bits; + */ + asm volatile(ALTERNATIVE( + "mov %0, %1\n" + "nop\n" + "nop", + "mrs_s %0, " __stringify(SYS_ICC_PMR_EL1) "\n" + "ands %1, %1, " __stringify(PSR_I_BIT) "\n" + "csel %0, %0, %2, eq", + ARM64_HAS_IRQ_PRIO_MASKING) + : "=&r" (flags), "+r" (daif_bits) + : "r" ((unsigned long) GIC_PRIO_IRQOFF) : "memory"); + + return flags; +} + +static inline unsigned long arch_local_irq_save(void) +{ + unsigned long flags; + + flags = arch_local_save_flags(); + + arch_local_irq_disable(); + return flags; } @@ -85,16 +113,32 @@ static inline unsigned long arch_local_save_flags(void) */ static inline void arch_local_irq_restore(unsigned long flags) { - asm volatile( - "msr daif, %0 // arch_local_irq_restore" - : - : "r" (flags) - : "memory"); + asm volatile(ALTERNATIVE( + "msr daif, %0\n" + "nop", + "msr_s " __stringify(SYS_ICC_PMR_EL1) ", %0\n" + "dsb sy", + ARM64_HAS_IRQ_PRIO_MASKING) + : "+r" (flags) + : + : "memory"); } static inline int arch_irqs_disabled_flags(unsigned long flags) { - return flags & PSR_I_BIT; + int res; + + asm volatile(ALTERNATIVE( + "and %w0, %w1, #" __stringify(PSR_I_BIT) "\n" + "nop", + "cmp %w1, #" __stringify(GIC_PRIO_IRQOFF) "\n" + "cset %w0, ls", + ARM64_HAS_IRQ_PRIO_MASKING) + : "=&r" (res) + : "r" ((int) flags) + : "memory"); + + return res; } #endif #endif diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index 8758bb008436..b52aacd2c526 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -4,12 +4,16 @@ #ifndef __ASSEMBLY__ -#ifdef CONFIG_KASAN - #include <linux/linkage.h> #include <asm/memory.h> #include <asm/pgtable-types.h> +#define arch_kasan_set_tag(addr, tag) __tag_set(addr, tag) +#define arch_kasan_reset_tag(addr) __tag_reset(addr) +#define arch_kasan_get_tag(addr) __tag_get(addr) + +#ifdef CONFIG_KASAN + /* * KASAN_SHADOW_START: beginning of the kernel virtual addresses. * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses, diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index e17f0529a882..67e4cb75d1fd 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -93,6 +93,25 @@ static inline void crash_prepare_suspend(void) {} static inline void crash_post_resume(void) {} #endif +#ifdef CONFIG_KEXEC_FILE +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + void *dtb; + unsigned long dtb_mem; +}; + +extern const struct kexec_file_ops kexec_image_ops; + +struct kimage; + +extern int arch_kimage_file_post_load_cleanup(struct kimage *image); +extern int load_other_segments(struct kimage *image, + unsigned long kernel_load_addr, unsigned long kernel_size, + char *initrd, unsigned long initrd_len, + char *cmdline); +#endif + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 6f602af5263c..7f9d2bfcf82e 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -24,6 +24,8 @@ /* Hyp Configuration Register (HCR) bits */ #define HCR_FWB (UL(1) << 46) +#define HCR_API (UL(1) << 41) +#define HCR_APK (UL(1) << 40) #define HCR_TEA (UL(1) << 37) #define HCR_TERR (UL(1) << 36) #define HCR_TLOR (UL(1) << 35) @@ -87,6 +89,7 @@ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ HCR_FMO | HCR_IMO) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) +#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) /* TCR_EL2 Registers bits */ @@ -104,7 +107,7 @@ TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK) /* VTCR_EL2 Registers bits */ -#define VTCR_EL2_RES1 (1 << 31) +#define VTCR_EL2_RES1 (1U << 31) #define VTCR_EL2_HD (1 << 22) #define VTCR_EL2_HA (1 << 21) #define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT @@ -320,10 +323,6 @@ #define PAR_TO_HPFAR(par) \ (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8) -#define kvm_arm_exception_type \ - {0, "IRQ" }, \ - {1, "TRAP" } - #define ECN(x) { ESR_ELx_EC_##x, #x } #define kvm_arm_exception_class \ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index aea01a09eb94..f5b79e995f40 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -25,6 +25,7 @@ #define ARM_EXIT_WITH_SERROR_BIT 31 #define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT)) +#define ARM_EXCEPTION_IS_TRAP(x) (ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_TRAP) #define ARM_SERROR_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_SERROR_BIT)) #define ARM_EXCEPTION_IRQ 0 @@ -34,6 +35,12 @@ /* The hyp-stub will return this for any kvm_call_hyp() call */ #define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR +#define kvm_arm_exception_type \ + {ARM_EXCEPTION_IRQ, "IRQ" }, \ + {ARM_EXCEPTION_EL1_SERROR, "SERROR" }, \ + {ARM_EXCEPTION_TRAP, "TRAP" }, \ + {ARM_EXCEPTION_HYP_GONE, "HYP_GONE" } + #ifndef __ASSEMBLY__ #include <linux/mm.h> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 21247870def7..d3842791e1c4 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -24,6 +24,7 @@ #include <linux/kvm_host.h> +#include <asm/debug-monitors.h> #include <asm/esr.h> #include <asm/kvm_arm.h> #include <asm/kvm_hyp.h> @@ -76,6 +77,10 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) */ if (!vcpu_el1_is_32bit(vcpu)) vcpu->arch.hcr_el2 |= HCR_TID3; + + if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || + vcpu_el1_is_32bit(vcpu)) + vcpu->arch.hcr_el2 |= HCR_TID2; } static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) @@ -147,14 +152,6 @@ static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) return true; } -static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) -{ - if (vcpu_mode_is_32bit(vcpu)) - kvm_skip_instr32(vcpu, is_wide_instr); - else - *vcpu_pc(vcpu) += 4; -} - static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) { *vcpu_cpsr(vcpu) |= PSR_AA32_T_BIT; @@ -338,6 +335,14 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) return ESR_ELx_SYS64_ISS_RT(esr); } +static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_trap_is_iabt(vcpu)) + return false; + + return kvm_vcpu_dabt_iswrite(vcpu); +} + static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) { return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; @@ -424,4 +429,30 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, return data; /* Leave LE untouched */ } +static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + if (vcpu_mode_is_32bit(vcpu)) + kvm_skip_instr32(vcpu, is_wide_instr); + else + *vcpu_pc(vcpu) += 4; + + /* advance the singlestep state machine */ + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; +} + +/* + * Skip an instruction which has been emulated at hyp while most guest sysregs + * are live. + */ +static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu) +{ + *vcpu_pc(vcpu) = read_sysreg_el2(elr); + vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(spsr); + + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + + write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, spsr); + write_sysreg_el2(*vcpu_pc(vcpu), elr); +} + #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 52fbc823ff8c..a01fe087e022 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -24,12 +24,14 @@ #include <linux/types.h> #include <linux/kvm_types.h> +#include <asm/arch_gicv3.h> #include <asm/cpufeature.h> #include <asm/daifflags.h> #include <asm/fpsimd.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> +#include <asm/smp_plat.h> #include <asm/thread_info.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -48,6 +50,7 @@ #define KVM_REQ_SLEEP \ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) +#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); @@ -56,16 +59,19 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext); void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); -struct kvm_arch { +struct kvm_vmid { /* The VMID generation used for the virt. memory system */ u64 vmid_gen; u32 vmid; +}; + +struct kvm_arch { + struct kvm_vmid vmid; /* stage2 entry level table */ pgd_t *pgd; + phys_addr_t pgd_phys; - /* VTTBR value associated with above pgd and vmid */ - u64 vttbr; /* VTCR_EL2 value for this VM */ u64 vtcr; @@ -208,6 +214,13 @@ struct kvm_cpu_context { typedef struct kvm_cpu_context kvm_cpu_context_t; +struct vcpu_reset_state { + unsigned long pc; + unsigned long r0; + bool be; + bool reset; +}; + struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; @@ -297,6 +310,9 @@ struct kvm_vcpu_arch { /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ u64 vsesr_el2; + /* Additional reset state */ + struct vcpu_reset_state reset_state; + /* True when deferrable sysregs are loaded on the physical CPU, * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ bool sysregs_loaded_on_cpu; @@ -319,7 +335,7 @@ struct kvm_vcpu_arch { */ #define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) -u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg); +u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); /* @@ -360,7 +376,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); @@ -370,7 +386,36 @@ void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); u64 __kvm_call_hyp(void *hypfn, ...); -#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) + +/* + * The couple of isb() below are there to guarantee the same behaviour + * on VHE as on !VHE, where the eret to EL1 acts as a context + * synchronization event. + */ +#define kvm_call_hyp(f, ...) \ + do { \ + if (has_vhe()) { \ + f(__VA_ARGS__); \ + isb(); \ + } else { \ + __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \ + } \ + } while(0) + +#define kvm_call_hyp_ret(f, ...) \ + ({ \ + typeof(f(__VA_ARGS__)) ret; \ + \ + if (has_vhe()) { \ + ret = f(__VA_ARGS__); \ + isb(); \ + } else { \ + ret = __kvm_call_hyp(kvm_ksym_ref(f), \ + ##__VA_ARGS__); \ + } \ + \ + ret; \ + }) void force_vm_exit(const cpumask_t *mask); void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); @@ -389,6 +434,13 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); +static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt, + int cpu) +{ + /* The host's MPIDR is immutable, so let's set it up at boot time */ + cpu_ctxt->sys_regs[MPIDR_EL1] = cpu_logical_map(cpu); +} + void __kvm_enable_ssbs(void); static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, @@ -422,7 +474,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, } } -static inline bool kvm_arch_check_sve_has_vhe(void) +static inline bool kvm_arch_requires_vhe(void) { /* * The Arm architecture specifies that implementation of SVE @@ -430,9 +482,13 @@ static inline bool kvm_arch_check_sve_has_vhe(void) * relies on this when SVE is present: */ if (system_supports_sve()) - return has_vhe(); - else return true; + + /* Some implementations have defects that confine them to VHE */ + if (cpus_have_cap(ARM64_WORKAROUND_1165522)) + return true; + + return false; } static inline void kvm_arch_hardware_unsetup(void) {} @@ -445,7 +501,6 @@ void kvm_arm_init_debug(void); void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); -bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, @@ -471,10 +526,25 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) static inline void kvm_arm_vhe_guest_enter(void) { local_daif_mask(); + + /* + * Having IRQs masked via PMR when entering the guest means the GIC + * will not signal the CPU of interrupts of lower priority, and the + * only way to get out will be via guest exceptions. + * Naturally, we want to avoid this. + */ + if (system_uses_irq_prio_masking()) { + gic_write_pmr(GIC_PRIO_IRQON); + dsb(sy); + } } static inline void kvm_arm_vhe_guest_exit(void) { + /* + * local_daif_restore() takes care to properly restore PSTATE.DAIF + * and the GIC PMR if the host is using IRQ priorities. + */ local_daif_restore(DAIF_PROCCTX_NOIRQ); /* diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 23aca66767f9..4da765f2cca5 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -20,6 +20,8 @@ #include <linux/compiler.h> #include <linux/kvm_host.h> +#include <asm/alternative.h> +#include <asm/kvm_mmu.h> #include <asm/sysreg.h> #define __hyp_text __section(.hyp.text) notrace @@ -162,7 +164,14 @@ void __noreturn __hyp_do_panic(unsigned long, ...); static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm) { write_sysreg(kvm->arch.vtcr, vtcr_el2); - write_sysreg(kvm->arch.vttbr, vttbr_el2); + write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); + + /* + * ARM erratum 1165522 requires the actual execution of the above + * before we can switch to the EL1/EL0 translation regime used by + * the guest. + */ + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522)); } #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 658657367f2f..ebeefcf835e8 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -138,7 +138,8 @@ static inline unsigned long __kern_hyp_va(unsigned long v) }) /* - * We currently only support a 40bit IPA. + * We currently support using a VM-specified IPA size. For backward + * compatibility, the default IPA size is fixed to 40bits. */ #define KVM_PHYS_SHIFT (40) @@ -184,6 +185,17 @@ void kvm_clear_hyp_idmap(void); #define kvm_mk_pgd(pudp) \ __pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE) +#define kvm_set_pud(pudp, pud) set_pud(pudp, pud) + +#define kvm_pfn_pte(pfn, prot) pfn_pte(pfn, prot) +#define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot) +#define kvm_pfn_pud(pfn, prot) pfn_pud(pfn, prot) + +#define kvm_pud_pfn(pud) pud_pfn(pud) + +#define kvm_pmd_mkhuge(pmd) pmd_mkhuge(pmd) +#define kvm_pud_mkhuge(pud) pud_mkhuge(pud) + static inline pte_t kvm_s2pte_mkwrite(pte_t pte) { pte_val(pte) |= PTE_S2_RDWR; @@ -196,6 +208,12 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) return pmd; } +static inline pud_t kvm_s2pud_mkwrite(pud_t pud) +{ + pud_val(pud) |= PUD_S2_RDWR; + return pud; +} + static inline pte_t kvm_s2pte_mkexec(pte_t pte) { pte_val(pte) &= ~PTE_S2_XN; @@ -208,6 +226,12 @@ static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd) return pmd; } +static inline pud_t kvm_s2pud_mkexec(pud_t pud) +{ + pud_val(pud) &= ~PUD_S2_XN; + return pud; +} + static inline void kvm_set_s2pte_readonly(pte_t *ptep) { pteval_t old_pteval, pteval; @@ -246,6 +270,31 @@ static inline bool kvm_s2pmd_exec(pmd_t *pmdp) return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN); } +static inline void kvm_set_s2pud_readonly(pud_t *pudp) +{ + kvm_set_s2pte_readonly((pte_t *)pudp); +} + +static inline bool kvm_s2pud_readonly(pud_t *pudp) +{ + return kvm_s2pte_readonly((pte_t *)pudp); +} + +static inline bool kvm_s2pud_exec(pud_t *pudp) +{ + return !(READ_ONCE(pud_val(*pudp)) & PUD_S2_XN); +} + +static inline pud_t kvm_s2pud_mkyoung(pud_t pud) +{ + return pud_mkyoung(pud); +} + +static inline bool kvm_s2pud_young(pud_t pud) +{ + return pud_young(pud); +} + #define hyp_pte_table_empty(ptep) kvm_page_empty(ptep) #ifdef __PAGETABLE_PMD_FOLDED @@ -396,6 +445,17 @@ static inline int kvm_read_guest_lock(struct kvm *kvm, return ret; } +static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, + const void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_write_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + #ifdef CONFIG_KVM_INDIRECT_VECTORS /* * EL2 vectors can be mapped and rerouted in a number of ways, @@ -543,9 +603,15 @@ static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm) return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)); } -static inline bool kvm_cpu_has_cnp(void) +static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) { - return system_supports_cnp(); + struct kvm_vmid *vmid = &kvm->arch.vmid; + u64 vmid_field, baddr; + u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0; + + baddr = kvm->arch.pgd_phys; + vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; + return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h new file mode 100644 index 000000000000..8ac6ee77437c --- /dev/null +++ b/arch/arm64/include/asm/kvm_ras.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018 - Arm Ltd */ + +#ifndef __ARM64_KVM_RAS_H__ +#define __ARM64_KVM_RAS_H__ + +#include <linux/acpi.h> +#include <linux/errno.h> +#include <linux/types.h> + +#include <asm/acpi.h> + +/* + * Was this synchronous external abort a RAS notification? + * Returns '0' for errors handled by some RAS subsystem, or -ENOENT. + */ +static inline int kvm_handle_guest_sea(phys_addr_t addr, unsigned int esr) +{ + /* apei_claim_sea(NULL) expects to mask interrupts itself */ + lockdep_assert_irqs_enabled(); + + return apei_claim_sea(NULL); +} + +#endif /* __ARM64_KVM_RAS_H__ */ diff --git a/arch/arm64/include/asm/memblock.h b/arch/arm64/include/asm/memblock.h deleted file mode 100644 index 6afeed2467f1..000000000000 --- a/arch/arm64/include/asm/memblock.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_MEMBLOCK_H -#define __ASM_MEMBLOCK_H - -extern void arm64_memblock_init(void); - -#endif diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f0a5c9531e8b..290195168bb3 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -53,8 +53,11 @@ #define PAGE_OFFSET (UL(0xffffffffffffffff) - \ (UL(1) << (VA_BITS - 1)) + 1) #define KIMAGE_VADDR (MODULES_END) +#define BPF_JIT_REGION_START (VA_START + KASAN_SHADOW_SIZE) +#define BPF_JIT_REGION_SIZE (SZ_128M) +#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) -#define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE) +#define MODULES_VADDR (BPF_JIT_REGION_END) #define MODULES_VSIZE (SZ_128M) #define VMEMMAP_START (PAGE_OFFSET - VMEMMAP_SIZE) #define PCI_IO_END (VMEMMAP_START - SZ_2M) @@ -64,13 +67,18 @@ #define KERNEL_START _text #define KERNEL_END _end +#ifdef CONFIG_ARM64_USER_VA_BITS_52 +#define MAX_USER_VA_BITS 52 +#else +#define MAX_USER_VA_BITS VA_BITS +#endif + /* - * KASAN requires 1/8th of the kernel virtual address space for the shadow - * region. KASAN can bloat the stack significantly, so double the (minimum) - * stack size when KASAN is in use. + * Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual + * address space for the shadow region respectively. They can bloat the stack + * significantly, so double the (minimum) stack size when they are in use. */ #ifdef CONFIG_KASAN -#define KASAN_SHADOW_SCALE_SHIFT 3 #define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT)) #define KASAN_THREAD_SHIFT 1 #else @@ -159,14 +167,6 @@ #define IOREMAP_MAX_ORDER (PMD_SHIFT) #endif -#ifdef CONFIG_BLK_DEV_INITRD -#define __early_init_dt_declare_initrd(__start, __end) \ - do { \ - initrd_start = (__start); \ - initrd_end = (__end); \ - } while (0) -#endif - #ifndef __ASSEMBLY__ #include <linux/bitops.h> @@ -187,6 +187,9 @@ static inline unsigned long kaslr_offset(void) return kimage_vaddr - KIMAGE_VADDR; } +/* the actual size of a user virtual address */ +extern u64 vabits_user; + /* * Allow all memory at the discovery stage. We will clip it later. */ @@ -204,6 +207,26 @@ static inline unsigned long kaslr_offset(void) #define PHYS_PFN_OFFSET (PHYS_OFFSET >> PAGE_SHIFT) /* + * When dealing with data aborts, watchpoints, or instruction traps we may end + * up with a tagged userland pointer. Clear the tag to get a sane pointer to + * pass on to access_ok(), for instance. + */ +#define untagged_addr(addr) \ + ((__typeof__(addr))sign_extend64((u64)(addr), 55)) + +#ifdef CONFIG_KASAN_SW_TAGS +#define __tag_shifted(tag) ((u64)(tag) << 56) +#define __tag_set(addr, tag) (__typeof__(addr))( \ + ((u64)(addr) & ~__tag_shifted(0xff)) | __tag_shifted(tag)) +#define __tag_reset(addr) untagged_addr(addr) +#define __tag_get(addr) (__u8)((u64)(addr) >> 56) +#else +#define __tag_set(addr, tag) (addr) +#define __tag_reset(addr) (addr) +#define __tag_get(addr) 0 +#endif + +/* * Physical vs virtual RAM address space conversion. These are * private definitions which should NOT be used outside memory.h * files. Use virt_to_phys/phys_to_virt/__pa/__va instead. @@ -286,7 +309,14 @@ static inline void *phys_to_virt(phys_addr_t x) #define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page)) #define __page_to_voff(kaddr) (((u64)(kaddr) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) -#define page_to_virt(page) ((void *)((__page_to_voff(page)) | PAGE_OFFSET)) +#define page_to_virt(page) ({ \ + unsigned long __addr = \ + ((__page_to_voff(page)) | PAGE_OFFSET); \ + unsigned long __addr_tag = \ + __tag_set(__addr, page_kasan_tag(page)); \ + ((void *)__addr_tag); \ +}) + #define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) #define _virt_addr_valid(kaddr) pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \ @@ -294,9 +324,21 @@ static inline void *phys_to_virt(phys_addr_t x) #endif #endif -#define _virt_addr_is_linear(kaddr) (((u64)(kaddr)) >= PAGE_OFFSET) -#define virt_addr_valid(kaddr) (_virt_addr_is_linear(kaddr) && \ - _virt_addr_valid(kaddr)) +#define _virt_addr_is_linear(kaddr) \ + (__tag_reset((u64)(kaddr)) >= PAGE_OFFSET) +#define virt_addr_valid(kaddr) \ + (_virt_addr_is_linear(kaddr) && _virt_addr_valid(kaddr)) + +/* + * Given that the GIC architecture permits ITS implementations that can only be + * configured with a LPI table address once, GICv3 systems with many CPUs may + * end up reserving a lot of different regions after a kexec for their LPI + * tables (one per CPU), as we are forced to reuse the same memory after kexec + * (and thus reserve it persistently with EFI beforehand) + */ +#if defined(CONFIG_EFI) && defined(CONFIG_ARM_GIC_V3_ITS) +# define INIT_MEMBLOCK_RESERVED_REGIONS (INIT_MEMBLOCK_REGIONS + NR_CPUS + 1) +#endif #include <asm-generic/memory_model.h> diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 7689c7aa1d77..67ef25d037ea 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -16,6 +16,8 @@ #ifndef __ASM_MMU_H #define __ASM_MMU_H +#include <asm/cputype.h> + #define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */ #define USER_ASID_BIT 48 #define USER_ASID_FLAG (UL(1) << USER_ASID_BIT) @@ -44,6 +46,48 @@ static inline bool arm64_kernel_unmapped_at_el0(void) cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); } +static inline bool arm64_kernel_use_ng_mappings(void) +{ + bool tx1_bug; + + /* What's a kpti? Use global mappings if we don't know. */ + if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) + return false; + + /* + * Note: this function is called before the CPU capabilities have + * been configured, so our early mappings will be global. If we + * later determine that kpti is required, then + * kpti_install_ng_mappings() will make them non-global. + */ + if (arm64_kernel_unmapped_at_el0()) + return true; + + if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) + return false; + + /* + * KASLR is enabled so we're going to be enabling kpti on non-broken + * CPUs regardless of their susceptibility to Meltdown. Rather + * than force everybody to go through the G -> nG dance later on, + * just put down non-global mappings from the beginning. + */ + if (!IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { + tx1_bug = false; +#ifndef MODULE + } else if (!static_branch_likely(&arm64_const_caps_ready)) { + extern const struct midr_range cavium_erratum_27456_cpus[]; + + tx1_bug = is_midr_in_range_list(read_cpuid_id(), + cavium_erratum_27456_cpus); +#endif + } else { + tx1_bug = __cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456); + } + + return !tx1_bug && kaslr_offset() > 0; +} + typedef void (*bp_hardening_cb_t)(void); struct bp_hardening_data { @@ -85,6 +129,7 @@ static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) static inline void arm64_apply_bp_hardening(void) { } #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ +extern void arm64_memblock_init(void); extern void paging_init(void); extern void bootmem_init(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 1e58bf58c22b..2da3e478fd8f 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -35,6 +35,8 @@ #include <asm/sysreg.h> #include <asm/tlbflush.h> +extern bool rodata_full; + static inline void contextidr_thread_switch(struct task_struct *next) { if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR)) @@ -72,6 +74,9 @@ extern u64 idmap_ptrs_per_pgd; static inline bool __cpu_uses_extended_idmap(void) { + if (IS_ENABLED(CONFIG_ARM64_USER_VA_BITS_52)) + return false; + return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)); } diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 97d0ef12e2ff..cd9f4e9d04d3 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -22,7 +22,7 @@ #ifdef CONFIG_ARM64_MODULE_PLTS struct mod_plt_sec { - struct elf64_shdr *plt; + int plt_shndx; int plt_num_entries; int plt_max_entries; }; @@ -36,10 +36,12 @@ struct mod_arch_specific { }; #endif -u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, +u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs, + void *loc, const Elf64_Rela *rela, Elf64_Sym *sym); -u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val); +u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs, + void *loc, u64 val); #ifdef CONFIG_RANDOMIZE_BASE extern u64 module_alloc_base; @@ -56,39 +58,24 @@ struct plt_entry { * is exactly what we are dealing with here, we are free to use x16 * as a scratch register in the PLT veneers. */ - __le32 mov0; /* movn x16, #0x.... */ - __le32 mov1; /* movk x16, #0x...., lsl #16 */ - __le32 mov2; /* movk x16, #0x...., lsl #32 */ + __le32 adrp; /* adrp x16, .... */ + __le32 add; /* add x16, x16, #0x.... */ __le32 br; /* br x16 */ }; -static inline struct plt_entry get_plt_entry(u64 val) +static inline bool is_forbidden_offset_for_adrp(void *place) { - /* - * MOVK/MOVN/MOVZ opcode: - * +--------+------------+--------+-----------+-------------+---------+ - * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | - * +--------+------------+--------+-----------+-------------+---------+ - * - * Rd := 0x10 (x16) - * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) - * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) - * sf := 1 (64-bit variant) - */ - return (struct plt_entry){ - cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), - cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), - cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), - cpu_to_le32(0xd61f0200) - }; + return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) && + cpus_have_const_cap(ARM64_WORKAROUND_843419) && + ((u64)place & 0xfff) >= 0xff8; } -static inline bool plt_entries_equal(const struct plt_entry *a, - const struct plt_entry *b) +struct plt_entry get_plt_entry(u64 dst, void *pc); +bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b); + +static inline bool plt_entry_is_initialized(const struct plt_entry *e) { - return a->mov0 == b->mov0 && - a->mov1 == b->mov1 && - a->mov2 == b->mov2; + return e->adrp || e->add || e->br; } #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/include/asm/neon-intrinsics.h b/arch/arm64/include/asm/neon-intrinsics.h new file mode 100644 index 000000000000..71abfc7612b2 --- /dev/null +++ b/arch/arm64/include/asm/neon-intrinsics.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2018 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_NEON_INTRINSICS_H +#define __ASM_NEON_INTRINSICS_H + +#include <asm-generic/int-ll64.h> + +/* + * In the kernel, u64/s64 are [un]signed long long, not [un]signed long. + * So by redefining these macros to the former, we can force gcc-stdint.h + * to define uint64_t / in64_t in a compatible manner. + */ + +#ifdef __INT64_TYPE__ +#undef __INT64_TYPE__ +#define __INT64_TYPE__ long long +#endif + +#ifdef __UINT64_TYPE__ +#undef __UINT64_TYPE__ +#define __UINT64_TYPE__ unsigned long long +#endif + +/* + * genksyms chokes on the ARM NEON instrinsics system header, but we + * don't export anything it defines anyway, so just disregard when + * genksyms execute. + */ +#ifndef __GENKSYMS__ +#include <arm_neon.h> +#endif + +#ifdef CONFIG_CC_IS_CLANG +#pragma clang diagnostic ignored "-Wincompatible-pointer-types" +#endif + +#endif /* __ASM_NEON_INTRINSICS_H */ diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 21a81b59a0cc..6b81dd8cee01 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -48,263 +48,193 @@ static inline unsigned long __my_cpu_offset(void) } #define __my_cpu_offset __my_cpu_offset() -#define PERCPU_OP(op, asm_op) \ -static inline unsigned long __percpu_##op(void *ptr, \ - unsigned long val, int size) \ +#define PERCPU_RW_OPS(sz) \ +static inline unsigned long __percpu_read_##sz(void *ptr) \ { \ - unsigned long loop, ret; \ + return READ_ONCE(*(u##sz *)ptr); \ +} \ \ - switch (size) { \ - case 1: \ - asm ("//__per_cpu_" #op "_1\n" \ - "1: ldxrb %w[ret], %[ptr]\n" \ - #asm_op " %w[ret], %w[ret], %w[val]\n" \ - " stxrb %w[loop], %w[ret], %[ptr]\n" \ - " cbnz %w[loop], 1b" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u8 *)ptr) \ - : [val] "Ir" (val)); \ - break; \ - case 2: \ - asm ("//__per_cpu_" #op "_2\n" \ - "1: ldxrh %w[ret], %[ptr]\n" \ - #asm_op " %w[ret], %w[ret], %w[val]\n" \ - " stxrh %w[loop], %w[ret], %[ptr]\n" \ - " cbnz %w[loop], 1b" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u16 *)ptr) \ - : [val] "Ir" (val)); \ - break; \ - case 4: \ - asm ("//__per_cpu_" #op "_4\n" \ - "1: ldxr %w[ret], %[ptr]\n" \ - #asm_op " %w[ret], %w[ret], %w[val]\n" \ - " stxr %w[loop], %w[ret], %[ptr]\n" \ - " cbnz %w[loop], 1b" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u32 *)ptr) \ - : [val] "Ir" (val)); \ - break; \ - case 8: \ - asm ("//__per_cpu_" #op "_8\n" \ - "1: ldxr %[ret], %[ptr]\n" \ - #asm_op " %[ret], %[ret], %[val]\n" \ - " stxr %w[loop], %[ret], %[ptr]\n" \ - " cbnz %w[loop], 1b" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u64 *)ptr) \ - : [val] "Ir" (val)); \ - break; \ - default: \ - ret = 0; \ - BUILD_BUG(); \ - } \ - \ - return ret; \ -} - -PERCPU_OP(add, add) -PERCPU_OP(and, and) -PERCPU_OP(or, orr) -#undef PERCPU_OP - -static inline unsigned long __percpu_read(void *ptr, int size) -{ - unsigned long ret; - - switch (size) { - case 1: - ret = READ_ONCE(*(u8 *)ptr); - break; - case 2: - ret = READ_ONCE(*(u16 *)ptr); - break; - case 4: - ret = READ_ONCE(*(u32 *)ptr); - break; - case 8: - ret = READ_ONCE(*(u64 *)ptr); - break; - default: - ret = 0; - BUILD_BUG(); - } - - return ret; +static inline void __percpu_write_##sz(void *ptr, unsigned long val) \ +{ \ + WRITE_ONCE(*(u##sz *)ptr, (u##sz)val); \ } -static inline void __percpu_write(void *ptr, unsigned long val, int size) -{ - switch (size) { - case 1: - WRITE_ONCE(*(u8 *)ptr, (u8)val); - break; - case 2: - WRITE_ONCE(*(u16 *)ptr, (u16)val); - break; - case 4: - WRITE_ONCE(*(u32 *)ptr, (u32)val); - break; - case 8: - WRITE_ONCE(*(u64 *)ptr, (u64)val); - break; - default: - BUILD_BUG(); - } +#define __PERCPU_OP_CASE(w, sfx, name, sz, op_llsc, op_lse) \ +static inline void \ +__percpu_##name##_case_##sz(void *ptr, unsigned long val) \ +{ \ + unsigned int loop; \ + u##sz tmp; \ + \ + asm volatile (ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + "1: ldxr" #sfx "\t%" #w "[tmp], %[ptr]\n" \ + #op_llsc "\t%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ + " stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n" \ + " cbnz %w[loop], 1b", \ + /* LSE atomics */ \ + #op_lse "\t%" #w "[val], %[ptr]\n" \ + __nops(3)) \ + : [loop] "=&r" (loop), [tmp] "=&r" (tmp), \ + [ptr] "+Q"(*(u##sz *)ptr) \ + : [val] "r" ((u##sz)(val))); \ } -static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, - int size) -{ - unsigned long ret, loop; - - switch (size) { - case 1: - asm ("//__percpu_xchg_1\n" - "1: ldxrb %w[ret], %[ptr]\n" - " stxrb %w[loop], %w[val], %[ptr]\n" - " cbnz %w[loop], 1b" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u8 *)ptr) - : [val] "r" (val)); - break; - case 2: - asm ("//__percpu_xchg_2\n" - "1: ldxrh %w[ret], %[ptr]\n" - " stxrh %w[loop], %w[val], %[ptr]\n" - " cbnz %w[loop], 1b" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u16 *)ptr) - : [val] "r" (val)); - break; - case 4: - asm ("//__percpu_xchg_4\n" - "1: ldxr %w[ret], %[ptr]\n" - " stxr %w[loop], %w[val], %[ptr]\n" - " cbnz %w[loop], 1b" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u32 *)ptr) - : [val] "r" (val)); - break; - case 8: - asm ("//__percpu_xchg_8\n" - "1: ldxr %[ret], %[ptr]\n" - " stxr %w[loop], %[val], %[ptr]\n" - " cbnz %w[loop], 1b" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u64 *)ptr) - : [val] "r" (val)); - break; - default: - ret = 0; - BUILD_BUG(); - } - - return ret; +#define __PERCPU_RET_OP_CASE(w, sfx, name, sz, op_llsc, op_lse) \ +static inline u##sz \ +__percpu_##name##_return_case_##sz(void *ptr, unsigned long val) \ +{ \ + unsigned int loop; \ + u##sz ret; \ + \ + asm volatile (ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + "1: ldxr" #sfx "\t%" #w "[ret], %[ptr]\n" \ + #op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n" \ + " stxr" #sfx "\t%w[loop], %" #w "[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b", \ + /* LSE atomics */ \ + #op_lse "\t%" #w "[val], %" #w "[ret], %[ptr]\n" \ + #op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n" \ + __nops(2)) \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u##sz *)ptr) \ + : [val] "r" ((u##sz)(val))); \ + \ + return ret; \ } -/* this_cpu_cmpxchg */ -#define _protect_cmpxchg_local(pcp, o, n) \ -({ \ - typeof(*raw_cpu_ptr(&(pcp))) __ret; \ - preempt_disable(); \ - __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \ - preempt_enable(); \ - __ret; \ -}) - -#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) -#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) -#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) -#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define PERCPU_OP(name, op_llsc, op_lse) \ + __PERCPU_OP_CASE(w, b, name, 8, op_llsc, op_lse) \ + __PERCPU_OP_CASE(w, h, name, 16, op_llsc, op_lse) \ + __PERCPU_OP_CASE(w, , name, 32, op_llsc, op_lse) \ + __PERCPU_OP_CASE( , , name, 64, op_llsc, op_lse) + +#define PERCPU_RET_OP(name, op_llsc, op_lse) \ + __PERCPU_RET_OP_CASE(w, b, name, 8, op_llsc, op_lse) \ + __PERCPU_RET_OP_CASE(w, h, name, 16, op_llsc, op_lse) \ + __PERCPU_RET_OP_CASE(w, , name, 32, op_llsc, op_lse) \ + __PERCPU_RET_OP_CASE( , , name, 64, op_llsc, op_lse) + +PERCPU_RW_OPS(8) +PERCPU_RW_OPS(16) +PERCPU_RW_OPS(32) +PERCPU_RW_OPS(64) +PERCPU_OP(add, add, stadd) +PERCPU_OP(andnot, bic, stclr) +PERCPU_OP(or, orr, stset) +PERCPU_RET_OP(add, add, ldadd) + +#undef PERCPU_RW_OPS +#undef __PERCPU_OP_CASE +#undef __PERCPU_RET_OP_CASE +#undef PERCPU_OP +#undef PERCPU_RET_OP +/* + * It would be nice to avoid the conditional call into the scheduler when + * re-enabling preemption for preemptible kernels, but doing that in a way + * which builds inside a module would mean messing directly with the preempt + * count. If you do this, peterz and tglx will hunt you down. + */ #define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ ({ \ int __ret; \ - preempt_disable(); \ + preempt_disable_notrace(); \ __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \ raw_cpu_ptr(&(ptr2)), \ o1, o2, n1, n2); \ - preempt_enable(); \ + preempt_enable_notrace(); \ __ret; \ }) -#define _percpu_read(pcp) \ +#define _pcp_protect(op, pcp, ...) \ ({ \ - typeof(pcp) __retval; \ preempt_disable_notrace(); \ - __retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), \ - sizeof(pcp)); \ + op(raw_cpu_ptr(&(pcp)), __VA_ARGS__); \ preempt_enable_notrace(); \ - __retval; \ }) -#define _percpu_write(pcp, val) \ -do { \ +#define _pcp_protect_return(op, pcp, args...) \ +({ \ + typeof(pcp) __retval; \ preempt_disable_notrace(); \ - __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), \ - sizeof(pcp)); \ + __retval = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##args); \ preempt_enable_notrace(); \ -} while(0) \ - -#define _pcp_protect(operation, pcp, val) \ -({ \ - typeof(pcp) __retval; \ - preempt_disable(); \ - __retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)), \ - (val), sizeof(pcp)); \ - preempt_enable(); \ - __retval; \ + __retval; \ }) -#define _percpu_add(pcp, val) \ - _pcp_protect(__percpu_add, pcp, val) - -#define _percpu_add_return(pcp, val) _percpu_add(pcp, val) - -#define _percpu_and(pcp, val) \ - _pcp_protect(__percpu_and, pcp, val) - -#define _percpu_or(pcp, val) \ - _pcp_protect(__percpu_or, pcp, val) - -#define _percpu_xchg(pcp, val) (typeof(pcp)) \ - _pcp_protect(__percpu_xchg, pcp, (unsigned long)(val)) - -#define this_cpu_add_1(pcp, val) _percpu_add(pcp, val) -#define this_cpu_add_2(pcp, val) _percpu_add(pcp, val) -#define this_cpu_add_4(pcp, val) _percpu_add(pcp, val) -#define this_cpu_add_8(pcp, val) _percpu_add(pcp, val) - -#define this_cpu_add_return_1(pcp, val) _percpu_add_return(pcp, val) -#define this_cpu_add_return_2(pcp, val) _percpu_add_return(pcp, val) -#define this_cpu_add_return_4(pcp, val) _percpu_add_return(pcp, val) -#define this_cpu_add_return_8(pcp, val) _percpu_add_return(pcp, val) - -#define this_cpu_and_1(pcp, val) _percpu_and(pcp, val) -#define this_cpu_and_2(pcp, val) _percpu_and(pcp, val) -#define this_cpu_and_4(pcp, val) _percpu_and(pcp, val) -#define this_cpu_and_8(pcp, val) _percpu_and(pcp, val) - -#define this_cpu_or_1(pcp, val) _percpu_or(pcp, val) -#define this_cpu_or_2(pcp, val) _percpu_or(pcp, val) -#define this_cpu_or_4(pcp, val) _percpu_or(pcp, val) -#define this_cpu_or_8(pcp, val) _percpu_or(pcp, val) - -#define this_cpu_read_1(pcp) _percpu_read(pcp) -#define this_cpu_read_2(pcp) _percpu_read(pcp) -#define this_cpu_read_4(pcp) _percpu_read(pcp) -#define this_cpu_read_8(pcp) _percpu_read(pcp) - -#define this_cpu_write_1(pcp, val) _percpu_write(pcp, val) -#define this_cpu_write_2(pcp, val) _percpu_write(pcp, val) -#define this_cpu_write_4(pcp, val) _percpu_write(pcp, val) -#define this_cpu_write_8(pcp, val) _percpu_write(pcp, val) - -#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val) -#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val) -#define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val) -#define this_cpu_xchg_8(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_read_1(pcp) \ + _pcp_protect_return(__percpu_read_8, pcp) +#define this_cpu_read_2(pcp) \ + _pcp_protect_return(__percpu_read_16, pcp) +#define this_cpu_read_4(pcp) \ + _pcp_protect_return(__percpu_read_32, pcp) +#define this_cpu_read_8(pcp) \ + _pcp_protect_return(__percpu_read_64, pcp) + +#define this_cpu_write_1(pcp, val) \ + _pcp_protect(__percpu_write_8, pcp, (unsigned long)val) +#define this_cpu_write_2(pcp, val) \ + _pcp_protect(__percpu_write_16, pcp, (unsigned long)val) +#define this_cpu_write_4(pcp, val) \ + _pcp_protect(__percpu_write_32, pcp, (unsigned long)val) +#define this_cpu_write_8(pcp, val) \ + _pcp_protect(__percpu_write_64, pcp, (unsigned long)val) + +#define this_cpu_add_1(pcp, val) \ + _pcp_protect(__percpu_add_case_8, pcp, val) +#define this_cpu_add_2(pcp, val) \ + _pcp_protect(__percpu_add_case_16, pcp, val) +#define this_cpu_add_4(pcp, val) \ + _pcp_protect(__percpu_add_case_32, pcp, val) +#define this_cpu_add_8(pcp, val) \ + _pcp_protect(__percpu_add_case_64, pcp, val) + +#define this_cpu_add_return_1(pcp, val) \ + _pcp_protect_return(__percpu_add_return_case_8, pcp, val) +#define this_cpu_add_return_2(pcp, val) \ + _pcp_protect_return(__percpu_add_return_case_16, pcp, val) +#define this_cpu_add_return_4(pcp, val) \ + _pcp_protect_return(__percpu_add_return_case_32, pcp, val) +#define this_cpu_add_return_8(pcp, val) \ + _pcp_protect_return(__percpu_add_return_case_64, pcp, val) + +#define this_cpu_and_1(pcp, val) \ + _pcp_protect(__percpu_andnot_case_8, pcp, ~val) +#define this_cpu_and_2(pcp, val) \ + _pcp_protect(__percpu_andnot_case_16, pcp, ~val) +#define this_cpu_and_4(pcp, val) \ + _pcp_protect(__percpu_andnot_case_32, pcp, ~val) +#define this_cpu_and_8(pcp, val) \ + _pcp_protect(__percpu_andnot_case_64, pcp, ~val) + +#define this_cpu_or_1(pcp, val) \ + _pcp_protect(__percpu_or_case_8, pcp, val) +#define this_cpu_or_2(pcp, val) \ + _pcp_protect(__percpu_or_case_16, pcp, val) +#define this_cpu_or_4(pcp, val) \ + _pcp_protect(__percpu_or_case_32, pcp, val) +#define this_cpu_or_8(pcp, val) \ + _pcp_protect(__percpu_or_case_64, pcp, val) + +#define this_cpu_xchg_1(pcp, val) \ + _pcp_protect_return(xchg_relaxed, pcp, val) +#define this_cpu_xchg_2(pcp, val) \ + _pcp_protect_return(xchg_relaxed, pcp, val) +#define this_cpu_xchg_4(pcp, val) \ + _pcp_protect_return(xchg_relaxed, pcp, val) +#define this_cpu_xchg_8(pcp, val) \ + _pcp_protect_return(xchg_relaxed, pcp, val) + +#define this_cpu_cmpxchg_1(pcp, o, n) \ + _pcp_protect_return(cmpxchg_relaxed, pcp, o, n) +#define this_cpu_cmpxchg_2(pcp, o, n) \ + _pcp_protect_return(cmpxchg_relaxed, pcp, o, n) +#define this_cpu_cmpxchg_4(pcp, o, n) \ + _pcp_protect_return(cmpxchg_relaxed, pcp, o, n) +#define this_cpu_cmpxchg_8(pcp, o, n) \ + _pcp_protect_return(cmpxchg_relaxed, pcp, o, n) #include <asm-generic/percpu.h> diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index f9ccc36d3dc3..c593761ba61c 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -24,6 +24,160 @@ #define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) /* + * Common architectural and microarchitectural event numbers. + */ +#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x01 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x02 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x05 +#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x06 +#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x07 +#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08 +#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x09 +#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x0A +#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x0B +#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x0C +#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x0D +#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x0E +#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x0F +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10 +#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11 +#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12 +#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x14 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x15 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x16 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x17 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x18 +#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19 +#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x1A +#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B +#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x1C +#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D +#define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20 +#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21 +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x22 +#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x23 +#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x24 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x25 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x26 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x27 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x28 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x29 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x2A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x2B +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x2C +#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D +#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x2E +#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F +#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x30 +#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x31 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x32 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x33 +#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x34 +#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x35 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x36 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x37 +#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x38 + +/* Statistical profiling extension microarchitectural events */ +#define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000 +#define ARMV8_SPE_PERFCTR_SAMPLE_FEED 0x4001 +#define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002 +#define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003 + +/* ARMv8 recommended implementation defined event types */ +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x42 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x43 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x44 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x45 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x46 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x47 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x48 + +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x4C +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x4D +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x4E +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x4F +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x50 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x51 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x52 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x53 + +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x56 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x57 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x58 + +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x5C +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x5D +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x5E +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x5F +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x60 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x61 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x62 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x63 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x64 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x65 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x66 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x67 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x68 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x69 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x6A + +#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x6C +#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x6D +#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x6E +#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x6F +#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x70 +#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x71 +#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x72 +#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x73 +#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x74 +#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x75 +#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x76 +#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x77 +#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x78 +#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x79 +#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x7A + +#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x7C +#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x7D +#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x7E + +#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x81 +#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x82 +#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x83 +#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x84 + +#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x86 +#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x87 +#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x88 + +#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x8A +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x8B +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x8C +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x8D +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x8E +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x8F +#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x90 +#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x91 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0xA0 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0xA1 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0xA2 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0xA3 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0xA6 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0xA7 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0xA8 + +/* * Per-CPU PMCR: config reg */ #define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */ @@ -50,21 +204,11 @@ #define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ /* - * PMUv3 event types: required events - */ -#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04 -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10 -#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11 -#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12 - -/* * Event filters for PMUv3 */ -#define ARMV8_PMU_EXCLUDE_EL1 (1 << 31) -#define ARMV8_PMU_EXCLUDE_EL0 (1 << 30) -#define ARMV8_PMU_INCLUDE_EL2 (1 << 27) +#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) +#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) +#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) /* * PMUSERENR: user enable reg diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 2e05bcd944c8..52fa47c73bf0 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -91,13 +91,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp); static inline pte_t * -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) +pte_alloc_one_kernel(struct mm_struct *mm) { return (pte_t *)__get_free_page(PGALLOC_GFP); } static inline pgtable_t -pte_alloc_one(struct mm_struct *mm, unsigned long addr) +pte_alloc_one(struct mm_struct *mm) { struct page *pte; diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 1d7d8da2ef9b..a69259cc1f16 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -80,7 +80,7 @@ #define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) +#define PTRS_PER_PGD (1 << (MAX_USER_VA_BITS - PGDIR_SHIFT)) /* * Section address mask and size definitions. @@ -193,6 +193,10 @@ #define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ #define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */ +#define PUD_S2_RDONLY (_AT(pudval_t, 1) << 6) /* HAP[2:1] */ +#define PUD_S2_RDWR (_AT(pudval_t, 3) << 6) /* HAP[2:1] */ +#define PUD_S2_XN (_AT(pudval_t, 2) << 53) /* XN[1:0] */ + /* * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ @@ -224,6 +228,8 @@ #define TCR_TxSZ_WIDTH 6 #define TCR_T0SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET) +#define TCR_EPD0_SHIFT 7 +#define TCR_EPD0_MASK (UL(1) << TCR_EPD0_SHIFT) #define TCR_IRGN0_SHIFT 8 #define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) #define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) @@ -231,6 +237,8 @@ #define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) #define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) +#define TCR_EPD1_SHIFT 23 +#define TCR_EPD1_MASK (UL(1) << TCR_EPD1_SHIFT) #define TCR_IRGN1_SHIFT 24 #define TCR_IRGN1_MASK (UL(3) << TCR_IRGN1_SHIFT) #define TCR_IRGN1_NC (UL(0) << TCR_IRGN1_SHIFT) @@ -291,8 +299,10 @@ #define TCR_A1 (UL(1) << 22) #define TCR_ASID16 (UL(1) << 36) #define TCR_TBI0 (UL(1) << 37) +#define TCR_TBI1 (UL(1) << 38) #define TCR_HA (UL(1) << 39) #define TCR_HD (UL(1) << 40) +#define TCR_NFD0 (UL(1) << 53) #define TCR_NFD1 (UL(1) << 54) /* @@ -306,4 +316,10 @@ #define TTBR_BADDR_MASK_52 (((UL(1) << 46) - 1) << 2) #endif +#ifdef CONFIG_ARM64_USER_VA_BITS_52 +/* Must be at least 64-byte aligned to prevent corruption of the TTBR */ +#define TTBR1_BADDR_4852_OFFSET (((UL(1) << (52 - PGDIR_SHIFT)) - \ + (UL(1) << (48 - PGDIR_SHIFT))) * 8) +#endif + #endif diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 78b942c1bea4..986e41c4c32b 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -37,8 +37,8 @@ #define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#define PTE_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PTE_NG : 0) -#define PMD_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PMD_SECT_NG : 0) +#define PTE_MAYBE_NG (arm64_kernel_use_ng_mappings() ? PTE_NG : 0) +#define PMD_MAYBE_NG (arm64_kernel_use_ng_mappings() ? PMD_SECT_NG : 0) #define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) #define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 50b1ef8584c0..de70c1eabf33 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -22,6 +22,7 @@ #include <asm/memory.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable-prot.h> +#include <asm/tlbflush.h> /* * VMALLOC range. @@ -314,6 +315,11 @@ static inline pte_t pud_pte(pud_t pud) return __pte(pud_val(pud)); } +static inline pud_t pte_pud(pte_t pte) +{ + return __pud(pte_val(pte)); +} + static inline pmd_t pud_pmd(pud_t pud) { return __pmd(pud_val(pud)); @@ -381,8 +387,12 @@ static inline int pmd_protnone(pmd_t pmd) #define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) +#define pud_young(pud) pte_young(pud_pte(pud)) +#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud))) #define pud_write(pud) pte_write(pud_pte(pud)) +#define pud_mkhuge(pud) (__pud(pud_val(pud) & ~PUD_TABLE_BIT)) + #define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud)) #define __phys_to_pud_val(phys) __phys_to_pte_val(phys) #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) @@ -685,6 +695,27 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, return __ptep_test_and_clear_young(ptep); } +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +static inline int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + int young = ptep_test_and_clear_young(vma, address, ptep); + + if (young) { + /* + * We can elide the trailing DSB here since the worst that can + * happen is that a CPU continues to use the young entry in its + * TLB and we mistakenly reclaim the associated page. The + * window for such an event is bounded by the next + * context-switch, which provides a DSB to complete the TLB + * invalidation. + */ + flush_tlb_page_nosync(vma, address); + } + + return young; +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h new file mode 100644 index 000000000000..15d49515efdd --- /dev/null +++ b/arch/arm64/include/asm/pointer_auth.h @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __ASM_POINTER_AUTH_H +#define __ASM_POINTER_AUTH_H + +#include <linux/bitops.h> +#include <linux/random.h> + +#include <asm/cpufeature.h> +#include <asm/memory.h> +#include <asm/sysreg.h> + +#ifdef CONFIG_ARM64_PTR_AUTH +/* + * Each key is a 128-bit quantity which is split across a pair of 64-bit + * registers (Lo and Hi). + */ +struct ptrauth_key { + unsigned long lo, hi; +}; + +/* + * We give each process its own keys, which are shared by all threads. The keys + * are inherited upon fork(), and reinitialised upon exec*(). + */ +struct ptrauth_keys { + struct ptrauth_key apia; + struct ptrauth_key apib; + struct ptrauth_key apda; + struct ptrauth_key apdb; + struct ptrauth_key apga; +}; + +static inline void ptrauth_keys_init(struct ptrauth_keys *keys) +{ + if (system_supports_address_auth()) { + get_random_bytes(&keys->apia, sizeof(keys->apia)); + get_random_bytes(&keys->apib, sizeof(keys->apib)); + get_random_bytes(&keys->apda, sizeof(keys->apda)); + get_random_bytes(&keys->apdb, sizeof(keys->apdb)); + } + + if (system_supports_generic_auth()) + get_random_bytes(&keys->apga, sizeof(keys->apga)); +} + +#define __ptrauth_key_install(k, v) \ +do { \ + struct ptrauth_key __pki_v = (v); \ + write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1); \ + write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1); \ +} while (0) + +static inline void ptrauth_keys_switch(struct ptrauth_keys *keys) +{ + if (system_supports_address_auth()) { + __ptrauth_key_install(APIA, keys->apia); + __ptrauth_key_install(APIB, keys->apib); + __ptrauth_key_install(APDA, keys->apda); + __ptrauth_key_install(APDB, keys->apdb); + } + + if (system_supports_generic_auth()) + __ptrauth_key_install(APGA, keys->apga); +} + +extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg); + +/* + * The EL0 pointer bits used by a pointer authentication code. + * This is dependent on TBI0 being enabled, or bits 63:56 would also apply. + */ +#define ptrauth_user_pac_mask() GENMASK(54, vabits_user) + +/* Only valid for EL0 TTBR0 instruction pointers */ +static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr) +{ + return ptr & ~ptrauth_user_pac_mask(); +} + +#define ptrauth_thread_init_user(tsk) \ +do { \ + struct task_struct *__ptiu_tsk = (tsk); \ + ptrauth_keys_init(&__ptiu_tsk->thread.keys_user); \ + ptrauth_keys_switch(&__ptiu_tsk->thread.keys_user); \ +} while (0) + +#define ptrauth_thread_switch(tsk) \ + ptrauth_keys_switch(&(tsk)->thread.keys_user) + +#else /* CONFIG_ARM64_PTR_AUTH */ +#define ptrauth_prctl_reset_keys(tsk, arg) (-EINVAL) +#define ptrauth_strip_insn_pac(lr) (lr) +#define ptrauth_thread_init_user(tsk) +#define ptrauth_thread_switch(tsk) +#endif /* CONFIG_ARM64_PTR_AUTH */ + +#endif /* __ASM_POINTER_AUTH_H */ diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h new file mode 100644 index 000000000000..d49951647014 --- /dev/null +++ b/arch/arm64/include/asm/preempt.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_PREEMPT_H +#define __ASM_PREEMPT_H + +#include <linux/thread_info.h> + +#define PREEMPT_NEED_RESCHED BIT(32) +#define PREEMPT_ENABLED (PREEMPT_NEED_RESCHED) + +static inline int preempt_count(void) +{ + return READ_ONCE(current_thread_info()->preempt.count); +} + +static inline void preempt_count_set(u64 pc) +{ + /* Preserve existing value of PREEMPT_NEED_RESCHED */ + WRITE_ONCE(current_thread_info()->preempt.count, pc); +} + +#define init_task_preempt_count(p) do { \ + task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \ +} while (0) + +#define init_idle_preempt_count(p, cpu) do { \ + task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ +} while (0) + +static inline void set_preempt_need_resched(void) +{ + current_thread_info()->preempt.need_resched = 0; +} + +static inline void clear_preempt_need_resched(void) +{ + current_thread_info()->preempt.need_resched = 1; +} + +static inline bool test_preempt_need_resched(void) +{ + return !current_thread_info()->preempt.need_resched; +} + +static inline void __preempt_count_add(int val) +{ + u32 pc = READ_ONCE(current_thread_info()->preempt.count); + pc += val; + WRITE_ONCE(current_thread_info()->preempt.count, pc); +} + +static inline void __preempt_count_sub(int val) +{ + u32 pc = READ_ONCE(current_thread_info()->preempt.count); + pc -= val; + WRITE_ONCE(current_thread_info()->preempt.count, pc); +} + +static inline bool __preempt_count_dec_and_test(void) +{ + struct thread_info *ti = current_thread_info(); + u64 pc = READ_ONCE(ti->preempt_count); + + /* Update only the count field, leaving need_resched unchanged */ + WRITE_ONCE(ti->preempt.count, --pc); + + /* + * If we wrote back all zeroes, then we're preemptible and in + * need of a reschedule. Otherwise, we need to reload the + * preempt_count in case the need_resched flag was cleared by an + * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE + * pair. + */ + return !pc || !READ_ONCE(ti->preempt_count); +} + +static inline bool should_resched(int preempt_offset) +{ + u64 pc = READ_ONCE(current_thread_info()->preempt_count); + return pc == preempt_offset; +} + +#ifdef CONFIG_PREEMPT +void preempt_schedule(void); +#define __preempt_schedule() preempt_schedule() +void preempt_schedule_notrace(void); +#define __preempt_schedule_notrace() preempt_schedule_notrace() +#endif /* CONFIG_PREEMPT */ + +#endif /* __ASM_PREEMPT_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 6b0d4dff5012..5d9ce62bdebd 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -19,10 +19,8 @@ #ifndef __ASM_PROCESSOR_H #define __ASM_PROCESSOR_H -#define TASK_SIZE_64 (UL(1) << VA_BITS) - -#define KERNEL_DS UL(-1) -#define USER_DS (TASK_SIZE_64 - 1) +#define KERNEL_DS UL(-1) +#define USER_DS ((UL(1) << MAX_USER_VA_BITS) - 1) /* * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is @@ -46,6 +44,7 @@ #include <asm/hw_breakpoint.h> #include <asm/lse.h> #include <asm/pgtable-hwdef.h> +#include <asm/pointer_auth.h> #include <asm/ptrace.h> #include <asm/types.h> @@ -53,19 +52,31 @@ * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. */ + +#define DEFAULT_MAP_WINDOW_64 (UL(1) << VA_BITS) +#define TASK_SIZE_64 (UL(1) << vabits_user) + #ifdef CONFIG_COMPAT #define TASK_SIZE_32 UL(0x100000000) #define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ TASK_SIZE_32 : TASK_SIZE_64) #define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \ TASK_SIZE_32 : TASK_SIZE_64) +#define DEFAULT_MAP_WINDOW (test_thread_flag(TIF_32BIT) ? \ + TASK_SIZE_32 : DEFAULT_MAP_WINDOW_64) #else #define TASK_SIZE TASK_SIZE_64 +#define DEFAULT_MAP_WINDOW DEFAULT_MAP_WINDOW_64 #endif /* CONFIG_COMPAT */ +#ifdef CONFIG_ARM64_FORCE_52BIT +#define STACK_TOP_MAX TASK_SIZE_64 #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) +#else +#define STACK_TOP_MAX DEFAULT_MAP_WINDOW_64 +#define TASK_UNMAPPED_BASE (PAGE_ALIGN(DEFAULT_MAP_WINDOW / 4)) +#endif /* CONFIG_ARM64_FORCE_52BIT */ -#define STACK_TOP_MAX TASK_SIZE_64 #ifdef CONFIG_COMPAT #define AARCH32_VECTORS_BASE 0xffff0000 #define STACK_TOP (test_thread_flag(TIF_32BIT) ? \ @@ -74,6 +85,15 @@ #define STACK_TOP STACK_TOP_MAX #endif /* CONFIG_COMPAT */ +#ifndef CONFIG_ARM64_FORCE_52BIT +#define arch_get_mmap_end(addr) ((addr > DEFAULT_MAP_WINDOW) ? TASK_SIZE :\ + DEFAULT_MAP_WINDOW) + +#define arch_get_mmap_base(addr, base) ((addr > DEFAULT_MAP_WINDOW) ? \ + base + TASK_SIZE - DEFAULT_MAP_WINDOW :\ + base) +#endif /* CONFIG_ARM64_FORCE_52BIT */ + extern phys_addr_t arm64_dma_phys_limit; #define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) @@ -127,6 +147,9 @@ struct thread_struct { unsigned long fault_address; /* fault info */ unsigned long fault_code; /* ESR_EL1 value */ struct debug_info debug; /* debugging */ +#ifdef CONFIG_ARM64_PTR_AUTH + struct ptrauth_keys keys_user; +#endif }; static inline void arch_thread_struct_whitelist(unsigned long *offset, @@ -168,6 +191,9 @@ static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) memset(regs, 0, sizeof(*regs)); forget_syscall(regs); regs->pc = pc; + + if (system_uses_irq_prio_masking()) + regs->pmr_save = GIC_PRIO_IRQON; } static inline void start_thread(struct pt_regs *regs, unsigned long pc, @@ -270,6 +296,9 @@ extern void __init minsigstksz_setup(void); #define SVE_SET_VL(arg) sve_set_current_vl(arg) #define SVE_GET_VL() sve_get_current_vl() +/* PR_PAC_RESET_KEYS prctl */ +#define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg) + /* * For CONFIG_GCC_PLUGIN_STACKLEAK * diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 6afd8476c60c..9e948a93d26c 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -34,13 +34,10 @@ struct ptdump_info { void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info); #ifdef CONFIG_ARM64_PTDUMP_DEBUGFS -int ptdump_debugfs_register(struct ptdump_info *info, const char *name); +void ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else -static inline int ptdump_debugfs_register(struct ptdump_info *info, - const char *name) -{ - return 0; -} +static inline void ptdump_debugfs_register(struct ptdump_info *info, + const char *name) { } #endif void ptdump_check_wx(void); #endif /* CONFIG_ARM64_PTDUMP_CORE */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index fce22c4b2f73..ec60174c8c18 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -19,12 +19,26 @@ #ifndef __ASM_PTRACE_H #define __ASM_PTRACE_H +#include <asm/cpufeature.h> + #include <uapi/asm/ptrace.h> /* Current Exception Level values, as contained in CurrentEL */ #define CurrentEL_EL1 (1 << 2) #define CurrentEL_EL2 (2 << 2) +/* + * PMR values used to mask/unmask interrupts. + * + * GIC priority masking works as follows: if an IRQ's priority is a higher value + * than the value held in PMR, that IRQ is masked. Lowering the value of PMR + * means masking more IRQs (or at least that the same IRQs remain masked). + * + * To mask interrupts, we clear the most significant bit of PMR. + */ +#define GIC_PRIO_IRQON 0xf0 +#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) + /* Additional SPSR bits not exposed in the UABI */ #define PSR_IL_BIT (1 << 20) @@ -167,7 +181,8 @@ struct pt_regs { #endif u64 orig_addr_limit; - u64 unused; // maintain 16 byte alignment + /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */ + u64 pmr_save; u64 stackframe[2]; }; @@ -202,8 +217,13 @@ static inline void forget_syscall(struct pt_regs *regs) #define processor_mode(regs) \ ((regs)->pstate & PSR_MODE_MASK) -#define interrupts_enabled(regs) \ - (!((regs)->pstate & PSR_I_BIT)) +#define irqs_priority_unmasked(regs) \ + (system_uses_irq_prio_masking() ? \ + (regs)->pmr_save == GIC_PRIO_IRQON : \ + true) + +#define interrupts_enabled(regs) \ + (!((regs)->pstate & PSR_I_BIT) && irqs_priority_unmasked(regs)) #define fast_interrupts_enabled(regs) \ (!((regs)->pstate & PSR_F_BIT)) diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index f82b447bd34f..18553f399e08 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -16,16 +16,23 @@ #ifndef __ASM_SMP_H #define __ASM_SMP_H +#include <linux/const.h> + /* Values for secondary_data.status */ +#define CPU_STUCK_REASON_SHIFT (8) +#define CPU_BOOT_STATUS_MASK ((UL(1) << CPU_STUCK_REASON_SHIFT) - 1) -#define CPU_MMU_OFF (-1) -#define CPU_BOOT_SUCCESS (0) +#define CPU_MMU_OFF (-1) +#define CPU_BOOT_SUCCESS (0) /* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */ -#define CPU_KILL_ME (1) +#define CPU_KILL_ME (1) /* The cpu couldn't die gracefully and is looping in the kernel */ -#define CPU_STUCK_IN_KERNEL (2) +#define CPU_STUCK_IN_KERNEL (2) /* Fatal system error detected by secondary CPU, crash the system */ -#define CPU_PANIC_KERNEL (3) +#define CPU_PANIC_KERNEL (3) + +#define CPU_STUCK_REASON_52_BIT_VA (UL(1) << CPU_STUCK_REASON_SHIFT) +#define CPU_STUCK_REASON_NO_GRAN (UL(2) << CPU_STUCK_REASON_SHIFT) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h index 58d15be11c4d..5884a2b02827 100644 --- a/arch/arm64/include/asm/stackprotector.h +++ b/arch/arm64/include/asm/stackprotector.h @@ -34,7 +34,8 @@ static __always_inline void boot_init_stack_canary(void) canary &= CANARY_MASK; current->stack_canary = canary; - __stack_chk_guard = current->stack_canary; + if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK)) + __stack_chk_guard = current->stack_canary; } #endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index d352f6df8d2c..5412fa40825e 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -30,16 +30,14 @@ #define pt_levels_pgdir_shift(lvls) ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls)) /* - * The hardware supports concatenation of up to 16 tables at stage2 entry level - * and we use the feature whenever possible. + * The hardware supports concatenation of up to 16 tables at stage2 entry + * level and we use the feature whenever possible, which means we resolve 4 + * additional bits of address at the entry level. * - * Now, the minimum number of bits resolved at any level is (PAGE_SHIFT - 3). - * On arm64, the smallest PAGE_SIZE supported is 4k, which means - * (PAGE_SHIFT - 3) > 4 holds for all page sizes. - * This implies, the total number of page table levels at stage2 expected - * by the hardware is actually the number of levels required for (IPA_SHIFT - 4) - * in normal translations(e.g, stage1), since we cannot have another level in - * the range (IPA_SHIFT, IPA_SHIFT - 4). + * This implies, the total number of page table levels required for + * IPA_SHIFT at stage2 expected by the hardware can be calculated using + * the same logic used for the (non-collapsable) stage1 page tables but for + * (IPA_SHIFT - 4). */ #define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4) #define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr) diff --git a/arch/arm64/include/asm/sync_bitops.h b/arch/arm64/include/asm/sync_bitops.h index eee31a9f72a5..e9c1a02c2154 100644 --- a/arch/arm64/include/asm/sync_bitops.h +++ b/arch/arm64/include/asm/sync_bitops.h @@ -15,13 +15,13 @@ * ops which are SMP safe even on a UP kernel. */ -#define sync_set_bit(nr, p) set_bit(nr, p) -#define sync_clear_bit(nr, p) clear_bit(nr, p) -#define sync_change_bit(nr, p) change_bit(nr, p) -#define sync_test_and_set_bit(nr, p) test_and_set_bit(nr, p) -#define sync_test_and_clear_bit(nr, p) test_and_clear_bit(nr, p) -#define sync_test_and_change_bit(nr, p) test_and_change_bit(nr, p) -#define sync_test_bit(nr, addr) test_bit(nr, addr) -#define sync_cmpxchg cmpxchg +#define sync_set_bit(nr, p) set_bit(nr, p) +#define sync_clear_bit(nr, p) clear_bit(nr, p) +#define sync_change_bit(nr, p) change_bit(nr, p) +#define sync_test_and_set_bit(nr, p) test_and_set_bit(nr, p) +#define sync_test_and_clear_bit(nr, p) test_and_clear_bit(nr, p) +#define sync_test_and_change_bit(nr, p) test_and_change_bit(nr, p) +#define sync_test_bit(nr, addr) test_bit(nr, addr) +#define arch_sync_cmpxchg arch_cmpxchg #endif diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index ad8be16a39c9..a179df3674a1 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -65,52 +65,22 @@ static inline void syscall_set_return_value(struct task_struct *task, static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, unsigned long *args) { - if (n == 0) - return; - - if (i + n > SYSCALL_MAX_ARGS) { - unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; - unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; - pr_warning("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - memset(args_bad, 0, n_bad * sizeof(args[0])); - } - - if (i == 0) { - args[0] = regs->orig_x0; - args++; - i++; - n--; - } - - memcpy(args, ®s->regs[i], n * sizeof(args[0])); + args[0] = regs->orig_x0; + args++; + + memcpy(args, ®s->regs[1], 5 * sizeof(args[0])); } static inline void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, - unsigned int i, unsigned int n, const unsigned long *args) { - if (n == 0) - return; - - if (i + n > SYSCALL_MAX_ARGS) { - pr_warning("%s called with max args %d, handling only %d\n", - __func__, i + n, SYSCALL_MAX_ARGS); - n = SYSCALL_MAX_ARGS - i; - } - - if (i == 0) { - regs->orig_x0 = args[0]; - args++; - i++; - n--; - } - - memcpy(®s->regs[i], args, n * sizeof(args[0])); + regs->orig_x0 = args[0]; + args++; + + memcpy(®s->regs[1], args, 5 * sizeof(args[0])); } /* diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 842fb9572661..5b267dec6194 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,6 +20,7 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H +#include <linux/const.h> #include <linux/stringify.h> /* @@ -104,6 +105,11 @@ #define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) #define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) +#define __SYS_BARRIER_INSN(CRm, op2, Rt) \ + __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f)) + +#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31) + #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) #define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) @@ -183,6 +189,19 @@ #define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1) #define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) +#define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0) +#define SYS_APIAKEYHI_EL1 sys_reg(3, 0, 2, 1, 1) +#define SYS_APIBKEYLO_EL1 sys_reg(3, 0, 2, 1, 2) +#define SYS_APIBKEYHI_EL1 sys_reg(3, 0, 2, 1, 3) + +#define SYS_APDAKEYLO_EL1 sys_reg(3, 0, 2, 2, 0) +#define SYS_APDAKEYHI_EL1 sys_reg(3, 0, 2, 2, 1) +#define SYS_APDBKEYLO_EL1 sys_reg(3, 0, 2, 2, 2) +#define SYS_APDBKEYHI_EL1 sys_reg(3, 0, 2, 2, 3) + +#define SYS_APGAKEYLO_EL1 sys_reg(3, 0, 2, 3, 0) +#define SYS_APGAKEYHI_EL1 sys_reg(3, 0, 2, 3, 1) + #define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) #define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0) @@ -342,6 +361,7 @@ #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) +#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) #define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) @@ -373,6 +393,10 @@ #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) +#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) +#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) +#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) + #define __PMEV_op2(n) ((n) & 0x7) #define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) #define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n)) @@ -407,7 +431,7 @@ #define SYS_ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) #define SYS_ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) #define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) -#define SYS_ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5) +#define SYS_ICH_ELRSR_EL2 sys_reg(3, 4, 12, 11, 5) #define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) #define __SYS__LR0_EL2(x) sys_reg(3, 4, 12, 12, x) @@ -431,27 +455,31 @@ #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) /* Common SCTLR_ELx flags. */ -#define SCTLR_ELx_DSSBS (1UL << 44) -#define SCTLR_ELx_EE (1 << 25) -#define SCTLR_ELx_IESB (1 << 21) -#define SCTLR_ELx_WXN (1 << 19) -#define SCTLR_ELx_I (1 << 12) -#define SCTLR_ELx_SA (1 << 3) -#define SCTLR_ELx_C (1 << 2) -#define SCTLR_ELx_A (1 << 1) -#define SCTLR_ELx_M 1 +#define SCTLR_ELx_DSSBS (_BITUL(44)) +#define SCTLR_ELx_ENIA (_BITUL(31)) +#define SCTLR_ELx_ENIB (_BITUL(30)) +#define SCTLR_ELx_ENDA (_BITUL(27)) +#define SCTLR_ELx_EE (_BITUL(25)) +#define SCTLR_ELx_IESB (_BITUL(21)) +#define SCTLR_ELx_WXN (_BITUL(19)) +#define SCTLR_ELx_ENDB (_BITUL(13)) +#define SCTLR_ELx_I (_BITUL(12)) +#define SCTLR_ELx_SA (_BITUL(3)) +#define SCTLR_ELx_C (_BITUL(2)) +#define SCTLR_ELx_A (_BITUL(1)) +#define SCTLR_ELx_M (_BITUL(0)) #define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB) /* SCTLR_EL2 specific flags. */ -#define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \ - (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \ - (1 << 29)) -#define SCTLR_EL2_RES0 ((1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | \ - (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \ - (1 << 17) | (1 << 20) | (1 << 24) | (1 << 26) | \ - (1 << 27) | (1 << 30) | (1 << 31) | \ +#define SCTLR_EL2_RES1 ((_BITUL(4)) | (_BITUL(5)) | (_BITUL(11)) | (_BITUL(16)) | \ + (_BITUL(18)) | (_BITUL(22)) | (_BITUL(23)) | (_BITUL(28)) | \ + (_BITUL(29))) +#define SCTLR_EL2_RES0 ((_BITUL(6)) | (_BITUL(7)) | (_BITUL(8)) | (_BITUL(9)) | \ + (_BITUL(10)) | (_BITUL(13)) | (_BITUL(14)) | (_BITUL(15)) | \ + (_BITUL(17)) | (_BITUL(20)) | (_BITUL(24)) | (_BITUL(26)) | \ + (_BITUL(27)) | (_BITUL(30)) | (_BITUL(31)) | \ (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN @@ -473,23 +501,23 @@ #endif /* SCTLR_EL1 specific flags. */ -#define SCTLR_EL1_UCI (1 << 26) -#define SCTLR_EL1_E0E (1 << 24) -#define SCTLR_EL1_SPAN (1 << 23) -#define SCTLR_EL1_NTWE (1 << 18) -#define SCTLR_EL1_NTWI (1 << 16) -#define SCTLR_EL1_UCT (1 << 15) -#define SCTLR_EL1_DZE (1 << 14) -#define SCTLR_EL1_UMA (1 << 9) -#define SCTLR_EL1_SED (1 << 8) -#define SCTLR_EL1_ITD (1 << 7) -#define SCTLR_EL1_CP15BEN (1 << 5) -#define SCTLR_EL1_SA0 (1 << 4) - -#define SCTLR_EL1_RES1 ((1 << 11) | (1 << 20) | (1 << 22) | (1 << 28) | \ - (1 << 29)) -#define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \ - (1 << 27) | (1 << 30) | (1 << 31) | \ +#define SCTLR_EL1_UCI (_BITUL(26)) +#define SCTLR_EL1_E0E (_BITUL(24)) +#define SCTLR_EL1_SPAN (_BITUL(23)) +#define SCTLR_EL1_NTWE (_BITUL(18)) +#define SCTLR_EL1_NTWI (_BITUL(16)) +#define SCTLR_EL1_UCT (_BITUL(15)) +#define SCTLR_EL1_DZE (_BITUL(14)) +#define SCTLR_EL1_UMA (_BITUL(9)) +#define SCTLR_EL1_SED (_BITUL(8)) +#define SCTLR_EL1_ITD (_BITUL(7)) +#define SCTLR_EL1_CP15BEN (_BITUL(5)) +#define SCTLR_EL1_SA0 (_BITUL(4)) + +#define SCTLR_EL1_RES1 ((_BITUL(11)) | (_BITUL(20)) | (_BITUL(22)) | (_BITUL(28)) | \ + (_BITUL(29))) +#define SCTLR_EL1_RES0 ((_BITUL(6)) | (_BITUL(10)) | (_BITUL(13)) | (_BITUL(17)) | \ + (_BITUL(27)) | (_BITUL(30)) | (_BITUL(31)) | \ (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN @@ -528,11 +556,25 @@ #define ID_AA64ISAR0_AES_SHIFT 4 /* id_aa64isar1 */ +#define ID_AA64ISAR1_SB_SHIFT 36 +#define ID_AA64ISAR1_GPI_SHIFT 28 +#define ID_AA64ISAR1_GPA_SHIFT 24 #define ID_AA64ISAR1_LRCPC_SHIFT 20 #define ID_AA64ISAR1_FCMA_SHIFT 16 #define ID_AA64ISAR1_JSCVT_SHIFT 12 +#define ID_AA64ISAR1_API_SHIFT 8 +#define ID_AA64ISAR1_APA_SHIFT 4 #define ID_AA64ISAR1_DPB_SHIFT 0 +#define ID_AA64ISAR1_APA_NI 0x0 +#define ID_AA64ISAR1_APA_ARCHITECTED 0x1 +#define ID_AA64ISAR1_API_NI 0x0 +#define ID_AA64ISAR1_API_IMP_DEF 0x1 +#define ID_AA64ISAR1_GPA_NI 0x0 +#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1 +#define ID_AA64ISAR1_GPI_NI 0x0 +#define ID_AA64ISAR1_GPI_IMP_DEF 0x1 + /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 @@ -676,13 +718,13 @@ #define ZCR_ELx_LEN_SIZE 9 #define ZCR_ELx_LEN_MASK 0x1ff -#define CPACR_EL1_ZEN_EL1EN (1 << 16) /* enable EL1 access */ -#define CPACR_EL1_ZEN_EL0EN (1 << 17) /* enable EL0 access, if EL1EN set */ +#define CPACR_EL1_ZEN_EL1EN (_BITUL(16)) /* enable EL1 access */ +#define CPACR_EL1_ZEN_EL0EN (_BITUL(17)) /* enable EL0 access, if EL1EN set */ #define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ -#define SYS_MPIDR_SAFE_VAL (1UL << 31) +#define SYS_MPIDR_SAFE_VAL (_BITUL(31)) #ifdef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 0e2a0ecaf484..32693f34f431 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -46,8 +46,6 @@ extern void __show_regs(struct pt_regs *); extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); -int handle_guest_sea(phys_addr_t addr, unsigned int esr); - #endif /* __ASSEMBLY__ */ #endif /* __ASM_SYSTEM_MISC_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index cb2c10a8f0a8..eb3ef73e07cf 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -42,7 +42,18 @@ struct thread_info { #ifdef CONFIG_ARM64_SW_TTBR0_PAN u64 ttbr0; /* saved TTBR0_EL1 */ #endif - int preempt_count; /* 0 => preemptable, <0 => bug */ + union { + u64 preempt_count; /* 0 => preemptible, <0 => bug */ + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + u32 need_resched; + u32 count; +#else + u32 count; + u32 need_resched; +#endif + } preempt; + }; }; #define thread_saved_pc(tsk) \ @@ -68,7 +79,6 @@ void arch_release_task_struct(struct task_struct *tsk); * TIF_SIGPENDING - signal pending * TIF_NEED_RESCHED - rescheduling necessary * TIF_NOTIFY_RESUME - callback before returning to user - * TIF_USEDFPU - FPU was used by this task this quantum (SMP) */ #define TIF_SIGPENDING 0 #define TIF_NEED_RESCHED 1 diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 5dfd23897dea..3a1870228946 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -21,6 +21,7 @@ #ifndef __ASSEMBLY__ +#include <linux/mm_types.h> #include <linux/sched.h> #include <asm/cputype.h> #include <asm/mmu.h> @@ -164,14 +165,20 @@ static inline void flush_tlb_mm(struct mm_struct *mm) dsb(ish); } -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long uaddr) +static inline void flush_tlb_page_nosync(struct vm_area_struct *vma, + unsigned long uaddr) { unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm)); dsb(ishst); __tlbi(vale1is, addr); __tlbi_user(vale1is, addr); +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long uaddr) +{ + flush_tlb_page_nosync(vma, uaddr); dsb(ish); } @@ -179,7 +186,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, * This is meant to avoid soft lock-ups on large TLB flushing ranges and not * necessarily a performance improvement. */ -#define MAX_TLBI_OPS 1024UL +#define MAX_TLBI_OPS PTRS_PER_PTE static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, @@ -188,7 +195,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long asid = ASID(vma->vm_mm); unsigned long addr; - if ((end - start) > (MAX_TLBI_OPS * stride)) { + if ((end - start) >= (MAX_TLBI_OPS * stride)) { flush_tlb_mm(vma->vm_mm); return; } diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 07c34087bd5e..e5d5f31c6d36 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -34,7 +34,6 @@ #include <asm/memory.h> #include <asm/extable.h> -#define get_ds() (KERNEL_DS) #define get_fs() (current_thread_info()->addr_limit) static inline void set_fs(mm_segment_t fs) @@ -45,8 +44,7 @@ static inline void set_fs(mm_segment_t fs) * Prevent a mispredicted conditional call to set_fs from forwarding * the wrong address limit to access_ok under speculation. */ - dsb(nsh); - isb(); + spec_bar(); /* On user-mode return, check fs is correct */ set_thread_flag(TIF_FSCHECK); @@ -96,14 +94,7 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si return ret; } -/* - * When dealing with data aborts, watchpoints, or instruction traps we may end - * up with a tagged userland pointer. Clear the tag to get a sane pointer to - * pass on to access_ok(), for instance. - */ -#define untagged_addr(addr) sign_extend64(addr, 55) - -#define access_ok(type, addr, size) __range_ok(addr, size) +#define access_ok(addr, size) __range_ok(addr, size) #define user_addr_max get_fs #define _ASM_EXTABLE(from, to) \ @@ -276,7 +267,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) : "+r" (err), "=&r" (x) \ : "r" (addr), "i" (-EFAULT)) -#define __get_user_err(x, ptr, err) \ +#define __raw_get_user(x, ptr, err) \ do { \ unsigned long __gu_val; \ __chk_user_ptr(ptr); \ @@ -305,28 +296,22 @@ do { \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) -#define __get_user_check(x, ptr, err) \ -({ \ +#define __get_user_error(x, ptr, err) \ +do { \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ - if (access_ok(VERIFY_READ, __p, sizeof(*__p))) { \ + if (access_ok(__p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __get_user_err((x), __p, (err)); \ + __raw_get_user((x), __p, (err)); \ } else { \ (x) = 0; (err) = -EFAULT; \ } \ -}) - -#define __get_user_error(x, ptr, err) \ -({ \ - __get_user_check((x), (ptr), (err)); \ - (void)0; \ -}) +} while (0) #define __get_user(x, ptr) \ ({ \ int __gu_err = 0; \ - __get_user_check((x), (ptr), __gu_err); \ + __get_user_error((x), (ptr), __gu_err); \ __gu_err; \ }) @@ -346,7 +331,7 @@ do { \ : "+r" (err) \ : "r" (x), "r" (addr), "i" (-EFAULT)) -#define __put_user_err(x, ptr, err) \ +#define __raw_put_user(x, ptr, err) \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ @@ -374,28 +359,22 @@ do { \ uaccess_disable_not_uao(); \ } while (0) -#define __put_user_check(x, ptr, err) \ -({ \ +#define __put_user_error(x, ptr, err) \ +do { \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ - if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) { \ + if (access_ok(__p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __put_user_err((x), __p, (err)); \ + __raw_put_user((x), __p, (err)); \ } else { \ (err) = -EFAULT; \ } \ -}) - -#define __put_user_error(x, ptr, err) \ -({ \ - __put_user_check((x), (ptr), (err)); \ - (void)0; \ -}) +} while (0) #define __put_user(x, ptr) \ ({ \ int __pu_err = 0; \ - __put_user_check((x), (ptr), __pu_err); \ + __put_user_error((x), (ptr), __pu_err); \ __pu_err; \ }) @@ -426,7 +405,7 @@ extern unsigned long __must_check __arch_copy_in_user(void __user *to, const voi extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned long n); static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n) { - if (access_ok(VERIFY_WRITE, to, n)) + if (access_ok(to, n)) n = __arch_clear_user(__uaccess_mask_ptr(to), n); return n; } diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index b13ca091f833..f2a83ff6b73c 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -40,10 +40,11 @@ * The following SVCs are ARM private. */ #define __ARM_NR_COMPAT_BASE 0x0f0000 -#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) -#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) +#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE + 2) +#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) +#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 399 +#define __NR_compat_syscalls 428 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 2cd6dcf8d246..23f1a44acada 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -270,7 +270,7 @@ __SYSCALL(__NR_uname, sys_newuname) /* 123 was sys_modify_ldt */ __SYSCALL(123, sys_ni_syscall) #define __NR_adjtimex 124 -__SYSCALL(__NR_adjtimex, compat_sys_adjtimex) +__SYSCALL(__NR_adjtimex, sys_adjtimex_time32) #define __NR_mprotect 125 __SYSCALL(__NR_mprotect, sys_mprotect) #define __NR_sigprocmask 126 @@ -344,9 +344,9 @@ __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) #define __NR_sched_get_priority_min 160 __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) #define __NR_sched_rr_get_interval 161 -__SYSCALL(__NR_sched_rr_get_interval, compat_sys_sched_rr_get_interval) +__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32) #define __NR_nanosleep 162 -__SYSCALL(__NR_nanosleep, compat_sys_nanosleep) +__SYSCALL(__NR_nanosleep, sys_nanosleep_time32) #define __NR_mremap 163 __SYSCALL(__NR_mremap, sys_mremap) #define __NR_setresuid 164 @@ -376,7 +376,7 @@ __SYSCALL(__NR_rt_sigprocmask, compat_sys_rt_sigprocmask) #define __NR_rt_sigpending 176 __SYSCALL(__NR_rt_sigpending, compat_sys_rt_sigpending) #define __NR_rt_sigtimedwait 177 -__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait) +__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32) #define __NR_rt_sigqueueinfo 178 __SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo) #define __NR_rt_sigsuspend 179 @@ -502,7 +502,7 @@ __SYSCALL(__NR_tkill, sys_tkill) #define __NR_sendfile64 239 __SYSCALL(__NR_sendfile64, sys_sendfile64) #define __NR_futex 240 -__SYSCALL(__NR_futex, compat_sys_futex) +__SYSCALL(__NR_futex, sys_futex_time32) #define __NR_sched_setaffinity 241 __SYSCALL(__NR_sched_setaffinity, compat_sys_sched_setaffinity) #define __NR_sched_getaffinity 242 @@ -512,7 +512,7 @@ __SYSCALL(__NR_io_setup, compat_sys_io_setup) #define __NR_io_destroy 244 __SYSCALL(__NR_io_destroy, sys_io_destroy) #define __NR_io_getevents 245 -__SYSCALL(__NR_io_getevents, compat_sys_io_getevents) +__SYSCALL(__NR_io_getevents, sys_io_getevents_time32) #define __NR_io_submit 246 __SYSCALL(__NR_io_submit, compat_sys_io_submit) #define __NR_io_cancel 247 @@ -538,21 +538,21 @@ __SYSCALL(__NR_set_tid_address, sys_set_tid_address) #define __NR_timer_create 257 __SYSCALL(__NR_timer_create, compat_sys_timer_create) #define __NR_timer_settime 258 -__SYSCALL(__NR_timer_settime, compat_sys_timer_settime) +__SYSCALL(__NR_timer_settime, sys_timer_settime32) #define __NR_timer_gettime 259 -__SYSCALL(__NR_timer_gettime, compat_sys_timer_gettime) +__SYSCALL(__NR_timer_gettime, sys_timer_gettime32) #define __NR_timer_getoverrun 260 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) #define __NR_timer_delete 261 __SYSCALL(__NR_timer_delete, sys_timer_delete) #define __NR_clock_settime 262 -__SYSCALL(__NR_clock_settime, compat_sys_clock_settime) +__SYSCALL(__NR_clock_settime, sys_clock_settime32) #define __NR_clock_gettime 263 -__SYSCALL(__NR_clock_gettime, compat_sys_clock_gettime) +__SYSCALL(__NR_clock_gettime, sys_clock_gettime32) #define __NR_clock_getres 264 -__SYSCALL(__NR_clock_getres, compat_sys_clock_getres) +__SYSCALL(__NR_clock_getres, sys_clock_getres_time32) #define __NR_clock_nanosleep 265 -__SYSCALL(__NR_clock_nanosleep, compat_sys_clock_nanosleep) +__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep_time32) #define __NR_statfs64 266 __SYSCALL(__NR_statfs64, compat_sys_aarch32_statfs64) #define __NR_fstatfs64 267 @@ -560,7 +560,7 @@ __SYSCALL(__NR_fstatfs64, compat_sys_aarch32_fstatfs64) #define __NR_tgkill 268 __SYSCALL(__NR_tgkill, sys_tgkill) #define __NR_utimes 269 -__SYSCALL(__NR_utimes, compat_sys_utimes) +__SYSCALL(__NR_utimes, sys_utimes_time32) #define __NR_arm_fadvise64_64 270 __SYSCALL(__NR_arm_fadvise64_64, compat_sys_aarch32_fadvise64_64) #define __NR_pciconfig_iobase 271 @@ -574,9 +574,9 @@ __SYSCALL(__NR_mq_open, compat_sys_mq_open) #define __NR_mq_unlink 275 __SYSCALL(__NR_mq_unlink, sys_mq_unlink) #define __NR_mq_timedsend 276 -__SYSCALL(__NR_mq_timedsend, compat_sys_mq_timedsend) +__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend_time32) #define __NR_mq_timedreceive 277 -__SYSCALL(__NR_mq_timedreceive, compat_sys_mq_timedreceive) +__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive_time32) #define __NR_mq_notify 278 __SYSCALL(__NR_mq_notify, compat_sys_mq_notify) #define __NR_mq_getsetattr 279 @@ -622,7 +622,7 @@ __SYSCALL(__NR_semop, sys_semop) #define __NR_semget 299 __SYSCALL(__NR_semget, sys_semget) #define __NR_semctl 300 -__SYSCALL(__NR_semctl, compat_sys_semctl) +__SYSCALL(__NR_semctl, compat_sys_old_semctl) #define __NR_msgsnd 301 __SYSCALL(__NR_msgsnd, compat_sys_msgsnd) #define __NR_msgrcv 302 @@ -630,7 +630,7 @@ __SYSCALL(__NR_msgrcv, compat_sys_msgrcv) #define __NR_msgget 303 __SYSCALL(__NR_msgget, sys_msgget) #define __NR_msgctl 304 -__SYSCALL(__NR_msgctl, compat_sys_msgctl) +__SYSCALL(__NR_msgctl, compat_sys_old_msgctl) #define __NR_shmat 305 __SYSCALL(__NR_shmat, compat_sys_shmat) #define __NR_shmdt 306 @@ -638,7 +638,7 @@ __SYSCALL(__NR_shmdt, sys_shmdt) #define __NR_shmget 307 __SYSCALL(__NR_shmget, sys_shmget) #define __NR_shmctl 308 -__SYSCALL(__NR_shmctl, compat_sys_shmctl) +__SYSCALL(__NR_shmctl, compat_sys_old_shmctl) #define __NR_add_key 309 __SYSCALL(__NR_add_key, sys_add_key) #define __NR_request_key 310 @@ -646,7 +646,7 @@ __SYSCALL(__NR_request_key, sys_request_key) #define __NR_keyctl 311 __SYSCALL(__NR_keyctl, compat_sys_keyctl) #define __NR_semtimedop 312 -__SYSCALL(__NR_semtimedop, compat_sys_semtimedop) +__SYSCALL(__NR_semtimedop, sys_semtimedop_time32) #define __NR_vserver 313 __SYSCALL(__NR_vserver, sys_ni_syscall) #define __NR_ioprio_set 314 @@ -674,7 +674,7 @@ __SYSCALL(__NR_mknodat, sys_mknodat) #define __NR_fchownat 325 __SYSCALL(__NR_fchownat, sys_fchownat) #define __NR_futimesat 326 -__SYSCALL(__NR_futimesat, compat_sys_futimesat) +__SYSCALL(__NR_futimesat, sys_futimesat_time32) #define __NR_fstatat64 327 __SYSCALL(__NR_fstatat64, sys_fstatat64) #define __NR_unlinkat 328 @@ -692,9 +692,9 @@ __SYSCALL(__NR_fchmodat, sys_fchmodat) #define __NR_faccessat 334 __SYSCALL(__NR_faccessat, sys_faccessat) #define __NR_pselect6 335 -__SYSCALL(__NR_pselect6, compat_sys_pselect6) +__SYSCALL(__NR_pselect6, compat_sys_pselect6_time32) #define __NR_ppoll 336 -__SYSCALL(__NR_ppoll, compat_sys_ppoll) +__SYSCALL(__NR_ppoll, compat_sys_ppoll_time32) #define __NR_unshare 337 __SYSCALL(__NR_unshare, sys_unshare) #define __NR_set_robust_list 338 @@ -718,7 +718,7 @@ __SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait) #define __NR_kexec_load 347 __SYSCALL(__NR_kexec_load, compat_sys_kexec_load) #define __NR_utimensat 348 -__SYSCALL(__NR_utimensat, compat_sys_utimensat) +__SYSCALL(__NR_utimensat, sys_utimensat_time32) #define __NR_signalfd 349 __SYSCALL(__NR_signalfd, compat_sys_signalfd) #define __NR_timerfd_create 350 @@ -728,9 +728,9 @@ __SYSCALL(__NR_eventfd, sys_eventfd) #define __NR_fallocate 352 __SYSCALL(__NR_fallocate, compat_sys_aarch32_fallocate) #define __NR_timerfd_settime 353 -__SYSCALL(__NR_timerfd_settime, compat_sys_timerfd_settime) +__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime32) #define __NR_timerfd_gettime 354 -__SYSCALL(__NR_timerfd_gettime, compat_sys_timerfd_gettime) +__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime32) #define __NR_signalfd4 355 __SYSCALL(__NR_signalfd4, compat_sys_signalfd4) #define __NR_eventfd2 356 @@ -752,7 +752,7 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, compat_sys_rt_tgsigqueueinfo) #define __NR_perf_event_open 364 __SYSCALL(__NR_perf_event_open, sys_perf_event_open) #define __NR_recvmmsg 365 -__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg) +__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg_time32) #define __NR_accept4 366 __SYSCALL(__NR_accept4, sys_accept4) #define __NR_fanotify_init 367 @@ -766,7 +766,7 @@ __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) #define __NR_open_by_handle_at 371 __SYSCALL(__NR_open_by_handle_at, compat_sys_open_by_handle_at) #define __NR_clock_adjtime 372 -__SYSCALL(__NR_clock_adjtime, compat_sys_clock_adjtime) +__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime32) #define __NR_syncfs 373 __SYSCALL(__NR_syncfs, sys_syncfs) #define __NR_sendmmsg 374 @@ -819,6 +819,61 @@ __SYSCALL(__NR_pkey_free, sys_pkey_free) __SYSCALL(__NR_statx, sys_statx) #define __NR_rseq 398 __SYSCALL(__NR_rseq, sys_rseq) +#define __NR_io_pgetevents 399 +__SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents) +#define __NR_migrate_pages 400 +__SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages) +#define __NR_kexec_file_load 401 +__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) +/* 402 is unused */ +#define __NR_clock_gettime64 403 +__SYSCALL(__NR_clock_gettime64, sys_clock_gettime) +#define __NR_clock_settime64 404 +__SYSCALL(__NR_clock_settime64, sys_clock_settime) +#define __NR_clock_adjtime64 405 +__SYSCALL(__NR_clock_adjtime64, sys_clock_adjtime) +#define __NR_clock_getres_time64 406 +__SYSCALL(__NR_clock_getres_time64, sys_clock_getres) +#define __NR_clock_nanosleep_time64 407 +__SYSCALL(__NR_clock_nanosleep_time64, sys_clock_nanosleep) +#define __NR_timer_gettime64 408 +__SYSCALL(__NR_timer_gettime64, sys_timer_gettime) +#define __NR_timer_settime64 409 +__SYSCALL(__NR_timer_settime64, sys_timer_settime) +#define __NR_timerfd_gettime64 410 +__SYSCALL(__NR_timerfd_gettime64, sys_timerfd_gettime) +#define __NR_timerfd_settime64 411 +__SYSCALL(__NR_timerfd_settime64, sys_timerfd_settime) +#define __NR_utimensat_time64 412 +__SYSCALL(__NR_utimensat_time64, sys_utimensat) +#define __NR_pselect6_time64 413 +__SYSCALL(__NR_pselect6_time64, compat_sys_pselect6_time64) +#define __NR_ppoll_time64 414 +__SYSCALL(__NR_ppoll_time64, compat_sys_ppoll_time64) +#define __NR_io_pgetevents_time64 416 +__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +#define __NR_recvmmsg_time64 417 +__SYSCALL(__NR_recvmmsg_time64, compat_sys_recvmmsg_time64) +#define __NR_mq_timedsend_time64 418 +__SYSCALL(__NR_mq_timedsend_time64, sys_mq_timedsend) +#define __NR_mq_timedreceive_time64 419 +__SYSCALL(__NR_mq_timedreceive_time64, sys_mq_timedreceive) +#define __NR_semtimedop_time64 420 +__SYSCALL(__NR_semtimedop_time64, sys_semtimedop) +#define __NR_rt_sigtimedwait_time64 421 +__SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64) +#define __NR_futex_time64 422 +__SYSCALL(__NR_futex_time64, sys_futex) +#define __NR_sched_rr_get_interval_time64 423 +__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval) +#define __NR_pidfd_send_signal 424 +__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal) +#define __NR_io_uring_setup 425 +__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup) +#define __NR_io_uring_enter 426 +__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter) +#define __NR_io_uring_register 427 +__SYSCALL(__NR_io_uring_register, sys_io_uring_register) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h index b3ef061d8b74..d88e56b90b93 100644 --- a/arch/arm64/include/asm/xen/page-coherent.h +++ b/arch/arm64/include/asm/xen/page-coherent.h @@ -1 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H +#define _ASM_ARM64_XEN_PAGE_COHERENT_H + +#include <linux/dma-mapping.h> +#include <asm/page.h> #include <xen/arm/page-coherent.h> + +static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) +{ + return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs); +} + +static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) +{ + dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs); +} + +static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(handle); + + if (pfn_valid(pfn)) + dma_direct_sync_single_for_cpu(hwdev, handle, size, dir); + else + __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); +} + +static inline void xen_dma_sync_single_for_device(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(handle); + if (pfn_valid(pfn)) + dma_direct_sync_single_for_device(hwdev, handle, size, dir); + else + __xen_dma_sync_single_for_device(hwdev, handle, size, dir); +} + +static inline void xen_dma_map_page(struct device *hwdev, struct page *page, + dma_addr_t dev_addr, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + unsigned long page_pfn = page_to_xen_pfn(page); + unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); + unsigned long compound_pages = + (1<<compound_order(page)) * XEN_PFN_PER_PAGE; + bool local = (page_pfn <= dev_pfn) && + (dev_pfn - page_pfn < compound_pages); + + if (local) + dma_direct_map_page(hwdev, page, offset, size, dir, attrs); + else + __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); +} + +static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + unsigned long pfn = PFN_DOWN(handle); + /* + * Dom0 is mapped 1:1, while the Linux page can be spanned accross + * multiple Xen page, it's not possible to have a mix of local and + * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a + * foreign mfn will always return false. If the page is local we can + * safely call the native dma_ops function, otherwise we call the xen + * specific function. + */ + if (pfn_valid(pfn)) + dma_direct_unmap_page(hwdev, handle, size, dir, attrs); + else + __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); +} + +#endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */ diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h new file mode 100644 index 000000000000..856386ad076c --- /dev/null +++ b/arch/arm64/include/asm/xor.h @@ -0,0 +1,73 @@ +/* + * arch/arm64/include/asm/xor.h + * + * Authors: Jackie Liu <liuyun01@kylinos.cn> + * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/hardirq.h> +#include <asm-generic/xor.h> +#include <asm/hwcap.h> +#include <asm/neon.h> + +#ifdef CONFIG_KERNEL_MODE_NEON + +extern struct xor_block_template const xor_block_inner_neon; + +static void +xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + kernel_neon_begin(); + xor_block_inner_neon.do_2(bytes, p1, p2); + kernel_neon_end(); +} + +static void +xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + kernel_neon_begin(); + xor_block_inner_neon.do_3(bytes, p1, p2, p3); + kernel_neon_end(); +} + +static void +xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + kernel_neon_begin(); + xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4); + kernel_neon_end(); +} + +static void +xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + kernel_neon_begin(); + xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5); + kernel_neon_end(); +} + +static struct xor_block_template xor_block_arm64 = { + .name = "arm64_neon", + .do_2 = xor_neon_2, + .do_3 = xor_neon_3, + .do_4 = xor_neon_4, + .do_5 = xor_neon_5 +}; +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_32regs); \ + if (cpu_has_neon()) { \ + xor_speed(&xor_block_arm64);\ + } \ + } while (0) + +#endif /* ! CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild index 6c5adf458690..602d137932dc 100644 --- a/arch/arm64/include/uapi/asm/Kbuild +++ b/arch/arm64/include/uapi/asm/Kbuild @@ -1,22 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 -# UAPI Header export list -include include/uapi/asm-generic/Kbuild.asm -generic-y += errno.h -generic-y += ioctl.h -generic-y += ioctls.h -generic-y += ipcbuf.h generic-y += kvm_para.h -generic-y += mman.h -generic-y += msgbuf.h -generic-y += poll.h -generic-y += resource.h -generic-y += sembuf.h -generic-y += shmbuf.h -generic-y += socket.h -generic-y += sockios.h -generic-y += swab.h -generic-y += termbits.h -generic-y += termios.h -generic-y += types.h -generic-y += siginfo.h diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 2bcd6e4f3474..5f0750c2199c 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -49,5 +49,8 @@ #define HWCAP_ILRCPC (1 << 26) #define HWCAP_FLAGM (1 << 27) #define HWCAP_SSBS (1 << 28) +#define HWCAP_SB (1 << 29) +#define HWCAP_PACA (1 << 30) +#define HWCAP_PACG (1UL << 31) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index a36227fdb084..d78623acb649 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -23,7 +23,7 @@ #include <linux/types.h> #include <asm/hwcap.h> -#include <asm/sigcontext.h> +#include <asm/sve_context.h> /* @@ -130,9 +130,9 @@ struct user_sve_header { */ /* Offset from the start of struct user_sve_header to the register data */ -#define SVE_PT_REGS_OFFSET \ - ((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1)) \ - / SVE_VQ_BYTES * SVE_VQ_BYTES) +#define SVE_PT_REGS_OFFSET \ + ((sizeof(struct user_sve_header) + (__SVE_VQ_BYTES - 1)) \ + / __SVE_VQ_BYTES * __SVE_VQ_BYTES) /* * The register data content and layout depends on the value of the @@ -178,39 +178,36 @@ struct user_sve_header { * Additional data might be appended in the future. */ -#define SVE_PT_SVE_ZREG_SIZE(vq) SVE_SIG_ZREG_SIZE(vq) -#define SVE_PT_SVE_PREG_SIZE(vq) SVE_SIG_PREG_SIZE(vq) -#define SVE_PT_SVE_FFR_SIZE(vq) SVE_SIG_FFR_SIZE(vq) +#define SVE_PT_SVE_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq) +#define SVE_PT_SVE_PREG_SIZE(vq) __SVE_PREG_SIZE(vq) +#define SVE_PT_SVE_FFR_SIZE(vq) __SVE_FFR_SIZE(vq) #define SVE_PT_SVE_FPSR_SIZE sizeof(__u32) #define SVE_PT_SVE_FPCR_SIZE sizeof(__u32) -#define __SVE_SIG_TO_PT(offset) \ - ((offset) - SVE_SIG_REGS_OFFSET + SVE_PT_REGS_OFFSET) - #define SVE_PT_SVE_OFFSET SVE_PT_REGS_OFFSET #define SVE_PT_SVE_ZREGS_OFFSET \ - __SVE_SIG_TO_PT(SVE_SIG_ZREGS_OFFSET) + (SVE_PT_REGS_OFFSET + __SVE_ZREGS_OFFSET) #define SVE_PT_SVE_ZREG_OFFSET(vq, n) \ - __SVE_SIG_TO_PT(SVE_SIG_ZREG_OFFSET(vq, n)) + (SVE_PT_REGS_OFFSET + __SVE_ZREG_OFFSET(vq, n)) #define SVE_PT_SVE_ZREGS_SIZE(vq) \ - (SVE_PT_SVE_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_PT_SVE_ZREGS_OFFSET) + (SVE_PT_SVE_ZREG_OFFSET(vq, __SVE_NUM_ZREGS) - SVE_PT_SVE_ZREGS_OFFSET) #define SVE_PT_SVE_PREGS_OFFSET(vq) \ - __SVE_SIG_TO_PT(SVE_SIG_PREGS_OFFSET(vq)) + (SVE_PT_REGS_OFFSET + __SVE_PREGS_OFFSET(vq)) #define SVE_PT_SVE_PREG_OFFSET(vq, n) \ - __SVE_SIG_TO_PT(SVE_SIG_PREG_OFFSET(vq, n)) + (SVE_PT_REGS_OFFSET + __SVE_PREG_OFFSET(vq, n)) #define SVE_PT_SVE_PREGS_SIZE(vq) \ - (SVE_PT_SVE_PREG_OFFSET(vq, SVE_NUM_PREGS) - \ + (SVE_PT_SVE_PREG_OFFSET(vq, __SVE_NUM_PREGS) - \ SVE_PT_SVE_PREGS_OFFSET(vq)) #define SVE_PT_SVE_FFR_OFFSET(vq) \ - __SVE_SIG_TO_PT(SVE_SIG_FFR_OFFSET(vq)) + (SVE_PT_REGS_OFFSET + __SVE_FFR_OFFSET(vq)) #define SVE_PT_SVE_FPSR_OFFSET(vq) \ ((SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq) + \ - (SVE_VQ_BYTES - 1)) \ - / SVE_VQ_BYTES * SVE_VQ_BYTES) + (__SVE_VQ_BYTES - 1)) \ + / __SVE_VQ_BYTES * __SVE_VQ_BYTES) #define SVE_PT_SVE_FPCR_OFFSET(vq) \ (SVE_PT_SVE_FPSR_OFFSET(vq) + SVE_PT_SVE_FPSR_SIZE) @@ -221,14 +218,34 @@ struct user_sve_header { #define SVE_PT_SVE_SIZE(vq, flags) \ ((SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE \ - - SVE_PT_SVE_OFFSET + (SVE_VQ_BYTES - 1)) \ - / SVE_VQ_BYTES * SVE_VQ_BYTES) + - SVE_PT_SVE_OFFSET + (__SVE_VQ_BYTES - 1)) \ + / __SVE_VQ_BYTES * __SVE_VQ_BYTES) #define SVE_PT_SIZE(vq, flags) \ (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \ SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \ : SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags)) +/* pointer authentication masks (NT_ARM_PAC_MASK) */ + +struct user_pac_mask { + __u64 data_mask; + __u64 insn_mask; +}; + +/* pointer authentication keys (NT_ARM_PACA_KEYS, NT_ARM_PACG_KEYS) */ + +struct user_pac_address_keys { + __uint128_t apiakey; + __uint128_t apibkey; + __uint128_t apdakey; + __uint128_t apdbkey; +}; + +struct user_pac_generic_keys { + __uint128_t apgakey; +}; + #endif /* __ASSEMBLY__ */ #endif /* _UAPI__ASM_PTRACE_H */ diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index dca8f8b5168b..5f3c0cec5af9 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -130,6 +130,8 @@ struct sve_context { #endif /* !__ASSEMBLY__ */ +#include <asm/sve_context.h> + /* * The SVE architecture leaves space for future expansion of the * vector length beyond its initial architectural limit of 2048 bits @@ -138,21 +140,20 @@ struct sve_context { * See linux/Documentation/arm64/sve.txt for a description of the VL/VQ * terminology. */ -#define SVE_VQ_BYTES 16 /* number of bytes per quadword */ +#define SVE_VQ_BYTES __SVE_VQ_BYTES /* bytes per quadword */ -#define SVE_VQ_MIN 1 -#define SVE_VQ_MAX 512 +#define SVE_VQ_MIN __SVE_VQ_MIN +#define SVE_VQ_MAX __SVE_VQ_MAX -#define SVE_VL_MIN (SVE_VQ_MIN * SVE_VQ_BYTES) -#define SVE_VL_MAX (SVE_VQ_MAX * SVE_VQ_BYTES) +#define SVE_VL_MIN __SVE_VL_MIN +#define SVE_VL_MAX __SVE_VL_MAX -#define SVE_NUM_ZREGS 32 -#define SVE_NUM_PREGS 16 +#define SVE_NUM_ZREGS __SVE_NUM_ZREGS +#define SVE_NUM_PREGS __SVE_NUM_PREGS -#define sve_vl_valid(vl) \ - ((vl) % SVE_VQ_BYTES == 0 && (vl) >= SVE_VL_MIN && (vl) <= SVE_VL_MAX) -#define sve_vq_from_vl(vl) ((vl) / SVE_VQ_BYTES) -#define sve_vl_from_vq(vq) ((vq) * SVE_VQ_BYTES) +#define sve_vl_valid(vl) __sve_vl_valid(vl) +#define sve_vq_from_vl(vl) __sve_vq_from_vl(vl) +#define sve_vl_from_vq(vq) __sve_vl_from_vq(vq) /* * If the SVE registers are currently live for the thread at signal delivery, @@ -205,34 +206,33 @@ struct sve_context { * Additional data might be appended in the future. */ -#define SVE_SIG_ZREG_SIZE(vq) ((__u32)(vq) * SVE_VQ_BYTES) -#define SVE_SIG_PREG_SIZE(vq) ((__u32)(vq) * (SVE_VQ_BYTES / 8)) -#define SVE_SIG_FFR_SIZE(vq) SVE_SIG_PREG_SIZE(vq) +#define SVE_SIG_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq) +#define SVE_SIG_PREG_SIZE(vq) __SVE_PREG_SIZE(vq) +#define SVE_SIG_FFR_SIZE(vq) __SVE_FFR_SIZE(vq) #define SVE_SIG_REGS_OFFSET \ - ((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1)) \ - / SVE_VQ_BYTES * SVE_VQ_BYTES) + ((sizeof(struct sve_context) + (__SVE_VQ_BYTES - 1)) \ + / __SVE_VQ_BYTES * __SVE_VQ_BYTES) -#define SVE_SIG_ZREGS_OFFSET SVE_SIG_REGS_OFFSET +#define SVE_SIG_ZREGS_OFFSET \ + (SVE_SIG_REGS_OFFSET + __SVE_ZREGS_OFFSET) #define SVE_SIG_ZREG_OFFSET(vq, n) \ - (SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREG_SIZE(vq) * (n)) -#define SVE_SIG_ZREGS_SIZE(vq) \ - (SVE_SIG_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_SIG_ZREGS_OFFSET) + (SVE_SIG_REGS_OFFSET + __SVE_ZREG_OFFSET(vq, n)) +#define SVE_SIG_ZREGS_SIZE(vq) __SVE_ZREGS_SIZE(vq) #define SVE_SIG_PREGS_OFFSET(vq) \ - (SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREGS_SIZE(vq)) + (SVE_SIG_REGS_OFFSET + __SVE_PREGS_OFFSET(vq)) #define SVE_SIG_PREG_OFFSET(vq, n) \ - (SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREG_SIZE(vq) * (n)) -#define SVE_SIG_PREGS_SIZE(vq) \ - (SVE_SIG_PREG_OFFSET(vq, SVE_NUM_PREGS) - SVE_SIG_PREGS_OFFSET(vq)) + (SVE_SIG_REGS_OFFSET + __SVE_PREG_OFFSET(vq, n)) +#define SVE_SIG_PREGS_SIZE(vq) __SVE_PREGS_SIZE(vq) #define SVE_SIG_FFR_OFFSET(vq) \ - (SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREGS_SIZE(vq)) + (SVE_SIG_REGS_OFFSET + __SVE_FFR_OFFSET(vq)) #define SVE_SIG_REGS_SIZE(vq) \ - (SVE_SIG_FFR_OFFSET(vq) + SVE_SIG_FFR_SIZE(vq) - SVE_SIG_REGS_OFFSET) - -#define SVE_SIG_CONTEXT_SIZE(vq) (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq)) + (__SVE_FFR_OFFSET(vq) + __SVE_FFR_SIZE(vq)) +#define SVE_SIG_CONTEXT_SIZE(vq) \ + (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq)) #endif /* _UAPI__ASM_SIGCONTEXT_H */ diff --git a/arch/arm64/include/uapi/asm/sve_context.h b/arch/arm64/include/uapi/asm/sve_context.h new file mode 100644 index 000000000000..754ab751b523 --- /dev/null +++ b/arch/arm64/include/uapi/asm/sve_context.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Copyright (C) 2017-2018 ARM Limited */ + +/* + * For use by other UAPI headers only. + * Do not make direct use of header or its definitions. + */ + +#ifndef _UAPI__ASM_SVE_CONTEXT_H +#define _UAPI__ASM_SVE_CONTEXT_H + +#include <linux/types.h> + +#define __SVE_VQ_BYTES 16 /* number of bytes per quadword */ + +#define __SVE_VQ_MIN 1 +#define __SVE_VQ_MAX 512 + +#define __SVE_VL_MIN (__SVE_VQ_MIN * __SVE_VQ_BYTES) +#define __SVE_VL_MAX (__SVE_VQ_MAX * __SVE_VQ_BYTES) + +#define __SVE_NUM_ZREGS 32 +#define __SVE_NUM_PREGS 16 + +#define __sve_vl_valid(vl) \ + ((vl) % __SVE_VQ_BYTES == 0 && \ + (vl) >= __SVE_VL_MIN && \ + (vl) <= __SVE_VL_MAX) + +#define __sve_vq_from_vl(vl) ((vl) / __SVE_VQ_BYTES) +#define __sve_vl_from_vq(vq) ((vq) * __SVE_VQ_BYTES) + +#define __SVE_ZREG_SIZE(vq) ((__u32)(vq) * __SVE_VQ_BYTES) +#define __SVE_PREG_SIZE(vq) ((__u32)(vq) * (__SVE_VQ_BYTES / 8)) +#define __SVE_FFR_SIZE(vq) __SVE_PREG_SIZE(vq) + +#define __SVE_ZREGS_OFFSET 0 +#define __SVE_ZREG_OFFSET(vq, n) \ + (__SVE_ZREGS_OFFSET + __SVE_ZREG_SIZE(vq) * (n)) +#define __SVE_ZREGS_SIZE(vq) \ + (__SVE_ZREG_OFFSET(vq, __SVE_NUM_ZREGS) - __SVE_ZREGS_OFFSET) + +#define __SVE_PREGS_OFFSET(vq) \ + (__SVE_ZREGS_OFFSET + __SVE_ZREGS_SIZE(vq)) +#define __SVE_PREG_OFFSET(vq, n) \ + (__SVE_PREGS_OFFSET(vq) + __SVE_PREG_SIZE(vq) * (n)) +#define __SVE_PREGS_SIZE(vq) \ + (__SVE_PREG_OFFSET(vq, __SVE_NUM_PREGS) - __SVE_PREGS_OFFSET(vq)) + +#define __SVE_FFR_OFFSET(vq) \ + (__SVE_PREGS_OFFSET(vq) + __SVE_PREGS_SIZE(vq)) + +#endif /* ! _UAPI__ASM_SVE_CONTEXT_H */ diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h index dae1584cf017..4703d218663a 100644 --- a/arch/arm64/include/uapi/asm/unistd.h +++ b/arch/arm64/include/uapi/asm/unistd.h @@ -17,5 +17,7 @@ #define __ARCH_WANT_RENAMEAT #define __ARCH_WANT_NEW_STAT +#define __ARCH_WANT_SET_GET_RLIMIT +#define __ARCH_WANT_TIME32_SYSCALLS #include <asm-generic/unistd.h> |