Diffstat (limited to 'arch')
289 files changed, 4023 insertions, 2811 deletions
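The arch/Kconfig hunk that opens the diff below adds AUTOFDO_CLANG and PROPELLER_CLANG, both driven by make variables named in their help texts rather than by Kconfig values. A rough sketch of the two-phase AutoFDO workflow the help text describes follows; CLANG_AUTOFDO_PROFILE comes from this diff, while the perf and create_llvm_prof invocations are assumptions based on common AutoFDO tooling, not part of the commit:

    # Phase 1: build without a profile; per the help text, AutoFDO options are
    # still passed to Clang so the kernel is suitable for profile collection.
    make LLVM=1 vmlinux

    # Record branch samples under a representative workload and convert them to
    # an AutoFDO profile (tool name and flags are assumptions, not from this diff).
    perf record -a -b -o kernel.data -- ./run-workload.sh
    create_llvm_prof --binary=vmlinux --profile=kernel.data --out=kernel.afdo

    # Phase 2: rebuild, feeding the profile back through the variable named in
    # the Kconfig help.
    make LLVM=1 CLANG_AUTOFDO_PROFILE=kernel.afdo vmlinux

A Propeller build would follow the same shape via CLANG_PROPELLER_PROFILE_PREFIX.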
diff --git a/arch/Kconfig b/arch/Kconfig index 832f68af7c77..6682b2a53e34 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -812,6 +812,45 @@ config LTO_CLANG_THIN If unsure, say Y. endchoice +config ARCH_SUPPORTS_AUTOFDO_CLANG + bool + +config AUTOFDO_CLANG + bool "Enable Clang's AutoFDO build (EXPERIMENTAL)" + depends on ARCH_SUPPORTS_AUTOFDO_CLANG + depends on CC_IS_CLANG && CLANG_VERSION >= 170000 + help + This option enables Clang's AutoFDO build. When + an AutoFDO profile is specified in variable + CLANG_AUTOFDO_PROFILE during the build process, + Clang uses the profile to optimize the kernel. + + If no profile is specified, AutoFDO options are + still passed to Clang to facilitate the collection + of perf data for creating an AutoFDO profile in + subsequent builds. + + If unsure, say N. + +config ARCH_SUPPORTS_PROPELLER_CLANG + bool + +config PROPELLER_CLANG + bool "Enable Clang's Propeller build" + depends on ARCH_SUPPORTS_PROPELLER_CLANG + depends on CC_IS_CLANG && CLANG_VERSION >= 190000 + help + This option enables Clang's Propeller build. When the Propeller + profiles are specified in variable CLANG_PROPELLER_PROFILE_PREFIX + during the build process, Clang uses the profiles to optimize + the kernel. + + If no profile is specified, Propeller options are still passed + to Clang to facilitate the collection of perf data for creating + the Propeller profiles in subsequent builds. + + If unsure, say N. + config ARCH_SUPPORTS_CFI_CLANG bool help diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c index 5808a66e2a81..3048758304b5 100644 --- a/arch/alpha/kernel/pci-sysfs.c +++ b/arch/alpha/kernel/pci-sysfs.c @@ -64,7 +64,7 @@ static int __pci_mmap_fits(struct pci_dev *pdev, int num, * Return: %0 on success, negative error code otherwise */ static int pci_mmap_resource(struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, struct vm_area_struct *vma, int sparse) { struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj)); @@ -93,14 +93,14 @@ static int pci_mmap_resource(struct kobject *kobj, } static int pci_mmap_resource_sparse(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, struct vm_area_struct *vma) { return pci_mmap_resource(kobj, attr, vma, 1); } static int pci_mmap_resource_dense(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, struct vm_area_struct *vma) { return pci_mmap_resource(kobj, attr, vma, 0); diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 5b2488142041..ea5a1dcb133b 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -297,7 +297,6 @@ config ARC_PAGE_SIZE_16K config ARC_PAGE_SIZE_4K bool "4KB" select HAVE_PAGE_SIZE_4KB - depends on ARC_MMU_V3 || ARC_MMU_V4 endchoice @@ -474,7 +473,8 @@ config HIGHMEM config ARC_HAS_PAE40 bool "Support for the 40-bit Physical Address Extension" - depends on ISA_ARCV2 + depends on ARC_MMU_V4 + depends on !ARC_PAGE_SIZE_4K select HIGHMEM select PHYS_ADDR_T_64BIT help diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 2390dd042e36..fb98478ed1ab 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,7 +6,7 @@ KBUILD_DEFCONFIG := haps_hs_smp_defconfig ifeq ($(CROSS_COMPILE),) -CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-) +CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux- arc-linux-gnu-) endif cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ diff --git a/arch/arc/boot/dts/axc001.dtsi
b/arch/arc/boot/dts/axc001.dtsi index 2a151607b080..88bcc7ab6f5a 100644 --- a/arch/arc/boot/dts/axc001.dtsi +++ b/arch/arc/boot/dts/axc001.dtsi @@ -54,7 +54,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <30>; + ngpios = <30>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index c0a812674ce9..9a2dc39a5cff 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -62,7 +62,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <30>; + ngpios = <30>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 67556f4b7057..f31382cb8be4 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -69,7 +69,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <30>; + ngpios = <30>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index b64435385304..3add2fe257f8 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -250,7 +250,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <32>; + ngpios = <32>; reg = <0>; }; @@ -258,7 +258,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <8>; + ngpios = <8>; reg = <1>; }; @@ -266,7 +266,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <8>; + ngpios = <8>; reg = <2>; }; }; @@ -281,7 +281,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <30>; + ngpios = <30>; reg = <0>; }; @@ -289,7 +289,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <10>; + ngpios = <10>; reg = <1>; }; @@ -297,7 +297,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <8>; + ngpios = <8>; reg = <2>; }; }; diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 41b980df862b..98bb850722a4 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -308,7 +308,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <24>; + ngpios = <24>; reg = <0>; }; }; diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 4b13f60fe7ca..005d9e4d187a 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -146,7 +146,7 @@ #ifndef __ASSEMBLY__ -#include <soc/arc/aux.h> +#include <soc/arc/arc_aux.h> /* Helpers */ #define TO_KB(bytes) ((bytes) >> 10) diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index 58045c898340..76f43db0890f 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -48,7 +48,7 @@ \ switch(sizeof((_p_))) { \ case 1: \ - _prev_ = (__typeof__(*(ptr)))cmpxchg_emu_u8((volatile u8 *)_p_, (uintptr_t)_o_, (uintptr_t)_n_); \ + _prev_ = (__typeof__(*(ptr)))cmpxchg_emu_u8((volatile u8 *__force)_p_, (uintptr_t)_o_, (uintptr_t)_n_); \ break; \ case 4: \ _prev_ = __cmpxchg(_p_, _o_, _n_); \ diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h index d85dc0721907..41412642f279 100644 --- a/arch/arc/include/asm/mmu-arcv2.h +++ 
b/arch/arc/include/asm/mmu-arcv2.h @@ -9,7 +9,7 @@ #ifndef _ASM_ARC_MMU_ARCV2_H #define _ASM_ARC_MMU_ARCV2_H -#include <soc/arc/aux.h> +#include <soc/arc/arc_aux.h> /* * TLB Management regs diff --git a/arch/arc/net/bpf_jit_arcv2.c b/arch/arc/net/bpf_jit_arcv2.c index 4458e409ca0a..6d989b6d88c6 100644 --- a/arch/arc/net/bpf_jit_arcv2.c +++ b/arch/arc/net/bpf_jit_arcv2.c @@ -2916,7 +2916,7 @@ bool check_jmp_32(u32 curr_off, u32 targ_off, u8 cond) addendum = (cond == ARC_CC_AL) ? 0 : INSN_len_normal; disp = get_displacement(curr_off + addendum, targ_off); - if (ARC_CC_AL) + if (cond == ARC_CC_AL) return is_valid_far_disp(disp); else return is_valid_near_disp(disp); diff --git a/arch/arm/Makefile b/arch/arm/Makefile index aafebf145738..00ca7886b18e 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -264,13 +264,13 @@ stack_protector_prepare: prepare0 -mstack-protector-guard=tls \ -mstack-protector-guard-offset=$(shell \ awk '{if ($$2 == "TSK_STACK_CANARY") print $$3;}'\ - include/generated/asm-offsets.h)) + $(objtree)/include/generated/asm-offsets.h)) else stack_protector_prepare: prepare0 $(eval SSP_PLUGIN_CFLAGS := \ -fplugin-arg-arm_ssp_per_task_plugin-offset=$(shell \ awk '{if ($$2 == "TSK_STACK_CANARY") print $$3;}'\ - include/generated/asm-offsets.h)) + $(objtree)/include/generated/asm-offsets.h)) $(eval KBUILD_CFLAGS += $(SSP_PLUGIN_CFLAGS)) $(eval GCC_PLUGINS_CFLAGS += $(SSP_PLUGIN_CFLAGS)) endif diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index 06b0e5fd54a6..cb6ef449b987 100644 --- a/arch/arm/common/locomo.c +++ b/arch/arm/common/locomo.c @@ -516,7 +516,7 @@ static void locomo_remove(struct platform_device *dev) */ static struct platform_driver locomo_device_driver = { .probe = locomo_probe, - .remove_new = locomo_remove, + .remove = locomo_remove, #ifdef CONFIG_PM .suspend = locomo_suspend, .resume = locomo_resume, diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 550978dc3c50..9846f30990f7 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -1154,7 +1154,7 @@ static struct dev_pm_ops sa1111_pm_ops = { */ static struct platform_driver sa1111_device_driver = { .probe = sa1111_probe, - .remove_new = sa1111_remove, + .remove = sa1111_remove, .driver = { .name = "sa1111", .pm = &sa1111_pm_ops, diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c index 9018c7240166..0b08b6621878 100644 --- a/arch/arm/common/scoop.c +++ b/arch/arm/common/scoop.c @@ -250,7 +250,7 @@ static void scoop_remove(struct platform_device *pdev) static struct platform_driver scoop_driver = { .probe = scoop_probe, - .remove_new = scoop_remove, + .remove = scoop_remove, .suspend = scoop_suspend, .resume = scoop_resume, .driver = { diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 1dfae1af8e31..ef6a657c8d13 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -25,6 +25,7 @@ #include <asm/tls.h> #include <asm/system_info.h> #include <asm/uaccess-asm.h> +#include <asm/kasan_def.h> #include "entry-header.S" #include <asm/probes.h> @@ -561,6 +562,13 @@ ENTRY(__switch_to) @ entries covering the vmalloc region. 
@ ldr r2, [ip] +#ifdef CONFIG_KASAN_VMALLOC + @ Also dummy read from the KASAN shadow memory for the new stack if we + @ are using KASAN + mov_l r2, KASAN_SHADOW_OFFSET + add r2, r2, ip, lsr #KASAN_SHADOW_SCALE_SHIFT + ldr r2, [r2] +#endif #endif @ When CONFIG_THREAD_INFO_IN_TASK=n, the update of SP itself is what diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index b68cb86dbe4c..e898f7c2733e 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -596,7 +596,7 @@ static struct platform_driver imx_mmdc_driver = { .of_match_table = imx_mmdc_dt_ids, }, .probe = imx_mmdc_probe, - .remove_new = imx_mmdc_remove, + .remove = imx_mmdc_remove, }; static int __init imx_mmdc_init(void) diff --git a/arch/arm/mach-omap1/omap-dma.c b/arch/arm/mach-omap1/omap-dma.c index f091f78631d0..aebe5e55ff60 100644 --- a/arch/arm/mach-omap1/omap-dma.c +++ b/arch/arm/mach-omap1/omap-dma.c @@ -832,7 +832,7 @@ static void omap_system_dma_remove(struct platform_device *pdev) static struct platform_driver omap_system_dma_driver = { .probe = omap_system_dma_probe, - .remove_new = omap_system_dma_remove, + .remove = omap_system_dma_remove, .driver = { .name = "omap_dma_system" }, diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c index 72fa2e3fd353..0c8d9000df5a 100644 --- a/arch/arm/mach-pxa/sharpsl_pm.c +++ b/arch/arm/mach-pxa/sharpsl_pm.c @@ -919,7 +919,7 @@ static void sharpsl_pm_remove(struct platform_device *pdev) static struct platform_driver sharpsl_pm_driver = { .probe = sharpsl_pm_probe, - .remove_new = sharpsl_pm_remove, + .remove = sharpsl_pm_remove, .suspend = sharpsl_pm_suspend, .resume = sharpsl_pm_resume, .driver = { diff --git a/arch/arm/mach-sa1100/jornada720_ssp.c b/arch/arm/mach-sa1100/jornada720_ssp.c index 1956b095e699..d94810217095 100644 --- a/arch/arm/mach-sa1100/jornada720_ssp.c +++ b/arch/arm/mach-sa1100/jornada720_ssp.c @@ -188,7 +188,7 @@ static void jornada_ssp_remove(struct platform_device *dev) struct platform_driver jornadassp_driver = { .probe = jornada_ssp_probe, - .remove_new = jornada_ssp_remove, + .remove = jornada_ssp_remove, .driver = { .name = "jornada_ssp", }, diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c index 0ef0ebbf31ac..88fe79f0a4ed 100644 --- a/arch/arm/mach-sa1100/neponset.c +++ b/arch/arm/mach-sa1100/neponset.c @@ -423,7 +423,7 @@ static const struct dev_pm_ops neponset_pm_ops = { static struct platform_driver neponset_device_driver = { .probe = neponset_probe, - .remove_new = neponset_remove, + .remove = neponset_remove, .driver = { .name = "neponset", .pm = PM_OPS, diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 794cfea9f9d4..89f1c97f3079 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -23,6 +23,7 @@ */ #include <linux/module.h> #include <linux/errno.h> +#include <linux/kasan.h> #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/io.h> @@ -115,16 +116,40 @@ int ioremap_page(unsigned long virt, unsigned long phys, } EXPORT_SYMBOL(ioremap_page); +#ifdef CONFIG_KASAN +static unsigned long arm_kasan_mem_to_shadow(unsigned long addr) +{ + return (unsigned long)kasan_mem_to_shadow((void *)addr); +} +#else +static unsigned long arm_kasan_mem_to_shadow(unsigned long addr) +{ + return 0; +} +#endif + +static void memcpy_pgd(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + end = ALIGN(end, PGDIR_SIZE); + memcpy(pgd_offset(mm, start), pgd_offset_k(start), + sizeof(pgd_t) * (pgd_index(end) - pgd_index(start))); +} + void 
__check_vmalloc_seq(struct mm_struct *mm) { int seq; do { - seq = atomic_read(&init_mm.context.vmalloc_seq); - memcpy(pgd_offset(mm, VMALLOC_START), - pgd_offset_k(VMALLOC_START), - sizeof(pgd_t) * (pgd_index(VMALLOC_END) - - pgd_index(VMALLOC_START))); + seq = atomic_read_acquire(&init_mm.context.vmalloc_seq); + memcpy_pgd(mm, VMALLOC_START, VMALLOC_END); + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + unsigned long start = + arm_kasan_mem_to_shadow(VMALLOC_START); + unsigned long end = + arm_kasan_mem_to_shadow(VMALLOC_END); + memcpy_pgd(mm, start, end); + } /* * Use a store-release so that other CPUs that observe the * counter's new value are guaranteed to see the results of the diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index b68efe643a12..d44867fc0c5e 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -56,6 +56,34 @@ extern unsigned int VFP_arch_feroceon __alias(VFP_arch); union vfp_state *vfp_current_hw_state[NR_CPUS]; /* + * Claim ownership of the VFP unit. + * + * The caller may change VFP registers until vfp_state_release() is called. + * + * local_bh_disable() is used to disable preemption and to disable VFP + * processing in softirq context. On PREEMPT_RT kernels local_bh_disable() is + * not sufficient because it only serializes soft interrupt related sections + * via a local lock, but stays preemptible. Disabling preemption is the right + * choice here as bottom half processing is always in thread context on RT + * kernels so it implicitly prevents bottom half processing as well. + */ +static void vfp_state_hold(void) +{ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_disable(); + else + preempt_disable(); +} + +static void vfp_state_release(void) +{ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_bh_enable(); + else + preempt_enable(); +} + +/* * Is 'thread's most up to date state stored in this CPUs hardware? * Must be called from non-preemptible context. */ @@ -240,7 +268,7 @@ static void vfp_panic(char *reason, u32 inst) /* * Process bitmask of exception conditions. */ -static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_regs *regs) +static int vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr) { int si_code = 0; @@ -248,8 +276,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_ if (exceptions == VFP_EXCEPTION_ERROR) { vfp_panic("unhandled bounce", inst); - vfp_raise_sigfpe(FPE_FLTINV, regs); - return; + return FPE_FLTINV; } /* @@ -277,8 +304,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_ RAISE(FPSCR_OFC, FPSCR_OFE, FPE_FLTOVF); RAISE(FPSCR_IOC, FPSCR_IOE, FPE_FLTINV); - if (si_code) - vfp_raise_sigfpe(si_code, regs); + return si_code; } /* @@ -324,6 +350,8 @@ static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs) static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) { u32 fpscr, orig_fpscr, fpsid, exceptions; + int si_code2 = 0; + int si_code = 0; pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc); @@ -369,8 +397,8 @@ static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) * unallocated VFP instruction but with FPSCR.IXE set and not * on VFP subarch 1. 
*/ - vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs); - return; + si_code = vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr); + goto exit; } /* @@ -394,14 +422,14 @@ static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) */ exceptions = vfp_emulate_instruction(trigger, fpscr, regs); if (exceptions) - vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); + si_code2 = vfp_raise_exceptions(exceptions, trigger, orig_fpscr); /* * If there isn't a second FP instruction, exit now. Note that * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1. */ if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V)) - return; + goto exit; /* * The barrier() here prevents fpinst2 being read @@ -413,7 +441,13 @@ static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) emulate: exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs); if (exceptions) - vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); + si_code = vfp_raise_exceptions(exceptions, trigger, orig_fpscr); +exit: + vfp_state_release(); + if (si_code2) + vfp_raise_sigfpe(si_code2, regs); + if (si_code) + vfp_raise_sigfpe(si_code, regs); } static void vfp_enable(void *unused) @@ -512,11 +546,9 @@ static inline void vfp_pm_init(void) { } */ void vfp_sync_hwstate(struct thread_info *thread) { - unsigned int cpu = get_cpu(); + vfp_state_hold(); - local_bh_disable(); - - if (vfp_state_in_hw(cpu, thread)) { + if (vfp_state_in_hw(raw_smp_processor_id(), thread)) { u32 fpexc = fmrx(FPEXC); /* @@ -527,8 +559,7 @@ void vfp_sync_hwstate(struct thread_info *thread) fmxr(FPEXC, fpexc); } - local_bh_enable(); - put_cpu(); + vfp_state_release(); } /* Ensure that the thread reloads the hardware VFP state on the next use. */ @@ -683,7 +714,7 @@ static int vfp_support_entry(struct pt_regs *regs, u32 trigger) if (!user_mode(regs)) return vfp_kmode_exception(regs, trigger); - local_bh_disable(); + vfp_state_hold(); fpexc = fmrx(FPEXC); /* @@ -748,6 +779,7 @@ static int vfp_support_entry(struct pt_regs *regs, u32 trigger) * replay the instruction that trapped. */ fmxr(FPEXC, fpexc); + vfp_state_release(); } else { /* Check for synchronous or asynchronous exceptions */ if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) { @@ -762,17 +794,17 @@ static int vfp_support_entry(struct pt_regs *regs, u32 trigger) if (!(fpscr & FPSCR_IXE)) { if (!(fpscr & FPSCR_LENGTH_MASK)) { pr_debug("not VFP\n"); - local_bh_enable(); + vfp_state_release(); return -ENOEXEC; } fpexc |= FPEXC_DEX; } } bounce: regs->ARM_pc += 4; + /* VFP_bounce() will invoke vfp_state_release() */ VFP_bounce(trigger, fpexc, regs); } - local_bh_enable(); return 0; } @@ -837,7 +869,7 @@ void kernel_neon_begin(void) unsigned int cpu; u32 fpexc; - local_bh_disable(); + vfp_state_hold(); /* * Kernel mode NEON is only allowed outside of hardirq context with @@ -868,7 +900,7 @@ void kernel_neon_end(void) { /* Disable the NEON/VFP unit. 
*/ fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN); - local_bh_enable(); + vfp_state_release(); } EXPORT_SYMBOL(kernel_neon_end); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index d743737bf9ce..100570a048c5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -14,7 +14,6 @@ config ARM64 select ARCH_HAS_DEBUG_WX select ARCH_BINFMT_ELF_EXTRA_PHDRS select ARCH_BINFMT_ELF_STATE - select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE @@ -1236,6 +1235,17 @@ config HISILICON_ERRATUM_161600802 If unsure, say Y. +config HISILICON_ERRATUM_162100801 + bool "Hip09 162100801 erratum support" + default y + help + When enabling GICv4.1 in hip09, VMAPP will fail to clear some caches + during the unmapping operation, which will cause some vSGIs to be lost. + To fix the issue, invalidate the related vPE cache through GICR_INVALLR + after VMOVP. + + If unsure, say Y. + config QCOM_FALKOR_ERRATUM_1003 bool "Falkor E1003: Incorrect translation due to ASID change" default y diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 9efd3f37c2fd..358c68565bfd 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -71,7 +71,7 @@ stack_protector_prepare: prepare0 -mstack-protector-guard-reg=sp_el0 \ -mstack-protector-guard-offset=$(shell \ awk '{if ($$2 == "TSK_STACK_CANARY") print $$3;}' \ - include/generated/asm-offsets.h)) + $(objtree)/include/generated/asm-offsets.h)) endif ifeq ($(CONFIG_ARM64_BTI_KERNEL),y) diff --git a/arch/arm64/boot/dts/mediatek/mt8173-elm-hana.dtsi b/arch/arm64/boot/dts/mediatek/mt8173-elm-hana.dtsi index ae0379fd42a9..dfc5c2f0ddef 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-elm-hana.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173-elm-hana.dtsi @@ -14,6 +14,7 @@ compatible = "melfas,mip4_ts"; reg = <0x34>; interrupts-extended = <&pio 88 IRQ_TYPE_LEVEL_LOW>; + status = "fail-needs-probe"; }; /* @@ -26,6 +27,7 @@ reg = <0x20>; hid-descr-addr = <0x0020>; interrupts-extended = <&pio 88 IRQ_TYPE_LEVEL_LOW>; + status = "fail-needs-probe"; }; /* Lenovo Ideapad C330 uses G2Touch touchscreen as a 2nd source touchscreen */ @@ -35,6 +37,7 @@ hid-descr-addr = <0x0001>; interrupt-parent = <&pio>; interrupts = <88 IRQ_TYPE_LEVEL_LOW>; + status = "fail-needs-probe"; }; }; @@ -47,6 +50,8 @@ trackpad2: trackpad@2c { compatible = "hid-over-i2c"; interrupts-extended = <&pio 117 IRQ_TYPE_LEVEL_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&trackpad_irq>; reg = <0x2c>; hid-descr-addr = <0x0020>; /* @@ -58,6 +63,7 @@ */ vdd-supply = <&mt6397_vgp6_reg>; wakeup-source; + status = "fail-needs-probe"; }; }; @@ -82,3 +88,11 @@ }; }; }; + +&touchscreen { + status = "fail-needs-probe"; +}; + +&trackpad { + status = "fail-needs-probe"; +}; diff --git a/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi b/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi index b4d85147b77b..eee64461421f 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173-elm.dtsi @@ -358,12 +358,12 @@ &i2c4 { clock-frequency = <400000>; status = "okay"; - pinctrl-names = "default"; - pinctrl-0 = <&trackpad_irq>; trackpad: trackpad@15 { compatible = "elan,ekth3000"; interrupts-extended = <&pio 117 IRQ_TYPE_LEVEL_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&trackpad_irq>; reg = <0x15>; vcc-supply = <&mt6397_vgp6_reg>; wakeup-source; diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index a523b519700f..a2b5d6f20f4d 100644 ---
a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -18,7 +18,7 @@ #include "aes-ce-setkey.h" -MODULE_IMPORT_NS(CRYPTO_INTERNAL); +MODULE_IMPORT_NS("CRYPTO_INTERNAL"); static int num_rounds(struct crypto_aes_ctx *ctx) { diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index a147e847a5a1..b0150999743f 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -1048,7 +1048,7 @@ unregister_ciphers: #ifdef USE_V8_CRYPTO_EXTENSIONS module_cpu_feature_match(AES, aes_init); -EXPORT_SYMBOL_NS(ce_aes_mac_update, CRYPTO_INTERNAL); +EXPORT_SYMBOL_NS(ce_aes_mac_update, "CRYPTO_INTERNAL"); #else module_init(aes_init); EXPORT_SYMBOL(neon_aes_ecb_encrypt); diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 201a46efd918..cbbf70e0f204 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -44,6 +44,8 @@ cpucap_is_possible(const unsigned int cap) return IS_ENABLED(CONFIG_ARM64_TLB_RANGE); case ARM64_HAS_S1POE: return IS_ENABLED(CONFIG_ARM64_POE); + case ARM64_HAS_GCS: + return IS_ENABLED(CONFIG_ARM64_GCS); case ARM64_UNMAP_KERNEL_AT_EL0: return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0); case ARM64_WORKAROUND_843419: diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b64e49bd9d10..8b4e5a3cd24c 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -847,8 +847,7 @@ static inline bool system_supports_poe(void) static inline bool system_supports_gcs(void) { - return IS_ENABLED(CONFIG_ARM64_GCS) && - alternative_has_cap_unlikely(ARM64_HAS_GCS); + return alternative_has_cap_unlikely(ARM64_HAS_GCS); } static inline bool system_supports_haft(void) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 85ef966c08cd..4ef52d7245bb 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -87,7 +87,7 @@ 1 << PMSCR_EL2_PA_SHIFT) msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter .Lskip_spe_el2_\@: - mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) + mov x0, #MDCR_EL2_E2PB_MASK orr x2, x2, x0 // If we don't have VHE, then // use EL1&0 translation. @@ -100,7 +100,7 @@ and x0, x0, TRBIDR_EL1_P cbnz x0, .Lskip_trace_\@ // If TRBE is available at EL2 - mov x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT) + mov x0, #MDCR_EL2_E2TB_MASK orr x2, x2, x0 // allow the EL1&0 translation // to own it. 
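The MDCR_EL2 hunks in el2_setup.h above, and in hyp-stub.S and kvm/hyp/nvhe/pkvm.c below, drop the explicit shifts by MDCR_EL2_E2PB_SHIFT and MDCR_EL2_E2TB_SHIFT. That is only correct if the *_MASK macros are now pre-shifted to the field position; the sysreg definitions are outside this diff, so the E2PB placement below (bits [13:12]) is an assumption for illustration. A minimal C sketch of the old and new conventions:

    #include <assert.h>
    #include <stdint.h>

    /* Old scheme (assumed): field-relative mask, shifted at every use site. */
    #define E2PB_SHIFT     12
    #define E2PB_MASK_OLD  0x3ULL

    /* New scheme (assumed): the mask encodes the position, GENMASK(13, 12). */
    #define E2PB_MASK_NEW  (0x3ULL << 12)

    static uint64_t clear_e2pb_old(uint64_t mdcr)
    {
            return mdcr & ~(E2PB_MASK_OLD << E2PB_SHIFT);
    }

    static uint64_t clear_e2pb_new(uint64_t mdcr)
    {
            /* Mirrors "bic x0, x0, #MDCR_EL2_E2PB_MASK" in the hyp-stub.S hunk. */
            return mdcr & ~E2PB_MASK_NEW;
    }

    int main(void)
    {
            assert(clear_e2pb_old(~0ULL) == clear_e2pb_new(~0ULL));
            return 0;
    }

Both clear the same bits; the pre-shifted form removes the easy-to-forget manual shift at each use site, which is the pattern these hunks converge on.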
diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h index 1d53022fc7e1..21df8bbd2668 100644 --- a/arch/arm64/include/asm/mman.h +++ b/arch/arm64/include/asm/mman.h @@ -7,6 +7,7 @@ #ifndef BUILD_VDSO #include <linux/compiler.h> #include <linux/fs.h> +#include <linux/hugetlb.h> #include <linux/shmem_fs.h> #include <linux/types.h> @@ -44,7 +45,7 @@ static inline unsigned long arch_calc_vm_flag_bits(struct file *file, if (system_supports_mte()) { if (flags & (MAP_ANONYMOUS | MAP_HUGETLB)) return VM_MTE_ALLOWED; - if (shmem_file(file)) + if (shmem_file(file) || is_file_hugepages(file)) return VM_MTE_ALLOWED; } diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 65f76064c86b..ae990da1eae5 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -114,8 +114,8 @@ SYM_CODE_START_LOCAL(__finalise_el2) // Use EL2 translations for SPE & TRBE and disable access from EL1 mrs x0, mdcr_el2 - bic x0, x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) - bic x0, x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT) + bic x0, x0, #MDCR_EL2_E2PB_MASK + bic x0, x0, #MDCR_EL2_E2TB_MASK msr mdcr_el2, x0 // Transfer the MM state from EL1 to EL2 diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c index 7f99723fbb8c..1041bc67a3ee 100644 --- a/arch/arm64/kernel/patching.c +++ b/arch/arm64/kernel/patching.c @@ -30,20 +30,17 @@ static bool is_image_text(unsigned long addr) static void __kprobes *patch_map(void *addr, int fixmap) { - unsigned long uintaddr = (uintptr_t) addr; - bool image = is_image_text(uintaddr); - struct page *page; - - if (image) - page = phys_to_page(__pa_symbol(addr)); - else if (IS_ENABLED(CONFIG_EXECMEM)) - page = vmalloc_to_page(addr); - else - return addr; - - BUG_ON(!page); - return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + - (uintaddr & ~PAGE_MASK)); + phys_addr_t phys; + + if (is_image_text((unsigned long)addr)) { + phys = __pa_symbol(addr); + } else { + struct page *page = vmalloc_to_page(addr); + BUG_ON(!page); + phys = page_to_phys(page) + offset_in_page(addr); + } + + return (void *)set_fixmap_offset(fixmap, phys); } static void __kprobes patch_unmap(int fixmap) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index e4437f62a2cd..f79b0d5f71ac 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -720,6 +720,8 @@ static int fpmr_set(struct task_struct *target, const struct user_regset *regset if (!system_supports_fpmr()) return -EINVAL; + fpmr = target->thread.uw.fpmr; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpmr, 0, count); if (ret) return ret; @@ -1427,7 +1429,7 @@ static int tagged_addr_ctrl_get(struct task_struct *target, { long ctrl = get_tagged_addr_ctrl(target); - if (IS_ERR_VALUE(ctrl)) + if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl))) return ctrl; return membuf_write(&to, &ctrl, sizeof(ctrl)); @@ -1441,6 +1443,10 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct int ret; long ctrl; + ctrl = get_tagged_addr_ctrl(target); + if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl))) + return ctrl; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); if (ret) return ret; @@ -1472,6 +1478,8 @@ static int poe_set(struct task_struct *target, const struct if (!system_supports_poe()) return -EINVAL; + ctrl = target->thread.por_el0; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); if (ret) return ret; @@ -1483,6 +1491,22 @@ static int poe_set(struct task_struct *target, const struct #endif #ifdef 
CONFIG_ARM64_GCS +static void task_gcs_to_user(struct user_gcs *user_gcs, + const struct task_struct *target) +{ + user_gcs->features_enabled = target->thread.gcs_el0_mode; + user_gcs->features_locked = target->thread.gcs_el0_locked; + user_gcs->gcspr_el0 = target->thread.gcspr_el0; +} + +static void task_gcs_from_user(struct task_struct *target, + const struct user_gcs *user_gcs) +{ + target->thread.gcs_el0_mode = user_gcs->features_enabled; + target->thread.gcs_el0_locked = user_gcs->features_locked; + target->thread.gcspr_el0 = user_gcs->gcspr_el0; +} + static int gcs_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) @@ -1495,9 +1519,7 @@ static int gcs_get(struct task_struct *target, if (target == current) gcs_preserve_current_state(); - user_gcs.features_enabled = target->thread.gcs_el0_mode; - user_gcs.features_locked = target->thread.gcs_el0_locked; - user_gcs.gcspr_el0 = target->thread.gcspr_el0; + task_gcs_to_user(&user_gcs, target); return membuf_write(&to, &user_gcs, sizeof(user_gcs)); } @@ -1513,6 +1535,8 @@ static int gcs_set(struct task_struct *target, const struct if (!system_supports_gcs()) return -EINVAL; + task_gcs_to_user(&user_gcs, target); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_gcs, 0, -1); if (ret) return ret; @@ -1520,9 +1544,7 @@ static int gcs_set(struct task_struct *target, const struct if (user_gcs.features_enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) return -EINVAL; - target->thread.gcs_el0_mode = user_gcs.features_enabled; - target->thread.gcs_el0_locked = user_gcs.features_locked; - target->thread.gcspr_el0 = user_gcs.gcspr_el0; + task_gcs_from_user(target, &user_gcs); return 0; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 14ac6fdb872b..37e24f1bd227 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -1462,10 +1462,33 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig, struct rt_sigframe_user_layout *user, int usig) { __sigrestore_t sigtramp; + int err; + + if (ksig->ka.sa.sa_flags & SA_RESTORER) + sigtramp = ksig->ka.sa.sa_restorer; + else + sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp); + + err = gcs_signal_entry(sigtramp, ksig); + if (err) + return err; + + /* + * We must not fail from this point onwards. We are going to update + * registers, including SP, in order to invoke the signal handler. If + * we failed and attempted to deliver a nested SIGSEGV to a handler + * after that point, the subsequent sigreturn would end up restoring + * the (partial) state for the original signal handler. 
+ */ regs->regs[0] = usig; + if (ksig->ka.sa.sa_flags & SA_SIGINFO) { + regs->regs[1] = (unsigned long)&user->sigframe->info; + regs->regs[2] = (unsigned long)&user->sigframe->uc; + } regs->sp = (unsigned long)user->sigframe; regs->regs[29] = (unsigned long)&user->next_frame->fp; + regs->regs[30] = (unsigned long)sigtramp; regs->pc = (unsigned long)ksig->ka.sa.sa_handler; /* @@ -1506,14 +1529,7 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig, sme_smstop(); } - if (ksig->ka.sa.sa_flags & SA_RESTORER) - sigtramp = ksig->ka.sa.sa_restorer; - else - sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp); - - regs->regs[30] = (unsigned long)sigtramp; - - return gcs_signal_entry(sigtramp, ksig); + return 0; } static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, @@ -1537,14 +1553,16 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, err |= __save_altstack(&frame->uc.uc_stack, regs->sp); err |= setup_sigframe(&user, regs, set, &ua_state); - if (err == 0) { + if (ksig->ka.sa.sa_flags & SA_SIGINFO) + err |= copy_siginfo_to_user(&frame->info, &ksig->info); + + if (err == 0) err = setup_return(regs, ksig, &user, usig); - if (ksig->ka.sa.sa_flags & SA_SIGINFO) { - err |= copy_siginfo_to_user(&frame->info, &ksig->info); - regs->regs[1] = (unsigned long)&frame->info; - regs->regs[2] = (unsigned long)&frame->uc; - } - } + + /* + * We must not fail if setup_return() succeeded - see comment at the + * beginning of setup_return(). + */ if (err == 0) set_handler_user_access_state(); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index caef85462acb..1d9d51d7627f 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -26,7 +26,6 @@ enum kunwind_source { KUNWIND_SOURCE_CALLER, KUNWIND_SOURCE_TASK, KUNWIND_SOURCE_REGS_PC, - KUNWIND_SOURCE_REGS_LR, }; union unwind_flags { @@ -138,8 +137,10 @@ kunwind_recover_return_address(struct kunwind_state *state) orig_pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, state->common.pc, (void *)state->common.fp); - if (WARN_ON_ONCE(state->common.pc == orig_pc)) + if (state->common.pc == orig_pc) { + WARN_ON_ONCE(state->task == current); return -EINVAL; + } state->common.pc = orig_pc; state->flags.fgraph = 1; } @@ -178,23 +179,8 @@ int kunwind_next_regs_pc(struct kunwind_state *state) state->regs = regs; state->common.pc = regs->pc; state->common.fp = regs->regs[29]; - state->source = KUNWIND_SOURCE_REGS_PC; - return 0; -} - -static __always_inline int -kunwind_next_regs_lr(struct kunwind_state *state) -{ - /* - * The stack for the regs was consumed by kunwind_next_regs_pc(), so we - * cannot consume that again here, but we know the regs are safe to - * access. 
- */ - state->common.pc = state->regs->regs[30]; - state->common.fp = state->regs->regs[29]; state->regs = NULL; - state->source = KUNWIND_SOURCE_REGS_LR; - + state->source = KUNWIND_SOURCE_REGS_PC; return 0; } @@ -215,12 +201,12 @@ kunwind_next_frame_record_meta(struct kunwind_state *state) case FRAME_META_TYPE_FINAL: if (meta == &task_pt_regs(tsk)->stackframe) return -ENOENT; - WARN_ON_ONCE(1); + WARN_ON_ONCE(tsk == current); return -EINVAL; case FRAME_META_TYPE_PT_REGS: return kunwind_next_regs_pc(state); default: - WARN_ON_ONCE(1); + WARN_ON_ONCE(tsk == current); return -EINVAL; } } @@ -274,11 +260,8 @@ kunwind_next(struct kunwind_state *state) case KUNWIND_SOURCE_FRAME: case KUNWIND_SOURCE_CALLER: case KUNWIND_SOURCE_TASK: - case KUNWIND_SOURCE_REGS_LR: - err = kunwind_next_frame_record(state); - break; case KUNWIND_SOURCE_REGS_PC: - err = kunwind_next_regs_lr(state); + err = kunwind_next_frame_record(state); break; default: err = -EINVAL; @@ -436,7 +419,6 @@ static const char *state_source_string(const struct kunwind_state *state) case KUNWIND_SOURCE_CALLER: return "C"; case KUNWIND_SOURCE_TASK: return "T"; case KUNWIND_SOURCE_REGS_PC: return "P"; - case KUNWIND_SOURCE_REGS_LR: return "L"; default: return "U"; } } diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 8c5d7990e5b3..3d7eb395e33d 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -739,8 +739,15 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, final_attr = s1_parattr; break; default: - /* MemAttr[2]=0, Device from S2 */ - final_attr = s2_memattr & GENMASK(1,0) << 2; + /* + * MemAttr[2]=0, Device from S2. + * + * FWB does not influence the way that stage 1 + * memory types and attributes are combined + * with stage 2 Device type and attributes. + */ + final_attr = min(s2_memattr_to_attr(s2_memattr), + s1_parattr); } } else { /* Combination of R_HMNDG, R_TNHFM and R_GQFSF */ diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 01616c39a810..071993c16de8 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -126,7 +126,7 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) /* Trap SPE */ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) { mdcr_set |= MDCR_EL2_TPMS; - mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; + mdcr_clear |= MDCR_EL2_E2PB_MASK; } /* Trap Trace Filter */ @@ -143,7 +143,7 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) /* Trap External Trace */ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids)) - mdcr_clear |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT; + mdcr_clear |= MDCR_EL2_E2TB_MASK; vcpu->arch.mdcr_el2 |= mdcr_set; vcpu->arch.mdcr_el2 &= ~mdcr_clear; diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index aeaa6017ffd8..9b36218b48de 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -951,7 +951,7 @@ u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *vcpu, return v; } -static void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1) +static __always_inline void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1) { int i = sr - __SANITISED_REG_START__; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 8ad62284fa23..456102bc0b55 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -274,12 +274,23 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) irq_work_sync(&vcpu->arch.pmu.overflow_work); } -bool kvm_pmu_counter_is_hyp(struct 
kvm_vcpu *vcpu, unsigned int idx) +static u64 kvm_pmu_hyp_counter_mask(struct kvm_vcpu *vcpu) { - unsigned int hpmn; + unsigned int hpmn, n; - if (!vcpu_has_nv(vcpu) || idx == ARMV8_PMU_CYCLE_IDX) - return false; + if (!vcpu_has_nv(vcpu)) + return 0; + + hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2)); + n = vcpu->kvm->arch.pmcr_n; + + /* + * Programming HPMN to a value greater than PMCR_EL0.N is + * CONSTRAINED UNPREDICTABLE. Make the implementation choice that an + * UNKNOWN number of counters (in our case, zero) are reserved for EL2. + */ + if (hpmn >= n) + return 0; /* * Programming HPMN=0 is CONSTRAINED UNPREDICTABLE if FEAT_HPMN0 isn't @@ -288,20 +299,22 @@ bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx) * implementation choice that all counters are included in the second * range reserved for EL2/EL3. */ - hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2)); - return idx >= hpmn; + return GENMASK(n - 1, hpmn); +} + +bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx) +{ + return kvm_pmu_hyp_counter_mask(vcpu) & BIT(idx); } u64 kvm_pmu_accessible_counter_mask(struct kvm_vcpu *vcpu) { u64 mask = kvm_pmu_implemented_counter_mask(vcpu); - u64 hpmn; if (!vcpu_has_nv(vcpu) || vcpu_is_el2(vcpu)) return mask; - hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2)); - return mask & ~GENMASK(vcpu->kvm->arch.pmcr_n - 1, hpmn); + return mask & ~kvm_pmu_hyp_counter_mask(vcpu); } u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu) @@ -375,15 +388,30 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) } } -static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) +/* + * Returns the PMU overflow state, which is true if there exists an event + * counter where the values of the global enable control, PMOVSSET_EL0[n], and + * PMINTENSET_EL1[n] are all 1. + */ +static bool kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { - u64 reg = 0; + u64 reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); - if ((kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) { - reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); - reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); - reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); - } + reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); + + /* + * PMCR_EL0.E is the global enable control for event counters available + * to EL0 and EL1. + */ + if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) + reg &= kvm_pmu_hyp_counter_mask(vcpu); + + /* + * Otherwise, MDCR_EL2.HPME is the global enable control for event + * counters reserved for EL2. 
+ */ + if (!(vcpu_read_sys_reg(vcpu, MDCR_EL2) & MDCR_EL2_HPME)) + reg &= ~kvm_pmu_hyp_counter_mask(vcpu); return reg; } @@ -396,7 +424,7 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) if (!kvm_vcpu_has_pmu(vcpu)) return; - overflow = !!kvm_pmu_overflow_status(vcpu); + overflow = kvm_pmu_overflow_status(vcpu); if (pmu->irq_level == overflow) return; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 83c6b4a07ef5..e2a5c2918d9e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2618,7 +2618,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_WRITABLE(ID_AA64MMFR0_EL1, ~(ID_AA64MMFR0_EL1_RES0 | ID_AA64MMFR0_EL1_TGRAN4_2 | ID_AA64MMFR0_EL1_TGRAN64_2 | - ID_AA64MMFR0_EL1_TGRAN16_2)), + ID_AA64MMFR0_EL1_TGRAN16_2 | + ID_AA64MMFR0_EL1_ASIDBITS)), ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 | ID_AA64MMFR1_EL1_HCX | ID_AA64MMFR1_EL1_TWED | diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index e1397ab2072a..afb018528bc3 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -287,7 +287,10 @@ static int vgic_debug_show(struct seq_file *s, void *v) * Expect this to succeed, as iter_mark_lpis() takes a reference on * every LPI to be visited. */ - irq = vgic_get_irq(kvm, vcpu, iter->intid); + if (iter->intid < VGIC_NR_PRIVATE_IRQS) + irq = vgic_get_vcpu_irq(vcpu, iter->intid); + else + irq = vgic_get_irq(kvm, iter->intid); if (WARN_ON_ONCE(!irq)) return -EINVAL; diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 48c952563e85..bc7e22ab5d81 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -322,7 +322,7 @@ int vgic_init(struct kvm *kvm) goto out; for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - struct vgic_irq *irq = vgic_get_irq(kvm, vcpu, i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i); switch (dist->vgic_model) { case KVM_DEV_TYPE_ARM_VGIC_V3: diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 198296933e7e..fb96802799c6 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -31,6 +31,41 @@ static int vgic_its_commit_v0(struct vgic_its *its); static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, struct kvm_vcpu *filter_vcpu, bool needs_inv); +#define vgic_its_read_entry_lock(i, g, valp, t) \ + ({ \ + int __sz = vgic_its_get_abi(i)->t##_esz; \ + struct kvm *__k = (i)->dev->kvm; \ + int __ret; \ + \ + BUILD_BUG_ON(NR_ITS_ABIS == 1 && \ + sizeof(*(valp)) != ABI_0_ESZ); \ + if (NR_ITS_ABIS > 1 && \ + KVM_BUG_ON(__sz != sizeof(*(valp)), __k)) \ + __ret = -EINVAL; \ + else \ + __ret = kvm_read_guest_lock(__k, (g), \ + valp, __sz); \ + __ret; \ + }) + +#define vgic_its_write_entry_lock(i, g, val, t) \ + ({ \ + int __sz = vgic_its_get_abi(i)->t##_esz; \ + struct kvm *__k = (i)->dev->kvm; \ + typeof(val) __v = (val); \ + int __ret; \ + \ + BUILD_BUG_ON(NR_ITS_ABIS == 1 && \ + sizeof(__v) != ABI_0_ESZ); \ + if (NR_ITS_ABIS > 1 && \ + KVM_BUG_ON(__sz != sizeof(__v), __k)) \ + __ret = -EINVAL; \ + else \ + __ret = vgic_write_guest_lock(__k, (g), \ + &__v, __sz); \ + __ret; \ + }) + /* * Creates a new (reference to a) struct vgic_irq for a given LPI. 
* If this LPI is already mapped on another ITS, we increase its refcount @@ -42,7 +77,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, struct kvm_vcpu *vcpu) { struct vgic_dist *dist = &kvm->arch.vgic; - struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq; + struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq; unsigned long flags; int ret; @@ -419,7 +454,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) last_byte_offset = byte_offset; } - irq = vgic_get_irq(vcpu->kvm, NULL, intid); + irq = vgic_get_irq(vcpu->kvm, intid); if (!irq) continue; @@ -573,12 +608,22 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, lockdep_assert_held(&its->its_lock); vgic_get_irq_kref(irq); + old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT); + + /* + * Put the reference taken on @irq if the store fails. Intentionally do + * not return the error as the translation cache is best effort. + */ + if (xa_is_err(old)) { + vgic_put_irq(kvm, irq); + return; + } + /* * We could have raced with another CPU caching the same * translation behind our back, ensure we don't leak a * reference if that is the case. */ - old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT); if (old) vgic_put_irq(kvm, old); } @@ -794,7 +839,7 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, its_free_ite(kvm, ite); - return vgic_its_write_entry_lock(its, gpa, 0, ite_esz); + return vgic_its_write_entry_lock(its, gpa, 0ULL, ite); } return E_ITS_DISCARD_UNMAPPED_INTERRUPT; @@ -1143,7 +1188,6 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, bool valid = its_cmd_get_validbit(its_cmd); u8 num_eventid_bits = its_cmd_get_size(its_cmd); gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd); - int dte_esz = vgic_its_get_abi(its)->dte_esz; struct its_device *device; gpa_t gpa; @@ -1168,7 +1212,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, * is an error, so we are done in any case. 
*/ if (!valid) - return vgic_its_write_entry_lock(its, gpa, 0, dte_esz); + return vgic_its_write_entry_lock(its, gpa, 0ULL, dte); device = vgic_its_alloc_device(its, device_id, itt_addr, num_eventid_bits); @@ -1288,7 +1332,7 @@ int vgic_its_invall(struct kvm_vcpu *vcpu) unsigned long intid; xa_for_each(&dist->lpi_xa, intid, irq) { - irq = vgic_get_irq(kvm, NULL, intid); + irq = vgic_get_irq(kvm, intid); if (!irq) continue; @@ -1354,7 +1398,7 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, return 0; xa_for_each(&dist->lpi_xa, intid, irq) { - irq = vgic_get_irq(kvm, NULL, intid); + irq = vgic_get_irq(kvm, intid); if (!irq) continue; @@ -2090,7 +2134,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, * vgic_its_save_ite - Save an interrupt translation entry at @gpa */ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, - struct its_ite *ite, gpa_t gpa, int ite_esz) + struct its_ite *ite, gpa_t gpa) { u32 next_offset; u64 val; @@ -2101,7 +2145,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, ite->collection->collection_id; val = cpu_to_le64(val); - return vgic_its_write_entry_lock(its, gpa, val, ite_esz); + return vgic_its_write_entry_lock(its, gpa, val, ite); } /** @@ -2201,7 +2245,7 @@ static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device) if (ite->irq->hw && !kvm_vgic_global_state.has_gicv4_1) return -EACCES; - ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz); + ret = vgic_its_save_ite(its, device, ite, gpa); if (ret) return ret; } @@ -2240,10 +2284,9 @@ static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev) * @its: ITS handle * @dev: ITS device * @ptr: GPA - * @dte_esz: device table entry size */ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, - gpa_t ptr, int dte_esz) + gpa_t ptr) { u64 val, itt_addr_field; u32 next_offset; @@ -2256,7 +2299,7 @@ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, (dev->num_eventid_bits - 1)); val = cpu_to_le64(val); - return vgic_its_write_entry_lock(its, ptr, val, dte_esz); + return vgic_its_write_entry_lock(its, ptr, val, dte); } /** @@ -2332,10 +2375,8 @@ static int vgic_its_device_cmp(void *priv, const struct list_head *a, */ static int vgic_its_save_device_tables(struct vgic_its *its) { - const struct vgic_its_abi *abi = vgic_its_get_abi(its); u64 baser = its->baser_device_table; struct its_device *dev; - int dte_esz = abi->dte_esz; if (!(baser & GITS_BASER_VALID)) return 0; @@ -2354,7 +2395,7 @@ static int vgic_its_save_device_tables(struct vgic_its *its) if (ret) return ret; - ret = vgic_its_save_dte(its, dev, eaddr, dte_esz); + ret = vgic_its_save_dte(its, dev, eaddr); if (ret) return ret; } @@ -2435,7 +2476,7 @@ static int vgic_its_restore_device_tables(struct vgic_its *its) static int vgic_its_save_cte(struct vgic_its *its, struct its_collection *collection, - gpa_t gpa, int esz) + gpa_t gpa) { u64 val; @@ -2444,7 +2485,7 @@ static int vgic_its_save_cte(struct vgic_its *its, collection->collection_id); val = cpu_to_le64(val); - return vgic_its_write_entry_lock(its, gpa, val, esz); + return vgic_its_write_entry_lock(its, gpa, val, cte); } /* @@ -2452,7 +2493,7 @@ static int vgic_its_save_cte(struct vgic_its *its, * Return +1 on success, 0 if the entry was invalid (which should be * interpreted as end-of-table), and a negative error value for generic errors. 
*/ -static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) +static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa) { struct its_collection *collection; struct kvm *kvm = its->dev->kvm; @@ -2460,7 +2501,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) u64 val; int ret; - ret = vgic_its_read_entry_lock(its, gpa, &val, esz); + ret = vgic_its_read_entry_lock(its, gpa, &val, cte); if (ret) return ret; val = le64_to_cpu(val); @@ -2507,7 +2548,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; list_for_each_entry(collection, &its->collection_list, coll_list) { - ret = vgic_its_save_cte(its, collection, gpa, cte_esz); + ret = vgic_its_save_cte(its, collection, gpa); if (ret) return ret; gpa += cte_esz; @@ -2521,7 +2562,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) * table is not fully filled, add a last dummy element * with valid bit unset */ - return vgic_its_write_entry_lock(its, gpa, 0, cte_esz); + return vgic_its_write_entry_lock(its, gpa, 0ULL, cte); } /* @@ -2546,7 +2587,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; while (read < max_size) { - ret = vgic_its_restore_cte(its, gpa, cte_esz); + ret = vgic_its_restore_cte(its, gpa); if (ret <= 0) break; gpa += cte_esz; diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c index e070cda86e12..f25fccb1f8e6 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -148,7 +148,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, if (!(targets & (1U << c))) continue; - irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); + irq = vgic_get_vcpu_irq(vcpu, intid); raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; @@ -167,7 +167,7 @@ static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu, u64 val = 0; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); val |= (u64)irq->targets << (i * 8); @@ -191,7 +191,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, return; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i); + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, intid + i); int target; raw_spin_lock_irqsave(&irq->irq_lock, flags); @@ -213,7 +213,7 @@ static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu, u64 val = 0; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); val |= (u64)irq->source << (i * 8); @@ -231,7 +231,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, unsigned long flags; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); @@ -253,7 +253,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, unsigned long flags; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 9e50928f5d7d..ae4c0593d114 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c 
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -194,7 +194,7 @@ static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { int intid = VGIC_ADDR_TO_INTID(addr, 64); - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, intid); unsigned long ret = 0; if (!irq) @@ -220,7 +220,7 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, if (addr & 4) return; - irq = vgic_get_irq(vcpu->kvm, NULL, intid); + irq = vgic_get_irq(vcpu->kvm, intid); if (!irq) return; @@ -530,6 +530,7 @@ static void vgic_mmio_write_invlpi(struct kvm_vcpu *vcpu, unsigned long val) { struct vgic_irq *irq; + u32 intid; /* * If the guest wrote only to the upper 32bit part of the @@ -541,9 +542,13 @@ static void vgic_mmio_write_invlpi(struct kvm_vcpu *vcpu, if ((addr & 4) || !vgic_lpis_enabled(vcpu)) return; + intid = lower_32_bits(val); + if (intid < VGIC_MIN_LPI) + return; + vgic_set_rdist_busy(vcpu, true); - irq = vgic_get_irq(vcpu->kvm, NULL, lower_32_bits(val)); + irq = vgic_get_irq(vcpu->kvm, intid); if (irq) { vgic_its_inv_lpi(vcpu->kvm, irq); vgic_put_irq(vcpu->kvm, irq); @@ -1020,7 +1025,7 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) static void vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, u32 sgi, bool allow_group1) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, sgi); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, sgi); unsigned long flags; raw_spin_lock_irqsave(&irq->irq_lock, flags); diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index cf76523a2194..e416e433baff 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -50,7 +50,7 @@ unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); if (irq->group) value |= BIT(i); @@ -74,7 +74,7 @@ void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, unsigned long flags; for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->group = !!(val & BIT(i)); @@ -102,7 +102,7 @@ unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); if (irq->enabled) value |= (1U << i); @@ -122,7 +122,7 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw && vgic_irq_is_sgi(irq->intid)) { @@ -171,7 +171,7 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw && vgic_irq_is_sgi(irq->intid) && irq->enabled) @@ -193,7 +193,7 @@ int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - 
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->enabled = true; @@ -214,7 +214,7 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->enabled = false; @@ -236,7 +236,7 @@ static unsigned long __read_pending(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); unsigned long flags; bool val; @@ -309,7 +309,7 @@ static void __set_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); /* GICD_ISPENDR0 SGI bits are WI when written from the guest. */ if (is_vgic_v2_sgi(vcpu, irq) && !is_user) { @@ -395,7 +395,7 @@ static void __clear_pending(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); /* GICD_ICPENDR0 SGI bits are WI when written from the guest. */ if (is_vgic_v2_sgi(vcpu, irq) && !is_user) { @@ -494,7 +494,7 @@ static unsigned long __vgic_mmio_read_active(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); /* * Even for HW interrupts, don't evaluate the HW state as @@ -598,7 +598,7 @@ static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, int i; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, false); vgic_put_irq(vcpu->kvm, irq); } @@ -635,7 +635,7 @@ static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, int i; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, true); vgic_put_irq(vcpu->kvm, irq); } @@ -672,7 +672,7 @@ unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, u64 val = 0; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); val |= (u64)irq->priority << (i * 8); @@ -698,7 +698,7 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, unsigned long flags; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); /* Narrow the priority range to what we actually support */ @@ -719,7 +719,7 @@ unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, int i; for (i = 0; i < len * 4; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); if (irq->config == VGIC_CONFIG_EDGE) value |= (2U << 
(i * 2)); @@ -750,7 +750,7 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, if (intid + i < VGIC_NR_PRIVATE_IRQS) continue; - irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + irq = vgic_get_irq(vcpu->kvm, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); if (test_bit(i * 2 + 1, &val)) @@ -775,7 +775,7 @@ u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) continue; - irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + irq = vgic_get_vcpu_irq(vcpu, intid + i); if (irq->config == VGIC_CONFIG_LEVEL && irq->line_level) val |= (1U << i); @@ -799,7 +799,7 @@ void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) continue; - irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + irq = vgic_get_vcpu_irq(vcpu, intid + i); /* * Line level is set irrespective of irq type diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index ae5a44d5702d..381673f03c39 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -72,7 +72,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) kvm_notify_acked_irq(vcpu->kvm, 0, intid - VGIC_NR_PRIVATE_IRQS); - irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + irq = vgic_get_vcpu_irq(vcpu, intid); raw_spin_lock(&irq->irq_lock); diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index b217b256853c..f267bc2486a1 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -65,7 +65,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) kvm_notify_acked_irq(vcpu->kvm, 0, intid - VGIC_NR_PRIVATE_IRQS); - irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + irq = vgic_get_vcpu_irq(vcpu, intid); if (!irq) /* An LPI could have been unmapped. */ continue; diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 74a67ad87f29..eedecbbbcf31 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -123,7 +123,7 @@ static void vgic_v4_enable_vsgis(struct kvm_vcpu *vcpu) * IRQ. The SGI code will do its magic. */ for (i = 0; i < VGIC_NR_SGIS; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i); struct irq_desc *desc; unsigned long flags; int ret; @@ -160,7 +160,7 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu) int i; for (i = 0; i < VGIC_NR_SGIS; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i); struct irq_desc *desc; unsigned long flags; int ret; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index f50274fd5581..cc8c6b9b5dd8 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -84,17 +84,11 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) * struct vgic_irq. It also increases the refcount, so any caller is expected * to call vgic_put_irq() once it's finished with this IRQ. 
*/ -struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, - u32 intid) +struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid) { - /* SGIs and PPIs */ - if (intid <= VGIC_MAX_PRIVATE) { - intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1); - return &vcpu->arch.vgic_cpu.private_irqs[intid]; - } - /* SPIs */ - if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) { + if (intid >= VGIC_NR_PRIVATE_IRQS && + intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) { intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS); return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS]; } @@ -106,6 +100,20 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, return NULL; } +struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid) +{ + if (WARN_ON(!vcpu)) + return NULL; + + /* SGIs and PPIs */ + if (intid < VGIC_NR_PRIVATE_IRQS) { + intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS); + return &vcpu->arch.vgic_cpu.private_irqs[intid]; + } + + return vgic_get_irq(vcpu->kvm, intid); +} + /* * We can't do anything in here, because we lack the kvm pointer to * lock and remove the item from the lpi_list. So we keep this function @@ -437,7 +445,10 @@ int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level); - irq = vgic_get_irq(kvm, vcpu, intid); + if (intid < VGIC_NR_PRIVATE_IRQS) + irq = vgic_get_vcpu_irq(vcpu, intid); + else + irq = vgic_get_irq(kvm, intid); if (!irq) return -EINVAL; @@ -499,7 +510,7 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, u32 vintid, struct irq_ops *ops) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid); unsigned long flags; int ret; @@ -524,7 +535,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, */ void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid); unsigned long flags; if (!irq->hw) @@ -547,7 +558,7 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) if (!vgic_initialized(vcpu->kvm)) return -EAGAIN; - irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + irq = vgic_get_vcpu_irq(vcpu, vintid); BUG_ON(!irq); raw_spin_lock_irqsave(&irq->irq_lock, flags); @@ -560,7 +571,7 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid); unsigned long flags; int ret = -1; @@ -596,7 +607,7 @@ int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid)) return -EINVAL; - irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + irq = vgic_get_vcpu_irq(vcpu, intid); raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->owner && irq->owner != owner) ret = -EEXIST; @@ -1008,7 +1019,7 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid) if (!vgic_initialized(vcpu->kvm)) return false; - irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + irq = vgic_get_vcpu_irq(vcpu, vintid); raw_spin_lock_irqsave(&irq->irq_lock, flags); map_is_active = irq->hw && irq->active; raw_spin_unlock_irqrestore(&irq->irq_lock, flags); 
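The vgic.c hunks above split the old vgic_get_irq(kvm, vcpu, intid) lookup in two: vgic_get_irq() now resolves only shared interrupts (SPIs, with LPIs falling through to vgic_get_lpi()), while the new vgic_get_vcpu_irq() helper serves private interrupts (SGIs/PPIs) out of the vCPU's private_irqs array and falls back to the per-VM path for everything else. Below is a minimal standalone sketch of that dispatch pattern, with simplified stand-in types rather than the kernel's structures; it omits the refcounting and the array_index_nospec() clamping that the real code keeps.

#include <stdio.h>

#define NR_PRIVATE_IRQS 32   /* SGIs + PPIs, as in VGIC_NR_PRIVATE_IRQS */

struct irq { unsigned int intid; };

struct vm {
	unsigned int nr_spis;
	struct irq spis[64];
};

struct vcpu {
	struct vm *vm;
	struct irq private_irqs[NR_PRIVATE_IRQS];
};

/* Shared interrupts only; the LPI lookup is elided in this sketch. */
static struct irq *get_irq(struct vm *vm, unsigned int intid)
{
	if (intid >= NR_PRIVATE_IRQS &&
	    intid < vm->nr_spis + NR_PRIVATE_IRQS)
		return &vm->spis[intid - NR_PRIVATE_IRQS];
	return NULL;
}

static struct irq *get_vcpu_irq(struct vcpu *vcpu, unsigned int intid)
{
	if (!vcpu)
		return NULL;                       /* kernel: WARN_ON(!vcpu) */
	if (intid < NR_PRIVATE_IRQS)               /* SGIs/PPIs live in the vCPU */
		return &vcpu->private_irqs[intid];
	return get_irq(vcpu->vm, intid);           /* everything else is per-VM */
}

int main(void)
{
	struct vm vm = { .nr_spis = 64 };
	struct vcpu vcpu = { .vm = &vm };

	/* intid 5 is an SGI (private); intid 40 is an SPI (shared). */
	printf("sgi=%p spi=%p\n",
	       (void *)get_vcpu_irq(&vcpu, 5),
	       (void *)get_vcpu_irq(&vcpu, 40));
	return 0;
}

As the hunks show, callers that always operate on banked per-vCPU state (the GICD_I*R0 and redistributor paths, SGI folding, vSGI setup) switch to the vCPU helper, while callers that only ever see shared INTIDs, such as vgic_mmio_write_irouter(), drop the now-unused vcpu argument; kvm_vgic_inject_irq() picks the path explicitly based on the INTID range.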
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 309295f5e1b0..122d95b4e284 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -146,29 +146,6 @@ static inline int vgic_write_guest_lock(struct kvm *kvm, gpa_t gpa, return ret; } -static inline int vgic_its_read_entry_lock(struct vgic_its *its, gpa_t eaddr, - u64 *eval, unsigned long esize) -{ - struct kvm *kvm = its->dev->kvm; - - if (KVM_BUG_ON(esize != sizeof(*eval), kvm)) - return -EINVAL; - - return kvm_read_guest_lock(kvm, eaddr, eval, esize); - -} - -static inline int vgic_its_write_entry_lock(struct vgic_its *its, gpa_t eaddr, - u64 eval, unsigned long esize) -{ - struct kvm *kvm = its->dev->kvm; - - if (KVM_BUG_ON(esize != sizeof(eval), kvm)) - return -EINVAL; - - return vgic_write_guest_lock(kvm, eaddr, &eval, esize); -} - /* * This struct provides an intermediate representation of the fields contained * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC @@ -202,8 +179,8 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, const struct vgic_register_region * vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, gpa_t addr, int len); -struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, - u32 intid); +struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid); +struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid); void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); bool vgic_get_phys_line_level(struct vgic_irq *irq); void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 188197590fc9..b2ac06246327 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -32,9 +32,9 @@ static unsigned long nr_pinned_asids; static unsigned long *pinned_asid_map; #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) -#define ASID_FIRST_VERSION (1UL << asid_bits) +#define ASID_FIRST_VERSION (1UL << 16) -#define NUM_USER_ASIDS ASID_FIRST_VERSION +#define NUM_USER_ASIDS (1UL << asid_bits) #define ctxid2asid(asid) ((asid) & ~ASID_MASK) #define asid2ctxid(asid, genid) ((asid) | (genid)) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 87b3f1a25535..a86c897017df 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -30,11 +30,13 @@ void copy_highpage(struct page *to, struct page *from) if (!system_supports_mte()) return; - if (folio_test_hugetlb(src) && - folio_test_hugetlb_mte_tagged(src)) { - if (!folio_try_hugetlb_mte_tagging(dst)) + if (folio_test_hugetlb(src)) { + if (!folio_test_hugetlb_mte_tagged(src) || + from != folio_page(src, 0)) return; + WARN_ON_ONCE(!folio_try_hugetlb_mte_tagging(dst)); + /* * Populate tags for all subpages. * diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d21f67d67cf5..ccdef53872a0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -117,15 +117,6 @@ static void __init arch_reserve_crashkernel(void) static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) { - /** - * Information we get from firmware (e.g. DT dma-ranges) describe DMA - * bus constraints. Devices using DMA might have their own limitations. - * Some of them rely on DMA zone in low 32-bit memory. Keep low RAM - * DMA zone on platforms that have RAM there. 
- */ - if (memblock_start_of_DRAM() < U32_MAX) - zone_limit = min(zone_limit, U32_MAX); - return min(zone_limit, memblock_end_of_DRAM() - 1) + 1; } @@ -141,6 +132,14 @@ static void __init zone_sizes_init(void) acpi_zone_dma_limit = acpi_iort_dma_get_max_cpu_address(); dt_zone_dma_limit = of_dma_get_max_cpu_address(NULL); zone_dma_limit = min(dt_zone_dma_limit, acpi_zone_dma_limit); + /* + * Information we get from firmware (e.g. DT dma-ranges) describe DMA + * bus constraints. Devices using DMA might have their own limitations. + * Some of them rely on DMA zone in low 32-bit memory. Keep low RAM + * DMA zone on platforms that have RAM there. + */ + if (memblock_start_of_DRAM() < U32_MAX) + zone_dma_limit = min(zone_dma_limit, U32_MAX); arm64_dma_phys_limit = max_zone_phys(zone_dma_limit); max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); #endif diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index d9fce0fd475a..dae3a9104ca6 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -23,6 +23,7 @@ config LOONGARCH select ARCH_HAS_KERNEL_FPU_SUPPORT if CPU_HAS_FPU select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE + select ARCH_HAS_PREEMPT_LAZY select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_MEMORY @@ -66,6 +67,7 @@ config LOONGARCH select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_LTO_CLANG_THIN select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_SUPPORTS_RT select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_QUEUED_RWLOCKS @@ -155,6 +157,7 @@ config LOONGARCH select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if UNWINDER_ORC diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index ae3f80622f4c..567bd122a9ee 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -59,7 +59,7 @@ endif ifdef CONFIG_64BIT ld-emul = $(64bit-emul) -cflags-y += -mabi=lp64s +cflags-y += -mabi=lp64s -mcmodel=normal endif cflags-y += -pipe $(CC_FLAGS_NO_FPU) @@ -104,7 +104,7 @@ ifdef CONFIG_OBJTOOL KBUILD_CFLAGS += -fno-jump-tables endif -KBUILD_RUSTFLAGS += --target=loongarch64-unknown-none-softfloat +KBUILD_RUSTFLAGS += --target=loongarch64-unknown-none-softfloat -Ccode-model=small KBUILD_RUSTFLAGS_KERNEL += -Zdirect-access-external-data=yes KBUILD_RUSTFLAGS_MODULE += -Zdirect-access-external-data=no diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi index 92180140eb56..8dff2aa52417 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi @@ -266,7 +266,7 @@ status = "disabled"; }; - dma-controller@1fe00c20 { + apbdma2: dma-controller@1fe00c20 { compatible = "loongson,ls2k1000-apbdma"; reg = <0x0 0x1fe00c20 0x0 0x8>; interrupt-parent = <&liointc1>; @@ -276,7 +276,7 @@ status = "disabled"; }; - dma-controller@1fe00c30 { + apbdma3: dma-controller@1fe00c30 { compatible = "loongson,ls2k1000-apbdma"; reg = <0x0 0x1fe00c30 0x0 0x8>; interrupt-parent = <&liointc1>; @@ -352,6 +352,19 @@ status = "disabled"; }; + i2s: i2s@1fe2d000 { + compatible = "loongson,ls2k1000-i2s"; + reg = <0 0x1fe2d000 0 0x14>, + <0 0x1fe00438 0 0x8>; + interrupt-parent = <&liointc0>; + interrupts = <5 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + dmas = <&apbdma2 0>, <&apbdma3 0>; + dma-names = "tx", "rx"; + 
#sound-dai-cells = <0>; + status = "disabled"; + }; + spi0: spi@1fff0220 { compatible = "loongson,ls2k1000-spi"; reg = <0x0 0x1fff0220 0x0 0x10>; diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi index 0953c5707825..b4ff55a33e90 100644 --- a/arch/loongarch/boot/dts/loongson-2k2000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi @@ -173,6 +173,22 @@ status = "disabled"; }; + i2c@1fe00120 { + compatible = "loongson,ls2k-i2c"; + reg = <0x0 0x1fe00120 0x0 0x8>; + interrupt-parent = <&liointc>; + interrupts = <8 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + i2c@1fe00130 { + compatible = "loongson,ls2k-i2c"; + reg = <0x0 0x1fe00130 0x0 0x8>; + interrupt-parent = <&liointc>; + interrupts = <9 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + uart0: serial@1fe001e0 { compatible = "ns16550a"; reg = <0x0 0x1fe001e0 0x0 0x10>; @@ -243,9 +259,11 @@ status = "disabled"; }; - hda@7,0 { + i2s@7,0 { reg = <0x3800 0x0 0x0 0x0 0x0>; - interrupts = <58 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <78 IRQ_TYPE_LEVEL_HIGH>, + <79 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "tx", "rx"; interrupt-parent = <&pic>; status = "disabled"; }; diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 75b366407a60..4dffc90192f7 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -1,4 +1,5 @@ # CONFIG_LOCALVERSION_AUTO is not set +CONFIG_KERNEL_ZSTD=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_NO_HZ=y @@ -70,6 +71,14 @@ CONFIG_ACPI_IPMI=m CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_PCI_SLOT=y CONFIG_ACPI_HOTPLUG_MEMORY=y +CONFIG_ACPI_BGRT=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y +CONFIG_LOONGSON3_CPUFREQ=m CONFIG_VIRTUALIZATION=y CONFIG_KVM=m CONFIG_JUMP_LABEL=y @@ -78,6 +87,9 @@ CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y +CONFIG_MODULE_COMPRESS=y +CONFIG_MODULE_COMPRESS_ZSTD=y +CONFIG_MODULE_DECOMPRESS=y CONFIG_BLK_DEV_ZONED=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y @@ -85,6 +97,8 @@ CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_FC_APPID=y CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_CGROUP_IOPRIO=y +CONFIG_BLK_INLINE_ENCRYPTION=y +CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PARTITION_ADVANCED=y CONFIG_BSD_DISKLABEL=y CONFIG_UNIXWARE_DISKLABEL=y @@ -413,7 +427,16 @@ CONFIG_PARPORT_PC=y CONFIG_PARPORT_SERIAL=y CONFIG_PARPORT_PC_FIFO=y CONFIG_ZRAM=m +CONFIG_ZRAM_BACKEND_LZ4=y +CONFIG_ZRAM_BACKEND_LZ4HC=y +CONFIG_ZRAM_BACKEND_ZSTD=y +CONFIG_ZRAM_BACKEND_DEFLATE=y +CONFIG_ZRAM_BACKEND_842=y +CONFIG_ZRAM_BACKEND_LZO=y CONFIG_ZRAM_DEF_COMP_ZSTD=y +CONFIG_ZRAM_WRITEBACK=y +CONFIG_ZRAM_MEMORY_TRACKING=y +CONFIG_ZRAM_MULTI_COMP=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m @@ -433,6 +456,9 @@ CONFIG_NVME_TARGET_RDMA=m CONFIG_NVME_TARGET_FC=m CONFIG_NVME_TARGET_TCP=m CONFIG_EEPROM_AT24=m +CONFIG_PVPANIC=y +CONFIG_PVPANIC_MMIO=m +CONFIG_PVPANIC_PCI=m CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y @@ -470,12 +496,10 @@ CONFIG_PATA_ATIIXP=y CONFIG_PATA_PCMCIA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=m -CONFIG_MD_LINEAR=m CONFIG_MD_RAID0=m CONFIG_MD_RAID1=m CONFIG_MD_RAID10=m CONFIG_MD_RAID456=m -CONFIG_MD_MULTIPATH=m CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=y CONFIG_DM_CRYPT=m @@ -489,6 +513,16 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m 
CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_MULTIPATH_HST=m +CONFIG_DM_MULTIPATH_IOA=m +CONFIG_DM_INIT=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=m +CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y +CONFIG_DM_VERITY_FEC=y +CONFIG_DM_INTEGRITY=m +CONFIG_DM_ZONED=m +CONFIG_DM_VDO=m CONFIG_TARGET_CORE=m CONFIG_TCM_IBLOCK=m CONFIG_TCM_FILEIO=m @@ -500,6 +534,13 @@ CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=y CONFIG_WIREGUARD=m +CONFIG_IFB=m +CONFIG_NET_TEAM=m +CONFIG_NET_TEAM_MODE_BROADCAST=m +CONFIG_NET_TEAM_MODE_ROUNDROBIN=m +CONFIG_NET_TEAM_MODE_RANDOM=m +CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m +CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m @@ -580,12 +621,14 @@ CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m CONFIG_USB_RTL8150=m CONFIG_USB_RTL8152=m +CONFIG_USB_USBNET=m # CONFIG_USB_NET_AX8817X is not set # CONFIG_USB_NET_AX88179_178A is not set CONFIG_USB_NET_CDC_EEM=m CONFIG_USB_NET_HUAWEI_CDC_NCM=m CONFIG_USB_NET_CDC_MBIM=m # CONFIG_USB_NET_NET1080 is not set +CONFIG_USB_NET_RNDIS_HOST=m # CONFIG_USB_BELKIN is not set # CONFIG_USB_ARMLINUX is not set # CONFIG_USB_NET_ZAURUS is not set @@ -594,10 +637,11 @@ CONFIG_ATH9K_HTC=m CONFIG_IWLWIFI=m CONFIG_IWLDVM=m CONFIG_IWLMVM=m -CONFIG_HOSTAP=m CONFIG_MT7601U=m CONFIG_RT2X00=m CONFIG_RT2800USB=m +CONFIG_RTL8180=m +CONFIG_RTL8187=m CONFIG_RTL8192CE=m CONFIG_RTL8192SE=m CONFIG_RTL8192DE=m @@ -607,18 +651,26 @@ CONFIG_RTL8188EE=m CONFIG_RTL8192EE=m CONFIG_RTL8821AE=m CONFIG_RTL8192CU=m +CONFIG_RTL8192DU=m # CONFIG_RTLWIFI_DEBUG is not set CONFIG_RTL8XXXU=m CONFIG_RTW88=m CONFIG_RTW88_8822BE=m +CONFIG_RTW88_8822BU=m CONFIG_RTW88_8822CE=m +CONFIG_RTW88_8822CU=m CONFIG_RTW88_8723DE=m +CONFIG_RTW88_8723DU=m CONFIG_RTW88_8821CE=m +CONFIG_RTW88_8821CU=m CONFIG_RTW89=m +CONFIG_RTW89_8851BE=m CONFIG_RTW89_8852AE=m +CONFIG_RTW89_8852BE=m +CONFIG_RTW89_8852BTE=m CONFIG_RTW89_8852CE=m +CONFIG_RTW89_8922AE=m CONFIG_ZD1211RW=m -CONFIG_USB_NET_RNDIS_WLAN=m CONFIG_USB4_NET=m CONFIG_INPUT_MOUSEDEV=y CONFIG_INPUT_MOUSEDEV_PSAUX=y @@ -651,6 +703,9 @@ CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=m CONFIG_I2C_CHARDEV=y CONFIG_I2C_PIIX4=y +CONFIG_I2C_DESIGNWARE_CORE=y +CONFIG_I2C_DESIGNWARE_SLAVE=y +CONFIG_I2C_DESIGNWARE_PCI=y CONFIG_I2C_GPIO=y CONFIG_I2C_LS2X=y CONFIG_SPI=y @@ -727,11 +782,22 @@ CONFIG_SND_HDA_CODEC_CONEXANT=y CONFIG_SND_USB_AUDIO=m CONFIG_SND_SOC=m CONFIG_SND_SOC_LOONGSON_CARD=m +CONFIG_SND_SOC_ES7134=m +CONFIG_SND_SOC_ES7241=m +CONFIG_SND_SOC_ES8311=m +CONFIG_SND_SOC_ES8316=m +CONFIG_SND_SOC_ES8323=m +CONFIG_SND_SOC_ES8326=m +CONFIG_SND_SOC_ES8328_I2C=m +CONFIG_SND_SOC_ES8328_SPI=m +CONFIG_SND_SOC_UDA1334=m +CONFIG_SND_SOC_UDA1342=m CONFIG_SND_VIRTIO=m CONFIG_HIDRAW=y CONFIG_UHID=m CONFIG_HID_A4TECH=m CONFIG_HID_CHERRY=m +CONFIG_HID_ELAN=m CONFIG_HID_LOGITECH=m CONFIG_HID_LOGITECH_DJ=m CONFIG_LOGITECH_FF=y @@ -740,7 +806,11 @@ CONFIG_LOGIG940_FF=y CONFIG_HID_MICROSOFT=m CONFIG_HID_MULTITOUCH=m CONFIG_HID_SUNPLUS=m +CONFIG_HID_WACOM=m CONFIG_USB_HIDDEV=y +CONFIG_I2C_HID_ACPI=m +CONFIG_I2C_HID_OF=m +CONFIG_I2C_HID_OF_ELAN=m CONFIG_USB=y CONFIG_USB_OTG=y CONFIG_USB_MON=y @@ -775,7 +845,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y CONFIG_RTC_DRV_LOONGSON=y CONFIG_DMADEVICES=y -CONFIG_LS2X_APB_DMA=y +CONFIG_LOONGSON2_APB_DMA=y CONFIG_UDMABUF=y CONFIG_DMABUF_HEAPS=y CONFIG_DMABUF_HEAPS_SYSTEM=y @@ -852,6 +922,9 @@ CONFIG_F2FS_FS=m CONFIG_F2FS_FS_SECURITY=y CONFIG_F2FS_CHECK_FS=y CONFIG_F2FS_FS_COMPRESSION=y +CONFIG_FS_ENCRYPTION=y +CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y +CONFIG_FS_VERITY=y CONFIG_FANOTIFY=y 
CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA=y @@ -904,16 +977,14 @@ CONFIG_SQUASHFS_ZSTD=y CONFIG_MINIX_FS=m CONFIG_ROMFS_FS=m CONFIG_PSTORE=m -CONFIG_PSTORE_LZO_COMPRESS=m -CONFIG_PSTORE_LZ4_COMPRESS=m -CONFIG_PSTORE_LZ4HC_COMPRESS=m -CONFIG_PSTORE_842_COMPRESS=y -CONFIG_PSTORE_ZSTD_COMPRESS=y -CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y +CONFIG_PSTORE_COMPRESS=y CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_EROFS_FS=m CONFIG_EROFS_FS_ZIP_LZMA=y +CONFIG_EROFS_FS_ZIP_DEFLATE=y +CONFIG_EROFS_FS_ZIP_ZSTD=y +CONFIG_EROFS_FS_ONDEMAND=y CONFIG_EROFS_FS_PCPU_KTHREAD=y CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h index b837c65a4894..c8e4057734d0 100644 --- a/arch/loongarch/include/asm/hugetlb.h +++ b/arch/loongarch/include/asm/hugetlb.h @@ -24,6 +24,16 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } +#define __HAVE_ARCH_HUGE_PTE_CLEAR +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz) +{ + pte_t clear; + + pte_val(clear) = (unsigned long)invalid_pte_table; + set_pte_at(mm, addr, ptep, clear); +} + #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 944482063f14..3089785ca97e 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -683,7 +683,17 @@ DEF_EMIT_REG2I16_FORMAT(blt, blt_op) DEF_EMIT_REG2I16_FORMAT(bge, bge_op) DEF_EMIT_REG2I16_FORMAT(bltu, bltu_op) DEF_EMIT_REG2I16_FORMAT(bgeu, bgeu_op) -DEF_EMIT_REG2I16_FORMAT(jirl, jirl_op) + +static inline void emit_jirl(union loongarch_instruction *insn, + enum loongarch_gpr rd, + enum loongarch_gpr rj, + int offset) +{ + insn->reg2i16_format.opcode = jirl_op; + insn->reg2i16_format.immediate = offset; + insn->reg2i16_format.rd = rd; + insn->reg2i16_format.rj = rj; +} #define DEF_EMIT_REG2BSTRD_FORMAT(NAME, OP) \ static inline void emit_##NAME(union loongarch_instruction *insn, \ diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index 8bf0e6f51546..4f5a9441754e 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -66,8 +66,9 @@ register unsigned long current_stack_pointer __asm__("$sp"); * - pending work-to-be-done flags are in LSW * - other flags in MSW */ -#define TIF_SIGPENDING 1 /* signal pending */ -#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_NEED_RESCHED 0 /* rescheduling necessary */ +#define TIF_NEED_RESCHED_LAZY 1 /* lazy rescheduling necessary */ +#define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NOTIFY_RESUME 3 /* callback before returning to user */ #define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */ #define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */ @@ -88,8 +89,9 @@ register unsigned long current_stack_pointer __asm__("$sp"); #define TIF_LBT_CTX_LIVE 20 /* LBT context must be preserved */ #define TIF_PATCH_PENDING 21 /* pending live patching update */ -#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) +#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_NOTIFY_SIGNAL (1<<TIF_NOTIFY_SIGNAL) #define _TIF_NOHZ (1<<TIF_NOHZ) diff --git 
a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index 2bf86aeda874..de21e72759ee 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -95,7 +95,7 @@ static void __init init_screen_info(void) memset(si, 0, sizeof(*si)); early_memunmap(si, sizeof(*si)); - memblock_reserve(screen_info.lfb_base, screen_info.lfb_size); + memblock_reserve(__screen_info_lfb_base(&screen_info), screen_info.lfb_size); } void __init efi_init(void) diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c index 3050329556d1..14d7d700bcb9 100644 --- a/arch/loongarch/kernel/inst.c +++ b/arch/loongarch/kernel/inst.c @@ -332,7 +332,7 @@ u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) return INSN_BREAK; } - emit_jirl(&insn, rj, rd, imm >> 2); + emit_jirl(&insn, rd, rj, imm >> 2); return insn.word; } diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 5d59e9ce2772..fbf747447f13 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -82,7 +82,7 @@ void show_ipi_list(struct seq_file *p, int prec) for (i = 0; i < NR_IPI; i++) { seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).ipi_irqs[i]); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, cpu).ipi_irqs[i], 10); seq_printf(p, " LoongArch %d %s\n", i + 1, ipi_types[i]); } } diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 46d7d40c87e3..a07d7eff4dc5 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -127,7 +127,11 @@ void sync_counter(void) int constant_clockevent_init(void) { unsigned int cpu = smp_processor_id(); - unsigned long min_delta = 0x600; +#ifdef CONFIG_PREEMPT_RT + unsigned long min_delta = 100; +#else + unsigned long min_delta = 1000; +#endif unsigned long max_delta = (1UL << 48) - 1; struct clock_event_device *cd; static int irq = 0, timer_irq_installed = 0; diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 69f3e3782cc9..a7893bd01e73 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -156,7 +156,7 @@ static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst) int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) { - int ret; + int idx, ret; unsigned long *val; u32 addr, rd, rj, opcode; @@ -167,7 +167,6 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) rj = inst.reg2_format.rj; opcode = inst.reg2_format.opcode; addr = vcpu->arch.gprs[rj]; - ret = EMULATE_DO_IOCSR; run->iocsr_io.phys_addr = addr; run->iocsr_io.is_write = 0; val = &vcpu->arch.gprs[rd]; @@ -207,20 +206,28 @@ int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu) } if (run->iocsr_io.is_write) { - if (!kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val)) + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (ret == 0) ret = EMULATE_DONE; - else + else { + ret = EMULATE_DO_IOCSR; /* Save data and let user space to write it */ memcpy(run->iocsr_io.data, val, run->iocsr_io.len); - + } trace_kvm_iocsr(KVM_TRACE_IOCSR_WRITE, run->iocsr_io.len, addr, val); } else { - if (!kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val)) + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val); + 
srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (ret == 0) ret = EMULATE_DONE; - else + else { + ret = EMULATE_DO_IOCSR; /* Save register id for iocsr read completion */ vcpu->arch.io_gpr = rd; - + } trace_kvm_iocsr(KVM_TRACE_IOCSR_READ, run->iocsr_io.len, addr, NULL); } @@ -359,7 +366,7 @@ static int kvm_handle_gspr(struct kvm_vcpu *vcpu) int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) { - int ret; + int idx, ret; unsigned int op8, opcode, rd; struct kvm_run *run = vcpu->run; @@ -464,8 +471,10 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) * it need not return to user space to handle the mmio * exception. */ + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, run->mmio.len, &vcpu->arch.gprs[rd]); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (!ret) { update_pc(&vcpu->arch); vcpu->mmio_needed = 0; @@ -531,7 +540,7 @@ int kvm_complete_mmio_read(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst) { - int ret; + int idx, ret; unsigned int rd, op8, opcode; unsigned long curr_pc, rd_val = 0; struct kvm_run *run = vcpu->run; @@ -631,7 +640,9 @@ int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst) * it need not return to user space to handle the mmio * exception. */ + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, run->mmio.len, data); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (!ret) return EMULATE_DONE; diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index a233a323e295..93f4acd44523 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -98,7 +98,7 @@ static void write_mailbox(struct kvm_vcpu *vcpu, int offset, uint64_t data, int static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) { - int i, ret; + int i, idx, ret; uint32_t val = 0, mask = 0; /* @@ -107,7 +107,9 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) */ if ((data >> 27) & 0xf) { /* Read the old val */ + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) { kvm_err("%s: : read date from addr %llx failed\n", __func__, addr); return ret; @@ -121,7 +123,9 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) val &= mask; } val |= ((uint32_t)(data >> 32) & ~mask); + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) kvm_err("%s: : write date to addr %llx failed\n", __func__, addr); diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index cab1818be68d..d18a4a270415 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -240,7 +240,7 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu) */ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu) { - int ret; + int idx, ret; /* * Check conditions before entering the guest @@ -249,7 +249,9 @@ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu) if (ret < 0) return ret; + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvm_check_requests(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); return ret; } diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 5ac9beb5f093..3b427b319db2 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -289,7 +289,7 @@ static void 
setup_tlb_handler(int cpu) /* Avoid lockdep warning */ rcutree_report_cpu_starting(cpu); -#ifdef CONFIG_NUMA +#if defined(CONFIG_NUMA) && !defined(CONFIG_PREEMPT_RT) vec_sz = sizeof(exception_handlers); if (pcpu_handlers[cpu]) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 7dbefd4ba210..ea357a3edc09 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -179,15 +179,15 @@ static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) if (!is_tail_call) { /* Set return value */ - move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]); + emit_insn(ctx, addiw, LOONGARCH_GPR_A0, regmap[BPF_REG_0], 0); /* Return to the caller */ - emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0); + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0); } else { /* * Call the next bpf prog and skip the first instruction * of TCC initialization. */ - emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1); + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T3, 1); } } @@ -904,7 +904,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext return ret; move_addr(ctx, t1, func_addr); - emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0); + emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0); move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); break; diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 40c1175823d6..fdde1bcd4e26 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -19,7 +19,7 @@ ccflags-vdso := \ cflags-vdso := $(ccflags-vdso) \ -isystem $(shell $(CC) -print-file-name=include) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ - -O2 -g -fno-strict-aliasing -fno-common -fno-builtin \ + -std=gnu11 -O2 -g -fno-strict-aliasing -fno-common -fno-builtin \ -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ $(call cc-option, -fno-asynchronous-unwind-tables) \ $(call cc-option, -fno-stack-protector) diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index f8edc9082724..20d877cb4e30 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -503,6 +503,7 @@ CONFIG_UHID=m # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set +CONFIG_RTC_DRV_M48T59=m CONFIG_RTC_DRV_MSM6242=m CONFIG_RTC_DRV_RP5C01=m CONFIG_RTC_DRV_GENERIC=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 71fc71bb660e..5e1c8d0d3da5 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -391,6 +391,7 @@ CONFIG_UHID=m # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set +CONFIG_RTC_DRV_M48T59=y CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 41072e68028e..5d1409e6a137 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -392,6 +392,7 @@ CONFIG_UHID=m # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set +CONFIG_RTC_DRV_M48T59=y CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set diff --git a/arch/m68k/include/asm/mvme147hw.h b/arch/m68k/include/asm/mvme147hw.h index dbf88059e47a..6ad93bac06f9 100644 --- a/arch/m68k/include/asm/mvme147hw.h +++ b/arch/m68k/include/asm/mvme147hw.h @@ -4,24 +4,7 @@ #include <asm/irq.h> -typedef struct { 
- unsigned char - ctrl, - bcd_sec, - bcd_min, - bcd_hr, - bcd_dow, - bcd_dom, - bcd_mth, - bcd_year; -} MK48T02; - -#define RTC_WRITE 0x80 -#define RTC_READ 0x40 -#define RTC_STOP 0x20 - -#define m147_rtc ((MK48T02 * volatile)0xfffe07f8) - +#define MVME147_RTC_BASE 0xfffe0000 struct pcc_regs { volatile u_long dma_tadr; diff --git a/arch/m68k/include/asm/mvme16xhw.h b/arch/m68k/include/asm/mvme16xhw.h index cc7f5ae1220f..ff1126a51fbe 100644 --- a/arch/m68k/include/asm/mvme16xhw.h +++ b/arch/m68k/include/asm/mvme16xhw.h @@ -24,23 +24,7 @@ typedef struct { #define mvmelp ((*(volatile MVMElpPtr)(MVME_LPR_BASE))) -typedef struct { - unsigned char - ctrl, - bcd_sec, - bcd_min, - bcd_hr, - bcd_dow, - bcd_dom, - bcd_mth, - bcd_year; -} MK48T08_t, *MK48T08ptr_t; - -#define RTC_WRITE 0x80 -#define RTC_READ 0x40 -#define RTC_STOP 0x20 - -#define MVME_RTC_BASE 0xfffc1ff8 +#define MVME_RTC_BASE 0xfffc0000 #define MVME_I596_BASE 0xfff46000 diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index 824c42a302c6..3054d3857efa 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -19,8 +19,9 @@ #include <linux/linkage.h> #include <linux/init.h> #include <linux/major.h> -#include <linux/rtc.h> #include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/rtc/m48t59.h> #include <asm/bootinfo.h> #include <asm/bootinfo-vme.h> @@ -36,13 +37,9 @@ static void mvme147_get_model(char *model); static void __init mvme147_sched_init(void); -extern int mvme147_hwclk (int, struct rtc_time *); extern void mvme147_reset (void); -static int bcd2int (unsigned char b); - - int __init mvme147_parse_bootinfo(const struct bi_record *bi) { uint16_t tag = be16_to_cpu(bi->tag); @@ -80,7 +77,6 @@ void __init config_mvme147(void) { mach_sched_init = mvme147_sched_init; mach_init_IRQ = mvme147_init_IRQ; - mach_hwclk = mvme147_hwclk; mach_reset = mvme147_reset; mach_get_model = mvme147_get_model; @@ -89,6 +85,28 @@ void __init config_mvme147(void) vme_brdtype = VME_TYPE_MVME147; } +static struct resource m48t59_rsrc[] = { + DEFINE_RES_MEM(MVME147_RTC_BASE, 0x800), +}; + +static struct m48t59_plat_data m48t59_data = { + .type = M48T59RTC_TYPE_M48T02, + .yy_offset = 70, +}; + +static int __init mvme147_platform_init(void) +{ + if (!MACH_IS_MVME147) + return 0; + + platform_device_register_resndata(NULL, "rtc-m48t59", -1, + m48t59_rsrc, ARRAY_SIZE(m48t59_rsrc), + &m48t59_data, sizeof(m48t59_data)); + return 0; +} + +arch_initcall(mvme147_platform_init); + static u64 mvme147_read_clk(struct clocksource *cs); static struct clocksource mvme147_clk = { @@ -162,31 +180,6 @@ static u64 mvme147_read_clk(struct clocksource *cs) return ticks; } -static int bcd2int (unsigned char b) -{ - return ((b>>4)*10 + (b&15)); -} - -int mvme147_hwclk(int op, struct rtc_time *t) -{ - if (!op) { - m147_rtc->ctrl = RTC_READ; - t->tm_year = bcd2int (m147_rtc->bcd_year); - t->tm_mon = bcd2int(m147_rtc->bcd_mth) - 1; - t->tm_mday = bcd2int (m147_rtc->bcd_dom); - t->tm_hour = bcd2int (m147_rtc->bcd_hr); - t->tm_min = bcd2int (m147_rtc->bcd_min); - t->tm_sec = bcd2int (m147_rtc->bcd_sec); - m147_rtc->ctrl = 0; - if (t->tm_year < 70) - t->tm_year += 100; - } else { - /* FIXME Setting the time is not yet supported */ - return -EOPNOTSUPP; - } - return 0; -} - static void scc_delay(void) { __asm__ __volatile__ ("nop; nop;"); diff --git a/arch/m68k/mvme16x/Makefile b/arch/m68k/mvme16x/Makefile index a8a368c2cbea..02f9e4ad8209 100644 --- a/arch/m68k/mvme16x/Makefile +++ b/arch/m68k/mvme16x/Makefile @@ -3,4 +3,4 @@ 
# Makefile for Linux arch/m68k/mvme16x source directory # -obj-y := config.o rtc.o +obj-y := config.o diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index d1fbd1704d65..99768fe8da73 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -21,9 +21,10 @@ #include <linux/linkage.h> #include <linux/init.h> #include <linux/major.h> -#include <linux/rtc.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/rtc/m48t59.h> #include <asm/bootinfo.h> #include <asm/bootinfo-vme.h> @@ -39,16 +40,10 @@ extern t_bdid mvme_bdid; -static MK48T08ptr_t volatile rtc = (MK48T08ptr_t)MVME_RTC_BASE; - static void mvme16x_get_model(char *model); extern void mvme16x_sched_init(void); -extern int mvme16x_hwclk (int, struct rtc_time *); extern void mvme16x_reset (void); -int bcd2int (unsigned char b); - - unsigned short mvme16x_config; EXPORT_SYMBOL(mvme16x_config); @@ -268,7 +263,6 @@ void __init config_mvme16x(void) mach_sched_init = mvme16x_sched_init; mach_init_IRQ = mvme16x_init_IRQ; - mach_hwclk = mvme16x_hwclk; mach_reset = mvme16x_reset; mach_get_model = mvme16x_get_model; mach_get_hardware_list = mvme16x_get_hardware_list; @@ -312,6 +306,28 @@ void __init config_mvme16x(void) } } +static struct resource m48t59_rsrc[] = { + DEFINE_RES_MEM(MVME_RTC_BASE, 0x2000), +}; + +static struct m48t59_plat_data m48t59_data = { + .type = M48T59RTC_TYPE_M48T08, + .yy_offset = 70, +}; + +static int __init mvme16x_platform_init(void) +{ + if (!MACH_IS_MVME16x) + return 0; + + platform_device_register_resndata(NULL, "rtc-m48t59", -1, + m48t59_rsrc, ARRAY_SIZE(m48t59_rsrc), + &m48t59_data, sizeof(m48t59_data)); + return 0; +} + +arch_initcall(mvme16x_platform_init); + static irqreturn_t mvme16x_abort_int (int irq, void *dev_id) { unsigned long *new = (unsigned long *)vectors; @@ -426,28 +442,3 @@ static u64 mvme16x_read_clk(struct clocksource *cs) return ticks; } - -int bcd2int (unsigned char b) -{ - return ((b>>4)*10 + (b&15)); -} - -int mvme16x_hwclk(int op, struct rtc_time *t) -{ - if (!op) { - rtc->ctrl = RTC_READ; - t->tm_year = bcd2int (rtc->bcd_year); - t->tm_mon = bcd2int(rtc->bcd_mth) - 1; - t->tm_mday = bcd2int (rtc->bcd_dom); - t->tm_hour = bcd2int (rtc->bcd_hr); - t->tm_min = bcd2int (rtc->bcd_min); - t->tm_sec = bcd2int (rtc->bcd_sec); - rtc->ctrl = 0; - if (t->tm_year < 70) - t->tm_year += 100; - } else { - /* FIXME Setting the time is not yet supported */ - return -EOPNOTSUPP; - } - return 0; -} diff --git a/arch/m68k/mvme16x/rtc.c b/arch/m68k/mvme16x/rtc.c deleted file mode 100644 index ccbaae1125e6..000000000000 --- a/arch/m68k/mvme16x/rtc.c +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Real Time Clock interface for Linux on the MVME16x - * - * Based on the PC driver by Paul Gortmaker. - */ - -#define RTC_VERSION "1.00" - -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/miscdevice.h> -#include <linux/ioport.h> -#include <linux/capability.h> -#include <linux/fcntl.h> -#include <linux/init.h> -#include <linux/poll.h> -#include <linux/rtc.h> /* For struct rtc_time and ioctls, etc */ -#include <linux/bcd.h> -#include <asm/mvme16xhw.h> - -#include <asm/io.h> -#include <linux/uaccess.h> -#include <asm/setup.h> - -/* - * We sponge a minor off of the misc major. No need slurping - * up another valuable major dev number for this. If you add - * an ioctl, make sure you don't conflict with SPARC's RTC - * ioctls. 
- */ - -static const unsigned char days_in_mo[] = -{0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; - -static atomic_t rtc_ready = ATOMIC_INIT(1); - -static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - volatile MK48T08ptr_t rtc = (MK48T08ptr_t)MVME_RTC_BASE; - unsigned long flags; - struct rtc_time wtime; - void __user *argp = (void __user *)arg; - - switch (cmd) { - case RTC_RD_TIME: /* Read the time/date from RTC */ - { - local_irq_save(flags); - /* Ensure clock and real-time-mode-register are accessible */ - rtc->ctrl = RTC_READ; - memset(&wtime, 0, sizeof(struct rtc_time)); - wtime.tm_sec = bcd2bin(rtc->bcd_sec); - wtime.tm_min = bcd2bin(rtc->bcd_min); - wtime.tm_hour = bcd2bin(rtc->bcd_hr); - wtime.tm_mday = bcd2bin(rtc->bcd_dom); - wtime.tm_mon = bcd2bin(rtc->bcd_mth)-1; - wtime.tm_year = bcd2bin(rtc->bcd_year); - if (wtime.tm_year < 70) - wtime.tm_year += 100; - wtime.tm_wday = bcd2bin(rtc->bcd_dow)-1; - rtc->ctrl = 0; - local_irq_restore(flags); - return copy_to_user(argp, &wtime, sizeof wtime) ? - -EFAULT : 0; - } - case RTC_SET_TIME: /* Set the RTC */ - { - struct rtc_time rtc_tm; - unsigned char mon, day, hrs, min, sec, leap_yr; - unsigned int yrs; - - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - - if (copy_from_user(&rtc_tm, argp, sizeof(struct rtc_time))) - return -EFAULT; - - yrs = rtc_tm.tm_year; - if (yrs < 1900) - yrs += 1900; - mon = rtc_tm.tm_mon + 1; /* tm_mon starts at zero */ - day = rtc_tm.tm_mday; - hrs = rtc_tm.tm_hour; - min = rtc_tm.tm_min; - sec = rtc_tm.tm_sec; - - leap_yr = ((!(yrs % 4) && (yrs % 100)) || !(yrs % 400)); - - if ((mon > 12) || (day == 0)) - return -EINVAL; - - if (day > (days_in_mo[mon] + ((mon == 2) && leap_yr))) - return -EINVAL; - - if ((hrs >= 24) || (min >= 60) || (sec >= 60)) - return -EINVAL; - - if (yrs >= 2070) - return -EINVAL; - - local_irq_save(flags); - rtc->ctrl = RTC_WRITE; - - rtc->bcd_sec = bin2bcd(sec); - rtc->bcd_min = bin2bcd(min); - rtc->bcd_hr = bin2bcd(hrs); - rtc->bcd_dom = bin2bcd(day); - rtc->bcd_mth = bin2bcd(mon); - rtc->bcd_year = bin2bcd(yrs%100); - - rtc->ctrl = 0; - local_irq_restore(flags); - return 0; - } - default: - return -EINVAL; - } -} - -/* - * We enforce only one user at a time here with the open/close. - */ -static int rtc_open(struct inode *inode, struct file *file) -{ - if( !atomic_dec_and_test(&rtc_ready) ) - { - atomic_inc( &rtc_ready ); - return -EBUSY; - } - return 0; -} - -static int rtc_release(struct inode *inode, struct file *file) -{ - atomic_inc( &rtc_ready ); - return 0; -} - -/* - * The various file operations we support. 
- */ - -static const struct file_operations rtc_fops = { - .unlocked_ioctl = rtc_ioctl, - .open = rtc_open, - .release = rtc_release, - .llseek = noop_llseek, -}; - -static struct miscdevice rtc_dev= -{ - .minor = RTC_MINOR, - .name = "rtc", - .fops = &rtc_fops -}; - -static int __init rtc_MK48T08_init(void) -{ - if (!MACH_IS_MVME16x) - return -ENODEV; - - pr_info("MK48T08 Real Time Clock Driver v%s\n", RTC_VERSION); - return misc_register(&rtc_dev); -} -device_initcall(rtc_MK48T08_init); diff --git a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi index cce9428afc41..ee71045883e7 100644 --- a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi +++ b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi @@ -70,7 +70,6 @@ device_type = "pci"; #address-cells = <3>; #size-cells = <2>; - #interrupt-cells = <2>; msi-parent = <&msi>; reg = <0 0x1a000000 0 0x02000000>, @@ -234,7 +233,7 @@ }; }; - pci_bridge@9,0 { + pcie@9,0 { compatible = "pci0014,7a19.1", "pci0014,7a19", "pciclass060400", @@ -244,12 +243,16 @@ interrupts = <32 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 32 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@a,0 { + pcie@a,0 { compatible = "pci0014,7a09.1", "pci0014,7a09", "pciclass060400", @@ -259,12 +262,16 @@ interrupts = <33 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 33 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@b,0 { + pcie@b,0 { compatible = "pci0014,7a09.1", "pci0014,7a09", "pciclass060400", @@ -274,12 +281,16 @@ interrupts = <34 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 34 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@c,0 { + pcie@c,0 { compatible = "pci0014,7a09.1", "pci0014,7a09", "pciclass060400", @@ -289,12 +300,16 @@ interrupts = <35 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 35 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@d,0 { + pcie@d,0 { compatible = "pci0014,7a19.1", "pci0014,7a19", "pciclass060400", @@ -304,12 +319,16 @@ interrupts = <36 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 36 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@e,0 { + pcie@e,0 { compatible = "pci0014,7a09.1", "pci0014,7a09", "pciclass060400", @@ -319,12 +338,16 @@ interrupts = <37 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 37 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@f,0 { + pcie@f,0 { compatible = "pci0014,7a29.1", "pci0014,7a29", "pciclass060400", @@ -334,12 +357,16 @@ interrupts = <40 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 40 IRQ_TYPE_LEVEL_HIGH>; + ranges; 
}; - pci_bridge@10,0 { + pcie@10,0 { compatible = "pci0014,7a19.1", "pci0014,7a19", "pciclass060400", @@ -349,12 +376,16 @@ interrupts = <41 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 41 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@11,0 { + pcie@11,0 { compatible = "pci0014,7a29.1", "pci0014,7a29", "pciclass060400", @@ -364,12 +395,16 @@ interrupts = <42 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 42 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@12,0 { + pcie@12,0 { compatible = "pci0014,7a19.1", "pci0014,7a19", "pciclass060400", @@ -379,12 +414,16 @@ interrupts = <43 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 43 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@13,0 { + pcie@13,0 { compatible = "pci0014,7a29.1", "pci0014,7a29", "pciclass060400", @@ -394,12 +433,16 @@ interrupts = <38 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 38 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; - pci_bridge@14,0 { + pcie@14,0 { compatible = "pci0014,7a19.1", "pci0014,7a19", "pciclass060400", @@ -409,9 +452,13 @@ interrupts = <39 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&pic>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; interrupt-map = <0 0 0 0 &pic 39 IRQ_TYPE_LEVEL_HIGH>; + ranges; }; }; diff --git a/arch/mips/boot/dts/mobileye/eyeq5-clocks.dtsi b/arch/mips/boot/dts/mobileye/eyeq5-clocks.dtsi deleted file mode 100644 index 17a342cc744e..000000000000 --- a/arch/mips/boot/dts/mobileye/eyeq5-clocks.dtsi +++ /dev/null @@ -1,270 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -/* - * Copyright 2023 Mobileye Vision Technologies Ltd. 
- */ - -#include <dt-bindings/clock/mobileye,eyeq5-clk.h> - -/ { - /* Fixed clock */ - xtal: xtal { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <30000000>; - }; - -/* PLL_CPU derivatives */ - occ_cpu: occ-cpu { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_CPU>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - si_css0_ref_clk: si-css0-ref-clk { /* gate ClkRstGen_si_css0_ref */ - compatible = "fixed-factor-clock"; - clocks = <&occ_cpu>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - cpc_clk: cpc-clk { - compatible = "fixed-factor-clock"; - clocks = <&si_css0_ref_clk>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - core0_clk: core0-clk { - compatible = "fixed-factor-clock"; - clocks = <&si_css0_ref_clk>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - core1_clk: core1-clk { - compatible = "fixed-factor-clock"; - clocks = <&si_css0_ref_clk>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - core2_clk: core2-clk { - compatible = "fixed-factor-clock"; - clocks = <&si_css0_ref_clk>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - core3_clk: core3-clk { - compatible = "fixed-factor-clock"; - clocks = <&si_css0_ref_clk>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - cm_clk: cm-clk { - compatible = "fixed-factor-clock"; - clocks = <&si_css0_ref_clk>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - mem_clk: mem-clk { - compatible = "fixed-factor-clock"; - clocks = <&si_css0_ref_clk>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - occ_isram: occ-isram { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_CPU>; - #clock-cells = <0>; - clock-div = <2>; - clock-mult = <1>; - }; - isram_clk: isram-clk { /* gate ClkRstGen_isram */ - compatible = "fixed-factor-clock"; - clocks = <&occ_isram>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - occ_dbu: occ-dbu { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_CPU>; - #clock-cells = <0>; - clock-div = <10>; - clock-mult = <1>; - }; - si_dbu_tp_pclk: si-dbu-tp-pclk { /* gate ClkRstGen_dbu */ - compatible = "fixed-factor-clock"; - clocks = <&occ_dbu>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; -/* PLL_VDI derivatives */ - occ_vdi: occ-vdi { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_VDI>; - #clock-cells = <0>; - clock-div = <2>; - clock-mult = <1>; - }; - vdi_clk: vdi-clk { /* gate ClkRstGen_vdi */ - compatible = "fixed-factor-clock"; - clocks = <&occ_vdi>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - occ_can_ser: occ-can-ser { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_VDI>; - #clock-cells = <0>; - clock-div = <16>; - clock-mult = <1>; - }; - can_ser_clk: can-ser-clk { /* gate ClkRstGen_can_ser */ - compatible = "fixed-factor-clock"; - clocks = <&occ_can_ser>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - i2c_ser_clk: i2c-ser-clk { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_VDI>; - #clock-cells = <0>; - clock-div = <20>; - clock-mult = <1>; - }; -/* PLL_PER derivatives */ - occ_periph: occ-periph { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_PER>; - #clock-cells = <0>; - clock-div = <16>; - clock-mult = <1>; - }; - periph_clk: periph-clk { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph>; - #clock-cells = <0>; - clock-div = <1>; - 
clock-mult = <1>; - }; - can_clk: can-clk { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - spi_clk: spi-clk { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - uart_clk: uart-clk { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - i2c_clk: i2c-clk { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - clock-output-names = "i2c_clk"; - }; - timer_clk: timer-clk { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - clock-output-names = "timer_clk"; - }; - gpio_clk: gpio-clk { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - clock-output-names = "gpio_clk"; - }; - emmc_sys_clk: emmc-sys-clk { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_PER>; - #clock-cells = <0>; - clock-div = <10>; - clock-mult = <1>; - clock-output-names = "emmc_sys_clk"; - }; - ccf_ctrl_clk: ccf-ctrl-clk { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_PER>; - #clock-cells = <0>; - clock-div = <4>; - clock-mult = <1>; - clock-output-names = "ccf_ctrl_clk"; - }; - occ_mjpeg_core: occ-mjpeg-core { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_PER>; - #clock-cells = <0>; - clock-div = <2>; - clock-mult = <1>; - clock-output-names = "occ_mjpeg_core"; - }; - hsm_clk: hsm-clk { /* gate ClkRstGen_hsm */ - compatible = "fixed-factor-clock"; - clocks = <&occ_mjpeg_core>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - clock-output-names = "hsm_clk"; - }; - mjpeg_core_clk: mjpeg-core-clk { /* gate ClkRstGen_mjpeg_gen */ - compatible = "fixed-factor-clock"; - clocks = <&occ_mjpeg_core>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - clock-output-names = "mjpeg_core_clk"; - }; - fcmu_a_clk: fcmu-a-clk { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_PER>; - #clock-cells = <0>; - clock-div = <20>; - clock-mult = <1>; - clock-output-names = "fcmu_a_clk"; - }; - occ_pci_sys: occ-pci-sys { - compatible = "fixed-factor-clock"; - clocks = <&olb EQ5C_PLL_PER>; - #clock-cells = <0>; - clock-div = <8>; - clock-mult = <1>; - clock-output-names = "occ_pci_sys"; - }; - pclk: pclk { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <250000000>; /* 250MHz */ - }; - tsu_clk: tsu-clk { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <125000000>; /* 125MHz */ - }; -}; diff --git a/arch/mips/boot/dts/mobileye/eyeq5.dtsi b/arch/mips/boot/dts/mobileye/eyeq5.dtsi index 0708771c193d..5d73e8320b8e 100644 --- a/arch/mips/boot/dts/mobileye/eyeq5.dtsi +++ b/arch/mips/boot/dts/mobileye/eyeq5.dtsi @@ -5,7 +5,7 @@ #include <dt-bindings/interrupt-controller/mips-gic.h> -#include "eyeq5-clocks.dtsi" +#include <dt-bindings/clock/mobileye,eyeq5-clk.h> / { #address-cells = <2>; @@ -17,7 +17,7 @@ device_type = "cpu"; compatible = "img,i6500"; reg = <0>; - clocks = <&core0_clk>; + clocks = <&olb EQ5C_CPU_CORE0>; }; }; @@ -64,6 +64,24 @@ #interrupt-cells = <1>; }; + xtal: xtal { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <30000000>; + }; + + pclk: pclk { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <250000000>; 
/* 250MHz */ + }; + + tsu_clk: tsu-clk { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <125000000>; /* 125MHz */ + }; + soc: soc { #address-cells = <2>; #size-cells = <2>; @@ -76,7 +94,7 @@ reg-io-width = <4>; interrupt-parent = <&gic>; interrupts = <GIC_SHARED 6 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&uart_clk>, <&occ_periph>; + clocks = <&olb EQ5C_PER_UART>, <&olb EQ5C_PER_OCC>; clock-names = "uartclk", "apb_pclk"; resets = <&olb 0 10>; pinctrl-names = "default"; @@ -89,7 +107,7 @@ reg-io-width = <4>; interrupt-parent = <&gic>; interrupts = <GIC_SHARED 6 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&uart_clk>, <&occ_periph>; + clocks = <&olb EQ5C_PER_UART>, <&olb EQ5C_PER_OCC>; clock-names = "uartclk", "apb_pclk"; resets = <&olb 0 11>; pinctrl-names = "default"; @@ -102,7 +120,7 @@ reg-io-width = <4>; interrupt-parent = <&gic>; interrupts = <GIC_SHARED 6 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&uart_clk>, <&occ_periph>; + clocks = <&olb EQ5C_PER_UART>, <&olb EQ5C_PER_OCC>; clock-names = "uartclk", "apb_pclk"; resets = <&olb 0 12>; pinctrl-names = "default"; @@ -135,7 +153,7 @@ timer { compatible = "mti,gic-timer"; interrupts = <GIC_LOCAL 1 IRQ_TYPE_NONE>; - clocks = <&core0_clk>; + clocks = <&olb EQ5C_CPU_CORE0>; }; }; }; diff --git a/arch/mips/boot/dts/mobileye/eyeq6h-fixed-clocks.dtsi b/arch/mips/boot/dts/mobileye/eyeq6h-fixed-clocks.dtsi deleted file mode 100644 index 5fa99e06fde7..000000000000 --- a/arch/mips/boot/dts/mobileye/eyeq6h-fixed-clocks.dtsi +++ /dev/null @@ -1,52 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -/* - * Copyright 2023 Mobileye Vision Technologies Ltd. - */ - -#include <dt-bindings/clock/mobileye,eyeq5-clk.h> - -/ { - xtal: clock-30000000 { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <30000000>; - }; - - pll_west: clock-2000000000-west { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <2000000000>; - }; - - pll_cpu: clock-2000000000-cpu { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <2000000000>; - }; - - /* pll-cpu derivatives */ - occ_cpu: clock-2000000000-occ-cpu { - compatible = "fixed-factor-clock"; - clocks = <&pll_cpu>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - - /* pll-west derivatives */ - occ_periph_w: clock-200000000 { - compatible = "fixed-factor-clock"; - clocks = <&pll_west>; - #clock-cells = <0>; - clock-div = <10>; - clock-mult = <1>; - }; - uart_clk: clock-200000000-uart { - compatible = "fixed-factor-clock"; - clocks = <&occ_periph_w>; - #clock-cells = <0>; - clock-div = <1>; - clock-mult = <1>; - }; - -}; diff --git a/arch/mips/boot/dts/mobileye/eyeq6h.dtsi b/arch/mips/boot/dts/mobileye/eyeq6h.dtsi index 1db3c3cda2e3..4a1a43f351d3 100644 --- a/arch/mips/boot/dts/mobileye/eyeq6h.dtsi +++ b/arch/mips/boot/dts/mobileye/eyeq6h.dtsi @@ -5,7 +5,7 @@ #include <dt-bindings/interrupt-controller/mips-gic.h> -#include "eyeq6h-fixed-clocks.dtsi" +#include <dt-bindings/clock/mobileye,eyeq5-clk.h> / { #address-cells = <2>; @@ -17,7 +17,7 @@ device_type = "cpu"; compatible = "img,i6500"; reg = <0>; - clocks = <&occ_cpu>; + clocks = <&olb_central EQ6HC_CENTRAL_CPU_OCC>; }; }; @@ -32,19 +32,42 @@ #interrupt-cells = <1>; }; + xtal: clock-30000000 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <30000000>; + }; + soc: soc { compatible = "simple-bus"; #address-cells = <2>; #size-cells = <2>; ranges; + olb_acc: system-controller@d2003000 { + compatible = "mobileye,eyeq6h-acc-olb", "syscon"; + reg = <0x0 
0xd2003000 0x0 0x1000>; + #reset-cells = <1>; + #clock-cells = <1>; + clocks = <&xtal>; + clock-names = "ref"; + }; + + olb_central: system-controller@d3100000 { + compatible = "mobileye,eyeq6h-central-olb", "syscon"; + reg = <0x0 0xd3100000 0x0 0x1000>; + #clock-cells = <1>; + clocks = <&xtal>; + clock-names = "ref"; + }; + uart0: serial@d3331000 { compatible = "arm,pl011", "arm,primecell"; reg = <0 0xd3331000 0x0 0x1000>; reg-io-width = <4>; interrupt-parent = <&gic>; interrupts = <GIC_SHARED 43 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&occ_periph_w>, <&occ_periph_w>; + clocks = <&olb_west EQ6HC_WEST_PER_UART>, <&olb_west EQ6HC_WEST_PER_OCC>; clock-names = "uartclk", "apb_pclk"; }; @@ -56,6 +79,15 @@ pinctrl-single,function-mask = <0xffff>; }; + olb_west: system-controller@d3338000 { + compatible = "mobileye,eyeq6h-west-olb", "syscon"; + reg = <0x0 0xd3338000 0x0 0x1000>; + #reset-cells = <1>; + #clock-cells = <1>; + clocks = <&xtal>; + clock-names = "ref"; + }; + pinctrl_east: pinctrl@d3357000 { compatible = "pinctrl-single"; reg = <0x0 0xd3357000 0x0 0xb0>; @@ -64,6 +96,23 @@ pinctrl-single,function-mask = <0xffff>; }; + olb_east: system-controller@d3358000 { + compatible = "mobileye,eyeq6h-east-olb", "syscon"; + reg = <0x0 0xd3358000 0x0 0x1000>; + #reset-cells = <1>; + #clock-cells = <1>; + clocks = <&xtal>; + clock-names = "ref"; + }; + + olb_south: system-controller@d8013000 { + compatible = "mobileye,eyeq6h-south-olb", "syscon"; + reg = <0x0 0xd8013000 0x0 0x1000>; + #clock-cells = <1>; + clocks = <&xtal>; + clock-names = "ref"; + }; + pinctrl_south: pinctrl@d8014000 { compatible = "pinctrl-single"; reg = <0x0 0xd8014000 0x0 0xf8>; @@ -72,6 +121,22 @@ pinctrl-single,function-mask = <0xffff>; }; + olb_ddr0: system-controller@e4080000 { + compatible = "mobileye,eyeq6h-ddr0-olb", "syscon"; + reg = <0x0 0xe4080000 0x0 0x1000>; + #clock-cells = <1>; + clocks = <&xtal>; + clock-names = "ref"; + }; + + olb_ddr1: system-controller@e4081000 { + compatible = "mobileye,eyeq6h-ddr1-olb", "syscon"; + reg = <0x0 0xe4081000 0x0 0x1000>; + #clock-cells = <1>; + clocks = <&xtal>; + clock-names = "ref"; + }; + gic: interrupt-controller@f0920000 { compatible = "mti,gic"; reg = <0x0 0xf0920000 0x0 0x20000>; @@ -89,7 +154,7 @@ timer { compatible = "mti,gic-timer"; interrupts = <GIC_LOCAL 1 IRQ_TYPE_NONE>; - clocks = <&occ_cpu>; + clocks = <&olb_central EQ6HC_CENTRAL_CPU_OCC>; }; }; }; diff --git a/arch/mips/boot/dts/realtek/rtl930x.dtsi b/arch/mips/boot/dts/realtek/rtl930x.dtsi index 6a6f3f3fe389..17577457d159 100644 --- a/arch/mips/boot/dts/realtek/rtl930x.dtsi +++ b/arch/mips/boot/dts/realtek/rtl930x.dtsi @@ -61,6 +61,8 @@ }; &soc { + ranges = <0x0 0x18000000 0x20000>; + intc: interrupt-controller@3000 { compatible = "realtek,rtl9300-intc", "realtek,rtl-intc"; reg = <0x3000 0x18>, <0x3018 0x18>; @@ -88,6 +90,17 @@ interrupts = <7>, <8>, <9>, <10>, <11>; clocks = <&lx_clk>; }; + + snand: spi@1a400 { + compatible = "realtek,rtl9301-snand"; + reg = <0x1a400 0x44>; + interrupt-parent = <&intc>; + interrupts = <19>; + clocks = <&lx_clk>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; }; &uart0 { diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S index b825ed4476c7..e3ff6179c99f 100644 --- a/arch/mips/kernel/head.S +++ b/arch/mips/kernel/head.S @@ -59,6 +59,7 @@ #endif .endm + __HEAD #ifndef CONFIG_NO_EXCEPT_FILL /* * Reserved space for exception handlers. 
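(The head.S hunk above and the vmlinux.lds.S hunk below work as a pair: the early MIPS entry/exception-fill code is tagged __HEAD, and the linker script then emits HEAD_TEXT at the very start of .text. For reference, these are pre-existing kbuild helpers; approximately, from include/linux/init.h and include/asm-generic/vmlinux.lds.h:

    #define __HEAD      .section ".head.text","ax"   /* assembly: early boot code */
    #define HEAD_TEXT   KEEP(*(.head.text))          /* linker: pull in .head.text */

So the boot entry code now lives in .head.text and is linked first inside the kernel's .text output section rather than sitting in discardable init memory.)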
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 9ff55cb80a64..2b708fac8d2c 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -61,6 +61,7 @@ SECTIONS /* read-only */ _text = .; /* Text and read-only data */ .text : { + HEAD_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 45ddbaa6c123..dae856fb3e5b 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -749,7 +749,7 @@ static void bridge_remove(struct platform_device *pdev) static struct platform_driver bridge_driver = { .probe = bridge_probe, - .remove_new = bridge_remove, + .remove = bridge_remove, .driver = { .name = "xtalk-bridge", } diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index 440711d7bf40..ce6f2b08a35e 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -239,6 +239,8 @@ handler: ;\ /* =====================================================[ exceptions] === */ + __REF + /* ---[ 0x100: RESET exception ]----------------------------------------- */ EXCEPTION_ENTRY(_tng_kernel_start) diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index 439e00f81e5d..bd760066f1cd 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -26,15 +26,15 @@ #include <asm/asm-offsets.h> #include <linux/of_fdt.h> -#define tophys(rd,rs) \ - l.movhi rd,hi(-KERNELBASE) ;\ +#define tophys(rd,rs) \ + l.movhi rd,hi(-KERNELBASE) ;\ l.add rd,rd,rs -#define CLEAR_GPR(gpr) \ +#define CLEAR_GPR(gpr) \ l.movhi gpr,0x0 -#define LOAD_SYMBOL_2_GPR(gpr,symbol) \ - l.movhi gpr,hi(symbol) ;\ +#define LOAD_SYMBOL_2_GPR(gpr,symbol) \ + l.movhi gpr,hi(symbol) ;\ l.ori gpr,gpr,lo(symbol) @@ -326,21 +326,21 @@ l.addi r1,r1,-(INT_FRAME_SIZE) ;\ /* r1 is KSP, r30 is __pa(KSP) */ ;\ tophys (r30,r1) ;\ - l.sw PT_GPR12(r30),r12 ;\ + l.sw PT_GPR12(r30),r12 ;\ l.mfspr r12,r0,SPR_EPCR_BASE ;\ l.sw PT_PC(r30),r12 ;\ l.mfspr r12,r0,SPR_ESR_BASE ;\ l.sw PT_SR(r30),r12 ;\ /* save r31 */ ;\ EXCEPTION_T_LOAD_GPR30(r12) ;\ - l.sw PT_GPR30(r30),r12 ;\ + l.sw PT_GPR30(r30),r12 ;\ /* save r10 as was prior to exception */ ;\ EXCEPTION_T_LOAD_GPR10(r12) ;\ - l.sw PT_GPR10(r30),r12 ;\ - /* save PT_SP as was prior to exception */ ;\ + l.sw PT_GPR10(r30),r12 ;\ + /* save PT_SP as was prior to exception */ ;\ EXCEPTION_T_LOAD_SP(r12) ;\ l.sw PT_SP(r30),r12 ;\ - l.sw PT_GPR13(r30),r13 ;\ + l.sw PT_GPR13(r30),r13 ;\ /* --> */ ;\ /* save exception r4, set r4 = EA */ ;\ l.sw PT_GPR4(r30),r4 ;\ @@ -357,6 +357,8 @@ /* =====================================================[ exceptions] === */ + __HEAD + /* ---[ 0x100: RESET exception ]----------------------------------------- */ .org 0x100 /* Jump to .init code at _start which lives in the .head section @@ -394,7 +396,7 @@ _dispatch_do_ipage_fault: .org 0x500 EXCEPTION_HANDLE(_timer_handler) -/* ---[ 0x600: Alignment exception ]-------------------------------------- */ +/* ---[ 0x600: Alignment exception ]------------------------------------- */ .org 0x600 EXCEPTION_HANDLE(_alignment_handler) @@ -424,7 +426,7 @@ _dispatch_do_ipage_fault: .org 0xc00 EXCEPTION_HANDLE(_sys_call_handler) -/* ---[ 0xd00: Floating point exception ]--------------------------------- */ +/* ---[ 0xd00: Floating point exception ]-------------------------------- */ .org 0xd00 EXCEPTION_HANDLE(_fpe_trap_handler) @@ -506,10 +508,10 @@ _dispatch_do_ipage_fault: /* .text*/ -/* This early stuff belongs in HEAD, but some of 
the functions below definitely +/* This early stuff belongs in the .init.text section, but some of the functions below definitely * don't... */ - __HEAD + __INIT .global _start _start: /* Init r0 to zero as per spec */ @@ -816,7 +818,7 @@ secondary_start: #endif -/* ========================================[ cache ]=== */ +/* ==========================================================[ cache ]=== */ /* alignment here so we don't change memory offsets with * memory controller defined diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index bc1306047837..049bff45f612 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S @@ -50,6 +50,7 @@ SECTIONS .text : AT(ADDR(.text) - LOAD_OFFSET) { _stext = .; + HEAD_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT @@ -83,8 +84,6 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_begin = .; - HEAD_TEXT_SECTION - /* Page aligned */ INIT_TEXT_SECTION(PAGE_SIZE) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 41489483a602..f3804103c56c 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -403,10 +403,12 @@ PHONY += stack_protector_prepare stack_protector_prepare: prepare0 ifdef CONFIG_PPC64 $(eval KBUILD_CFLAGS += -mstack-protector-guard=tls -mstack-protector-guard-reg=r13 \ - -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "PACA_CANARY") print $$3;}' include/generated/asm-offsets.h)) + -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "PACA_CANARY") print $$3;}' \ + $(objtree)/include/generated/asm-offsets.h)) else $(eval KBUILD_CFLAGS += -mstack-protector-guard=tls -mstack-protector-guard-reg=r2 \ - -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "TASK_CANARY") print $$3;}' include/generated/asm-offsets.h)) + -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "TASK_CANARY") print $$3;}' \ + $(objtree)/include/generated/asm-offsets.h)) endif endif diff --git a/arch/powerpc/crypto/vmx.c b/arch/powerpc/crypto/vmx.c index 7eb713cc87c8..0b725e826388 100644 --- a/arch/powerpc/crypto/vmx.c +++ b/arch/powerpc/crypto/vmx.c @@ -74,4 +74,4 @@ MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions " "support on Power 8"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0.0"); -MODULE_IMPORT_NS(CRYPTO_INTERNAL); +MODULE_IMPORT_NS("CRYPTO_INTERNAL"); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 73210e5bcfa7..8e776ba39497 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2848,7 +2848,7 @@ static void __init fixup_device_tree_chrp(void) #endif #if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC) -static void __init fixup_device_tree_pmac(void) +static void __init fixup_device_tree_pmac64(void) { phandle u3, i2c, mpic; u32 u3_rev; @@ -2888,7 +2888,31 @@ static void __init fixup_device_tree_pmac(void) &parent, sizeof(parent)); } #else -#define fixup_device_tree_pmac() +#define fixup_device_tree_pmac64() +#endif + +#ifdef CONFIG_PPC_PMAC +static void __init fixup_device_tree_pmac(void) +{ + __be32 val = 1; + char type[8]; + phandle node; + + // Some pmacs are missing #size-cells on escc nodes + for (node = 0; prom_next_node(&node); ) { + type[0] = '\0'; + prom_getprop(node, "device_type", type, sizeof(type)); + if (prom_strcmp(type, "escc")) + continue; + + if (prom_getproplen(node, "#size-cells") != PROM_ERROR) + continue; + + prom_setprop(node, NULL, "#size-cells", &val, sizeof(val)); + } +} +#else +static inline void fixup_device_tree_pmac(void) { } #endif #ifdef CONFIG_PPC_EFIKA @@ 
-3111,6 +3135,7 @@ static void __init fixup_device_tree(void) { fixup_device_tree_chrp(); fixup_device_tree_pmac(); + fixup_device_tree_pmac64(); fixup_device_tree_efika(); fixup_device_tree_pasemi(); } diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index c5d0f92c7969..384c9dc1899a 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -10,7 +10,6 @@ #include <linux/memblock.h> #include <linux/mem_encrypt.h> #include <linux/cc_platform.h> -#include <linux/mem_encrypt.h> #include <asm/machdep.h> #include <asm/svm.h> #include <asm/swiotlb.h> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index ff1e353b0d6f..d4a7ca0388c0 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -32,6 +32,7 @@ config RISCV select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE + select ARCH_HAS_HW_PTE_YOUNG select ARCH_HAS_KCOV select ARCH_HAS_KERNEL_FPU_SUPPORT if 64BIT && FPU select ARCH_HAS_MEMBARRIER_CALLBACKS @@ -83,6 +84,7 @@ config RISCV select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP select ARCH_WANTS_NO_INSTR select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE + select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU select BUILDTIME_TABLE_SORT if MMU select CLINT_TIMER if RISCV_M_MODE @@ -116,6 +118,7 @@ config RISCV select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HARDIRQS_SW_RESEND select HAS_IOPORT if MMU + select HAVE_ALIGNED_STRUCT_PAGE select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT @@ -507,6 +510,39 @@ config NODES_SHIFT Specify the maximum number of NUMA Nodes available on the target system. Increases memory reserved to accommodate various tables. +choice + prompt "RISC-V spinlock type" + default RISCV_COMBO_SPINLOCKS + +config RISCV_TICKET_SPINLOCKS + bool "Using ticket spinlock" + +config RISCV_QUEUED_SPINLOCKS + bool "Using queued spinlock" + depends on SMP && MMU && NONPORTABLE + select ARCH_USE_QUEUED_SPINLOCKS + help + The queued spinlock implementation requires the forward progress + guarantee of cmpxchg()/xchg() atomic operations: CAS with Zabha or + LR/SC with Ziccrse provide such a guarantee. + + Select this if and only if Zabha or Ziccrse is available on your + platform; RISCV_QUEUED_SPINLOCKS must not be selected for platforms + without one of those extensions. + + If unsure, select RISCV_COMBO_SPINLOCKS, which will use qspinlocks + when supported and otherwise ticket spinlocks. + +config RISCV_COMBO_SPINLOCKS + bool "Using combo spinlock" + depends on SMP && MMU + select ARCH_USE_QUEUED_SPINLOCKS + help + Embed both queued spinlock and ticket lock so that the spinlock + implementation can be chosen at runtime. + +endchoice + config RISCV_ALTERNATIVE bool depends on !XIP_KERNEL @@ -532,6 +568,17 @@ config RISCV_ISA_C If you don't know what to do here, say Y. +config RISCV_ISA_SUPM + bool "Supm extension for userspace pointer masking" + depends on 64BIT + default y + help + Add support for pointer masking in userspace (Supm) when the + underlying hardware extension (Smnpm or Ssnpm) is detected at boot. + + If this option is disabled, userspace will be unable to use + the prctl(PR_{SET,GET}_TAGGED_ADDR_CTRL) API.
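(The RISCV_ISA_SUPM help text above refers to the tagged-address prctl pair. A minimal userspace sketch of requesting pointer masking under this option; the PR_PMLEN_* field layout is taken from the uapi side of this series and is an assumption here, not something shown in this hunk:

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_SET_TAGGED_ADDR_CTRL        /* fallbacks for older userspace headers */
    #define PR_SET_TAGGED_ADDR_CTRL 55
    #define PR_GET_TAGGED_ADDR_CTRL 56
    #endif
    #ifndef PR_PMLEN_SHIFT                 /* assumed values from this series' uapi patch */
    #define PR_PMLEN_SHIFT 24
    #define PR_PMLEN_MASK  (0x7fUL << PR_PMLEN_SHIFT)
    #endif

    int main(void)
    {
            /* Ask for at least 7 masked upper address bits; the kernel may grant more. */
            if (prctl(PR_SET_TAGGED_ADDR_CTRL, 7UL << PR_PMLEN_SHIFT, 0, 0, 0)) {
                    perror("PR_SET_TAGGED_ADDR_CTRL");
                    return 1;
            }

            long ctrl = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
            if (ctrl >= 0)
                    printf("PMLEN granted: %ld\n", (ctrl & PR_PMLEN_MASK) >> PR_PMLEN_SHIFT);
            return 0;
    }

With CONFIG_RISCV_ISA_SUPM=n, or on hardware without Smnpm/Ssnpm, the SET call should simply fail, which matches the "userspace will be unable to use" wording above.)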
+ config RISCV_ISA_SVNAPOT bool "Svnapot extension support for supervisor mode NAPOT pages" depends on 64BIT && MMU @@ -633,6 +680,40 @@ config RISCV_ISA_ZAWRS use of these instructions in the kernel when the Zawrs extension is detected at boot. +config TOOLCHAIN_HAS_ZABHA + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zabha) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zabha) + depends on AS_HAS_OPTION_ARCH + +config RISCV_ISA_ZABHA + bool "Zabha extension support for atomic byte/halfword operations" + depends on TOOLCHAIN_HAS_ZABHA + depends on RISCV_ALTERNATIVE + default y + help + Enable the use of the Zabha ISA-extension to implement kernel + byte/halfword atomic memory operations when it is detected at boot. + + If you don't know what to do here, say Y. + +config TOOLCHAIN_HAS_ZACAS + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas) + depends on AS_HAS_OPTION_ARCH + +config RISCV_ISA_ZACAS + bool "Zacas extension support for atomic CAS" + depends on TOOLCHAIN_HAS_ZACAS + depends on RISCV_ALTERNATIVE + default y + help + Enable the use of the Zacas ISA-extension to implement kernel atomic + cmpxchg operations when it is detected at boot. If you don't know what to do here, say Y. @@ -786,10 +867,24 @@ config THREAD_SIZE_ORDER config RISCV_MISALIGNED bool + help + Embed support for detecting and emulating misaligned + scalar or vector loads and stores. + +config RISCV_SCALAR_MISALIGNED + bool + select RISCV_MISALIGNED select SYSCTL_ARCH_UNALIGN_ALLOW help Embed support for emulating misaligned loads and stores. +config RISCV_VECTOR_MISALIGNED + bool + select RISCV_MISALIGNED + depends on RISCV_ISA_V + help + Enable detection of support for vector misaligned loads and stores. + choice prompt "Unaligned Accesses Support" default RISCV_PROBE_UNALIGNED_ACCESS @@ -801,7 +896,7 @@ choice config RISCV_PROBE_UNALIGNED_ACCESS bool "Probe for hardware unaligned access support" - select RISCV_MISALIGNED + select RISCV_SCALAR_MISALIGNED help During boot, the kernel will run a series of tests to determine the speed of unaligned accesses. This probing will dynamically determine @@ -812,7 +907,7 @@ config RISCV_PROBE_UNALIGNED_ACCESS config RISCV_EMULATED_UNALIGNED_ACCESS bool "Emulate unaligned access where system support is missing" - select RISCV_MISALIGNED + select RISCV_SCALAR_MISALIGNED help If unaligned memory accesses trap into the kernel as they are not supported by the system, the kernel will emulate the unaligned @@ -841,6 +936,46 @@ config RISCV_EFFICIENT_UNALIGNED_ACCESS endchoice +choice + prompt "Vector Unaligned Accesses Support" + depends on RISCV_ISA_V + default RISCV_PROBE_VECTOR_UNALIGNED_ACCESS + help + This determines the level of support for vector unaligned accesses. This + information is used by the kernel to perform optimizations. It is also + exposed to user space via the hwprobe syscall. The hardware will be + probed at boot by default. + +config RISCV_PROBE_VECTOR_UNALIGNED_ACCESS + bool "Probe speed of vector unaligned accesses" + select RISCV_VECTOR_MISALIGNED + depends on RISCV_ISA_V + help + During boot, the kernel will run a series of tests to determine the + speed of vector unaligned accesses if they are supported. This probing + will dynamically determine the speed of vector unaligned accesses on + the underlying system.
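(Whichever option is chosen, the result is exported to userspace through the riscv_hwprobe(2) syscall via the new RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF key added further down in uapi/asm/hwprobe.h. A minimal sketch of a query, assuming installed kernel headers that already carry the new key:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <asm/hwprobe.h>        /* struct riscv_hwprobe and the key constants */

    int main(void)
    {
            struct riscv_hwprobe pair = {
                    .key = RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF,  /* key 10 in this patch */
            };

            /* One key/value pair, all online CPUs (empty cpuset), no flags. */
            if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0)) {
                    perror("riscv_hwprobe");
                    return 1;
            }

            /* Values per this patch: 0 unknown, 2 slow, 3 fast, 4 unsupported. */
            printf("vector misaligned perf: %llu\n", (unsigned long long)pair.value);
            return 0;
    }
)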
+ +config RISCV_SLOW_VECTOR_UNALIGNED_ACCESS + bool "Assume the system supports slow vector unaligned memory accesses" + depends on NONPORTABLE + help + Assume that the system supports slow vector unaligned memory accesses. The + kernel and userspace programs may not be able to run at all on systems + that do not support unaligned memory accesses. + +config RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS + bool "Assume the system supports fast vector unaligned memory accesses" + depends on NONPORTABLE + help + Assume that the system supports fast vector unaligned memory accesses. When + enabled, this option improves the performance of the kernel on such + systems. However, the kernel and userspace programs will run much more + slowly, or will not be able to run at all, on systems that do not + support efficient unaligned memory accesses. + +endchoice + source "arch/riscv/Kconfig.vendor" endmenu # "Platform type" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index d469db9f46f4..13fbc0f94238 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -82,6 +82,12 @@ else riscv-march-$(CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI) := $(riscv-march-y)_zicsr_zifencei endif +# Check if the toolchain supports Zacas +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas + +# Check if the toolchain supports Zabha +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZABHA) := $(riscv-march-y)_zabha + # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') @@ -129,7 +135,7 @@ stack_protector_prepare: prepare0 -mstack-protector-guard-reg=tp \ -mstack-protector-guard-offset=$(shell \ awk '{if ($$2 == "TSK_STACK_CANARY") print $$3;}' \ - include/generated/asm-offsets.h)) + $(objtree)/include/generated/asm-offsets.h)) endif # arch specific predefines for sparse diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 1d5e13b148f2..b4a37345703e 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -167,6 +167,7 @@ CONFIG_PINCTRL_SOPHGO_CV1800B=y CONFIG_PINCTRL_SOPHGO_CV1812H=y CONFIG_PINCTRL_SOPHGO_SG2000=y CONFIG_PINCTRL_SOPHGO_SG2002=y +CONFIG_GPIO_DWAPB=y CONFIG_GPIO_SIFIVE=y CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_SENSORS_SFCTEMP=m diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 1461af12da6e..de13d5a234f8 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -6,10 +6,12 @@ generic-y += early_ioremap.h generic-y += flat.h generic-y += kvm_para.h generic-y += mmzone.h +generic-y += mcs_spinlock.h generic-y += parport.h -generic-y += spinlock.h generic-y += spinlock_types.h +generic-y += ticket_spinlock.h generic-y += qrwlock.h generic-y += qrwlock_types.h +generic-y += qspinlock.h generic-y += user.h generic-y += vmlinux.lds.h diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index ebbce134917c..4cadc56220fe 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -12,30 +12,43 @@ #include <asm/fence.h> #include <asm/hwcap.h> #include <asm/insn-def.h> - -#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \ -({ \ - u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ - ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ - ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ - << __s; \ - ulong __newx = 
(ulong)(n) << __s; \ - ulong __retx; \ - ulong __rc; \ - \ - __asm__ __volatile__ ( \ - prepend \ - "0: lr.w %0, %2\n" \ - " and %1, %0, %z4\n" \ - " or %1, %1, %z3\n" \ - " sc.w" sc_sfx " %1, %1, %2\n" \ - " bnez %1, 0b\n" \ - append \ - : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ - : "rJ" (__newx), "rJ" (~__mask) \ - : "memory"); \ - \ - r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ +#include <asm/cpufeature-macros.h> + +#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \ + swap_append, r, p, n) \ +({ \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \ + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \ + __asm__ __volatile__ ( \ + prepend \ + " amoswap" swap_sfx " %0, %z2, %1\n" \ + swap_append \ + : "=&r" (r), "+A" (*(p)) \ + : "rJ" (n) \ + : "memory"); \ + } else { \ + u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ + ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ + ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ + << __s; \ + ulong __newx = (ulong)(n) << __s; \ + ulong __retx; \ + ulong __rc; \ + \ + __asm__ __volatile__ ( \ + prepend \ + "0: lr.w %0, %2\n" \ + " and %1, %0, %z4\n" \ + " or %1, %1, %z3\n" \ + " sc.w" sc_sfx " %1, %1, %2\n" \ + " bnez %1, 0b\n" \ + sc_append \ + : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ + : "rJ" (__newx), "rJ" (~__mask) \ + : "memory"); \ + \ + r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ + } \ }) #define __arch_xchg(sfx, prepend, append, r, p, n) \ @@ -58,8 +71,13 @@ \ switch (sizeof(*__ptr)) { \ case 1: \ + __arch_xchg_masked(sc_sfx, ".b" swap_sfx, \ + prepend, sc_append, swap_append, \ + __ret, __ptr, __new); \ + break; \ case 2: \ - __arch_xchg_masked(sc_sfx, prepend, sc_append, \ + __arch_xchg_masked(sc_sfx, ".h" swap_sfx, \ + prepend, sc_append, swap_append, \ __ret, __ptr, __new); \ break; \ case 4: \ @@ -106,55 +124,90 @@ * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. 
*/ - -#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \ -({ \ - u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ - ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ - ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ - << __s; \ - ulong __newx = (ulong)(n) << __s; \ - ulong __oldx = (ulong)(o) << __s; \ - ulong __retx; \ - ulong __rc; \ - \ - __asm__ __volatile__ ( \ - prepend \ - "0: lr.w %0, %2\n" \ - " and %1, %0, %z5\n" \ - " bne %1, %z3, 1f\n" \ - " and %1, %0, %z6\n" \ - " or %1, %1, %z4\n" \ - " sc.w" sc_sfx " %1, %1, %2\n" \ - " bnez %1, 0b\n" \ - append \ - "1:\n" \ - : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ - : "rJ" ((long)__oldx), "rJ" (__newx), \ - "rJ" (__mask), "rJ" (~__mask) \ - : "memory"); \ - \ - r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ +#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + r, p, o, n) \ +({ \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \ + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \ + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) && \ + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \ + r = o; \ + \ + __asm__ __volatile__ ( \ + cas_prepend \ + " amocas" cas_sfx " %0, %z2, %1\n" \ + cas_append \ + : "+&r" (r), "+A" (*(p)) \ + : "rJ" (n) \ + : "memory"); \ + } else { \ + u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ + ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ + ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ + << __s; \ + ulong __newx = (ulong)(n) << __s; \ + ulong __oldx = (ulong)(o) << __s; \ + ulong __retx; \ + ulong __rc; \ + \ + __asm__ __volatile__ ( \ + sc_prepend \ + "0: lr.w %0, %2\n" \ + " and %1, %0, %z5\n" \ + " bne %1, %z3, 1f\n" \ + " and %1, %0, %z6\n" \ + " or %1, %1, %z4\n" \ + " sc.w" sc_sfx " %1, %1, %2\n" \ + " bnez %1, 0b\n" \ + sc_append \ + "1:\n" \ + : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ + : "rJ" ((long)__oldx), "rJ" (__newx), \ + "rJ" (__mask), "rJ" (~__mask) \ + : "memory"); \ + \ + r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ + } \ }) -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ +#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + r, p, co, o, n) \ ({ \ - register unsigned int __rc; \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \ + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \ + r = o; \ \ - __asm__ __volatile__ ( \ - prepend \ - "0: lr" lr_sfx " %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc" sc_sfx " %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - append \ - "1:\n" \ - : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ - : "rJ" (co o), "rJ" (n) \ - : "memory"); \ + __asm__ __volatile__ ( \ + cas_prepend \ + " amocas" cas_sfx " %0, %z2, %1\n" \ + cas_append \ + : "+&r" (r), "+A" (*(p)) \ + : "rJ" (n) \ + : "memory"); \ + } else { \ + register unsigned int __rc; \ + \ + __asm__ __volatile__ ( \ + sc_prepend \ + "0: lr" lr_sfx " %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc" sc_sfx " %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + sc_append \ + "1:\n" \ + : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ + : "rJ" (co o), "rJ" (n) \ + : "memory"); \ + } \ }) -#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \ +#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ __typeof__(*(__ptr)) __old = (old); \ @@ -163,17 +216,28 @@ \ switch (sizeof(*__ptr)) { \ case 1: \ + 
__arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + __ret, __ptr, __old, __new); \ + break; \ case 2: \ - __arch_cmpxchg_masked(sc_sfx, prepend, append, \ - __ret, __ptr, __old, __new); \ + __arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + __ret, __ptr, __old, __new); \ break; \ case 4: \ - __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \ - __ret, __ptr, (long), __old, __new); \ + __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + __ret, __ptr, (long), __old, __new); \ break; \ case 8: \ - __arch_cmpxchg(".d", ".d" sc_sfx, prepend, append, \ - __ret, __ptr, /**/, __old, __new); \ + __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + __ret, __ptr, /**/, __old, __new); \ break; \ default: \ BUILD_BUG(); \ @@ -181,17 +245,40 @@ (__typeof__(*(__ptr)))__ret; \ }) +/* + * These macros are here to improve the readability of the arch_cmpxchg_XXX() + * macros. + */ +#define SC_SFX(x) x +#define CAS_SFX(x) x +#define SC_PREPEND(x) x +#define SC_APPEND(x) x +#define CAS_PREPEND(x) x +#define CAS_APPEND(x) x + #define arch_cmpxchg_relaxed(ptr, o, n) \ - _arch_cmpxchg((ptr), (o), (n), "", "", "") + _arch_cmpxchg((ptr), (o), (n), \ + SC_SFX(""), CAS_SFX(""), \ + SC_PREPEND(""), SC_APPEND(""), \ + CAS_PREPEND(""), CAS_APPEND("")) #define arch_cmpxchg_acquire(ptr, o, n) \ - _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER) + _arch_cmpxchg((ptr), (o), (n), \ + SC_SFX(""), CAS_SFX(""), \ + SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER), \ + CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER)) #define arch_cmpxchg_release(ptr, o, n) \ - _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "") + _arch_cmpxchg((ptr), (o), (n), \ + SC_SFX(""), CAS_SFX(""), \ + SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""), \ + CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND("")) #define arch_cmpxchg(ptr, o, n) \ - _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n") + _arch_cmpxchg((ptr), (o), (n), \ + SC_SFX(".rl"), CAS_SFX(".aqrl"), \ + SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER), \ + CAS_PREPEND(""), CAS_APPEND("")) #define arch_cmpxchg_local(ptr, o, n) \ arch_cmpxchg_relaxed((ptr), (o), (n)) @@ -226,6 +313,44 @@ arch_cmpxchg_release((ptr), (o), (n)); \ }) +#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS) + +#define system_has_cmpxchg128() riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS) + +union __u128_halves { + u128 full; + struct { + u64 low, high; + }; +}; + +#define __arch_cmpxchg128(p, o, n, cas_sfx) \ +({ \ + __typeof__(*(p)) __o = (o); \ + union __u128_halves __hn = { .full = (n) }; \ + union __u128_halves __ho = { .full = (__o) }; \ + register unsigned long t1 asm ("t1") = __hn.low; \ + register unsigned long t2 asm ("t2") = __hn.high; \ + register unsigned long t3 asm ("t3") = __ho.low; \ + register unsigned long t4 asm ("t4") = __ho.high; \ + \ + __asm__ __volatile__ ( \ + " amocas.q" cas_sfx " %0, %z3, %2" \ + : "+&r" (t3), "+&r" (t4), "+A" (*(p)) \ + : "rJ" (t1), "rJ" (t2) \ + : "memory"); \ + \ + ((u128)t4 << 64) | t3; \ +}) + +#define arch_cmpxchg128(ptr, o, n) \ + __arch_cmpxchg128((ptr), (o), (n), ".aqrl") + +#define arch_cmpxchg128_local(ptr, o, n) \ + __arch_cmpxchg128((ptr), (o), (n), "") + +#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */ + #ifdef CONFIG_RISCV_ISA_ZAWRS /* * Despite wrs.nto being "WRS-with-no-timeout", in the absence of 
changes to @@ -245,6 +370,11 @@ static __always_inline void __cmpwait(volatile void *ptr, : : : : no_zawrs); switch (size) { + case 1: + fallthrough; + case 2: + /* RISC-V doesn't have lr instructions on byte and half-word. */ + goto no_zawrs; case 4: asm volatile( " lr.w %0, %1\n" diff --git a/arch/riscv/include/asm/compat.h b/arch/riscv/include/asm/compat.h index aa103530a5c8..6081327e55f5 100644 --- a/arch/riscv/include/asm/compat.h +++ b/arch/riscv/include/asm/compat.h @@ -9,7 +9,6 @@ */ #include <linux/types.h> #include <linux/sched.h> -#include <linux/sched/task_stack.h> #include <asm-generic/compat.h> static inline int is_compat_task(void) diff --git a/arch/riscv/include/asm/cpufeature-macros.h b/arch/riscv/include/asm/cpufeature-macros.h new file mode 100644 index 000000000000..a8103edbf51f --- /dev/null +++ b/arch/riscv/include/asm/cpufeature-macros.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2022-2024 Rivos, Inc + */ + +#ifndef _ASM_CPUFEATURE_MACROS_H +#define _ASM_CPUFEATURE_MACROS_H + +#include <asm/hwcap.h> +#include <asm/alternative-macros.h> + +#define STANDARD_EXT 0 + +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit); +#define riscv_isa_extension_available(isa_bitmap, ext) \ + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) + +static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor, + const unsigned long ext) +{ + asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1) + : + : [vendor] "i" (vendor), [ext] "i" (ext) + : + : l_no); + + return true; +l_no: + return false; +} + +static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor, + const unsigned long ext) +{ + asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1) + : + : [vendor] "i" (vendor), [ext] "i" (ext) + : + : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_unlikely(STANDARD_EXT, ext); + + return __riscv_isa_extension_available(NULL, ext); +} + +static __always_inline bool riscv_has_extension_likely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_likely(STANDARD_EXT, ext); + + return __riscv_isa_extension_available(NULL, ext); +} + +#endif /* _ASM_CPUFEATURE_MACROS_H */ diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 45f9c1171a48..4bd054c54c21 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -8,9 +8,12 @@ #include <linux/bitmap.h> #include <linux/jump_label.h> +#include <linux/workqueue.h> +#include <linux/kconfig.h> +#include <linux/percpu-defs.h> +#include <linux/threads.h> #include <asm/hwcap.h> -#include <asm/alternative-macros.h> -#include <asm/errno.h> +#include <asm/cpufeature-macros.h> /* * These are probed via a device_initcall(), via either the SBI or directly @@ -31,7 +34,7 @@ DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); /* Per-cpu ISA extensions. 
*/ extern struct riscv_isainfo hart_isa[NR_CPUS]; -void riscv_user_isa_enable(void); +void __init riscv_user_isa_enable(void); #define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \ .name = #_name, \ @@ -58,8 +61,9 @@ void riscv_user_isa_enable(void); #define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \ _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate) -#if defined(CONFIG_RISCV_MISALIGNED) bool check_unaligned_access_emulated_all_cpus(void); +#if defined(CONFIG_RISCV_SCALAR_MISALIGNED) +void check_unaligned_access_emulated(struct work_struct *work __always_unused); void unaligned_emulation_finish(void); bool unaligned_ctl_available(void); DECLARE_PER_CPU(long, misaligned_access_speed); @@ -70,6 +74,12 @@ static inline bool unaligned_ctl_available(void) } #endif +bool check_vector_unaligned_access_emulated_all_cpus(void); +#if defined(CONFIG_RISCV_VECTOR_MISALIGNED) +void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused); +DECLARE_PER_CPU(long, vector_misaligned_access); +#endif + #if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) DECLARE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key); @@ -103,61 +113,6 @@ extern const size_t riscv_isa_ext_count; extern bool riscv_isa_fallback; unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); - -#define STANDARD_EXT 0 - -bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit); -#define riscv_isa_extension_available(isa_bitmap, ext) \ - __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) - -static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor, - const unsigned long ext) -{ - asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1) - : - : [vendor] "i" (vendor), [ext] "i" (ext) - : - : l_no); - - return true; -l_no: - return false; -} - -static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor, - const unsigned long ext) -{ - asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1) - : - : [vendor] "i" (vendor), [ext] "i" (ext) - : - : l_yes); - - return false; -l_yes: - return true; -} - -static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext) -{ - compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) - return __riscv_has_extension_unlikely(STANDARD_EXT, ext); - - return __riscv_isa_extension_available(NULL, ext); -} - -static __always_inline bool riscv_has_extension_likely(const unsigned long ext) -{ - compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) - return __riscv_has_extension_likely(STANDARD_EXT, ext); - - return __riscv_isa_extension_available(NULL, ext); -} - static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) { compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 25966995da04..37bdea65bbd8 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -119,6 +119,10 @@ /* HSTATUS flags */ #ifdef CONFIG_64BIT +#define HSTATUS_HUPMM _AC(0x3000000000000, UL) +#define HSTATUS_HUPMM_PMLEN_0 _AC(0x0000000000000, UL) +#define HSTATUS_HUPMM_PMLEN_7 _AC(0x2000000000000, UL) +#define HSTATUS_HUPMM_PMLEN_16 _AC(0x3000000000000, UL) #define HSTATUS_VSXL _AC(0x300000000, 
UL) #define HSTATUS_VSXL_SHIFT 32 #endif @@ -195,6 +199,11 @@ /* xENVCFG flags */ #define ENVCFG_STCE (_AC(1, ULL) << 63) #define ENVCFG_PBMTE (_AC(1, ULL) << 62) +#define ENVCFG_ADUE (_AC(1, ULL) << 61) +#define ENVCFG_PMM (_AC(0x3, ULL) << 32) +#define ENVCFG_PMM_PMLEN_0 (_AC(0x0, ULL) << 32) +#define ENVCFG_PMM_PMLEN_7 (_AC(0x2, ULL) << 32) +#define ENVCFG_PMM_PMLEN_16 (_AC(0x3, ULL) << 32) #define ENVCFG_CBZE (_AC(1, UL) << 7) #define ENVCFG_CBCFE (_AC(1, UL) << 6) #define ENVCFG_CBIE_SHIFT 4 @@ -216,6 +225,12 @@ #define SMSTATEEN0_SSTATEEN0_SHIFT 63 #define SMSTATEEN0_SSTATEEN0 (_ULL(1) << SMSTATEEN0_SSTATEEN0_SHIFT) +/* mseccfg bits */ +#define MSECCFG_PMM ENVCFG_PMM +#define MSECCFG_PMM_PMLEN_0 ENVCFG_PMM_PMLEN_0 +#define MSECCFG_PMM_PMLEN_7 ENVCFG_PMM_PMLEN_7 +#define MSECCFG_PMM_PMLEN_16 ENVCFG_PMM_PMLEN_16 + /* symbolic CSR names: */ #define CSR_CYCLE 0xc00 #define CSR_TIME 0xc01 @@ -382,6 +397,8 @@ #define CSR_MIP 0x344 #define CSR_PMPCFG0 0x3a0 #define CSR_PMPADDR0 0x3b0 +#define CSR_MSECCFG 0x747 +#define CSR_MSECCFGH 0x757 #define CSR_MVENDORID 0xf11 #define CSR_MARCHID 0xf12 #define CSR_MIMPID 0xf13 diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h index 2293e535f865..b28ccc6cdeea 100644 --- a/arch/riscv/include/asm/entry-common.h +++ b/arch/riscv/include/asm/entry-common.h @@ -33,6 +33,7 @@ static inline int handle_misaligned_load(struct pt_regs *regs) { return -1; } + static inline int handle_misaligned_store(struct pt_regs *regs) { return -1; diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 46d9de54179e..869da082252a 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -93,6 +93,13 @@ #define RISCV_ISA_EXT_ZCMOP 84 #define RISCV_ISA_EXT_ZAWRS 85 #define RISCV_ISA_EXT_SVVPTC 86 +#define RISCV_ISA_EXT_SMMPM 87 +#define RISCV_ISA_EXT_SMNPM 88 +#define RISCV_ISA_EXT_SSNPM 89 +#define RISCV_ISA_EXT_ZABHA 90 +#define RISCV_ISA_EXT_ZICCRSE 91 +#define RISCV_ISA_EXT_SVADE 92 +#define RISCV_ISA_EXT_SVADU 93 #define RISCV_ISA_EXT_XLINUXENVCFG 127 @@ -101,8 +108,10 @@ #ifdef CONFIG_RISCV_M_MODE #define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SMAIA +#define RISCV_ISA_EXT_SUPM RISCV_ISA_EXT_SMNPM #else #define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SSAIA +#define RISCV_ISA_EXT_SUPM RISCV_ISA_EXT_SSNPM #endif #endif /* _ASM_RISCV_HWCAP_H */ diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index ffb9484531af..1ce1df6d0ff3 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,7 +8,7 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 9 +#define RISCV_HWPROBE_MAX_KEY 10 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { diff --git a/arch/riscv/include/asm/kfence.h b/arch/riscv/include/asm/kfence.h index 7388edd88986..d08bf7fb3aee 100644 --- a/arch/riscv/include/asm/kfence.h +++ b/arch/riscv/include/asm/kfence.h @@ -22,7 +22,9 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) else set_pte(pte, __pte(pte_val(ptep_get(pte)) | _PAGE_PRESENT)); - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + preempt_disable(); + local_flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + preempt_enable(); return true; } diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index c9e03e9da3dc..1cc90465d75b 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -26,8 +26,15 @@ typedef struct { unsigned long exec_fdpic_loadmap; unsigned long interp_fdpic_loadmap; 
#endif + unsigned long flags; +#ifdef CONFIG_RISCV_ISA_SUPM + u8 pmlen; +#endif } mm_context_t; +/* Lock the pointer masking mode because this mm is multithreaded */ +#define MM_CONTEXT_LOCK_PMLEN 0 + #define cntx2asid(cntx) ((cntx) & SATP_ASID_MASK) #define cntx2version(cntx) ((cntx) & ~SATP_ASID_MASK) diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index 7030837adc1a..8c4bc49a3a0f 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -20,6 +20,9 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { +#ifdef CONFIG_RISCV_ISA_SUPM + next->context.pmlen = 0; +#endif switch_mm(prev, next, NULL); } @@ -30,11 +33,21 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_MMU atomic_long_set(&mm->context.id, 0); #endif + if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM)) + clear_bit(MM_CONTEXT_LOCK_PMLEN, &mm->context.flags); return 0; } DECLARE_STATIC_KEY_FALSE(use_asid_allocator); +#ifdef CONFIG_RISCV_ISA_SUPM +#define mm_untag_mask mm_untag_mask +static inline unsigned long mm_untag_mask(struct mm_struct *mm) +{ + return -1UL >> mm->context.pmlen; +} +#endif + #include <asm-generic/mmu_context.h> #endif /* _ASM_RISCV_MMU_CONTEXT_H */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 5d7f3e8c2e50..d4e99eef90ac 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -113,6 +113,7 @@ #include <asm/tlbflush.h> #include <linux/mm_types.h> #include <asm/compat.h> +#include <asm/cpufeature.h> #define __page_val_to_pfn(_val) (((_val) & _PAGE_PFN_MASK) >> _PAGE_PFN_SHIFT) @@ -284,7 +285,6 @@ static inline pte_t pud_pte(pud_t pud) } #ifdef CONFIG_RISCV_ISA_SVNAPOT -#include <asm/cpufeature.h> static __always_inline bool has_svnapot(void) { @@ -656,6 +656,17 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot) } /* + * Both Svade and Svadu control the hardware behavior when the PTE A/D bits need to be set. By + * default the M-mode firmware enables the hardware updating scheme when only Svadu is present in + * DT. 
+ */ +#define arch_has_hw_pte_young arch_has_hw_pte_young +static inline bool arch_has_hw_pte_young(void) +{ + return riscv_has_extension_unlikely(RISCV_ISA_EXT_SVADU); +} + +/* * THP functions */ static inline pmd_t pte_pmd(pte_t pte) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index efa1b3519b23..5f56eb9d114a 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -102,6 +102,7 @@ struct thread_struct { unsigned long s[12]; /* s[0]: frame pointer */ struct __riscv_d_ext_state fstate; unsigned long bad_cause; + unsigned long envcfg; u32 riscv_v_flags; u32 vstate_ctrl; struct __riscv_v_ext_state vstate; @@ -177,6 +178,14 @@ extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); #define RISCV_SET_ICACHE_FLUSH_CTX(arg1, arg2) riscv_set_icache_flush_ctx(arg1, arg2) extern int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long per_thread); +#ifdef CONFIG_RISCV_ISA_SUPM +/* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */ +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg); +long get_tagged_addr_ctrl(struct task_struct *task); +#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(current, arg) +#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current) +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_PROCESSOR_H */ diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h new file mode 100644 index 000000000000..e5121b89acea --- /dev/null +++ b/arch/riscv/include/asm/spinlock.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_RISCV_SPINLOCK_H +#define __ASM_RISCV_SPINLOCK_H + +#ifdef CONFIG_RISCV_COMBO_SPINLOCKS +#define _Q_PENDING_LOOPS (1 << 9) + +#define __no_arch_spinlock_redefine +#include <asm/ticket_spinlock.h> +#include <asm/qspinlock.h> +#include <asm/jump_label.h> + +/* + * TODO: Use an alternative instead of a static key when we are able to parse + * the extensions string earlier in the boot process. 
+ */ +DECLARE_STATIC_KEY_TRUE(qspinlock_key); + +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \ +static __always_inline type arch_spin_##op(type_lock lock) \ +{ \ + if (static_branch_unlikely(&qspinlock_key)) \ + return queued_spin_##op(lock); \ + return ticket_spin_##op(lock); \ +} + +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *) +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *) +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *) +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *) +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *) +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t) + +#elif defined(CONFIG_RISCV_QUEUED_SPINLOCKS) + +#include <asm/qspinlock.h> + +#else + +#include <asm/ticket_spinlock.h> + +#endif + +#include <asm/qrwlock.h> + +#endif /* __ASM_RISCV_SPINLOCK_H */ diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 7594df37cc9f..94e33216b2d9 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -70,6 +70,24 @@ static __always_inline bool has_fpu(void) { return false; } #define __switch_to_fpu(__prev, __next) do { } while (0) #endif +static inline void envcfg_update_bits(struct task_struct *task, + unsigned long mask, unsigned long val) +{ + unsigned long envcfg; + + envcfg = (task->thread.envcfg & ~mask) | val; + task->thread.envcfg = envcfg; + if (task == current) + csr_write(CSR_ENVCFG, envcfg); +} + +static inline void __switch_to_envcfg(struct task_struct *next) +{ + asm volatile (ALTERNATIVE("nop", "csrw " __stringify(CSR_ENVCFG) ", %0", + 0, RISCV_ISA_EXT_XLINUXENVCFG, 1) + :: "r" (next->thread.envcfg) : "memory"); +} + extern struct task_struct *__switch_to(struct task_struct *, struct task_struct *); @@ -103,6 +121,7 @@ do { \ __switch_to_vector(__prev, __next); \ if (switch_to_should_flush_icache(__next)) \ local_flush_icache_all(); \ + __switch_to_envcfg(__next); \ ((last) = __switch_to(__prev, __next)); \ } while (0) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 72ec1d9bd3f3..fee56b0c8058 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -9,8 +9,41 @@ #define _ASM_RISCV_UACCESS_H #include <asm/asm-extable.h> +#include <asm/cpufeature.h> #include <asm/pgtable.h> /* for TASK_SIZE */ +#ifdef CONFIG_RISCV_ISA_SUPM +static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, unsigned long addr) +{ + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) { + u8 pmlen = mm->context.pmlen; + + /* Virtual addresses are sign-extended; physical addresses are zero-extended. 
*/ + if (IS_ENABLED(CONFIG_MMU)) + return (long)(addr << pmlen) >> pmlen; + else + return (addr << pmlen) >> pmlen; + } + + return addr; +} + +#define untagged_addr(addr) ({ \ + unsigned long __addr = (__force unsigned long)(addr); \ + (__force __typeof__(addr))__untagged_addr_remote(current->mm, __addr); \ +}) + +#define untagged_addr_remote(mm, addr) ({ \ + unsigned long __addr = (__force unsigned long)(addr); \ + mmap_assert_locked(mm); \ + (__force __typeof__(addr))__untagged_addr_remote(mm, __addr); \ +}) + +#define access_ok(addr, size) likely(__access_ok(untagged_addr(addr), size)) +#else +#define untagged_addr(addr) (addr) +#endif + /* * User space memory access functions */ @@ -130,7 +163,7 @@ do { \ */ #define __get_user(x, ptr) \ ({ \ - const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ + const __typeof__(*(ptr)) __user *__gu_ptr = untagged_addr(ptr); \ long __gu_err = 0; \ \ __chk_user_ptr(__gu_ptr); \ @@ -246,7 +279,7 @@ do { \ */ #define __put_user(x, ptr) \ ({ \ - __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_ptr = untagged_addr(ptr); \ __typeof__(*__gu_ptr) __val = (x); \ long __pu_err = 0; \ \ @@ -293,13 +326,13 @@ unsigned long __must_check __asm_copy_from_user(void *to, static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - return __asm_copy_from_user(to, from, n); + return __asm_copy_from_user(to, untagged_addr(from), n); } static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { - return __asm_copy_to_user(to, from, n); + return __asm_copy_to_user(untagged_addr(to), from, n); } extern long strncpy_from_user(char *dest, const char __user *src, long count); @@ -314,7 +347,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) { might_fault(); return access_ok(to, n) ? 
- __clear_user(to, n) : n; + __clear_user(untagged_addr(to), n) : n; } #define __get_kernel_nofault(dst, src, type, err_label) \ diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index be7d309cca8a..c7c023afbacd 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -21,6 +21,7 @@ extern unsigned long riscv_v_vsize; int riscv_v_setup_vsize(void); +bool insn_is_vector(u32 insn_buf); bool riscv_v_first_use_handler(struct pt_regs *regs); void kernel_vector_begin(void); void kernel_vector_end(void); @@ -268,6 +269,7 @@ struct pt_regs; static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; } static __always_inline bool has_vector(void) { return false; } +static __always_inline bool insn_is_vector(u32 insn_buf) { return false; } static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 1e153cda57db..3af142b99f77 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -72,6 +72,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZCF (1ULL << 46) #define RISCV_HWPROBE_EXT_ZCMOP (1ULL << 47) #define RISCV_HWPROBE_EXT_ZAWRS (1ULL << 48) +#define RISCV_HWPROBE_EXT_SUPM (1ULL << 49) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) @@ -88,6 +89,11 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW 2 #define RISCV_HWPROBE_MISALIGNED_SCALAR_FAST 3 #define RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED 4 +#define RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF 10 +#define RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN 0 +#define RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW 2 +#define RISCV_HWPROBE_MISALIGNED_VECTOR_FAST 3 +#define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. 
*/ /* Flags */ diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index e97db3296456..3482c9a73d1b 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -175,6 +175,10 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZCF, KVM_RISCV_ISA_EXT_ZCMOP, KVM_RISCV_ISA_EXT_ZAWRS, + KVM_RISCV_ISA_EXT_SMNPM, + KVM_RISCV_ISA_EXT_SSNPM, + KVM_RISCV_ISA_EXT_SVADE, + KVM_RISCV_ISA_EXT_SVADU, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 69dc8aaab3fb..063d1faf5a53 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -75,7 +75,8 @@ obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o -obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o +obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o +obj-$(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS) += vec-copy-unaligned.o obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_FPU) += kernel_mode_fpu.o diff --git a/arch/riscv/kernel/copy-unaligned.h b/arch/riscv/kernel/copy-unaligned.h index e3d70d35b708..85d4d11450cb 100644 --- a/arch/riscv/kernel/copy-unaligned.h +++ b/arch/riscv/kernel/copy-unaligned.h @@ -10,4 +10,9 @@ void __riscv_copy_words_unaligned(void *dst, const void *src, size_t size); void __riscv_copy_bytes_unaligned(void *dst, const void *src, size_t size); +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS +void __riscv_copy_vec_words_unaligned(void *dst, const void *src, size_t size); +void __riscv_copy_vec_bytes_unaligned(void *dst, const void *src, size_t size); +#endif + #endif /* __RISCV_KERNEL_COPY_UNALIGNED_H */ diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 826f46b21f2e..c0916ed318c2 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -29,6 +29,8 @@ #define NUM_ALPHA_EXTS ('z' - 'a' + 1) +static bool any_cpu_has_zicboz; + unsigned long elf_hwcap __read_mostly; /* Host ISA bitmap */ @@ -99,6 +101,7 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data, pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n"); return -EINVAL; } + any_cpu_has_zicboz = true; return 0; } @@ -133,6 +136,16 @@ static int riscv_ext_zcf_validate(const struct riscv_isa_ext_data *data, return -EPROBE_DEFER; } +static int riscv_ext_svadu_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + /* SVADE has already been detected, use SVADE only */ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_SVADE)) + return -EOPNOTSUPP; + + return 0; +} + static const unsigned int riscv_zk_bundled_exts[] = { RISCV_ISA_EXT_ZBKB, RISCV_ISA_EXT_ZBKC, @@ -315,6 +328,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { riscv_ext_zicbom_validate), __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate), + __RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE), __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), __RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND), __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), @@ -323,6 +337,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM), __RISCV_ISA_EXT_DATA(zimop, RISCV_ISA_EXT_ZIMOP), + __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA), __RISCV_ISA_EXT_DATA(zacas, 
RISCV_ISA_EXT_ZACAS), __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS), __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA), @@ -375,10 +390,15 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_BUNDLE(zvksg, riscv_zvksg_bundled_exts), __RISCV_ISA_EXT_DATA(zvkt, RISCV_ISA_EXT_ZVKT), __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA), + __RISCV_ISA_EXT_DATA(smmpm, RISCV_ISA_EXT_SMMPM), + __RISCV_ISA_EXT_SUPERSET(smnpm, RISCV_ISA_EXT_SMNPM, riscv_xlinuxenvcfg_exts), __RISCV_ISA_EXT_DATA(smstateen, RISCV_ISA_EXT_SMSTATEEN), __RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA), __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), + __RISCV_ISA_EXT_SUPERSET(ssnpm, RISCV_ISA_EXT_SSNPM, riscv_xlinuxenvcfg_exts), __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), + __RISCV_ISA_EXT_DATA(svade, RISCV_ISA_EXT_SVADE), + __RISCV_ISA_EXT_DATA_VALIDATE(svadu, RISCV_ISA_EXT_SVADU, riscv_ext_svadu_validate), __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), @@ -918,10 +938,12 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } -void riscv_user_isa_enable(void) +void __init riscv_user_isa_enable(void) { - if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) - csr_set(CSR_ENVCFG, ENVCFG_CBZE); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOZ)) + current->thread.envcfg |= ENVCFG_CBZE; + else if (any_cpu_has_zicboz) + pr_warn("Zicboz disabled as it is unavailable on some harts\n"); } #ifdef CONFIG_RISCV_ALTERNATIVE diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S index 327cf527dd7e..f74f6b60e347 100644 --- a/arch/riscv/kernel/fpu.S +++ b/arch/riscv/kernel/fpu.S @@ -170,7 +170,7 @@ SYM_FUNC_END(__fstate_restore) __access_func(f31) -#ifdef CONFIG_RISCV_MISALIGNED +#ifdef CONFIG_RISCV_SCALAR_MISALIGNED /* * Disable compressed instructions set to keep a constant offset between FP @@ -224,4 +224,4 @@ SYM_FUNC_START(get_f64_reg) fp_access_epilogue SYM_FUNC_END(get_f64_reg) -#endif /* CONFIG_RISCV_MISALIGNED */ +#endif /* CONFIG_RISCV_SCALAR_MISALIGNED */ diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c index 6eee6f736f68..654ed159c830 100644 --- a/arch/riscv/kernel/jump_label.c +++ b/arch/riscv/kernel/jump_label.c @@ -36,9 +36,15 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, insn = RISCV_INSN_NOP; } - mutex_lock(&text_mutex); - patch_insn_write(addr, &insn, sizeof(insn)); - mutex_unlock(&text_mutex); + if (early_boot_irqs_disabled) { + riscv_patch_in_stop_machine = 1; + patch_insn_write(addr, &insn, sizeof(insn)); + riscv_patch_in_stop_machine = 0; + } else { + mutex_lock(&text_mutex); + patch_insn_write(addr, &insn, sizeof(insn)); + mutex_unlock(&text_mutex); + } return true; } diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index e3142d8a6e28..58b6482c2bf6 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -7,6 +7,7 @@ * Copyright (C) 2017 SiFive */ +#include <linux/bitfield.h> #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -180,6 +181,10 @@ void flush_thread(void) memset(¤t->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE); #endif +#ifdef CONFIG_RISCV_ISA_SUPM + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + envcfg_update_bits(current, ENVCFG_PMM, ENVCFG_PMM_PMLEN_0); +#endif } void arch_release_task_struct(struct task_struct *tsk) @@ -208,6 
+213,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); + /* Ensure all threads in this mm have the same pointer masking mode. */ + if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM) && p->mm && (clone_flags & CLONE_VM)) + set_bit(MM_CONTEXT_LOCK_PMLEN, &p->mm->context.flags); + memset(&p->thread.s, 0, sizeof(p->thread.s)); /* p->thread holds context to be restored by __switch_to() */ @@ -242,3 +251,148 @@ void __init arch_task_cache_init(void) { riscv_v_setup_ctx_cache(); } + +#ifdef CONFIG_RISCV_ISA_SUPM +enum { + PMLEN_0 = 0, + PMLEN_7 = 7, + PMLEN_16 = 16, +}; + +static bool have_user_pmlen_7; +static bool have_user_pmlen_16; + +/* + * Control the relaxed ABI allowing tagged user addresses into the kernel. + */ +static unsigned int tagged_addr_disabled; + +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) +{ + unsigned long valid_mask = PR_PMLEN_MASK | PR_TAGGED_ADDR_ENABLE; + struct thread_info *ti = task_thread_info(task); + struct mm_struct *mm = task->mm; + unsigned long pmm; + u8 pmlen; + + if (is_compat_thread(ti)) + return -EINVAL; + + if (arg & ~valid_mask) + return -EINVAL; + + /* + * Prefer the smallest PMLEN that satisfies the user's request, + * in case choosing a larger PMLEN has a performance impact. + */ + pmlen = FIELD_GET(PR_PMLEN_MASK, arg); + if (pmlen == PMLEN_0) { + pmm = ENVCFG_PMM_PMLEN_0; + } else if (pmlen <= PMLEN_7 && have_user_pmlen_7) { + pmlen = PMLEN_7; + pmm = ENVCFG_PMM_PMLEN_7; + } else if (pmlen <= PMLEN_16 && have_user_pmlen_16) { + pmlen = PMLEN_16; + pmm = ENVCFG_PMM_PMLEN_16; + } else { + return -EINVAL; + } + + /* + * Do not allow the enabling of the tagged address ABI if globally + * disabled via sysctl abi.tagged_addr_disabled, or if pointer masking + * is disabled for userspace. + */ + if (arg & PR_TAGGED_ADDR_ENABLE && (tagged_addr_disabled || !pmlen)) + return -EINVAL; + + if (!(arg & PR_TAGGED_ADDR_ENABLE)) + pmlen = PMLEN_0; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + if (test_bit(MM_CONTEXT_LOCK_PMLEN, &mm->context.flags) && mm->context.pmlen != pmlen) { + mmap_write_unlock(mm); + return -EBUSY; + } + + envcfg_update_bits(task, ENVCFG_PMM, pmm); + mm->context.pmlen = pmlen; + + mmap_write_unlock(mm); + + return 0; +} + +long get_tagged_addr_ctrl(struct task_struct *task) +{ + struct thread_info *ti = task_thread_info(task); + long ret = 0; + + if (is_compat_thread(ti)) + return -EINVAL; + + /* + * The mm context's pmlen is set only when the tagged address ABI is + * enabled, so the effective PMLEN must be extracted from envcfg.PMM. + */ + switch (task->thread.envcfg & ENVCFG_PMM) { + case ENVCFG_PMM_PMLEN_7: + ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_7); + break; + case ENVCFG_PMM_PMLEN_16: + ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_16); + break; + } + + if (task->mm->context.pmlen) + ret |= PR_TAGGED_ADDR_ENABLE; + + return ret; +} + +static bool try_to_set_pmm(unsigned long value) +{ + csr_set(CSR_ENVCFG, value); + return (csr_read_clear(CSR_ENVCFG, ENVCFG_PMM) & ENVCFG_PMM) == value; +} + +/* + * Global sysctl to disable the tagged user addresses support. This control + * only prevents the tagged address ABI enabling via prctl() and does not + * disable it for tasks that already opted in to the relaxed ABI.
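For context on the prctl() interface above: with PMLEN 7 the kernel masks the upper seven bits of user pointers (sign-extending from bit 56 on MMU systems, per the untagged_addr() helpers earlier in this series), so userspace can keep a tag there. A minimal, hypothetical userspace sketch; it assumes the PR_PMLEN_SHIFT/PR_PMLEN_MASK and PR_SET/GET_TAGGED_ADDR_CTRL definitions from <linux/prctl.h> that accompany this code:

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	/* Request at least 7 masked bits and enable the tagged address ABI. */
	unsigned long ctrl = PR_TAGGED_ADDR_ENABLE | (7UL << PR_PMLEN_SHIFT);

	if (prctl(PR_SET_TAGGED_ADDR_CTRL, ctrl, 0, 0, 0) != 0) {
		perror("PR_SET_TAGGED_ADDR_CTRL");
		return 1;
	}
	/* The kernel may pick a larger PMLEN (7 or 16); read the result back. */
	printf("ctrl = 0x%lx\n", (unsigned long)prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0));
	return 0;
}

Per set_tagged_addr_ctrl() above, any requested PMLEN from 1 to 7 yields PMLEN 7 when the hart supports it, and values up to 16 fall back to PMLEN 16.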
+ */ + +static struct ctl_table tagged_addr_sysctl_table[] = { + { + .procname = "tagged_addr_disabled", + .mode = 0644, + .data = &tagged_addr_disabled, + .maxlen = sizeof(int), + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +}; + +static int __init tagged_addr_init(void) +{ + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + return 0; + + /* + * envcfg.PMM is a WARL field. Detect which values are supported. + * Assume the supported PMLEN values are the same on all harts. + */ + csr_clear(CSR_ENVCFG, ENVCFG_PMM); + have_user_pmlen_7 = try_to_set_pmm(ENVCFG_PMM_PMLEN_7); + have_user_pmlen_16 = try_to_set_pmm(ENVCFG_PMM_PMLEN_16); + + if (!register_sysctl("abi", tagged_addr_sysctl_table)) + return -EINVAL; + + return 0; +} +core_initcall(tagged_addr_init); +#endif /* CONFIG_RISCV_ISA_SUPM */ diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 92731ff8c79a..ea67e9fb7a58 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -28,6 +28,9 @@ enum riscv_regset { #ifdef CONFIG_RISCV_ISA_V REGSET_V, #endif +#ifdef CONFIG_RISCV_ISA_SUPM + REGSET_TAGGED_ADDR_CTRL, +#endif }; static int riscv_gpr_get(struct task_struct *target, @@ -152,6 +155,35 @@ static int riscv_vr_set(struct task_struct *target, } #endif +#ifdef CONFIG_RISCV_ISA_SUPM +static int tagged_addr_ctrl_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + long ctrl = get_tagged_addr_ctrl(target); + + if (IS_ERR_VALUE(ctrl)) + return ctrl; + + return membuf_write(&to, &ctrl, sizeof(ctrl)); +} + +static int tagged_addr_ctrl_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + long ctrl; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); + if (ret) + return ret; + + return set_tagged_addr_ctrl(target, ctrl); +} +#endif + static const struct user_regset riscv_user_regset[] = { [REGSET_X] = { .core_note_type = NT_PRSTATUS, @@ -182,6 +214,16 @@ static const struct user_regset riscv_user_regset[] = { .set = riscv_vr_set, }, #endif +#ifdef CONFIG_RISCV_ISA_SUPM + [REGSET_TAGGED_ADDR_CTRL] = { + .core_note_type = NT_RISCV_TAGGED_ADDR_CTRL, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .regset_get = tagged_addr_ctrl_get, + .set = tagged_addr_ctrl_set, + }, +#endif }; static const struct user_regset_view riscv_user_native_view = { diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 26c886db4fb3..45010e71df86 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -227,7 +227,7 @@ static void __init init_resources(void) static void __init parse_dtb(void) { /* Early scan of device tree from init memory */ - if (early_init_dt_scan(dtb_early_va, __pa(dtb_early_va))) { + if (early_init_dt_scan(dtb_early_va, dtb_early_pa)) { const char *name = of_flat_dt_get_machine_name(); if (name) { @@ -244,6 +244,42 @@ static void __init parse_dtb(void) #endif } +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS) +DEFINE_STATIC_KEY_TRUE(qspinlock_key); +EXPORT_SYMBOL(qspinlock_key); +#endif + +static void __init riscv_spinlock_init(void) +{ + char *using_ext = NULL; + + if (IS_ENABLED(CONFIG_RISCV_TICKET_SPINLOCKS)) { + pr_info("Ticket spinlock: enabled\n"); + return; + } + + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && + riscv_isa_extension_available(NULL, ZABHA) && + riscv_isa_extension_available(NULL, ZACAS)) { + 
using_ext = "using Zabha"; + } else if (riscv_isa_extension_available(NULL, ZICCRSE)) { + using_ext = "using Ziccrse"; + } +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS) + else { + static_branch_disable(&qspinlock_key); + pr_info("Ticket spinlock: enabled\n"); + return; + } +#endif + + if (!using_ext) + pr_err("Queued spinlock without Zabha or Ziccrse"); + else + pr_info("Queued spinlock %s: enabled\n", using_ext); +} + extern void __init init_rt_signal_env(void); void __init setup_arch(char **cmdline_p) @@ -297,6 +333,7 @@ void __init setup_arch(char **cmdline_p) riscv_set_dma_cache_alignment(); riscv_user_isa_enable(); + riscv_spinlock_init(); } bool arch_cpu_is_hotpluggable(int cpu) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 0f8f1c95ac38..e36d20205bd7 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -233,8 +233,6 @@ asmlinkage __visible void smp_callin(void) numa_add_cpu(curr_cpuid); set_cpu_online(curr_cpuid, true); - riscv_user_isa_enable(); - /* * Remote cache and TLB flushes are ignored while the CPU is offline, * so flush them both right now just in case. diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index c8cec0cc5833..9a8a0dc035b2 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -14,7 +14,7 @@ void suspend_save_csrs(struct suspend_context *context) { - if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_XLINUXENVCFG)) context->envcfg = csr_read(CSR_ENVCFG); context->tvec = csr_read(CSR_TVEC); context->ie = csr_read(CSR_IE); @@ -37,7 +37,7 @@ void suspend_save_csrs(struct suspend_context *context) void suspend_restore_csrs(struct suspend_context *context) { csr_write(CSR_SCRATCH, 0); - if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_XLINUXENVCFG)) csr_write(CSR_ENVCFG, context->envcfg); csr_write(CSR_TVEC, context->tvec); csr_write(CSR_IE, context->ie); diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 711a31f27c3d..cb93adfffc48 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -150,6 +150,9 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZFH); EXT_KEY(ZFHMIN); } + + if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM)) + EXT_KEY(SUPM); #undef EXT_KEY } @@ -201,6 +204,43 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) } #endif +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus) +{ + int cpu; + u64 perf = -1ULL; + + /* Return if supported or not even if speed wasn't probed */ + for_each_cpu(cpu, cpus) { + int this_perf = per_cpu(vector_misaligned_access, cpu); + + if (perf == -1ULL) + perf = this_perf; + + if (perf != this_perf) { + perf = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + break; + } + } + + if (perf == -1ULL) + return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + + return perf; +} +#else +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus) +{ + if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + + if (IS_ENABLED(CONFIG_RISCV_SLOW_VECTOR_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; + + return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; +} +#endif + static void hwprobe_one_pair(struct riscv_hwprobe *pair, const struct cpumask *cpus) { @@ -229,6 +269,10 @@ static void 
hwprobe_one_pair(struct riscv_hwprobe *pair, pair->value = hwprobe_misaligned(cpus); break; + case RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF: + pair->value = hwprobe_vec_misaligned(cpus); + break; + case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE: pair->value = 0; if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ)) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 1b9867136b61..7cc108aed74e 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -16,6 +16,7 @@ #include <asm/entry-common.h> #include <asm/hwprobe.h> #include <asm/cpufeature.h> +#include <asm/vector.h> #define INSN_MATCH_LB 0x3 #define INSN_MASK_LB 0x707f @@ -320,12 +321,37 @@ union reg_data { u64 data_u64; }; -static bool unaligned_ctl __read_mostly; - /* sysctl hooks */ int unaligned_enabled __read_mostly = 1; /* Enabled by default */ -int handle_misaligned_load(struct pt_regs *regs) +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +static int handle_vector_misaligned_load(struct pt_regs *regs) +{ + unsigned long epc = regs->epc; + unsigned long insn; + + if (get_insn(regs, epc, &insn)) + return -1; + + /* Only return 0 when in check_vector_unaligned_access_emulated */ + if (*this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { + *this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + regs->epc = epc + INSN_LEN(insn); + return 0; + } + + /* If it is a vector instruction, we don't emulate it yet */ + regs->epc = epc; + return -1; +} +#else +static int handle_vector_misaligned_load(struct pt_regs *regs) +{ + return -1; +} +#endif + +static int handle_scalar_misaligned_load(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; @@ -433,7 +459,7 @@ int handle_misaligned_load(struct pt_regs *regs) return 0; } -int handle_misaligned_store(struct pt_regs *regs) +static int handle_scalar_misaligned_store(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; @@ -524,11 +550,96 @@ int handle_misaligned_store(struct pt_regs *regs) return 0; } -static bool check_unaligned_access_emulated(int cpu) +int handle_misaligned_load(struct pt_regs *regs) +{ + unsigned long epc = regs->epc; + unsigned long insn; + + if (IS_ENABLED(CONFIG_RISCV_VECTOR_MISALIGNED)) { + if (get_insn(regs, epc, &insn)) + return -1; + + if (insn_is_vector(insn)) + return handle_vector_misaligned_load(regs); + } + + if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED)) + return handle_scalar_misaligned_load(regs); + + return -1; +} + +int handle_misaligned_store(struct pt_regs *regs) { + if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED)) + return handle_scalar_misaligned_store(regs); + + return -1; +} + +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused) +{ + long *mas_ptr = this_cpu_ptr(&vector_misaligned_access); + unsigned long tmp_var; + + *mas_ptr = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + + kernel_vector_begin(); + /* + * In pre-13.0.0 versions of GCC, vector registers cannot appear in + * the clobber list. This inline asm clobbers v0, but since we do not + * currently build the kernel with V enabled, the v0 clobber arg is not + * needed (as the compiler will not emit vector code itself). If the kernel + * is changed to build with V enabled, the clobber arg will need to be + * added here.
+ */ + __asm__ __volatile__ ( + ".balign 4\n\t" + ".option push\n\t" + ".option arch, +zve32x\n\t" + " vsetivli zero, 1, e16, m1, ta, ma\n\t" // Vectors of 16b + " vle16.v v0, (%[ptr])\n\t" // Load bytes + ".option pop\n\t" + : : [ptr] "r" ((u8 *)&tmp_var + 1)); + kernel_vector_end(); +} + +bool check_vector_unaligned_access_emulated_all_cpus(void) +{ + int cpu; + + if (!has_vector()) { + for_each_online_cpu(cpu) + per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + return false; + } + + schedule_on_each_cpu(check_vector_unaligned_access_emulated); + + for_each_online_cpu(cpu) + if (per_cpu(vector_misaligned_access, cpu) + == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) + return false; + + return true; +} +#else +bool check_vector_unaligned_access_emulated_all_cpus(void) +{ + return false; +} +#endif + +#ifdef CONFIG_RISCV_SCALAR_MISALIGNED + +static bool unaligned_ctl __read_mostly; + +void check_unaligned_access_emulated(struct work_struct *work __always_unused) +{ + int cpu = smp_processor_id(); long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); unsigned long tmp_var, tmp_val; - bool misaligned_emu_detected; *mas_ptr = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; @@ -536,19 +647,16 @@ static bool check_unaligned_access_emulated(int cpu) " "REG_L" %[tmp], 1(%[ptr])\n" : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); - misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED); /* * If unaligned_ctl is already set, this means that we detected that all * CPUs use emulated misaligned accesses at boot time. If that changed * when hotplugging the new cpu, this is something we don't handle. */ - if (unlikely(unaligned_ctl && !misaligned_emu_detected)) { + if (unlikely(unaligned_ctl && (*mas_ptr != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED))) { pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n"); while (true) cpu_relax(); } - - return misaligned_emu_detected; } bool check_unaligned_access_emulated_all_cpus(void) { @@ -560,8 +668,11 @@ bool check_unaligned_access_emulated_all_cpus(void) * accesses emulated since tasks requesting such control can run on any * CPU.
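Userspace sees this machinery from two sides: the per-CPU result is exported through the hwprobe key handled earlier, and the "control" this comment refers to is the PR_SET_UNALIGN prctl, which the kernel honors only while unaligned_ctl is set (see unaligned_ctl_available() below). Two hypothetical sketches; struct riscv_hwprobe and __NR_riscv_hwprobe come from this series' uapi headers, and request_strict_alignment() is an illustrative helper name, not part of the patch:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>

int main(void)
{
	struct riscv_hwprobe pair = {
		.key = RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF,
	};

	/* pair_count = 1; cpusetsize = 0 with cpus = NULL queries all harts. */
	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) != 0)
		return 1;
	printf("misaligned vector perf = %lld\n", (long long)pair.value);
	return 0;
}

#include <sys/prctl.h>

/* Prefer SIGBUS over in-kernel fixups for misaligned accesses; the
 * kernel refuses this unless every CPU emulates them (unaligned_ctl). */
static int request_strict_alignment(void)
{
	return prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS, 0, 0, 0);
}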
*/ + schedule_on_each_cpu(check_unaligned_access_emulated); + for_each_online_cpu(cpu) - if (!check_unaligned_access_emulated(cpu)) + if (per_cpu(misaligned_access_speed, cpu) + != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED) return false; unaligned_ctl = true; @@ -572,3 +683,9 @@ bool unaligned_ctl_available(void) { return unaligned_ctl; } +#else +bool check_unaligned_access_emulated_all_cpus(void) +{ + return false; +} +#endif diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 160628a2116d..91f189cf1611 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -6,11 +6,13 @@ #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/jump_label.h> +#include <linux/kthread.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/types.h> #include <asm/cpufeature.h> #include <asm/hwprobe.h> +#include <asm/vector.h> #include "copy-unaligned.h" @@ -19,7 +21,8 @@ #define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) -DEFINE_PER_CPU(long, misaligned_access_speed); +DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; +DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS static cpumask_t fast_misaligned_access; @@ -191,6 +194,7 @@ static int riscv_online_cpu(unsigned int cpu) if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) goto exit; + check_unaligned_access_emulated(NULL); buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); if (!buf) { pr_warn("Allocation failure, not measuring misaligned performance\n"); @@ -259,23 +263,159 @@ out: kfree(bufs); return 0; } +#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ +static int check_unaligned_access_speed_all_cpus(void) +{ + return 0; +} +#endif -static int check_unaligned_access_all_cpus(void) +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS +static void check_vector_unaligned_access(struct work_struct *work __always_unused) { - bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); + int cpu = smp_processor_id(); + u64 start_cycles, end_cycles; + u64 word_cycles; + u64 byte_cycles; + int ratio; + unsigned long start_jiffies, now; + struct page *page; + void *dst; + void *src; + long speed = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; - if (!all_cpus_emulated) - return check_unaligned_access_speed_all_cpus(); + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) + return; + + page = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!page) { + pr_warn("Allocation failure, not measuring vector misaligned performance\n"); + return; + } + + /* Make an unaligned destination buffer. */ + dst = (void *)((unsigned long)page_address(page) | 0x1); + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ + src = dst + (MISALIGNED_BUFFER_SIZE / 2); + src += 2; + word_cycles = -1ULL; + + /* Do a warmup. */ + kernel_vector_begin(); + __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + /* + * For a fixed amount of time, repeatedly try the function, and take + * the best time in cycles as the measurement. 
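To put illustrative numbers on the scheme this comment describes: if the best vector word-copy pass measures 1200 cycles and the best byte-copy pass 4800, the ratio computed further down is (4800 * 100) / 1200 = 400, printed as 4.00, and the CPU is marked fast because the misaligned word copy beat the byte copy. Taking the minimum over many passes discards samples inflated by interrupts or preemption.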
+ */ + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < word_cycles) + word_cycles = end_cycles - start_cycles; + } + + byte_cycles = -1ULL; + __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < byte_cycles) + byte_cycles = end_cycles - start_cycles; + } + + kernel_vector_end(); + + /* Don't divide by zero. */ + if (!word_cycles || !byte_cycles) { + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned vector access speed\n", + cpu); + + return; + } + + if (word_cycles < byte_cycles) + speed = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + + ratio = div_u64((byte_cycles * 100), word_cycles); + pr_info("cpu%d: Ratio of vector byte access time to vector unaligned word access is %d.%02d, unaligned accesses are %s\n", + cpu, + ratio / 100, + ratio % 100, + (speed == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST) ? "fast" : "slow"); + + per_cpu(vector_misaligned_access, cpu) = speed; +} + +static int riscv_online_cpu_vec(unsigned int cpu) +{ + if (!has_vector()) + return 0; + + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED) + return 0; + + check_vector_unaligned_access_emulated(NULL); + check_vector_unaligned_access(NULL); return 0; } -#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ -static int check_unaligned_access_all_cpus(void) + +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) { - check_unaligned_access_emulated_all_cpus(); + schedule_on_each_cpu(check_vector_unaligned_access); + + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. 
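One detail worth spelling out about the registration that follows: CPUHP_AP_ONLINE_DYN allocates a dynamic hotplug state, the _nocalls variant does not invoke the startup callback for CPUs that are already online (those were just probed via schedule_on_each_cpu()), and the NULL teardown argument means nothing has to run when a CPU goes offline.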
+ */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu_vec, NULL); return 0; } +#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */ +static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +{ + return 0; +} #endif +static int check_unaligned_access_all_cpus(void) +{ + bool all_cpus_emulated, all_cpus_vec_unsupported; + + all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); + all_cpus_vec_unsupported = check_vector_unaligned_access_emulated_all_cpus(); + + if (!all_cpus_vec_unsupported && + IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { + kthread_run(vec_check_unaligned_access_speed_all_cpus, + NULL, "vec_check_unaligned_access_speed_all_cpus"); + } + + if (!all_cpus_emulated) + return check_unaligned_access_speed_all_cpus(); + + return 0; +} + arch_initcall(check_unaligned_access_all_cpus); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 3f1c4b2d0b06..9a1b555e8733 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -45,7 +45,7 @@ $(obj)/vdso.o: $(obj)/vdso.so # link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE - $(call if_changed,vdsold) + $(call if_changed,vdsold_and_check) LDFLAGS_vdso.so.dbg = -shared -soname=linux-vdso.so.1 \ --build-id=sha1 --hash-style=both --eh-frame-hdr @@ -65,7 +65,8 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE # actual build commands # The DSO images are built using a special linker script # Make sure only to export the intended __vdso_xxx symbol offsets. -quiet_cmd_vdsold = VDSOLD $@ - cmd_vdsold = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \ +quiet_cmd_vdsold_and_check = VDSOLD $@ + cmd_vdsold_and_check = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \ $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ - rm $@.tmp + rm $@.tmp && \ + $(cmd_vdso_check) diff --git a/arch/riscv/kernel/vec-copy-unaligned.S b/arch/riscv/kernel/vec-copy-unaligned.S new file mode 100644 index 000000000000..d16f19f1b3b6 --- /dev/null +++ b/arch/riscv/kernel/vec-copy-unaligned.S @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2024 Rivos Inc. */ + +#include <linux/args.h> +#include <linux/linkage.h> +#include <asm/asm.h> + + .text + +#define WORD_EEW 32 + +#define WORD_SEW CONCATENATE(e, WORD_EEW) +#define VEC_L CONCATENATE(vle, WORD_EEW).v +#define VEC_S CONCATENATE(vse, WORD_EEW).v + +/* void __riscv_copy_vec_words_unaligned(void *, const void *, size_t) */ +/* Performs a memcpy without aligning buffers, using word loads and stores. */ +/* Note: The size is truncated to a multiple of WORD_EEW */ +SYM_FUNC_START(__riscv_copy_vec_words_unaligned) + andi a4, a2, ~(WORD_EEW-1) + beqz a4, 2f + add a3, a1, a4 + .option push + .option arch, +zve32x +1: + vsetivli t0, 8, WORD_SEW, m8, ta, ma + VEC_L v0, (a1) + VEC_S v0, (a0) + addi a0, a0, WORD_EEW + addi a1, a1, WORD_EEW + bltu a1, a3, 1b + +2: + .option pop + ret +SYM_FUNC_END(__riscv_copy_vec_words_unaligned) + +/* void __riscv_copy_vec_bytes_unaligned(void *, const void *, size_t) */ +/* Performs a memcpy without aligning buffers, using only byte accesses.
*/ +/* Note: The size is truncated to a multiple of 8 */ +SYM_FUNC_START(__riscv_copy_vec_bytes_unaligned) + andi a4, a2, ~(8-1) + beqz a4, 2f + add a3, a1, a4 + .option push + .option arch, +zve32x +1: + vsetivli t0, 8, e8, m8, ta, ma + vle8.v v0, (a1) + vse8.v v0, (a0) + addi a0, a0, 8 + addi a1, a1, 8 + bltu a1, a3, 1b + +2: + .option pop + ret +SYM_FUNC_END(__riscv_copy_vec_bytes_unaligned) diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index 682b3feee451..821818886fab 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -66,7 +66,7 @@ void __init riscv_v_setup_ctx_cache(void) #endif } -static bool insn_is_vector(u32 insn_buf) +bool insn_is_vector(u32 insn_buf) { u32 opcode = insn_buf & __INSN_OPCODE_MASK; u32 width, csr; diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index dcced4db7fe8..19afd1f23537 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -590,7 +590,7 @@ void kvm_riscv_aia_enable(void) csr_set(CSR_HIE, BIT(IRQ_S_GEXT)); /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */ if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) - csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF)); + csr_set(CSR_HVIEN, BIT(IRQ_PMU_OVF)); } void kvm_riscv_aia_disable(void) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index dc3f76f6e46c..e048dcc6e65e 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -551,6 +551,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) if (riscv_isa_extension_available(isa, ZICBOZ)) cfg->henvcfg |= ENVCFG_CBZE; + if (riscv_isa_extension_available(isa, SVADU) && + !riscv_isa_extension_available(isa, SVADE)) + cfg->henvcfg |= ENVCFG_ADUE; + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { cfg->hstateen0 |= SMSTATEEN0_HSENVCFG; if (riscv_isa_extension_available(isa, SSAIA)) diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index b319c4c13c54..753f66c8b70a 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -15,6 +15,7 @@ #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/kvm_vcpu_vector.h> +#include <asm/pgtable.h> #include <asm/vector.h> #define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0) @@ -34,10 +35,14 @@ static const unsigned long kvm_isa_ext_arr[] = { [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v, /* Multi letter extensions (alphabetically sorted) */ + [KVM_RISCV_ISA_EXT_SMNPM] = RISCV_ISA_EXT_SSNPM, KVM_ISA_EXT_ARR(SMSTATEEN), KVM_ISA_EXT_ARR(SSAIA), KVM_ISA_EXT_ARR(SSCOFPMF), + KVM_ISA_EXT_ARR(SSNPM), KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVADE), + KVM_ISA_EXT_ARR(SVADU), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), @@ -110,6 +115,12 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_SSCOFPMF: /* Sscofpmf depends on interrupt filtering defined in ssaia */ return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA); + case KVM_RISCV_ISA_EXT_SVADU: + /* + * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. + * Guest OS can use Svadu only when host OS enables Svadu.
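To show how the Svadu gate above is exercised, a rough VMM-side sketch; it assumes the usual RISC-V ONE_REG encoding for ISA extensions (KVM_REG_RISCV_ISA_EXT with KVM_REG_RISCV_ISA_SINGLE), an rv64 host, and an already-created vcpu_fd; enable_svadu() is an illustrative helper name:

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_svadu(int vcpu_fd)
{
	unsigned long on = 1;
	struct kvm_one_reg reg = {
		.id = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
		      KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE |
		      KVM_RISCV_ISA_EXT_SVADU,
		.addr = (unsigned long)&on,
	};

	/* Refused when the host lacks hardware A/D updates (no Svadu). */
	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}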
+ */ + return arch_has_hw_pte_young(); case KVM_RISCV_ISA_EXT_V: return riscv_v_vstate_ctrl_user_allowed(); default: break; } @@ -127,8 +138,10 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_C: case KVM_RISCV_ISA_EXT_I: case KVM_RISCV_ISA_EXT_M: + case KVM_RISCV_ISA_EXT_SMNPM: /* There is no architectural config bit to disable sscofpmf completely */ case KVM_RISCV_ISA_EXT_SSCOFPMF: + case KVM_RISCV_ISA_EXT_SSNPM: case KVM_RISCV_ISA_EXT_SSTC: case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_SVNAPOT: @@ -181,6 +194,12 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) /* Extensions which can be disabled using Smstateen */ case KVM_RISCV_ISA_EXT_SSAIA: return riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN); + case KVM_RISCV_ISA_EXT_SVADE: + /* + * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. + * Svade is not allowed to be disabled when the platform uses Svade. + */ + return arch_has_hw_pte_young(); default: break; } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 0e8c20adcd98..fc53ce748c80 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1566,7 +1566,7 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) pmd_clear(pmd); } -static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, bool is_vmemmap) { struct page *page = pud_page(*pud); struct ptdesc *ptdesc = page_ptdesc(page); @@ -1579,7 +1579,8 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) return; } - pagetable_pmd_dtor(ptdesc); + if (!is_vmemmap) + pagetable_pmd_dtor(ptdesc); if (PageReserved(page)) free_reserved_page(page); else @@ -1703,7 +1704,7 @@ static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, un remove_pmd_mapping(pmd_base, addr, next, is_vmemmap, altmap); if (pgtable_l4_enabled) - free_pmd_table(pmd_base, pudp); + free_pmd_table(pmd_base, pudp, is_vmemmap); } } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c64b2987d108..0077969170e8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -87,6 +87,7 @@ config S390 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS + select ARCH_HAS_PREEMPT_LAZY select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME select ARCH_HAS_SET_DIRECT_MAP @@ -218,6 +219,7 @@ config S390 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RETHOOK diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 8cc02d6e0d0f..9c46b1b630b1 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -1168,4 +1168,4 @@ MODULE_ALIAS_CRYPTO("aes-all"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("GPL"); -MODULE_IMPORT_NS(CRYPTO_INTERNAL); +MODULE_IMPORT_NS("CRYPTO_INTERNAL"); diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index ccd4e148b5ed..a7f7bdc9e19c 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -66,14 +66,15 @@ typedef int (debug_header_proc_t) (debug_info_t *id, struct debug_view *view, int area, debug_entry_t *entry, - char *out_buf); + char *out_buf, size_t out_buf_size); typedef int (debug_format_proc_t) (debug_info_t *id, struct debug_view *view, char *out_buf, + size_t out_buf_size, const char *in_buf); typedef int
(debug_prolog_proc_t) (debug_info_t *id, struct debug_view *view, - char *out_buf); + char *out_buf, size_t out_buf_size); typedef int (debug_input_proc_t) (debug_info_t *id, struct debug_view *view, struct file *file, @@ -81,7 +82,8 @@ typedef int (debug_input_proc_t) (debug_info_t *id, size_t in_buf_size, loff_t *offset); int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, - int area, debug_entry_t *entry, char *out_buf); + int area, debug_entry_t *entry, + char *out_buf, size_t out_buf_size); struct debug_view { char name[DEBUG_MAX_NAME_LEN]; diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 64761c78f774..13f51a6a5bb1 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -17,8 +17,8 @@ #define GMAP_NOTIFY_MPROT 0x1 /* Status bits only for huge segment entries */ -#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */ -#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */ +#define _SEGMENT_ENTRY_GMAP_IN 0x0800 /* invalidation notify bit */ +#define _SEGMENT_ENTRY_GMAP_UC 0x0002 /* dirty (migration) */ /** * struct gmap_struct - guest address space diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 6f815d4ba0ca..a40664b236e9 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -10,6 +10,8 @@ #define _ASM_S390_HUGETLB_H #include <linux/pgtable.h> +#include <linux/swap.h> +#include <linux/swapops.h> #include <asm/page.h> #define hugepages_supported() (MACHINE_HAS_EDAT1) @@ -78,7 +80,7 @@ static inline int huge_pte_none(pte_t pte) #define __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY static inline int huge_pte_none_mostly(pte_t pte) { - return huge_pte_none(pte); + return huge_pte_none(pte) || is_pte_marker(pte); } #define __HAVE_ARCH_HUGE_PTE_MKUFFD_WP diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 1cd8eaebd3c0..97c7c8127543 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -94,11 +94,16 @@ union ipte_control { }; }; +/* + * Utility is defined as two bytes but having it four bytes wide + * generates more efficient code. Since the following bytes are + * reserved this makes no functional difference. 
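The efficiency note above presumably comes down to instruction selection: s390 has no two-byte compare-and-swap, so a cmpxchg() on a __u16 utility.val would be emulated with a four-byte compare-and-swap loop on the containing word plus extra shifting and masking, while a __u32 field maps directly onto the word-sized CS instruction. (This assumes the field is updated via cmpxchg(), as bit flags shared with hardware typically are.)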
+ */ union sca_utility { - __u16 val; + __u32 val; struct { - __u16 mtcr : 1; - __u16 reserved : 15; + __u32 mtcr : 1; + __u32 : 31; }; }; @@ -107,7 +112,7 @@ struct bsca_block { __u64 reserved[5]; __u64 mcn; union sca_utility utility; - __u8 reserved2[6]; + __u8 reserved2[4]; struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; }; @@ -115,7 +120,7 @@ struct esca_block { union ipte_control ipte_control; __u64 reserved1[6]; union sca_utility utility; - __u8 reserved2[6]; + __u8 reserved2[4]; __u64 mcn[4]; __u64 reserved3[20]; struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 8b67036edb69..48268095b0a3 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -277,7 +277,8 @@ static inline int is_module_addr(void *addr) #define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID) #define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH) #define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID) -#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) +#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH | \ + _REGION3_ENTRY_PRESENT) #define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) #define _REGION3_ENTRY_HARDWARE_BITS 0xfffffffffffff6ffUL @@ -285,18 +286,27 @@ static inline int is_module_addr(void *addr) #define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */ #define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */ #define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */ +#define _REGION3_ENTRY_COMM 0x0010 /* Common-Region, marks swap entry */ #define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */ -#define _REGION3_ENTRY_WRITE 0x0002 /* SW region write bit */ -#define _REGION3_ENTRY_READ 0x0001 /* SW region read bit */ +#define _REGION3_ENTRY_WRITE 0x8000 /* SW region write bit */ +#define _REGION3_ENTRY_READ 0x4000 /* SW region read bit */ #ifdef CONFIG_MEM_SOFT_DIRTY -#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */ +#define _REGION3_ENTRY_SOFT_DIRTY 0x0002 /* SW region soft dirty bit */ #else #define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */ #endif #define _REGION_ENTRY_BITS 0xfffffffffffff22fUL +/* + * SW region present bit. For non-leaf region-third-table entries, bits 62-63 + * indicate the TABLE LENGTH and both must be set to 1. But such entries + * would always be considered as present, so it is safe to use bit 63 as + * PRESENT bit for PUD. 
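Note the bit numbering convention here: s390 counts bit 0 as the most significant bit of the entry, so "bit 63" is the low-order bit, mask 0x0001. Because a valid non-leaf region-third entry always carries the TABLE LENGTH bits 62-63 set, as the comment above says, defining _REGION3_ENTRY_PRESENT as 0x0001 below never changes the hardware meaning of such entries.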
+ */ +#define _REGION3_ENTRY_PRESENT 0x0001 + /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe3fUL #define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe3cUL @@ -308,21 +318,29 @@ static inline int is_module_addr(void *addr) #define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_TYPE_MASK 0x0c /* segment table type mask */ -#define _SEGMENT_ENTRY (0) +#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PRESENT) #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) #define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */ #define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */ + +#define _SEGMENT_ENTRY_COMM 0x0010 /* Common-Segment, marks swap entry */ #define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */ -#define _SEGMENT_ENTRY_WRITE 0x0002 /* SW segment write bit */ -#define _SEGMENT_ENTRY_READ 0x0001 /* SW segment read bit */ +#define _SEGMENT_ENTRY_WRITE 0x8000 /* SW segment write bit */ +#define _SEGMENT_ENTRY_READ 0x4000 /* SW segment read bit */ #ifdef CONFIG_MEM_SOFT_DIRTY -#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */ +#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0002 /* SW segment soft dirty bit */ #else #define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */ #endif +#define _SEGMENT_ENTRY_PRESENT 0x0001 /* SW segment present bit */ + +/* Common bits in region and segment table entries, for swap entries */ +#define _RST_ENTRY_COMM 0x0010 /* Common-Region/Segment, marks swap entry */ +#define _RST_ENTRY_INVALID 0x0020 /* invalid region/segment table entry */ + #define _CRST_ENTRIES 2048 /* number of region/segment table entries */ #define _PAGE_ENTRIES 256 /* number of page table entries */ @@ -454,17 +472,22 @@ static inline int is_module_addr(void *addr) /* * Segment entry (large page) protection definitions. 
*/ -#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ +#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_INVALID | \ _SEGMENT_ENTRY_PROTECT) -#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PROTECT | \ +#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PROTECT | \ +#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ) -#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_READ | \ +#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_READ | \ +#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE) #define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \ _SEGMENT_ENTRY_LARGE | \ @@ -491,6 +514,7 @@ static inline int is_module_addr(void *addr) */ #define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_PRESENT | \ _REGION3_ENTRY_LARGE | \ _REGION3_ENTRY_READ | \ _REGION3_ENTRY_WRITE | \ @@ -498,12 +522,14 @@ static inline int is_module_addr(void *addr) _REGION3_ENTRY_DIRTY | \ _REGION_ENTRY_NOEXEC) #define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_PRESENT | \ _REGION3_ENTRY_LARGE | \ _REGION3_ENTRY_READ | \ _REGION3_ENTRY_YOUNG | \ _REGION_ENTRY_PROTECT | \ _REGION_ENTRY_NOEXEC) #define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_PRESENT | \ _REGION3_ENTRY_LARGE | \ _REGION3_ENTRY_READ | \ _REGION3_ENTRY_WRITE | \ @@ -746,7 +772,7 @@ static inline int pud_present(pud_t pud) { if (pud_folded(pud)) return 1; - return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL; + return (pud_val(pud) & _REGION3_ENTRY_PRESENT) != 0; } static inline int pud_none(pud_t pud) @@ -761,13 +787,18 @@ static inline bool pud_leaf(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3) return 0; - return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); + return (pud_present(pud) && (pud_val(pud) & _REGION3_ENTRY_LARGE) != 0); +} + +static inline int pmd_present(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_PRESENT) != 0; } #define pmd_leaf pmd_leaf static inline bool pmd_leaf(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; + return (pmd_present(pmd) && (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0); } static inline int pmd_bad(pmd_t pmd) @@ -799,11 +830,6 @@ static inline int p4d_bad(p4d_t p4d) return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0; } -static inline int pmd_present(pmd_t pmd) -{ - return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY; -} - static inline int pmd_none(pmd_t pmd) { return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY; @@ -1851,7 +1877,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, static inline int pmd_trans_huge(pmd_t pmd) { - return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE; + return pmd_leaf(pmd); } #define has_transparent_hugepage has_transparent_hugepage @@ -1911,6 +1937,53 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +/* + * 64 bit swap entry format for REGION3 and SEGMENT table entries (RSTE) + * Bits 59 and 63 are used to indicate the swap entry. Bit 58 marks the rste + * as invalid. 
+ * A swap entry is indicated by bit pattern (rste & 0x011) == 0x010 + * | offset |Xtype |11TT|S0| + * |0000000000111111111122222222223333333333444444444455|555555|5566|66| + * |0123456789012345678901234567890123456789012345678901|234567|8901|23| + * + * Bits 0-51 store the offset. + * Bits 53-57 store the type. + * Bit 62 (S) is used for softdirty tracking. + * Bits 60-61 (TT) indicate the table type: 0x01 for REGION3 and 0x00 for SEGMENT. + * Bit 52 (X) is unused. + */ + +#define __SWP_OFFSET_MASK_RSTE ((1UL << 52) - 1) +#define __SWP_OFFSET_SHIFT_RSTE 12 +#define __SWP_TYPE_MASK_RSTE ((1UL << 5) - 1) +#define __SWP_TYPE_SHIFT_RSTE 6 + +/* + * TT bits set to 0x00 == SEGMENT. For REGION3 entries, caller must add R3 + * bits 0x01. See also __set_huge_pte_at(). + */ +static inline unsigned long mk_swap_rste(unsigned long type, unsigned long offset) +{ + unsigned long rste; + + rste = _RST_ENTRY_INVALID | _RST_ENTRY_COMM; + rste |= (offset & __SWP_OFFSET_MASK_RSTE) << __SWP_OFFSET_SHIFT_RSTE; + rste |= (type & __SWP_TYPE_MASK_RSTE) << __SWP_TYPE_SHIFT_RSTE; + return rste; +} + +static inline unsigned long __swp_type_rste(swp_entry_t entry) +{ + return (entry.val >> __SWP_TYPE_SHIFT_RSTE) & __SWP_TYPE_MASK_RSTE; +} + +static inline unsigned long __swp_offset_rste(swp_entry_t entry) +{ + return (entry.val >> __SWP_OFFSET_SHIFT_RSTE) & __SWP_OFFSET_MASK_RSTE; +} + +#define __rste_to_swp_entry(rste) ((swp_entry_t) { rste }) + extern int vmem_add_mapping(unsigned long start, unsigned long size); extern void vmem_remove_mapping(unsigned long start, unsigned long size); extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc); diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 0cde7e240373..2c29bdf12127 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -130,10 +130,24 @@ static __always_inline bool should_resched(int preempt_offset) #define init_idle_preempt_count(p, cpu) do { } while (0) #ifdef CONFIG_PREEMPTION -extern void preempt_schedule(void); -#define __preempt_schedule() preempt_schedule() -extern void preempt_schedule_notrace(void); -#define __preempt_schedule_notrace() preempt_schedule_notrace() + +void preempt_schedule(void); +void preempt_schedule_notrace(void); + +#ifdef CONFIG_PREEMPT_DYNAMIC + +void dynamic_preempt_schedule(void); +void dynamic_preempt_schedule_notrace(void); +#define __preempt_schedule() dynamic_preempt_schedule() +#define __preempt_schedule_notrace() dynamic_preempt_schedule_notrace() + +#else /* CONFIG_PREEMPT_DYNAMIC */ + +#define __preempt_schedule() preempt_schedule() +#define __preempt_schedule_notrace() preempt_schedule_notrace() + +#endif /* CONFIG_PREEMPT_DYNAMIC */ + #endif /* CONFIG_PREEMPTION */ #endif /* __ASM_PREEMPT_H */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index ac868a9bb0d1..f87dd0a84855 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -82,9 +82,10 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) kcsan_release(); asm_inline volatile( ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */ - " sth %1,%0\n" - : "=R" (((unsigned short *) &lp->lock)[1]) - : "d" (0) : "cc", "memory"); + " mvhhi %[lock],0\n" + : [lock] "=Q" (((unsigned short *)&lp->lock)[1]) + : + : "memory"); } /* diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 00ac01874a12..c33f7144d1b9 100644 --- 
a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -61,44 +61,45 @@ void arch_setup_new_exec(void); /* * thread information flags bit numbers */ -/* _TIF_WORK bits */ #define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_UPROBE 3 /* breakpointed or single-stepping */ -#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */ +#define TIF_UPROBE 4 /* breakpointed or single-stepping */ #define TIF_PATCH_PENDING 5 /* pending live patching update */ #define TIF_PGSTE 6 /* New mm's will use 4K page tables */ #define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ +#define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */ #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ #define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */ - #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ #define TIF_SINGLE_STEP 19 /* This task is single stepped */ #define TIF_BLOCK_STEP 20 /* This task is block stepped */ #define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */ - -/* _TIF_TRACE bits */ #define TIF_SYSCALL_TRACE 24 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 25 /* syscall auditing active */ #define TIF_SECCOMP 26 /* secure computing */ #define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */ #define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME) -#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) #define _TIF_SIGPENDING BIT(TIF_SIGPENDING) #define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED) +#define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY) #define _TIF_UPROBE BIT(TIF_UPROBE) -#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE) #define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) +#define _TIF_PGSTE BIT(TIF_PGSTE) +#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) +#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE) #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST) #define _TIF_PER_TRAP BIT(TIF_PER_TRAP) - #define _TIF_31BIT BIT(TIF_31BIT) +#define _TIF_MEMDIE BIT(TIF_MEMDIE) +#define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK) #define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP) - +#define _TIF_BLOCK_STEP BIT(TIF_BLOCK_STEP) +#define _TIF_UPROBE_SINGLESTEP BIT(TIF_UPROBE_SINGLESTEP) #define _TIF_SYSCALL_TRACE BIT(TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT BIT(TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP BIT(TIF_SECCOMP) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index a6e2cd89b609..9dfd46dd03c6 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -46,11 +46,6 @@ static inline void __tlb_flush_mm(struct mm_struct *mm) { unsigned long gmap_asce; - /* - * If the machine has IDTE we prefer to do a per mm flush - * on all cpus instead of doing a local flush if the mm - * only ran on the local cpu. 
- */ preempt_disable(); atomic_inc(&mm->context.flush_count); /* Reset TLB flush mask */ diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index b3f2103694e4..de19fd8a6a95 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -77,12 +77,14 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area, static void debug_info_get(debug_info_t *); static void debug_info_put(debug_info_t *); static int debug_prolog_level_fn(debug_info_t *id, - struct debug_view *view, char *out_buf); + struct debug_view *view, char *out_buf, + size_t out_buf_size); static int debug_input_level_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); static int debug_prolog_pages_fn(debug_info_t *id, - struct debug_view *view, char *out_buf); + struct debug_view *view, char *out_buf, + size_t out_buf_size); static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); @@ -90,9 +92,11 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *in_buf); + char *out_buf, size_t out_buf_size, + const char *in_buf); static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *inbuf); + char *out_buf, size_t out_buf_size, + const char *inbuf); static void debug_areas_swap(debug_info_t *a, debug_info_t *b); static void debug_events_append(debug_info_t *dest, debug_info_t *src); @@ -391,8 +395,10 @@ static int debug_format_entry(file_private_info_t *p_info) if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { /* print prolog */ - if (view->prolog_proc) - len += view->prolog_proc(id_snap, view, p_info->temp_buf); + if (view->prolog_proc) { + len += view->prolog_proc(id_snap, view, p_info->temp_buf, + sizeof(p_info->temp_buf)); + } goto out; } if (!id_snap->areas) /* this is true, if we have a prolog only view */ @@ -402,12 +408,16 @@ static int debug_format_entry(file_private_info_t *p_info) if (act_entry->clock == 0LL) goto out; /* empty entry */ - if (view->header_proc) + if (view->header_proc) { len += view->header_proc(id_snap, view, p_info->act_area, - act_entry, p_info->temp_buf + len); - if (view->format_proc) + act_entry, p_info->temp_buf + len, + sizeof(p_info->temp_buf) - len); + } + if (view->format_proc) { len += view->format_proc(id_snap, view, p_info->temp_buf + len, + sizeof(p_info->temp_buf) - len, DEBUG_DATA(act_entry)); + } out: return len; } @@ -1292,9 +1302,9 @@ static inline int debug_get_uint(char *buf) */ static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view, - char *out_buf) + char *out_buf, size_t out_buf_size) { - return sprintf(out_buf, "%i\n", id->pages_per_area); + return scnprintf(out_buf, out_buf_size, "%i\n", id->pages_per_area); } /* @@ -1341,14 +1351,14 @@ out: * prints out actual debug level */ static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view, - char *out_buf) + char *out_buf, size_t out_buf_size) { int rc = 0; if (id->level == DEBUG_OFF_LEVEL) - rc = sprintf(out_buf, "-\n"); + rc = scnprintf(out_buf, out_buf_size, "-\n"); else - rc = sprintf(out_buf, "%i\n", id->level); + rc = scnprintf(out_buf, out_buf_size, "%i\n", id->level); return rc; } @@ -1465,22 +1475,24 @@ 
out: * prints debug data in hex/ascii format */ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *in_buf) + char *out_buf, size_t out_buf_size, const char *in_buf) { int i, rc = 0; - for (i = 0; i < id->buf_size; i++) - rc += sprintf(out_buf + rc, "%02x ", ((unsigned char *) in_buf)[i]); - rc += sprintf(out_buf + rc, "| "); + for (i = 0; i < id->buf_size; i++) { + rc += scnprintf(out_buf + rc, out_buf_size - rc, + "%02x ", ((unsigned char *)in_buf)[i]); + } + rc += scnprintf(out_buf + rc, out_buf_size - rc, "| "); for (i = 0; i < id->buf_size; i++) { unsigned char c = in_buf[i]; if (isascii(c) && isprint(c)) - rc += sprintf(out_buf + rc, "%c", c); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "%c", c); else - rc += sprintf(out_buf + rc, "."); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "."); } - rc += sprintf(out_buf + rc, "\n"); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "\n"); return rc; } @@ -1488,7 +1500,8 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, * prints header for debug entry */ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, - int area, debug_entry_t *entry, char *out_buf) + int area, debug_entry_t *entry, char *out_buf, + size_t out_buf_size) { unsigned long sec, usec; unsigned long caller; @@ -1505,9 +1518,9 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %px ", - area, sec, usec, level, except_str, - entry->cpu, (void *)caller); + rc += scnprintf(out_buf, out_buf_size, "%02i %011ld:%06lu %1u %1s %04u %px ", + area, sec, usec, level, except_str, + entry->cpu, (void *)caller); return rc; } EXPORT_SYMBOL(debug_dflt_header_fn); @@ -1520,7 +1533,7 @@ EXPORT_SYMBOL(debug_dflt_header_fn); #define DEBUG_SPRINTF_MAX_ARGS 10 static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *inbuf) + char *out_buf, size_t out_buf_size, const char *inbuf) { debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf; int num_longs, num_used_args = 0, i, rc = 0; @@ -1533,8 +1546,9 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, goto out; /* bufsize of entry too small */ if (num_longs == 1) { /* no args, we use only the string */ - strcpy(out_buf, curr_event->string); - rc = strlen(curr_event->string); + rc = strscpy(out_buf, curr_event->string, out_buf_size); + if (rc == -E2BIG) + rc = out_buf_size; goto out; } @@ -1546,12 +1560,13 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, for (i = 0; i < num_used_args; i++) index[i] = i; - rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]], - curr_event->args[index[1]], curr_event->args[index[2]], - curr_event->args[index[3]], curr_event->args[index[4]], - curr_event->args[index[5]], curr_event->args[index[6]], - curr_event->args[index[7]], curr_event->args[index[8]], - curr_event->args[index[9]]); + rc = scnprintf(out_buf, out_buf_size, + curr_event->string, curr_event->args[index[0]], + curr_event->args[index[1]], curr_event->args[index[2]], + curr_event->args[index[3]], curr_event->args[index[4]], + curr_event->args[index[5]], curr_event->args[index[6]], + curr_event->args[index[7]], curr_event->args[index[8]], + curr_event->args[index[9]]); out: return rc; } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 
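Every debug-view callback above is converted along the same lines: the formatter now receives the remaining buffer size and accumulates output with scnprintf(), which, unlike sprintf(), cannot run past the buffer and, unlike snprintf(), returns the number of characters actually stored rather than the would-be length, so the running offset stays in bounds. A minimal self-contained sketch of the accumulation idiom:

static int format_hex_sketch(char *out, size_t out_size,
			     const unsigned char *in, int len)
{
	int i, rc = 0;

	for (i = 0; i < len; i++) {
		/* out_size - rc may reach 0; scnprintf() then stores nothing */
		rc += scnprintf(out + rc, out_size - rc, "%02x ", in[i]);
	}
	rc += scnprintf(out + rc, out_size - rc, "\n");
	return rc;
}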
1ff13239d4e5..960c08700cf6 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -430,9 +430,13 @@ SYM_CODE_START(\name) SYM_CODE_END(\name) .endm + .section .irqentry.text, "ax" + INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq + .section .kprobes.text, "ax" + /* * Machine check handler routines */ diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 6295faf0987d..8b80ea57125f 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -489,6 +489,12 @@ int __init arch_init_kprobes(void) return 0; } +int __init arch_populate_kprobe_blacklist(void) +{ + return kprobe_add_area_blacklist((unsigned long)__irqentry_text_start, + (unsigned long)__irqentry_text_end); +} + int arch_trampoline_kprobe(struct kprobe *p) { return 0; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 0cde42f8af6e..1e99514fb7ae 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -180,39 +180,27 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw) */ static void free_sampling_buffer(struct sf_buffer *sfb) { - unsigned long *sdbt, *curr; - - if (!sfb->sdbt) - return; + unsigned long *sdbt, *curr, *head; sdbt = sfb->sdbt; - curr = sdbt; - + if (!sdbt) + return; + sfb->sdbt = NULL; /* Free the SDBT after all SDBs are processed... */ - while (1) { - if (!*curr || !sdbt) - break; - - /* Process table-link entries */ + head = sdbt; + curr = sdbt; + do { if (is_link_entry(curr)) { + /* Process table-link entries */ curr = get_next_sdbt(curr); - if (sdbt) - free_page((unsigned long)sdbt); - - /* If the origin is reached, sampling buffer is freed */ - if (curr == sfb->sdbt) - break; - else - sdbt = curr; + free_page((unsigned long)sdbt); + sdbt = curr; } else { /* Process SDB pointer */ - if (*curr) { - free_page((unsigned long)phys_to_virt(*curr)); - curr++; - } + free_page((unsigned long)phys_to_virt(*curr)); + curr++; } - } - + } while (curr != head); memset(sfb, 0, sizeof(*sfb)); } diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 9f59837d159e..40edfde25f5b 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -151,7 +151,7 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo break; } if (!store_ip(consume_entry, cookie, entry, perf, ip)) - return; + break; first = false; } pagefault_enable(); diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index a688351f4ab5..9816b0060fbe 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -129,8 +129,8 @@ static void ipte_lock_simple(struct kvm *kvm) retry: read_lock(&kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); if (old.k) { read_unlock(&kvm->arch.sca_lock); cond_resched(); @@ -138,7 +138,7 @@ retry: } new = old; new.k = 1; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); out: mutex_unlock(&kvm->arch.ipte_mutex); @@ -154,11 +154,11 @@ static void ipte_unlock_simple(struct kvm *kvm) goto out; read_lock(&kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); new = old; new.k = 0; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); wake_up(&kvm->arch.ipte_wq); out: @@ 
-172,8 +172,8 @@ static void ipte_lock_siif(struct kvm *kvm) retry: read_lock(&kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); if (old.kg) { read_unlock(&kvm->arch.sca_lock); cond_resched(); @@ -182,7 +182,7 @@ retry: new = old; new.k = 1; new.kh++; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); } @@ -192,13 +192,13 @@ static void ipte_unlock_siif(struct kvm *kvm) read_lock(&kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); new = old; new.kh--; if (!new.kh) new.k = 0; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); if (!new.kh) wake_up(&kvm->arch.ipte_wq); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 4f0e7f61edf7..ea8dce299954 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -118,8 +118,6 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) static void sca_clear_ext_call(struct kvm_vcpu *vcpu) { - int rc, expect; - if (!kvm_s390_use_sca_entries()) return; kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND); @@ -128,23 +126,16 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu) struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union esca_sigp_ctrl old; - old = READ_ONCE(*sigp_ctrl); - expect = old.value; - rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + WRITE_ONCE(sigp_ctrl->value, 0); } else { struct bsca_block *sca = vcpu->kvm->arch.sca; union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union bsca_sigp_ctrl old; - old = READ_ONCE(*sigp_ctrl); - expect = old.value; - rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + WRITE_ONCE(sigp_ctrl->value, 0); } read_unlock(&vcpu->kvm->arch.sca_lock); - WARN_ON(rc != expect); /* cannot clear? 
*/ } int psw_extint_disabled(struct kvm_vcpu *vcpu) @@ -247,12 +238,12 @@ static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam) { u64 word, _word; + word = READ_ONCE(gisa->u64.word[0]); do { - word = READ_ONCE(gisa->u64.word[0]); if ((u64)gisa != word >> 32) return -EBUSY; _word = (word & ~0xffUL) | iam; - } while (cmpxchg(&gisa->u64.word[0], word, _word) != word); + } while (!try_cmpxchg(&gisa->u64.word[0], &word, _word)); return 0; } @@ -270,10 +261,10 @@ static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa) { u64 word, _word; + word = READ_ONCE(gisa->u64.word[0]); do { - word = READ_ONCE(gisa->u64.word[0]); _word = word & ~(0xffUL << 24); - } while (cmpxchg(&gisa->u64.word[0], word, _word) != word); + } while (!try_cmpxchg(&gisa->u64.word[0], &word, _word)); } /** @@ -291,14 +282,14 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi) u8 pending_mask, alert_mask; u64 word, _word; + word = READ_ONCE(gi->origin->u64.word[0]); do { - word = READ_ONCE(gi->origin->u64.word[0]); alert_mask = READ_ONCE(gi->alert.mask); pending_mask = (u8)(word >> 24) & alert_mask; if (pending_mask) return pending_mask; _word = (word & ~0xffUL) | alert_mask; - } while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word); + } while (!try_cmpxchg(&gi->origin->u64.word[0], &word, _word)); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 442d4a227c0e..d8080c27d45b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1937,11 +1937,11 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val) read_lock(&kvm->arch.sca_lock); sca = kvm->arch.sca; + old = READ_ONCE(sca->utility); do { - old = READ_ONCE(sca->utility); new = old; new.mtcr = val; - } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&sca->utility.val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); } diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index a61518b549f0..9b9e7fdd5380 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -208,13 +208,12 @@ static inline int account_mem(unsigned long nr_pages) page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + cur_pages = atomic_long_read(&user->locked_vm); do { - cur_pages = atomic_long_read(&user->locked_vm); new_pages = cur_pages + nr_pages; if (new_pages > page_limit) return -ENOMEM; - } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages, - new_pages) != cur_pages); + } while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages)); atomic64_add(nr_pages, ¤t->mm->pinned_vm); diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 09d735010ee1..a81a01c44927 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -15,6 +15,7 @@ #include <linux/percpu.h> #include <linux/io.h> #include <asm/alternative.h> +#include <asm/asm.h> int spin_retry = -1; @@ -76,24 +77,43 @@ static inline int arch_load_niai4(int *lock) asm_inline volatile( ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */ - " l %0,%1\n" - : "=d" (owner) : "Q" (*lock) : "memory"); + " l %[owner],%[lock]\n" + : [owner] "=d" (owner) : [lock] "R" (*lock) : "memory"); return owner; } -static inline int arch_cmpxchg_niai8(int *lock, int old, int new) +#ifdef __HAVE_ASM_FLAG_OUTPUTS__ + +static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new) +{ + int cc; + + asm_inline volatile( + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */ + " cs 
%[old],%[new],%[lock]\n" + : [old] "+d" (old), [lock] "+Q" (*lock), "=@cc" (cc) + : [new] "d" (new) + : "memory"); + return cc == 0; +} + +#else /* __HAVE_ASM_FLAG_OUTPUTS__ */ + +static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new) { int expected = old; asm_inline volatile( ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */ - " cs %0,%3,%1\n" - : "=d" (old), "=Q" (*lock) - : "0" (old), "d" (new), "Q" (*lock) + " cs %[old],%[new],%[lock]\n" + : [old] "+d" (old), [lock] "+Q" (*lock) + : [new] "d" (new) : "cc", "memory"); return expected == old; } +#endif /* __HAVE_ASM_FLAG_OUTPUTS__ */ + static inline struct spin_wait *arch_spin_decode_tail(int lock) { int ix, cpu; @@ -226,7 +246,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) /* Try to get the lock if it is free. */ if (!owner) { new = (old & _Q_TAIL_MASK) | lockval; - if (arch_cmpxchg_niai8(&lp->lock, old, new)) { + if (arch_try_cmpxchg_niai8(&lp->lock, old, new)) { /* Got the lock */ return; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 646326fa0fad..9b681f74dccc 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -338,7 +338,8 @@ done: handle_fault_error_nolock(regs, 0); else do_sigsegv(regs, SEGV_MAPERR); - } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)) { + } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | + VM_FAULT_HWPOISON_LARGE)) { if (!user_mode(regs)) handle_fault_error_nolock(regs, 0); else diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 643e47bfaddc..16b8a36c56de 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -587,7 +587,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) if (pmd_leaf(*pmd)) { *table = (pmd_val(*pmd) & _SEGMENT_ENTRY_HARDWARE_BITS_LARGE) - | _SEGMENT_ENTRY_GMAP_UC; + | _SEGMENT_ENTRY_GMAP_UC + | _SEGMENT_ENTRY; } else *table = pmd_val(*pmd) & _SEGMENT_ENTRY_HARDWARE_BITS; @@ -2396,7 +2397,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, gaddr = __gmap_segment_gaddr((unsigned long *)pmdp); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC)); + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (purge) __pmdp_csp(pmdp); set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); @@ -2450,7 +2452,8 @@ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) gaddr = __gmap_segment_gaddr(entry); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC)); + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_LOCAL); @@ -2485,7 +2488,8 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) gaddr = __gmap_segment_gaddr(entry); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC)); + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 7c79cf1bc7d7..d9ce199953de 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -24,6 +24,7 @@ static inline unsigned long __pte_to_rste(pte_t pte) { + swp_entry_t arch_entry; unsigned long rste; /* @@ -48,6 +49,7 @@ static inline unsigned long __pte_to_rste(pte_t pte) */ if (pte_present(pte)) { rste = pte_val(pte) & 
PAGE_MASK; + rste |= _SEGMENT_ENTRY_PRESENT; rste |= move_set_bit(pte_val(pte), _PAGE_READ, _SEGMENT_ENTRY_READ); rste |= move_set_bit(pte_val(pte), _PAGE_WRITE, @@ -66,6 +68,10 @@ static inline unsigned long __pte_to_rste(pte_t pte) #endif rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC, _SEGMENT_ENTRY_NOEXEC); + } else if (!pte_none(pte)) { + /* swap pte */ + arch_entry = __pte_to_swp_entry(pte); + rste = mk_swap_rste(__swp_type(arch_entry), __swp_offset(arch_entry)); } else rste = _SEGMENT_ENTRY_EMPTY; return rste; @@ -73,13 +79,18 @@ static inline unsigned long __pte_to_rste(pte_t pte) static inline pte_t __rste_to_pte(unsigned long rste) { + swp_entry_t arch_entry; unsigned long pteval; - int present; + int present, none; + pte_t pte; - if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { present = pud_present(__pud(rste)); - else + none = pud_none(__pud(rste)); + } else { present = pmd_present(__pmd(rste)); + none = pmd_none(__pmd(rste)); + } /* * Convert encoding pmd / pud bits pte bits @@ -114,6 +125,11 @@ static inline pte_t __rste_to_pte(unsigned long rste) pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY); #endif pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC); + } else if (!none) { + /* swap rste */ + arch_entry = __rste_to_swp_entry(rste); + pte = mk_swap_pte(__swp_type_rste(arch_entry), __swp_offset_rste(arch_entry)); + pteval = pte_val(pte); } else pteval = _PAGE_INVALID; return __pte(pteval); @@ -148,8 +164,6 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, unsigned long rste; rste = __pte_to_rste(pte); - if (!MACHINE_HAS_NX) - rste &= ~_SEGMENT_ENTRY_NOEXEC; /* Set correct table type for 2G hugepages */ if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { @@ -223,11 +237,10 @@ pte_t *huge_pte_offset(struct mm_struct *mm, p4dp = p4d_offset(pgdp, addr); if (p4d_present(*p4dp)) { pudp = pud_offset(p4dp, addr); - if (pud_present(*pudp)) { - if (pud_leaf(*pudp)) - return (pte_t *) pudp; + if (sz == PUD_SIZE) + return (pte_t *)pudp; + if (pud_present(*pudp)) pmdp = pmd_offset(pudp, addr); - } } } return (pte_t *) pmdp; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index cbff587dc4e3..88f72745fa59 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -779,8 +779,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) * @fh: Current Function Handle of the device to be created * @state: Initial state after creation either Standby or Configured * - * Creates a new zpci device and adds it to its, possibly newly created, zbus - * as well as zpci_list. + * Allocates a new struct zpci_dev and queries the platform for its details. + * If successful the device can subsequently be added to the zPCI subsystem + * using zpci_add_device(). * * Returns: the zdev on success or an error pointer otherwise */ @@ -803,7 +804,6 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state) goto error; zdev->state = state; - kref_init(&zdev->kref); mutex_init(&zdev->state_lock); mutex_init(&zdev->fmb_lock); mutex_init(&zdev->kzdev_lock); @@ -816,6 +816,17 @@ error: return ERR_PTR(rc); } +/** + * zpci_add_device() - Add a previously created zPCI device to the zPCI subsystem + * @zdev: The zPCI device to be added + * + * A struct zpci_dev is added to the zPCI subsystem and to a virtual PCI bus creating + * a new one as necessary. A hotplug slot is created and events start to be handled. 
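Note the ownership rule this kernel-doc spells out: kref_init() moves from zpci_create_device() into zpci_add_device() (see the surrounding hunks), so a device that was created but never successfully added holds no reference and is released with a plain kfree(). A sketch of the resulting caller contract, matching the error handling added to pci.c and pci_event.c (the -EIO return value is illustrative):

struct zpci_dev *zdev;

zdev = zpci_create_device(fid, fh, ZPCI_FN_STATE_STANDBY);
if (IS_ERR(zdev))
	return PTR_ERR(zdev);
if (zpci_add_device(zdev)) {
	kfree(zdev);	/* never registered, so there is no kref to put */
	return -EIO;	/* illustrative */
}
/* from here on only zpci_zdev_get()/zpci_zdev_put() are valid */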
+ * If successful from this point on zpci_zdev_get() and zpci_zdev_put() must be used. + * If adding the struct zpci_dev fails the device was not added and should be freed. + * + * Return: 0 on success, or an error code otherwise + */ int zpci_add_device(struct zpci_dev *zdev) { int rc; @@ -829,6 +840,7 @@ int zpci_add_device(struct zpci_dev *zdev) if (rc) goto error_destroy_iommu; + kref_init(&zdev->kref); spin_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); spin_unlock(&zpci_list_lock); @@ -928,10 +940,8 @@ void zpci_device_reserved(struct zpci_dev *zdev) void zpci_release_device(struct kref *kref) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); - int ret; - if (zdev->has_hp_slot) - zpci_exit_slot(zdev); + WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); @@ -939,28 +949,14 @@ void zpci_release_device(struct kref *kref) if (zdev_enabled(zdev)) zpci_disable_device(zdev); - switch (zdev->state) { - case ZPCI_FN_STATE_CONFIGURED: - ret = sclp_pci_deconfigure(zdev->fid); - zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret); - fallthrough; - case ZPCI_FN_STATE_STANDBY: - if (zdev->has_hp_slot) - zpci_exit_slot(zdev); - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); - zpci_dbg(3, "rsv fid:%x\n", zdev->fid); - fallthrough; - case ZPCI_FN_STATE_RESERVED: - if (zdev->has_resources) - zpci_cleanup_bus_resources(zdev); - zpci_bus_device_unregister(zdev); - zpci_destroy_iommu(zdev); - fallthrough; - default: - break; - } + if (zdev->has_hp_slot) + zpci_exit_slot(zdev); + + if (zdev->has_resources) + zpci_cleanup_bus_resources(zdev); + + zpci_bus_device_unregister(zdev); + zpci_destroy_iommu(zdev); zpci_dbg(3, "rem fid:%x\n", zdev->fid); kfree_rcu(zdev, rcu); } @@ -1121,7 +1117,8 @@ static void zpci_add_devices(struct list_head *scan_list) list_sort(NULL, scan_list, &zpci_cmp_rid); list_for_each_entry_safe(zdev, tmp, scan_list, entry) { list_del_init(&zdev->entry); - zpci_add_device(zdev); + if (zpci_add_device(zdev)) + kfree(zdev); } } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 47f934f4e828..7f7b732b3f3e 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -340,7 +340,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED); if (IS_ERR(zdev)) break; - zpci_add_device(zdev); + if (zpci_add_device(zdev)) { + kfree(zdev); + break; + } } else { /* the configuration request may be stale */ if (zdev->state != ZPCI_FN_STATE_STANDBY) @@ -354,7 +357,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY); if (IS_ERR(zdev)) break; - zpci_add_device(zdev); + if (zpci_add_device(zdev)) { + kfree(zdev); + break; + } } else { zpci_update_fh(zdev, ccdf->fh); } diff --git a/arch/sh/drivers/push-switch.c b/arch/sh/drivers/push-switch.c index 1dea43381b5a..2b51ad9d5586 100644 --- a/arch/sh/drivers/push-switch.c +++ b/arch/sh/drivers/push-switch.c @@ -110,7 +110,7 @@ static void switch_drv_remove(struct platform_device *pdev) static struct platform_driver switch_driver = { .probe = switch_drv_probe, - .remove_new = switch_drv_remove, + .remove = switch_drv_remove, .driver = { .name = DRV_NAME, }, diff --git a/arch/sh/kernel/cpu/proc.c b/arch/sh/kernel/cpu/proc.c index a306bcd6b341..5f6d0e827bae 100644 --- a/arch/sh/kernel/cpu/proc.c +++ 
b/arch/sh/kernel/cpu/proc.c @@ -132,7 +132,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? cpu_data + *pos : NULL; + return *pos < nr_cpu_ids ? cpu_data + *pos : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 757451c3ea1d..0400078076e5 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -29,7 +29,7 @@ UTS_MACHINE := sparc # versions of gcc. Some gcc versions won't pass -Av8 to binutils when you # give -mcpu=v8. This silently worked with older binutils versions but # does not any more. -KBUILD_CFLAGS += -m32 -mcpu=v8 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7 +KBUILD_CFLAGS += -m32 -mcpu=v8 -pipe -mno-fpu $(call cc-option,-fcall-used-g5) $(call cc-option,-fcall-used-g7) KBUILD_CFLAGS += -Wa,-Av8 KBUILD_AFLAGS += -m32 -Wa,-Av8 @@ -45,7 +45,7 @@ export BITS := 64 UTS_MACHINE := sparc64 KBUILD_CFLAGS += -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare +KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 $(call cc-option,-fcall-used-g7) -Wno-sign-compare KBUILD_CFLAGS += -Wa,--undeclared-regs KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3) KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs diff --git a/arch/sparc/include/asm/hvtramp.h b/arch/sparc/include/asm/hvtramp.h index 688ea43af0f5..ce2453ea4f2b 100644 --- a/arch/sparc/include/asm/hvtramp.h +++ b/arch/sparc/include/asm/hvtramp.h @@ -17,7 +17,7 @@ struct hvtramp_descr { __u64 fault_info_va; __u64 fault_info_pa; __u64 thread_reg; - struct hvtramp_mapping maps[1]; + struct hvtramp_mapping maps[]; }; void hv_cpu_startup(unsigned long hvdescr_pa); diff --git a/arch/sparc/include/asm/parport_64.h b/arch/sparc/include/asm/parport_64.h index 4f530a270760..3068809ef9ad 100644 --- a/arch/sparc/include/asm/parport_64.h +++ b/arch/sparc/include/asm/parport_64.h @@ -243,7 +243,7 @@ static struct platform_driver ecpp_driver = { .of_match_table = ecpp_match, }, .probe = ecpp_probe, - .remove_new = ecpp_remove, + .remove = ecpp_remove, }; static int parport_pc_find_nonpci_ports(int autoirq, int autodma) diff --git a/arch/sparc/kernel/chmc.c b/arch/sparc/kernel/chmc.c index e02074062001..d4c74d6b2e1b 100644 --- a/arch/sparc/kernel/chmc.c +++ b/arch/sparc/kernel/chmc.c @@ -814,7 +814,7 @@ static struct platform_driver us3mc_driver = { .of_match_table = us3mc_match, }, .probe = us3mc_probe, - .remove_new = us3mc_remove, + .remove = us3mc_remove, }; static inline bool us3mc_platform(void) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index e40c395db202..5cbd6ed5ef6f 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -297,9 +297,7 @@ static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg, unsigned long hv_err; int i; - hdesc = kzalloc(sizeof(*hdesc) + - (sizeof(struct hvtramp_mapping) * - num_kernel_image_mappings - 1), + hdesc = kzalloc(struct_size(hdesc, maps, num_kernel_image_mappings), GFP_KERNEL); if (!hdesc) { printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate " diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 08bbdc458596..578fd0d49f30 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -255,6 +255,7 @@ static void mostek_write_byte(struct device *dev, u32 ofs, u8 val) static struct m48t59_plat_data m48t59_data = { .read_byte = mostek_read_byte, .write_byte =
mostek_write_byte, + .yy_offset = 68, }; /* resource is set at runtime */ diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 60f1c8cc5363..b32f27f929d1 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -544,6 +544,7 @@ static void mostek_write_byte(struct device *dev, u32 ofs, u8 val) static struct m48t59_plat_data m48t59_data = { .read_byte = mostek_read_byte, .write_byte = mostek_write_byte, + .yy_offset = 68, }; static struct platform_device m48t59_rtc = { diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index d317a843f7ea..f1b86eb30340 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -48,6 +48,11 @@ SECTIONS { _text = .; HEAD_TEXT + ALIGN_FUNCTION(); +#ifdef CONFIG_SPARC64 + /* Match text section symbols in head_64.S first */ + *head_64.o(.text) +#endif TEXT_TEXT SCHED_TEXT LOCK_TEXT diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index 243dbfc4609d..50ec2978cda5 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -46,7 +46,7 @@ CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables -m64 -fno-omit-frame-pointer -foptimize-sibling-calls \ -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO -SPARC_REG_CFLAGS = -ffixed-g4 -ffixed-g5 -fcall-used-g5 -fcall-used-g7 +SPARC_REG_CFLAGS = -ffixed-g4 -ffixed-g5 $(call cc-option,-fcall-used-g5) $(call cc-option,-fcall-used-g7) $(vobjs): KBUILD_CFLAGS := $(filter-out $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c index e794edde6755..79607804ea1b 100644 --- a/arch/sparc/vdso/vclock_gettime.c +++ b/arch/sparc/vdso/vclock_gettime.c @@ -86,6 +86,11 @@ notrace static long vdso_fallback_gettimeofday(struct __kernel_old_timeval *tv, } #ifdef CONFIG_SPARC64 +notrace static __always_inline u64 __shr64(u64 val, int amt) +{ + return val >> amt; +} + notrace static __always_inline u64 vread_tick(void) { u64 ret; @@ -102,6 +107,21 @@ notrace static __always_inline u64 vread_tick_stick(void) return ret; } #else +notrace static __always_inline u64 __shr64(u64 val, int amt) +{ + u64 ret; + + __asm__ __volatile__("sllx %H1, 32, %%g1\n\t" + "srl %L1, 0, %L1\n\t" + "or %%g1, %L1, %%g1\n\t" + "srlx %%g1, %2, %L0\n\t" + "srlx %L0, 32, %H0" + : "=r" (ret) + : "r" (val), "r" (amt) + : "g1"); + return ret; +} + notrace static __always_inline u64 vread_tick(void) { register unsigned long long ret asm("o4"); @@ -154,7 +174,7 @@ notrace static __always_inline int do_realtime(struct vvar_data *vvar, ts->tv_sec = vvar->wall_time_sec; ns = vvar->wall_time_snsec; ns += vgetsns(vvar); - ns >>= vvar->clock.shift; + ns = __shr64(ns, vvar->clock.shift); } while (unlikely(vvar_read_retry(vvar, seq))); ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); @@ -174,7 +194,7 @@ notrace static __always_inline int do_realtime_stick(struct vvar_data *vvar, ts->tv_sec = vvar->wall_time_sec; ns = vvar->wall_time_snsec; ns += vgetsns_stick(vvar); - ns >>= vvar->clock.shift; + ns = __shr64(ns, vvar->clock.shift); } while (unlikely(vvar_read_retry(vvar, seq))); ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); @@ -194,7 +214,7 @@ notrace static __always_inline int do_monotonic(struct vvar_data *vvar, ts->tv_sec = vvar->monotonic_time_sec; ns = vvar->monotonic_time_snsec; ns += vgetsns(vvar); - ns >>= vvar->clock.shift; + ns = __shr64(ns, vvar->clock.shift); } while (unlikely(vvar_read_retry(vvar, 
seq))); ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); @@ -214,7 +234,7 @@ notrace static __always_inline int do_monotonic_stick(struct vvar_data *vvar, ts->tv_sec = vvar->monotonic_time_sec; ns = vvar->monotonic_time_snsec; ns += vgetsns_stick(vvar); - ns >>= vvar->clock.shift; + ns = __shr64(ns, vvar->clock.shift); } while (unlikely(vvar_read_retry(vvar, seq))); ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); diff --git a/arch/um/Kconfig b/arch/um/Kconfig index c89575d05021..18051b1cfce0 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -5,6 +5,7 @@ menu "UML-specific options" config UML bool default y + select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL @@ -32,6 +33,8 @@ config UML select HAVE_ARCH_VMAP_STACK select HAVE_RUST select ARCH_HAS_UBSAN + select HAVE_ARCH_TRACEHOOK + select THREAD_INFO_IN_TASK config MMU bool @@ -94,7 +97,7 @@ config MAY_HAVE_RUNTIME_DEPS config STATIC_LINK bool "Force a static link" - depends on CC_CAN_LINK_STATIC_NO_RUNTIME_DEPS || !MAY_HAVE_RUNTIME_DEPS + depends on !MAY_HAVE_RUNTIME_DEPS help This option gives you the ability to force a static link of UML. Normally, UML is linked as a shared binary. This is inconvenient for @@ -209,8 +212,8 @@ config MMAPPER config PGTABLE_LEVELS int - default 3 if 3_LEVEL_PGTABLES - default 2 + default 4 if 64BIT + default 2 if !64BIT config UML_TIME_TRAVEL_SUPPORT bool @@ -227,6 +230,21 @@ config UML_TIME_TRAVEL_SUPPORT It is safe to say Y, but you probably don't need this. +config UML_MAX_USERSPACE_ITERATIONS + int + prompt "Maximum number of unscheduled userspace iterations" + default 10000 + depends on UML_TIME_TRAVEL_SUPPORT + help + In UML's inf-cpu and ext time-travel modes, userspace can run without being + interrupted. This will eventually overwhelm the kernel and create OOM + situations (mainly because RCU does not run). This setting specifies the number + of kernel/userspace switches (minor/major page fault, signal or syscall) + for the same userspace thread before sched_clock is advanced by a + jiffy to trigger scheduling. + + Setting it to zero disables the feature. + config KASAN_SHADOW_OFFSET hex depends on KASAN diff --git a/arch/um/Makefile b/arch/um/Makefile index 00b63bac5eff..1d36a613aad8 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -61,7 +61,8 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ -Din6addr_loopback=kernel_in6addr_loopback \ - -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr + -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \ + -D__close_range=kernel__close_range KBUILD_RUSTFLAGS += -Crelocation-model=pie @@ -70,7 +71,9 @@ KBUILD_AFLAGS += $(ARCH_INCLUDE) USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \ $(ARCH_INCLUDE) $(MODE_INCLUDE) $(filter -I%,$(CFLAGS)) \ -D_FILE_OFFSET_BITS=64 -idirafter $(srctree)/include \ - -idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__ + -idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__ \ + -include $(srctree)/include/linux/compiler-version.h \ + -include $(srctree)/include/linux/kconfig.h #This will adjust *FLAGS according to the platform.
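The two new -include directives do more than tidy the flags: they make the generated kernel config macros visible to the user-mode objects built with USER_CFLAGS, which lines up with the deletion of the hand-maintained UML_CONFIG_* copies from common-offsets.h further down. A sketch of the idiom this enables in shared userspace code:

#include <linux/kconfig.h>	/* now pulled in via USER_CFLAGS */

static int want_time_travel(void)
{
	/* previously this required a duplicated UML_CONFIG_* define */
	return IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT);
}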
include $(srctree)/$(ARCH_DIR)/Makefile-os-Linux diff --git a/arch/um/Makefile-skas b/arch/um/Makefile-skas index 67323b028999..1a27e65bcb9c 100644 --- a/arch/um/Makefile-skas +++ b/arch/um/Makefile-skas @@ -3,15 +3,15 @@ # Licensed under the GPL # -GPROF_OPT += -pg +export UM_GPROF_OPT += -pg ifdef CONFIG_CC_IS_CLANG -GCOV_OPT += -fprofile-instr-generate -fcoverage-mapping +export UM_GCOV_OPT += -fprofile-instr-generate -fcoverage-mapping else -GCOV_OPT += -fprofile-arcs -ftest-coverage +export UM_GCOV_OPT += -fprofile-arcs -ftest-coverage endif -CFLAGS-$(CONFIG_GCOV) += $(GCOV_OPT) -CFLAGS-$(CONFIG_GPROF) += $(GPROF_OPT) -LINK-$(CONFIG_GCOV) += $(GCOV_OPT) -LINK-$(CONFIG_GPROF) += $(GPROF_OPT) +CFLAGS-$(CONFIG_GCOV) += $(UM_GCOV_OPT) +CFLAGS-$(CONFIG_GPROF) += $(UM_GPROF_OPT) +LINK-$(CONFIG_GCOV) += $(UM_GCOV_OPT) +LINK-$(CONFIG_GPROF) += $(UM_GPROF_OPT) diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig index 9c9c77f1255a..1ffa088739f4 100644 --- a/arch/um/configs/i386_defconfig +++ b/arch/um/configs/i386_defconfig @@ -1,4 +1,3 @@ -CONFIG_3_LEVEL_PGTABLES=y # CONFIG_COMPACTION is not set CONFIG_BINFMT_MISC=m CONFIG_HOSTFS=y diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index a66e556012c4..35f9beeb19b3 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -161,6 +161,8 @@ static __noreturn int winch_thread(void *arg) int count; char c = 1; + os_set_pdeathsig(); + pty_fd = data->pty_fd; pipe_fd = data->pipe_fd; count = write(pipe_fd, &c, sizeof(c)); diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index 9d228878cea2..0ac149de1ac0 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c @@ -48,6 +48,7 @@ MODULE_PARM_DESC(mixer, MIXER_HELP); #ifndef MODULE static int set_dsp(char *name, int *add) { + *add = 0; dsp = name; return 0; } @@ -56,6 +57,7 @@ __uml_setup("dsp=", set_dsp, "dsp=<dsp device>\n" DSP_HELP); static int set_mixer(char *name, int *add) { + *add = 0; mixer = name; return 0; } diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 77c4afb8ab90..75d04fb4994a 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -336,7 +336,7 @@ static struct platform_driver uml_net_driver = { static void net_device_release(struct device *dev) { - struct uml_net *device = dev_get_drvdata(dev); + struct uml_net *device = container_of(dev, struct uml_net, pdev.dev); struct net_device *netdev = device->dev; struct uml_net_private *lp = netdev_priv(netdev); diff --git a/arch/um/drivers/rtc_kern.c b/arch/um/drivers/rtc_kern.c index 3a1582219c4b..134a58f93c85 100644 --- a/arch/um/drivers/rtc_kern.c +++ b/arch/um/drivers/rtc_kern.c @@ -176,7 +176,7 @@ static void uml_rtc_remove(struct platform_device *pdev) static struct platform_driver uml_rtc_driver = { .probe = uml_rtc_probe, - .remove_new = uml_rtc_remove, + .remove = uml_rtc_remove, .driver = { .name = "uml-rtc", }, diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 7f28ec1929dc..66c1a8835e36 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -779,7 +779,7 @@ static int ubd_open_dev(struct ubd *ubd_dev) static void ubd_device_release(struct device *dev) { - struct ubd *ubd_dev = dev_get_drvdata(dev); + struct ubd *ubd_dev = container_of(dev, struct ubd, pdev.dev); blk_mq_free_tag_set(&ubd_dev->tag_set); *ubd_dev = ((struct ubd) DEFAULT_UBD); @@ -898,6 +898,8 @@ static int ubd_add(int n, char **error_out) if (err) goto 
out_cleanup_disk; + ubd_dev->disk = disk; + return 0; out_cleanup_disk: @@ -1499,6 +1501,7 @@ int io_thread(void *arg) { int n, count, written, res; + os_set_pdeathsig(); os_fix_helper_signals(); while(1){ diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index c992da83268d..64c09db392c1 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -815,7 +815,8 @@ static struct platform_driver uml_net_driver = { static void vector_device_release(struct device *dev) { - struct vector_device *device = dev_get_drvdata(dev); + struct vector_device *device = + container_of(dev, struct vector_device, pdev.dev); struct net_device *netdev = device->dev; list_del(&device->list); diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h index 6f147cd3c9f7..fcfa3b7e021b 100644 --- a/arch/um/drivers/vhost_user.h +++ b/arch/um/drivers/vhost_user.h @@ -10,6 +10,7 @@ /* Feature bits */ #define VHOST_USER_F_PROTOCOL_FEATURES 30 /* Protocol feature bits */ +#define VHOST_USER_PROTOCOL_F_MQ 0 #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 #define VHOST_USER_PROTOCOL_F_CONFIG 9 @@ -23,7 +24,8 @@ /* Supported transport features */ #define VHOST_USER_SUPPORTED_F BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES) /* Supported protocol features */ -#define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ +#define VHOST_USER_SUPPORTED_PROTOCOL_F (BIT_ULL(VHOST_USER_PROTOCOL_F_MQ) | \ + BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \ BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG) | \ BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 2b6e701776b6..65df43fa9be5 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -56,6 +56,7 @@ struct virtio_uml_device { int sock, req_fd, irq; u64 features; u64 protocol_features; + u64 max_vqs; u8 status; u8 registered:1; u8 suspended:1; @@ -72,8 +73,6 @@ struct virtio_uml_vq_info { bool suspended; }; -extern unsigned long long physmem_size, highmem; - #define vu_err(vu_dev, ...) 
dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__) /* Vhost-user protocol */ @@ -343,6 +342,17 @@ static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev, protocol_features); } +static int vhost_user_get_queue_num(struct virtio_uml_device *vu_dev, + u64 *queue_num) +{ + int rc = vhost_user_send_no_payload(vu_dev, true, + VHOST_USER_GET_QUEUE_NUM); + + if (rc) + return rc; + return vhost_user_recv_u64(vu_dev, queue_num); +} + static void vhost_user_reply(struct virtio_uml_device *vu_dev, struct vhost_user_msg *msg, int response) { @@ -516,6 +526,15 @@ static int vhost_user_init(struct virtio_uml_device *vu_dev) return rc; } + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_MQ)) { + rc = vhost_user_get_queue_num(vu_dev, &vu_dev->max_vqs); + if (rc) + return rc; + } else { + vu_dev->max_vqs = U64_MAX; + } + return 0; } @@ -625,7 +644,7 @@ static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev) { struct vhost_user_msg msg = { .header.request = VHOST_USER_SET_MEM_TABLE, - .header.size = sizeof(msg.payload.mem_regions), + .header.size = offsetof(typeof(msg.payload.mem_regions), regions[1]), .payload.mem_regions.num = 1, }; unsigned long reserved = uml_reserved - uml_physmem; @@ -673,13 +692,6 @@ static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev) if (rc < 0) return rc; - if (highmem) { - msg.payload.mem_regions.num++; - rc = vhost_user_init_mem_region(__pa(end_iomem), highmem, - &fds[1], &msg.payload.mem_regions.regions[1]); - if (rc < 0) - return rc; - } return vhost_user_send(vu_dev, false, &msg, fds, msg.payload.mem_regions.num); @@ -897,7 +909,7 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, { struct virtio_uml_vq_info *info = vq->priv; int call_fds[2]; - int rc; + int rc, irq; /* no call FD needed/desired in this case */ if (vu_dev->protocol_features & @@ -914,19 +926,23 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, return rc; info->call_fd = call_fds[0]; - rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ, - vu_interrupt, IRQF_SHARED, info->name, vq); - if (rc < 0) + irq = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ, + vu_interrupt, IRQF_SHARED, info->name, vq); + if (irq < 0) { + rc = irq; goto close_both; + } rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]); if (rc) goto release_irq; + vu_dev->irq = irq; + goto out; release_irq: - um_free_irq(vu_dev->irq, vq); + um_free_irq(irq, vq); close_both: os_close_file(call_fds[0]); out: @@ -1023,7 +1039,9 @@ static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vq; /* not supported for now */ - if (WARN_ON(nvqs > 64)) + if (WARN(nvqs > 64 || nvqs > vu_dev->max_vqs, + "%d VQs requested, only up to 64 or %lld supported\n", + nvqs, vu_dev->max_vqs)) return -EINVAL; rc = vhost_user_set_mem_table(vu_dev); @@ -1210,6 +1228,7 @@ static int virtio_uml_probe(struct platform_device *pdev) vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID; vu_dev->pdev = pdev; vu_dev->req_fd = -1; + vu_dev->irq = UM_IRQ_ALLOC; time_travel_propagate_time(); @@ -1446,7 +1465,7 @@ static int virtio_uml_resume(struct platform_device *pdev) static struct platform_driver virtio_uml_driver = { .probe = virtio_uml_probe, - .remove_new = virtio_uml_remove, + .remove = virtio_uml_remove, .driver = { .name = "virtio-uml", .of_match_table = virtio_uml_match, diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 18f902da8e99..428f2c5158c2 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild 
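The header.size computation in vhost_user_set_mem_table() above leans on a useful idiom: offsetof(type, array[n]) gives the size of a structure truncated to its first n array elements, so a message carrying a single region no longer claims space for the whole fixed-capacity array. A self-contained sketch (the struct layout and capacity are illustrative; a non-constant index inside offsetof() is a GNU C extension the kernel build permits):

#include <stddef.h>

struct mem_region {
	unsigned long long gpa, size, uaddr, mmap_offset;
};

struct mem_regions {
	unsigned int num;
	unsigned int padding;
	struct mem_region regions[8];	/* illustrative capacity */
};

/* bytes needed on the wire for a message carrying n regions */
static size_t mem_regions_msg_size(unsigned int n)
{
	return offsetof(struct mem_regions, regions[n]);
}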
@@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += bug.h generic-y += compat.h -generic-y += current.h generic-y += device.h generic-y += dma-mapping.h generic-y += emergency-restart.h diff --git a/arch/um/include/asm/current.h b/arch/um/include/asm/current.h new file mode 100644 index 000000000000..de64e032d66c --- /dev/null +++ b/arch/um/include/asm/current.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_CURRENT_H +#define __ASM_CURRENT_H + +#include <linux/compiler.h> +#include <linux/threads.h> + +#ifndef __ASSEMBLY__ + +struct task_struct; +extern struct task_struct *cpu_tasks[NR_CPUS]; + +static __always_inline struct task_struct *get_current(void) +{ + return cpu_tasks[0]; +} + + +#define current get_current() + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_CURRENT_H */ diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 834313ecd3d6..3d516f3ca9c7 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -29,51 +29,35 @@ struct page; #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) -#if defined(CONFIG_3_LEVEL_PGTABLES) && !defined(CONFIG_64BIT) - typedef struct { unsigned long pte; } pte_t; -typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pgd; } pgd_t; -#define pte_val(p) ((p).pte) -#define pte_get_bits(p, bits) ((p).pte & (bits)) -#define pte_set_bits(p, bits) ((p).pte |= (bits)) -#define pte_clear_bits(p, bits) ((p).pte &= ~(bits)) -#define pte_copy(to, from) ({ (to).pte = (from).pte; }) -#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE)) -#define pte_set_val(p, phys, prot) \ - ({ (p).pte = (phys) | pgprot_val(prot); }) +#if CONFIG_PGTABLE_LEVELS > 2 +typedef struct { unsigned long pmd; } pmd_t; #define pmd_val(x) ((x).pmd) #define __pmd(x) ((pmd_t) { (x) } ) -typedef unsigned long long phys_t; +#if CONFIG_PGTABLE_LEVELS > 3 -#else - -typedef struct { unsigned long pte; } pte_t; -typedef struct { unsigned long pgd; } pgd_t; +typedef struct { unsigned long pud; } pud_t; +#define pud_val(x) ((x).pud) +#define __pud(x) ((pud_t) { (x) } ) -#ifdef CONFIG_3_LEVEL_PGTABLES -typedef struct { unsigned long pmd; } pmd_t; -#define pmd_val(x) ((x).pmd) -#define __pmd(x) ((pmd_t) { (x) } ) -#endif +#endif /* CONFIG_PGTABLE_LEVELS > 3 */ +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ #define pte_val(x) ((x).pte) - #define pte_get_bits(p, bits) ((p).pte & (bits)) #define pte_set_bits(p, bits) ((p).pte |= (bits)) #define pte_clear_bits(p, bits) ((p).pte &= ~(bits)) #define pte_copy(to, from) ((to).pte = (from).pte) -#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE)) +#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEEDSYNC)) #define pte_set_val(p, phys, prot) (p).pte = (phys | pgprot_val(prot)) typedef unsigned long phys_t; -#endif - typedef struct { unsigned long pgprot; } pgprot_t; typedef struct page *pgtable_t; diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index de5e31c64793..04fb4e6969a4 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -31,7 +31,7 @@ do { \ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) -#ifdef CONFIG_3_LEVEL_PGTABLES +#if CONFIG_PGTABLE_LEVELS > 2 #define __pmd_free_tlb(tlb, pmd, address) \ do { \ @@ -39,6 +39,15 @@ do { \ tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ } while (0) +#if CONFIG_PGTABLE_LEVELS > 3 + +#define __pud_free_tlb(tlb, pud, address) \ +do { \ + pagetable_pud_dtor(virt_to_ptdesc(pud)); \ + 
tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud)); \ +} while (0) + +#endif #endif #endif diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h index 8256ecc5b919..ab0c8dd86564 100644 --- a/arch/um/include/asm/pgtable-2level.h +++ b/arch/um/include/asm/pgtable-2level.h @@ -31,7 +31,7 @@ printk("%s:%d: bad pgd %p(%08lx).\n", __FILE__, __LINE__, &(e), \ pgd_val(e)) -static inline int pgd_newpage(pgd_t pgd) { return 0; } +static inline int pgd_needsync(pgd_t pgd) { return 0; } static inline void pgd_mkuptodate(pgd_t pgd) { } #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-4level.h index 8a5032ec231f..0d279caee93c 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-4level.h @@ -4,21 +4,25 @@ * Derived from include/asm-i386/pgtable.h */ -#ifndef __UM_PGTABLE_3LEVEL_H -#define __UM_PGTABLE_3LEVEL_H +#ifndef __UM_PGTABLE_4LEVEL_H +#define __UM_PGTABLE_4LEVEL_H -#include <asm-generic/pgtable-nopud.h> +#include <asm-generic/pgtable-nop4d.h> -/* PGDIR_SHIFT determines what a third-level page table entry can map */ +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ -#ifdef CONFIG_64BIT -#define PGDIR_SHIFT 30 -#else -#define PGDIR_SHIFT 31 -#endif +#define PGDIR_SHIFT 39 #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) +/* PUD_SHIFT determines the size of the area a third-level page table can + * map + */ + +#define PUD_SHIFT 30 +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + /* PMD_SHIFT determines the size of the area a second-level page table can * map */ @@ -32,13 +36,9 @@ */ #define PTRS_PER_PTE 512 -#ifdef CONFIG_64BIT #define PTRS_PER_PMD 512 +#define PTRS_PER_PUD 512 #define PTRS_PER_PGD 512 -#else -#define PTRS_PER_PMD 1024 -#define PTRS_PER_PGD 1024 -#endif #define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE) @@ -48,11 +48,14 @@ #define pmd_ERROR(e) \ printk("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), \ pmd_val(e)) +#define pud_ERROR(e) \ + printk("%s:%d: bad pud %p(%016lx).\n", __FILE__, __LINE__, &(e), \ + pud_val(e)) #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), \ pgd_val(e)) -#define pud_none(x) (!(pud_val(x) & ~_PAGE_NEWPAGE)) +#define pud_none(x) (!(pud_val(x) & ~_PAGE_NEEDSYNC)) #define pud_bad(x) ((pud_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pud_present(x) (pud_val(x) & _PAGE_PRESENT) #define pud_populate(mm, pud, pmd) \ @@ -60,23 +63,40 @@ #define set_pud(pudptr, pudval) (*(pudptr) = (pudval)) -static inline int pgd_newpage(pgd_t pgd) +#define p4d_none(x) (!(p4d_val(x) & ~_PAGE_NEEDSYNC)) +#define p4d_bad(x) ((p4d_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) +#define p4d_present(x) (p4d_val(x) & _PAGE_PRESENT) +#define p4d_populate(mm, p4d, pud) \ + set_p4d(p4d, __p4d(_PAGE_TABLE + __pa(pud))) + +#define set_p4d(p4dptr, p4dval) (*(p4dptr) = (p4dval)) + + +static inline int pgd_needsync(pgd_t pgd) { - return(pgd_val(pgd) & _PAGE_NEWPAGE); + return pgd_val(pgd) & _PAGE_NEEDSYNC; } -static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; } +static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEEDSYNC; } #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) static inline void pud_clear (pud_t *pud) { - set_pud(pud, __pud(_PAGE_NEWPAGE)); + set_pud(pud, __pud(_PAGE_NEEDSYNC)); +} + +static inline void p4d_clear 
(p4d_t *p4d) +{ + set_p4d(p4d, __p4d(_PAGE_NEEDSYNC)); } #define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK) #define pud_pgtable(pud) ((pmd_t *) __va(pud_val(pud) & PAGE_MASK)) +#define p4d_page(p4d) phys_to_page(p4d_val(p4d) & PAGE_MASK) +#define p4d_pgtable(p4d) ((pud_t *) __va(p4d_val(p4d) & PAGE_MASK)) + static inline unsigned long pte_pfn(pte_t pte) { return phys_to_pfn(pte_val(pte)); @@ -97,4 +117,3 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) } #endif - diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index faab5a2a4b06..0bd60afcc37d 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -11,8 +11,7 @@ #include <asm/fixmap.h> #define _PAGE_PRESENT 0x001 -#define _PAGE_NEWPAGE 0x002 -#define _PAGE_NEWPROT 0x004 +#define _PAGE_NEEDSYNC 0x002 #define _PAGE_RW 0x020 #define _PAGE_USER 0x040 #define _PAGE_ACCESSED 0x080 @@ -24,10 +23,12 @@ /* We borrow bit 10 to store the exclusive marker in swap PTEs. */ #define _PAGE_SWP_EXCLUSIVE 0x400 -#ifdef CONFIG_3_LEVEL_PGTABLES -#include <asm/pgtable-3level.h> -#else +#if CONFIG_PGTABLE_LEVELS == 4 +#include <asm/pgtable-4level.h> +#elif CONFIG_PGTABLE_LEVELS == 2 #include <asm/pgtable-2level.h> +#else +#error "Unsupported number of page table levels" #endif extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; @@ -78,22 +79,22 @@ extern unsigned long end_iomem; */ #define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) -#define pte_clear(mm,addr,xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEWPAGE)) +#define pte_clear(mm, addr, xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEEDSYNC)) -#define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEWPAGE)) +#define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEEDSYNC)) #define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) -#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0) +#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEEDSYNC; } while (0) -#define pmd_newpage(x) (pmd_val(x) & _PAGE_NEWPAGE) -#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEWPAGE) +#define pmd_needsync(x) (pmd_val(x) & _PAGE_NEEDSYNC) +#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEEDSYNC) -#define pud_newpage(x) (pud_val(x) & _PAGE_NEWPAGE) -#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE) +#define pud_needsync(x) (pud_val(x) & _PAGE_NEEDSYNC) +#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEEDSYNC) -#define p4d_newpage(x) (p4d_val(x) & _PAGE_NEWPAGE) -#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEWPAGE) +#define p4d_needsync(x) (p4d_val(x) & _PAGE_NEEDSYNC) +#define p4d_mkuptodate(x) (p4d_val(x) &= ~_PAGE_NEEDSYNC) #define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT) #define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK) @@ -144,14 +145,9 @@ static inline int pte_young(pte_t pte) return pte_get_bits(pte, _PAGE_ACCESSED); } -static inline int pte_newpage(pte_t pte) +static inline int pte_needsync(pte_t pte) { - return pte_get_bits(pte, _PAGE_NEWPAGE); -} - -static inline int pte_newprot(pte_t pte) -{ - return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT))); + return pte_get_bits(pte, _PAGE_NEEDSYNC); } /* @@ -160,12 +156,6 @@ static inline int pte_newprot(pte_t pte) * ================================= */ -static inline pte_t pte_mknewprot(pte_t pte) -{ - pte_set_bits(pte, _PAGE_NEWPROT); - return(pte); -} - static inline pte_t pte_mkclean(pte_t pte) { pte_clear_bits(pte, _PAGE_DIRTY); @@ -180,19 +170,14 @@ static 
inline pte_t pte_mkold(pte_t pte) static inline pte_t pte_wrprotect(pte_t pte) { - if (likely(pte_get_bits(pte, _PAGE_RW))) - pte_clear_bits(pte, _PAGE_RW); - else - return pte; - return(pte_mknewprot(pte)); + pte_clear_bits(pte, _PAGE_RW); + return pte; } static inline pte_t pte_mkread(pte_t pte) { - if (unlikely(pte_get_bits(pte, _PAGE_USER))) - return pte; pte_set_bits(pte, _PAGE_USER); - return(pte_mknewprot(pte)); + return pte; } static inline pte_t pte_mkdirty(pte_t pte) @@ -209,23 +194,19 @@ static inline pte_t pte_mkyoung(pte_t pte) static inline pte_t pte_mkwrite_novma(pte_t pte) { - if (unlikely(pte_get_bits(pte, _PAGE_RW))) - return pte; pte_set_bits(pte, _PAGE_RW); - return(pte_mknewprot(pte)); + return pte; } static inline pte_t pte_mkuptodate(pte_t pte) { - pte_clear_bits(pte, _PAGE_NEWPAGE); - if(pte_present(pte)) - pte_clear_bits(pte, _PAGE_NEWPROT); - return(pte); + pte_clear_bits(pte, _PAGE_NEEDSYNC); + return pte; } -static inline pte_t pte_mknewpage(pte_t pte) +static inline pte_t pte_mkneedsync(pte_t pte) { - pte_set_bits(pte, _PAGE_NEWPAGE); + pte_set_bits(pte, _PAGE_NEEDSYNC); return(pte); } @@ -233,13 +214,11 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval) { pte_copy(*pteptr, pteval); - /* If it's a swap entry, it needs to be marked _PAGE_NEWPAGE so - * fix_range knows to unmap it. _PAGE_NEWPROT is specific to - * mapped pages. + /* If it's a swap entry, it needs to be marked _PAGE_NEEDSYNC so + * update_pte_range knows to unmap it. */ - *pteptr = pte_mknewpage(*pteptr); - if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr); + *pteptr = pte_mkneedsync(*pteptr); } #define PFN_PTE_SHIFT PAGE_SHIFT @@ -279,7 +258,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t pte_a, pte_t pte_b) { - return !((pte_val(pte_a) ^ pte_val(pte_b)) & ~_PAGE_NEWPAGE); + return !((pte_val(pte_a) ^ pte_val(pte_b)) & ~_PAGE_NEEDSYNC); } /* @@ -294,8 +273,6 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) ({ pte_t pte; \ \ pte_set_val(pte, page_to_phys(page), (pgprot)); \ - if (pte_present(pte)) \ - pte_mknewprot(pte_mknewpage(pte)); \ pte;}) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) @@ -329,7 +306,7 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); * <--------------- offset ----------------> E < type -> 0 0 0 1 0 * * E is the exclusive marker that is not stored in swap entries. - * _PAGE_NEWPAGE (bit 1) is always set to 1 in set_pte(). + * _PAGE_NEEDSYNC (bit 1) is always set to 1 in set_pte(). 
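Reading the diagram against the extraction macros that follow: the swap type occupies five bits starting at bit 5, the exclusive marker E is bit 10 (_PAGE_SWP_EXCLUSIVE, 0x400), and the offset starts at bit 11, with bit 1 reserved so set_pte() can tag the entry with _PAGE_NEEDSYNC. A small sketch of composing a raw value in this encoding (illustrative only; real code goes through __swp_entry() and mk_swap_pte()):

static unsigned long swp_val_sketch(unsigned int type, unsigned long offset)
{
	/* bits 0-4 stay clear here; set_pte() sets bit 1 (_PAGE_NEEDSYNC),
	 * and pte_swp_mkexclusive() would set bit 10 (E) separately */
	return (offset << 11) | ((unsigned long)(type & 0x1f) << 5);
}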
*/ #define __swp_type(x) (((x).val >> 5) & 0x1f) #define __swp_offset(x) ((x).val >> 11) diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index bce4595798da..5d6356eafffe 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -20,10 +20,7 @@ struct task_struct; struct mm_struct; struct thread_struct { - struct pt_regs regs; struct pt_regs *segv_regs; - void *fault_addr; - jmp_buf *fault_catcher; struct task_struct *prev_sched; struct arch_thread arch; jmp_buf switch_buf; @@ -33,12 +30,14 @@ struct thread_struct { void *arg; } thread; } request; + + /* Contains variable sized FP registers */ + struct pt_regs regs; }; #define INIT_THREAD \ { \ .regs = EMPTY_REGS, \ - .fault_addr = NULL, \ .prev_sched = NULL, \ .arch = INIT_ARCH_THREAD, \ .request = { } \ diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index c7b4b49826a2..f9ad06fcc991 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -17,35 +17,17 @@ #include <sysdep/ptrace_user.h> struct thread_info { - struct task_struct *task; /* main task structure */ unsigned long flags; /* low level flags */ __u32 cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ - struct thread_info *real_thread; /* Points to non-IRQ stack */ - unsigned long aux_fp_regs[FP_SIZE]; /* auxiliary fp_regs to save/restore - them out-of-band */ }; #define INIT_THREAD_INFO(tsk) \ { \ - .task = &tsk, \ .flags = 0, \ .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .real_thread = NULL, \ -} - -/* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - unsigned long mask = THREAD_SIZE - 1; - void *p; - - asm volatile ("" : "=r" (p) : "0" (&ti)); - ti = (struct thread_info *) (((unsigned long)p) & ~mask); - return ti; } #endif diff --git a/arch/um/include/asm/tlbflush.h b/arch/um/include/asm/tlbflush.h index db997976b6ea..13a3009942be 100644 --- a/arch/um/include/asm/tlbflush.h +++ b/arch/um/include/asm/tlbflush.h @@ -9,8 +9,8 @@ #include <linux/mm.h> /* - * In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls - * from the process handling the MM (which can be the kernel itself). + * In UML, we need to sync the TLB over by using mmap/munmap syscalls from + * the process handling the MM (which can be the kernel itself). * * To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes * we catch all PTE transitions where memory that was unusable becomes usable. 
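The rewritten tlbflush.h comment pairs with the map()/unmap() prototypes declared in os.h below: the process that handles the MM replays PTE transitions as host mmap/munmap syscalls. A rough sketch of one replay step using those two prototypes; the pte_host_*() helpers are hypothetical stand-ins for the real protection/fd/offset extraction:

static void um_sync_one_pte(struct mm_id *mm_idp, unsigned long addr, pte_t pte)
{
	if (pte_present(pte)) {
		/* memory became usable: mirror it into the host mapping */
		map(mm_idp, addr, PAGE_SIZE, pte_host_prot(pte),
		    pte_host_fd(pte), pte_host_offset(pte));
	} else {
		/* e.g. a swap entry that set_pte() marked _PAGE_NEEDSYNC */
		unmap(mm_idp, addr, PAGE_SIZE);
	}
}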
diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h index 06292fca5a4d..ea65f151bf48 100644 --- a/arch/um/include/shared/as-layout.h +++ b/arch/um/include/shared/as-layout.h @@ -30,25 +30,23 @@ #include <sysdep/ptrace.h> -struct cpu_task { - void *task; -}; +struct task_struct; +extern struct task_struct *cpu_tasks[]; -extern struct cpu_task cpu_tasks[]; +extern unsigned long long physmem_size; extern unsigned long high_physmem; extern unsigned long uml_physmem; extern unsigned long uml_reserved; extern unsigned long end_vm; extern unsigned long start_vm; -extern unsigned long long highmem; extern unsigned long brk_start; extern unsigned long host_task_size; extern unsigned long stub_start; -extern int linux_main(int argc, char **argv); +extern int linux_main(int argc, char **argv, char **envp); extern void uml_finishsetup(void); struct siginfo; diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 579ed946a3a9..73f3a4792ed8 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -6,7 +6,6 @@ DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE); DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); -DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); DEFINE(UM_GFP_KERNEL, GFP_KERNEL); DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC); @@ -15,17 +14,3 @@ DEFINE(UM_THREAD_SIZE, THREAD_SIZE); DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); - -#ifdef CONFIG_PRINTK -DEFINE(UML_CONFIG_PRINTK, CONFIG_PRINTK); -#endif -#ifdef CONFIG_UML_X86 -DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86); -#endif -#ifdef CONFIG_64BIT -DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT); -#endif -#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT -DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT); -#endif - diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index d8ffd2db168e..f21dc8517538 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -60,7 +60,6 @@ extern unsigned long from_irq_stack(int nested); extern int singlestepping(void); extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); -extern void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs); extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); extern void fatal_sigsegv(void) __attribute__ ((noreturn)); diff --git a/arch/um/include/shared/mem_user.h b/arch/um/include/shared/mem_user.h index 11a723a58545..adfa08062f88 100644 --- a/arch/um/include/shared/mem_user.h +++ b/arch/um/include/shared/mem_user.h @@ -47,10 +47,9 @@ extern int iomem_size; #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) extern unsigned long find_iomem(char *driver, unsigned long *len_out); -extern void mem_total_pages(unsigned long physmem, unsigned long iomem, - unsigned long highmem); +extern void mem_total_pages(unsigned long physmem, unsigned long iomem); extern void setup_physmem(unsigned long start, unsigned long usable, - unsigned long len, unsigned long long highmem); + unsigned long len); extern void map_memory(unsigned long virt, unsigned long phys, unsigned long len, int r, int w, int x); diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 9a039d6f1f74..5babad8c5f75 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -145,7 +145,6 @@ extern int os_ioctl_generic(int fd, 
unsigned int cmd, unsigned long arg); extern int os_get_ifname(int fd, char *namebuf); extern int os_set_slip(int fd); extern int os_mode_fd(int fd, int mode); -extern int os_fsync_file(int fd); extern int os_seek_file(int fd, unsigned long long offset); extern int os_open_file(const char *file, struct openflags flags, int mode); @@ -199,15 +198,11 @@ extern int create_mem_file(unsigned long long len); extern void report_enomem(void); /* process.c */ -extern unsigned long os_process_pc(int pid); -extern int os_process_parent(int pid); extern void os_alarm_process(int pid); -extern void os_stop_process(int pid); extern void os_kill_process(int pid, int reap_child); extern void os_kill_ptraced_process(int pid, int reap_child); extern int os_getpid(void); -extern int os_getpgrp(void); extern void init_new_thread_signals(void); @@ -220,6 +215,8 @@ extern int os_drop_memory(void *addr, int length); extern int can_drop_memory(void); extern int os_mincore(void *addr, unsigned long len); +void os_set_pdeathsig(void); + /* execvp.c */ extern int execvp_noalloc(char *buf, const char *file, char *const argv[]); /* helper.c */ @@ -244,7 +241,6 @@ extern void block_signals(void); extern void unblock_signals(void); extern int um_set_signals(int enable); extern int um_set_signals_trace(int enable); -extern int os_is_signal_stack(void); extern void deliver_alarm(void); extern void register_pm_wake_signal(void); extern void block_signals_hard(void); @@ -283,13 +279,11 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot, int phys_fd, unsigned long long offset); int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len); -int protect(struct mm_id *mm_idp, unsigned long addr, - unsigned long len, unsigned int prot); /* skas/process.c */ extern int is_skas_winch(int pid, int fd, void *data); extern int start_userspace(unsigned long stub_stack); -extern void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs); +extern void userspace(struct uml_pt_regs *regs); extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)); extern void switch_threads(jmp_buf *me, jmp_buf *you); extern int start_idle_thread(void *stack, jmp_buf *switch_buf); @@ -329,9 +323,6 @@ extern int __ignore_sigio_fd(int fd); /* tty.c */ extern int get_pty(void); -/* sys-$ARCH/task_size.c */ -extern unsigned long os_get_top_address(void); - long syscall(long number, ...); /* irqflags tracing */ diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h index a0450326521c..7d81b2339a48 100644 --- a/arch/um/include/shared/registers.h +++ b/arch/um/include/shared/registers.h @@ -8,12 +8,6 @@ #include <sysdep/ptrace.h> -extern int save_i387_registers(int pid, unsigned long *fp_regs); -extern int restore_i387_registers(int pid, unsigned long *fp_regs); -extern int save_fp_registers(int pid, unsigned long *fp_regs); -extern int restore_fp_registers(int pid, unsigned long *fp_regs); -extern int save_fpx_registers(int pid, unsigned long *fp_regs); -extern int restore_fpx_registers(int pid, unsigned long *fp_regs); extern int init_pid_registers(int pid); extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs); extern int get_fp_registers(int pid, unsigned long *regs); diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h index 2b6b44759dfa..81a4cace032c 100644 --- a/arch/um/include/shared/skas/stub-data.h +++ b/arch/um/include/shared/skas/stub-data.h @@ -12,6 +12,17 @@ #include <as-layout.h> #include 
<sysdep/tls.h> +struct stub_init_data { + unsigned long stub_start; + + int stub_code_fd; + unsigned long stub_code_offset; + int stub_data_fd; + unsigned long stub_data_offset; + + unsigned long segv_handler; +}; + #define STUB_NEXT_SYSCALL(s) \ ((struct stub_syscall *) (((unsigned long) s) + (s)->cmd_len)) @@ -19,7 +30,6 @@ enum stub_syscall_type { STUB_SYSCALL_UNSET = 0, STUB_SYSCALL_MMAP, STUB_SYSCALL_MUNMAP, - STUB_SYSCALL_MPROTECT, }; struct stub_syscall { diff --git a/arch/um/include/shared/timetravel.h b/arch/um/include/shared/timetravel.h index c8db2f213dba..7c2b277b7eb0 100644 --- a/arch/um/include/shared/timetravel.h +++ b/arch/um/include/shared/timetravel.h @@ -12,14 +12,13 @@ enum time_travel_mode { TT_MODE_EXTERNAL, }; -#if defined(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT) || \ - defined(CONFIG_UML_TIME_TRAVEL_SUPPORT) +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) extern enum time_travel_mode time_travel_mode; extern int time_travel_should_print_bc_msg; #else #define time_travel_mode TT_MODE_OFF #define time_travel_should_print_bc_msg 0 -#endif /* (UML_)CONFIG_UML_TIME_TRAVEL_SUPPORT */ +#endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ void _time_travel_print_bc_msg(void); static inline void time_travel_print_bc_msg(void) diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h index bbab79c0c074..139eb78a4767 100644 --- a/arch/um/include/shared/user.h +++ b/arch/um/include/shared/user.h @@ -38,7 +38,7 @@ extern void panic(const char *fmt, ...) #define UM_KERN_DEBUG KERN_DEBUG #define UM_KERN_CONT KERN_CONT -#ifdef UML_CONFIG_PRINTK +#if IS_ENABLED(CONFIG_PRINTK) #define printk(...) _printk(__VA_ARGS__) extern int _printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); diff --git a/arch/um/kernel/dtb.c b/arch/um/kernel/dtb.c index 8d78ced9e08f..15c342426489 100644 --- a/arch/um/kernel/dtb.c +++ b/arch/um/kernel/dtb.c @@ -31,6 +31,7 @@ void uml_dtb_init(void) static int __init uml_dtb_setup(char *line, int *add) { + *add = 0; dtb = line; return 0; } diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 3385d653ebd0..a36b7918a011 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -116,8 +116,6 @@ SECTIONS .fini_array : { *(.fini_array) } .data : { INIT_TASK_DATA(KERNEL_STACK_SIZE) - . = ALIGN(KERNEL_STACK_SIZE); - *(.data..init_irqstack) DATA_DATA *(.data.* .gnu.linkonce.d.*) SORT(CONSTRUCTORS) @@ -178,3 +176,6 @@ SECTIONS DISCARDS } + +ASSERT(__syscall_stub_end - __syscall_stub_start <= PAGE_SIZE, + "STUB code must not be larger than one page"); diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c index 47b8cb1a1156..99dba827461c 100644 --- a/arch/um/kernel/initrd.c +++ b/arch/um/kernel/initrd.c @@ -34,6 +34,7 @@ int __init read_initrd(void) static int __init uml_initrd_setup(char *line, int *add) { + *add = 0; initrd = line; return 0; } diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 534e91797f89..338450741aac 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -674,115 +674,3 @@ void __init init_IRQ(void) /* Initialize EPOLL Loop */ os_setup_epoll(); } - -/* - * IRQ stack entry and exit: - * - * Unlike i386, UML doesn't receive IRQs on the normal kernel stack - * and switch over to the IRQ stack after some preparation. We use - * sigaltstack to receive signals on a separate stack from the start. - * These two functions make sure the rest of the kernel won't be too - * upset by being on a different stack. 
The IRQ stack has a - * thread_info structure at the bottom so that current et al continue - * to work. - * - * to_irq_stack copies the current task's thread_info to the IRQ stack - * thread_info and sets the tasks's stack to point to the IRQ stack. - * - * from_irq_stack copies the thread_info struct back (flags may have - * been modified) and resets the task's stack pointer. - * - * Tricky bits - - * - * What happens when two signals race each other? UML doesn't block - * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal - * could arrive while a previous one is still setting up the - * thread_info. - * - * There are three cases - - * The first interrupt on the stack - sets up the thread_info and - * handles the interrupt - * A nested interrupt interrupting the copying of the thread_info - - * can't handle the interrupt, as the stack is in an unknown state - * A nested interrupt not interrupting the copying of the - * thread_info - doesn't do any setup, just handles the interrupt - * - * The first job is to figure out whether we interrupted stack setup. - * This is done by xchging the signal mask with thread_info->pending. - * If the value that comes back is zero, then there is no setup in - * progress, and the interrupt can be handled. If the value is - * non-zero, then there is stack setup in progress. In order to have - * the interrupt handled, we leave our signal in the mask, and it will - * be handled by the upper handler after it has set up the stack. - * - * Next is to figure out whether we are the outer handler or a nested - * one. As part of setting up the stack, thread_info->real_thread is - * set to non-NULL (and is reset to NULL on exit). This is the - * nesting indicator. If it is non-NULL, then the stack is already - * set up and the handler can run. - */ - -static unsigned long pending_mask; - -unsigned long to_irq_stack(unsigned long *mask_out) -{ - struct thread_info *ti; - unsigned long mask, old; - int nested; - - mask = xchg(&pending_mask, *mask_out); - if (mask != 0) { - /* - * If any interrupts come in at this point, we want to - * make sure that their bits aren't lost by our - * putting our bit in. So, this loop accumulates bits - * until xchg returns the same value that we put in. - * When that happens, there were no new interrupts, - * and pending_mask contains a bit for each interrupt - * that came in. 
- */ - old = *mask_out; - do { - old |= mask; - mask = xchg(&pending_mask, old); - } while (mask != old); - return 1; - } - - ti = current_thread_info(); - nested = (ti->real_thread != NULL); - if (!nested) { - struct task_struct *task; - struct thread_info *tti; - - task = cpu_tasks[ti->cpu].task; - tti = task_thread_info(task); - - *ti = *tti; - ti->real_thread = tti; - task->stack = ti; - } - - mask = xchg(&pending_mask, 0); - *mask_out |= mask | nested; - return 0; -} - -unsigned long from_irq_stack(int nested) -{ - struct thread_info *ti, *to; - unsigned long mask; - - ti = current_thread_info(); - - pending_mask = 1; - - to = ti->real_thread; - current->stack = to; - ti->real_thread = NULL; - *to = *ti; - - mask = xchg(&pending_mask, 0); - return mask & ~1; -} - diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index a5b4fe2ad931..53248ed04771 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -6,7 +6,6 @@ #include <linux/stddef.h> #include <linux/module.h> #include <linux/memblock.h> -#include <linux/highmem.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/slab.h> @@ -51,8 +50,6 @@ EXPORT_SYMBOL(empty_zero_page); pgd_t swapper_pg_dir[PTRS_PER_PGD]; /* Initialized at boot time, and readonly after that */ -unsigned long long highmem; -EXPORT_SYMBOL(highmem); int kmalloc_ok = 0; /* Used during early boot */ @@ -98,7 +95,7 @@ static void __init one_page_table_init(pmd_t *pmd) static void __init one_md_table_init(pud_t *pud) { -#ifdef CONFIG_3_LEVEL_PGTABLES +#if CONFIG_PGTABLE_LEVELS > 2 pmd_t *pmd_table = (pmd_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); if (!pmd_table) panic("%s: Failed to allocate %lu bytes align=%lx\n", @@ -109,6 +106,19 @@ static void __init one_md_table_init(pud_t *pud) #endif } +static void __init one_ud_table_init(p4d_t *p4d) +{ +#if CONFIG_PGTABLE_LEVELS > 3 + pud_t *pud_table = (pud_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + if (!pud_table) + panic("%s: Failed to allocate %lu bytes align=%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + + set_p4d(p4d, __p4d(_KERNPG_TABLE + (unsigned long) __pa(pud_table))); + BUG_ON(pud_table != pud_offset(p4d, 0)); +#endif +} + static void __init fixrange_init(unsigned long start, unsigned long end, pgd_t *pgd_base) { @@ -126,6 +136,8 @@ static void __init fixrange_init(unsigned long start, unsigned long end, for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { p4d = p4d_offset(pgd, vaddr); + if (p4d_none(*p4d)) + one_ud_table_init(p4d); pud = pud_offset(p4d, vaddr); if (pud_none(*pud)) one_md_table_init(pud); diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index fb2adfb49945..a74f17b033c4 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -22,19 +22,14 @@ static int physmem_fd = -1; unsigned long high_physmem; EXPORT_SYMBOL(high_physmem); -extern unsigned long long physmem_size; - -void __init mem_total_pages(unsigned long physmem, unsigned long iomem, - unsigned long highmem) +void __init mem_total_pages(unsigned long physmem, unsigned long iomem) { - unsigned long phys_pages, highmem_pages; - unsigned long iomem_pages, total_pages; + unsigned long phys_pages, iomem_pages, total_pages; - phys_pages = physmem >> PAGE_SHIFT; - iomem_pages = iomem >> PAGE_SHIFT; - highmem_pages = highmem >> PAGE_SHIFT; + phys_pages = physmem >> PAGE_SHIFT; + iomem_pages = iomem >> PAGE_SHIFT; - total_pages = phys_pages + iomem_pages + highmem_pages; + total_pages = phys_pages + iomem_pages; max_mapnr = total_pages; } @@ -64,13 +59,12 @@ void map_memory(unsigned 
long virt, unsigned long phys, unsigned long len, * @reserve_end: end address of the physical kernel memory. * @len: Length of total physical memory that should be mapped/made * available, in bytes. - * @highmem: Number of highmem bytes that should be mapped/made available. * - * Creates an unlinked temporary file of size (len + highmem) and memory maps + * Creates an unlinked temporary file of size (len) and memory maps * it on the last executable image address (uml_reserved). * * The offset is needed as the length of the total physical memory - * (len + highmem) includes the size of the memory used by the executable image, + * (len) includes the size of the memory used by the executable image, * but the mapped-to address is the last address of the executable image * (uml_reserved == end address of executable image). * * of all user space processes/kernel tasks. */ void __init setup_physmem(unsigned long start, unsigned long reserve_end, - unsigned long len, unsigned long long highmem) + unsigned long len) { unsigned long reserve = reserve_end - start; - long map_size = len - reserve; + unsigned long map_size = len - reserve; int err; - if(map_size <= 0) { + if (len <= reserve) { os_warn("Too little physical memory! Needed=%lu, given=%lu\n", reserve, len); exit(1); } - physmem_fd = create_mem_file(len + highmem); + physmem_fd = create_mem_file(len); err = os_map_memory((void *) reserve_end, physmem_fd, reserve, map_size, 1, 1, 1); if (err < 0) { - os_warn("setup_physmem - mapping %ld bytes of memory at 0x%p " + os_warn("setup_physmem - mapping %lu bytes of memory at 0x%p " "failed - errno = %d\n", map_size, (void *) reserve_end, err); exit(1); @@ -107,9 +101,8 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end, */ os_seek_file(physmem_fd, __pa(__syscall_stub_start)); os_write_file(physmem_fd, __syscall_stub_start, PAGE_SIZE); - os_fsync_file(physmem_fd); - memblock_add(__pa(start), len + highmem); + memblock_add(__pa(start), len); memblock_reserve(__pa(start), reserve); min_low_pfn = PFN_UP(__pa(reserve_end)); @@ -137,10 +130,6 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out) region = region->next; } } - else if (phys < __pa(end_iomem) + highmem) { - fd = physmem_fd; - *offset_out = phys - iomem_size; - } return fd; } @@ -149,6 +138,8 @@ EXPORT_SYMBOL(phys_mapping); static int __init uml_mem_setup(char *line, int *add) { char *retptr; + + *add = 0; physmem_size = memparse(line,&retptr); return 0; } diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index be2856af6d4c..30bdc0a87dc8 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -43,7 +43,8 @@ * cares about its entry, so it's OK if another processor is modifying its * entry. */ -struct cpu_task cpu_tasks[NR_CPUS] = { [0 ...
NR_CPUS - 1] = { NULL } }; +struct task_struct *cpu_tasks[NR_CPUS]; +EXPORT_SYMBOL(cpu_tasks); void free_stack(unsigned long stack, int order) { @@ -64,7 +65,7 @@ unsigned long alloc_stack(int order, int atomic) static inline void set_current(struct task_struct *task) { - cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task) { task }); + cpu_tasks[task_thread_info(task)->cpu] = task; } struct task_struct *__switch_to(struct task_struct *from, struct task_struct *to) @@ -116,7 +117,7 @@ void new_thread_handler(void) * callback returns only if the kernel thread execs a process */ fn(arg); - userspace(¤t->thread.regs.regs, current_thread_info()->aux_fp_regs); + userspace(¤t->thread.regs.regs); } /* Called magically, see new_thread_handler above */ @@ -133,7 +134,7 @@ static void fork_handler(void) current->thread.prev_sched = NULL; - userspace(¤t->thread.regs.regs, current_thread_info()->aux_fp_regs); + userspace(¤t->thread.regs.regs); } int copy_thread(struct task_struct * p, const struct kernel_clone_args *args) @@ -187,6 +188,13 @@ void initial_thread_cb(void (*proc)(void *), void *arg) kmalloc_ok = save_kmalloc_ok; } +int arch_dup_task_struct(struct task_struct *dst, + struct task_struct *src) +{ + memcpy(dst, src, arch_task_struct_size); + return 0; +} + void um_idle_sleep(void) { if (time_travel_mode != TT_MODE_OFF) @@ -287,11 +295,3 @@ unsigned long __get_wchan(struct task_struct *p) return 0; } - -int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu) -{ - int cpu = current_thread_info()->cpu; - - return save_i387_registers(userspace_pid[cpu], (unsigned long *) fpu); -} - diff --git a/arch/um/kernel/skas/.gitignore b/arch/um/kernel/skas/.gitignore new file mode 100644 index 000000000000..c3409ced0f38 --- /dev/null +++ b/arch/um/kernel/skas/.gitignore @@ -0,0 +1,2 @@ +stub_exe +stub_exe.dbg diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index 6f86d53e3d69..3384be42691f 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -3,14 +3,48 @@ # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) # -obj-y := stub.o mmu.o process.o syscall.o uaccess.o +obj-y := stub.o mmu.o process.o syscall.o uaccess.o \ + stub_exe_embed.o + +# Stub executable + +stub_exe_objs-y := stub_exe.o + +stub_exe_objs := $(foreach F,$(stub_exe_objs-y),$(obj)/$F) + +# Object file containing the ELF executable +$(obj)/stub_exe_embed.o: $(src)/stub_exe_embed.S $(obj)/stub_exe + +$(obj)/stub_exe.dbg: $(stub_exe_objs) FORCE + $(call if_changed,stub_exe) + +$(obj)/stub_exe: OBJCOPYFLAGS := -S +$(obj)/stub_exe: $(obj)/stub_exe.dbg FORCE + $(call if_changed,objcopy) + +quiet_cmd_stub_exe = STUB_EXE $@ + cmd_stub_exe = $(CC) -nostdlib -o $@ \ + $(filter-out $(UM_GPROF_OPT) $(UM_GCOV_OPT),$(KBUILD_CFLAGS)) $(STUB_EXE_LDFLAGS) \ + $(filter %.o,$^) + +STUB_EXE_LDFLAGS = -Wl,-n -static + +targets += stub_exe.dbg stub_exe $(stub_exe_objs-y) + +# end # stub.o is in the stub, so it can't be built with profiling # GCC hardened also auto-enables -fpic, but we need %ebx so it can't work -> # disable it CFLAGS_stub.o := $(CFLAGS_NO_HARDENING) -UNPROFILE_OBJS := stub.o +CFLAGS_stub_exe.o := $(CFLAGS_NO_HARDENING) + +# Clang will call memset() from __builtin_alloca() when stack variable +# initialization is enabled, which is used in stub_exe.c. 
+CFLAGS_stub_exe.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized) + +UNPROFILE_OBJS := stub.o stub_exe.o KCOV_INSTRUMENT := n include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 886ed5e65674..0eb5a1d3ba70 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -40,35 +40,13 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) goto out_free; } - /* - * Ensure the new MM is clean and nothing unwanted is mapped. - * - * TODO: We should clear the memory up to STUB_START to ensure there is - * nothing mapped there, i.e. we (currently) have: - * - * |- user memory -|- unused -|- stub -|- unused -| - * ^ TASK_SIZE ^ STUB_START - * - * Meaning we have two unused areas where we may still have valid - * mappings from our internal clone(). That isn't really a problem as - * userspace is not going to access them, but it is definitely not - * correct. - * - * However, we are "lucky" and if rseq is configured, then on 32 bit - * it will fall into the first empty range while on 64 bit it is going - * to use an anonymous mapping in the second range. As such, things - * continue to work for now as long as we don't start unmapping these - * areas. - * - * Change this to STUB_START once we have a clean userspace. - */ - unmap(new_id, 0, TASK_SIZE); + /* Ensure the new MM is clean and nothing unwanted is mapped */ + unmap(new_id, 0, STUB_START); return 0; out_free: - if (new_id->stack != 0) - free_pages(new_id->stack, ilog2(STUB_DATA_PAGES)); + free_pages(new_id->stack, ilog2(STUB_DATA_PAGES)); out: return ret; } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 68657988c8d1..05dcdc057af9 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -22,15 +22,13 @@ static int __init start_kernel_proc(void *unused) { block_signals_trace(); - cpu_tasks[0].task = current; - start_kernel(); return 0; } extern int userspace_pid[]; -extern char cpu0_irqstack[]; +static char cpu0_irqstack[THREAD_SIZE] __aligned(THREAD_SIZE); int __init start_uml(void) { diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c index 5d52ffa682dc..796fc266d3bb 100644 --- a/arch/um/kernel/skas/stub.c +++ b/arch/um/kernel/skas/stub.c @@ -35,16 +35,6 @@ static __always_inline int syscall_handler(struct stub_data *d) return -1; } break; - case STUB_SYSCALL_MPROTECT: - res = stub_syscall3(__NR_mprotect, - sc->mem.addr, sc->mem.length, - sc->mem.prot); - if (res) { - d->err = res; - d->syscall_data_len = i; - return -1; - } - break; default: d->err = -95; /* EOPNOTSUPP */ d->syscall_data_len = i; diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c new file mode 100644 index 000000000000..23c99b285e82 --- /dev/null +++ b/arch/um/kernel/skas/stub_exe.c @@ -0,0 +1,95 @@ +#include <sys/ptrace.h> +#include <sys/prctl.h> +#include <asm/unistd.h> +#include <sysdep/stub.h> +#include <stub-data.h> + +void _start(void); + +noinline static void real_init(void) +{ + struct stub_init_data init_data; + unsigned long res; + struct { + void *ss_sp; + int ss_flags; + size_t ss_size; + } stack = { + .ss_size = STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, + }; + struct { + void *sa_handler_; + unsigned long sa_flags; + void *sa_restorer; + unsigned long long sa_mask; + } sa = { + /* Need to set SA_RESTORER (but the handler never returns) */ + .sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000, + /* no need to mask any signals */ + .sa_mask = 0, + }; 
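/*
 * The open-coded structs above match the layouts the raw sigaltstack
 * and rt_sigaction syscalls expect, which differ from the libc types
 * (glibc orders the fields differently and uses a much larger sa_mask,
 * and its sigaction() wrapper normally translates between the two).
 * The stub is linked with -nostdlib, per the skas Makefile hunk
 * earlier, so no such wrapper runs here. On x86, the 0x04000000
 * literal is SA_RESTORER; libc usually sets it and supplies a return
 * trampoline, which can be skipped here only because the SIGSEGV
 * handler never returns.
 */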
+ + /* set a nice name */ + stub_syscall2(__NR_prctl, PR_SET_NAME, (unsigned long)"uml-userspace"); + + /* Make sure this process dies if the kernel dies */ + stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL); + + /* read information from STDIN and close it */ + res = stub_syscall3(__NR_read, 0, + (unsigned long)&init_data, sizeof(init_data)); + if (res != sizeof(init_data)) + stub_syscall1(__NR_exit, 10); + + stub_syscall1(__NR_close, 0); + + /* map stub code + data */ + res = stub_syscall6(STUB_MMAP_NR, + init_data.stub_start, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_EXEC, MAP_FIXED | MAP_SHARED, + init_data.stub_code_fd, init_data.stub_code_offset); + if (res != init_data.stub_start) + stub_syscall1(__NR_exit, 11); + + res = stub_syscall6(STUB_MMAP_NR, + init_data.stub_start + UM_KERN_PAGE_SIZE, + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, + init_data.stub_data_fd, init_data.stub_data_offset); + if (res != init_data.stub_start + UM_KERN_PAGE_SIZE) + stub_syscall1(__NR_exit, 12); + + /* setup signal stack inside stub data */ + stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE; + stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0); + + /* register SIGSEGV handler */ + sa.sa_handler_ = (void *) init_data.segv_handler; + res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0, + sizeof(sa.sa_mask)); + if (res != 0) + stub_syscall1(__NR_exit, 13); + + stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); + + stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP); + + stub_syscall1(__NR_exit, 14); + + __builtin_unreachable(); +} + +__attribute__((naked)) void _start(void) +{ + /* + * Since the stack after exec() starts at the top-most address, + * but that's exactly where we also want to map the stub data + * and code, this must: + * - push the stack by 1 code and STUB_DATA_PAGES data pages + * - call real_init() + * This way, real_init() can use the stack normally, while the + * original stack further down (higher address) will become + * inaccessible after the mmap() calls above. 
+ */ + stub_start(real_init); +} diff --git a/arch/um/kernel/skas/stub_exe_embed.S b/arch/um/kernel/skas/stub_exe_embed.S new file mode 100644 index 000000000000..6d8914fbe8f1 --- /dev/null +++ b/arch/um/kernel/skas/stub_exe_embed.S @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/init.h> +#include <linux/linkage.h> + +__INITDATA + +SYM_DATA_START(stub_exe_start) + .incbin "arch/um/kernel/skas/stub_exe" +SYM_DATA_END_LABEL(stub_exe_start, SYM_L_GLOBAL, stub_exe_end) + +__FINIT diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index 4bb8622dc512..13ee5666668d 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -32,12 +32,6 @@ void show_stack(struct task_struct *task, unsigned long *stack, struct pt_regs *segv_regs = current->thread.segv_regs; int i; - if (!segv_regs && os_is_signal_stack()) { - pr_err("Received SIGSEGV in SIGSEGV handler," - " aborting stack trace!\n"); - return; - } - if (!stack) stack = get_stack_pointer(task, segv_regs); @@ -52,5 +46,5 @@ void show_stack(struct task_struct *task, unsigned long *stack, } printk("%sCall Trace:\n", loglvl); - dump_trace(current, &stackops, (void *)loglvl); + dump_trace(task ?: current, &stackops, (void *)loglvl); } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 29b27b90581f..1394568c0210 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -25,6 +25,8 @@ #include <shared/init.h> #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +#include <linux/sched/clock.h> + enum time_travel_mode time_travel_mode; EXPORT_SYMBOL_GPL(time_travel_mode); @@ -47,6 +49,15 @@ static u16 time_travel_shm_id; static struct um_timetravel_schedshm *time_travel_shm; static union um_timetravel_schedshm_client *time_travel_shm_client; +unsigned long tt_extra_sched_jiffies; + +notrace unsigned long long sched_clock(void) +{ + return (unsigned long long)(jiffies - INITIAL_JIFFIES + + tt_extra_sched_jiffies) + * (NSEC_PER_SEC / HZ); +} + static void time_travel_set_time(unsigned long long ns) { if (unlikely(ns < time_travel_time)) @@ -443,6 +454,11 @@ static void time_travel_periodic_timer(struct time_travel_event *e) { time_travel_add_event(&time_travel_timer_event, time_travel_time + time_travel_timer_interval); + + /* clock tick; decrease extra jiffies by keeping sched_clock constant */ + if (tt_extra_sched_jiffies > 0) + tt_extra_sched_jiffies -= 1; + deliver_alarm(); } @@ -594,6 +610,10 @@ EXPORT_SYMBOL_GPL(time_travel_add_irq_event); static void time_travel_oneshot_timer(struct time_travel_event *e) { + /* clock tick; decrease extra jiffies by keeping sched_clock constant */ + if (tt_extra_sched_jiffies > 0) + tt_extra_sched_jiffies -= 1; + deliver_alarm(); } diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 548af31d4111..cf7e0d4407f2 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -23,9 +23,6 @@ struct vm_ops { int phys_fd, unsigned long long offset); int (*unmap)(struct mm_id *mm_idp, unsigned long virt, unsigned long len); - int (*mprotect)(struct mm_id *mm_idp, - unsigned long virt, unsigned long len, - unsigned int prot); }; static int kern_map(struct mm_id *mm_idp, @@ -44,15 +41,6 @@ static int kern_unmap(struct mm_id *mm_idp, return os_unmap_memory((void *)virt, len); } -static int kern_mprotect(struct mm_id *mm_idp, - unsigned long virt, unsigned long len, - unsigned int prot) -{ - return os_protect_memory((void *)virt, len, - prot & UM_PROT_READ, prot & UM_PROT_WRITE, - 1); -} - void report_enomem(void) { printk(KERN_ERR "UML ran out of memory on the host 
side! " @@ -65,33 +53,37 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr, struct vm_ops *ops) { pte_t *pte; - int r, w, x, prot, ret = 0; + int ret = 0; pte = pte_offset_kernel(pmd, addr); do { - r = pte_read(*pte); - w = pte_write(*pte); - x = pte_exec(*pte); - if (!pte_young(*pte)) { - r = 0; - w = 0; - } else if (!pte_dirty(*pte)) - w = 0; - - prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | - (x ? UM_PROT_EXEC : 0)); - if (pte_newpage(*pte)) { - if (pte_present(*pte)) { - __u64 offset; - unsigned long phys = pte_val(*pte) & PAGE_MASK; - int fd = phys_mapping(phys, &offset); - - ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE, - prot, fd, offset); - } else - ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); - } else if (pte_newprot(*pte)) - ret = ops->mprotect(ops->mm_idp, addr, PAGE_SIZE, prot); + if (!pte_needsync(*pte)) + continue; + + if (pte_present(*pte)) { + __u64 offset; + unsigned long phys = pte_val(*pte) & PAGE_MASK; + int fd = phys_mapping(phys, &offset); + int r, w, x, prot; + + r = pte_read(*pte); + w = pte_write(*pte); + x = pte_exec(*pte); + if (!pte_young(*pte)) { + r = 0; + w = 0; + } else if (!pte_dirty(*pte)) + w = 0; + + prot = (r ? UM_PROT_READ : 0) | + (w ? UM_PROT_WRITE : 0) | + (x ? UM_PROT_EXEC : 0); + + ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE, + prot, fd, offset); + } else + ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); + *pte = pte_mkuptodate(*pte); } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret)); return ret; @@ -109,7 +101,7 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr, do { next = pmd_addr_end(addr, end); if (!pmd_present(*pmd)) { - if (pmd_newpage(*pmd)) { + if (pmd_needsync(*pmd)) { ret = ops->unmap(ops->mm_idp, addr, next - addr); pmd_mkuptodate(*pmd); @@ -132,7 +124,7 @@ static inline int update_pud_range(p4d_t *p4d, unsigned long addr, do { next = pud_addr_end(addr, end); if (!pud_present(*pud)) { - if (pud_newpage(*pud)) { + if (pud_needsync(*pud)) { ret = ops->unmap(ops->mm_idp, addr, next - addr); pud_mkuptodate(*pud); @@ -155,7 +147,7 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr, do { next = p4d_addr_end(addr, end); if (!p4d_present(*p4d)) { - if (p4d_newpage(*p4d)) { + if (p4d_needsync(*p4d)) { ret = ops->unmap(ops->mm_idp, addr, next - addr); p4d_mkuptodate(*p4d); @@ -180,18 +172,16 @@ int um_tlb_sync(struct mm_struct *mm) if (mm == &init_mm) { ops.mmap = kern_map; ops.unmap = kern_unmap; - ops.mprotect = kern_mprotect; } else { ops.mmap = map; ops.unmap = unmap; - ops.mprotect = protect; } pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, mm->context.sync_tlb_range_to); if (!pgd_present(*pgd)) { - if (pgd_newpage(*pgd)) { + if (pgd_needsync(*pgd)) { ret = ops.unmap(ops.mm_idp, addr, next - addr); pgd_mkuptodate(*pgd); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 97c8df9c4401..cdaee3e94273 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -201,7 +201,6 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, struct uml_pt_regs *regs) { - jmp_buf *catcher; int si_code; int err; int is_write = FAULT_WRITE(fi); @@ -246,15 +245,8 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, address = 0; } - catcher = current->thread.fault_catcher; if (!err) goto out; - else if (catcher != NULL) { - current->thread.fault_addr = (void *) address; - UML_LONGJMP(catcher, 1); - } - else if 
(current->thread.fault_addr != NULL) - panic("fault_addr set but no fault catcher"); else if (!is_user && arch_fixup(ip, regs)) goto out; @@ -310,14 +302,6 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs) } } -void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs) -{ - if (current->thread.fault_catcher != NULL) - UML_LONGJMP(current->thread.fault_catcher, 1); - else - relay_signal(sig, si, regs); -} - void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { do_IRQ(WINCH_IRQ, regs); diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index e8e8b54b3037..8037a967225d 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -65,9 +65,6 @@ struct cpuinfo_um boot_cpu_data = { EXPORT_SYMBOL(boot_cpu_data); -union thread_union cpu0_irqstack - __section(".data..init_irqstack") = - { .thread_info = INIT_THREAD_INFO(init_task) }; /* Changed in setup_arch, which is called in early boot */ static char host_info[(__NEW_UTS_LEN + 1) * 5]; @@ -131,7 +128,7 @@ static int have_root __initdata; static int have_console __initdata; /* Set in uml_mem_setup and modified in linux_main */ -long long physmem_size = 64 * 1024 * 1024; +unsigned long long physmem_size = 64 * 1024 * 1024; EXPORT_SYMBOL(physmem_size); static const char *usage_string = @@ -167,19 +164,6 @@ __uml_setup("root=", uml_root_setup, " root=/dev/ubd5\n\n" ); -static int __init no_skas_debug_setup(char *line, int *add) -{ - os_warn("'debug' is not necessary to gdb UML in skas mode - run\n"); - os_warn("'gdb linux'\n"); - - return 0; -} - -__uml_setup("debug", no_skas_debug_setup, -"debug\n" -" this flag is not needed to run gdb on UML in skas mode\n\n" -); - static int __init uml_console_setup(char *line, int *add) { have_console = 1; @@ -257,6 +241,8 @@ static struct notifier_block panic_exit_notifier = { void uml_finishsetup(void) { + cpu_tasks[0] = &init_task; + atomic_notifier_chain_register(&panic_notifier_list, &panic_exit_notifier); @@ -302,7 +288,24 @@ static void parse_cache_line(char *line) } } -int __init linux_main(int argc, char **argv) +static unsigned long get_top_address(char **envp) +{ + unsigned long top_addr = (unsigned long) &top_addr; + int i; + + /* The earliest variable should be after the program name in ELF */ + for (i = 0; envp[i]; i++) { + if ((unsigned long) envp[i] > top_addr) + top_addr = (unsigned long) envp[i]; + } + + top_addr &= ~(UM_KERN_PAGE_SIZE - 1); + top_addr += UM_KERN_PAGE_SIZE; + + return top_addr; +} + +int __init linux_main(int argc, char **argv, char **envp) { unsigned long avail, diff; unsigned long virtmem_size, max_physmem; @@ -324,20 +327,23 @@ int __init linux_main(int argc, char **argv) if (have_console == 0) add_arg(DEFAULT_COMMAND_LINE_CONSOLE); - host_task_size = os_get_top_address(); - /* reserve a few pages for the stubs (taking care of data alignment) */ - /* align the data portion */ - BUILD_BUG_ON(!is_power_of_2(STUB_DATA_PAGES)); - stub_start = (host_task_size - 1) & ~(STUB_DATA_PAGES * PAGE_SIZE - 1); + host_task_size = get_top_address(envp); + /* reserve a few pages for the stubs */ + stub_start = host_task_size - STUB_DATA_PAGES * PAGE_SIZE; /* another page for the code portion */ stub_start -= PAGE_SIZE; host_task_size = stub_start; + /* Limit TASK_SIZE to what is addressable by the page table */ + task_size = host_task_size; + if (task_size > (unsigned long long) PTRS_PER_PGD * PGDIR_SIZE) + task_size = PTRS_PER_PGD * PGDIR_SIZE; + /* * TASK_SIZE needs to be PGDIR_SIZE aligned or else 
exit_mmap craps * out */ - task_size = host_task_size & PGDIR_MASK; + task_size = task_size & PGDIR_MASK; /* OS sanity checks that need to happen before the kernel runs */ os_early_checks(); @@ -366,18 +372,15 @@ int __init linux_main(int argc, char **argv) setup_machinename(init_utsname()->machine); - highmem = 0; + physmem_size = (physmem_size + PAGE_SIZE - 1) & PAGE_MASK; iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; + max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC; - /* - * Zones have to begin on a 1 << MAX_PAGE_ORDER page boundary, - * so this makes sure that's true for highmem - */ - max_physmem &= ~((1 << (PAGE_SHIFT + MAX_PAGE_ORDER)) - 1); if (physmem_size + iomem_size > max_physmem) { - highmem = physmem_size + iomem_size - max_physmem; - physmem_size -= highmem; + physmem_size = max_physmem - iomem_size; + os_info("Physical memory size shrunk to %llu bytes\n", + physmem_size); } high_physmem = uml_physmem + physmem_size; @@ -398,6 +401,8 @@ int __init linux_main(int argc, char **argv) os_info("Kernel virtual memory size shrunk to %lu bytes\n", virtmem_size); + arch_task_struct_size = sizeof(struct task_struct) + host_fp_size; + os_flush_stdout(); return start_uml(); @@ -412,9 +417,9 @@ void __init setup_arch(char **cmdline_p) { u8 rng_seed[32]; - stack_protections((unsigned long) &init_thread_info); - setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); - mem_total_pages(physmem_size, iomem_size, highmem); + stack_protections((unsigned long) init_task.stack); + setup_physmem(uml_physmem, uml_reserved, physmem_size); + mem_total_pages(physmem_size, iomem_size); uml_dtb_init(); read_initrd(); diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 5c92d58a78e8..a409d4b66114 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -77,8 +77,6 @@ SECTIONS .data : { INIT_TASK_DATA(KERNEL_STACK_SIZE) - . 
= ALIGN(KERNEL_STACK_SIZE); - *(.data..init_irqstack) DATA_DATA *(.gnu.linkonce.d*) CONSTRUCTORS diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 544e0b344c75..049dfa5bc9c6 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -12,6 +12,8 @@ obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \ CFLAGS_signal.o += -Wframe-larger-than=4096 +CFLAGS_main.o += -Wno-frame-larger-than + obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \ diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index f1d03cf3957f..a0d01c68ce3e 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -255,12 +255,6 @@ void os_close_file(int fd) { close(fd); } -int os_fsync_file(int fd) -{ - if (fsync(fd) < 0) - return -errno; - return 0; -} int os_seek_file(int fd, unsigned long long offset) { diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index f98ff79cdbf7..0afcdeb8995b 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -11,6 +11,7 @@ #include <signal.h> #include <string.h> #include <sys/resource.h> +#include <sys/personality.h> #include <as-layout.h> #include <init.h> #include <kern_util.h> @@ -108,6 +109,21 @@ int __init main(int argc, char **argv, char **envp) char **new_argv; int ret, i, err; + /* Disable randomization and re-exec if it was changed successfully */ + ret = personality(PER_LINUX | ADDR_NO_RANDOMIZE); + if (ret >= 0 && (ret & (PER_LINUX | ADDR_NO_RANDOMIZE)) != + (PER_LINUX | ADDR_NO_RANDOMIZE)) { + char buf[4096] = {}; + ssize_t ret; + + ret = readlink("/proc/self/exe", buf, sizeof(buf)); + if (ret < 0 || ret >= sizeof(buf)) { + perror("readlink failure"); + exit(1); + } + execve(buf, argv, envp); + } + set_stklim(); setup_env_path(); @@ -140,7 +156,7 @@ int __init main(int argc, char **argv, char **envp) #endif change_sig(SIGPIPE, 0); - ret = linux_main(argc, argv); + ret = linux_main(argc, argv, envp); /* * Disable SIGPROF - I have no idea why libc doesn't do this or turn @@ -182,6 +198,7 @@ int __init main(int argc, char **argv, char **envp) } extern void *__real_malloc(int); +extern void __real_free(void *); /* workaround for -Wmissing-prototypes warnings */ void *__wrap_malloc(int size); @@ -219,10 +236,6 @@ void *__wrap_calloc(int n, int size) return ptr; } -extern void __real_free(void *); - -extern unsigned long high_physmem; - void __wrap_free(void *ptr) { unsigned long addr = (unsigned long) ptr; diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index cf44d386f23c..72f302f4d197 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c @@ -39,10 +39,22 @@ void kasan_map_memory(void *start, size_t len) strerror(errno)); exit(1); } + + if (madvise(start, len, MADV_DONTDUMP)) { + os_info("Couldn't set MADV_DONTDUMP on shadow memory: %s.\n", + strerror(errno)); + exit(1); + } + + if (madvise(start, len, MADV_DONTFORK)) { + os_info("Couldn't set MADV_DONTFORK on shadow memory: %s.\n", + strerror(errno)); + exit(1); + } } /* Set by make_tempfile() during early boot. */ -static char *tempdir = NULL; +char *tempdir = NULL; /* Check if dir is on tmpfs. Return 0 if yes, -1 if no or error.
*/ static int __init check_tmpfs(const char *dir) diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index e52dd37ddadc..9f086f939420 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -12,94 +12,18 @@ #include <fcntl.h> #include <sys/mman.h> #include <sys/ptrace.h> +#include <sys/prctl.h> #include <sys/wait.h> #include <asm/unistd.h> #include <init.h> #include <longjmp.h> #include <os.h> -#define ARBITRARY_ADDR -1 -#define FAILURE_PID -1 - -#define STAT_PATH_LEN sizeof("/proc/#######/stat\0") -#define COMM_SCANF "%*[^)])" - -unsigned long os_process_pc(int pid) -{ - char proc_stat[STAT_PATH_LEN], buf[256]; - unsigned long pc = ARBITRARY_ADDR; - int fd, err; - - sprintf(proc_stat, "/proc/%d/stat", pid); - fd = open(proc_stat, O_RDONLY, 0); - if (fd < 0) { - printk(UM_KERN_ERR "os_process_pc - couldn't open '%s', " - "errno = %d\n", proc_stat, errno); - goto out; - } - CATCH_EINTR(err = read(fd, buf, sizeof(buf))); - if (err < 0) { - printk(UM_KERN_ERR "os_process_pc - couldn't read '%s', " - "err = %d\n", proc_stat, errno); - goto out_close; - } - os_close_file(fd); - pc = ARBITRARY_ADDR; - if (sscanf(buf, "%*d " COMM_SCANF " %*c %*d %*d %*d %*d %*d %*d %*d " - "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " - "%*d %*d %*d %*d %*d %lu", &pc) != 1) - printk(UM_KERN_ERR "os_process_pc - couldn't find pc in '%s'\n", - buf); - out_close: - close(fd); - out: - return pc; -} - -int os_process_parent(int pid) -{ - char stat[STAT_PATH_LEN]; - char data[256]; - int parent = FAILURE_PID, n, fd; - - if (pid == -1) - return parent; - - snprintf(stat, sizeof(stat), "/proc/%d/stat", pid); - fd = open(stat, O_RDONLY, 0); - if (fd < 0) { - printk(UM_KERN_ERR "Couldn't open '%s', errno = %d\n", stat, - errno); - return parent; - } - - CATCH_EINTR(n = read(fd, data, sizeof(data))); - close(fd); - - if (n < 0) { - printk(UM_KERN_ERR "Couldn't read '%s', errno = %d\n", stat, - errno); - return parent; - } - - parent = FAILURE_PID; - n = sscanf(data, "%*d " COMM_SCANF " %*c %d", &parent); - if (n != 1) - printk(UM_KERN_ERR "Failed to scan '%s'\n", data); - - return parent; -} - void os_alarm_process(int pid) { kill(pid, SIGALRM); } -void os_stop_process(int pid) -{ - kill(pid, SIGSTOP); -} - void os_kill_process(int pid, int reap_child) { kill(pid, SIGKILL); @@ -130,11 +54,6 @@ int os_getpid(void) return syscall(__NR_getpid); } -int os_getpgrp(void) -{ - return getpgrp(); -} - int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, int r, int w, int x) { @@ -285,3 +204,8 @@ void init_new_thread_signals(void) set_handler(SIGIO); signal(SIGWINCH, SIG_IGN); } + +void os_set_pdeathsig(void) +{ + prctl(PR_SET_PDEATHSIG, SIGKILL); +} diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c index bd80b921add0..d7ca148807b2 100644 --- a/arch/um/os-Linux/registers.c +++ b/arch/um/os-Linux/registers.c @@ -10,11 +10,12 @@ #include <sysdep/ptrace.h> #include <sysdep/ptrace_user.h> #include <registers.h> +#include <stdlib.h> /* This is set once at boot time and not changed thereafter */ static unsigned long exec_regs[MAX_REG_NR]; -static unsigned long exec_fp_regs[FP_SIZE]; +static unsigned long *exec_fp_regs; int init_pid_registers(int pid) { @@ -24,7 +25,11 @@ int init_pid_registers(int pid) if (err < 0) return -errno; - arch_init_registers(pid); + err = arch_init_registers(pid); + if (err < 0) + return err; + + exec_fp_regs = malloc(host_fp_size); get_fp_registers(pid, exec_fp_regs); return 0; } @@ -34,5 +39,5 @@ void 
get_safe_registers(unsigned long *regs, unsigned long *fp_regs) memcpy(regs, exec_regs, sizeof(exec_regs)); if (fp_regs) - memcpy(fp_regs, exec_fp_regs, sizeof(exec_fp_regs)); + memcpy(fp_regs, exec_fp_regs, host_fp_size); } diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index 9e71794839e8..9aac8def4d63 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -55,6 +55,7 @@ static int write_sigio_thread(void *unused) int i, n, respond_fd; char c; + os_set_pdeathsig(); os_fix_helper_signals(); fds = ¤t_poll; while (1) { diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index b11ed66c8bb0..9ea7269ffb77 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -26,7 +26,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { [SIGFPE] = relay_signal, [SIGILL] = relay_signal, [SIGWINCH] = winch, - [SIGBUS] = bus_handler, + [SIGBUS] = relay_signal, [SIGSEGV] = segv_handler, [SIGIO] = sigio_handler, }; @@ -65,7 +65,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) #define SIGALRM_MASK (1 << SIGALRM_BIT) int signals_enabled; -#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) static int signals_blocked, signals_blocked_pending; #endif static unsigned int signals_pending; @@ -75,7 +75,7 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) { int enabled = signals_enabled; -#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) if ((signals_blocked || __atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) && (sig == SIGIO)) { @@ -190,43 +190,8 @@ static void hard_handler(int sig, siginfo_t *si, void *p) { ucontext_t *uc = p; mcontext_t *mc = &uc->uc_mcontext; - unsigned long pending = 1UL << sig; - do { - int nested, bail; - - /* - * pending comes back with one bit set for each - * interrupt that arrived while setting up the stack, - * plus a bit for this interrupt, plus the zero bit is - * set if this is a nested interrupt. - * If bail is true, then we interrupted another - * handler setting up the stack. In this case, we - * have to return, and the upper handler will deal - * with this interrupt. - */ - bail = to_irq_stack(&pending); - if (bail) - return; - - nested = pending & 1; - pending &= ~1; - - while ((sig = ffs(pending)) != 0){ - sig--; - pending &= ~(1 << sig); - (*handlers[sig])(sig, (struct siginfo *)si, mc); - } - - /* - * Again, pending comes back with a mask of signals - * that arrived while tearing down the stack. If this - * is non-zero, we just go back, set up the stack - * again, and handle the new interrupts. 
- */ - if (!nested) - pending = from_irq_stack(nested); - } while (pending); + (*handlers[sig])(sig, (struct siginfo *)si, mc); } void set_handler(int sig) @@ -297,7 +262,7 @@ void unblock_signals(void) return; signals_enabled = 1; -#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) deliver_time_travel_irqs(); #endif @@ -389,7 +354,7 @@ int um_set_signals_trace(int enable) return ret; } -#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) void mark_sigio_pending(void) { /* @@ -487,11 +452,3 @@ void unblock_signals_hard(void) unblocking = false; } #endif - -int os_is_signal_stack(void) -{ - stack_t ss; - sigaltstack(NULL, &ss); - - return ss.ss_flags & SS_ONSTACK; -} diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index 9a13ac23c606..d7f1814b0e5a 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c @@ -217,24 +217,3 @@ int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len) return 0; } - -int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, - unsigned int prot) -{ - struct stub_syscall *sc; - - /* Compress with previous syscall if that is possible */ - sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MPROTECT, addr); - if (sc && sc->mem.prot == prot) { - sc->mem.length += len; - return 0; - } - - sc = syscall_stub_alloc(mm_idp); - sc->syscall = STUB_SYSCALL_MPROTECT; - sc->mem.addr = addr; - sc->mem.length = len; - sc->mem.prot = prot; - - return 0; -} diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index b6f656bcffb1..f683cfc9e51a 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -10,8 +10,11 @@ #include <sched.h> #include <errno.h> #include <string.h> +#include <fcntl.h> +#include <mem_user.h> #include <sys/mman.h> #include <sys/wait.h> +#include <sys/stat.h> #include <asm/unistd.h> #include <as-layout.h> #include <init.h> @@ -141,16 +144,10 @@ bad_wait: extern unsigned long current_stub_stack(void); -static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux_fp_regs) +static void get_skas_faultinfo(int pid, struct faultinfo *fi) { int err; - err = get_fp_registers(pid, aux_fp_regs); - if (err < 0) { - printk(UM_KERN_ERR "save_fp_registers returned %d\n", - err); - fatal_sigsegv(); - } err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV); if (err) { printk(UM_KERN_ERR "Failed to continue stub, pid = %d, " @@ -164,18 +161,11 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux * the stub stack page. We just have to copy it. */ memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); - - err = put_fp_registers(pid, aux_fp_regs); - if (err < 0) { - printk(UM_KERN_ERR "put_fp_registers returned %d\n", - err); - fatal_sigsegv(); - } } -static void handle_segv(int pid, struct uml_pt_regs *regs, unsigned long *aux_fp_regs) +static void handle_segv(int pid, struct uml_pt_regs *regs) { - get_skas_faultinfo(pid, ®s->faultinfo, aux_fp_regs); + get_skas_faultinfo(pid, ®s->faultinfo); segv(regs->faultinfo, 0, 1, NULL); } @@ -189,69 +179,131 @@ static void handle_trap(int pid, struct uml_pt_regs *regs) extern char __syscall_stub_start[]; -/** - * userspace_tramp() - userspace trampoline - * @stack: pointer to the new userspace stack page - * - * The userspace trampoline is used to setup a new userspace process in start_userspace() after it was clone()'ed. - * This function will run on a temporary stack page. 
- * It ptrace()'es itself, then - * Two pages are mapped into the userspace address space: - * - STUB_CODE (with EXEC), which contains the skas stub code - * - STUB_DATA (with R/W), which contains a data page that is used to transfer certain data between the UML userspace process and the UML kernel. - * Also for the userspace process a SIGSEGV handler is installed to catch pagefaults in the userspace process. - * And last the process stops itself to give control to the UML kernel for this userspace process. - * - * Return: Always zero, otherwise the current userspace process is ended with non null exit() call - */ +static int stub_exe_fd; + static int userspace_tramp(void *stack) { - struct sigaction sa; - void *addr; - int fd; + char *const argv[] = { "uml-userspace", NULL }; + int pipe_fds[2]; unsigned long long offset; - unsigned long segv_handler = STUB_CODE + - (unsigned long) stub_segv_handler - - (unsigned long) __syscall_stub_start; - - ptrace(PTRACE_TRACEME, 0, 0, 0); - - signal(SIGTERM, SIG_DFL); - signal(SIGWINCH, SIG_IGN); - - fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset); - addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, - PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); - if (addr == MAP_FAILED) { - os_info("mapping mmap stub at 0x%lx failed, errno = %d\n", - STUB_CODE, errno); - exit(1); + struct stub_init_data init_data = { + .stub_start = STUB_START, + .segv_handler = STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) __syscall_stub_start, + }; + struct iomem_region *iomem; + int ret; + + init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start), + &offset); + init_data.stub_code_offset = MMAP_OFFSET(offset); + + init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset); + init_data.stub_data_offset = MMAP_OFFSET(offset); + + /* Set CLOEXEC on all FDs and then unset on all memory related FDs */ + close_range(0, ~0U, CLOSE_RANGE_CLOEXEC); + + fcntl(init_data.stub_data_fd, F_SETFD, 0); + for (iomem = iomem_regions; iomem; iomem = iomem->next) + fcntl(iomem->fd, F_SETFD, 0); + + /* Create a pipe for init_data (no CLOEXEC) and dup2 to STDIN */ + if (pipe(pipe_fds)) + exit(2); + + if (dup2(pipe_fds[0], 0) < 0) + exit(3); + close(pipe_fds[0]); + + /* Write init_data and close write side */ + ret = write(pipe_fds[1], &init_data, sizeof(init_data)); + close(pipe_fds[1]); + + if (ret != sizeof(init_data)) + exit(4); + + execveat(stub_exe_fd, "", argv, NULL, AT_EMPTY_PATH); + + exit(5); +} + +extern char stub_exe_start[]; +extern char stub_exe_end[]; + +extern char *tempdir; + +#define STUB_EXE_NAME_TEMPLATE "/uml-userspace-XXXXXX" + +#ifndef MFD_EXEC +#define MFD_EXEC 0x0010U +#endif + +static int __init init_stub_exe_fd(void) +{ + size_t written = 0; + char *tmpfile = NULL; + + stub_exe_fd = memfd_create("uml-userspace", + MFD_EXEC | MFD_CLOEXEC | MFD_ALLOW_SEALING); + + if (stub_exe_fd < 0) { + printk(UM_KERN_INFO "Could not create executable memfd, using temporary file!"); + + tmpfile = malloc(strlen(tempdir) + + strlen(STUB_EXE_NAME_TEMPLATE) + 1); + if (tmpfile == NULL) + panic("Failed to allocate memory for stub binary name"); + + strcpy(tmpfile, tempdir); + strcat(tmpfile, STUB_EXE_NAME_TEMPLATE); + + stub_exe_fd = mkstemp(tmpfile); + if (stub_exe_fd < 0) + panic("Could not create temporary file for stub binary: %d", + -errno); } - fd = phys_mapping(uml_to_phys(stack), &offset); - addr = mmap((void *) STUB_DATA, - STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_SHARED, fd, offset); - 
if (addr == MAP_FAILED) { - os_info("mapping segfault stack at 0x%lx failed, errno = %d\n", - STUB_DATA, errno); - exit(1); + while (written < stub_exe_end - stub_exe_start) { + ssize_t res = write(stub_exe_fd, stub_exe_start + written, + stub_exe_end - stub_exe_start - written); + if (res < 0) { + if (errno == EINTR) + continue; + + if (tmpfile) + unlink(tmpfile); + panic("Failed to write stub binary: %d", -errno); + } + + written += res; } - set_sigstack((void *) STUB_DATA, STUB_DATA_PAGES * UM_KERN_PAGE_SIZE); - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO; - sa.sa_sigaction = (void *) segv_handler; - sa.sa_restorer = NULL; - if (sigaction(SIGSEGV, &sa, NULL) < 0) { - os_info("%s - setting SIGSEGV handler failed - errno = %d\n", - __func__, errno); - exit(1); + if (!tmpfile) { + fcntl(stub_exe_fd, F_ADD_SEALS, + F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL); + } else { + if (fchmod(stub_exe_fd, 00500) < 0) { + unlink(tmpfile); + panic("Could not make stub binary executable: %d", + -errno); + } + + close(stub_exe_fd); + stub_exe_fd = open(tmpfile, O_RDONLY | O_CLOEXEC | O_NOFOLLOW); + if (stub_exe_fd < 0) { + unlink(tmpfile); + panic("Could not reopen stub binary: %d", -errno); + } + + unlink(tmpfile); + free(tmpfile); } - kill(os_getpid(), SIGSTOP); return 0; } +__initcall(init_stub_exe_fd); int userspace_pid[NR_CPUS]; @@ -270,7 +322,7 @@ int start_userspace(unsigned long stub_stack) { void *stack; unsigned long sp; - int pid, status, n, flags, err; + int pid, status, n, err; /* setup a temporary stack page */ stack = mmap(NULL, UM_KERN_PAGE_SIZE, @@ -286,10 +338,10 @@ int start_userspace(unsigned long stub_stack) /* set stack pointer to the end of the stack page, so it can grow downwards */ sp = (unsigned long)stack + UM_KERN_PAGE_SIZE; - flags = CLONE_FILES | SIGCHLD; - /* clone into new userspace process */ - pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); + pid = clone(userspace_tramp, (void *) sp, + CLONE_VFORK | CLONE_VM | SIGCHLD, + (void *)stub_stack); if (pid < 0) { err = -errno; printk(UM_KERN_ERR "%s : clone failed, errno = %d\n", @@ -336,7 +388,10 @@ int start_userspace(unsigned long stub_stack) return err; } -void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) +int unscheduled_userspace_iterations; +extern unsigned long tt_extra_sched_jiffies; + +void userspace(struct uml_pt_regs *regs) { int err, status, op, pid = userspace_pid[0]; siginfo_t si; @@ -345,6 +400,29 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) interrupt_end(); while (1) { + /* + * When we are in time-travel mode, userspace can theoretically + * do a *lot* of work without being scheduled. The problem with + * this is that it will prevent kernel bookkeeping (primarily + * RCU) from running, which can for example cause OOM + * situations. + * + * This code accounts one jiffy against the scheduling clock + * after the configured number of userspace iterations in the + * same thread. By doing so the situation is effectively + * prevented.
+ */ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) { +#ifdef CONFIG_UML_MAX_USERSPACE_ITERATIONS + if (CONFIG_UML_MAX_USERSPACE_ITERATIONS && + unscheduled_userspace_iterations++ > + CONFIG_UML_MAX_USERSPACE_ITERATIONS) { + tt_extra_sched_jiffies += 1; + unscheduled_userspace_iterations = 0; + } +#endif + } + time_travel_print_bc_msg(); current_mm_sync(); @@ -435,11 +513,11 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) case SIGSEGV: if (PTRACE_FULL_FAULTINFO) { get_skas_faultinfo(pid, - &regs->faultinfo, aux_fp_regs); + &regs->faultinfo); (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, regs); } - else handle_segv(pid, regs, aux_fp_regs); + else handle_segv(pid, regs); break; case SIGTRAP + 0x80: handle_trap(pid, regs); @@ -487,6 +565,8 @@ void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) void switch_threads(jmp_buf *me, jmp_buf *you) { + unscheduled_userspace_iterations = 0; + if (UML_SETJMP(me) == 0) UML_LONGJMP(you, 1); } @@ -570,6 +650,7 @@ static bool noreboot; static int __init noreboot_cmd_param(char *str, int *add) { + *add = 0; noreboot = true; return 0; } diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c index e09d65b05d1c..eb523ab1e218 100644 --- a/arch/um/os-Linux/umid.c +++ b/arch/um/os-Linux/umid.c @@ -358,6 +358,8 @@ char *get_umid(void) static int __init set_uml_dir(char *name, int *add) { + *add = 0; + if (*name == '\0') { os_warn("uml_dir can't be an empty string\n"); return 0; diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index 1dca4ffbd572..4193e04d7e4a 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -52,8 +52,8 @@ void setup_machinename(char *machine_out) struct utsname host; uname(&host); -#ifdef UML_CONFIG_UML_X86 -# ifndef UML_CONFIG_64BIT +#if IS_ENABLED(CONFIG_UML_X86) +# if !IS_ENABLED(CONFIG_64BIT) if (!strcmp(host.machine, "x86_64")) { strcpy(machine_out, "i686"); return; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6c633d93c639..9d7bd0ae48c4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -128,6 +128,8 @@ config X86 select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_LTO_CLANG_THIN select ARCH_SUPPORTS_RT + select ARCH_SUPPORTS_AUTOFDO_CLANG + select ARCH_SUPPORTS_PROPELLER_CLANG if X86_64 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if X86_CMPXCHG64 select ARCH_USE_MEMTEST diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index bb284aff7bfd..2e1e26846050 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -7135,6 +7135,7 @@ __init int intel_pmu_init(void) case INTEL_METEORLAKE: case INTEL_METEORLAKE_L: + case INTEL_ARROWLAKE_U: intel_pmu_init_hybrid(hybrid_big_small); x86_pmu.pebs_latency_data = cmt_latency_data; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8afc4ad3cd16..1a4b326ca2ce 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1489,7 +1489,7 @@ void intel_pmu_pebs_enable(struct perf_event *event) * hence we need to drain when changing said * size.
*/ - intel_pmu_drain_large_pebs(cpuc); + intel_pmu_drain_pebs_buffer(); adaptive_pebs_record_size_update(); wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg); cpuc->active_pebs_data_cfg = pebs_data_cfg; diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 6f82e75b6149..4b804531b03c 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -36,10 +36,12 @@ #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 #ifdef CONFIG_X86_64 -#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit */ +#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */ +#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW5 /* No PTI shadow (root PGD) */ #else /* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */ -#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit */ +#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit (leaf) */ +#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW2 /* No PTI shadow (root PGD) */ #endif /* If _PAGE_BIT_PRESENT is clear, we use these: */ @@ -139,6 +141,8 @@ #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) +#define _PAGE_NOPTISHADOW (_AT(pteval_t, 1) << _PAGE_BIT_NOPTISHADOW) + /* * Set of bits not changed in pte_modify. The pte's * protection key is treated like _PAGE_RW, for diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 580636cdc257..4d3c9d00d6b6 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -34,4 +34,8 @@ static inline void __tlb_remove_table(void *table) free_page_and_swap_cache(table); } +static inline void invlpg(unsigned long addr) +{ + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); +} #endif /* _ASM_X86_TLB_H */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 823f44f7bc94..79d2e17f6582 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -798,6 +798,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static const struct x86_cpu_desc erratum_1386_microcode[] = { AMD_CPU_DESC(0x17, 0x1, 0x2, 0x0800126e), AMD_CPU_DESC(0x17, 0x31, 0x0, 0x08301052), + {}, }; static void fix_erratum_1386(struct cpuinfo_x86 *c) @@ -1064,7 +1065,7 @@ static void init_amd(struct cpuinfo_x86 *c) */ if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && cpu_has(c, X86_FEATURE_AUTOIBRS)) - WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS)); + WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS) < 0); /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */ clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE); diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 392d09c936d6..e6fa03ed9172 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -178,8 +178,6 @@ struct _cpuid4_info_regs { struct amd_northbridge *nb; }; -static unsigned short num_cache_leaves; - /* AMD doesn't have CPUID4. Emulate it here to report the same information to the user. This makes some assumptions about the machine: L2 not shared, no SMT etc. that is currently true on AMD CPUs. 
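The cacheinfo.c hunks that follow retire the file-scoped num_cache_leaves in favour of the per-CPU ci->num_leaves field, so each CPU's leaf count lives alongside its other cache data instead of being shared by all CPUs. For reference, a minimal user-space sketch of the CPUID(4) walk that find_num_cache_leaves() performs on Intel parts (an illustration only, assuming GCC/clang's <cpuid.h>; AMD uses leaf 0x8000001d when TOPOEXT is available):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	int i;

	/* Walk the subleaves of CPUID leaf 4; a cache type of 0 in
	 * EAX[4:0] means "no more caches", mirroring the kernel loop. */
	for (i = 0; ; i++) {
		if (!__get_cpuid_count(4, i, &eax, &ebx, &ecx, &edx) ||
		    (eax & 0x1f) == 0)
			break;
	}

	printf("deterministic cache leaves: %d\n", i);
	return 0;
}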
@@ -717,20 +715,23 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c) void init_amd_cacheinfo(struct cpuinfo_x86 *c) { + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - num_cache_leaves = find_num_cache_leaves(c); + ci->num_leaves = find_num_cache_leaves(c); } else if (c->extended_cpuid_level >= 0x80000006) { if (cpuid_edx(0x80000006) & 0xf000) - num_cache_leaves = 4; + ci->num_leaves = 4; else - num_cache_leaves = 3; + ci->num_leaves = 3; } } void init_hygon_cacheinfo(struct cpuinfo_x86 *c) { - num_cache_leaves = find_num_cache_leaves(c); + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); + + ci->num_leaves = find_num_cache_leaves(c); } void init_intel_cacheinfo(struct cpuinfo_x86 *c) @@ -740,21 +741,21 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); if (c->cpuid_level > 3) { - static int is_initialized; - - if (is_initialized == 0) { - /* Init num_cache_leaves from boot CPU */ - num_cache_leaves = find_num_cache_leaves(c); - is_initialized++; - } + /* + * There should be at least one leaf. A non-zero value means + * that the number of leaves has been initialized. + */ + if (!ci->num_leaves) + ci->num_leaves = find_num_cache_leaves(c); /* * Whenever possible use cpuid(4), deterministic cache * parameters cpuid leaf to find the cache details */ - for (i = 0; i < num_cache_leaves; i++) { + for (i = 0; i < ci->num_leaves; i++) { struct _cpuid4_info_regs this_leaf = {}; int retval; @@ -790,14 +791,14 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for * trace cache */ - if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { + if ((!ci->num_leaves || c->x86 == 15) && c->cpuid_level > 1) { /* supports eax=2 call */ int j, n; unsigned int regs[4]; unsigned char *dp = (unsigned char *)regs; int only_trace = 0; - if (num_cache_leaves != 0 && c->x86 == 15) + if (ci->num_leaves && c->x86 == 15) only_trace = 1; /* Number of times to iterate */ @@ -991,14 +992,12 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, int init_cache_level(unsigned int cpu) { - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); - if (!num_cache_leaves) + /* There should be at least one leaf. 
*/ + if (!ci->num_leaves) return -ENOENT; - if (!this_cpu_ci) - return -EINVAL; - this_cpu_ci->num_levels = 3; - this_cpu_ci->num_leaves = num_cache_leaves; + return 0; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ca327cfa42ae..a5c28975c608 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -169,7 +169,7 @@ static void ppin_init(struct cpuinfo_x86 *c) } clear_ppin: - clear_cpu_cap(c, info->feature); + setup_clear_cpu_cap(info->feature); } static void default_init(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index d1de300af173..8ded9f859a3a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -555,7 +555,9 @@ static void init_intel(struct cpuinfo_x86 *c) c->x86_vfm == INTEL_WESTMERE_EX)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); - if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT) + if (boot_cpu_has(X86_FEATURE_MWAIT) && + (c->x86_vfm == INTEL_ATOM_GOLDMONT || + c->x86_vfm == INTEL_LUNARLAKE_M)) set_cpu_bug(c, X86_BUG_MONITOR); #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 31a73715d755..fb5d0c67fbab 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -34,6 +34,7 @@ #include <asm/setup.h> #include <asm/cpu.h> #include <asm/msr.h> +#include <asm/tlb.h> #include "internal.h" @@ -483,11 +484,25 @@ static void scan_containers(u8 *ucode, size_t size, struct cont_desc *desc) } } -static int __apply_microcode_amd(struct microcode_amd *mc) +static int __apply_microcode_amd(struct microcode_amd *mc, unsigned int psize) { + unsigned long p_addr = (unsigned long)&mc->hdr.data_code; u32 rev, dummy; - native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc->hdr.data_code); + native_wrmsrl(MSR_AMD64_PATCH_LOADER, p_addr); + + if (x86_family(bsp_cpuid_1_eax) == 0x17) { + unsigned long p_addr_end = p_addr + psize - 1; + + invlpg(p_addr); + + /* + * Flush next page too if patch image is crossing a page + * boundary. 
+ */ + if (p_addr >> PAGE_SHIFT != p_addr_end >> PAGE_SHIFT) + invlpg(p_addr_end); + } /* verify patch application was successful */ native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); @@ -529,7 +544,7 @@ static bool early_apply_microcode(u32 old_rev, void *ucode, size_t size) if (old_rev > mc->hdr.patch_id) return ret; - return !__apply_microcode_amd(mc); + return !__apply_microcode_amd(mc, desc.psize); } static bool get_builtin_microcode(struct cpio_data *cp) @@ -745,7 +760,7 @@ void reload_ucode_amd(unsigned int cpu) rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); if (rev < mc->hdr.patch_id) { - if (!__apply_microcode_amd(mc)) + if (!__apply_microcode_amd(mc, p->size)) pr_info_once("reload revision: 0x%08x\n", mc->hdr.patch_id); } } @@ -798,7 +813,7 @@ static enum ucode_state apply_microcode_amd(int cpu) goto out; } - if (__apply_microcode_amd(mc_amd)) { + if (__apply_microcode_amd(mc_amd, p->size)) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); return UCODE_ERROR; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 621a151ccf7d..b2e313ea17bf 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -428,8 +428,8 @@ void __init topology_apply_cmdline_limits_early(void) { unsigned int possible = nr_cpu_ids; - /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */ - if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled) + /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' */ + if (!setup_max_cpus || apic_is_disabled) possible = 1; /* 'possible_cpus=N' */ @@ -443,7 +443,7 @@ void __init topology_apply_cmdline_limits_early(void) static __init bool restrict_to_up(void) { - if (!smp_found_config || ioapic_is_disabled) + if (!smp_found_config) return true; /* * XEN PV is special as it does not advertise the local APIC diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 1065ab995305..8f62e0666dea 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -64,16 +64,6 @@ setfx: } /* - * Update the value of PKRU register that was already pushed onto the signal frame. - */ -static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru) -{ - if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE))) - return 0; - return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU)); -} - -/* * Signal frame handlers. */ static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf) @@ -168,14 +158,8 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame, static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pkru) { - int err = 0; - - if (use_xsave()) { - err = xsave_to_user_sigframe(buf); - if (!err) - err = update_pkru_in_sigframe(buf, pkru); - return err; - } + if (use_xsave()) + return xsave_to_user_sigframe(buf, pkru); if (use_fxsr()) return fxsave_to_user_sigframe((struct fxregs_state __user *) buf); diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 0b86a5002c84..aa16f1a1bbcf 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -69,6 +69,28 @@ static inline u64 xfeatures_mask_independent(void) return fpu_kernel_cfg.independent_features; } +/* + * Update the value of the PKRU register that was already pushed onto the signal frame.
+ */ +static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u64 mask, u32 pkru) +{ + u64 xstate_bv; + int err; + + if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE))) + return 0; + + /* Mark PKRU as in-use so that it is restored correctly. */ + xstate_bv = (mask & xfeatures_in_use()) | XFEATURE_MASK_PKRU; + + err = __put_user(xstate_bv, &buf->header.xfeatures); + if (err) + return err; + + /* Update PKRU value in the userspace xsave buffer. */ + return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU)); +} + /* XSAVE/XRSTOR wrapper functions */ #ifdef CONFIG_X86_64 @@ -256,7 +278,7 @@ static inline u64 xfeatures_need_sigframe_write(void) * The caller has to zero buf::header before calling this because XSAVE* * does not touch the reserved fields in the header. */ -static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) +static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkru) { /* * Include the features which are not xsaved/rstored by the kernel @@ -281,6 +303,9 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) XSTATE_OP(XSAVE, buf, lmask, hmask, err); clac(); + if (!err) + err = update_pkru_in_sigframe(buf, mask, pkru); + return err; } diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index e9e88c342f75..540443d699e3 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -13,6 +13,7 @@ #include <asm/pgtable_types.h> #include <asm/nospec-branch.h> #include <asm/unwind_hints.h> +#include <asm/asm-offsets.h> /* * Must be relocatable PIC code callable as a C function, in particular @@ -242,6 +243,13 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) movq CR0(%r8), %r8 movq %rax, %cr3 movq %r8, %cr0 + +#ifdef CONFIG_KEXEC_JUMP + /* Saved in save_processor_state. */ + movq $saved_context, %rax + lgdt saved_context_gdt_desc(%rax) +#endif + movq %rbp, %rax popf diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 68efd8cd8bf1..fab3ac9a4574 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -420,6 +420,10 @@ SECTIONS STABS_DEBUG DWARF_DEBUG +#ifdef CONFIG_PROPELLER_CLANG + .llvm_bb_addr_map : { *(.llvm_bb_addr_map) } +#endif + ELF_DETAILS DISCARDS diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 097bdc022d0f..ae0b438a2c99 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -36,6 +36,26 @@ u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; EXPORT_SYMBOL_GPL(kvm_cpu_caps); +struct cpuid_xstate_sizes { + u32 eax; + u32 ebx; + u32 ecx; +}; + +static struct cpuid_xstate_sizes xstate_sizes[XFEATURE_MAX] __ro_after_init; + +void __init kvm_init_xstate_sizes(void) +{ + u32 ign; + int i; + + for (i = XFEATURE_YMM; i < ARRAY_SIZE(xstate_sizes); i++) { + struct cpuid_xstate_sizes *xs = &xstate_sizes[i]; + + cpuid_count(0xD, i, &xs->eax, &xs->ebx, &xs->ecx, &ign); + } +} + u32 xstate_required_size(u64 xstate_bv, bool compacted) { int feature_bit = 0; @@ -44,14 +64,15 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted) xstate_bv &= XFEATURE_MASK_EXTEND; while (xstate_bv) { if (xstate_bv & 0x1) { - u32 eax, ebx, ecx, edx, offset; - cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx); + struct cpuid_xstate_sizes *xs = &xstate_sizes[feature_bit]; + u32 offset; + /* ECX[1]: 64B alignment in compacted form */ if (compacted) - offset = (ecx & 0x2) ? ALIGN(ret, 64) : ret; + offset = (xs->ecx & 0x2) ? 
ALIGN(ret, 64) : ret; else - offset = ebx; - ret = max(ret, offset + eax); + offset = xs->ebx; + ret = max(ret, offset + xs->eax); } xstate_bv >>= 1; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index c8dc66eddefd..f16a7b2c2adc 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -31,6 +31,7 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool exact_only); +void __init kvm_init_xstate_sizes(void); u32 xstate_required_size(u64 xstate_bv, bool compacted); int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h index 1715d2ab07be..ad9116a99bcc 100644 --- a/arch/x86/kvm/vmx/posted_intr.h +++ b/arch/x86/kvm/vmx/posted_intr.h @@ -2,7 +2,7 @@ #ifndef __KVM_X86_VMX_POSTED_INTR_H #define __KVM_X86_VMX_POSTED_INTR_H -#include <linux/find.h> +#include <linux/bitmap.h> #include <asm/posted_intr.h> void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0b2fe4aa04a2..c79a8cc57ba4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12724,6 +12724,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_hv_init_vm(kvm); kvm_xen_init_vm(kvm); + if (ignore_msrs && !report_ignored_msrs) { + pr_warn_once("Running KVM with ignore_msrs=1 and report_ignored_msrs=0 is not a\n" + "supported configuration. Lying to the guest about the existence of MSRs\n" + "may cause the guest operating system to hang or produce errors. If a guest\n" + "does not run without ignore_msrs=1, please report it to kvm@vger.kernel.org.\n"); + } + return 0; out_uninit_mmu: @@ -13997,6 +14004,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_rmp_fault); static int __init kvm_x86_init(void) { + kvm_init_xstate_sizes(); + kvm_mmu_x86_module_init(); mitigate_smt_rsb &= boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible(); return 0; diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index 437e96fb4977..5ab7bd2f1983 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -174,7 +174,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page, if (result) return result; - set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag)); + set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW)); } return 0; @@ -218,14 +218,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, if (result) return result; if (pgtable_l5_enabled()) { - set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); + set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag | _PAGE_NOPTISHADOW)); } else { /* * With p4d folded, pgd is equal to p4d. * The pgd entry has to point to the pud page table in this case.
*/ pud_t *pud = pud_offset(p4d, 0); - set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag)); + set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW)); } } diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 069e421c2247..95bc50a8541c 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -354,7 +354,7 @@ bool cpu_cache_has_invalidate_memregion(void) { return !cpu_feature_enabled(X86_FEATURE_HYPERVISOR); } -EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, DEVMEM); +EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, "DEVMEM"); int cpu_cache_invalidate_memregion(int res_desc) { @@ -363,7 +363,7 @@ int cpu_cache_invalidate_memregion(int res_desc) wbinvd_on_all_cpus(); return 0; } -EXPORT_SYMBOL_NS_GPL(cpu_cache_invalidate_memregion, DEVMEM); +EXPORT_SYMBOL_NS_GPL(cpu_cache_invalidate_memregion, "DEVMEM"); #endif static void __cpa_flush_all(void *arg) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 851ec8f1363a..5f0d579932c6 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -132,7 +132,7 @@ pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) * Top-level entries added to init_mm's usermode pgd after boot * will not be automatically propagated to other mms. */ - if (!pgdp_maps_userspace(pgdp)) + if (!pgdp_maps_userspace(pgdp) || (pgd.pgd & _PAGE_NOPTISHADOW)) return pgd; /* diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index b0d5a644fc84..a2becb85bea7 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -20,6 +20,7 @@ #include <asm/cacheflush.h> #include <asm/apic.h> #include <asm/perf_event.h> +#include <asm/tlb.h> #include "mm_internal.h" @@ -1140,7 +1141,7 @@ STATIC_NOPV void native_flush_tlb_one_user(unsigned long addr) bool cpu_pcide; /* Flush 'addr' from the kernel PCID: */ - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + invlpg(addr); /* If PTI is off there is no user PCID and nothing to flush. */ if (!static_cpu_has(X86_FEATURE_PTI)) diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 186f13268401..986045d5e638 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -10,6 +10,7 @@ config UML_X86 def_bool y select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32 select DCACHE_WORD_ACCESS + select HAVE_EFFICIENT_UNALIGNED_ACCESS config 64BIT bool "64-bit kernel" if "$(SUBARCH)" = "x86" @@ -28,17 +29,6 @@ config X86_64 def_bool 64BIT select MODULES_USE_ELF_RELA -config 3_LEVEL_PGTABLES - bool "Three-level pagetables" if !64BIT - default 64BIT - help - Three-level pagetables will let UML have more than 4G of physical - memory. All the memory that can't be mapped directly will be treated - as high memory. - - However, this it experimental on 32-bit architectures, so if unsure say - N (on x86-64 it's automatically enabled, instead, as it's safe there). 
- config ARCH_HAS_SC_SIGNALS def_bool !64BIT diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index 36e67fc97c22..b42c31cd2390 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -10,7 +10,7 @@ else endif obj-y = bugs_$(BITS).o delay.o fault.o \ - ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ + ptrace.o ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ stub_segv.o \ sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \ mem_$(BITS).o subarch.o os-Linux/ diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index 6052200fe925..62ed5d68a978 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h @@ -8,6 +8,8 @@ #include <asm/user.h> #include <skas.h> +#define CORE_DUMP_USE_REGSET + #ifdef CONFIG_X86_32 #define R_386_NONE 0 diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h index 2fef3da55533..2641d28d115c 100644 --- a/arch/x86/um/asm/ptrace.h +++ b/arch/x86/um/asm/ptrace.h @@ -2,6 +2,16 @@ #ifndef __UM_X86_PTRACE_H #define __UM_X86_PTRACE_H +/* This is here because signal.c needs the REGSET_FP_LEGACY definition */ +enum { + REGSET_GENERAL, +#ifdef CONFIG_X86_32 + REGSET_FP_LEGACY, +#endif + REGSET_FP, + REGSET_XSTATE, +}; + #include <linux/compiler.h> #ifndef CONFIG_X86_32 #define __FRAME_OFFSETS /* Needed to get the R* macros */ diff --git a/arch/x86/um/os-Linux/Makefile b/arch/x86/um/os-Linux/Makefile index 5249bbc30dcd..77a308aaa5ec 100644 --- a/arch/x86/um/os-Linux/Makefile +++ b/arch/x86/um/os-Linux/Makefile @@ -3,7 +3,7 @@ # Licensed under the GPL # -obj-y = registers.o task_size.o mcontext.o +obj-y = registers.o mcontext.o obj-$(CONFIG_X86_32) += tls.o diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c index f3638dd09cec..76eaeb93928c 100644 --- a/arch/x86/um/os-Linux/registers.c +++ b/arch/x86/um/os-Linux/registers.c @@ -16,133 +16,58 @@ #include <asm/sigcontext.h> #include <linux/elf.h> #include <registers.h> +#include <sys/mman.h> -static int have_xstate_support; +unsigned long host_fp_size; -int save_i387_registers(int pid, unsigned long *fp_regs) -{ - if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0) - return -errno; - return 0; -} - -int save_fp_registers(int pid, unsigned long *fp_regs) +int get_fp_registers(int pid, unsigned long *regs) { -#ifdef PTRACE_GETREGSET - struct iovec iov; + struct iovec iov = { + .iov_base = regs, + .iov_len = host_fp_size, + }; - if (have_xstate_support) { - iov.iov_base = fp_regs; - iov.iov_len = FP_SIZE * sizeof(unsigned long); - if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0) - return -errno; - return 0; - } else -#endif - return save_i387_registers(pid, fp_regs); -} - -int restore_i387_registers(int pid, unsigned long *fp_regs) -{ - if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0) + if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0) return -errno; return 0; } -int restore_fp_registers(int pid, unsigned long *fp_regs) -{ -#ifdef PTRACE_SETREGSET - struct iovec iov; - if (have_xstate_support) { - iov.iov_base = fp_regs; - iov.iov_len = FP_SIZE * sizeof(unsigned long); - if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0) - return -errno; - return 0; - } else -#endif - return restore_i387_registers(pid, fp_regs); -} - -#ifdef __i386__ -int have_fpx_regs = 1; -int save_fpx_registers(int pid, unsigned long *fp_regs) +int put_fp_registers(int pid, unsigned long *regs) { - if (ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs) < 0) - return -errno; - return 0; -} + struct iovec iov = { + .iov_base = regs, + .iov_len = 
host_fp_size, + }; -int restore_fpx_registers(int pid, unsigned long *fp_regs) -{ - if (ptrace(PTRACE_SETFPXREGS, pid, 0, fp_regs) < 0) + if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0) return -errno; return 0; } -int get_fp_registers(int pid, unsigned long *regs) -{ - if (have_fpx_regs) - return save_fpx_registers(pid, regs); - else - return save_fp_registers(pid, regs); -} - -int put_fp_registers(int pid, unsigned long *regs) -{ - if (have_fpx_regs) - return restore_fpx_registers(pid, regs); - else - return restore_fp_registers(pid, regs); -} - -void arch_init_registers(int pid) -{ - struct user_fpxregs_struct fpx_regs; - int err; - - err = ptrace(PTRACE_GETFPXREGS, pid, 0, &fpx_regs); - if (!err) - return; - - if (errno != EIO) - panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d", - errno); - - have_fpx_regs = 0; -} -#else - -int get_fp_registers(int pid, unsigned long *regs) +int arch_init_registers(int pid) { - return save_fp_registers(pid, regs); + struct iovec iov = { + /* Just use plenty of space, it does not cost us anything */ + .iov_len = 2 * 1024 * 1024, + }; + int ret; + + iov.iov_base = mmap(NULL, iov.iov_len, PROT_WRITE | PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (iov.iov_base == MAP_FAILED) + return -ENOMEM; + + /* GDB has x86_xsave_length, which uses x86_cpuid_count */ + ret = ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov); + if (ret) + ret = -errno; + munmap(iov.iov_base, 2 * 1024 * 1024); + + host_fp_size = iov.iov_len; + + return ret; } -int put_fp_registers(int pid, unsigned long *regs) -{ - return restore_fp_registers(pid, regs); -} - -void arch_init_registers(int pid) -{ -#ifdef PTRACE_GETREGSET - void * fp_regs; - struct iovec iov; - - fp_regs = malloc(FP_SIZE * sizeof(unsigned long)); - if(fp_regs == NULL) - return; - - iov.iov_base = fp_regs; - iov.iov_len = FP_SIZE * sizeof(unsigned long); - if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) == 0) - have_xstate_support = 1; - - free(fp_regs); -#endif -} -#endif - unsigned long get_thread_reg(int reg, jmp_buf *buf) { switch (reg) { diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c deleted file mode 100644 index 1dc9adc20b1c..000000000000 --- a/arch/x86/um/os-Linux/task_size.c +++ /dev/null @@ -1,151 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include <stdlib.h> -#include <signal.h> -#include <sys/mman.h> -#include <longjmp.h> - -#ifdef __i386__ - -static jmp_buf buf; - -static void segfault(int sig) -{ - longjmp(buf, 1); -} - -static int page_ok(unsigned long page) -{ - unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT); - unsigned long n = ~0UL; - void *mapped = NULL; - int ok = 0; - - /* - * First see if the page is readable. If it is, it may still - * be a VDSO, so we go on to see if it's writable. If not - * then try mapping memory there. If that fails, then we're - * still in the kernel area. As a sanity check, we'll fail if - * the mmap succeeds, but gives us an address different from - * what we wanted. - */ - if (setjmp(buf) == 0) - n = *address; - else { - mapped = mmap(address, UM_KERN_PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (mapped == MAP_FAILED) - return 0; - if (mapped != address) - goto out; - } - - /* - * Now, is it writeable? If so, then we're in user address - * space. If not, then try mprotecting it and try the write - * again. 
- */ - if (setjmp(buf) == 0) { - *address = n; - ok = 1; - goto out; - } else if (mprotect(address, UM_KERN_PAGE_SIZE, - PROT_READ | PROT_WRITE) != 0) - goto out; - - if (setjmp(buf) == 0) { - *address = n; - ok = 1; - } - - out: - if (mapped != NULL) - munmap(mapped, UM_KERN_PAGE_SIZE); - return ok; -} - -unsigned long os_get_top_address(void) -{ - struct sigaction sa, old; - unsigned long bottom = 0; - /* - * A 32-bit UML on a 64-bit host gets confused about the VDSO at - * 0xffffe000. It is mapped, is readable, can be reprotected writeable - * and written. However, exec discovers later that it can't be - * unmapped. So, just set the highest address to be checked to just - * below it. This might waste some address space on 4G/4G 32-bit - * hosts, but shouldn't hurt otherwise. - */ - unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT; - unsigned long test, original; - - printf("Locating the bottom of the address space ... "); - fflush(stdout); - - /* - * We're going to be longjmping out of the signal handler, so - * SA_DEFER needs to be set. - */ - sa.sa_handler = segfault; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_NODEFER; - if (sigaction(SIGSEGV, &sa, &old)) { - perror("os_get_top_address"); - exit(1); - } - - /* Manually scan the address space, bottom-up, until we find - * the first valid page (or run out of them). - */ - for (bottom = 0; bottom < top; bottom++) { - if (page_ok(bottom)) - break; - } - - /* If we've got this far, we ran out of pages. */ - if (bottom == top) { - fprintf(stderr, "Unable to determine bottom of address " - "space.\n"); - exit(1); - } - - printf("0x%lx\n", bottom << UM_KERN_PAGE_SHIFT); - printf("Locating the top of the address space ... "); - fflush(stdout); - - original = bottom; - - /* This could happen with a 4G/4G split */ - if (page_ok(top)) - goto out; - - do { - test = bottom + (top - bottom) / 2; - if (page_ok(test)) - bottom = test; - else - top = test; - } while (top - bottom > 1); - -out: - /* Restore the old SIGSEGV handling */ - if (sigaction(SIGSEGV, &old, NULL)) { - perror("os_get_top_address"); - exit(1); - } - top <<= UM_KERN_PAGE_SHIFT; - printf("0x%lx\n", top); - - return top; -} - -#else - -unsigned long os_get_top_address(void) -{ - /* The old value of CONFIG_TOP_ADDR */ - return 0x7fc0002000; -} - -#endif diff --git a/arch/x86/um/ptrace.c b/arch/x86/um/ptrace.c new file mode 100644 index 000000000000..57c504fd5626 --- /dev/null +++ b/arch/x86/um/ptrace.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/sched.h> +#include <linux/elf.h> +#include <linux/regset.h> +#include <asm/user32.h> +#include <asm/sigcontext.h> + +#ifdef CONFIG_X86_32 +/* + * FPU tag word conversions. + */ + +static inline unsigned short twd_i387_to_fxsr(unsigned short twd) +{ + unsigned int tmp; /* to avoid 16 bit prefixes in the code */ + + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. 
*/ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; +} + +static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave) +{ + struct _fpxreg *st = NULL; + unsigned long twd = (unsigned long) fxsave->twd; + unsigned long tag; + unsigned long ret = 0xffff0000; + int i; + +#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16) + + for (i = 0; i < 8; i++) { + if (twd & 0x1) { + st = (struct _fpxreg *) FPREG_ADDR(fxsave, i); + + switch (st->exponent & 0x7fff) { + case 0x7fff: + tag = 2; /* Special */ + break; + case 0x0000: + if (!st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3]) { + tag = 1; /* Zero */ + } else { + tag = 2; /* Special */ + } + break; + default: + if (st->significand[3] & 0x8000) + tag = 0; /* Valid */ + else + tag = 2; /* Special */ + break; + } + } else { + tag = 3; /* Empty */ + } + ret |= (tag << (2 * i)); + twd = twd >> 1; + } + return ret; +} + +/* Get/set the old 32bit i387 registers (pre-FPX) */ +static int fpregs_legacy_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp; + int i; + + membuf_store(&to, (unsigned long)fxsave->cwd | 0xffff0000ul); + membuf_store(&to, (unsigned long)fxsave->swd | 0xffff0000ul); + membuf_store(&to, twd_fxsr_to_i387(fxsave)); + membuf_store(&to, fxsave->fip); + membuf_store(&to, fxsave->fcs | ((unsigned long)fxsave->fop << 16)); + membuf_store(&to, fxsave->foo); + membuf_store(&to, fxsave->fos); + + for (i = 0; i < 8; i++) + membuf_write(&to, (void *)fxsave->st_space + i * 16, 10); + + return 0; +} + +static int fpregs_legacy_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp; + const struct user_i387_struct *from; + struct user_i387_struct buf; + int i; + + if (ubuf) { + if (copy_from_user(&buf, ubuf, sizeof(buf))) + return -EFAULT; + from = &buf; + } else { + from = kbuf; + } + + fxsave->cwd = (unsigned short)(from->cwd & 0xffff); + fxsave->swd = (unsigned short)(from->swd & 0xffff); + fxsave->twd = twd_i387_to_fxsr((unsigned short)(from->twd & 0xffff)); + fxsave->fip = from->fip; + fxsave->fop = (unsigned short)((from->fcs & 0xffff0000ul) >> 16); + fxsave->fcs = (from->fcs & 0xffff); + fxsave->foo = from->foo; + fxsave->fos = from->fos; + + for (i = 0; i < 8; i++) { + memcpy((void *)fxsave->st_space + i * 16, + (void *)from->st_space + i * 10, 10); + } + + return 0; +} +#endif + +static int genregs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + int reg; + + for (reg = 0; to.left; reg++) + membuf_store(&to, getreg(target, reg * sizeof(unsigned long))); + return 0; +} + +static int genregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret = 0; + + if (kbuf) { + const unsigned long *k = kbuf; + + while (count >= sizeof(*k) && !ret) { + ret = putreg(target, pos, *k++); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + const unsigned long __user *u = ubuf; + + while (count >= sizeof(*u) && !ret) { + unsigned long word; + + ret = __get_user(word, u++); + if (ret) + break; + ret = 
putreg(target, pos, word); + count -= sizeof(*u); + pos += sizeof(*u); + } + } + return ret; +} + +static int generic_fpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + return regset->n; +} + +static int generic_fpregs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + void *fpregs = task_pt_regs(target)->regs.fp; + + membuf_write(&to, fpregs, regset->size * regset->n); + return 0; +} + +static int generic_fpregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + void *fpregs = task_pt_regs(target)->regs.fp; + + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + fpregs, 0, regset->size * regset->n); +} + +static struct user_regset uml_regsets[] __ro_after_init = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = sizeof(struct user_regs_struct) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .regset_get = genregs_get, + .set = genregs_set + }, +#ifdef CONFIG_X86_32 + /* Old FP registers, they are needed in signal frames */ + [REGSET_FP_LEGACY] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_i387_ia32_struct) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .active = generic_fpregs_active, + .regset_get = fpregs_legacy_get, + .set = fpregs_legacy_set, + }, +#endif + [REGSET_FP] = { +#ifdef CONFIG_X86_32 + .core_note_type = NT_PRXFPREG, + .n = sizeof(struct user32_fxsr_struct) / sizeof(long), +#else + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_i387_struct) / sizeof(long), +#endif + .size = sizeof(long), + .align = sizeof(long), + .active = generic_fpregs_active, + .regset_get = generic_fpregs_get, + .set = generic_fpregs_set, + }, + [REGSET_XSTATE] = { + .core_note_type = NT_X86_XSTATE, + .size = sizeof(long), + .align = sizeof(long), + .active = generic_fpregs_active, + .regset_get = generic_fpregs_get, + .set = generic_fpregs_set, + }, + /* TODO: Add TLS regset for 32bit */ +}; + +static const struct user_regset_view user_uml_view = { +#ifdef CONFIG_X86_32 + .name = "i386", .e_machine = EM_386, +#else + .name = "x86_64", .e_machine = EM_X86_64, +#endif + .regsets = uml_regsets, .n = ARRAY_SIZE(uml_regsets) +}; + +const struct user_regset_view * +task_user_regset_view(struct task_struct *tsk) +{ + return &user_uml_view; +} + +static int __init init_regset_xstate_info(void) +{ + uml_regsets[REGSET_XSTATE].n = + host_fp_size / uml_regsets[REGSET_XSTATE].size; + + return 0; +} +arch_initcall(init_regset_xstate_info); diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index b0a71c6cdc6e..3af3cb821524 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -6,6 +6,7 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/uaccess.h> +#include <linux/regset.h> #include <asm/ptrace-abi.h> #include <registers.h> #include <skas.h> @@ -168,65 +169,6 @@ int peek_user(struct task_struct *child, long addr, long data) return put_user(tmp, (unsigned long __user *) data); } -static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) -{ - int err, n, cpu = task_cpu(child); - struct user_i387_struct fpregs; - - err = save_i387_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); - if (err) - return err; - - n = copy_to_user(buf, &fpregs, sizeof(fpregs)); - if(n > 0) - return -EFAULT; - - return n; -} - -static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct 
*child) -{ - int n, cpu = task_cpu(child); - struct user_i387_struct fpregs; - - n = copy_from_user(&fpregs, buf, sizeof(fpregs)); - if (n > 0) - return -EFAULT; - - return restore_i387_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); -} - -static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) -{ - int err, n, cpu = task_cpu(child); - struct user_fxsr_struct fpregs; - - err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs); - if (err) - return err; - - n = copy_to_user(buf, &fpregs, sizeof(fpregs)); - if(n > 0) - return -EFAULT; - - return n; -} - -static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) -{ - int n, cpu = task_cpu(child); - struct user_fxsr_struct fpregs; - - n = copy_from_user(&fpregs, buf, sizeof(fpregs)); - if (n > 0) - return -EFAULT; - - return restore_fpx_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); -} - long subarch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { @@ -234,17 +176,25 @@ long subarch_ptrace(struct task_struct *child, long request, void __user *datap = (void __user *) data; switch (request) { case PTRACE_GETFPREGS: /* Get the child FPU state. */ - ret = get_fpregs(datap, child); - break; + return copy_regset_to_user(child, task_user_regset_view(child), + REGSET_FP_LEGACY, + 0, sizeof(struct user_i387_struct), + datap); case PTRACE_SETFPREGS: /* Set the child FPU state. */ - ret = set_fpregs(datap, child); - break; + return copy_regset_from_user(child, task_user_regset_view(child), + REGSET_FP_LEGACY, + 0, sizeof(struct user_i387_struct), + datap); case PTRACE_GETFPXREGS: /* Get the child FPU state. */ - ret = get_fpxregs(datap, child); - break; + return copy_regset_to_user(child, task_user_regset_view(child), + REGSET_FP, + 0, sizeof(struct user_fxsr_struct), + datap); case PTRACE_SETFPXREGS: /* Set the child FPU state. 
*/ - ret = set_fpxregs(datap, child); - break; + return copy_regset_from_user(child, task_user_regset_view(child), + REGSET_FP, + 0, sizeof(struct user_fxsr_struct), + datap); default: ret = -EIO; } diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c index aa68d83d3f44..e0d4120a45c8 100644 --- a/arch/x86/um/ptrace_64.c +++ b/arch/x86/um/ptrace_64.c @@ -8,6 +8,7 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/errno.h> +#include <linux/regset.h> #define __FRAME_OFFSETS #include <asm/ptrace.h> #include <linux/uaccess.h> @@ -188,36 +189,6 @@ int peek_user(struct task_struct *child, long addr, long data) return put_user(tmp, (unsigned long *) data); } -static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) -{ - int err, n, cpu = ((struct thread_info *) child->stack)->cpu; - struct user_i387_struct fpregs; - - err = save_i387_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); - if (err) - return err; - - n = copy_to_user(buf, &fpregs, sizeof(fpregs)); - if (n > 0) - return -EFAULT; - - return n; -} - -static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) -{ - int n, cpu = ((struct thread_info *) child->stack)->cpu; - struct user_i387_struct fpregs; - - n = copy_from_user(&fpregs, buf, sizeof(fpregs)); - if (n > 0) - return -EFAULT; - - return restore_i387_registers(userspace_pid[cpu], - (unsigned long *) &fpregs); -} - long subarch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { @@ -226,11 +197,15 @@ long subarch_ptrace(struct task_struct *child, long request, switch (request) { case PTRACE_GETFPREGS: /* Get the child FPU state. */ - ret = get_fpregs(datap, child); - break; + return copy_regset_to_user(child, task_user_regset_view(child), + REGSET_FP, + 0, sizeof(struct user_i387_struct), + datap); case PTRACE_SETFPREGS: /* Set the child FPU state. 
*/ - ret = set_fpregs(datap, child); - break; + return copy_regset_from_user(child, task_user_regset_view(child), + REGSET_FP, + 0, sizeof(struct user_i387_struct), + datap); case PTRACE_ARCH_PRCTL: /* XXX Calls ptrace on the host - needs some SMP thinking */ ret = arch_prctl(child, data, (void __user *) addr); diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h index 6ca4ecabc55b..2dd4ca6713f8 100644 --- a/arch/x86/um/shared/sysdep/ptrace.h +++ b/arch/x86/um/shared/sysdep/ptrace.h @@ -56,12 +56,16 @@ struct syscall_args { UPT_SYSCALL_ARG5(r), \ UPT_SYSCALL_ARG6(r) } } ) +extern unsigned long host_fp_size; + struct uml_pt_regs { unsigned long gp[MAX_REG_NR]; - unsigned long fp[MAX_FP_NR]; struct faultinfo faultinfo; long syscall; int is_user; + + /* Dynamically sized FP registers (holds an XSTATE) */ + unsigned long fp[]; }; #define EMPTY_UML_PT_REGS { } @@ -72,4 +76,6 @@ struct uml_pt_regs { extern int user_context(unsigned long sp); +extern int arch_init_registers(int pid); + #endif /* __SYSDEP_X86_PTRACE_H */ diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h index 0c4989842fbe..2392470cac4d 100644 --- a/arch/x86/um/shared/sysdep/ptrace_32.h +++ b/arch/x86/um/shared/sysdep/ptrace_32.h @@ -6,8 +6,6 @@ #ifndef __SYSDEP_I386_PTRACE_H #define __SYSDEP_I386_PTRACE_H -#define MAX_FP_NR HOST_FPX_SIZE - #define UPT_SYSCALL_ARG1(r) UPT_BX(r) #define UPT_SYSCALL_ARG2(r) UPT_CX(r) #define UPT_SYSCALL_ARG3(r) UPT_DX(r) @@ -15,6 +13,4 @@ #define UPT_SYSCALL_ARG5(r) UPT_DI(r) #define UPT_SYSCALL_ARG6(r) UPT_BP(r) -extern void arch_init_registers(int pid); - #endif diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h index 0dc223aa1c2d..e73573ac871f 100644 --- a/arch/x86/um/shared/sysdep/ptrace_64.h +++ b/arch/x86/um/shared/sysdep/ptrace_64.h @@ -8,8 +8,6 @@ #ifndef __SYSDEP_X86_64_PTRACE_H #define __SYSDEP_X86_64_PTRACE_H -#define MAX_FP_NR HOST_FP_SIZE - #define REGS_R8(r) ((r)[HOST_R8]) #define REGS_R9(r) ((r)[HOST_R9]) #define REGS_R10(r) ((r)[HOST_R10]) @@ -57,6 +55,4 @@ #define UPT_SYSCALL_ARG5(r) UPT_R8(r) #define UPT_SYSCALL_ARG6(r) UPT_R9(r) -extern void arch_init_registers(int pid); - #endif diff --git a/arch/x86/um/shared/sysdep/ptrace_user.h b/arch/x86/um/shared/sysdep/ptrace_user.h index 1d1a824fa652..98da23120538 100644 --- a/arch/x86/um/shared/sysdep/ptrace_user.h +++ b/arch/x86/um/shared/sysdep/ptrace_user.h @@ -11,12 +11,6 @@ #define REGS_IP_INDEX HOST_IP #define REGS_SP_INDEX HOST_SP -#ifdef __i386__ -#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? HOST_FPX_SIZE : HOST_FP_SIZE) -#else -#define FP_SIZE HOST_FP_SIZE -#endif - /* * glibc before 2.27 does not include PTRACE_SYSEMU_SINGLESTEP in its enum, * ensure we have a definition by (re-)defining it here. 
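The stub_32.h/stub_64.h hunks below change how the stub locates its data page: instead of masking the stack pointer, get_stub_data() now takes the current instruction pointer (call/pop on i386, a RIP-relative lea on x86_64), rounds it down to a page boundary and adds one page, relying on the data area being mapped directly after the single stub code page. A rough user-space sketch of the same IP-relative computation (illustrative only, assuming x86-64, GNU inline asm and 4 KiB pages; next_page_after_ip() is a name invented here, not part of the patch):

#include <stdio.h>

#define PAGE_SIZE 4096UL	/* assumed page size */

/* Fetch the current RIP, mask it down to its page, add one page --
 * the same arithmetic the new get_stub_data() performs. */
static void *next_page_after_ip(void)
{
	unsigned long ip;

	asm volatile("lea 0(%%rip), %0" : "=r" (ip));
	return (void *)((ip & ~(PAGE_SIZE - 1)) + PAGE_SIZE);
}

int main(void)
{
	printf("page after this code: %p\n", next_page_after_ip());
	return 0;
}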
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h index 0b44a86dd346..390988132c0a 100644 --- a/arch/x86/um/shared/sysdep/stub_32.h +++ b/arch/x86/um/shared/sysdep/stub_32.h @@ -112,11 +112,23 @@ static __always_inline void *get_stub_data(void) unsigned long ret; asm volatile ( - "movl %%esp,%0 ;" - "andl %1,%0" + "call _here_%=;" + "_here_%=:" + "popl %0;" + "andl %1, %0 ;" + "addl %2, %0 ;" : "=a" (ret) - : "g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1))); + : "g" (~(UM_KERN_PAGE_SIZE - 1)), + "g" (UM_KERN_PAGE_SIZE)); return (void *)ret; } + +#define stub_start(fn) \ + asm volatile ( \ + "subl %0,%%esp ;" \ + "movl %1, %%eax ; " \ + "call *%%eax ;" \ + :: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \ + "i" (&fn)) #endif diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h index 67f44284f1aa..294affbec742 100644 --- a/arch/x86/um/shared/sysdep/stub_64.h +++ b/arch/x86/um/shared/sysdep/stub_64.h @@ -28,6 +28,17 @@ static __always_inline long stub_syscall0(long syscall) return ret; } +static __always_inline long stub_syscall1(long syscall, long arg1) +{ + long ret; + + __asm__ volatile (__syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1) : __syscall_clobber ); + + return ret; +} + static __always_inline long stub_syscall2(long syscall, long arg1, long arg2) { long ret; @@ -106,11 +117,21 @@ static __always_inline void *get_stub_data(void) unsigned long ret; asm volatile ( - "movq %%rsp,%0 ;" - "andq %1,%0" + "lea 0(%%rip), %0;" + "andq %1, %0 ;" + "addq %2, %0 ;" : "=a" (ret) - : "g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1))); + : "g" (~(UM_KERN_PAGE_SIZE - 1)), + "g" (UM_KERN_PAGE_SIZE)); return (void *)ret; } + +#define stub_start(fn) \ + asm volatile ( \ + "subq %0,%%rsp ;" \ + "movq %1,%%rax ;" \ + "call *%%rax ;" \ + :: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \ + "i" (&fn)) #endif diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 2cc8c2309022..75087e85b6fd 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -16,145 +16,24 @@ #include <registers.h> #include <skas.h> -#ifdef CONFIG_X86_32 - -/* - * FPU tag word conversions. - */ - -static inline unsigned short twd_i387_to_fxsr(unsigned short twd) -{ - unsigned int tmp; /* to avoid 16 bit prefixes in the code */ - - /* Transform each pair of bits into 01 (valid) or 00 (empty) */ - tmp = ~twd; - tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ - /* and move the valid bits to the lower byte. 
*/ - tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ - tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ - tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ - return tmp; -} - -static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave) -{ - struct _fpxreg *st = NULL; - unsigned long twd = (unsigned long) fxsave->twd; - unsigned long tag; - unsigned long ret = 0xffff0000; - int i; - -#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16) - - for (i = 0; i < 8; i++) { - if (twd & 0x1) { - st = (struct _fpxreg *) FPREG_ADDR(fxsave, i); - - switch (st->exponent & 0x7fff) { - case 0x7fff: - tag = 2; /* Special */ - break; - case 0x0000: - if ( !st->significand[0] && - !st->significand[1] && - !st->significand[2] && - !st->significand[3] ) { - tag = 1; /* Zero */ - } else { - tag = 2; /* Special */ - } - break; - default: - if (st->significand[3] & 0x8000) { - tag = 0; /* Valid */ - } else { - tag = 2; /* Special */ - } - break; - } - } else { - tag = 3; /* Empty */ - } - ret |= (tag << (2 * i)); - twd = twd >> 1; - } - return ret; -} - -static int convert_fxsr_to_user(struct _fpstate __user *buf, - struct user_fxsr_struct *fxsave) -{ - unsigned long env[7]; - struct _fpreg __user *to; - struct _fpxreg *from; - int i; - - env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul; - env[1] = (unsigned long)fxsave->swd | 0xffff0000ul; - env[2] = twd_fxsr_to_i387(fxsave); - env[3] = fxsave->fip; - env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); - env[5] = fxsave->foo; - env[6] = fxsave->fos; - - if (__copy_to_user(buf, env, 7 * sizeof(unsigned long))) - return 1; - - to = &buf->_st[0]; - from = (struct _fpxreg *) &fxsave->st_space[0]; - for (i = 0; i < 8; i++, to++, from++) { - unsigned long __user *t = (unsigned long __user *)to; - unsigned long *f = (unsigned long *)from; - - if (__put_user(*f, t) || - __put_user(*(f + 1), t + 1) || - __put_user(from->exponent, &to->exponent)) - return 1; - } - return 0; -} - -static int convert_fxsr_from_user(struct user_fxsr_struct *fxsave, - struct _fpstate __user *buf) -{ - unsigned long env[7]; - struct _fpxreg *to; - struct _fpreg __user *from; - int i; - - if (copy_from_user( env, buf, 7 * sizeof(long))) - return 1; - - fxsave->cwd = (unsigned short)(env[0] & 0xffff); - fxsave->swd = (unsigned short)(env[1] & 0xffff); - fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); - fxsave->fip = env[3]; - fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16); - fxsave->fcs = (env[4] & 0xffff); - fxsave->foo = env[5]; - fxsave->fos = env[6]; - - to = (struct _fpxreg *) &fxsave->st_space[0]; - from = &buf->_st[0]; - for (i = 0; i < 8; i++, to++, from++) { - unsigned long *t = (unsigned long *)to; - unsigned long __user *f = (unsigned long __user *)from; - - if (__get_user(*t, f) || - __get_user(*(t + 1), f + 1) || - __get_user(to->exponent, &from->exponent)) - return 1; - } - return 0; -} - -extern int have_fpx_regs; +#include <linux/regset.h> +#include <asm/sigframe.h> +#ifdef CONFIG_X86_32 +struct _xstate_64 { + struct _fpstate_64 fpstate; + struct _header xstate_hdr; + struct _ymmh_state ymmh; + /* New processor state extensions go here: */ +}; +#else +#define _xstate_64 _xstate #endif static int copy_sc_from_user(struct pt_regs *regs, struct sigcontext __user *from) { + struct _xstate_64 __user *from_fp64; struct sigcontext sc; int err; @@ -203,35 +82,27 @@ static int copy_sc_from_user(struct pt_regs *regs, #undef GETREG #ifdef CONFIG_X86_32 - if (have_fpx_regs) { - struct 
user_fxsr_struct fpx; - int pid = userspace_pid[current_thread_info()->cpu]; + from_fp64 = ((void __user *)sc.fpstate) + + offsetof(struct _fpstate_32, _fxsr_env); +#else + from_fp64 = (void __user *)sc.fpstate; +#endif - err = copy_from_user(&fpx, - &((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0], - sizeof(struct user_fxsr_struct)); - if (err) - return 1; + err = copy_from_user(regs->regs.fp, from_fp64, host_fp_size); + if (err) + return 1; - err = convert_fxsr_from_user(&fpx, (void *)sc.fpstate); - if (err) - return 1; - - err = restore_fpx_registers(pid, (unsigned long *) &fpx); - if (err < 0) { - printk(KERN_ERR "copy_sc_from_user - " - "restore_fpx_registers failed, errno = %d\n", - -err); - return 1; - } - } else +#ifdef CONFIG_X86_32 + /* Data is duplicated and this copy is the important one */ + err = copy_regset_from_user(current, + task_user_regset_view(current), + REGSET_FP_LEGACY, 0, + sizeof(struct user_i387_struct), + (void __user *)sc.fpstate); + if (err < 0) + return err; #endif - { - err = copy_from_user(regs->regs.fp, (void *)sc.fpstate, - sizeof(struct _xstate)); - if (err) - return 1; - } + return 0; } @@ -239,6 +110,7 @@ static int copy_sc_to_user(struct sigcontext __user *to, struct _xstate __user *to_fp, struct pt_regs *regs, unsigned long mask) { + struct _xstate_64 __user *to_fp64; struct sigcontext sc; struct faultinfo * fi = &current->thread.arch.faultinfo; int err; @@ -290,35 +162,46 @@ static int copy_sc_to_user(struct sigcontext __user *to, return 1; #ifdef CONFIG_X86_32 - if (have_fpx_regs) { - int pid = userspace_pid[current_thread_info()->cpu]; - struct user_fxsr_struct fpx; - - err = save_fpx_registers(pid, (unsigned long *) &fpx); - if (err < 0){ - printk(KERN_ERR "copy_sc_to_user - save_fpx_registers " - "failed, errno = %d\n", err); - return 1; - } - - err = convert_fxsr_to_user(&to_fp->fpstate, &fpx); - if (err) - return 1; + err = copy_regset_to_user(current, + task_user_regset_view(current), + REGSET_FP_LEGACY, 0, + sizeof(struct _fpstate_32), to_fp); + if (err < 0) + return err; - err |= __put_user(fpx.swd, &to_fp->fpstate.status); - err |= __put_user(X86_FXSR_MAGIC, &to_fp->fpstate.magic); - if (err) - return 1; + __put_user(X86_FXSR_MAGIC, &to_fp->fpstate.magic); + + BUILD_BUG_ON(offsetof(struct _xstate, xstate_hdr) != + offsetof(struct _xstate_64, xstate_hdr) + + offsetof(struct _fpstate_32, _fxsr_env)); + to_fp64 = (void __user *)to_fp + + offsetof(struct _fpstate_32, _fxsr_env); +#else + to_fp64 = to_fp; +#endif /* CONFIG_X86_32 */ + + if (copy_to_user(to_fp64, regs->regs.fp, host_fp_size)) + return 1; - if (copy_to_user(&to_fp->fpstate._fxsr_env[0], &fpx, - sizeof(struct user_fxsr_struct))) - return 1; - } else + /* + * Put magic/size values for userspace. We do not bother to verify them + * later on; however, userspace needs them should it try to read the + * XSTATE data. And ptrace does not fill in these parts.
@@ -290,35 +162,46 @@ static int copy_sc_to_user(struct sigcontext __user *to,
 		return 1;
 
 #ifdef CONFIG_X86_32
-	if (have_fpx_regs) {
-		int pid = userspace_pid[current_thread_info()->cpu];
-		struct user_fxsr_struct fpx;
-
-		err = save_fpx_registers(pid, (unsigned long *) &fpx);
-		if (err < 0){
-			printk(KERN_ERR "copy_sc_to_user - save_fpx_registers "
-			       "failed, errno = %d\n", err);
-			return 1;
-		}
-
-		err = convert_fxsr_to_user(&to_fp->fpstate, &fpx);
-		if (err)
-			return 1;
+	err = copy_regset_to_user(current,
+				  task_user_regset_view(current),
+				  REGSET_FP_LEGACY, 0,
+				  sizeof(struct _fpstate_32), to_fp);
+	if (err < 0)
+		return err;
 
-		err |= __put_user(fpx.swd, &to_fp->fpstate.status);
-		err |= __put_user(X86_FXSR_MAGIC, &to_fp->fpstate.magic);
-		if (err)
-			return 1;
+	__put_user(X86_FXSR_MAGIC, &to_fp->fpstate.magic);
+
+	BUILD_BUG_ON(offsetof(struct _xstate, xstate_hdr) !=
+		     offsetof(struct _xstate_64, xstate_hdr) +
+		     offsetof(struct _fpstate_32, _fxsr_env));
+	to_fp64 = (void __user *)to_fp +
+		  offsetof(struct _fpstate_32, _fxsr_env);
+#else
+	to_fp64 = to_fp;
+#endif /* CONFIG_X86_32 */
+
+	if (copy_to_user(to_fp64, regs->regs.fp, host_fp_size))
 		return 1;
 
-		if (copy_to_user(&to_fp->fpstate._fxsr_env[0], &fpx,
-				 sizeof(struct user_fxsr_struct)))
-			return 1;
-	} else
+	/*
+	 * Put magic/size values for userspace. We do not bother to verify them
+	 * later on, however, userspace needs them should it try to read the
+	 * XSTATE data. And ptrace does not fill in these parts.
+	 */
+	BUILD_BUG_ON(sizeof(int) != FP_XSTATE_MAGIC2_SIZE);
+#ifdef CONFIG_X86_32
+	__put_user(offsetof(struct _fpstate_32, _fxsr_env) +
+		   host_fp_size + FP_XSTATE_MAGIC2_SIZE,
+		   &to_fp64->fpstate.sw_reserved.extended_size);
+#else
+	__put_user(host_fp_size + FP_XSTATE_MAGIC2_SIZE,
+		   &to_fp64->fpstate.sw_reserved.extended_size);
 #endif
-	{
-		if (copy_to_user(to_fp, regs->regs.fp, sizeof(struct _xstate)))
-			return 1;
-	}
+	__put_user(host_fp_size, &to_fp64->fpstate.sw_reserved.xstate_size);
+
+	__put_user(FP_XSTATE_MAGIC1, &to_fp64->fpstate.sw_reserved.magic1);
+	__put_user(FP_XSTATE_MAGIC2,
+		   (int __user *)((void __user *)to_fp64 + host_fp_size));
 
 	return 0;
 }
@@ -336,34 +219,15 @@ static int copy_ucontext_to_user(struct ucontext __user *uc,
 	return err;
 }
 
-struct sigframe
-{
-	char __user *pretcode;
-	int sig;
-	struct sigcontext sc;
-	struct _xstate fpstate;
-	unsigned long extramask[_NSIG_WORDS-1];
-	char retcode[8];
-};
-
-struct rt_sigframe
-{
-	char __user *pretcode;
-	int sig;
-	struct siginfo __user *pinfo;
-	void __user *puc;
-	struct siginfo info;
-	struct ucontext uc;
-	struct _xstate fpstate;
-	char retcode[8];
-};
-
 int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig,
 			  struct pt_regs *regs, sigset_t *mask)
 {
+	size_t math_size = offsetof(struct _fpstate_32, _fxsr_env) +
+			   host_fp_size + FP_XSTATE_MAGIC2_SIZE;
 	struct sigframe __user *frame;
 	void __user *restorer;
 	int err = 0, sig = ksig->sig;
+	unsigned long fp_to;
 
 	/* This is the same calculation as i386 - ((sp + 4) & 15) == 0 */
 	stack_top = ((stack_top + 4) & -16UL) - 4;
@@ -371,13 +235,21 @@ int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig,
 	if (!access_ok(frame, sizeof(*frame)))
 		return 1;
 
+	/* Add required space for math frame */
+	frame = (struct sigframe __user *)((unsigned long)frame - math_size);
+
 	restorer = frame->retcode;
 	if (ksig->ka.sa.sa_flags & SA_RESTORER)
 		restorer = ksig->ka.sa.sa_restorer;
 
-	err |= __put_user(restorer, &frame->pretcode);
+	err |= __put_user(restorer, (void __user * __user *)&frame->pretcode);
 	err |= __put_user(sig, &frame->sig);
-	err |= copy_sc_to_user(&frame->sc, &frame->fpstate, regs, mask->sig[0]);
+
+	fp_to = (unsigned long)frame + sizeof(*frame);
+
+	err |= copy_sc_to_user(&frame->sc,
+			       (struct _xstate __user *)fp_to,
+			       regs, mask->sig[0]);
 
 	if (_NSIG_WORDS > 1)
 		err |= __copy_to_user(&frame->extramask, &mask->sig[1],
 				      sizeof(frame->extramask));
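With the fixed-size struct _xstate member gone from the frame structs, the setup paths above reserve math_size bytes just past the frame and point fp_to at them. A rough picture of the resulting layout (a sketch, not to scale):

/*
 * Stack grows down; the frame pointer is lowered by math_size first:
 *
 *	frame                                 struct (rt_)sigframe itself
 *	frame + sizeof(*frame)                fp_to: _fpstate_32 header,
 *	                                      then the FXSAVE/XSTATE image
 *	frame + sizeof(*frame) + math_size    end, FP_XSTATE_MAGIC2 last
 *
 * math_size = offsetof(struct _fpstate_32, _fxsr_env)
 *	       + host_fp_size + FP_XSTATE_MAGIC2_SIZE
 */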
@@ -407,26 +279,35 @@ int setup_signal_stack_sc(unsigned long stack_top, struct ksignal *ksig,
 int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
 			  struct pt_regs *regs, sigset_t *mask)
 {
+	size_t math_size = offsetof(struct _fpstate_32, _fxsr_env) +
+			   host_fp_size + FP_XSTATE_MAGIC2_SIZE;
 	struct rt_sigframe __user *frame;
 	void __user *restorer;
 	int err = 0, sig = ksig->sig;
+	unsigned long fp_to;
 
 	stack_top &= -8UL;
 	frame = (struct rt_sigframe __user *) stack_top - 1;
 	if (!access_ok(frame, sizeof(*frame)))
 		return 1;
 
+	/* Add required space for math frame */
+	frame = (struct rt_sigframe __user *)((unsigned long)frame - math_size);
+
 	restorer = frame->retcode;
 	if (ksig->ka.sa.sa_flags & SA_RESTORER)
 		restorer = ksig->ka.sa.sa_restorer;
 
-	err |= __put_user(restorer, &frame->pretcode);
+	err |= __put_user(restorer, (void __user * __user *)&frame->pretcode);
 	err |= __put_user(sig, &frame->sig);
-	err |= __put_user(&frame->info, &frame->pinfo);
-	err |= __put_user(&frame->uc, &frame->puc);
+	err |= __put_user(&frame->info, (void __user * __user *)&frame->pinfo);
+	err |= __put_user(&frame->uc, (void __user * __user *)&frame->puc);
 	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
-	err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask,
-				     PT_REGS_SP(regs));
+
+	fp_to = (unsigned long)frame + sizeof(*frame);
+
+	err |= copy_ucontext_to_user(&frame->uc, (struct _xstate __user *)fp_to,
+				     mask, PT_REGS_SP(regs));
 
 	/*
 	 * This is movl $,%eax ; int $0x80
@@ -478,27 +359,24 @@ SYSCALL_DEFINE0(sigreturn)
 
 #else
 
-struct rt_sigframe
-{
-	char __user *pretcode;
-	struct ucontext uc;
-	struct siginfo info;
-	struct _xstate fpstate;
-};
-
 int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
 			  struct pt_regs *regs, sigset_t *set)
 {
+	unsigned long math_size = host_fp_size + FP_XSTATE_MAGIC2_SIZE;
 	struct rt_sigframe __user *frame;
 	int err = 0, sig = ksig->sig;
 	unsigned long fp_to;
 
 	frame = (struct rt_sigframe __user *)
 		round_down(stack_top - sizeof(struct rt_sigframe), 16);
+
+	/* Add required space for math frame */
+	frame = (struct rt_sigframe __user *)((unsigned long)frame - math_size);
+
 	/* Subtract 128 for a red zone and 8 for proper alignment */
 	frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8);
 
-	if (!access_ok(frame, sizeof(*frame)))
+	if (!access_ok(frame, sizeof(*frame) + math_size))
 		goto out;
 
 	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
@@ -509,12 +387,14 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user(NULL, &frame->uc.uc_link);
 	err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs));
-	err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
-			       set->sig[0]);
-	fp_to = (unsigned long)&frame->fpstate;
+	fp_to = (unsigned long)frame + sizeof(*frame);
+
+	err |= copy_sc_to_user(&frame->uc.uc_mcontext,
+			       (struct _xstate __user *)fp_to,
+			       regs, set->sig[0]);
 	err |= __put_user(fp_to, &frame->uc.uc_mcontext.fpstate);
 
 	if (sizeof(*set) == 16) {
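Userspace consumes what copy_sc_to_user() writes through uc_mcontext.fpstate: the magic1/extended_size words let a handler detect the extended area. A hedged userspace-side sketch (constants and offsets mirror the uapi <asm/sigcontext.h> layout; every demo_* name is ours):

#include <signal.h>
#include <stdint.h>
#include <ucontext.h>

#define DEMO_FP_XSTATE_MAGIC1	0x46505853U	/* FP_XSTATE_MAGIC1 */
#define DEMO_SW_RESERVED_OFF	464U		/* sw_reserved: bytes 464..511 */

struct demo_fpx_sw_bytes {			/* mirrors struct _fpx_sw_bytes */
	uint32_t magic1;
	uint32_t extended_size;
	uint64_t xfeatures;
	uint32_t xstate_size;
	uint32_t padding[7];
};

static void demo_handler(int sig, siginfo_t *si, void *ctx)
{
	ucontext_t *uc = ctx;
	/* same address the kernel stored via uc_mcontext.fpstate above */
	char *fp = (char *)uc->uc_mcontext.fpregs;
	struct demo_fpx_sw_bytes *sw;

	if (!fp)
		return;
	sw = (struct demo_fpx_sw_bytes *)(fp + DEMO_SW_RESERVED_OFF);
	if (sw->magic1 == DEMO_FP_XSTATE_MAGIC1) {
		/* extended area present; FP_XSTATE_MAGIC2 occupies the
		 * last 4 bytes, at fp + sw->extended_size - 4 */
	}
}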
@@ -531,7 +411,7 @@
 	 */
 	/* x86-64 should always use SA_RESTORER. */
 	if (ksig->ka.sa.sa_flags & SA_RESTORER)
-		err |= __put_user((void *)ksig->ka.sa.sa_restorer,
+		err |= __put_user((void __user *)ksig->ka.sa.sa_restorer,
 				  &frame->pretcode);
 	else
 		/* could use a vstub here */
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index 1c77d9946199..d6e1cd9956bf 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -20,9 +20,6 @@ void foo(void);
 void foo(void)
 {
 #ifdef __i386__
-	DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_fpregs_struct));
-	DEFINE_LONGS(HOST_FPX_SIZE, sizeof(struct user_fpxregs_struct));
-
 	DEFINE(HOST_IP, EIP);
 	DEFINE(HOST_SP, UESP);
 	DEFINE(HOST_EFLAGS, EFL);
@@ -41,11 +38,6 @@ void foo(void)
 	DEFINE(HOST_GS, GS);
 	DEFINE(HOST_ORIG_AX, ORIG_EAX);
 #else
-#ifdef FP_XSTATE_MAGIC1
-	DEFINE_LONGS(HOST_FP_SIZE, 2696);
-#else
-	DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long));
-#endif
 	DEFINE_LONGS(HOST_BX, RBX);
 	DEFINE_LONGS(HOST_CX, RCX);
 	DEFINE_LONGS(HOST_DI, RDI);
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index 6a77ea6434ff..7478d11dacb7 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -56,7 +56,6 @@ CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage
 quiet_cmd_vdso = VDSO    $@
       cmd_vdso = $(CC) -nostdlib -o $@ \
 		       $(CC_FLAGS_LTO) $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
-		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
-		 sh $(src)/checkundef.sh '$(NM)' '$@'
+		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
 
-VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv -z noexecstack
+VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv -z noexecstack -Wl,--no-undefined
diff --git a/arch/x86/um/vdso/checkundef.sh b/arch/x86/um/vdso/checkundef.sh
deleted file mode 100644
index 8e3ea6bb956f..000000000000
--- a/arch/x86/um/vdso/checkundef.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-nm="$1"
-file="$2"
-$nm "$file" | grep '^ *U' > /dev/null 2>&1
-if [ $? -eq 1 ]; then
-	exit 0
-else
-	echo "$file: undefined symbols found" >&2
-	exit 1
-fi
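Dropping checkundef.sh works because -Wl,--no-undefined makes the linker itself reject unresolved references, so the post-link nm scan becomes redundant. A hypothetical C snippet showing the class of error the flag now catches at vDSO link time (the symbol name is invented for illustration):

/* With -Wl,--no-undefined, linking this into the shared vDSO fails
 * immediately with an "undefined reference" error instead of relying
 * on the removed nm-based checkundef.sh scan. */
extern int symbol_not_linked_in(void);	/* provided by no object file */

int __vdso_demo(void)
{
	return symbol_not_linked_in();
}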