From 3073774e638ef18d222465fe92bfc8fccb90d288 Mon Sep 17 00:00:00 2001 From: Serhii Popovych Date: Mon, 4 Dec 2017 09:36:41 -0500 Subject: KVM: PPC: Book3S HV: Drop prepare_done from struct kvm_resize_hpt Currently the kvm_resize_hpt structure has two fields relevant to the state of an ongoing resize: 'prepare_done', which indicates whether the worker thread has completed or not, and 'error' which indicates whether it was successful or not. Since the success/failure isn't known until completion, this is confusingly redundant. This patch consolidates the information into just the 'error' value: -EBUSY indicates the worked is still in progress, other negative values indicate (completed) failure, 0 indicates successful completion. As a bonus this reduces size of struct kvm_resize_hpt by __alignof__(struct kvm_hpt_info) and saves few bytes of code. While there correct comment in struct kvm_resize_hpt which references a non-existent semaphore (leftover from an early draft). Assert with WARN_ON() in case of HPT allocation thread work runs more than once for resize request or resize_hpt_allocate() returns -EBUSY that is treated specially. Change comparison against zero to make checkpatch.pl happy. Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Serhii Popovych [dwg: Changed BUG_ON()s to WARN_ON()s and altered commit message for clarity] Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 44 +++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 17 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 966097232d21..f5f2c6bf5856 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -65,11 +65,17 @@ struct kvm_resize_hpt { u32 order; /* These fields protected by kvm->lock */ + + /* Possible values and their usage: + * <0 an error occurred during allocation, + * -EBUSY allocation is in the progress, + * 0 allocation made successfuly. + */ int error; - bool prepare_done; - /* Private to the work thread, until prepare_done is true, - * then protected by kvm->resize_hpt_sem */ + /* Private to the work thread, until error != -EBUSY, + * then protected by kvm->lock. + */ struct kvm_hpt_info hpt; }; @@ -1433,15 +1439,23 @@ static void resize_hpt_prepare_work(struct work_struct *work) struct kvm *kvm = resize->kvm; int err; + if (WARN_ON(resize->error != -EBUSY)) + return; + resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", resize->order); err = resize_hpt_allocate(resize); + /* We have strict assumption about -EBUSY + * when preparing for HPT resize. + */ + if (WARN_ON(err == -EBUSY)) + err = -EINPROGRESS; + mutex_lock(&kvm->lock); resize->error = err; - resize->prepare_done = true; mutex_unlock(&kvm->lock); } @@ -1466,14 +1480,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, if (resize) { if (resize->order == shift) { - /* Suitable resize in progress */ - if (resize->prepare_done) { - ret = resize->error; - if (ret != 0) - resize_hpt_release(kvm, resize); - } else { + /* Suitable resize in progress? */ + ret = resize->error; + if (ret == -EBUSY) ret = 100; /* estimated time in ms */ - } + else if (ret) + resize_hpt_release(kvm, resize); goto out; } @@ -1493,6 +1505,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, ret = -ENOMEM; goto out; } + + resize->error = -EBUSY; resize->order = shift; resize->kvm = kvm; INIT_WORK(&resize->work, resize_hpt_prepare_work); @@ -1547,16 +1561,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm, if (!resize || (resize->order != shift)) goto out; - ret = -EBUSY; - if (!resize->prepare_done) - goto out; - ret = resize->error; - if (ret != 0) + if (ret) goto out; ret = resize_hpt_rehash(resize); - if (ret != 0) + if (ret) goto out; resize_hpt_pivot(resize); -- cgit v1.2.3-59-g8ed1b From 4ed11aeefda439c76ddae3ceebcfa4fad111f149 Mon Sep 17 00:00:00 2001 From: Serhii Popovych Date: Mon, 4 Dec 2017 09:36:42 -0500 Subject: KVM: PPC: Book3S HV: Fix use after free in case of multiple resize requests When serving multiple resize requests following could happen: CPU0 CPU1 ---- ---- kvm_vm_ioctl_resize_hpt_prepare(1); -> schedule_work() /* system_rq might be busy: delay */ kvm_vm_ioctl_resize_hpt_prepare(2); mutex_lock(); if (resize) { ... release_hpt_resize(); } ... resize_hpt_prepare_work() -> schedule_work() { mutex_unlock() /* resize->kvm could be wrong */ struct kvm *kvm = resize->kvm; mutex_lock(&kvm->lock); <<<< UAF ... } i.e. a second resize request with different order could be started by kvm_vm_ioctl_resize_hpt_prepare(), causing the previous request to be free()d when there's still an active worker thread which will try to access it. This leads to a use after free in point marked with UAF on the diagram above. To prevent this from happening, instead of unconditionally releasing a pre-existing resize structure from the prepare ioctl(), we check if the existing structure has an in-progress worker. We do that by checking if the resize->error == -EBUSY, which is safe because the resize->error field is protected by the kvm->lock. If there is an active worker, instead of releasing, we mark the structure as stale by unlinking it from kvm_struct. In the worker thread we check for a stale structure (with kvm->lock held), and in that case abort, releasing the stale structure ourself. We make the check both before and the actual allocation. Strictly, only the check afterwards is needed, the check before is an optimization: if the structure happens to become stale before the worker thread is dispatched, rather than during the allocation, it means we can avoid allocating then immediately freeing a potentially substantial amount of memory. This fixes following or similar host kernel crash message: [ 635.277361] Unable to handle kernel paging request for data at address 0x00000000 [ 635.277438] Faulting instruction address: 0xc00000000052f568 [ 635.277446] Oops: Kernel access of bad area, sig: 11 [#1] [ 635.277451] SMP NR_CPUS=2048 NUMA PowerNV [ 635.277470] Modules linked in: xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter nfsv3 nfs_acl nfs lockd grace fscache kvm_hv kvm rpcrdma sunrpc ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ext4 ib_srp scsi_transport_srp ib_ipoib mbcache jbd2 rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ocrdma(T) ib_core ses enclosure scsi_transport_sas sg shpchp leds_powernv ibmpowernv i2c_opal i2c_core powernv_rng ipmi_powernv ipmi_devintf ipmi_msghandler ip_tables xfs libcrc32c sr_mod sd_mod cdrom lpfc nvme_fc(T) nvme_fabrics nvme_core ipr nvmet_fc(T) tg3 nvmet libata be2net crc_t10dif crct10dif_generic scsi_transport_fc ptp scsi_tgt pps_core crct10dif_common dm_mirror dm_region_hash dm_log dm_mod [ 635.278687] CPU: 40 PID: 749 Comm: kworker/40:1 Tainted: G ------------ T 3.10.0.bz1510771+ #1 [ 635.278782] Workqueue: events resize_hpt_prepare_work [kvm_hv] [ 635.278851] task: c0000007e6840000 ti: c0000007e9180000 task.ti: c0000007e9180000 [ 635.278919] NIP: c00000000052f568 LR: c0000000009ea310 CTR: c0000000009ea4f0 [ 635.278988] REGS: c0000007e91837f0 TRAP: 0300 Tainted: G ------------ T (3.10.0.bz1510771+) [ 635.279077] MSR: 9000000100009033 CR: 24002022 XER: 00000000 [ 635.279248] CFAR: c000000000009368 DAR: 0000000000000000 DSISR: 40000000 SOFTE: 1 GPR00: c0000000009ea310 c0000007e9183a70 c000000001250b00 c0000007e9183b10 GPR04: 0000000000000000 0000000000000000 c0000007e9183650 0000000000000000 GPR08: c0000007ffff7b80 00000000ffffffff 0000000080000028 d00000000d2529a0 GPR12: 0000000000002200 c000000007b56800 c000000000120028 c0000007f135bb40 GPR16: 0000000000000000 c000000005c1e018 c000000005c1e018 0000000000000000 GPR20: 0000000000000001 c0000000011bf778 0000000000000001 fffffffffffffef7 GPR24: 0000000000000000 c000000f1e262e50 0000000000000002 c0000007e9180000 GPR28: c000000f1e262e4c c000000f1e262e50 0000000000000000 c0000007e9183b10 [ 635.280149] NIP [c00000000052f568] __list_add+0x38/0x110 [ 635.280197] LR [c0000000009ea310] __mutex_lock_slowpath+0xe0/0x2c0 [ 635.280253] Call Trace: [ 635.280277] [c0000007e9183af0] [c0000000009ea310] __mutex_lock_slowpath+0xe0/0x2c0 [ 635.280356] [c0000007e9183b70] [c0000000009ea554] mutex_lock+0x64/0x70 [ 635.280426] [c0000007e9183ba0] [d00000000d24da04] resize_hpt_prepare_work+0xe4/0x1c0 [kvm_hv] [ 635.280507] [c0000007e9183c40] [c000000000113c0c] process_one_work+0x1dc/0x680 [ 635.280587] [c0000007e9183ce0] [c000000000114250] worker_thread+0x1a0/0x520 [ 635.280655] [c0000007e9183d80] [c00000000012010c] kthread+0xec/0x100 [ 635.280724] [c0000007e9183e30] [c00000000000a4b8] ret_from_kernel_thread+0x5c/0xa4 [ 635.280814] Instruction dump: [ 635.280880] 7c0802a6 fba1ffe8 fbc1fff0 7cbd2b78 fbe1fff8 7c9e2378 7c7f1b78 f8010010 [ 635.281099] f821ff81 e8a50008 7fa52040 40de00b8 7fbd2840 40de008c 7fbff040 [ 635.281324] ---[ end trace b628b73449719b9d ]--- Cc: stable@vger.kernel.org # v4.10+ Fixes: b5baa6877315 ("KVM: PPC: Book3S HV: KVM-HV HPT resizing implementation") Signed-off-by: Serhii Popovych [dwg: Replaced BUG_ON()s with WARN_ONs() and reworded commit message for clarity] Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 50 ++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 15 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f5f2c6bf5856..8355398f0bb6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1419,16 +1419,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize) static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize) { - BUG_ON(kvm->arch.resize_hpt != resize); + if (WARN_ON(!mutex_is_locked(&kvm->lock))) + return; if (!resize) return; - if (resize->hpt.virt) - kvmppc_free_hpt(&resize->hpt); + if (resize->error != -EBUSY) { + if (resize->hpt.virt) + kvmppc_free_hpt(&resize->hpt); + kfree(resize); + } - kvm->arch.resize_hpt = NULL; - kfree(resize); + if (kvm->arch.resize_hpt == resize) + kvm->arch.resize_hpt = NULL; } static void resize_hpt_prepare_work(struct work_struct *work) @@ -1437,26 +1441,42 @@ static void resize_hpt_prepare_work(struct work_struct *work) struct kvm_resize_hpt, work); struct kvm *kvm = resize->kvm; - int err; + int err = 0; if (WARN_ON(resize->error != -EBUSY)) return; - resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", - resize->order); + mutex_lock(&kvm->lock); - err = resize_hpt_allocate(resize); + /* Request is still current? */ + if (kvm->arch.resize_hpt == resize) { + /* We may request large allocations here: + * do not sleep with kvm->lock held for a while. + */ + mutex_unlock(&kvm->lock); - /* We have strict assumption about -EBUSY - * when preparing for HPT resize. - */ - if (WARN_ON(err == -EBUSY)) - err = -EINPROGRESS; + resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", + resize->order); - mutex_lock(&kvm->lock); + err = resize_hpt_allocate(resize); + + /* We have strict assumption about -EBUSY + * when preparing for HPT resize. + */ + if (WARN_ON(err == -EBUSY)) + err = -EINPROGRESS; + + mutex_lock(&kvm->lock); + /* It is possible that kvm->arch.resize_hpt != resize + * after we grab kvm->lock again. + */ + } resize->error = err; + if (kvm->arch.resize_hpt != resize) + resize_hpt_release(kvm, resize); + mutex_unlock(&kvm->lock); } -- cgit v1.2.3-59-g8ed1b From f41d84dddc66b164ac16acf3f584c276146f1c48 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 12 Dec 2017 17:59:15 +0530 Subject: powerpc/perf: Dereference BHRB entries safely It's theoretically possible that branch instructions recorded in BHRB (Branch History Rolling Buffer) entries have already been unmapped before they are processed by the kernel. Hence, trying to dereference such memory location will result in a crash. eg: Unable to handle kernel paging request for data at address 0xd000000019c41764 Faulting instruction address: 0xc000000000084a14 NIP [c000000000084a14] branch_target+0x4/0x70 LR [c0000000000eb828] record_and_restart+0x568/0x5c0 Call Trace: [c0000000000eb3b4] record_and_restart+0xf4/0x5c0 (unreliable) [c0000000000ec378] perf_event_interrupt+0x298/0x460 [c000000000027964] performance_monitor_exception+0x54/0x70 [c000000000009ba4] performance_monitor_common+0x114/0x120 Fix it by deferefencing the addresses safely. Fixes: 691231846ceb ("powerpc/perf: Fix setting of "to" addresses for BHRB") Cc: stable@vger.kernel.org # v3.10+ Suggested-by: Naveen N. Rao Signed-off-by: Ravi Bangoria Reviewed-by: Naveen N. Rao [mpe: Use probe_kernel_read() which is clearer, tweak change log] Signed-off-by: Michael Ellerman --- arch/powerpc/perf/core-book3s.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 153812966365..fce545774d50 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr) int ret; __u64 target; - if (is_kernel_addr(addr)) - return branch_target((unsigned int *)addr); + if (is_kernel_addr(addr)) { + if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + return 0; + + return branch_target(&instr); + } /* Userspace: need copy instruction here then translate it */ pagefault_disable(); -- cgit v1.2.3-59-g8ed1b From ad2b6e01024ef23bddc3ce0bcb115ecd8c520b7e Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Tue, 5 Dec 2017 11:00:38 +0530 Subject: powerpc/perf/imc: Fix nest-imc cpuhotplug callback failure Oops is observed during boot: Faulting instruction address: 0xc000000000248340 cpu 0x0: Vector: 380 (Data Access Out of Range) at [c000000ff66fb850] pc: c000000000248340: event_function_call+0x50/0x1f0 lr: c00000000024878c: perf_remove_from_context+0x3c/0x100 sp: c000000ff66fbad0 msr: 9000000000009033 dar: 7d20e2a6f92d03c0 pid = 14, comm = cpuhp/0 While registering the cpuhotplug callbacks for nest-imc, if we fail in the cpuhotplug online path for any random node in a multi node system (because the opal call to stop nest-imc counters fails for that node), ppc_nest_imc_cpu_offline() will get invoked for other nodes who successfully returned from cpuhotplug online path. This call trace is generated since in the ppc_nest_imc_cpu_offline() path we are trying to migrate the event context, when nest-imc counters are not even initialized. Patch to add a check to ensure that nest-imc is registered before migrating the event context. Fixes: 885dcd709ba9 ("powerpc/perf: Add nest IMC PMU support") Signed-off-by: Anju T Sudhakar Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 0ead3cd73caa..f1b940714d65 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -309,6 +309,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu) if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask)) return 0; + /* + * Check whether nest_imc is registered. We could end up here if the + * cpuhotplug callback registration fails. i.e, callback invokes the + * offline path for all successfully registered nodes. At this stage, + * nest_imc pmu will not be registered and we should return here. + * + * We return with a zero since this is not an offline failure. And + * cpuhp_setup_state() returns the actual failure reason to the caller, + * which in turn will call the cleanup routine. + */ + if (!nest_pmus) + return 0; + /* * Now that this cpu is one of the designated, * find a next cpu a) which is online and b) in same chip. -- cgit v1.2.3-59-g8ed1b From 110df8bd3e418b3476cae80babe8add48a8ea523 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Thu, 7 Dec 2017 22:53:27 +0530 Subject: powerpc/perf: Fix kfree memory allocated for nest pmus imc_common_cpuhp_mem_free() is the common function for all IMC (In-memory Collection counters) domains to unregister cpuhotplug callback and free memory. Since kfree of memory allocated for nest-imc (per_nest_pmu_arr) is in the common code, all domains (core/nest/thread) can do the kfree in the failure case. This could potentially create a call trace as shown below, where core(/thread/nest) imc pmu initialization fails and in the failure path imc_common_cpuhp_mem_free() free the memory(per_nest_pmu_arr), which is allocated by successfully registered nest units. The call trace is generated in a scenario where core-imc initialization is made to fail and a cpuhotplug is performed in a p9 system. During cpuhotplug ppc_nest_imc_cpu_offline() tries to access per_nest_pmu_arr, which is already freed by core-imc. NIP [c000000000cb6a94] mutex_lock+0x34/0x90 LR [c000000000cb6a88] mutex_lock+0x28/0x90 Call Trace: mutex_lock+0x28/0x90 (unreliable) perf_pmu_migrate_context+0x90/0x3a0 ppc_nest_imc_cpu_offline+0x190/0x1f0 cpuhp_invoke_callback+0x160/0x820 cpuhp_thread_fun+0x1bc/0x270 smpboot_thread_fn+0x250/0x290 kthread+0x1a8/0x1b0 ret_from_kernel_thread+0x5c/0x74 To address this scenario do the kfree(per_nest_pmu_arr) only in case of nest-imc initialization failure, and when there is no other nest units registered. Fixes: 73ce9aec65b1 ("powerpc/perf: Fix IMC_MAX_PMU macro") Signed-off-by: Anju T Sudhakar Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index f1b940714d65..be4e7f84f70a 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1184,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) if (nest_pmus == 1) { cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); } if (nest_pmus > 0) @@ -1208,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); kfree(pmu_ptr); - kfree(per_nest_pmu_arr); return; } @@ -1322,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id ret = nest_pmu_cpumask_init(); if (ret) { mutex_unlock(&nest_init_lock); + kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); goto err_free; } } -- cgit v1.2.3-59-g8ed1b From 87338c8e2cbb317b5f757e6172f94e2e3799cd20 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 14 Dec 2017 21:07:24 +0100 Subject: bpf, ppc64: do not reload skb pointers in non-skb context The assumption of unconditionally reloading skb pointers on BPF helper calls where bpf_helper_changes_pkt_data() holds true is wrong. There can be different contexts where the helper would enforce a reload such as in case of XDP. Here, we do have a struct xdp_buff instead of struct sk_buff as context, thus this will access garbage. JITs only ever need to deal with cached skb pointer reload when ld_abs/ind was seen, therefore guard the reload behind SEEN_SKB. Fixes: 156d0e290e96 ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF") Signed-off-by: Daniel Borkmann Reviewed-by: Naveen N. Rao Acked-by: Alexei Starovoitov Tested-by: Sandipan Das Signed-off-by: Alexei Starovoitov --- arch/powerpc/net/bpf_jit_comp64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 46d74e81aff1..d183b4801bdb 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -763,7 +763,8 @@ emit_clear: func = (u8 *) __bpf_call_base + imm; /* Save skb pointer if we need to re-cache skb data */ - if (bpf_helper_changes_pkt_data(func)) + if ((ctx->seen & SEEN_SKB) && + bpf_helper_changes_pkt_data(func)) PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_func_call(image, ctx, (u64)func); @@ -772,7 +773,8 @@ emit_clear: PPC_MR(b2p[BPF_REG_0], 3); /* refresh skb cache */ - if (bpf_helper_changes_pkt_data(func)) { + if ((ctx->seen & SEEN_SKB) && + bpf_helper_changes_pkt_data(func)) { /* reload skb pointer to r3 */ PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_skb_loads(image, ctx); -- cgit v1.2.3-59-g8ed1b From 182dc9c7f217146d69d9c0b75c150c0314b9b170 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 18 Dec 2017 16:33:36 +1100 Subject: powerpc/kernel: Print actual address of regs when oopsing When we oops or otherwise call show_regs() we print the address of the regs structure. Being able to see the address is fairly useful, firstly to verify that the regs pointer is not completely bogus, and secondly it allows you to dump the regs and surrounding memory with a debugger if you have one. In the normal case the regs will be located somewhere on the stack, so printing their location discloses no further information than printing the stack pointer does already. So switch to %px and print the actual address, not the hashed value. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5acb5a176dbe..72be0c32e902 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs) printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); - printk("REGS: %p TRAP: %04lx %s (%s)\n", + printk("REGS: %px TRAP: %04lx %s (%s)\n", regs, regs->trap, print_tainted(), init_utsname()->release); printk("MSR: "REG" ", regs->msr); print_msr_bits(regs->msr); -- cgit v1.2.3-59-g8ed1b From dc1c4165d189350cb51bdd3057deb6ecd164beda Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Tue, 12 Dec 2017 12:02:04 +0000 Subject: KVM: PPC: Book3S: fix XIVE migration of pending interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When restoring a pending interrupt, we are setting the Q bit to force a retrigger in xive_finish_unmask(). But we also need to force an EOI in this case to reach the same initial state : P=1, Q=0. This can be done by not setting 'old_p' for pending interrupts which will inform xive_finish_unmask() that an EOI needs to be sent. Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller") Cc: stable@vger.kernel.org # v4.12+ Suggested-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Reviewed-by: Laurent Vivier Tested-by: Laurent Vivier Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_xive.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index bf457843e032..b5e6d227a034 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1558,7 +1558,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) /* * Restore P and Q. If the interrupt was pending, we - * force both P and Q, which will trigger a resend. + * force Q and !P, which will trigger a resend. * * That means that a guest that had both an interrupt * pending (queued) and Q set will restore with only @@ -1566,7 +1566,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) * is perfectly fine as coalescing interrupts that haven't * been presented yet is always allowed. */ - if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING) + if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING)) state->old_p = true; if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING) state->old_q = true; -- cgit v1.2.3-59-g8ed1b From 7333b5aca412d6ad02667b5a513485838a91b136 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Tue, 12 Dec 2017 18:23:56 +0100 Subject: KVM: PPC: Book3S HV: Fix pending_pri value in kvmppc_xive_get_icp() When we migrate a VM from a POWER8 host (XICS) to a POWER9 host (XICS-on-XIVE), we have an error: qemu-kvm: Unable to restore KVM interrupt controller state \ (0xff000000) for CPU 0: Invalid argument This is because kvmppc_xics_set_icp() checks the new state is internaly consistent, and especially: ... 1129 if (xisr == 0) { 1130 if (pending_pri != 0xff) 1131 return -EINVAL; ... On the other side, kvmppc_xive_get_icp() doesn't set neither the pending_pri value, nor the xisr value (set to 0) (and kvmppc_xive_set_icp() ignores the pending_pri value) As xisr is 0, pending_pri must be set to 0xff. Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Laurent Vivier Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_xive.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index b5e6d227a034..0d750d274c4e 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) /* Return the per-cpu state for state saving/migration */ return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT | - (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT; + (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT | + (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT; } int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) -- cgit v1.2.3-59-g8ed1b From c10e83f598d08046dd1ebc8360d4bb12d802d51b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Dec 2017 12:27:29 +0100 Subject: arch, mm: Allow arch_dup_mmap() to fail In order to sanitize the LDT initialization on x86 arch_dup_mmap() must be allowed to fail. Fix up all instances. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Andy Lutomirsky Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: dan.j.williams@intel.com Cc: hughd@google.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/mmu_context.h | 5 +++-- arch/um/include/asm/mmu_context.h | 3 ++- arch/unicore32/include/asm/mmu_context.h | 5 +++-- arch/x86/include/asm/mmu_context.h | 4 ++-- include/asm-generic/mm_hooks.h | 5 +++-- kernel/fork.c | 3 +-- 6 files changed, 14 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 492d8140a395..44fdf4786638 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -114,9 +114,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, #endif } -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index b668e351fd6c..fca34b2177e2 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm); /* * Needed since we do not use the asm-generic/mm_hooks.h: */ -static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { uml_setup_stubs(mm); + return 0; } extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index 59b06b48f27d..5c205a9cb5a6 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -81,9 +81,10 @@ do { \ } \ } while (0) -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } static inline void arch_unmap(struct mm_struct *mm, diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 6d16d15d09a0..c76162439c8a 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -176,10 +176,10 @@ do { \ } while (0) #endif -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { paravirt_arch_dup_mmap(oldmm, mm); + return 0; } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index ea189d88a3cc..8ac4e68a12f0 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -7,9 +7,10 @@ #ifndef _ASM_GENERIC_MM_HOOKS_H #define _ASM_GENERIC_MM_HOOKS_H -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/kernel/fork.c b/kernel/fork.c index 07cc743698d3..500ce64517d9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, goto out; } /* a new mm has just been created */ - arch_dup_mmap(oldmm, mm); - retval = 0; + retval = arch_dup_mmap(oldmm, mm); out: up_write(&mm->mmap_sem); flush_tlb_mm(oldmm); -- cgit v1.2.3-59-g8ed1b From 39c3fd58952d7599d367c84c1330b785d91d6088 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 2 Dec 2017 18:11:04 +0100 Subject: kernel/irq: Extend lockdep class for request mutex The IRQ code already has support for lockdep class for the lock mutex in an interrupt descriptor. Extend this to add a second class for the request mutex in the descriptor. Not having a class is resulting in false positive splats in some code paths. Signed-off-by: Andrew Lunn Signed-off-by: Thomas Gleixner Acked-by: linus.walleij@linaro.org Cc: grygorii.strashko@ti.com Cc: f.fainelli@gmail.com Link: https://lkml.kernel.org/r/1512234664-21555-1-git-send-email-andrew@lunn.ch --- arch/powerpc/sysdev/fsl_msi.c | 4 +++- drivers/gpio/gpio-bcm-kona.c | 3 ++- drivers/gpio/gpio-brcmstb.c | 4 +++- drivers/gpio/gpio-tegra.c | 4 +++- drivers/gpio/gpiolib.c | 27 ++++++++++++++++--------- drivers/irqchip/irq-renesas-intc-irqpin.c | 6 +++++- drivers/mfd/arizona-irq.c | 4 +++- drivers/pinctrl/pinctrl-single.c | 5 ++++- include/linux/gpio/driver.h | 33 ++++++++++++++++++++----------- include/linux/irqdesc.h | 9 ++++++--- kernel/irq/generic-chip.c | 11 +++++++---- 11 files changed, 75 insertions(+), 35 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 44cbf4c12ea1..df95102e732c 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev) } static struct lock_class_key fsl_msi_irq_class; +static struct lock_class_key fsl_msi_irq_request_class; static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, int offset, int irq_index) @@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, dev_err(&dev->dev, "No memory for MSI cascade data\n"); return -ENOMEM; } - irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class); + irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class, + &fsl_msi_irq_request_class); cascade_data->index = offset; cascade_data->msi_data = msi; cascade_data->virq = virt_msir; diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index dfcf56ee3c61..76861a00bb92 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -522,6 +522,7 @@ static struct of_device_id const bcm_kona_gpio_of_match[] = { * category than their parents, so it won't report false recursion. */ static struct lock_class_key gpio_lock_class; +static struct lock_class_key gpio_request_class; static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) @@ -531,7 +532,7 @@ static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq, ret = irq_set_chip_data(irq, d->host_data); if (ret < 0) return ret; - irq_set_lockdep_class(irq, &gpio_lock_class); + irq_set_lockdep_class(irq, &gpio_lock_class, &gpio_request_class); irq_set_chip_and_handler(irq, &bcm_gpio_irq_chip, handle_simple_irq); irq_set_noprobe(irq); diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c index 545d43a587b7..5b24801bffef 100644 --- a/drivers/gpio/gpio-brcmstb.c +++ b/drivers/gpio/gpio-brcmstb.c @@ -327,6 +327,7 @@ static struct brcmstb_gpio_bank *brcmstb_gpio_hwirq_to_bank( * category than their parents, so it won't report false recursion. */ static struct lock_class_key brcmstb_gpio_irq_lock_class; +static struct lock_class_key brcmstb_gpio_irq_request_class; static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq, @@ -346,7 +347,8 @@ static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq, ret = irq_set_chip_data(irq, &bank->gc); if (ret < 0) return ret; - irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class); + irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class, + &brcmstb_gpio_irq_lock_class); irq_set_chip_and_handler(irq, &priv->irq_chip, handle_level_irq); irq_set_noprobe(irq); return 0; diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index 8db47f671708..02fa8fe2292a 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -565,6 +565,7 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = { * than their parents, so it won't report false recursion. */ static struct lock_class_key gpio_lock_class; +static struct lock_class_key gpio_request_class; static int tegra_gpio_probe(struct platform_device *pdev) { @@ -670,7 +671,8 @@ static int tegra_gpio_probe(struct platform_device *pdev) bank = &tgi->bank_info[GPIO_BANK(gpio)]; - irq_set_lockdep_class(irq, &gpio_lock_class); + irq_set_lockdep_class(irq, &gpio_lock_class, + &gpio_request_class); irq_set_chip_data(irq, bank); irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq); } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index aad84a6306c4..44332b793718 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -73,7 +73,8 @@ LIST_HEAD(gpio_devices); static void gpiochip_free_hogs(struct gpio_chip *chip); static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip); static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip); @@ -1100,7 +1101,8 @@ static void gpiochip_setup_devs(void) } int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, - struct lock_class_key *key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { unsigned long flags; int status = 0; @@ -1246,7 +1248,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, if (status) goto err_remove_from_list; - status = gpiochip_add_irqchip(chip, key); + status = gpiochip_add_irqchip(chip, lock_key, request_key); if (status) goto err_remove_chip; @@ -1632,7 +1634,7 @@ int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, * This lock class tells lockdep that GPIO irqs are in a different * category than their parents, so it won't report false recursion. */ - irq_set_lockdep_class(irq, chip->irq.lock_key); + irq_set_lockdep_class(irq, chip->irq.lock_key, chip->irq.request_key); irq_set_chip_and_handler(irq, chip->irq.chip, chip->irq.handler); /* Chips that use nested thread handlers have them marked */ if (chip->irq.threaded) @@ -1712,10 +1714,12 @@ static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset) /** * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip * @gpiochip: the GPIO chip to add the IRQ chip to - * @lock_key: lockdep class + * @lock_key: lockdep class for IRQ lock + * @request_key: lockdep class for IRQ request */ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *lock_key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { struct irq_chip *irqchip = gpiochip->irq.chip; const struct irq_domain_ops *ops; @@ -1753,6 +1757,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, gpiochip->to_irq = gpiochip_to_irq; gpiochip->irq.default_type = type; gpiochip->irq.lock_key = lock_key; + gpiochip->irq.request_key = request_key; if (gpiochip->irq.domain_ops) ops = gpiochip->irq.domain_ops; @@ -1850,7 +1855,8 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip) * @type: the default type for IRQs on this irqchip, pass IRQ_TYPE_NONE * to have the core avoid setting up any default type in the hardware. * @threaded: whether this irqchip uses a nested thread handler - * @lock_key: lockdep class + * @lock_key: lockdep class for IRQ lock + * @request_key: lockdep class for IRQ request * * This function closely associates a certain irqchip with a certain * gpiochip, providing an irq domain to translate the local IRQs to @@ -1872,7 +1878,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type, bool threaded, - struct lock_class_key *lock_key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { struct device_node *of_node; @@ -1913,6 +1920,7 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, gpiochip->irq.default_type = type; gpiochip->to_irq = gpiochip_to_irq; gpiochip->irq.lock_key = lock_key; + gpiochip->irq.request_key = request_key; gpiochip->irq.domain = irq_domain_add_simple(of_node, gpiochip->ngpio, first_irq, &gpiochip_domain_ops, gpiochip); @@ -1940,7 +1948,8 @@ EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_key); #else /* CONFIG_GPIOLIB_IRQCHIP */ static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { return 0; } diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index 06f29cf5018a..cee59fe1321c 100644 --- a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -342,6 +342,9 @@ static irqreturn_t intc_irqpin_shared_irq_handler(int irq, void *dev_id) */ static struct lock_class_key intc_irqpin_irq_lock_class; +/* And this is for the request mutex */ +static struct lock_class_key intc_irqpin_irq_request_class; + static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { @@ -352,7 +355,8 @@ static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, intc_irqpin_dbg(&p->irq[hw], "map"); irq_set_chip_data(virq, h->host_data); - irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class); + irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class, + &intc_irqpin_irq_request_class); irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); return 0; } diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c index 09cf3699e354..a307832d7e45 100644 --- a/drivers/mfd/arizona-irq.c +++ b/drivers/mfd/arizona-irq.c @@ -184,6 +184,7 @@ static struct irq_chip arizona_irq_chip = { }; static struct lock_class_key arizona_irq_lock_class; +static struct lock_class_key arizona_irq_request_class; static int arizona_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) @@ -191,7 +192,8 @@ static int arizona_irq_map(struct irq_domain *h, unsigned int virq, struct arizona *data = h->host_data; irq_set_chip_data(virq, data); - irq_set_lockdep_class(virq, &arizona_irq_lock_class); + irq_set_lockdep_class(virq, &arizona_irq_lock_class, + &arizona_irq_request_class); irq_set_chip_and_handler(virq, &arizona_irq_chip, handle_simple_irq); irq_set_nested_thread(virq, 1); irq_set_noprobe(virq); diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index e6cd8de793e2..3501491e5bfc 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -222,6 +222,9 @@ static enum pin_config_param pcs_bias[] = { */ static struct lock_class_key pcs_lock_class; +/* Class for the IRQ request mutex */ +static struct lock_class_key pcs_request_class; + /* * REVISIT: Reads and writes could eventually use regmap or something * generic. But at least on omaps, some mux registers are performance @@ -1486,7 +1489,7 @@ static int pcs_irqdomain_map(struct irq_domain *d, unsigned int irq, irq_set_chip_data(irq, pcs_soc); irq_set_chip_and_handler(irq, &pcs->chip, handle_level_irq); - irq_set_lockdep_class(irq, &pcs_lock_class); + irq_set_lockdep_class(irq, &pcs_lock_class, &pcs_request_class); irq_set_noprobe(irq); return 0; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 55e672592fa9..7258cd676df4 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -66,9 +66,10 @@ struct gpio_irq_chip { /** * @lock_key: * - * Per GPIO IRQ chip lockdep class. + * Per GPIO IRQ chip lockdep classes. */ struct lock_class_key *lock_key; + struct lock_class_key *request_key; /** * @parent_handler: @@ -323,7 +324,8 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip, /* add/remove chips */ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, - struct lock_class_key *lock_key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); /** * gpiochip_add_data() - register a gpio_chip @@ -350,11 +352,13 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, */ #ifdef CONFIG_LOCKDEP #define gpiochip_add_data(chip, data) ({ \ - static struct lock_class_key key; \ - gpiochip_add_data_with_key(chip, data, &key); \ + static struct lock_class_key lock_key; \ + static struct lock_class_key request_key; \ + gpiochip_add_data_with_key(chip, data, &lock_key, \ + &request_key); \ }) #else -#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL) +#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL) #endif static inline int gpiochip_add(struct gpio_chip *chip) @@ -429,7 +433,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type, bool threaded, - struct lock_class_key *lock_key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); #ifdef CONFIG_LOCKDEP @@ -445,10 +450,12 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type) { - static struct lock_class_key key; + static struct lock_class_key lock_key; + static struct lock_class_key request_key; return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, false, &key); + handler, type, false, + &lock_key, &request_key); } static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, @@ -458,10 +465,12 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, unsigned int type) { - static struct lock_class_key key; + static struct lock_class_key lock_key; + static struct lock_class_key request_key; return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, true, &key); + handler, type, true, + &lock_key, &request_key); } #else static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, @@ -471,7 +480,7 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, unsigned int type) { return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, false, NULL); + handler, type, false, NULL, NULL); } static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, @@ -481,7 +490,7 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, unsigned int type) { return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, true, NULL); + handler, type, true, NULL, NULL); } #endif /* CONFIG_LOCKDEP */ diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 39fb3700f7a9..25b33b664537 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -255,12 +255,15 @@ static inline bool irq_is_percpu_devid(unsigned int irq) } static inline void -irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class) +irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, + struct lock_class_key *request_class) { struct irq_desc *desc = irq_to_desc(irq); - if (desc) - lockdep_set_class(&desc->lock, class); + if (desc) { + lockdep_set_class(&desc->lock, lock_class); + lockdep_set_class(&desc->request_mutex, request_class); + } } #ifdef CONFIG_IRQ_PREFLOW_FASTEOI diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index c26c5bb6b491..508c03dfef25 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -364,10 +364,11 @@ irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq) EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip); /* - * Separate lockdep class for interrupt chip which can nest irq_desc - * lock. + * Separate lockdep classes for interrupt chip which can nest irq_desc + * lock and request mutex. */ static struct lock_class_key irq_nested_lock_class; +static struct lock_class_key irq_nested_request_class; /* * irq_map_generic_chip - Map a generic chip for an irq domain @@ -409,7 +410,8 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, set_bit(idx, &gc->installed); if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK) - irq_set_lockdep_class(virq, &irq_nested_lock_class); + irq_set_lockdep_class(virq, &irq_nested_lock_class, + &irq_nested_request_class); if (chip->irq_calc_mask) chip->irq_calc_mask(data); @@ -479,7 +481,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, continue; if (flags & IRQ_GC_INIT_NESTED_LOCK) - irq_set_lockdep_class(i, &irq_nested_lock_class); + irq_set_lockdep_class(i, &irq_nested_lock_class, + &irq_nested_request_class); if (!(flags & IRQ_GC_NO_MASK)) { struct irq_data *d = irq_get_irq_data(i); -- cgit v1.2.3-59-g8ed1b From ecb101aed86156ec7cd71e5dca668e09146e6994 Mon Sep 17 00:00:00 2001 From: John Sperbeck Date: Sun, 31 Dec 2017 21:24:58 -0800 Subject: powerpc/mm: Fix SEGV on mapped region to return SEGV_ACCERR The recent refactoring of the powerpc page fault handler in commit c3350602e876 ("powerpc/mm: Make bad_area* helper functions") caused access to protected memory regions to indicate SEGV_MAPERR instead of the traditional SEGV_ACCERR in the si_code field of a user-space signal handler. This can confuse debug libraries that temporarily change the protection of memory regions, and expect to use SEGV_ACCERR as an indication to restore access to a region. This commit restores the previous behavior. The following program exhibits the issue: $ ./repro read || echo "FAILED" $ ./repro write || echo "FAILED" $ ./repro exec || echo "FAILED" #include #include #include #include #include #include #include static void segv_handler(int n, siginfo_t *info, void *arg) { _exit(info->si_code == SEGV_ACCERR ? 0 : 1); } int main(int argc, char **argv) { void *p = NULL; struct sigaction act = { .sa_sigaction = segv_handler, .sa_flags = SA_SIGINFO, }; assert(argc == 2); p = mmap(NULL, getpagesize(), (strcmp(argv[1], "write") == 0) ? PROT_READ : 0, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); assert(p != MAP_FAILED); assert(sigaction(SIGSEGV, &act, NULL) == 0); if (strcmp(argv[1], "read") == 0) printf("%c", *(unsigned char *)p); else if (strcmp(argv[1], "write") == 0) *(unsigned char *)p = 0; else if (strcmp(argv[1], "exec") == 0) ((void (*)(void))p)(); return 1; /* failed to generate SEGV */ } Fixes: c3350602e876 ("powerpc/mm: Make bad_area* helper functions") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: John Sperbeck Acked-by: Benjamin Herrenschmidt [mpe: Add commit references in change log] Signed-off-by: Michael Ellerman --- arch/powerpc/mm/fault.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 4797d08581ce..6e1e39035380 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -145,6 +145,11 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address) return __bad_area(regs, address, SEGV_MAPERR); } +static noinline int bad_access(struct pt_regs *regs, unsigned long address) +{ + return __bad_area(regs, address, SEGV_ACCERR); +} + static int do_sigbus(struct pt_regs *regs, unsigned long address, unsigned int fault) { @@ -490,7 +495,7 @@ retry: good_area: if (unlikely(access_error(is_write, is_exec, vma))) - return bad_area(regs, address); + return bad_access(regs, address); /* * If for any reason at all we couldn't handle the fault, -- cgit v1.2.3-59-g8ed1b From e2d5915293ffdff977ddcfc12b817b08c53ffa7a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 8 Jan 2018 14:54:32 +1100 Subject: powerpc/pseries: Make RAS IRQ explicitly dependent on DLPAR WQ The hotplug code uses its own workqueue to handle IRQ requests (pseries_hp_wq), however that workqueue is initialized after init_ras_IRQ(). That can lead to a kernel panic if any hotplug interrupts fire after init_ras_IRQ() but before pseries_hp_wq is initialised. eg: UDP-Lite hash table entries: 2048 (order: 0, 65536 bytes) NET: Registered protocol family 1 Unpacking initramfs... (qemu) object_add memory-backend-ram,id=mem1,size=10G (qemu) device_add pc-dimm,id=dimm1,memdev=mem1 Unable to handle kernel paging request for data at address 0xf94d03007c421378 Faulting instruction address: 0xc00000000012d744 Oops: Kernel access of bad area, sig: 11 [#1] LE SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.15.0-rc2-ziviani+ #26 task: (ptrval) task.stack: (ptrval) NIP: c00000000012d744 LR: c00000000012d744 CTR: 0000000000000000 REGS: (ptrval) TRAP: 0380 Not tainted (4.15.0-rc2-ziviani+) MSR: 8000000000009033 CR: 28088042 XER: 20040000 CFAR: c00000000012d3c4 SOFTE: 0 ... NIP [c00000000012d744] __queue_work+0xd4/0x5c0 LR [c00000000012d744] __queue_work+0xd4/0x5c0 Call Trace: [c0000000fffefb90] [c00000000012d744] __queue_work+0xd4/0x5c0 (unreliable) [c0000000fffefc70] [c00000000012dce4] queue_work_on+0xb4/0xf0 This commit makes the RAS IRQ registration explicitly dependent on the creation of the pseries_hp_wq. Reported-by: Min Deng Reported-by: Daniel Henrique Barboza Tested-by: Jose Ricardo Ziviani Signed-off-by: Michael Ellerman Reviewed-by: David Gibson --- arch/powerpc/platforms/pseries/dlpar.c | 21 ++++++++++++++++++--- arch/powerpc/platforms/pseries/pseries.h | 2 ++ arch/powerpc/platforms/pseries/ras.c | 3 ++- 3 files changed, 22 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 6e35780c5962..a0b20c03f078 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -574,11 +574,26 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr, static CLASS_ATTR_RW(dlpar); -static int __init pseries_dlpar_init(void) +int __init dlpar_workqueue_init(void) { + if (pseries_hp_wq) + return 0; + pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue", - WQ_UNBOUND, 1); + WQ_UNBOUND, 1); + + return pseries_hp_wq ? 0 : -ENOMEM; +} + +static int __init dlpar_sysfs_init(void) +{ + int rc; + + rc = dlpar_workqueue_init(); + if (rc) + return rc; + return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr); } -machine_device_initcall(pseries, pseries_dlpar_init); +machine_device_initcall(pseries, dlpar_sysfs_init); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 4470a3194311..1ae1d9f4dbe9 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -98,4 +98,6 @@ static inline unsigned long cmo_get_page_size(void) return CMO_PageSize; } +int dlpar_workqueue_init(void); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 4923ffe230cf..81d8614e7379 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -69,7 +69,8 @@ static int __init init_ras_IRQ(void) /* Hotplug Events */ np = of_find_node_by_path("/event-sources/hot-plug-events"); if (np != NULL) { - request_event_sources_irqs(np, ras_hotplug_interrupt, + if (dlpar_workqueue_init() == 0) + request_event_sources_irqs(np, ras_hotplug_interrupt, "RAS_HOTPLUG"); of_node_put(np); } -- cgit v1.2.3-59-g8ed1b From 191eccb1580939fb0d47deb405b82a85b0379070 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 9 Jan 2018 03:52:05 +1100 Subject: powerpc/pseries: Add H_GET_CPU_CHARACTERISTICS flags & wrapper A new hypervisor call has been defined to communicate various characteristics of the CPU to guests. Add definitions for the hcall number, flags and a wrapper function. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hvcall.h | 17 +++++++++++++++++ arch/powerpc/include/asm/plpar_wrappers.h | 14 ++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index a409177be8bd..f0461618bf7b 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -241,6 +241,7 @@ #define H_GET_HCA_INFO 0x1B8 #define H_GET_PERF_COUNT 0x1BC #define H_MANAGE_TRACE 0x1C0 +#define H_GET_CPU_CHARACTERISTICS 0x1C8 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 #define H_QUERY_INT_STATE 0x1E4 #define H_POLL_PENDING 0x1D8 @@ -330,6 +331,17 @@ #define H_SIGNAL_SYS_RESET_ALL_OTHERS -2 /* >= 0 values are CPU number */ +/* H_GET_CPU_CHARACTERISTICS return values */ +#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0 +#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1 +#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2 +#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3 +#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4 + +#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 +#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 +#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 + /* Flag values used in H_REGISTER_PROC_TBL hcall */ #define PROC_TABLE_OP_MASK 0x18 #define PROC_TABLE_DEREG 0x10 @@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc) } } +struct h_cpu_char_result { + u64 character; + u64 behaviour; +}; + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 7f01b22fa6cb..55eddf50d149 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu) return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); } +static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf); + if (rc == H_SUCCESS) { + p->character = retbuf[0]; + p->behaviour = retbuf[1]; + } + + return rc; +} + #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ -- cgit v1.2.3-59-g8ed1b From 50e51c13b3822d14ff6df4279423e4b7b2269bc3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/64: Add macros for annotating the destination of rfid/hrfid The rfid/hrfid ((Hypervisor) Return From Interrupt) instruction is used for switching from the kernel to userspace, and from the hypervisor to the guest kernel. However it can and is also used for other transitions, eg. from real mode kernel code to virtual mode kernel code, and it's not always clear from the code what the destination context is. To make it clearer when reading the code, add macros which encode the expected destination context. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64e.h | 6 ++++++ arch/powerpc/include/asm/exception-64s.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index a703452d67b6..555e22d5e07f 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -209,5 +209,11 @@ exc_##label##_book3e: ori r3,r3,vector_offset@l; \ mtspr SPRN_IVOR##vector_number,r3; +#define RFI_TO_KERNEL \ + rfi + +#define RFI_TO_USER \ + rfi + #endif /* _ASM_POWERPC_EXCEPTION_64E_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index b27205297e1d..1af427a3c74f 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -74,6 +74,35 @@ */ #define EX_R3 EX_DAR +/* Macros for annotating the expected destination of (h)rfid */ + +#define RFI_TO_KERNEL \ + rfid + +#define RFI_TO_USER \ + rfid + +#define RFI_TO_USER_OR_KERNEL \ + rfid + +#define RFI_TO_GUEST \ + rfid + +#define HRFI_TO_KERNEL \ + hrfid + +#define HRFI_TO_USER \ + hrfid + +#define HRFI_TO_USER_OR_KERNEL \ + hrfid + +#define HRFI_TO_GUEST \ + hrfid + +#define HRFI_TO_UNKNOWN \ + hrfid + #ifdef CONFIG_RELOCATABLE #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ -- cgit v1.2.3-59-g8ed1b From 222f20f140623ef6033491d0103ee0875fe87d35 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/64s: Simple RFI macro conversions This commit does simple conversions of rfi/rfid to the new macros that include the expected destination context. By simple we mean cases where there is a single well known destination context, and it's simply a matter of substituting the instruction for the appropriate macro. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 4 ++-- arch/powerpc/kernel/entry_64.S | 14 +++++++++----- arch/powerpc/kernel/exceptions-64s.S | 24 ++++++++++++------------ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 9 ++++----- arch/powerpc/kvm/book3s_rmhandlers.S | 7 +++++-- arch/powerpc/kvm/book3s_segment.S | 4 ++-- 6 files changed, 34 insertions(+), 28 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 1af427a3c74f..dfc56daed98b 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -247,7 +247,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtspr SPRN_##h##SRR0,r12; \ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ mtspr SPRN_##h##SRR1,r10; \ - h##rfid; \ + h##RFI_TO_KERNEL; \ b . /* prevent speculative execution */ #define EXCEPTION_PROLOG_PSERIES_1(label, h) \ __EXCEPTION_PROLOG_PSERIES_1(label, h) @@ -261,7 +261,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtspr SPRN_##h##SRR0,r12; \ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ mtspr SPRN_##h##SRR1,r10; \ - h##rfid; \ + h##RFI_TO_KERNEL; \ b . /* prevent speculative execution */ #define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3320bcac7192..e68faa4d1b13 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -37,6 +37,11 @@ #include #include #include +#ifdef CONFIG_PPC_BOOK3S +#include +#else +#include +#endif /* * System calls. @@ -397,8 +402,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mtmsrd r10, 1 mtspr SPRN_SRR0, r11 mtspr SPRN_SRR1, r12 - - rfid + RFI_TO_USER b . /* prevent speculative execution */ #endif _ASM_NOKPROBE_SYMBOL(system_call_common); @@ -1073,7 +1077,7 @@ __enter_rtas: mtspr SPRN_SRR0,r5 mtspr SPRN_SRR1,r6 - rfid + RFI_TO_KERNEL b . /* prevent speculative execution */ rtas_return_loc: @@ -1098,7 +1102,7 @@ rtas_return_loc: mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - rfid + RFI_TO_KERNEL b . /* prevent speculative execution */ _ASM_NOKPROBE_SYMBOL(__enter_rtas) _ASM_NOKPROBE_SYMBOL(rtas_return_loc) @@ -1171,7 +1175,7 @@ _GLOBAL(enter_prom) LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE) andc r11,r11,r12 mtsrr1 r11 - rfid + RFI_TO_KERNEL #endif /* CONFIG_PPC_BOOK3E */ 1: /* Return from OF */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e441b469dc8f..5502b0147c4e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -256,7 +256,7 @@ BEGIN_FTR_SECTION LOAD_HANDLER(r12, machine_check_handle_early) 1: mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r11 - rfid + RFI_TO_KERNEL b . /* prevent speculative execution */ 2: /* Stack overflow. Stay on emergency stack and panic. @@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) li r3,MSR_ME andc r10,r10,r3 /* Turn off MSR_ME */ mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . 2: /* @@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) */ bl machine_check_queue_event MACHINE_CHECK_HANDLER_WINDUP - rfid + RFI_TO_USER_OR_KERNEL 9: /* Deliver the machine check to host kernel in V mode. */ MACHINE_CHECK_HANDLER_WINDUP @@ -651,7 +651,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) mtspr SPRN_SRR0,r10 ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . 8: std r3,PACA_EXSLB+EX_DAR(r13) @@ -662,7 +662,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) mtspr SPRN_SRR0,r10 ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . EXC_COMMON_BEGIN(unrecov_slb) @@ -901,7 +901,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) mtspr SPRN_SRR0,r10 ; \ ld r10,PACAKMSR(r13) ; \ mtspr SPRN_SRR1,r10 ; \ - rfid ; \ + RFI_TO_KERNEL ; \ b . ; /* prevent speculative execution */ #ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH @@ -917,7 +917,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ xori r12,r12,MSR_LE ; \ mtspr SPRN_SRR1,r12 ; \ mr r13,r9 ; \ - rfid ; /* return to userspace */ \ + RFI_TO_USER ; /* return to userspace */ \ b . ; /* prevent speculative execution */ #else #define SYSCALL_FASTENDIAN_TEST @@ -1063,7 +1063,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early) mtcr r11 REST_GPR(11, r1) ld r1,GPR1(r1) - hrfid + HRFI_TO_USER_OR_KERNEL 1: mtcr r11 REST_GPR(11, r1) @@ -1314,7 +1314,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r11,PACA_EXGEN+EX_R11(r13) ld r12,PACA_EXGEN+EX_R12(r13) ld r13,PACA_EXGEN+EX_R13(r13) - HRFID + HRFI_TO_UNKNOWN b . #endif @@ -1418,7 +1418,7 @@ masked_##_H##interrupt: \ ld r10,PACA_EXGEN+EX_R10(r13); \ ld r11,PACA_EXGEN+EX_R11(r13); \ /* returns to kernel where r13 must be set up, so don't restore it */ \ - ##_H##rfid; \ + ##_H##RFI_TO_KERNEL; \ b .; \ MASKED_DEC_HANDLER(_H) @@ -1441,7 +1441,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt) addi r13, r13, 4 mtspr SPRN_SRR0, r13 GET_SCRATCH0(r13) - rfid + RFI_TO_KERNEL b . TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) @@ -1453,7 +1453,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) addi r13, r13, 4 mtspr SPRN_HSRR0, r13 GET_SCRATCH0(r13) - hrfid + HRFI_TO_KERNEL b . #endif diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 2659844784b8..9c61f736c75b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -79,7 +79,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline) mtmsrd r0,1 /* clear RI in MSR */ mtsrr0 r5 mtsrr1 r6 - RFI + RFI_TO_KERNEL kvmppc_call_hv_entry: BEGIN_FTR_SECTION @@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtmsrd r6, 1 /* Clear RI in MSR */ mtsrr0 r8 mtsrr1 r7 - RFI + RFI_TO_KERNEL /* Virtual-mode return */ .Lvirt_return: @@ -1167,8 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ld r0, VCPU_GPR(R0)(r4) ld r4, VCPU_GPR(R4)(r4) - - hrfid + HRFI_TO_GUEST b . secondary_too_late: @@ -3320,7 +3319,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) ld r4, PACAKMSR(r13) mtspr SPRN_SRR0, r3 mtspr SPRN_SRR1, r4 - rfid + RFI_TO_KERNEL 9: addi r3, r1, STACK_FRAME_OVERHEAD bl kvmppc_bad_interrupt b 9b diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 42a4b237df5f..34a5adeff084 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -46,6 +46,9 @@ #define FUNC(name) name +#define RFI_TO_KERNEL RFI +#define RFI_TO_GUEST RFI + .macro INTERRUPT_TRAMPOLINE intno .global kvmppc_trampoline_\intno @@ -141,7 +144,7 @@ kvmppc_handler_skip_ins: GET_SCRATCH0(r13) /* And get back into the code */ - RFI + RFI_TO_KERNEL #endif /* @@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline) ori r5, r5, MSR_EE mtsrr0 r7 mtsrr1 r6 - RFI + RFI_TO_KERNEL #include "book3s_segment.S" diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 2a2b96d53999..93a180ceefad 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -156,7 +156,7 @@ no_dcbz32_on: PPC_LL r9, SVCPU_R9(r3) PPC_LL r3, (SVCPU_R3)(r3) - RFI + RFI_TO_GUEST kvmppc_handler_trampoline_enter_end: @@ -407,5 +407,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) cmpwi r12, BOOK3S_INTERRUPT_DOORBELL beqa BOOK3S_INTERRUPT_DOORBELL - RFI + RFI_TO_KERNEL kvmppc_handler_trampoline_exit_end: -- cgit v1.2.3-59-g8ed1b From b8e90cb7bc04a509e821e82ab6ed7a8ef11ba333 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/64: Convert the syscall exit path to use RFI_TO_USER/KERNEL In the syscall exit path we may be returning to user or kernel context. We already have a test for that, because we conditionally restore r13. So use that existing test and branch, and bifurcate the return based on that. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index e68faa4d1b13..724733b74744 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -267,13 +267,23 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ + ld r2,GPR2(r1) + ld r1,GPR1(r1) + mtlr r4 + mtcr r5 + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r8 + RFI_TO_USER + b . /* prevent speculative execution */ + + /* exit to kernel */ 1: ld r2,GPR2(r1) ld r1,GPR1(r1) mtlr r4 mtcr r5 mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 - RFI + RFI_TO_KERNEL b . /* prevent speculative execution */ .Lsyscall_error: -- cgit v1.2.3-59-g8ed1b From a08f828cf47e6c605af21d2cdec68f84e799c318 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/64: Convert fast_exception_return to use RFI_TO_USER/KERNEL Similar to the syscall return path, in fast_exception_return we may be returning to user or kernel context. We already have a test for that, because we conditionally restore r13. So use that existing test and branch, and bifurcate the return based on that. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 724733b74744..2748584b767d 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -892,7 +892,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) -1: + mtspr SPRN_SRR1,r3 ld r2,_CCR(r1) @@ -905,8 +905,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r3,GPR3(r1) ld r4,GPR4(r1) ld r1,GPR1(r1) + RFI_TO_USER + b . /* prevent speculative execution */ + +1: mtspr SPRN_SRR1,r3 + + ld r2,_CCR(r1) + mtcrf 0xFF,r2 + ld r2,_NIP(r1) + mtspr SPRN_SRR0,r2 - rfid + ld r0,GPR0(r1) + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r1,GPR1(r1) + RFI_TO_KERNEL b . /* prevent speculative execution */ #endif /* CONFIG_PPC_BOOK3E */ -- cgit v1.2.3-59-g8ed1b From c7305645eb0c1621351cfc104038831ae87c0053 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/64s: Convert slb_miss_common to use RFI_TO_USER/KERNEL In the SLB miss handler we may be returning to user or kernel. We need to add a check early on and save the result in the cr4 register, and then we bifurcate the return path based on that. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 5502b0147c4e..ed356194f09c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common) stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + andi. r9,r11,MSR_PR // Check for exception from userspace + cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later + /* * Test MSR_RI before calling slb_allocate_realmode, because the * MSR in r11 gets clobbered. However we still want to allocate @@ -624,9 +627,32 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) /* All done -- return from exception. */ + bne cr4,1f /* returning to kernel */ + +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ + mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ + mtcrf 0x02,r9 /* I/D indication is in cr6 */ + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + + RESTORE_CTR(r9, PACA_EXSLB) + RESTORE_PPR_PACA(PACA_EXSLB, r9) + mr r3,r12 + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + RFI_TO_USER + b . /* prevent speculative execution */ +1: .machine push .machine "power4" mtcrf 0x80,r9 + mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ mtcrf 0x02,r9 /* I/D indication is in cr6 */ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ @@ -640,9 +666,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) ld r11,PACA_EXSLB+EX_R11(r13) ld r12,PACA_EXSLB+EX_R12(r13) ld r13,PACA_EXSLB+EX_R13(r13) - rfid + RFI_TO_KERNEL b . /* prevent speculative execution */ + 2: std r3,PACA_EXSLB+EX_DAR(r13) mr r3,r12 mfspr r11,SPRN_SRR0 -- cgit v1.2.3-59-g8ed1b From 0500871f21b237b2bea2d9db405eadf78e5aab05 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2018 15:12:01 +0000 Subject: Construct init thread stack in the linker script rather than by union Construct the init thread stack in the linker script rather than doing it by means of a union so that ia64's init_task.c can be got rid of. The following symbols are then made available from INIT_TASK_DATA() linker script macro: init_thread_union init_stack INIT_TASK_DATA() also expands the region to THREAD_SIZE to accommodate the size of the init stack. init_thread_union is given its own section so that it can be placed into the stack space in the right order. I'm assuming that the ia64 ordering is correct and that the task_struct is first and the thread_info second. Signed-off-by: David Howells Tested-by: Tony Luck Tested-by: Will Deacon (arm64) Tested-by: Palmer Dabbelt Acked-by: Thomas Gleixner --- arch/Kconfig | 4 +-- arch/alpha/include/asm/thread_info.h | 3 --- arch/arc/include/asm/thread_info.h | 3 --- arch/arm/include/asm/thread_info.h | 3 --- arch/arm64/include/asm/thread_info.h | 2 -- arch/blackfin/include/asm/thread_info.h | 2 -- arch/c6x/include/asm/thread_info.h | 3 --- arch/cris/include/asm/processor.h | 2 -- arch/cris/include/asm/thread_info.h | 2 -- arch/frv/include/asm/thread_info.h | 3 --- arch/h8300/include/asm/thread_info.h | 3 --- arch/hexagon/include/asm/thread_info.h | 3 --- arch/ia64/Kconfig | 2 +- arch/ia64/Makefile | 2 +- arch/ia64/include/asm/thread_info.h | 4 +-- arch/ia64/kernel/Makefile | 2 +- arch/ia64/kernel/init_task.c | 44 ------------------------------- arch/ia64/kernel/vmlinux.lds.S | 1 + arch/m32r/include/asm/thread_info.h | 3 --- arch/m68k/include/asm/thread_info.h | 4 --- arch/metag/include/asm/thread_info.h | 3 --- arch/microblaze/include/asm/thread_info.h | 3 --- arch/mips/include/asm/thread_info.h | 3 --- arch/mn10300/include/asm/thread_info.h | 2 -- arch/nios2/include/asm/thread_info.h | 3 --- arch/openrisc/include/asm/processor.h | 2 -- arch/openrisc/include/asm/thread_info.h | 2 -- arch/parisc/include/asm/thread_info.h | 3 --- arch/powerpc/include/asm/thread_info.h | 3 --- arch/riscv/include/asm/thread_info.h | 2 -- arch/s390/include/asm/thread_info.h | 2 -- arch/score/include/asm/thread_info.h | 3 --- arch/sh/include/asm/thread_info.h | 3 --- arch/sparc/include/asm/thread_info_32.h | 3 --- arch/sparc/include/asm/thread_info_64.h | 3 --- arch/tile/include/asm/thread_info.h | 3 --- arch/um/include/asm/processor-generic.h | 5 +++- arch/um/include/asm/thread_info.h | 9 +++---- arch/um/include/asm/vmlinux.lds.h | 2 ++ arch/um/kernel/dyn.lds.S | 3 +-- arch/um/kernel/um_arch.c | 2 +- arch/um/kernel/uml.lds.S | 2 +- arch/unicore32/include/asm/thread_info.h | 3 --- arch/x86/include/asm/thread_info.h | 2 -- arch/xtensa/include/asm/thread_info.h | 3 --- include/asm-generic/vmlinux.lds.h | 4 +++ include/linux/init_task.h | 3 +++ include/linux/sched.h | 9 +++++++ init/Makefile | 2 -- init/init_task.c | 10 ++++--- 50 files changed, 42 insertions(+), 155 deletions(-) delete mode 100644 arch/ia64/kernel/init_task.c create mode 100644 arch/um/include/asm/vmlinux.lds.h (limited to 'arch/powerpc') diff --git a/arch/Kconfig b/arch/Kconfig index 400b9e1b2f27..a26d6f8ab967 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -234,8 +234,8 @@ config ARCH_HAS_FORTIFY_SOURCE config ARCH_HAS_SET_MEMORY bool -# Select if arch init_task initializer is different to init/init_task.c -config ARCH_INIT_TASK +# Select if arch init_task must go in the __init_task_data section +config ARCH_TASK_STRUCT_ON_STACK bool # Select if arch has its private alloc_task_struct() function diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 8c20c5e35432..807d7b9a1860 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -39,9 +39,6 @@ struct thread_info { .preempt_count = INIT_PREEMPT_COUNT, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* How to get the thread information struct from C. */ register struct thread_info *__current_thread_info __asm__("$8"); #define current_thread_info() __current_thread_info diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 2d79e527fa50..c85947bac5e5 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -62,9 +62,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - static inline __attribute_const__ struct thread_info *current_thread_info(void) { register unsigned long sp asm("sp"); diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 776757d1604a..e71cc35de163 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -75,9 +75,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* * how to get the current stack pointer in C */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index eb431286bacd..740aa03c5f0d 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -51,8 +51,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_stack (init_thread_union.stack) - #define thread_saved_pc(tsk) \ ((unsigned long)(tsk->thread.cpu_context.pc)) #define thread_saved_sp(tsk) \ diff --git a/arch/blackfin/include/asm/thread_info.h b/arch/blackfin/include/asm/thread_info.h index 2966b93850a1..a5aeab4e5f2d 100644 --- a/arch/blackfin/include/asm/thread_info.h +++ b/arch/blackfin/include/asm/thread_info.h @@ -56,8 +56,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) /* Given a task stack pointer, you can find its corresponding * thread_info structure just by masking it to the THREAD_SIZE diff --git a/arch/c6x/include/asm/thread_info.h b/arch/c6x/include/asm/thread_info.h index acc70c135ab8..59a5697fe0f3 100644 --- a/arch/c6x/include/asm/thread_info.h +++ b/arch/c6x/include/asm/thread_info.h @@ -60,9 +60,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* get the thread information struct of current task */ static inline __attribute__((const)) struct thread_info *current_thread_info(void) diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h index b50907799cb2..ee4d8b03d048 100644 --- a/arch/cris/include/asm/processor.h +++ b/arch/cris/include/asm/processor.h @@ -52,8 +52,6 @@ static inline void release_thread(struct task_struct *dead_task) /* Nothing needs to be done. */ } -#define init_stack (init_thread_union.stack) - #define cpu_relax() barrier() void default_idle(void); diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h index 108f77081a3c..996fef3be1d5 100644 --- a/arch/cris/include/asm/thread_info.h +++ b/arch/cris/include/asm/thread_info.h @@ -63,8 +63,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) - #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h index ccba3b6ce918..0f950845fad9 100644 --- a/arch/frv/include/asm/thread_info.h +++ b/arch/frv/include/asm/thread_info.h @@ -64,9 +64,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ register struct thread_info *__current_thread_info asm("gr15"); diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index 072b92c0d8b5..0cdaa302d3d2 100644 --- a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -46,9 +46,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { diff --git a/arch/hexagon/include/asm/thread_info.h b/arch/hexagon/include/asm/thread_info.h index b80fe1db7b64..f41f9c6f0e31 100644 --- a/arch/hexagon/include/asm/thread_info.h +++ b/arch/hexagon/include/asm/thread_info.h @@ -84,9 +84,6 @@ struct thread_info { .regs = NULL, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* Tacky preprocessor trickery */ #define qqstr(s) qstr(s) #define qstr(s) #s diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 49583c5a5d44..315c51f58811 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -43,7 +43,7 @@ config IA64 select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP select GENERIC_SMP_IDLE_THREAD - select ARCH_INIT_TASK + select ARCH_TASK_STRUCT_ON_STACK select ARCH_TASK_STRUCT_ALLOCATOR select ARCH_THREAD_STACK_ALLOCATOR select ARCH_CLOCKSOURCE_DATA diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index c100d780f1eb..2dd7f519ad0b 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -42,7 +42,7 @@ $(error Sorry, you need a newer version of the assember, one that is built from endif KBUILD_CFLAGS += $(cflags-y) -head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o +head-y := arch/ia64/kernel/head.o libs-y += arch/ia64/lib/ core-y += arch/ia64/kernel/ arch/ia64/mm/ diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index 1d172a4119a7..64a1011f6812 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -12,6 +12,8 @@ #include #include +#define THREAD_SIZE KERNEL_STACK_SIZE + #ifndef __ASSEMBLY__ /* @@ -41,8 +43,6 @@ struct thread_info { #endif }; -#define THREAD_SIZE KERNEL_STACK_SIZE - #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 14ad79f394e5..0b4c65a1af25 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -7,7 +7,7 @@ ifdef CONFIG_DYNAMIC_FTRACE CFLAGS_REMOVE_ftrace.o = -pg endif -extra-y := head.o init_task.o vmlinux.lds +extra-y := head.o vmlinux.lds obj-y := entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \ diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c deleted file mode 100644 index 8df9245e29d9..000000000000 --- a/arch/ia64/kernel/init_task.c +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This is where we statically allocate and initialize the initial - * task. - * - * Copyright (C) 1999, 2002-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -/* - * Initial task structure. - * - * We need to make sure that this is properly aligned due to the way process stacks are - * handled. This is done by having a special ".data..init_task" section... - */ -#define init_thread_info init_task_mem.s.thread_info -#define init_stack init_task_mem.stack - -union { - struct { - struct task_struct task; - struct thread_info thread_info; - } s; - unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)]; -} init_task_mem asm ("init_task") __init_task_data = - {{ - .task = INIT_TASK(init_task_mem.s.task), - .thread_info = INIT_THREAD_INFO(init_task_mem.s.task) -}}; - -EXPORT_SYMBOL(init_task); diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 58db59da0bd8..b0b2070e0591 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -3,6 +3,7 @@ #include #include #include +#include #include diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h index b3a215b0ce0a..ba00f1032587 100644 --- a/arch/m32r/include/asm/thread_info.h +++ b/arch/m32r/include/asm/thread_info.h @@ -56,9 +56,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h index 928035591f2e..015f1ca38305 100644 --- a/arch/m68k/include/asm/thread_info.h +++ b/arch/m68k/include/asm/thread_info.h @@ -41,8 +41,6 @@ struct thread_info { .preempt_count = INIT_PREEMPT_COUNT, \ } -#define init_stack (init_thread_union.stack) - #ifndef __ASSEMBLY__ /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) @@ -58,8 +56,6 @@ static inline struct thread_info *current_thread_info(void) } #endif -#define init_thread_info (init_thread_union.thread_info) - /* entry.S relies on these definitions! * bits 0-7 are tested at every exception exit * bits 8-15 are also tested at syscall exit diff --git a/arch/metag/include/asm/thread_info.h b/arch/metag/include/asm/thread_info.h index 554f73a77e6e..a1a9c7f5ca8c 100644 --- a/arch/metag/include/asm/thread_info.h +++ b/arch/metag/include/asm/thread_info.h @@ -74,9 +74,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the current stack pointer from C */ register unsigned long current_stack_pointer asm("A0StP") __used; diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h index e7e8954e9815..9afe4b5bd6c8 100644 --- a/arch/microblaze/include/asm/thread_info.h +++ b/arch/microblaze/include/asm/thread_info.h @@ -86,9 +86,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 5e8927f99a76..4993db40482c 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -49,9 +49,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* How to get the thread information struct from C. */ register struct thread_info *__current_thread_info __asm__("$28"); diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index f5f90bbf019d..1748a7b25bf8 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -79,8 +79,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) #define init_uregs \ ((struct pt_regs *) \ ((unsigned long) init_stack + THREAD_SIZE - sizeof(struct pt_regs))) diff --git a/arch/nios2/include/asm/thread_info.h b/arch/nios2/include/asm/thread_info.h index d69c338bd19c..7349a4fa635b 100644 --- a/arch/nios2/include/asm/thread_info.h +++ b/arch/nios2/include/asm/thread_info.h @@ -63,9 +63,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index 396d8f306c21..af31a9fe736a 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -84,8 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp); void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); -#define init_stack (init_thread_union.stack) - #define cpu_relax() barrier() #endif /* __ASSEMBLY__ */ diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h index c229aa6bb502..5c15dfa2fd4f 100644 --- a/arch/openrisc/include/asm/thread_info.h +++ b/arch/openrisc/include/asm/thread_info.h @@ -79,8 +79,6 @@ struct thread_info { .ksp = 0, \ } -#define init_thread_info (init_thread_union.thread_info) - /* how to get the thread information struct from C */ register struct thread_info *current_thread_info_reg asm("r10"); #define current_thread_info() (current_thread_info_reg) diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 598c8d60fa5e..285757544cca 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -25,9 +25,6 @@ struct thread_info { .preempt_count = INIT_PREEMPT_COUNT, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *)mfctl(30)) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index a264c3ad366b..4a12c00f8de3 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -58,9 +58,6 @@ struct thread_info { .flags = 0, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - #define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) /* how to get the thread information struct from C */ diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 22c3536ed281..f8fa1cd2dad9 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -64,8 +64,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_stack (init_thread_union.stack) - #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 0880a37b6d3b..25d6ec3aaddd 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -42,8 +42,6 @@ struct thread_info { .flags = 0, \ } -#define init_stack (init_thread_union.stack) - void arch_release_task_struct(struct task_struct *tsk); int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); diff --git a/arch/score/include/asm/thread_info.h b/arch/score/include/asm/thread_info.h index ad51b56e51bd..bc4c7c90550f 100644 --- a/arch/score/include/asm/thread_info.h +++ b/arch/score/include/asm/thread_info.h @@ -58,9 +58,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* How to get the thread information struct from C. */ register struct thread_info *__current_thread_info __asm__("r28"); #define current_thread_info() __current_thread_info diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index becb798f1b04..cf5c792bf70b 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -63,9 +63,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the current stack pointer from C */ register unsigned long current_stack_pointer asm("r15") __used; diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index febaaeb1a0fe..548b366165dd 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -63,9 +63,6 @@ struct thread_info { .preempt_count = INIT_PREEMPT_COUNT, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ register struct thread_info *current_thread_info_reg asm("g6"); #define current_thread_info() (current_thread_info_reg) diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index caf915321ba9..f7e7b0baec9f 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -120,9 +120,6 @@ struct thread_info { .preempt_count = INIT_PREEMPT_COUNT, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ register struct thread_info *current_thread_info_reg asm("g6"); #define current_thread_info() (current_thread_info_reg) diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index b7659b8f1117..2adcacd85749 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -59,9 +59,6 @@ struct thread_info { .align_ctl = 0, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - #endif /* !__ASSEMBLY__ */ #if PAGE_SIZE < 8192 diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 86942a492454..b58b746d3f2c 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -58,7 +58,10 @@ static inline void release_thread(struct task_struct *task) { } -#define init_stack (init_thread_union.stack) +static inline void mm_copy_segments(struct mm_struct *from_mm, + struct mm_struct *new_mm) +{ +} /* * User space process size: 3GB (default). diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index 9300f7630d2a..4eecd960ee8c 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -6,6 +6,9 @@ #ifndef __UM_THREAD_INFO_H #define __UM_THREAD_INFO_H +#define THREAD_SIZE_ORDER CONFIG_KERNEL_STACK_ORDER +#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE) + #ifndef __ASSEMBLY__ #include @@ -37,10 +40,6 @@ struct thread_info { .real_thread = NULL, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - -#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE) /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { @@ -53,8 +52,6 @@ static inline struct thread_info *current_thread_info(void) return ti; } -#define THREAD_SIZE_ORDER CONFIG_KERNEL_STACK_ORDER - #endif #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ diff --git a/arch/um/include/asm/vmlinux.lds.h b/arch/um/include/asm/vmlinux.lds.h new file mode 100644 index 000000000000..149494ae78ea --- /dev/null +++ b/arch/um/include/asm/vmlinux.lds.h @@ -0,0 +1,2 @@ +#include +#include diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index d417e3899700..5568cf882371 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -1,5 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include +#include #include OUTPUT_FORMAT(ELF_FORMAT) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index f433690b9b37..a818ccef30ca 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -54,7 +54,7 @@ struct cpuinfo_um boot_cpu_data = { union thread_union cpu0_irqstack __attribute__((__section__(".data..init_irqstack"))) = - { INIT_THREAD_INFO(init_task) }; + { .thread_info = INIT_THREAD_INFO(init_task) }; /* Changed in setup_arch, which is called in early boot */ static char host_info[(__NEW_UTS_LEN + 1) * 5]; diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 3d6ed6ba5b78..36b07ec09742 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#include +#include #include OUTPUT_FORMAT(ELF_FORMAT) diff --git a/arch/unicore32/include/asm/thread_info.h b/arch/unicore32/include/asm/thread_info.h index e79ad6d5b5b2..5fb728f3b49a 100644 --- a/arch/unicore32/include/asm/thread_info.h +++ b/arch/unicore32/include/asm/thread_info.h @@ -87,9 +87,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* * how to get the thread information struct from C */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 00223333821a..d25a638a2720 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -62,8 +62,6 @@ struct thread_info { .flags = 0, \ } -#define init_stack (init_thread_union.stack) - #else /* !__ASSEMBLY__ */ #include diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h index 7be2400f745a..2ccd37510aaa 100644 --- a/arch/xtensa/include/asm/thread_info.h +++ b/arch/xtensa/include/asm/thread_info.h @@ -77,9 +77,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ee8b707d9fa9..a564b83bf013 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -268,7 +268,11 @@ #define INIT_TASK_DATA(align) \ . = ALIGN(align); \ VMLINUX_SYMBOL(__start_init_task) = .; \ + VMLINUX_SYMBOL(init_thread_union) = .; \ + VMLINUX_SYMBOL(init_stack) = .; \ *(.data..init_task) \ + *(.data..init_thread_info) \ + . = VMLINUX_SYMBOL(__start_init_task) + THREAD_SIZE; \ VMLINUX_SYMBOL(__end_init_task) = .; /* diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6a532629c983..30a89b99a5af 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -304,5 +304,8 @@ extern struct cred init_cred; /* Attach to the init_task data structure for proper alignment */ #define __init_task_data __attribute__((__section__(".data..init_task"))) +/* Attach to the thread_info data structure for proper alignment */ +#define __init_thread_info __attribute__((__section__(".data..init_thread_info"))) + #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index d2588263a989..68a504f6e474 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1446,12 +1446,21 @@ extern void ia64_set_curr_task(int cpu, struct task_struct *p); void yield(void); union thread_union { +#ifndef CONFIG_ARCH_TASK_STRUCT_ON_STACK + struct task_struct task; +#endif #ifndef CONFIG_THREAD_INFO_IN_TASK struct thread_info thread_info; #endif unsigned long stack[THREAD_SIZE/sizeof(long)]; }; +#ifndef CONFIG_THREAD_INFO_IN_TASK +extern struct thread_info init_thread_info; +#endif + +extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)]; + #ifdef CONFIG_THREAD_INFO_IN_TASK static inline struct thread_info *task_thread_info(struct task_struct *task) { diff --git a/init/Makefile b/init/Makefile index 1dbb23787290..a3e5ce2bcf08 100644 --- a/init/Makefile +++ b/init/Makefile @@ -13,9 +13,7 @@ obj-$(CONFIG_BLK_DEV_INITRD) += initramfs.o endif obj-$(CONFIG_GENERIC_CALIBRATE_DELAY) += calibrate.o -ifneq ($(CONFIG_ARCH_INIT_TASK),y) obj-y += init_task.o -endif mounts-y := do_mounts.o mounts-$(CONFIG_BLK_DEV_RAM) += do_mounts_rd.o diff --git a/init/init_task.c b/init/init_task.c index 9325fee7dc82..2285aa42cbe1 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -17,15 +17,17 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); /* Initial task structure */ -struct task_struct init_task = INIT_TASK(init_task); +struct task_struct init_task +#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK + __init_task_data +#endif + = INIT_TASK(init_task); EXPORT_SYMBOL(init_task); /* * Initial thread structure. Alignment of this is handled by a special * linker map entry. */ -union thread_union init_thread_union __init_task_data = { #ifndef CONFIG_THREAD_INFO_IN_TASK - INIT_THREAD_INFO(init_task) +struct thread_info init_thread_info __init_thread_info = INIT_THREAD_INFO(init_task); #endif -}; -- cgit v1.2.3-59-g8ed1b From 6c7d47c33ed323f14f2a3b8de925e831dbaa4e69 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 22 Nov 2017 14:42:21 +1100 Subject: KVM: PPC: Book3S PR: Fix WIMG handling under pHyp Commit 96df226 ("KVM: PPC: Book3S PR: Preserve storage control bits") added code to preserve WIMG bits but it missed 2 special cases: - a magic page in kvmppc_mmu_book3s_64_xlate() and - guest real mode in kvmppc_handle_pagefault(). For these ptes, WIMG was 0 and pHyp failed on these causing a guest to stop in the very beginning at NIP=0x100 (due to bd9166ffe "KVM: PPC: Book3S PR: Exit KVM on failed mapping"). According to LoPAPR v1.1 14.5.4.1.2 H_ENTER: The hypervisor checks that the WIMG bits within the PTE are appropriate for the physical page number else H_Parameter return. (For System Memory pages WIMG=0010, or, 1110 if the SAO option is enabled, and for IO pages WIMG=01**.) This hence initializes WIMG to non-zero value HPTE_R_M (0x10), as expected by pHyp. [paulus@ozlabs.org - fix compile for 32-bit] Cc: stable@vger.kernel.org # v4.11+ Fixes: 96df226 "KVM: PPC: Book3S PR: Preserve storage control bits" Signed-off-by: Alexey Kardashevskiy Tested-by: Ruediger Oertel Reviewed-by: Greg Kurz Tested-by: Greg Kurz Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu.c | 1 + arch/powerpc/kvm/book3s_pr.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 29ebe2fd5867..a93d719edc90 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, gpte->may_read = true; gpte->may_write = true; gpte->page_size = MMU_PAGE_4K; + gpte->wimg = HPTE_R_M; return 0; } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d0dc8624198f..7deaeeb14b93 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); #define MSR_USER32 MSR_USER #define MSR_USER64 MSR_USER #define HW_PAGE_SIZE PAGE_SIZE +#define HPTE_R_M _PAGE_COHERENT #endif static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu) @@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.eaddr = eaddr; pte.vpage = eaddr >> 12; pte.page_size = MMU_PAGE_64K; + pte.wimg = HPTE_R_M; } switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) { -- cgit v1.2.3-59-g8ed1b From ecba8297aafd50db6ae867e90844eead1611ef1c Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 10 Jan 2018 17:04:39 +1100 Subject: KVM: PPC: Book3S HV: Always flush TLB in kvmppc_alloc_reset_hpt() The KVM_PPC_ALLOCATE_HTAB ioctl(), implemented by kvmppc_alloc_reset_hpt() is supposed to completely clear and reset a guest's Hashed Page Table (HPT) allocating or re-allocating it if necessary. In the case where an HPT of the right size already exists and it just zeroes it, it forces a TLB flush on all guest CPUs, to remove any stale TLB entries loaded from the old HPT. However, that situation can arise when the HPT is resizing as well - or even when switching from an RPT to HPT - so those cases need a TLB flush as well. So, move the TLB flush to trigger in all cases except for errors. Cc: stable@vger.kernel.org # v4.10+ Fixes: f98a8bf9ee20 ("KVM: PPC: Book3S HV: Allow KVM_PPC_ALLOCATE_HTAB ioctl() to change HPT size") Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8355398f0bb6..b73dbc9e797d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -165,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order) * Reset all the reverse-mapping chains for all memslots */ kvmppc_rmap_reset(kvm); - /* Ensure that each vcpu will flush its TLB on next entry. */ - cpumask_setall(&kvm->arch.need_tlb_flush); err = 0; goto out; } @@ -182,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order) kvmppc_set_hpt(kvm, &info); out: + if (err == 0) + /* Ensure that each vcpu will flush its TLB on next entry. */ + cpumask_setall(&kvm->arch.need_tlb_flush); + mutex_unlock(&kvm->lock); return err; } -- cgit v1.2.3-59-g8ed1b From aa8a5e0062ac940f7659394f4817c948dc8c0667 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/64s: Add support for RFI flush of L1-D cache On some CPUs we can prevent the Meltdown vulnerability by flushing the L1-D cache on exit from kernel to user mode, and from hypervisor to guest. This is known to be the case on at least Power7, Power8 and Power9. At this time we do not know the status of the vulnerability on other CPUs such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale CPUs. As more information comes to light we can enable this, or other mechanisms on those CPUs. The vulnerability occurs when the load of an architecturally inaccessible memory region (eg. userspace load of kernel memory) is speculatively executed to the point where its result can influence the address of a subsequent speculatively executed load. In order for that to happen, the first load must hit in the L1, because before the load is sent to the L2 the permission check is performed. Therefore if no kernel addresses hit in the L1 the vulnerability can not occur. We can ensure that is the case by flushing the L1 whenever we return to userspace. Similarly for hypervisor vs guest. In order to flush the L1-D cache on exit, we add a section of nops at each (h)rfi location that returns to a lower privileged context, and patch that with some sequence. Newer firmwares are able to advertise to us that there is a special nop instruction that flushes the L1-D. If we do not see that advertised, we fall back to doing a displacement flush in software. For guest kernels we support migration between some CPU versions, and different CPUs may use different flush instructions. So that we are prepared to migrate to a machine with a different flush instruction activated, we may have to patch more than one flush instruction at boot if the hypervisor tells us to. In the end this patch is mostly the work of Nicholas Piggin and Michael Ellerman. However a cast of thousands contributed to analysis of the issue, earlier versions of the patch, back ports testing etc. Many thanks to all of them. Tested-by: Jon Masters Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 40 ++++++++++++--- arch/powerpc/include/asm/feature-fixups.h | 13 +++++ arch/powerpc/include/asm/paca.h | 10 ++++ arch/powerpc/include/asm/setup.h | 13 +++++ arch/powerpc/kernel/asm-offsets.c | 5 ++ arch/powerpc/kernel/exceptions-64s.S | 84 +++++++++++++++++++++++++++++++ arch/powerpc/kernel/setup_64.c | 79 +++++++++++++++++++++++++++++ arch/powerpc/kernel/vmlinux.lds.S | 9 ++++ arch/powerpc/lib/feature-fixups.c | 41 +++++++++++++++ 9 files changed, 286 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index dfc56daed98b..7197b179c1b1 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -74,34 +74,58 @@ */ #define EX_R3 EX_DAR -/* Macros for annotating the expected destination of (h)rfid */ +/* + * Macros for annotating the expected destination of (h)rfid + * + * The nop instructions allow us to insert one or more instructions to flush the + * L1-D cache when returning to userspace or a guest. + */ +#define RFI_FLUSH_SLOT \ + RFI_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop #define RFI_TO_KERNEL \ rfid #define RFI_TO_USER \ - rfid + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback #define RFI_TO_USER_OR_KERNEL \ - rfid + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback #define RFI_TO_GUEST \ - rfid + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback #define HRFI_TO_KERNEL \ hrfid #define HRFI_TO_USER \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #define HRFI_TO_USER_OR_KERNEL \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #define HRFI_TO_GUEST \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #define HRFI_TO_UNKNOWN \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #ifdef CONFIG_RELOCATABLE #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 8f88f771cc55..1e82eb3caabd 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -187,7 +187,20 @@ label##3: \ FTR_ENTRY_OFFSET label##1b-label##3b; \ .popsection; +#define RFI_FLUSH_FIXUP_SECTION \ +951: \ + .pushsection __rfi_flush_fixup,"a"; \ + .align 2; \ +952: \ + FTR_ENTRY_OFFSET 951b-952b; \ + .popsection; + + #ifndef __ASSEMBLY__ +#include + +extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; + void apply_feature_fixups(void); void setup_feature_keys(void); #endif diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 3892db93b837..23ac7fc0af23 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -232,6 +232,16 @@ struct paca_struct { struct sibling_subcore_state *sibling_subcore_state; #endif #endif +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * rfi fallback flush must be in its own cacheline to prevent + * other paca data leaking into the L1d + */ + u64 exrfi[EX_SIZE] __aligned(0x80); + void *rfi_flush_fallback_area; + u64 l1d_flush_congruence; + u64 l1d_flush_sets; +#endif }; extern void copy_mm_to_paca(struct mm_struct *mm); diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index cf00ec26303a..469b7fdc9be4 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -39,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {} static inline void pseries_little_endian_exceptions(void) {} #endif /* CONFIG_PPC_PSERIES */ +void rfi_flush_enable(bool enable); + +/* These are bit flags */ +enum l1d_flush_type { + L1D_FLUSH_NONE = 0x1, + L1D_FLUSH_FALLBACK = 0x2, + L1D_FLUSH_ORI = 0x4, + L1D_FLUSH_MTTRIG = 0x8, +}; + +void __init setup_rfi_flush(enum l1d_flush_type, bool enable); +void do_rfi_flush_fixups(enum l1d_flush_type types); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6b958414b4e0..f390d57cf2e1 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -237,6 +237,11 @@ int main(void) OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp); OFFSET(PACA_IN_MCE, paca_struct, in_mce); OFFSET(PACA_IN_NMI, paca_struct, in_nmi); + OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area); + OFFSET(PACA_EXRFI, paca_struct, exrfi); + OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence); + OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets); + #endif OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ed356194f09c..2dc10bf646b8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1449,6 +1449,90 @@ masked_##_H##interrupt: \ b .; \ MASKED_DEC_HANDLER(_H) +TRAMP_REAL_BEGIN(rfi_flush_fallback) + SET_SCRATCH0(r13); + GET_PACA(r13); + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + std r12,PACA_EXRFI+EX_R12(r13) + std r8,PACA_EXRFI+EX_R13(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SETS(r13) + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ + addi r12,r12,8 + mtctr r11 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync +1: li r8,0 + .rept 8 /* 8-way set associative */ + ldx r11,r10,r8 + add r8,r8,r12 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not + add r8,r8,r11 // Add 0, this creates a dependency on the ldx + .endr + addi r10,r10,128 /* 128 byte cache line */ + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + ld r12,PACA_EXRFI+EX_R12(r13) + ld r8,PACA_EXRFI+EX_R13(r13) + GET_SCRATCH0(r13); + rfid + +TRAMP_REAL_BEGIN(hrfi_flush_fallback) + SET_SCRATCH0(r13); + GET_PACA(r13); + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + std r12,PACA_EXRFI+EX_R12(r13) + std r8,PACA_EXRFI+EX_R13(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SETS(r13) + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ + addi r12,r12,8 + mtctr r11 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync +1: li r8,0 + .rept 8 /* 8-way set associative */ + ldx r11,r10,r8 + add r8,r8,r12 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not + add r8,r8,r11 // Add 0, this creates a dependency on the ldx + .endr + addi r10,r10,128 /* 128 byte cache line */ + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + ld r12,PACA_EXRFI+EX_R12(r13) + ld r8,PACA_EXRFI+EX_R13(r13) + GET_SCRATCH0(r13); + hrfid + /* * Real mode exceptions actually use this too, but alternate * instruction code patches (which end up in the common .text area) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 8956a9856604..96163f4c3673 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -801,3 +801,82 @@ static int __init disable_hardlockup_detector(void) return 0; } early_initcall(disable_hardlockup_detector); + +#ifdef CONFIG_PPC_BOOK3S_64 +static enum l1d_flush_type enabled_flush_types; +static void *l1d_flush_fallback_area; +bool rfi_flush; + +static void do_nothing(void *unused) +{ + /* + * We don't need to do the flush explicitly, just enter+exit kernel is + * sufficient, the RFI exit handlers will do the right thing. + */ +} + +void rfi_flush_enable(bool enable) +{ + if (rfi_flush == enable) + return; + + if (enable) { + do_rfi_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else + do_rfi_flush_fixups(L1D_FLUSH_NONE); + + rfi_flush = enable; +} + +static void init_fallback_flush(void) +{ + u64 l1d_size, limit; + int cpu; + + l1d_size = ppc64_caches.l1d.size; + limit = min(safe_stack_limit(), ppc64_rma_size); + + /* + * Align to L1d size, and size it at 2x L1d size, to catch possible + * hardware prefetch runoff. We don't have a recipe for load patterns to + * reliably avoid the prefetcher. + */ + l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit)); + memset(l1d_flush_fallback_area, 0, l1d_size * 2); + + for_each_possible_cpu(cpu) { + /* + * The fallback flush is currently coded for 8-way + * associativity. Different associativity is possible, but it + * will be treated as 8-way and may not evict the lines as + * effectively. + * + * 128 byte lines are mandatory. + */ + u64 c = l1d_size / 8; + + paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; + paca[cpu].l1d_flush_congruence = c; + paca[cpu].l1d_flush_sets = c / 128; + } +} + +void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) +{ + if (types & L1D_FLUSH_FALLBACK) { + pr_info("rfi-flush: Using fallback displacement flush\n"); + init_fallback_flush(); + } + + if (types & L1D_FLUSH_ORI) + pr_info("rfi-flush: Using ori type flush\n"); + + if (types & L1D_FLUSH_MTTRIG) + pr_info("rfi-flush: Using mttrig type flush\n"); + + enabled_flush_types = types; + + rfi_flush_enable(enable); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 0494e1566ee2..307843d23682 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -132,6 +132,15 @@ SECTIONS /* Read-only data */ RO_DATA(PAGE_SIZE) +#ifdef CONFIG_PPC64 + . = ALIGN(8); + __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { + __start___rfi_flush_fixup = .; + *(__rfi_flush_fixup) + __stop___rfi_flush_fixup = .; + } +#endif + EXCEPTION_TABLE(0) NOTES :kernel :notes diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 41cf5ae273cf..a95ea007d654 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } +#ifdef CONFIG_PPC_BOOK3S_64 +void do_rfi_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___rfi_flush_fixup), + end = PTRRELOC(&__stop___rfi_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + if (types & L1D_FLUSH_FALLBACK) + /* b .+16 to fallback flush */ + instrs[0] = 0x48000010; + + i = 0; + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + patch_instruction(dest + 1, instrs[1]); + patch_instruction(dest + 2, instrs[2]); + } + + printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ + void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; -- cgit v1.2.3-59-g8ed1b From bc9c9304a45480797e13a8e1df96ffcf44fb62fe Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/64s: Support disabling RFI flush with no_rfi_flush and nopti Because there may be some performance overhead of the RFI flush, add kernel command line options to disable it. We add a sensibly named 'no_rfi_flush' option, but we also hijack the x86 option 'nopti'. The RFI flush is not the same as KPTI, but if we see 'nopti' we can guess that the user is trying to avoid any overhead of Meltdown mitigations, and it means we don't have to educate every one about a different command line option. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 96163f4c3673..491be4179ddd 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -805,8 +805,29 @@ early_initcall(disable_hardlockup_detector); #ifdef CONFIG_PPC_BOOK3S_64 static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; +static bool no_rfi_flush; bool rfi_flush; +static int __init handle_no_rfi_flush(char *p) +{ + pr_info("rfi-flush: disabled on command line."); + no_rfi_flush = true; + return 0; +} +early_param("no_rfi_flush", handle_no_rfi_flush); + +/* + * The RFI flush is not KPTI, but because users will see doco that says to use + * nopti we hijack that option here to also disable the RFI flush. + */ +static int __init handle_no_pti(char *p) +{ + pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); + handle_no_rfi_flush(NULL); + return 0; +} +early_param("nopti", handle_no_pti); + static void do_nothing(void *unused) { /* @@ -877,6 +898,7 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - rfi_flush_enable(enable); + if (!no_rfi_flush) + rfi_flush_enable(enable); } #endif /* CONFIG_PPC_BOOK3S_64 */ -- cgit v1.2.3-59-g8ed1b From 8989d56878a7735dfdb234707a2fee6faf631085 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/pseries: Query hypervisor for RFI flush settings A new hypervisor call is available which tells the guest settings related to the RFI flush. Use it to query the appropriate flush instruction(s), and whether the flush is required. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/setup.c | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index a8531e012658..ae4f596273b5 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void) of_pci_check_probe_only(); } +static void pseries_setup_rfi_flush(void) +{ + struct h_cpu_char_result result; + enum l1d_flush_type types; + bool enable; + long rc; + + /* Enable by default */ + enable = true; + + rc = plpar_get_cpu_characteristics(&result); + if (rc == H_SUCCESS) { + types = L1D_FLUSH_NONE; + + if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2) + types |= L1D_FLUSH_MTTRIG; + if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30) + types |= L1D_FLUSH_ORI; + + /* Use fallback if nothing set in hcall */ + if (types == L1D_FLUSH_NONE) + types = L1D_FLUSH_FALLBACK; + + if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) + enable = false; + } else { + /* Default to fallback if case hcall is not available */ + types = L1D_FLUSH_FALLBACK; + } + + setup_rfi_flush(types, enable); +} + static void __init pSeries_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); @@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); + pseries_setup_rfi_flush(); + /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); -- cgit v1.2.3-59-g8ed1b From 6e032b350cd1fdb830f18f8320ef0e13b4e24094 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/powernv: Check device-tree for RFI flush settings New device-tree properties are available which tell the hypervisor settings related to the RFI flush. Use them to determine the appropriate flush instruction to use, and whether the flush is required. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/setup.c | 49 ++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 1edfbc1e40f4..4fb21e17504a 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -37,13 +37,62 @@ #include #include #include +#include #include "powernv.h" +static void pnv_setup_rfi_flush(void) +{ + struct device_node *np, *fw_features; + enum l1d_flush_type type; + int enable; + + /* Default to fallback in case fw-features are not available */ + type = L1D_FLUSH_FALLBACK; + enable = 1; + + np = of_find_node_by_name(NULL, "ibm,opal"); + fw_features = of_get_child_by_name(np, "fw-features"); + of_node_put(np); + + if (fw_features) { + np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); + if (np && of_property_read_bool(np, "enabled")) + type = L1D_FLUSH_MTTRIG; + + of_node_put(np); + + np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); + if (np && of_property_read_bool(np, "enabled")) + type = L1D_FLUSH_ORI; + + of_node_put(np); + + /* Enable unless firmware says NOT to */ + enable = 2; + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); + if (np && of_property_read_bool(np, "disabled")) + enable--; + + of_node_put(np); + + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); + if (np && of_property_read_bool(np, "disabled")) + enable--; + + of_node_put(np); + of_node_put(fw_features); + } + + setup_rfi_flush(type, enable > 0); +} + static void __init pnv_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + pnv_setup_rfi_flush(); + /* Initialize SMP */ pnv_smp_init(); -- cgit v1.2.3-59-g8ed1b From 274920a3ecd5f43af0cc380bc0a9ee73a52b9f8a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 10 Jan 2018 23:49:12 +1100 Subject: powerpc/xmon: Add RFI flush related fields to paca dump Signed-off-by: Michael Ellerman --- arch/powerpc/xmon/xmon.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index cab24f549e7c..b3bb5beec54a 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2375,6 +2375,10 @@ static void dump_one_paca(int cpu) DUMP(p, slb_cache_ptr, "x"); for (i = 0; i < SLB_CACHE_ENTRIES; i++) printf(" slb_cache[%d]: = 0x%016lx\n", i, p->slb_cache[i]); + + DUMP(p, rfi_flush_fallback_area, "px"); + DUMP(p, l1d_flush_congruence, "llx"); + DUMP(p, l1d_flush_sets, "llx"); #endif DUMP(p, dscr_default, "llx"); #ifdef CONFIG_PPC_BOOK3E -- cgit v1.2.3-59-g8ed1b From 2248fade965a5f1ba2a8e6e63f84df696b2d2780 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 11 Jan 2018 01:17:24 +1100 Subject: powerpc/xmon: Don't print hashed pointers in paca dump Remember when the biggest problem we had to worry about was hashed pointers, those were the days. These were missed in my earlier patch because they don't match "%p", but the macro is hiding a "%p", so these all end up being hashed, which is not what we want in xmon. Convert them to "%px". Signed-off-by: Michael Ellerman --- arch/powerpc/xmon/xmon.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index b3bb5beec54a..0ddc7ac6c5f1 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2344,10 +2344,10 @@ static void dump_one_paca(int cpu) DUMP(p, kernel_toc, "lx"); DUMP(p, kernelbase, "lx"); DUMP(p, kernel_msr, "lx"); - DUMP(p, emergency_sp, "p"); + DUMP(p, emergency_sp, "px"); #ifdef CONFIG_PPC_BOOK3S_64 - DUMP(p, nmi_emergency_sp, "p"); - DUMP(p, mc_emergency_sp, "p"); + DUMP(p, nmi_emergency_sp, "px"); + DUMP(p, mc_emergency_sp, "px"); DUMP(p, in_nmi, "x"); DUMP(p, in_mce, "x"); DUMP(p, hmi_event_available, "x"); @@ -2382,14 +2382,14 @@ static void dump_one_paca(int cpu) #endif DUMP(p, dscr_default, "llx"); #ifdef CONFIG_PPC_BOOK3E - DUMP(p, pgd, "p"); - DUMP(p, kernel_pgd, "p"); - DUMP(p, tcd_ptr, "p"); - DUMP(p, mc_kstack, "p"); - DUMP(p, crit_kstack, "p"); - DUMP(p, dbg_kstack, "p"); + DUMP(p, pgd, "px"); + DUMP(p, kernel_pgd, "px"); + DUMP(p, tcd_ptr, "px"); + DUMP(p, mc_kstack, "px"); + DUMP(p, crit_kstack, "px"); + DUMP(p, dbg_kstack, "px"); #endif - DUMP(p, __current, "p"); + DUMP(p, __current, "px"); DUMP(p, kstack, "lx"); printf(" kstack_base = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1)); DUMP(p, stab_rr, "lx"); @@ -2407,7 +2407,7 @@ static void dump_one_paca(int cpu) #endif #ifdef CONFIG_PPC_POWERNV - DUMP(p, core_idle_state_ptr, "p"); + DUMP(p, core_idle_state_ptr, "px"); DUMP(p, thread_idle_state, "x"); DUMP(p, thread_mask, "x"); DUMP(p, subcore_sibling_mask, "x"); -- cgit v1.2.3-59-g8ed1b From 349524bc0da698ec77f2057cf4a4948eb6349265 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 10 Jan 2018 17:10:12 +1100 Subject: powerpc: Don't preempt_disable() in show_cpuinfo() This causes warnings from cpufreq mutex code. This is also rather unnecessary and ineffective. If we really want to prevent concurrent unplug, we could take the unplug read lock but I don't see this being critical. Fixes: cd77b5ce208c ("powerpc/powernv/cpufreq: Fix the frequency read by /proc/cpuinfo") Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup-common.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9d213542a48b..8fd3a70047f1 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -242,14 +242,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned short maj; unsigned short min; - /* We only show online cpus: disable preempt (overzealous, I - * knew) to prevent cpu going down. */ - preempt_disable(); - if (!cpu_online(cpu_id)) { - preempt_enable(); - return 0; - } - #ifdef CONFIG_SMP pvr = per_cpu(cpu_pvr, cpu_id); #else @@ -358,9 +350,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "\n"); #endif - - preempt_enable(); - /* If this is the last cpu, print the summary */ if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) show_cpuinfo_summary(m); -- cgit v1.2.3-59-g8ed1b From fd6e440f20b1a4304553775fc55938848ff617c9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jan 2018 21:20:05 +1100 Subject: powerpc/64s: Wire up cpu_show_meltdown() The recent commit 87590ce6e373 ("sysfs/cpu: Add vulnerability folder") added a generic folder and set of files for reporting information on CPU vulnerabilities. One of those was for meltdown: /sys/devices/system/cpu/vulnerabilities/meltdown This commit wires up that file for 64-bit Book3S powerpc. For now we default to "Vulnerable" unless the RFI flush is enabled. That may not actually be true on all hardware, further patches will refine the reporting based on the CPU/platform etc. But for now we default to being pessimists. Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/setup_64.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c51e6ce42e7a..2ed525a44734 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -166,6 +166,7 @@ config PPC select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_SMP_IDLE_THREAD diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 491be4179ddd..624d2a62d05d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -901,4 +901,12 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) if (!no_rfi_flush) rfi_flush_enable(enable); } + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (rfi_flush) + return sprintf(buf, "Mitigation: RFI Flush\n"); + + return sprintf(buf, "Vulnerable\n"); +} #endif /* CONFIG_PPC_BOOK3S_64 */ -- cgit v1.2.3-59-g8ed1b From 236003e6b5443c45c18e613d2b0d776a9f87540e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jan 2018 22:17:18 +1100 Subject: powerpc/64s: Allow control of RFI flush via debugfs Expose the state of the RFI flush (enabled/disabled) via debugfs, and allow it to be enabled/disabled at runtime. eg: $ cat /sys/kernel/debug/powerpc/rfi_flush 1 $ echo 0 > /sys/kernel/debug/powerpc/rfi_flush $ cat /sys/kernel/debug/powerpc/rfi_flush 0 Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin --- arch/powerpc/kernel/setup_64.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 624d2a62d05d..e67413f4a8f0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -902,6 +903,35 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) rfi_flush_enable(enable); } +#ifdef CONFIG_DEBUG_FS +static int rfi_flush_set(void *data, u64 val) +{ + if (val == 1) + rfi_flush_enable(true); + else if (val == 0) + rfi_flush_enable(false); + else + return -EINVAL; + + return 0; +} + +static int rfi_flush_get(void *data, u64 *val) +{ + *val = rfi_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); + +static __init int rfi_flush_debugfs_init(void) +{ + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + return 0; +} +device_initcall(rfi_flush_debugfs_init); +#endif + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { if (rfi_flush) -- cgit v1.2.3-59-g8ed1b From 1b689a95ce7427075f9ac9fb4aea1af530742b7f Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Mon, 15 Jan 2018 14:30:03 +0100 Subject: powerpc/pseries: include linux/types.h in asm/hvcall.h Commit 6e032b350cd1 ("powerpc/powernv: Check device-tree for RFI flush settings") uses u64 in asm/hvcall.h without including linux/types.h This breaks hvcall.h users that do not include the header themselves. Fixes: 6e032b350cd1 ("powerpc/powernv: Check device-tree for RFI flush settings") Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hvcall.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index f0461618bf7b..eca3f9c68907 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -353,6 +353,7 @@ #define PROC_TABLE_GTSE 0x01 #ifndef __ASSEMBLY__ +#include /** * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments -- cgit v1.2.3-59-g8ed1b From 3214d01f139b7544e870fc0b7fcce8da13c1cb51 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Jan 2018 16:06:47 +1100 Subject: KVM: PPC: Book3S: Provide information about hardware/firmware CVE workarounds This adds a new ioctl, KVM_PPC_GET_CPU_CHAR, that gives userspace information about the underlying machine's level of vulnerability to the recently announced vulnerabilities CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754, and whether the machine provides instructions to assist software to work around the vulnerabilities. The ioctl returns two u64 words describing characteristics of the CPU and required software behaviour respectively, plus two mask words which indicate which bits have been filled in by the kernel, for extensibility. The bit definitions are the same as for the new H_GET_CPU_CHARACTERISTICS hypercall. There is also a new capability, KVM_CAP_PPC_GET_CPU_CHAR, which indicates whether the new ioctl is available. Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/api.txt | 46 +++++++++++++ arch/powerpc/include/uapi/asm/kvm.h | 25 +++++++ arch/powerpc/kvm/powerpc.c | 131 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 3 + 4 files changed, 205 insertions(+) (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 57d3ee9e4bde..fc3ae951bc07 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3403,6 +3403,52 @@ invalid, if invalid pages are written to (e.g. after the end of memory) or if no page table is present for the addresses (e.g. when using hugepages). +4.108 KVM_PPC_GET_CPU_CHAR + +Capability: KVM_CAP_PPC_GET_CPU_CHAR +Architectures: powerpc +Type: vm ioctl +Parameters: struct kvm_ppc_cpu_char (out) +Returns: 0 on successful completion + -EFAULT if struct kvm_ppc_cpu_char cannot be written + +This ioctl gives userspace information about certain characteristics +of the CPU relating to speculative execution of instructions and +possible information leakage resulting from speculative execution (see +CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754). The information is +returned in struct kvm_ppc_cpu_char, which looks like this: + +struct kvm_ppc_cpu_char { + __u64 character; /* characteristics of the CPU */ + __u64 behaviour; /* recommended software behaviour */ + __u64 character_mask; /* valid bits in character */ + __u64 behaviour_mask; /* valid bits in behaviour */ +}; + +For extensibility, the character_mask and behaviour_mask fields +indicate which bits of character and behaviour have been filled in by +the kernel. If the set of defined bits is extended in future then +userspace will be able to tell whether it is running on a kernel that +knows about the new bits. + +The character field describes attributes of the CPU which can help +with preventing inadvertent information disclosure - specifically, +whether there is an instruction to flash-invalidate the L1 data cache +(ori 30,30,0 or mtspr SPRN_TRIG2,rN), whether the L1 data cache is set +to a mode where entries can only be used by the thread that created +them, whether the bcctr[l] instruction prevents speculation, and +whether a speculation barrier instruction (ori 31,31,0) is provided. + +The behaviour field describes actions that software should take to +prevent inadvertent information disclosure, and thus describes which +vulnerabilities the hardware is subject to; specifically whether the +L1 data cache should be flushed when returning to user mode from the +kernel, and whether a speculation barrier should be placed between an +array bounds check and the array access. + +These fields use the same bit definitions as the new +H_GET_CPU_CHARACTERISTICS hypercall. + 5. The kvm_run structure ------------------------ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 61d6049f4c1e..637b7263cb86 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -443,6 +443,31 @@ struct kvm_ppc_rmmu_info { __u32 ap_encodings[8]; }; +/* For KVM_PPC_GET_CPU_CHAR */ +struct kvm_ppc_cpu_char { + __u64 character; /* characteristics of the CPU */ + __u64 behaviour; /* recommended software behaviour */ + __u64 character_mask; /* valid bits in character */ + __u64 behaviour_mask; /* valid bits in behaviour */ +}; + +/* + * Values for character and character_mask. + * These are identical to the values used by H_GET_CPU_CHARACTERISTICS. + */ +#define KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 (1ULL << 63) +#define KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED (1ULL << 62) +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 (1ULL << 61) +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 (1ULL << 60) +#define KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV (1ULL << 59) +#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58) +#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57) +#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56) + +#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63) +#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62) +#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61) + /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1915e86cef6f..0a7c88786ec0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -39,6 +39,10 @@ #include #include #include +#ifdef CONFIG_PPC_PSERIES +#include +#include +#endif #include "timing.h" #include "irq.h" @@ -548,6 +552,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_KVM_XICS case KVM_CAP_IRQ_XICS: #endif + case KVM_CAP_PPC_GET_CPU_CHAR: r = 1; break; @@ -1759,6 +1764,124 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, return r; } +#ifdef CONFIG_PPC_BOOK3S_64 +/* + * These functions check whether the underlying hardware is safe + * against attacks based on observing the effects of speculatively + * executed instructions, and whether it supplies instructions for + * use in workarounds. The information comes from firmware, either + * via the device tree on powernv platforms or from an hcall on + * pseries platforms. + */ +#ifdef CONFIG_PPC_PSERIES +static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + struct h_cpu_char_result c; + unsigned long rc; + + if (!machine_is(pseries)) + return -ENOTTY; + + rc = plpar_get_cpu_characteristics(&c); + if (rc == H_SUCCESS) { + cp->character = c.character; + cp->behaviour = c.behaviour; + cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | + KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | + KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | + KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | + KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | + KVM_PPC_CPU_CHAR_BR_HINT_HONOURED | + KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF | + KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | + KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | + KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + } + return 0; +} +#else +static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + return -ENOTTY; +} +#endif + +static inline bool have_fw_feat(struct device_node *fw_features, + const char *state, const char *name) +{ + struct device_node *np; + bool r = false; + + np = of_get_child_by_name(fw_features, name); + if (np) { + r = of_property_read_bool(np, state); + of_node_put(np); + } + return r; +} + +static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + struct device_node *np, *fw_features; + int r; + + memset(cp, 0, sizeof(*cp)); + r = pseries_get_cpu_char(cp); + if (r != -ENOTTY) + return r; + + np = of_find_node_by_name(NULL, "ibm,opal"); + if (np) { + fw_features = of_get_child_by_name(np, "fw-features"); + of_node_put(np); + if (!fw_features) + return 0; + if (have_fw_feat(fw_features, "enabled", + "inst-spec-barrier-ori31,31,0")) + cp->character |= KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31; + if (have_fw_feat(fw_features, "enabled", + "fw-bcctrl-serialized")) + cp->character |= KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED; + if (have_fw_feat(fw_features, "enabled", + "inst-l1d-flush-ori30,30,0")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30; + if (have_fw_feat(fw_features, "enabled", + "inst-l1d-flush-trig2")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2; + if (have_fw_feat(fw_features, "enabled", + "fw-l1d-thread-split")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV; + if (have_fw_feat(fw_features, "enabled", + "fw-count-cache-disabled")) + cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | + KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | + KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | + KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | + KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | + KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + + if (have_fw_feat(fw_features, "enabled", + "speculation-policy-favor-security")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY; + if (!have_fw_feat(fw_features, "disabled", + "needs-l1d-flush-msr-pr-0-to-1")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR; + if (!have_fw_feat(fw_features, "disabled", + "needs-spec-barrier-for-bound-checks")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | + KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | + KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + + of_node_put(fw_features); + } + + return 0; +} +#endif + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1861,6 +1984,14 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; break; } + case KVM_PPC_GET_CPU_CHAR: { + struct kvm_ppc_cpu_char cpuchar; + + r = kvmppc_get_cpu_char(&cpuchar); + if (r >= 0 && copy_to_user(argp, &cpuchar, sizeof(cpuchar))) + r = -EFAULT; + break; + } default: { struct kvm *kvm = filp->private_data; r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 496e59a2738b..7a99b98cf88e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -932,6 +932,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_SYNIC2 148 #define KVM_CAP_HYPERV_VP_INDEX 149 #define KVM_CAP_S390_AIS_MIGRATION 150 +#define KVM_CAP_PPC_GET_CPU_CHAR 151 #ifdef KVM_CAP_IRQ_ROUTING @@ -1261,6 +1262,8 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) /* Available with KVM_CAP_PPC_RADIX_MMU */ #define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) +/* Available with KVM_CAP_PPC_GET_CPU_CHAR */ +#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) -- cgit v1.2.3-59-g8ed1b From c5baa1be8f559d5f33c412d00cc1c86762a8bbbf Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 17 Jan 2018 14:26:47 +0000 Subject: irqdomain: Kill CONFIG_IRQ_DOMAIN_DEBUG CONFIG_IRQ_DOMAIN_DEBUG is similar to CONFIG_GENERIC_IRQ_DEBUGFS, just with less information. Spring cleanup time. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Yang Shunyong Link: https://lkml.kernel.org/r/20180117142647.23622-1-marc.zyngier@arm.com --- Documentation/IRQ-domain.txt | 36 +-------- arch/arm/configs/aspeed_g4_defconfig | 1 - arch/arm/configs/aspeed_g5_defconfig | 1 - arch/arm/configs/hisi_defconfig | 1 - arch/arm/configs/multi_v7_defconfig | 1 - arch/arm/configs/mvebu_v7_defconfig | 1 - arch/arm/configs/pxa_defconfig | 1 - arch/arm/configs/sama5_defconfig | 1 - arch/arm/configs/tegra_defconfig | 1 - arch/arm/configs/vt8500_v6_v7_defconfig | 1 - arch/powerpc/configs/fsl-emb-nonhw.config | 1 - arch/powerpc/configs/powernv_defconfig | 1 - arch/powerpc/configs/ppc64_defconfig | 1 - arch/powerpc/configs/pseries_defconfig | 1 - arch/xtensa/configs/audio_kc705_defconfig | 1 - arch/xtensa/configs/cadence_csp_defconfig | 1 - arch/xtensa/configs/generic_kc705_defconfig | 1 - arch/xtensa/configs/nommu_kc705_defconfig | 1 - arch/xtensa/configs/smp_lx200_defconfig | 1 - kernel/irq/Kconfig | 10 --- kernel/irq/irqdomain.c | 118 ---------------------------- 21 files changed, 2 insertions(+), 180 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/IRQ-domain.txt b/Documentation/IRQ-domain.txt index 4a1cd7645d85..507775cce753 100644 --- a/Documentation/IRQ-domain.txt +++ b/Documentation/IRQ-domain.txt @@ -265,37 +265,5 @@ support other architectures, such as ARM, ARM64 etc. === Debugging === -If you switch on CONFIG_IRQ_DOMAIN_DEBUG (which depends on -CONFIG_IRQ_DOMAIN and CONFIG_DEBUG_FS), you will find a new file in -your debugfs mount point, called irq_domain_mapping. This file -contains a live snapshot of all the IRQ domains in the system: - - name mapped linear-max direct-max devtree-node - pl061 8 8 0 /smb/gpio@e0080000 - pl061 8 8 0 /smb/gpio@e1050000 - pMSI 0 0 0 /interrupt-controller@e1101000/v2m@e0080000 - MSI 37 0 0 /interrupt-controller@e1101000/v2m@e0080000 - GICv2m 37 0 0 /interrupt-controller@e1101000/v2m@e0080000 - GICv2 448 448 0 /interrupt-controller@e1101000 - -it also iterates over the interrupts to display their mapping in the -domains, and makes the domain stacking visible: - - -irq hwirq chip name chip data active type domain - 1 0x00019 GICv2 0xffff00000916bfd8 * LINEAR GICv2 - 2 0x0001d GICv2 0xffff00000916bfd8 LINEAR GICv2 - 3 0x0001e GICv2 0xffff00000916bfd8 * LINEAR GICv2 - 4 0x0001b GICv2 0xffff00000916bfd8 * LINEAR GICv2 - 5 0x0001a GICv2 0xffff00000916bfd8 LINEAR GICv2 -[...] - 96 0x81808 MSI 0x (null) RADIX MSI - 96+ 0x00063 GICv2m 0xffff8003ee116980 RADIX GICv2m - 96+ 0x00063 GICv2 0xffff00000916bfd8 LINEAR GICv2 - 97 0x08800 MSI 0x (null) * RADIX MSI - 97+ 0x00064 GICv2m 0xffff8003ee116980 * RADIX GICv2m - 97+ 0x00064 GICv2 0xffff00000916bfd8 * LINEAR GICv2 - -Here, interrupts 1-5 are only using a single domain, while 96 and 97 -are build out of a stack of three domain, each level performing a -particular function. +Most of the internals of the IRQ subsystem are exposed in debugfs by +turning CONFIG_GENERIC_IRQ_DEBUGFS on. diff --git a/arch/arm/configs/aspeed_g4_defconfig b/arch/arm/configs/aspeed_g4_defconfig index d23b9d56a88b..95946dee9c77 100644 --- a/arch/arm/configs/aspeed_g4_defconfig +++ b/arch/arm/configs/aspeed_g4_defconfig @@ -1,7 +1,6 @@ CONFIG_KERNEL_XZ=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig index c0ad7b82086b..8c7ea033cdc2 100644 --- a/arch/arm/configs/aspeed_g5_defconfig +++ b/arch/arm/configs/aspeed_g5_defconfig @@ -1,7 +1,6 @@ CONFIG_KERNEL_XZ=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 diff --git a/arch/arm/configs/hisi_defconfig b/arch/arm/configs/hisi_defconfig index b2e340b272ee..74d611e41e02 100644 --- a/arch/arm/configs/hisi_defconfig +++ b/arch/arm/configs/hisi_defconfig @@ -1,4 +1,3 @@ -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 61509c4b769f..b659244902cd 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -1,6 +1,5 @@ CONFIG_SYSVIPC=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_CGROUPS=y diff --git a/arch/arm/configs/mvebu_v7_defconfig b/arch/arm/configs/mvebu_v7_defconfig index 69553704f2dc..ee61be093633 100644 --- a/arch/arm/configs/mvebu_v7_defconfig +++ b/arch/arm/configs/mvebu_v7_defconfig @@ -1,6 +1,5 @@ CONFIG_SYSVIPC=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 830e817a028a..837d0c9c8b0e 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig index 6529cb43e0fd..2080025556b5 100644 --- a/arch/arm/configs/sama5_defconfig +++ b/arch/arm/configs/sama5_defconfig @@ -2,7 +2,6 @@ # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 6678f2929356..c819be04187e 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -1,5 +1,4 @@ CONFIG_SYSVIPC=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y diff --git a/arch/arm/configs/vt8500_v6_v7_defconfig b/arch/arm/configs/vt8500_v6_v7_defconfig index 1bfaa7bfc392..9b85326ba287 100644 --- a/arch/arm/configs/vt8500_v6_v7_defconfig +++ b/arch/arm/configs/vt8500_v6_v7_defconfig @@ -1,4 +1,3 @@ -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config index cc49c95494da..e0567dc41968 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -71,7 +71,6 @@ CONFIG_IP_ROUTE_MULTIPATH=y CONFIG_IP_ROUTE_VERBOSE=y CONFIG_IP_SCTP=m CONFIG_IPV6=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_ISO9660_FS=m CONFIG_JFFS2_FS_DEBUG=1 CONFIG_JFFS2_FS=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 4891bbed6258..73dab7a37386 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -4,7 +4,6 @@ CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_TASKSTATS=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 6ddca80c52c3..5033e630afea 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -1,7 +1,6 @@ CONFIG_PPC64=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_TASKSTATS=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index bde2cd1005a2..0dd5cf7b566d 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -3,7 +3,6 @@ CONFIG_NR_CPUS=2048 CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_TASKSTATS=y diff --git a/arch/xtensa/configs/audio_kc705_defconfig b/arch/xtensa/configs/audio_kc705_defconfig index 8d16925765cb..2bf964df37ba 100644 --- a/arch/xtensa/configs/audio_kc705_defconfig +++ b/arch/xtensa/configs/audio_kc705_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IRQ_TIME_ACCOUNTING=y diff --git a/arch/xtensa/configs/cadence_csp_defconfig b/arch/xtensa/configs/cadence_csp_defconfig index f2d3094aa1d1..3221b7053fa3 100644 --- a/arch/xtensa/configs/cadence_csp_defconfig +++ b/arch/xtensa/configs/cadence_csp_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_USELIB=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IRQ_TIME_ACCOUNTING=y diff --git a/arch/xtensa/configs/generic_kc705_defconfig b/arch/xtensa/configs/generic_kc705_defconfig index 744adeaf2945..985fa8546e4e 100644 --- a/arch/xtensa/configs/generic_kc705_defconfig +++ b/arch/xtensa/configs/generic_kc705_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IRQ_TIME_ACCOUNTING=y diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig index 78c2529d0459..624f9b3a3878 100644 --- a/arch/xtensa/configs/nommu_kc705_defconfig +++ b/arch/xtensa/configs/nommu_kc705_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IRQ_TIME_ACCOUNTING=y diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index 14e3ca353ac8..11fed6c06a7c 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IRQ_TIME_ACCOUNTING=y diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 89e355866450..6fc87ccda1d7 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -103,16 +103,6 @@ config GENERIC_IRQ_MATRIX_ALLOCATOR config GENERIC_IRQ_RESERVATION_MODE bool -config IRQ_DOMAIN_DEBUG - bool "Expose hardware/virtual IRQ mapping via debugfs" - depends on IRQ_DOMAIN && DEBUG_FS - help - This option will show the mapping relationship between hardware irq - numbers and Linux irq numbers. The mapping is exposed via debugfs - in the file "irq_domain_mapping". - - If you don't know what this means you don't need it. - # Support forced irq threading config IRQ_FORCED_THREADING bool diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 62068ad46930..e6a9c36470ee 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -897,124 +897,6 @@ unsigned int irq_find_mapping(struct irq_domain *domain, } EXPORT_SYMBOL_GPL(irq_find_mapping); -#ifdef CONFIG_IRQ_DOMAIN_DEBUG -static void virq_debug_show_one(struct seq_file *m, struct irq_desc *desc) -{ - struct irq_domain *domain; - struct irq_data *data; - - domain = desc->irq_data.domain; - data = &desc->irq_data; - - while (domain) { - unsigned int irq = data->irq; - unsigned long hwirq = data->hwirq; - struct irq_chip *chip; - bool direct; - - if (data == &desc->irq_data) - seq_printf(m, "%5d ", irq); - else - seq_printf(m, "%5d+ ", irq); - seq_printf(m, "0x%05lx ", hwirq); - - chip = irq_data_get_irq_chip(data); - seq_printf(m, "%-15s ", (chip && chip->name) ? chip->name : "none"); - - seq_printf(m, "0x%p ", irq_data_get_irq_chip_data(data)); - - seq_printf(m, " %c ", (desc->action && desc->action->handler) ? '*' : ' '); - direct = (irq == hwirq) && (irq < domain->revmap_direct_max_irq); - seq_printf(m, "%6s%-8s ", - (hwirq < domain->revmap_size) ? "LINEAR" : "RADIX", - direct ? "(DIRECT)" : ""); - seq_printf(m, "%s\n", domain->name); -#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - domain = domain->parent; - data = data->parent_data; -#else - domain = NULL; -#endif - } -} - -static int virq_debug_show(struct seq_file *m, void *private) -{ - unsigned long flags; - struct irq_desc *desc; - struct irq_domain *domain; - struct radix_tree_iter iter; - void __rcu **slot; - int i; - - seq_printf(m, " %-16s %-6s %-10s %-10s %s\n", - "name", "mapped", "linear-max", "direct-max", "devtree-node"); - mutex_lock(&irq_domain_mutex); - list_for_each_entry(domain, &irq_domain_list, link) { - struct device_node *of_node; - const char *name; - - int count = 0; - - of_node = irq_domain_get_of_node(domain); - if (of_node) - name = of_node_full_name(of_node); - else if (is_fwnode_irqchip(domain->fwnode)) - name = container_of(domain->fwnode, struct irqchip_fwid, - fwnode)->name; - else - name = ""; - - radix_tree_for_each_slot(slot, &domain->revmap_tree, &iter, 0) - count++; - seq_printf(m, "%c%-16s %6u %10u %10u %s\n", - domain == irq_default_domain ? '*' : ' ', domain->name, - domain->revmap_size + count, domain->revmap_size, - domain->revmap_direct_max_irq, - name); - } - mutex_unlock(&irq_domain_mutex); - - seq_printf(m, "%-5s %-7s %-15s %-*s %6s %-14s %s\n", "irq", "hwirq", - "chip name", (int)(2 * sizeof(void *) + 2), "chip data", - "active", "type", "domain"); - - for (i = 1; i < nr_irqs; i++) { - desc = irq_to_desc(i); - if (!desc) - continue; - - raw_spin_lock_irqsave(&desc->lock, flags); - virq_debug_show_one(m, desc); - raw_spin_unlock_irqrestore(&desc->lock, flags); - } - - return 0; -} - -static int virq_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, virq_debug_show, inode->i_private); -} - -static const struct file_operations virq_debug_fops = { - .open = virq_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __init irq_debugfs_init(void) -{ - if (debugfs_create_file("irq_domain_mapping", S_IRUGO, NULL, - NULL, &virq_debug_fops) == NULL) - return -ENOMEM; - - return 0; -} -__initcall(irq_debugfs_init); -#endif /* CONFIG_IRQ_DOMAIN_DEBUG */ - /** * irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings * -- cgit v1.2.3-59-g8ed1b