From 433c5c20c505fef92be84c6afab70f1c2ab5eda3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Apr 2015 10:34:35 +1000 Subject: powerpc/kvm: Fix SMP=n build error in book3s_xics.c Commit 34cb7954c0aa "Convert ICS mutex lock to spin lock" added an include of asm/spinlock.h, which does not work in the SMP=n case. It should instead include linux/spinlock.h Fixes: 34cb7954c0aa ("KVM: PPC: Book3S HV: Convert ICS mutex lock to spin lock") Acked-by: Paul Mackerras Reviewed-by: Alexander Graf Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_xics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 8f3e6cc54d95..c6ca7db64673 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -20,7 +21,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3-59-g8ed1b From d33047fd7e7d93662622888681861ba84d43c506 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 27 Apr 2015 12:33:51 +1000 Subject: powerpc/powernv: Fix early pci_controller_ops loading. Load the PowerNV platform pci controller ops into pci controllers after all the operations are loaded into the platform ops struct, not before. Otherwise we aren't actually setting the ops properly which can break IO for some devices. Fixes: 65ebf4b63 ("powerpc/powernv: Move controller ops from ppc_md to controller_ops") Reported-by: Gavin Shan Reviewed-by: Gavin Shan Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 920c252d1f49..f8bc950efcae 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2693,7 +2693,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, hose->last_busno = 0xff; } hose->private_data = phb; - hose->controller_ops = pnv_pci_controller_ops; phb->hub_id = hub_id; phb->opal_id = phb_id; phb->type = ioda_type; @@ -2812,6 +2811,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, pnv_pci_controller_ops.enable_device_hook = pnv_pci_enable_device_hook; pnv_pci_controller_ops.window_alignment = pnv_pci_window_alignment; pnv_pci_controller_ops.reset_secondary_bus = pnv_pci_reset_secondary_bus; + hose->controller_ops = pnv_pci_controller_ops; #ifdef CONFIG_PCI_IOV ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; -- cgit v1.2.3-59-g8ed1b From 68fc378ce332cc4efd7f314d3e6e15e83f53ebf2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 30 Apr 2015 15:13:14 +1000 Subject: Revert "powerpc/tm: Abort syscalls in active transactions" This reverts commit feba40362b11341bee6d8ed58d54b896abbd9f84. Although the principle of this change is good, the implementation has a few issues. Firstly we can sometimes fail to abort a syscall because r12 may have been clobbered by C code if we went down the virtual CPU accounting path, or if syscall tracing was enabled. Secondly we have decided that it is safer to abort the syscall even earlier in the syscall entry path, so that we avoid the syscall tracing path when we are transactional. So that we have time to thoroughly test those changes we have decided to revert this for this merge window and will merge the fixed version in the next window. NB. Rather than reverting the selftest we just drop tm-syscall from TEST_PROGS so that it's not run by default. Fixes: feba40362b11 ("powerpc/tm: Abort syscalls in active transactions") Signed-off-by: Michael Ellerman --- Documentation/powerpc/transactional_memory.txt | 32 +++++++++++++------------- arch/powerpc/include/uapi/asm/tm.h | 2 +- arch/powerpc/kernel/entry_64.S | 19 --------------- tools/testing/selftests/powerpc/tm/Makefile | 2 +- 4 files changed, 18 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.txt index ba0a2a4a54ba..ded69794a5c0 100644 --- a/Documentation/powerpc/transactional_memory.txt +++ b/Documentation/powerpc/transactional_memory.txt @@ -74,23 +74,22 @@ Causes of transaction aborts Syscalls ======== -Syscalls made from within an active transaction will not be performed and the -transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL -| TM_CAUSE_PERSISTENT. +Performing syscalls from within transaction is not recommended, and can lead +to unpredictable results. -Syscalls made from within a suspended transaction are performed as normal and -the transaction is not explicitly doomed by the kernel. However, what the -kernel does to perform the syscall may result in the transaction being doomed -by the hardware. The syscall is performed in suspended mode so any side -effects will be persistent, independent of transaction success or failure. No -guarantees are provided by the kernel about which syscalls will affect -transaction success. +Syscalls do not by design abort transactions, but beware: The kernel code will +not be running in transactional state. The effect of syscalls will always +remain visible, but depending on the call they may abort your transaction as a +side-effect, read soon-to-be-aborted transactional data that should not remain +invisible, etc. If you constantly retry a transaction that constantly aborts +itself by calling a syscall, you'll have a livelock & make no progress. -Care must be taken when relying on syscalls to abort during active transactions -if the calls are made via a library. Libraries may cache values (which may -give the appearance of success) or perform operations that cause transaction -failure before entering the kernel (which may produce different failure codes). -Examples are glibc's getpid() and lazy symbol resolution. +Simple syscalls (e.g. sigprocmask()) "could" be OK. Even things like write() +from, say, printf() should be OK as long as the kernel does not access any +memory that was accessed transactionally. + +Consider any syscalls that happen to work as debug-only -- not recommended for +production use. Best to queue them up till after the transaction is over. Signals @@ -177,7 +176,8 @@ kernel aborted a transaction: TM_CAUSE_RESCHED Thread was rescheduled. TM_CAUSE_TLBI Software TLB invalid. TM_CAUSE_FAC_UNAV FP/VEC/VSX unavailable trap. - TM_CAUSE_SYSCALL Syscall from active transaction. + TM_CAUSE_SYSCALL Currently unused; future syscalls that must abort + transactions for consistency will use this. TM_CAUSE_SIGNAL Signal delivered. TM_CAUSE_MISC Currently unused. TM_CAUSE_ALIGNMENT Alignment fault. diff --git a/arch/powerpc/include/uapi/asm/tm.h b/arch/powerpc/include/uapi/asm/tm.h index 5047659815a5..5d836b7c1176 100644 --- a/arch/powerpc/include/uapi/asm/tm.h +++ b/arch/powerpc/include/uapi/asm/tm.h @@ -11,7 +11,7 @@ #define TM_CAUSE_RESCHED 0xde #define TM_CAUSE_TLBI 0xdc #define TM_CAUSE_FAC_UNAV 0xda -#define TM_CAUSE_SYSCALL 0xd8 +#define TM_CAUSE_SYSCALL 0xd8 /* future use */ #define TM_CAUSE_MISC 0xd6 /* future use */ #define TM_CAUSE_SIGNAL 0xd4 #define TM_CAUSE_ALIGNMENT 0xd2 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 8ca9434c40e6..afbc20019c2e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -34,7 +34,6 @@ #include #include #include -#include /* * System calls. @@ -146,24 +145,6 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) andi. r11,r10,_TIF_SYSCALL_DOTRACE bne syscall_dotrace .Lsyscall_dotrace_cont: -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -BEGIN_FTR_SECTION - b 1f -END_FTR_SECTION_IFCLR(CPU_FTR_TM) - extrdi. r11, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ - beq+ 1f - - /* Doom the transaction and don't perform the syscall: */ - mfmsr r11 - li r12, 1 - rldimi r11, r12, MSR_TM_LG, 63-MSR_TM_LG - mtmsrd r11, 0 - li r11, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) - TABORT(R11) - - b .Lsyscall_exit -1: -#endif cmpldi 0,r0,NR_syscalls bge- syscall_enosys diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 1b616fa79e93..6bff955e1d55 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,4 +1,4 @@ -TEST_PROGS := tm-resched-dscr tm-syscall +TEST_PROGS := tm-resched-dscr all: $(TEST_PROGS) -- cgit v1.2.3-59-g8ed1b From f32393c943e297b8ae180c8f83d81a156c7d0412 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Wed, 29 Apr 2015 20:42:06 -0500 Subject: powerpc/pseries: Correct cpu affinity for dlpar added cpus The incorrect ordering of operations during cpu dlpar add results in invalid affinity for the cpu being added. The ibm,associativity property in the device tree is populated with all zeroes for the added cpu which results in invalid affinity mappings and all cpus appear to belong to node 0. This occurs because rtas configure-connector is called prior to making the rtas set-indicator calls. Phyp does not assign affinity information for a cpu until the rtas set-indicator calls are made to set the isolation and allocation state. Correct the order of operations to make the rtas set-indicator calls (done in dlpar_acquire_drc) before calling rtas configure-connector. Fixes: 1a8061c46c46 ("powerpc/pseries: Add kernel based CPU DLPAR handling") Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/dlpar.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index b4b11096ea8b..019d34aaf054 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -412,6 +412,10 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) if (rc) return -EINVAL; + rc = dlpar_acquire_drc(drc_index); + if (rc) + return -EINVAL; + parent = of_find_node_by_path("/cpus"); if (!parent) return -ENODEV; @@ -422,12 +426,6 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) of_node_put(parent); - rc = dlpar_acquire_drc(drc_index); - if (rc) { - dlpar_free_cc_nodes(dn); - return -EINVAL; - } - rc = dlpar_attach_node(dn); if (rc) { dlpar_release_drc(drc_index); -- cgit v1.2.3-59-g8ed1b From 1ae79b78bc52b910a224f3795122538516e07b5f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 1 May 2015 09:14:11 +1000 Subject: powerpc/eeh: Fix race condition in pcibios_set_pcie_reset_state() When asserting reset in pcibios_set_pcie_reset_state(), the PE is enforced to (hardware) frozen state in order to drop unexpected PCI transactions (except PCI config read/write) automatically by hardware during reset, which would cause recursive EEH error. However, the (software) frozen state EEH_PE_ISOLATED is missed. When users get 0xFF from PCI config or MMIO read, EEH_PE_ISOLATED is set in PE state retrival backend. Unfortunately, nobody (the reset handler or the EEH recovery functinality in host) will clear EEH_PE_ISOLATED when the PE has been passed through to guest. The patch sets and clears EEH_PE_ISOLATED properly during reset in function pcibios_set_pcie_reset_state() to fix the issue. Fixes: 28158cd ("Enhance pcibios_set_pcie_reset_state()") Reported-by: Carol L. Soto Signed-off-by: Gavin Shan Tested-by: Carol L. Soto Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 44b480e3a5af..fa046ca6d0fa 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -749,21 +749,24 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat eeh_unfreeze_pe(pe, false); eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); break; case pcie_hot_reset: + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED); return -EINVAL; }; -- cgit v1.2.3-59-g8ed1b From d91dafc02f42e23c1a906202ebde5d7c49ef058d Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 1 May 2015 09:22:15 +1000 Subject: powerpc/eeh: Delay probing EEH device during hotplug Commit 1c509148b ("powerpc/eeh: Do probe on pci_dn") probes EEH devices in early stage, which is reasonable to pSeries platform. However, it's wrong for PowerNV platform because the PE# isn't determined until the resources (IO and MMIO) are assigned to PE in hotplug case. So we have to delay probing EEH devices for PowerNV platform until the PE# is assigned. Fixes: ff57b454ddb9 ("powerpc/eeh: Do probe on pci_dn") Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index fa046ca6d0fa..9ee61d15653d 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1061,6 +1061,9 @@ void eeh_add_device_early(struct pci_dn *pdn) if (!edev || !eeh_enabled()) return; + if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) + return; + /* USB Bus children of PCI devices will not have BUID's */ phb = edev->phb; if (NULL == phb || @@ -1115,6 +1118,9 @@ void eeh_add_device_late(struct pci_dev *dev) return; } + if (eeh_has_flag(EEH_PROBE_MODE_DEV)) + eeh_ops->probe(pdn, NULL); + /* * The EEH cache might not be removed correctly because of * unbalanced kref to the device during unplug time, which -- cgit v1.2.3-59-g8ed1b From 0aab3747091db309b8a484cfd382a41644552aa3 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 1 May 2015 16:50:34 +1000 Subject: powerpc/powernv: Restore non-volatile CRs after nap Patches 7cba160ad "powernv/cpuidle: Redesign idle states management" and 77b54e9f2 "powernv/powerpc: Add winkle support for offline cpus" use non-volatile condition registers (cr2, cr3 and cr4) early in the system reset interrupt handler (system_reset_pSeries()) before it has been determined if state loss has occurred. If state loss has not occurred, control returns via the power7_wakeup_noloss() path which does not restore those condition registers, leaving them corrupted. Fix this by restoring the condition registers in the power7_wakeup_noloss() case. This is apparent when running a KVM guest on hardware that does not support winkle or sleep and the guest makes use of secondary threads. In practice this means Power7 machines, though some early unreleased Power8 machines may also be susceptible. The secondary CPUs are taken off line before the guest is started and they call pnv_smp_cpu_kill_self(). This checks support for sleep states (in this case there is no support) and power7_nap() is called. When the CPU is woken, power7_nap() returns and because the CPU is still off line, the main while loop executes again. The sleep states support test is executed again, but because the tested values cannot have changed, the compiler has optimized the test away and instead we rely on the result of the first test, which has been left in cr3 and/or cr4. With the result overwritten, the wrong branch is taken and power7_winkle() is called on a CPU that does not support it, leading to it stalling. Fixes: 7cba160ad789 ("powernv/cpuidle: Redesign idle states management") Fixes: 77b54e9f213f ("powernv/powerpc: Add winkle support for offline cpus") [mpe: Massage change log a bit more] Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_power7.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index eeaa0d5f69d5..ccde8f084ce4 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -501,9 +501,11 @@ BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) ld r1,PACAR1(r13) + ld r6,_CCR(r1) ld r4,_MSR(r1) ld r5,_NIP(r1) addi r1,r1,INT_FRAME_SIZE + mtcr r6 mtspr SPRN_SRR1,r4 mtspr SPRN_SRR0,r5 rfid -- cgit v1.2.3-59-g8ed1b