aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-opal.txt37
-rw-r--r--arch/powerpc/include/asm/cpuidle.h20
-rw-r--r--arch/powerpc/include/asm/opal.h42
-rw-r--r--arch/powerpc/include/asm/paca.h10
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h2
-rw-r--r--arch/powerpc/include/asm/processor.h3
-rw-r--r--arch/powerpc/include/asm/reg.h4
-rw-r--r--arch/powerpc/include/asm/syscall.h6
-rw-r--r--arch/powerpc/include/asm/uaccess.h6
-rw-r--r--arch/powerpc/kernel/asm-offsets.c11
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S35
-rw-r--r--arch/powerpc/kernel/idle_power7.S344
-rw-r--r--arch/powerpc/kernel/smp.c9
-rw-r--r--arch/powerpc/perf/hv-24x7.c23
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S39
-rw-r--r--arch/powerpc/platforms/powernv/opal.c50
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h2
-rw-r--r--arch/powerpc/platforms/powernv/setup.c166
-rw-r--r--arch/powerpc/platforms/powernv/smp.c29
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c34
-rw-r--r--arch/powerpc/platforms/powernv/subcore.h9
-rw-r--r--drivers/cpuidle/cpuidle-powernv.c10
-rw-r--r--drivers/i2c/busses/Kconfig11
-rw-r--r--drivers/i2c/busses/Makefile1
-rw-r--r--drivers/i2c/busses/i2c-opal.c294
-rw-r--r--drivers/misc/cxl/context.c26
-rw-r--r--drivers/misc/cxl/cxl.h9
-rw-r--r--drivers/misc/cxl/file.c6
-rw-r--r--drivers/misc/cxl/native.c12
-rw-r--r--drivers/misc/cxl/pci.c2
-rw-r--r--drivers/misc/cxl/sysfs.c10
-rw-r--r--include/uapi/linux/audit.h2
32 files changed, 1156 insertions, 108 deletions
diff --git a/Documentation/devicetree/bindings/i2c/i2c-opal.txt b/Documentation/devicetree/bindings/i2c/i2c-opal.txt
new file mode 100644
index 000000000000..12bc61465ee5
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-opal.txt
@@ -0,0 +1,37 @@
+Device-tree bindings for I2C OPAL driver
+----------------------------------------
+
+Most of the device node and properties layout is specific to the firmware and
+used by the firmware itself for configuring the port. From the linux
+perspective, the properties of use are "ibm,port-name" and "ibm,opal-id".
+
+Required properties:
+
+- reg: Port-id within a given master
+- compatible: must be "ibm,opal-i2c"
+- ibm,opal-id: Refers to a specific bus and used to identify it when calling
+ the relevant OPAL functions.
+- bus-frequency: Operating frequency of the i2c bus (in HZ). Informational for
+ linux, used by the FW though.
+
+Optional properties:
+- ibm,port-name: Firmware provides this name that uniquely identifies the i2c
+ port.
+
+The node contains a number of other properties that are used by the FW itself
+and depend on the specific hardware implementation. The example below depicts
+a P8 on-chip bus.
+
+Example:
+
+i2c-bus@0 {
+ reg = <0x0>;
+ bus-frequency = <0x61a80>;
+ compatible = "ibm,power8-i2c-port", "ibm,opal-i2c";
+ ibm,opal-id = <0x1>;
+ ibm,port-name = "p8_00000000_e1p0";
+ #address-cells = <0x1>;
+ phandle = <0x10000006>;
+ #size-cells = <0x0>;
+ linux,phandle = <0x10000006>;
+};
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
new file mode 100644
index 000000000000..d2f99ca1e3a6
--- /dev/null
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_POWERPC_CPUIDLE_H
+#define _ASM_POWERPC_CPUIDLE_H
+
+#ifdef CONFIG_PPC_POWERNV
+/* Used in powernv idle state management */
+#define PNV_THREAD_RUNNING 0
+#define PNV_THREAD_NAP 1
+#define PNV_THREAD_SLEEP 2
+#define PNV_THREAD_WINKLE 3
+#define PNV_CORE_IDLE_LOCK_BIT 0x100
+#define PNV_CORE_IDLE_THREAD_BITS 0x0FF
+
+#ifndef __ASSEMBLY__
+extern u32 pnv_fastsleep_workaround_at_entry[];
+extern u32 pnv_fastsleep_workaround_at_exit[];
+#endif
+
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 5cd8d2fddba9..eb95b675109b 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -56,6 +56,14 @@ struct opal_sg_list {
#define OPAL_HARDWARE_FROZEN -13
#define OPAL_WRONG_STATE -14
#define OPAL_ASYNC_COMPLETION -15
+#define OPAL_I2C_TIMEOUT -17
+#define OPAL_I2C_INVALID_CMD -18
+#define OPAL_I2C_LBUS_PARITY -19
+#define OPAL_I2C_BKEND_OVERRUN -20
+#define OPAL_I2C_BKEND_ACCESS -21
+#define OPAL_I2C_ARBT_LOST -22
+#define OPAL_I2C_NACK_RCVD -23
+#define OPAL_I2C_STOP_ERR -24
/* API Tokens (in r0) */
#define OPAL_INVALID_CALL -1
@@ -152,12 +160,25 @@ struct opal_sg_list {
#define OPAL_PCI_ERR_INJECT 96
#define OPAL_PCI_EEH_FREEZE_SET 97
#define OPAL_HANDLE_HMI 98
+#define OPAL_CONFIG_CPU_IDLE_STATE 99
+#define OPAL_SLW_SET_REG 100
#define OPAL_REGISTER_DUMP_REGION 101
#define OPAL_UNREGISTER_DUMP_REGION 102
#define OPAL_WRITE_TPO 103
#define OPAL_READ_TPO 104
#define OPAL_IPMI_SEND 107
#define OPAL_IPMI_RECV 108
+#define OPAL_I2C_REQUEST 109
+
+/* Device tree flags */
+
+/* Flags set in power-mgmt nodes in device tree if
+ * respective idle states are supported in the platform.
+ */
+#define OPAL_PM_NAP_ENABLED 0x00010000
+#define OPAL_PM_SLEEP_ENABLED 0x00020000
+#define OPAL_PM_WINKLE_ENABLED 0x00040000
+#define OPAL_PM_SLEEP_ENABLED_ER1 0x00080000
#ifndef __ASSEMBLY__
@@ -712,6 +733,24 @@ typedef struct oppanel_line {
uint64_t line_len;
} oppanel_line_t;
+/* OPAL I2C request */
+struct opal_i2c_request {
+ uint8_t type;
+#define OPAL_I2C_RAW_READ 0
+#define OPAL_I2C_RAW_WRITE 1
+#define OPAL_I2C_SM_READ 2
+#define OPAL_I2C_SM_WRITE 3
+ uint8_t flags;
+#define OPAL_I2C_ADDR_10 0x01 /* Not supported yet */
+ uint8_t subaddr_sz; /* Max 4 */
+ uint8_t reserved;
+ __be16 addr; /* 7 or 10 bit address */
+ __be16 reserved2;
+ __be32 subaddr; /* Sub-address if any */
+ __be32 size; /* Data size */
+ __be64 buffer_ra; /* Buffer real address */
+};
+
/* /sys/firmware/opal */
extern struct kobject *opal_kobj;
@@ -876,11 +915,14 @@ int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
int64_t opal_handle_hmi(void);
int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
int64_t opal_unregister_dump_region(uint32_t id);
+int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t *msg_len);
+int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id,
+ struct opal_i2c_request *oreq);
/* Internal functions */
extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 24a386cbb928..e5f22c6c4bf9 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -152,6 +152,16 @@ struct paca_struct {
u64 tm_scratch; /* TM scratch area for reclaim */
#endif
+#ifdef CONFIG_PPC_POWERNV
+ /* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
+ u32 *core_idle_state_ptr;
+ u8 thread_idle_state; /* PNV_THREAD_RUNNING/NAP/SLEEP */
+ /* Mask to indicate thread id in core */
+ u8 thread_mask;
+ /* Mask to denote subcore sibling threads */
+ u8 subcore_sibling_mask;
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_64
/* Exclusive emergency stack pointer for machine check exception. */
void *mc_emergency_sp;
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 1a5287759fc8..03cd858a401c 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -194,6 +194,7 @@
#define PPC_INST_NAP 0x4c000364
#define PPC_INST_SLEEP 0x4c0003a4
+#define PPC_INST_WINKLE 0x4c0003e4
/* A2 specific instructions */
#define PPC_INST_ERATWE 0x7c0001a6
@@ -375,6 +376,7 @@
#define PPC_NAP stringify_in_c(.long PPC_INST_NAP)
#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP)
+#define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE)
/* BHRB instructions */
#define PPC_CLRBHRB stringify_in_c(.long PPC_INST_CLRBHRB)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 29c3798cf800..bf117d8fb45f 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -452,7 +452,8 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
extern int powersave_nap; /* set if nap mode can be used in idle loop */
extern unsigned long power7_nap(int check_irq);
-extern void power7_sleep(void);
+extern unsigned long power7_sleep(void);
+extern unsigned long power7_winkle(void);
extern void flush_instruction_cache(void);
extern void hard_reset_now(void);
extern void poweroff_now(void);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c998279bd85b..1c874fb533bb 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -118,8 +118,10 @@
#define __MSR (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV)
#ifdef __BIG_ENDIAN__
#define MSR_ __MSR
+#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV)
#else
#define MSR_ (__MSR | MSR_LE)
+#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV | MSR_LE)
#endif
#define MSR_KERNEL (MSR_ | MSR_64BIT)
#define MSR_USER32 (MSR_ | MSR_PR | MSR_EE)
@@ -371,6 +373,7 @@
#define SPRN_DBAT7L 0x23F /* Data BAT 7 Lower Register */
#define SPRN_DBAT7U 0x23E /* Data BAT 7 Upper Register */
#define SPRN_PPR 0x380 /* SMT Thread status Register */
+#define SPRN_TSCR 0x399 /* Thread Switch Control Register */
#define SPRN_DEC 0x016 /* Decrement Register */
#define SPRN_DER 0x095 /* Debug Enable Regsiter */
@@ -728,6 +731,7 @@
#define SPRN_BESCR 806 /* Branch event status and control register */
#define BESCR_GE 0x8000000000000000ULL /* Global Enable */
#define SPRN_WORT 895 /* Workload optimization register - thread */
+#define SPRN_WORC 863 /* Workload optimization register - core */
#define SPRN_PMC1 787
#define SPRN_PMC2 788
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 6240698fee9a..ff21b7a2f0cc 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,6 +90,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
static inline int syscall_get_arch(void)
{
- return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+ int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+#ifdef __LITTLE_ENDIAN__
+ arch |= __AUDIT_ARCH_LE;
+#endif
+ return arch;
}
#endif /* _ASM_SYSCALL_H */
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 9485b43a7c00..a0c071d24e0e 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -284,7 +284,7 @@ do { \
if (!is_kernel_addr((unsigned long)__gu_addr)) \
might_fault(); \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
- (x) = (__typeof__(*(ptr)))__gu_val; \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
__gu_err; \
})
#endif /* __powerpc64__ */
@@ -297,7 +297,7 @@ do { \
might_fault(); \
if (access_ok(VERIFY_READ, __gu_addr, (size))) \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
- (x) = (__typeof__(*(ptr)))__gu_val; \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
__gu_err; \
})
@@ -308,7 +308,7 @@ do { \
const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
__chk_user_ptr(ptr); \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
- (x) = (__typeof__(*(ptr)))__gu_val; \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
__gu_err; \
})
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 24d78e1871c9..e624f9646350 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -726,5 +726,16 @@ int main(void)
arch.timing_last_enter.tv32.tbl));
#endif
+#ifdef CONFIG_PPC_POWERNV
+ DEFINE(PACA_CORE_IDLE_STATE_PTR,
+ offsetof(struct paca_struct, core_idle_state_ptr));
+ DEFINE(PACA_THREAD_IDLE_STATE,
+ offsetof(struct paca_struct, thread_idle_state));
+ DEFINE(PACA_THREAD_MASK,
+ offsetof(struct paca_struct, thread_mask));
+ DEFINE(PACA_SUBCORE_SIBLING_MASK,
+ offsetof(struct paca_struct, subcore_sibling_mask));
+#endif
+
return 0;
}
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index db08382e19f1..c2df8150bd7a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -15,6 +15,7 @@
#include <asm/hw_irq.h>
#include <asm/exception-64s.h>
#include <asm/ptrace.h>
+#include <asm/cpuidle.h>
/*
* We layout physical memory as follows:
@@ -101,23 +102,34 @@ system_reset_pSeries:
#ifdef CONFIG_PPC_P7_NAP
BEGIN_FTR_SECTION
/* Running native on arch 2.06 or later, check if we are
- * waking up from nap. We only handle no state loss and
- * supervisor state loss. We do -not- handle hypervisor
- * state loss at this time.
+ * waking up from nap/sleep/winkle.
*/
mfspr r13,SPRN_SRR1
rlwinm. r13,r13,47-31,30,31
beq 9f
- /* waking up from powersave (nap) state */
- cmpwi cr1,r13,2
- /* Total loss of HV state is fatal, we could try to use the
- * PIR to locate a PACA, then use an emergency stack etc...
- * OPAL v3 based powernv platforms have new idle states
- * which fall in this catagory.
+ cmpwi cr3,r13,2
+
+ /*
+ * Check if last bit of HSPGR0 is set. This indicates whether we are
+ * waking up from winkle.
*/
- bgt cr1,8f
GET_PACA(r13)
+ clrldi r5,r13,63
+ clrrdi r13,r13,1
+ cmpwi cr4,r5,1
+ mtspr SPRN_HSPRG0,r13
+
+ lbz r0,PACA_THREAD_IDLE_STATE(r13)
+ cmpwi cr2,r0,PNV_THREAD_NAP
+ bgt cr2,8f /* Either sleep or Winkle */
+
+ /* Waking up from nap should not cause hypervisor state loss */
+ bgt cr3,.
+
+ /* Waking up from nap */
+ li r0,PNV_THREAD_RUNNING
+ stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
li r0,KVM_HWTHREAD_IN_KERNEL
@@ -133,7 +145,7 @@ BEGIN_FTR_SECTION
/* Return SRR1 from power7_nap() */
mfspr r3,SPRN_SRR1
- beq cr1,2f
+ beq cr3,2f
b power7_wakeup_noloss
2: b power7_wakeup_loss
@@ -1382,6 +1394,7 @@ machine_check_handle_early:
MACHINE_CHECK_HANDLER_WINDUP
GET_PACA(r13)
ld r1,PACAR1(r13)
+ li r3,PNV_THREAD_NAP
b power7_enter_nap_mode
4:
#endif
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 18c0687e5ab3..05adc8bbdef8 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -18,9 +18,25 @@
#include <asm/hw_irq.h>
#include <asm/kvm_book3s_asm.h>
#include <asm/opal.h>
+#include <asm/cpuidle.h>
+#include <asm/mmu-hash64.h>
#undef DEBUG
+/*
+ * Use unused space in the interrupt stack to save and restore
+ * registers for winkle support.
+ */
+#define _SDR1 GPR3
+#define _RPR GPR4
+#define _SPURR GPR5
+#define _PURR GPR6
+#define _TSCR GPR7
+#define _DSCR GPR8
+#define _AMOR GPR9
+#define _WORT GPR10
+#define _WORC GPR11
+
/* Idle state entry routines */
#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
@@ -37,8 +53,7 @@
/*
* Pass requested state in r3:
- * 0 - nap
- * 1 - sleep
+ * r3 - PNV_THREAD_NAP/SLEEP/WINKLE
*
* To check IRQ_HAPPENED in r4
* 0 - don't check
@@ -101,18 +116,105 @@ _GLOBAL(power7_powersave_common)
std r9,_MSR(r1)
std r1,PACAR1(r13)
-_GLOBAL(power7_enter_nap_mode)
+ /*
+ * Go to real mode to do the nap, as required by the architecture.
+ * Also, we need to be in real mode before setting hwthread_state,
+ * because as soon as we do that, another thread can switch
+ * the MMU context to the guest.
+ */
+ LOAD_REG_IMMEDIATE(r5, MSR_IDLE)
+ li r6, MSR_RI
+ andc r6, r9, r6
+ LOAD_REG_ADDR(r7, power7_enter_nap_mode)
+ mtmsrd r6, 1 /* clear RI before setting SRR0/1 */
+ mtspr SPRN_SRR0, r7
+ mtspr SPRN_SRR1, r5
+ rfid
+
+ .globl power7_enter_nap_mode
+power7_enter_nap_mode:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/* Tell KVM we're napping */
li r4,KVM_HWTHREAD_IN_NAP
stb r4,HSTATE_HWTHREAD_STATE(r13)
#endif
- cmpwi cr0,r3,1
- beq 2f
+ stb r3,PACA_THREAD_IDLE_STATE(r13)
+ cmpwi cr3,r3,PNV_THREAD_SLEEP
+ bge cr3,2f
IDLE_STATE_ENTER_SEQ(PPC_NAP)
/* No return */
-2: IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
- /* No return */
+2:
+ /* Sleep or winkle */
+ lbz r7,PACA_THREAD_MASK(r13)
+ ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop1:
+ lwarx r15,0,r14
+ andc r15,r15,r7 /* Clear thread bit */
+
+ andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
+
+/*
+ * If cr0 = 0, then current thread is the last thread of the core entering
+ * sleep. Last thread needs to execute the hardware bug workaround code if
+ * required by the platform.
+ * Make the workaround call unconditionally here. The below branch call is
+ * patched out when the idle states are discovered if the platform does not
+ * require it.
+ */
+.global pnv_fastsleep_workaround_at_entry
+pnv_fastsleep_workaround_at_entry:
+ beq fastsleep_workaround_at_entry
+
+ stwcx. r15,0,r14
+ bne- lwarx_loop1
+ isync
+
+common_enter: /* common code for all the threads entering sleep or winkle */
+ bgt cr3,enter_winkle
+ IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+
+fastsleep_workaround_at_entry:
+ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
+ stwcx. r15,0,r14
+ bne- lwarx_loop1
+ isync
+
+ /* Fast sleep workaround */
+ li r3,1
+ li r4,1
+ li r0,OPAL_CONFIG_CPU_IDLE_STATE
+ bl opal_call_realmode
+
+ /* Clear Lock bit */
+ li r0,0
+ lwsync
+ stw r0,0(r14)
+ b common_enter
+
+enter_winkle:
+ /*
+ * Note all register i.e per-core, per-subcore or per-thread is saved
+ * here since any thread in the core might wake up first
+ */
+ mfspr r3,SPRN_SDR1
+ std r3,_SDR1(r1)
+ mfspr r3,SPRN_RPR
+ std r3,_RPR(r1)
+ mfspr r3,SPRN_SPURR
+ std r3,_SPURR(r1)
+ mfspr r3,SPRN_PURR
+ std r3,_PURR(r1)
+ mfspr r3,SPRN_TSCR
+ std r3,_TSCR(r1)
+ mfspr r3,SPRN_DSCR
+ std r3,_DSCR(r1)
+ mfspr r3,SPRN_AMOR
+ std r3,_AMOR(r1)
+ mfspr r3,SPRN_WORT
+ std r3,_WORT(r1)
+ mfspr r3,SPRN_WORC
+ std r3,_WORC(r1)
+ IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
_GLOBAL(power7_idle)
/* Now check if user or arch enabled NAP mode */
@@ -125,48 +227,21 @@ _GLOBAL(power7_idle)
_GLOBAL(power7_nap)
mr r4,r3
- li r3,0
+ li r3,PNV_THREAD_NAP
b power7_powersave_common
/* No return */
_GLOBAL(power7_sleep)
- li r3,1
+ li r3,PNV_THREAD_SLEEP
li r4,1
b power7_powersave_common
/* No return */
-/*
- * Make opal call in realmode. This is a generic function to be called
- * from realmode from reset vector. It handles endianess.
- *
- * r13 - paca pointer
- * r1 - stack pointer
- * r3 - opal token
- */
-opal_call_realmode:
- mflr r12
- std r12,_LINK(r1)
- ld r2,PACATOC(r13)
- /* Set opal return address */
- LOAD_REG_ADDR(r0,return_from_opal_call)
- mtlr r0
- /* Handle endian-ness */
- li r0,MSR_LE
- mfmsr r12
- andc r12,r12,r0
- mtspr SPRN_HSRR1,r12
- mr r0,r3 /* Move opal token to r0 */
- LOAD_REG_ADDR(r11,opal)
- ld r12,8(r11)
- ld r2,0(r11)
- mtspr SPRN_HSRR0,r12
- hrfid
-
-return_from_opal_call:
- FIXUP_ENDIAN
- ld r0,_LINK(r1)
- mtlr r0
- blr
+_GLOBAL(power7_winkle)
+ li r3,3
+ li r4,1
+ b power7_powersave_common
+ /* No return */
#define CHECK_HMI_INTERRUPT \
mfspr r0,SPRN_SRR1; \
@@ -181,7 +256,7 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
ld r2,PACATOC(r13); \
ld r1,PACAR1(r13); \
std r3,ORIG_GPR3(r1); /* Save original r3 */ \
- li r3,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \
+ li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \
bl opal_call_realmode; \
ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
20: nop;
@@ -190,16 +265,190 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
_GLOBAL(power7_wakeup_tb_loss)
ld r2,PACATOC(r13);
ld r1,PACAR1(r13)
+ /*
+ * Before entering any idle state, the NVGPRs are saved in the stack
+ * and they are restored before switching to the process context. Hence
+ * until they are restored, they are free to be used.
+ *
+ * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode
+ * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the
+ * wakeup reason if we branch to kvm_start_guest.
+ */
+ mfspr r16,SPRN_SRR1
BEGIN_FTR_SECTION
CHECK_HMI_INTERRUPT
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
+ lbz r7,PACA_THREAD_MASK(r13)
+ ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop2:
+ lwarx r15,0,r14
+ andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
+ /*
+ * Lock bit is set in one of the 2 cases-
+ * a. In the sleep/winkle enter path, the last thread is executing
+ * fastsleep workaround code.
+ * b. In the wake up path, another thread is executing fastsleep
+ * workaround undo code or resyncing timebase or restoring context
+ * In either case loop until the lock bit is cleared.
+ */
+ bne core_idle_lock_held
+
+ cmpwi cr2,r15,0
+ lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
+ and r4,r4,r15
+ cmpwi cr1,r4,0 /* Check if first in subcore */
+
+ /*
+ * At this stage
+ * cr1 - 0b0100 if first thread to wakeup in subcore
+ * cr2 - 0b0100 if first thread to wakeup in core
+ * cr3- 0b0010 if waking up from sleep or winkle
+ * cr4 - 0b0100 if waking up from winkle
+ */
+
+ or r15,r15,r7 /* Set thread bit */
+
+ beq cr1,first_thread_in_subcore
+
+ /* Not first thread in subcore to wake up */
+ stwcx. r15,0,r14
+ bne- lwarx_loop2
+ isync
+ b common_exit
+
+core_idle_lock_held:
+ HMT_LOW
+core_idle_lock_loop:
+ lwz r15,0(14)
+ andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
+ bne core_idle_lock_loop
+ HMT_MEDIUM
+ b lwarx_loop2
+
+first_thread_in_subcore:
+ /* First thread in subcore to wakeup */
+ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
+ stwcx. r15,0,r14
+ bne- lwarx_loop2
+ isync
+
+ /*
+ * If waking up from sleep, subcore state is not lost. Hence
+ * skip subcore state restore
+ */
+ bne cr4,subcore_state_restored
+
+ /* Restore per-subcore state */
+ ld r4,_SDR1(r1)
+ mtspr SPRN_SDR1,r4
+ ld r4,_RPR(r1)
+ mtspr SPRN_RPR,r4
+ ld r4,_AMOR(r1)
+ mtspr SPRN_AMOR,r4
+
+subcore_state_restored:
+ /*
+ * Check if the thread is also the first thread in the core. If not,
+ * skip to clear_lock.
+ */
+ bne cr2,clear_lock
+
+first_thread_in_core:
+
+ /*
+ * First thread in the core waking up from fastsleep. It needs to
+ * call the fastsleep workaround code if the platform requires it.
+ * Call it unconditionally here. The below branch instruction will
+ * be patched out when the idle states are discovered if platform
+ * does not require workaround.
+ */
+.global pnv_fastsleep_workaround_at_exit
+pnv_fastsleep_workaround_at_exit:
+ b fastsleep_workaround_at_exit
+
+timebase_resync:
+ /* Do timebase resync if we are waking up from sleep. Use cr3 value
+ * set in exceptions-64s.S */
+ ble cr3,clear_lock
/* Time base re-sync */
- li r3,OPAL_RESYNC_TIMEBASE
+ li r0,OPAL_RESYNC_TIMEBASE
bl opal_call_realmode;
-
/* TODO: Check r3 for failure */
+ /*
+ * If waking up from sleep, per core state is not lost, skip to
+ * clear_lock.
+ */
+ bne cr4,clear_lock
+
+ /* Restore per core state */
+ ld r4,_TSCR(r1)
+ mtspr SPRN_TSCR,r4
+ ld r4,_WORC(r1)
+ mtspr SPRN_WORC,r4
+
+clear_lock:
+ andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
+ lwsync
+ stw r15,0(r14)
+
+common_exit:
+ /*
+ * Common to all threads.
+ *
+ * If waking up from sleep, hypervisor state is not lost. Hence
+ * skip hypervisor state restore.
+ */
+ bne cr4,hypervisor_state_restored
+
+ /* Waking up from winkle */
+
+ /* Restore per thread state */
+ bl __restore_cpu_power8
+
+ /* Restore SLB from PACA */
+ ld r8,PACA_SLBSHADOWPTR(r13)
+
+ .rept SLB_NUM_BOLTED
+ li r3, SLBSHADOW_SAVEAREA
+ LDX_BE r5, r8, r3
+ addi r3, r3, 8
+ LDX_BE r6, r8, r3
+ andis. r7,r5,SLB_ESID_V@h
+ beq 1f
+ slbmte r6,r5
+1: addi r8,r8,16
+ .endr
+
+ ld r4,_SPURR(r1)
+ mtspr SPRN_SPURR,r4
+ ld r4,_PURR(r1)
+ mtspr SPRN_PURR,r4
+ ld r4,_DSCR(r1)
+ mtspr SPRN_DSCR,r4
+ ld r4,_WORT(r1)
+ mtspr SPRN_WORT,r4
+
+hypervisor_state_restored:
+
+ li r5,PNV_THREAD_RUNNING
+ stb r5,PACA_THREAD_IDLE_STATE(r13)
+
+ mtspr SPRN_SRR1,r16
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ li r0,KVM_HWTHREAD_IN_KERNEL
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ sync
+ lbz r0,HSTATE_HWTHREAD_REQ(r13)
+ cmpwi r0,0
+ beq 6f
+ b kvm_start_guest
+6:
+#endif
+
REST_NVGPRS(r1)
REST_GPR(2, r1)
ld r3,_CCR(r1)
@@ -212,6 +461,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
mtspr SPRN_SRR0,r5
rfid
+fastsleep_workaround_at_exit:
+ li r3,1
+ li r4,0
+ li r0,OPAL_CONFIG_CPU_IDLE_STATE
+ bl opal_call_realmode
+ b timebase_resync
+
/*
* R3 here contains the value that will be returned to the caller
* of power7_nap.
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8b2d2dc8ef10..8ec017cb4446 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -700,7 +700,6 @@ void start_secondary(void *unused)
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
preempt_disable();
- cpu_callin_map[cpu] = 1;
if (smp_ops->setup_cpu)
smp_ops->setup_cpu(cpu);
@@ -739,6 +738,14 @@ void start_secondary(void *unused)
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
+ /*
+ * CPU must be marked active and online before we signal back to the
+ * master, because the scheduler needs to see the cpu_online and
+ * cpu_active bits set.
+ */
+ smp_wmb();
+ cpu_callin_map[cpu] = 1;
+
local_irq_enable();
cpu_startup_entry(CPUHP_ONLINE);
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index dba34088da28..f162d0b8eea3 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -177,7 +177,7 @@ static ssize_t _name##_show(struct device *dev, \
} \
ret = sprintf(buf, _fmt, _expr); \
e_free: \
- kfree(page); \
+ kmem_cache_free(hv_page_cache, page); \
return ret; \
} \
static DEVICE_ATTR_RO(_name)
@@ -217,11 +217,14 @@ static bool is_physical_domain(int domain)
domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE;
}
+DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096);
+DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096);
+
static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
u16 lpar, u64 *res,
bool success_expected)
{
- unsigned long ret = -ENOMEM;
+ unsigned long ret;
/*
* request_buffer and result_buffer are not required to be 4k aligned,
@@ -243,13 +246,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
- request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
- if (!request_buffer)
- goto out;
+ request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+ result_buffer = (void *)get_cpu_var(hv_24x7_resb);
- result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
- if (!result_buffer)
- goto out_free_request_buffer;
+ memset(request_buffer, 0, 4096);
+ memset(result_buffer, 0, 4096);
*request_buffer = (struct reqb) {
.buf = {
@@ -278,15 +279,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
domain, offset, ix, lpar, ret, ret,
result_buffer->buf.detailed_rc,
result_buffer->buf.failing_request_ix);
- goto out_free_result_buffer;
+ goto out;
}
*res = be64_to_cpu(result_buffer->result);
-out_free_result_buffer:
- kfree(result_buffer);
-out_free_request_buffer:
- kfree(request_buffer);
out:
return ret;
}
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 0a299be588af..54eca8b3b288 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -158,6 +158,43 @@ opal_tracepoint_return:
blr
#endif
+/*
+ * Make opal call in realmode. This is a generic function to be called
+ * from realmode. It handles endianness.
+ *
+ * r13 - paca pointer
+ * r1 - stack pointer
+ * r0 - opal token
+ */
+_GLOBAL(opal_call_realmode)
+ mflr r12
+ std r12,PPC_LR_STKOFF(r1)
+ ld r2,PACATOC(r13)
+ /* Set opal return address */
+ LOAD_REG_ADDR(r12,return_from_opal_call)
+ mtlr r12
+
+ mfmsr r12
+#ifdef __LITTLE_ENDIAN__
+ /* Handle endian-ness */
+ li r11,MSR_LE
+ andc r12,r12,r11
+#endif
+ mtspr SPRN_HSRR1,r12
+ LOAD_REG_ADDR(r11,opal)
+ ld r12,8(r11)
+ ld r2,0(r11)
+ mtspr SPRN_HSRR0,r12
+ hrfid
+
+return_from_opal_call:
+#ifdef __LITTLE_ENDIAN__
+ FIXUP_ENDIAN
+#endif
+ ld r12,PPC_LR_STKOFF(r1)
+ mtlr r12
+ blr
+
OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL);
OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ);
@@ -247,6 +284,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
+OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE);
@@ -254,3 +292,4 @@ OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO);
OPAL_CALL(opal_tpo_read, OPAL_READ_TPO);
OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND);
OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV);
+OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index cb0b6de79cd4..f10b9ec8c1f5 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -9,8 +9,9 @@
* 2 of the License, or (at your option) any later version.
*/
-#undef DEBUG
+#define pr_fmt(fmt) "opal: " fmt
+#include <linux/printk.h>
#include <linux/types.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
@@ -625,6 +626,39 @@ static int opal_sysfs_init(void)
return 0;
}
+static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t count)
+{
+ return memory_read_from_buffer(buf, count, &off, bin_attr->private,
+ bin_attr->size);
+}
+
+static BIN_ATTR_RO(symbol_map, 0);
+
+static void opal_export_symmap(void)
+{
+ const __be64 *syms;
+ unsigned int size;
+ struct device_node *fw;
+ int rc;
+
+ fw = of_find_node_by_path("/ibm,opal/firmware");
+ if (!fw)
+ return;
+ syms = of_get_property(fw, "symbol-map", &size);
+ if (!syms || size != 2 * sizeof(__be64))
+ return;
+
+ /* Setup attributes */
+ bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
+ bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
+
+ rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
+ if (rc)
+ pr_warn("Error %d creating OPAL symbols file\n", rc);
+}
+
static void __init opal_dump_region_init(void)
{
void *addr;
@@ -653,6 +687,14 @@ static void opal_ipmi_init(struct device_node *opal_node)
of_platform_device_create(np, NULL, NULL);
}
+static void opal_i2c_create_devs(void)
+{
+ struct device_node *np;
+
+ for_each_compatible_node(np, NULL, "ibm,opal-i2c")
+ of_platform_device_create(np, NULL, NULL);
+}
+
static int __init opal_init(void)
{
struct device_node *np, *consoles;
@@ -679,6 +721,9 @@ static int __init opal_init(void)
of_node_put(consoles);
}
+ /* Create i2c platform devices */
+ opal_i2c_create_devs();
+
/* Find all OPAL interrupts and request them */
irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
pr_debug("opal: Found %d interrupts reserved for OPAL\n",
@@ -702,6 +747,8 @@ static int __init opal_init(void)
/* Create "opal" kobject under /sys/firmware */
rc = opal_sysfs_init();
if (rc == 0) {
+ /* Export symbol map to userspace */
+ opal_export_symmap();
/* Setup dump region interface */
opal_dump_region_init();
/* Setup error log interface */
@@ -824,3 +871,4 @@ EXPORT_SYMBOL_GPL(opal_rtc_read);
EXPORT_SYMBOL_GPL(opal_rtc_write);
EXPORT_SYMBOL_GPL(opal_tpo_read);
EXPORT_SYMBOL_GPL(opal_tpo_write);
+EXPORT_SYMBOL_GPL(opal_i2c_request);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 6c8e2d188cd0..604c48e7879a 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -29,6 +29,8 @@ static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
}
#endif
+extern u32 pnv_get_supported_cpuidle_states(void);
+
extern void pnv_lpc_init(void);
bool cpu_core_split_required(void);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 30b1c3e298a6..b700a329c31d 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -36,8 +36,12 @@
#include <asm/opal.h>
#include <asm/kexec.h>
#include <asm/smp.h>
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/code-patching.h>
#include "powernv.h"
+#include "subcore.h"
static void __init pnv_setup_arch(void)
{
@@ -288,6 +292,168 @@ static void __init pnv_setup_machdep_rtas(void)
}
#endif /* CONFIG_PPC_POWERNV_RTAS */
+static u32 supported_cpuidle_states;
+
+int pnv_save_sprs_for_winkle(void)
+{
+ int cpu;
+ int rc;
+
+ /*
+ * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross
+ * all cpus at boot. Get these reg values of current cpu and use the
+ * same accross all cpus.
+ */
+ uint64_t lpcr_val = mfspr(SPRN_LPCR);
+ uint64_t hid0_val = mfspr(SPRN_HID0);
+ uint64_t hid1_val = mfspr(SPRN_HID1);
+ uint64_t hid4_val = mfspr(SPRN_HID4);
+ uint64_t hid5_val = mfspr(SPRN_HID5);
+ uint64_t hmeer_val = mfspr(SPRN_HMEER);
+
+ for_each_possible_cpu(cpu) {
+ uint64_t pir = get_hard_smp_processor_id(cpu);
+ uint64_t hsprg0_val = (uint64_t)&paca[cpu];
+
+ /*
+ * HSPRG0 is used to store the cpu's pointer to paca. Hence last
+ * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0
+ * with 63rd bit set, so that when a thread wakes up at 0x100 we
+ * can use this bit to distinguish between fastsleep and
+ * deep winkle.
+ */
+ hsprg0_val |= 1;
+
+ rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+ if (rc != 0)
+ return rc;
+
+ /* HIDs are per core registers */
+ if (cpu_thread_in_core(cpu) == 0) {
+
+ rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
+ if (rc != 0)
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+static void pnv_alloc_idle_core_states(void)
+{
+ int i, j;
+ int nr_cores = cpu_nr_cores();
+ u32 *core_idle_state;
+
+ /*
+ * core_idle_state - First 8 bits track the idle state of each thread
+ * of the core. The 8th bit is the lock bit. Initially all thread bits
+ * are set. They are cleared when the thread enters deep idle state
+ * like sleep and winkle. Initially the lock bit is cleared.
+ * The lock bit has 2 purposes
+ * a. While the first thread is restoring core state, it prevents
+ * other threads in the core from switching to process context.
+ * b. While the last thread in the core is saving the core state, it
+ * prevents a different thread from waking up.
+ */
+ for (i = 0; i < nr_cores; i++) {
+ int first_cpu = i * threads_per_core;
+ int node = cpu_to_node(first_cpu);
+
+ core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
+ *core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
+
+ for (j = 0; j < threads_per_core; j++) {
+ int cpu = first_cpu + j;
+
+ paca[cpu].core_idle_state_ptr = core_idle_state;
+ paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
+ paca[cpu].thread_mask = 1 << j;
+ }
+ }
+
+ update_subcore_sibling_mask();
+
+ if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED)
+ pnv_save_sprs_for_winkle();
+}
+
+u32 pnv_get_supported_cpuidle_states(void)
+{
+ return supported_cpuidle_states;
+}
+EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
+
+static int __init pnv_init_idle_states(void)
+{
+ struct device_node *power_mgt;
+ int dt_idle_states;
+ const __be32 *idle_state_flags;
+ u32 len_flags, flags;
+ int i;
+
+ supported_cpuidle_states = 0;
+
+ if (cpuidle_disable != IDLE_NO_OVERRIDE)
+ return 0;
+
+ if (!firmware_has_feature(FW_FEATURE_OPALv3))
+ return 0;
+
+ power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
+ if (!power_mgt) {
+ pr_warn("opal: PowerMgmt Node not found\n");
+ return 0;
+ }
+
+ idle_state_flags = of_get_property(power_mgt,
+ "ibm,cpu-idle-state-flags", &len_flags);
+ if (!idle_state_flags) {
+ pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n");
+ return 0;
+ }
+
+ dt_idle_states = len_flags / sizeof(u32);
+
+ for (i = 0; i < dt_idle_states; i++) {
+ flags = be32_to_cpu(idle_state_flags[i]);
+ supported_cpuidle_states |= flags;
+ }
+ if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+ patch_instruction(
+ (unsigned int *)pnv_fastsleep_workaround_at_entry,
+ PPC_INST_NOP);
+ patch_instruction(
+ (unsigned int *)pnv_fastsleep_workaround_at_exit,
+ PPC_INST_NOP);
+ }
+ pnv_alloc_idle_core_states();
+ return 0;
+}
+
+subsys_initcall(pnv_init_idle_states);
+
static int __init pnv_probe(void)
{
unsigned long root = of_get_flat_dt_root();
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index b716f666e48a..fc34025ef822 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -150,6 +150,7 @@ static void pnv_smp_cpu_kill_self(void)
{
unsigned int cpu;
unsigned long srr1;
+ u32 idle_states;
/* Standard hot unplug procedure */
local_irq_disable();
@@ -160,13 +161,23 @@ static void pnv_smp_cpu_kill_self(void)
generic_set_cpu_dead(cpu);
smp_wmb();
+ idle_states = pnv_get_supported_cpuidle_states();
/* We don't want to take decrementer interrupts while we are offline,
* so clear LPCR:PECE1. We keep PECE2 enabled.
*/
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
while (!generic_check_cpu_restart(cpu)) {
+
ppc64_runlatch_off();
- srr1 = power7_nap(1);
+
+ if (idle_states & OPAL_PM_WINKLE_ENABLED)
+ srr1 = power7_winkle();
+ else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
+ (idle_states & OPAL_PM_SLEEP_ENABLED_ER1))
+ srr1 = power7_sleep();
+ else
+ srr1 = power7_nap(1);
+
ppc64_runlatch_on();
/*
@@ -198,13 +209,27 @@ static void pnv_smp_cpu_kill_self(void)
#endif /* CONFIG_HOTPLUG_CPU */
+static int pnv_cpu_bootable(unsigned int nr)
+{
+ /*
+ * Starting with POWER8, the subcore logic relies on all threads of a
+ * core being booted so that they can participate in split mode
+ * switches. So on those machines we ignore the smt_enabled_at_boot
+ * setting (smt-enabled on the kernel command line).
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ return 1;
+
+ return smp_generic_cpu_bootable(nr);
+}
+
static struct smp_ops_t pnv_smp_ops = {
.message_pass = smp_muxed_ipi_message_pass,
.cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */
.probe = xics_smp_probe,
.kick_cpu = pnv_smp_kick_cpu,
.setup_cpu = pnv_smp_setup_cpu,
- .cpu_bootable = smp_generic_cpu_bootable,
+ .cpu_bootable = pnv_cpu_bootable,
#ifdef CONFIG_HOTPLUG_CPU
.cpu_disable = pnv_smp_cpu_disable,
.cpu_die = generic_cpu_die,
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index c87f96b79d1a..f60f80ada903 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -160,6 +160,18 @@ static void wait_for_sync_step(int step)
mb();
}
+static void update_hid_in_slw(u64 hid0)
+{
+ u64 idle_states = pnv_get_supported_cpuidle_states();
+
+ if (idle_states & OPAL_PM_WINKLE_ENABLED) {
+ /* OPAL call to patch slw with the new HID0 value */
+ u64 cpu_pir = hard_smp_processor_id();
+
+ opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0);
+ }
+}
+
static void unsplit_core(void)
{
u64 hid0, mask;
@@ -179,6 +191,7 @@ static void unsplit_core(void)
hid0 = mfspr(SPRN_HID0);
hid0 &= ~HID0_POWER8_DYNLPARDIS;
mtspr(SPRN_HID0, hid0);
+ update_hid_in_slw(hid0);
while (mfspr(SPRN_HID0) & mask)
cpu_relax();
@@ -215,6 +228,7 @@ static void split_core(int new_mode)
hid0 = mfspr(SPRN_HID0);
hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
mtspr(SPRN_HID0, hid0);
+ update_hid_in_slw(hid0);
/* Wait for it to happen */
while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
@@ -251,6 +265,25 @@ bool cpu_core_split_required(void)
return true;
}
+void update_subcore_sibling_mask(void)
+{
+ int cpu;
+ /*
+ * sibling mask for the first cpu. Left shift this by required bits
+ * to get sibling mask for the rest of the cpus.
+ */
+ int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1;
+
+ for_each_possible_cpu(cpu) {
+ int tid = cpu_thread_in_core(cpu);
+ int offset = (tid / threads_per_subcore) * threads_per_subcore;
+ int mask = sibling_mask_first_cpu << offset;
+
+ paca[cpu].subcore_sibling_mask = mask;
+
+ }
+}
+
static int cpu_update_split_mode(void *data)
{
int cpu, new_mode = *(int *)data;
@@ -284,6 +317,7 @@ static int cpu_update_split_mode(void *data)
/* Make the new mode public */
subcores_per_core = new_mode;
threads_per_subcore = threads_per_core / subcores_per_core;
+ update_subcore_sibling_mask();
/* Make sure the new mode is written before we exit */
mb();
diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h
index 148abc91debf..84e02ae52895 100644
--- a/arch/powerpc/platforms/powernv/subcore.h
+++ b/arch/powerpc/platforms/powernv/subcore.h
@@ -14,5 +14,12 @@
#define SYNC_STEP_FINISHED 3 /* Set by secondary when split/unsplit is done */
#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_SMP
void split_core_secondary_loop(u8 *state);
-#endif
+extern void update_subcore_sibling_mask(void);
+#else
+static inline void update_subcore_sibling_mask(void) { };
+#endif /* CONFIG_SMP */
+
+#endif /* __ASSEMBLY__ */
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index e9248bb9173a..aedec0957934 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -16,13 +16,10 @@
#include <asm/machdep.h>
#include <asm/firmware.h>
+#include <asm/opal.h>
#include <asm/runlatch.h>
-/* Flags and constants used in PowerNV platform */
-
#define MAX_POWERNV_IDLE_STATES 8
-#define IDLE_USE_INST_NAP 0x00010000 /* Use nap instruction */
-#define IDLE_USE_INST_SLEEP 0x00020000 /* Use sleep instruction */
struct cpuidle_driver powernv_idle_driver = {
.name = "powernv_idle",
@@ -197,7 +194,7 @@ static int powernv_add_idle_states(void)
* target residency to be 10x exit_latency
*/
latency_ns = be32_to_cpu(idle_state_latency[i]);
- if (flags & IDLE_USE_INST_NAP) {
+ if (flags & OPAL_PM_NAP_ENABLED) {
/* Add NAP state */
strcpy(powernv_states[nr_idle_states].name, "Nap");
strcpy(powernv_states[nr_idle_states].desc, "Nap");
@@ -210,7 +207,8 @@ static int powernv_add_idle_states(void)
nr_idle_states++;
}
- if (flags & IDLE_USE_INST_SLEEP) {
+ if (flags & OPAL_PM_SLEEP_ENABLED ||
+ flags & OPAL_PM_SLEEP_ENABLED_ER1) {
/* Add FASTSLEEP state */
strcpy(powernv_states[nr_idle_states].name, "FastSleep");
strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index c1351d9fb35b..91a488c7cc44 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -1072,4 +1072,15 @@ config SCx200_ACB
This support is also available as a module. If so, the module
will be called scx200_acb.
+config I2C_OPAL
+ tristate "IBM OPAL I2C driver"
+ depends on PPC_POWERNV
+ default y
+ help
+ This exposes the PowerNV platform i2c busses to the linux i2c layer,
+ the driver is based on the OPAL interfaces.
+
+ This driver can also be built as a module. If so, the module will be
+ called as i2c-opal.
+
endmenu
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 5e6c8223719e..56388f658d2f 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_I2C_ACORN) += i2c-acorn.o
obj-$(CONFIG_I2C_BCM_KONA) += i2c-bcm-kona.o
obj-$(CONFIG_I2C_CROS_EC_TUNNEL) += i2c-cros-ec-tunnel.o
obj-$(CONFIG_I2C_ELEKTOR) += i2c-elektor.o
+obj-$(CONFIG_I2C_OPAL) += i2c-opal.o
obj-$(CONFIG_I2C_PCA_ISA) += i2c-pca-isa.o
obj-$(CONFIG_I2C_SIBYTE) += i2c-sibyte.o
obj-$(CONFIG_SCx200_ACB) += scx200_acb.o
diff --git a/drivers/i2c/busses/i2c-opal.c b/drivers/i2c/busses/i2c-opal.c
new file mode 100644
index 000000000000..16f90b1a7508
--- /dev/null
+++ b/drivers/i2c/busses/i2c-opal.c
@@ -0,0 +1,294 @@
+/*
+ * IBM OPAL I2C driver
+ * Copyright (C) 2014 IBM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ */
+
+#include <linux/device.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <asm/firmware.h>
+#include <asm/opal.h>
+
+static int i2c_opal_translate_error(int rc)
+{
+ switch (rc) {
+ case OPAL_NO_MEM:
+ return -ENOMEM;
+ case OPAL_PARAMETER:
+ return -EINVAL;
+ case OPAL_I2C_ARBT_LOST:
+ return -EAGAIN;
+ case OPAL_I2C_TIMEOUT:
+ return -ETIMEDOUT;
+ case OPAL_I2C_NACK_RCVD:
+ return -ENXIO;
+ case OPAL_I2C_STOP_ERR:
+ return -EBUSY;
+ default:
+ return -EIO;
+ }
+}
+
+static int i2c_opal_send_request(u32 bus_id, struct opal_i2c_request *req)
+{
+ struct opal_msg msg;
+ int token, rc;
+
+ token = opal_async_get_token_interruptible();
+ if (token < 0) {
+ if (token != -ERESTARTSYS)
+ pr_err("Failed to get the async token\n");
+
+ return token;
+ }
+
+ rc = opal_i2c_request(token, bus_id, req);
+ if (rc != OPAL_ASYNC_COMPLETION) {
+ rc = i2c_opal_translate_error(rc);
+ goto exit;
+ }
+
+ rc = opal_async_wait_response(token, &msg);
+ if (rc)
+ goto exit;
+
+ rc = be64_to_cpu(msg.params[1]);
+ if (rc != OPAL_SUCCESS) {
+ rc = i2c_opal_translate_error(rc);
+ goto exit;
+ }
+
+exit:
+ opal_async_release_token(token);
+ return rc;
+}
+
+static int i2c_opal_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+ int num)
+{
+ unsigned long opal_id = (unsigned long)adap->algo_data;
+ struct opal_i2c_request req;
+ int rc, i;
+
+ /* We only support fairly simple combinations here of one
+ * or two messages
+ */
+ memset(&req, 0, sizeof(req));
+ switch(num) {
+ case 0:
+ return 0;
+ case 1:
+ req.type = (msgs[0].flags & I2C_M_RD) ?
+ OPAL_I2C_RAW_READ : OPAL_I2C_RAW_WRITE;
+ req.addr = cpu_to_be16(msgs[0].addr);
+ req.size = cpu_to_be32(msgs[0].len);
+ req.buffer_ra = cpu_to_be64(__pa(msgs[0].buf));
+ break;
+ case 2:
+ /* For two messages, we basically support only simple
+ * smbus transactions of a write plus a read. We might
+ * want to allow also two writes but we'd have to bounce
+ * the data into a single buffer.
+ */
+ if ((msgs[0].flags & I2C_M_RD) || !(msgs[1].flags & I2C_M_RD))
+ return -EOPNOTSUPP;
+ if (msgs[0].len > 4)
+ return -EOPNOTSUPP;
+ if (msgs[0].addr != msgs[1].addr)
+ return -EOPNOTSUPP;
+ req.type = OPAL_I2C_SM_READ;
+ req.addr = cpu_to_be16(msgs[0].addr);
+ req.subaddr_sz = msgs[0].len;
+ for (i = 0; i < msgs[0].len; i++)
+ req.subaddr = (req.subaddr << 8) | msgs[0].buf[i];
+ req.subaddr = cpu_to_be32(req.subaddr);
+ req.size = cpu_to_be32(msgs[1].len);
+ req.buffer_ra = cpu_to_be64(__pa(msgs[1].buf));
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ rc = i2c_opal_send_request(opal_id, &req);
+ if (rc)
+ return rc;
+
+ return num;
+}
+
+static int i2c_opal_smbus_xfer(struct i2c_adapter *adap, u16 addr,
+ unsigned short flags, char read_write,
+ u8 command, int size, union i2c_smbus_data *data)
+{
+ unsigned long opal_id = (unsigned long)adap->algo_data;
+ struct opal_i2c_request req;
+ u8 local[2];
+ int rc;
+
+ memset(&req, 0, sizeof(req));
+
+ req.addr = cpu_to_be16(addr);
+ switch (size) {
+ case I2C_SMBUS_BYTE:
+ req.buffer_ra = cpu_to_be64(__pa(&data->byte));
+ req.size = cpu_to_be32(1);
+ /* Fall through */
+ case I2C_SMBUS_QUICK:
+ req.type = (read_write == I2C_SMBUS_READ) ?
+ OPAL_I2C_RAW_READ : OPAL_I2C_RAW_WRITE;
+ break;
+ case I2C_SMBUS_BYTE_DATA:
+ req.buffer_ra = cpu_to_be64(__pa(&data->byte));
+ req.size = cpu_to_be32(1);
+ req.subaddr = cpu_to_be32(command);
+ req.subaddr_sz = 1;
+ req.type = (read_write == I2C_SMBUS_READ) ?
+ OPAL_I2C_SM_READ : OPAL_I2C_SM_WRITE;
+ break;
+ case I2C_SMBUS_WORD_DATA:
+ if (!read_write) {
+ local[0] = data->word & 0xff;
+ local[1] = (data->word >> 8) & 0xff;
+ }
+ req.buffer_ra = cpu_to_be64(__pa(local));
+ req.size = cpu_to_be32(2);
+ req.subaddr = cpu_to_be32(command);
+ req.subaddr_sz = 1;
+ req.type = (read_write == I2C_SMBUS_READ) ?
+ OPAL_I2C_SM_READ : OPAL_I2C_SM_WRITE;
+ break;
+ case I2C_SMBUS_I2C_BLOCK_DATA:
+ req.buffer_ra = cpu_to_be64(__pa(&data->block[1]));
+ req.size = cpu_to_be32(data->block[0]);
+ req.subaddr = cpu_to_be32(command);
+ req.subaddr_sz = 1;
+ req.type = (read_write == I2C_SMBUS_READ) ?
+ OPAL_I2C_SM_READ : OPAL_I2C_SM_WRITE;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ rc = i2c_opal_send_request(opal_id, &req);
+ if (!rc && read_write && size == I2C_SMBUS_WORD_DATA) {
+ data->word = ((u16)local[1]) << 8;
+ data->word |= local[0];
+ }
+
+ return rc;
+}
+
+static u32 i2c_opal_func(struct i2c_adapter *adapter)
+{
+ return I2C_FUNC_I2C | I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
+ I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
+ I2C_FUNC_SMBUS_I2C_BLOCK;
+}
+
+static const struct i2c_algorithm i2c_opal_algo = {
+ .master_xfer = i2c_opal_master_xfer,
+ .smbus_xfer = i2c_opal_smbus_xfer,
+ .functionality = i2c_opal_func,
+};
+
+static int i2c_opal_probe(struct platform_device *pdev)
+{
+ struct i2c_adapter *adapter;
+ const char *pname;
+ u32 opal_id;
+ int rc;
+
+ if (!pdev->dev.of_node)
+ return -ENODEV;
+
+ rc = of_property_read_u32(pdev->dev.of_node, "ibm,opal-id", &opal_id);
+ if (rc) {
+ dev_err(&pdev->dev, "Missing ibm,opal-id property !\n");
+ return -EIO;
+ }
+
+ adapter = devm_kzalloc(&pdev->dev, sizeof(*adapter), GFP_KERNEL);
+ if (!adapter)
+ return -ENOMEM;
+
+ adapter->algo = &i2c_opal_algo;
+ adapter->algo_data = (void *)(unsigned long)opal_id;
+ adapter->dev.parent = &pdev->dev;
+ adapter->dev.of_node = of_node_get(pdev->dev.of_node);
+ pname = of_get_property(pdev->dev.of_node, "ibm,port-name", NULL);
+ if (pname)
+ strlcpy(adapter->name, pname, sizeof(adapter->name));
+ else
+ strlcpy(adapter->name, "opal", sizeof(adapter->name));
+
+ platform_set_drvdata(pdev, adapter);
+ rc = i2c_add_adapter(adapter);
+ if (rc)
+ dev_err(&pdev->dev, "Failed to register the i2c adapter\n");
+
+ return rc;
+}
+
+static int i2c_opal_remove(struct platform_device *pdev)
+{
+ struct i2c_adapter *adapter = platform_get_drvdata(pdev);
+
+ i2c_del_adapter(adapter);
+
+ return 0;
+}
+
+static const struct of_device_id i2c_opal_of_match[] = {
+ {
+ .compatible = "ibm,opal-i2c",
+ },
+ { }
+};
+MODULE_DEVICE_TABLE(of, i2c_opal_of_match);
+
+static struct platform_driver i2c_opal_driver = {
+ .probe = i2c_opal_probe,
+ .remove = i2c_opal_remove,
+ .driver = {
+ .name = "i2c-opal",
+ .of_match_table = i2c_opal_of_match,
+ },
+};
+
+static int __init i2c_opal_init(void)
+{
+ if (!firmware_has_feature(FW_FEATURE_OPAL))
+ return -ENODEV;
+
+ return platform_driver_register(&i2c_opal_driver);
+}
+module_init(i2c_opal_init);
+
+static void __exit i2c_opal_exit(void)
+{
+ return platform_driver_unregister(&i2c_opal_driver);
+}
+module_exit(i2c_opal_exit);
+
+MODULE_AUTHOR("Neelesh Gupta <neelegup@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("IBM OPAL I2C driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index cca472109135..51fd6b524371 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -34,7 +34,8 @@ struct cxl_context *cxl_context_alloc(void)
/*
* Initialises a CXL context.
*/
-int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+ struct address_space *mapping)
{
int i;
@@ -42,6 +43,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
ctx->afu = afu;
ctx->master = master;
ctx->pid = NULL; /* Set in start work ioctl */
+ mutex_init(&ctx->mapping_lock);
+ ctx->mapping = mapping;
/*
* Allocate the segment table before we put it in the IDR so that we
@@ -82,12 +85,12 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
* Allocating IDR! We better make sure everything's setup that
* dereferences from it.
*/
+ mutex_lock(&afu->contexts_lock);
idr_preload(GFP_KERNEL);
- spin_lock(&afu->contexts_lock);
i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0,
ctx->afu->num_procs, GFP_NOWAIT);
- spin_unlock(&afu->contexts_lock);
idr_preload_end();
+ mutex_unlock(&afu->contexts_lock);
if (i < 0)
return i;
@@ -147,6 +150,12 @@ static void __detach_context(struct cxl_context *ctx)
afu_release_irqs(ctx);
flush_work(&ctx->fault_work); /* Only needed for dedicated process */
wake_up_all(&ctx->wq);
+
+ /* Release Problem State Area mapping */
+ mutex_lock(&ctx->mapping_lock);
+ if (ctx->mapping)
+ unmap_mapping_range(ctx->mapping, 0, 0, 1);
+ mutex_unlock(&ctx->mapping_lock);
}
/*
@@ -168,21 +177,22 @@ void cxl_context_detach_all(struct cxl_afu *afu)
struct cxl_context *ctx;
int tmp;
- rcu_read_lock();
- idr_for_each_entry(&afu->contexts_idr, ctx, tmp)
+ mutex_lock(&afu->contexts_lock);
+ idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
/*
* Anything done in here needs to be setup before the IDR is
* created and torn down after the IDR removed
*/
__detach_context(ctx);
- rcu_read_unlock();
+ }
+ mutex_unlock(&afu->contexts_lock);
}
void cxl_context_free(struct cxl_context *ctx)
{
- spin_lock(&ctx->afu->contexts_lock);
+ mutex_lock(&ctx->afu->contexts_lock);
idr_remove(&ctx->afu->contexts_idr, ctx->pe);
- spin_unlock(&ctx->afu->contexts_lock);
+ mutex_unlock(&ctx->afu->contexts_lock);
synchronize_rcu();
free_page((u64)ctx->sstp);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b5b6bda44a00..28078f8894a5 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -351,7 +351,7 @@ struct cxl_afu {
struct device *chardev_s, *chardev_m, *chardev_d;
struct idr contexts_idr;
struct dentry *debugfs;
- spinlock_t contexts_lock;
+ struct mutex contexts_lock;
struct mutex spa_mutex;
spinlock_t afu_cntl_lock;
@@ -398,6 +398,10 @@ struct cxl_context {
phys_addr_t psn_phys;
u64 psn_size;
+ /* Used to unmap any mmaps when force detaching */
+ struct address_space *mapping;
+ struct mutex mapping_lock;
+
spinlock_t sste_lock; /* Protects segment table entries */
struct cxl_sste *sstp;
u64 sstp0, sstp1;
@@ -599,7 +603,8 @@ int cxl_alloc_sst(struct cxl_context *ctx);
void init_cxl_native(void);
struct cxl_context *cxl_context_alloc(void);
-int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master);
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+ struct address_space *mapping);
void cxl_context_free(struct cxl_context *ctx);
int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma);
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 378b099e7c0b..e9f2f10dbb37 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -77,7 +77,7 @@ static int __afu_open(struct inode *inode, struct file *file, bool master)
goto err_put_afu;
}
- if ((rc = cxl_context_init(ctx, afu, master)))
+ if ((rc = cxl_context_init(ctx, afu, master, inode->i_mapping)))
goto err_put_afu;
pr_devel("afu_open pe: %i\n", ctx->pe);
@@ -113,6 +113,10 @@ static int afu_release(struct inode *inode, struct file *file)
__func__, ctx->pe);
cxl_context_detach(ctx);
+ mutex_lock(&ctx->mapping_lock);
+ ctx->mapping = NULL;
+ mutex_unlock(&ctx->mapping_lock);
+
put_device(&ctx->afu->dev);
/*
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 9a5a442269a8..f2b37b41a0da 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -277,6 +277,7 @@ static int do_process_element_cmd(struct cxl_context *ctx,
u64 cmd, u64 pe_state)
{
u64 state;
+ unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
WARN_ON(!ctx->afu->enabled);
@@ -286,6 +287,10 @@ static int do_process_element_cmd(struct cxl_context *ctx,
smp_mb();
cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe);
while (1) {
+ if (time_after_eq(jiffies, timeout)) {
+ dev_warn(&ctx->afu->dev, "WARNING: Process Element Command timed out!\n");
+ return -EBUSY;
+ }
state = be64_to_cpup(ctx->afu->sw_command_status);
if (state == ~0ULL) {
pr_err("cxl: Error adding process element to AFU\n");
@@ -610,13 +615,6 @@ static inline int detach_process_native_dedicated(struct cxl_context *ctx)
return 0;
}
-/*
- * TODO: handle case when this is called inside a rcu_read_lock() which may
- * happen when we unbind the driver (ie. cxl_context_detach_all()) . Terminate
- * & remove use a mutex lock and schedule which will not good with lock held.
- * May need to write do_process_element_cmd() that handles outstanding page
- * faults synchronously.
- */
static inline int detach_process_native_afu_directed(struct cxl_context *ctx)
{
if (!ctx->pe_inserted)
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 10c98ab7f46e..0f2cc9f8b4db 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -502,7 +502,7 @@ static struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice)
afu->dev.release = cxl_release_afu;
afu->slice = slice;
idr_init(&afu->contexts_idr);
- spin_lock_init(&afu->contexts_lock);
+ mutex_init(&afu->contexts_lock);
spin_lock_init(&afu->afu_cntl_lock);
mutex_init(&afu->spa_mutex);
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index ce7ec06d87d1..461bdbd5d483 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -121,7 +121,7 @@ static ssize_t reset_store_afu(struct device *device,
int rc;
/* Not safe to reset if it is currently in use */
- spin_lock(&afu->contexts_lock);
+ mutex_lock(&afu->contexts_lock);
if (!idr_is_empty(&afu->contexts_idr)) {
rc = -EBUSY;
goto err;
@@ -132,7 +132,7 @@ static ssize_t reset_store_afu(struct device *device,
rc = count;
err:
- spin_unlock(&afu->contexts_lock);
+ mutex_unlock(&afu->contexts_lock);
return rc;
}
@@ -247,7 +247,7 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr,
int rc = -EBUSY;
/* can't change this if we have a user */
- spin_lock(&afu->contexts_lock);
+ mutex_lock(&afu->contexts_lock);
if (!idr_is_empty(&afu->contexts_idr))
goto err;
@@ -271,7 +271,7 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr,
afu->current_mode = 0;
afu->num_procs = 0;
- spin_unlock(&afu->contexts_lock);
+ mutex_unlock(&afu->contexts_lock);
if ((rc = _cxl_afu_deactivate_mode(afu, old_mode)))
return rc;
@@ -280,7 +280,7 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr,
return count;
err:
- spin_unlock(&afu->contexts_lock);
+ mutex_unlock(&afu->contexts_lock);
return rc;
}
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 12e26683c706..d3475e1f15ec 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -371,7 +371,9 @@ enum {
#define AUDIT_ARCH_PARISC (EM_PARISC)
#define AUDIT_ARCH_PARISC64 (EM_PARISC|__AUDIT_ARCH_64BIT)
#define AUDIT_ARCH_PPC (EM_PPC)
+/* do not define AUDIT_ARCH_PPCLE since it is not supported by audit */
#define AUDIT_ARCH_PPC64 (EM_PPC64|__AUDIT_ARCH_64BIT)
+#define AUDIT_ARCH_PPC64LE (EM_PPC64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define AUDIT_ARCH_S390 (EM_S390)
#define AUDIT_ARCH_S390X (EM_S390|__AUDIT_ARCH_64BIT)
#define AUDIT_ARCH_SH (EM_SH)