Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/Kconfig | 12
-rw-r--r--  arch/powerpc/configs/powernv_defconfig | 6
-rw-r--r--  arch/powerpc/configs/ppc64_defconfig | 6
-rw-r--r--  arch/powerpc/configs/pseries_defconfig | 6
-rw-r--r--  arch/powerpc/include/asm/asm-prototypes.h | 4
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash-4k.h | 2
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash-64k.h | 10
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash.h | 16
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hugetlb.h | 2
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu-hash.h | 200
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu.h | 9
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 53
-rw-r--r--  arch/powerpc/include/asm/book3s/64/radix.h | 8
-rw-r--r--  arch/powerpc/include/asm/cpuidle.h | 1
-rw-r--r--  arch/powerpc/include/asm/debug.h | 2
-rw-r--r--  arch/powerpc/include/asm/debugfs.h | 17
-rw-r--r--  arch/powerpc/include/asm/hvcall.h | 10
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h | 2
-rw-r--r--  arch/powerpc/include/asm/mce.h | 94
-rw-r--r--  arch/powerpc/include/asm/mmu-book3e.h | 5
-rw-r--r--  arch/powerpc/include/asm/mmu.h | 19
-rw-r--r--  arch/powerpc/include/asm/mmu_context.h | 3
-rw-r--r--  arch/powerpc/include/asm/nohash/64/pgtable.h | 5
-rw-r--r--  arch/powerpc/include/asm/opal-api.h | 5
-rw-r--r--  arch/powerpc/include/asm/opal.h | 5
-rw-r--r--  arch/powerpc/include/asm/paca.h | 24
-rw-r--r--  arch/powerpc/include/asm/page_64.h | 14
-rw-r--r--  arch/powerpc/include/asm/powernv.h | 22
-rw-r--r--  arch/powerpc/include/asm/processor.h | 41
-rw-r--r--  arch/powerpc/include/asm/thread_info.h | 10
-rw-r--r--  arch/powerpc/include/uapi/asm/mman.h | 16
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 2
-rw-r--r--  arch/powerpc/kernel/eeh.c | 3
-rw-r--r--  arch/powerpc/kernel/fadump.c | 36
-rw-r--r--  arch/powerpc/kernel/ftrace.c | 1
-rw-r--r--  arch/powerpc/kernel/head_32.S | 16
-rw-r--r--  arch/powerpc/kernel/head_64.S | 3
-rw-r--r--  arch/powerpc/kernel/idle_book3s.S | 48
-rw-r--r--  arch/powerpc/kernel/irq.c | 1
-rw-r--r--  arch/powerpc/kernel/mce.c | 16
-rw-r--r--  arch/powerpc/kernel/mce_power.c | 772
-rw-r--r--  arch/powerpc/kernel/paca.c | 21
-rw-r--r--  arch/powerpc/kernel/prom.c | 1
-rw-r--r--  arch/powerpc/kernel/prom_init.c | 2
-rw-r--r--  arch/powerpc/kernel/setup-common.c | 11
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 4
-rw-r--r--  arch/powerpc/kernel/stacktrace.c | 9
-rw-r--r--  arch/powerpc/kernel/swsusp.c | 1
-rw-r--r--  arch/powerpc/kernel/traps.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_host.c | 10
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xics.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_xics.c | 3
-rw-r--r--  arch/powerpc/mm/fault.c | 84
-rw-r--r--  arch/powerpc/mm/hash_low_32.S | 2
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c | 26
-rw-r--r--  arch/powerpc/mm/hugetlbpage-book3e.c | 7
-rw-r--r--  arch/powerpc/mm/hugetlbpage-radix.c | 11
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 18
-rw-r--r--  arch/powerpc/mm/init_64.c | 4
-rw-r--r--  arch/powerpc/mm/mmap.c | 40
-rw-r--r--  arch/powerpc/mm/mmu_context_book3s64.c | 116
-rw-r--r--  arch/powerpc/mm/mmu_context_nohash.c | 5
-rw-r--r--  arch/powerpc/mm/numa.c | 7
-rw-r--r--  arch/powerpc/mm/slb.c | 2
-rw-r--r--  arch/powerpc/mm/slb_low.S | 82
-rw-r--r--  arch/powerpc/mm/slice.c | 257
-rw-r--r--  arch/powerpc/mm/subpage-prot.c | 3
-rw-r--r--  arch/powerpc/mm/tlb-radix.c | 18
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c | 2
-rw-r--r--  arch/powerpc/platforms/44x/sam440ep.c | 2
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 2
-rw-r--r--  arch/powerpc/platforms/cell/axon_msi.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/Kconfig | 1
-rw-r--r--  arch/powerpc/platforms/powernv/idle.c | 96
-rw-r--r--  arch/powerpc/platforms/powernv/npu-dma.c | 462
-rw-r--r--  arch/powerpc/platforms/powernv/opal-lpc.c | 3
-rw-r--r--  arch/powerpc/platforms/powernv/opal-sensor.c | 4
-rw-r--r--  arch/powerpc/platforms/powernv/opal-wrappers.S | 56
-rw-r--r--  arch/powerpc/platforms/powernv/opal-xscom.c | 27
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c | 78
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c | 9
-rw-r--r--  arch/powerpc/platforms/powernv/pci.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/pci.h | 15
-rw-r--r--  arch/powerpc/platforms/powernv/powernv.h | 2
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c | 4
-rw-r--r--  arch/powerpc/platforms/powernv/smp.c | 18
-rw-r--r--  arch/powerpc/platforms/pseries/dtl.c | 3
-rw-r--r--  arch/powerpc/platforms/pseries/hvCall_inst.c | 10
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 61
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c | 4
-rw-r--r--  arch/powerpc/sysdev/scom.c | 3
-rw-r--r--  arch/powerpc/xmon/xmon.c | 56
92 files changed, 2093 insertions, 1103 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 97a8bc8a095c..9ff731f50a29 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -586,7 +586,7 @@ config ARCH_SPARSEMEM_ENABLE
config ARCH_SPARSEMEM_DEFAULT
def_bool y
- depends on (SMP && PPC_PSERIES) || PPC_PS3
+ depends on PPC_BOOK3S_64
config SYS_SUPPORTS_HUGETLBFS
bool
@@ -678,6 +678,16 @@ config PPC_256K_PAGES
endchoice
+config THREAD_SHIFT
+ int "Thread shift" if EXPERT
+ range 13 15
+ default "15" if PPC_256K_PAGES
+ default "14" if PPC64
+ default "13"
+ help
+ Used to define the stack size. The default is almost always what you
+ want. Only change this if you know what you are doing.
+
config FORCE_MAX_ZONEORDER
int "Maximum zone order"
range 8 9 if PPC64 && PPC_64K_PAGES
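
The CONFIG_THREAD_SHIFT entry above only selects an exponent; the thread_info.h hunk later in this patch turns it into the kernel stack size via THREAD_SIZE = 1 << THREAD_SHIFT. A minimal standalone sketch of that relationship, assuming the PPC64 default of 14:

/* Illustration only: how THREAD_SHIFT becomes the per-task kernel stack size. */
#include <stdio.h>

#define THREAD_SHIFT 14                    /* assumed: the PPC64 default above */
#define THREAD_SIZE  (1UL << THREAD_SHIFT) /* 16KB, as in thread_info.h */

int main(void)
{
	printf("THREAD_SHIFT=%d gives a %lu KB kernel stack\n",
	       THREAD_SHIFT, THREAD_SIZE >> 10);
	return 0;
}
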
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index ac8b8332ed82..0695ce047d56 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -33,7 +33,7 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_BPF_SYSCALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
+CONFIG_OPROFILE=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
@@ -261,7 +261,7 @@ CONFIG_NILFS2_FS=m
CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=m
CONFIG_OVERLAY_FS=m
-CONFIG_ISO9660_FS=m
+CONFIG_ISO9660_FS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=m
@@ -306,7 +306,7 @@ CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CCM=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPT_CRC32C_VPMSUM=m
+CONFIG_CRYPTO_CRC32C_VPMSUM=m
CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA256=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 4f1288b04303..e353168f98a7 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -19,7 +19,7 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_BPF_SYSCALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
+CONFIG_OPROFILE=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
@@ -291,7 +291,7 @@ CONFIG_NILFS2_FS=m
CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=m
CONFIG_OVERLAY_FS=m
-CONFIG_ISO9660_FS=m
+CONFIG_ISO9660_FS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=m
@@ -340,7 +340,7 @@ CONFIG_PPC_EARLY_DEBUG=y
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPT_CRC32C_VPMSUM=m
+CONFIG_CRYPTO_CRC32C_VPMSUM=m
CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA256=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 4ff68b752618..1a61aa20dfba 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -34,7 +34,7 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_BPF_SYSCALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
+CONFIG_OPROFILE=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
@@ -259,7 +259,7 @@ CONFIG_NILFS2_FS=m
CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=m
CONFIG_OVERLAY_FS=m
-CONFIG_ISO9660_FS=m
+CONFIG_ISO9660_FS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=m
@@ -303,7 +303,7 @@ CONFIG_XMON=y
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPT_CRC32C_VPMSUM=m
+CONFIG_CRYPTO_CRC32C_VPMSUM=m
CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA256=y
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index f6c5264287e5..7330150bfe34 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -17,6 +17,8 @@
#include <asm/checksum.h>
#include <linux/uaccess.h>
#include <asm/epapr_hcalls.h>
+#include <asm/dcr.h>
+#include <asm/mmu_context.h>
#include <uapi/asm/ucontext.h>
@@ -120,6 +122,8 @@ extern s64 __ashrdi3(s64, int);
extern int __cmpdi2(s64, s64);
extern int __ucmpdi2(u64, u64);
+/* tracing */
void _mcount(void);
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 0c4e470571ca..b4b5e6b671ca 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -8,7 +8,7 @@
#define H_PTE_INDEX_SIZE 9
#define H_PMD_INDEX_SIZE 7
#define H_PUD_INDEX_SIZE 9
-#define H_PGD_INDEX_SIZE 9
+#define H_PGD_INDEX_SIZE 12
#ifndef __ASSEMBLY__
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index f3dd21efa2ea..214219dff87c 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -4,10 +4,14 @@
#define H_PTE_INDEX_SIZE 8
#define H_PMD_INDEX_SIZE 5
#define H_PUD_INDEX_SIZE 5
-#define H_PGD_INDEX_SIZE 12
+#define H_PGD_INDEX_SIZE 15
-#define H_PAGE_COMBO 0x00001000 /* this is a combo 4k page */
-#define H_PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */
+/*
+ * A 64K aligned address frees up a few of the lower bits of the RPN for us.
+ * We steal those here. For more details look at pte_pfn()/pfn_pte().
+ */
+#define H_PAGE_COMBO _RPAGE_RPN0 /* this is a combo 4k page */
+#define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */
/*
* We need to differentiate between explicit huge page and THP huge
* page, since THP huge page also need to track real subpage details
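
Why stealing _RPAGE_RPN0/_RPAGE_RPN1 is safe: with 64K pages the physical address carried in the PTE is 64K aligned, so PTE bits 12 and 13 are always zero and can hold H_PAGE_COMBO / H_PAGE_4K_PFN instead. A small standalone check, assuming a 64K page size and the bit values from the pgtable.h hunk further down:

/* Illustration only: 64K-aligned addresses never set bits 12 or 13. */
#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT_64K 16
#define _RPAGE_RPN0    0x01000UL
#define _RPAGE_RPN1    0x02000UL

int main(void)
{
	for (uint64_t pfn = 0; pfn < 4096; pfn++) {
		uint64_t pa = pfn << PAGE_SHIFT_64K;	/* 64K aligned PA */
		assert((pa & (_RPAGE_RPN0 | _RPAGE_RPN1)) == 0);
	}
	return 0;
}
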
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index f7b721bbf918..4e957b027fe0 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -6,19 +6,13 @@
* Common bits between 4K and 64K pages in a linux-style PTE.
* Additional bits may be defined in pgtable-hash64-*.h
*
- * Note: We only support user read/write permissions. Supervisor always
- * have full read/write to pages above PAGE_OFFSET (pages below that
- * always use the user access permissions).
- *
- * We could create separate kernel read-only if we used the 3 PP bits
- * combinations that newer processors provide but we currently don't.
*/
-#define H_PAGE_BUSY 0x00800 /* software: PTE & hash are busy */
#define H_PTE_NONE_MASK _PAGE_HPTEFLAGS
-#define H_PAGE_F_GIX_SHIFT 57
-#define H_PAGE_F_GIX (7ul << 57) /* HPTE index within HPTEG */
-#define H_PAGE_F_SECOND (1ul << 60) /* HPTE is in 2ndary HPTEG */
-#define H_PAGE_HASHPTE (1ul << 61) /* PTE has associated HPTE */
+#define H_PAGE_F_GIX_SHIFT 56
+#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */
+#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */
+#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44)
+#define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */
#ifdef CONFIG_PPC_64K_PAGES
#include <asm/book3s/64/hash-64k.h>
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index c62f14d0bec1..6666cd366596 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -46,7 +46,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
*/
VM_WARN_ON(page_shift == mmu_psize_defs[MMU_PAGE_1G].shift);
if (page_shift == mmu_psize_defs[MMU_PAGE_2M].shift)
- return __pte(pte_val(entry) | _PAGE_LARGE);
+ return __pte(pte_val(entry) | R_PAGE_LARGE);
else
return entry;
}
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 52d8d1e4b772..6d56974adf28 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -39,6 +39,7 @@
/* Bits in the SLB VSID word */
#define SLB_VSID_SHIFT 12
+#define SLB_VSID_SHIFT_256M SLB_VSID_SHIFT
#define SLB_VSID_SHIFT_1T 24
#define SLB_VSID_SSIZE_SHIFT 62
#define SLB_VSID_B ASM_CONST(0xc000000000000000)
@@ -408,7 +409,7 @@ static inline unsigned long hpt_vpn(unsigned long ea,
static inline unsigned long hpt_hash(unsigned long vpn,
unsigned int shift, int ssize)
{
- int mask;
+ unsigned long mask;
unsigned long hash, vsid;
 /* VPN_SHIFT can be at most 12 */
@@ -491,13 +492,14 @@ extern void slb_set_size(u16 size);
* We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated
* from mmu context id and effective segment id of the address.
*
- * For user processes max context id is limited to ((1ul << 19) - 5)
- * for kernel space, we use the top 4 context ids to map address as below
+ * For user processes the max context id is limited to MAX_USER_CONTEXT.
+ *
+ * For kernel space, we use context ids 1-4 to map addresses as below:
* NOTE: each context only support 64TB now.
- * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ]
- * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ]
- * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ]
- * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ]
+ * 0x00001 - [ 0xc000000000000000 - 0xc0003fffffffffff ]
+ * 0x00002 - [ 0xd000000000000000 - 0xd0003fffffffffff ]
+ * 0x00003 - [ 0xe000000000000000 - 0xe0003fffffffffff ]
+ * 0x00004 - [ 0xf000000000000000 - 0xf0003fffffffffff ]
*
* The proto-VSIDs are then scrambled into real VSIDs with the
* multiplicative hash:
@@ -511,20 +513,28 @@ extern void slb_set_size(u16 size);
* robust scattering in the hash table (at least based on some initial
* results).
*
- * We also consider VSID 0 special. We use VSID 0 for slb entries mapping
- * bad address. This enables us to consolidate bad address handling in
- * hash_page.
+ * We use VSID 0 to indicate an invalid VSID. That means we can't use context id
+ * 0, because a context id of 0 and an EA of 0 gives a proto-VSID of 0, which
+ * will produce a VSID of 0.
*
* We also need to avoid the last segment of the last context, because that
* would give a protovsid of 0x1fffffffff. That will result in a VSID 0
- * because of the modulo operation in vsid scramble. But the vmemmap
- * (which is what uses region 0xf) will never be close to 64TB in size
- * (it's 56 bytes per page of system memory).
+ * because of the modulo operation in vsid scramble.
*/
+/*
+ * The max VA bits we support as of now is 68 bits. We want a 19 bit
+ * context ID.
+ * Restrictions:
+ * The GPU cannot access beyond 128TB (47 bit effective address), and
+ * we also cannot do more than a 20 bit PID.
+ * For p4 and p5 which can only do 65 bit VA, we restrict our CONTEXT_BITS
+ * to 16 bits (ie, we can only have 2^16 pids at the same time).
+ */
+#define VA_BITS 68
#define CONTEXT_BITS 19
-#define ESID_BITS 18
-#define ESID_BITS_1T 6
+#define ESID_BITS (VA_BITS - (SID_SHIFT + CONTEXT_BITS))
+#define ESID_BITS_1T (VA_BITS - (SID_SHIFT_1T + CONTEXT_BITS))
#define ESID_BITS_MASK ((1 << ESID_BITS) - 1)
#define ESID_BITS_1T_MASK ((1 << ESID_BITS_1T) - 1)
@@ -532,63 +542,70 @@ extern void slb_set_size(u16 size);
/*
* 256MB segment
 * The proto-VSID space has 2^(CONTEXT_BITS + ESID_BITS) - 1 segments
- * available for user + kernel mapping. The top 4 contexts are used for
- * kernel mapping. Each segment contains 2^28 bytes. Each
- * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts
- * (19 == 37 + 28 - 46).
+ * available for user + kernel mapping. VSID 0 is reserved as invalid, contexts
+ * 1-4 are used for kernel mapping. Each segment contains 2^28 bytes. Each
+ * context maps 2^49 bytes (512TB).
+ *
+ * We also need to avoid the last segment of the last context, because that
+ * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
+ * because of the modulo operation in vsid scramble.
+ */
+#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2)
+#define MIN_USER_CONTEXT (5)
+
+/* Would be nice to use KERNEL_REGION_ID here */
+#define KERNEL_REGION_CONTEXT_OFFSET (0xc - 1)
+
+/*
+ * For platforms that support only a 65 bit VA we limit the context bits
*/
-#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 5)
+#define MAX_USER_CONTEXT_65BIT_VA ((ASM_CONST(1) << (65 - (SID_SHIFT + ESID_BITS))) - 2)
/*
 * This should be computed such that protovsid * vsid_multiplier
- * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus
+ * doesn't overflow 64 bits. The vsid_multiplier should also be
+ * co-prime to vsid_modulus. We also need to make sure that the number
+ * of bits in the multiplied result (dividend) is less than twice the number
+ * of protovsid bits for our modulus optimization to work.
+ *
+ * The below table shows the current values used.
+ * |-------+------------+----------------------+------------+---------------------|
+ * |       | Prime Bits | proto VSID_BITS_65VA | Total Bits | 2 * proto VSID_BITS |
+ * |-------+------------+----------------------+------------+---------------------|
+ * | 1T    | 24         | 25                   | 49         | 50                  |
+ * |-------+------------+----------------------+------------+---------------------|
+ * | 256MB | 24         | 37                   | 61         | 74                  |
+ * |-------+------------+----------------------+------------+---------------------|
+ *
+ * |-------+------------+----------------------+------------+---------------------|
+ * |       | Prime Bits | proto VSID_BITS_68VA | Total Bits | 2 * proto VSID_BITS |
+ * |-------+------------+----------------------+------------+---------------------|
+ * | 1T    | 24         | 28                   | 52         | 56                  |
+ * |-------+------------+----------------------+------------+---------------------|
+ * | 256MB | 24         | 40                   | 64         | 80                  |
+ * |-------+------------+----------------------+------------+---------------------|
+ *
*/
#define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */
-#define VSID_BITS_256M (CONTEXT_BITS + ESID_BITS)
-#define VSID_MODULUS_256M ((1UL<<VSID_BITS_256M)-1)
+#define VSID_BITS_256M (VA_BITS - SID_SHIFT)
+#define VSID_BITS_65_256M (65 - SID_SHIFT)
+/*
+ * Modular multiplicative inverse of VSID_MULTIPLIER under modulo VSID_MODULUS
+ */
+#define VSID_MULINV_256M ASM_CONST(665548017062)
#define VSID_MULTIPLIER_1T ASM_CONST(12538073) /* 24-bit prime */
-#define VSID_BITS_1T (CONTEXT_BITS + ESID_BITS_1T)
-#define VSID_MODULUS_1T ((1UL<<VSID_BITS_1T)-1)
-
+#define VSID_BITS_1T (VA_BITS - SID_SHIFT_1T)
+#define VSID_BITS_65_1T (65 - SID_SHIFT_1T)
+#define VSID_MULINV_1T ASM_CONST(209034062)
+/* 1TB VSID reserved for VRMA */
+#define VRMA_VSID 0x1ffffffUL
#define USER_VSID_RANGE (1UL << (ESID_BITS + SID_SHIFT))
-/*
- * This macro generates asm code to compute the VSID scramble
- * function. Used in slb_allocate() and do_stab_bolted. The function
- * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
- *
- * rt = register containing the proto-VSID and into which the
- * VSID will be stored
- * rx = scratch register (clobbered)
- *
- * - rt and rx must be different registers
- * - The answer will end up in the low VSID_BITS bits of rt. The higher
- * bits may contain other garbage, so you may need to mask the
- * result.
- */
-#define ASM_VSID_SCRAMBLE(rt, rx, size) \
- lis rx,VSID_MULTIPLIER_##size@h; \
- ori rx,rx,VSID_MULTIPLIER_##size@l; \
- mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \
- \
- srdi rx,rt,VSID_BITS_##size; \
- clrldi rt,rt,(64-VSID_BITS_##size); \
- add rt,rt,rx; /* add high and low bits */ \
- /* NOTE: explanation based on VSID_BITS_##size = 36 \
- * Now, r3 == VSID (mod 2^36-1), and lies between 0 and \
- * 2^36-1+2^28-1. That in particular means that if r3 >= \
- * 2^36-1, then r3+1 has the 2^36 bit set. So, if r3+1 has \
- * the bit clear, r3 already has the answer we want, if it \
- * doesn't, the answer is the low 36 bits of r3+1. So in all \
- * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\
- addi rx,rt,1; \
- srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \
- add rt,rt,rx
-
/* 4 bits per slice and we have one slice per 1TB */
-#define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41)
+#define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41)
+#define TASK_SLICE_ARRAY_SZ(x) ((x)->context.addr_limit >> 41)
#ifndef __ASSEMBLY__
@@ -634,7 +651,7 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
#define vsid_scramble(protovsid, size) \
((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size))
-#else /* 1 */
+/* simplified form avoiding mod operation */
#define vsid_scramble(protovsid, size) \
({ \
unsigned long x; \
@@ -642,6 +659,21 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \
(x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \
})
+
+#else /* 1 */
+static inline unsigned long vsid_scramble(unsigned long protovsid,
+ unsigned long vsid_multiplier, int vsid_bits)
+{
+ unsigned long vsid;
+ unsigned long vsid_modulus = ((1UL << vsid_bits) - 1);
+ /*
+	 * We have the same multiplier for both 256M and 1T segments now
+ */
+ vsid = protovsid * vsid_multiplier;
+ vsid = (vsid >> vsid_bits) + (vsid & vsid_modulus);
+ return (vsid + ((vsid + 1) >> vsid_bits)) & vsid_modulus;
+}
+
#endif /* 1 */
/* Returns the segment size indicator for a user address */
@@ -656,36 +688,56 @@ static inline int user_segment_size(unsigned long addr)
static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
int ssize)
{
+ unsigned long va_bits = VA_BITS;
+ unsigned long vsid_bits;
+ unsigned long protovsid;
+
/*
* Bad address. We return VSID 0 for that
*/
if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
return 0;
- if (ssize == MMU_SEGSIZE_256M)
- return vsid_scramble((context << ESID_BITS)
- | ((ea >> SID_SHIFT) & ESID_BITS_MASK), 256M);
- return vsid_scramble((context << ESID_BITS_1T)
- | ((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK), 1T);
+ if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
+ va_bits = 65;
+
+ if (ssize == MMU_SEGSIZE_256M) {
+ vsid_bits = va_bits - SID_SHIFT;
+ protovsid = (context << ESID_BITS) |
+ ((ea >> SID_SHIFT) & ESID_BITS_MASK);
+ return vsid_scramble(protovsid, VSID_MULTIPLIER_256M, vsid_bits);
+ }
+ /* 1T segment */
+ vsid_bits = va_bits - SID_SHIFT_1T;
+ protovsid = (context << ESID_BITS_1T) |
+ ((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK);
+ return vsid_scramble(protovsid, VSID_MULTIPLIER_1T, vsid_bits);
}
/*
* This is only valid for addresses >= PAGE_OFFSET
- *
- * For kernel space, we use the top 4 context ids to map address as below
- * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ]
- * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ]
- * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ]
- * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ]
*/
static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
{
unsigned long context;
+ if (!is_kernel_addr(ea))
+ return 0;
+
/*
- * kernel take the top 4 context from the available range
+ * For kernel space, we use context ids 1-4 to map the address space as
+ * below:
+ *
+ * 0x00001 - [ 0xc000000000000000 - 0xc0003fffffffffff ]
+ * 0x00002 - [ 0xd000000000000000 - 0xd0003fffffffffff ]
+ * 0x00003 - [ 0xe000000000000000 - 0xe0003fffffffffff ]
+ * 0x00004 - [ 0xf000000000000000 - 0xf0003fffffffffff ]
+ *
+ * So we can compute the context from the region (top nibble) by
+ * subtracting 11, or 0xc - 1.
*/
- context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1;
+ context = (ea >> 60) - KERNEL_REGION_CONTEXT_OFFSET;
+
return get_vsid(context, ea, ssize);
}
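
The mod-free vsid_scramble() above is the classic Mersenne-style reduction: because 2^VSID_BITS is congruent to 1 modulo (2^VSID_BITS - 1), a shift-and-add fold gives the same result as the plain % form, provided the multiplied value stays under twice the proto-VSID width (the table earlier in this hunk). A standalone sanity check under those assumptions:

/* Illustration only: the fold used in vsid_scramble() equals the plain modulo. */
#include <assert.h>
#include <stdint.h>

#define VSID_MULTIPLIER 12538073ULL	/* the 24-bit prime used above */

static uint64_t vsid_scramble(uint64_t protovsid, uint64_t multiplier, int vsid_bits)
{
	uint64_t modulus = (1ULL << vsid_bits) - 1;
	uint64_t vsid = protovsid * multiplier;

	vsid = (vsid >> vsid_bits) + (vsid & modulus);		/* fold high bits down */
	return (vsid + ((vsid + 1) >> vsid_bits)) & modulus;	/* handle the wrap case */
}

int main(void)
{
	int bits = 40;	/* proto-VSID bits for 256M segments with a 68-bit VA */

	for (uint64_t p = 1; p < 100000; p++)
		assert(vsid_scramble(p, VSID_MULTIPLIER, bits) ==
		       (p * VSID_MULTIPLIER) % ((1ULL << bits) - 1));
	return 0;
}
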
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 805d4105e9bb..77529a3e3811 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -65,6 +65,8 @@ extern struct patb_entry *partition_tb;
* MAX_USER_CONTEXT * 16 bytes of space.
*/
#define PRTB_SIZE_SHIFT (CONTEXT_BITS + 4)
+#define PRTB_ENTRIES (1ul << CONTEXT_BITS)
+
/*
* Power9 currently only support 64K partition table size.
*/
@@ -73,13 +75,20 @@ extern struct patb_entry *partition_tb;
typedef unsigned long mm_context_id_t;
struct spinlock;
+/* Maximum possible number of NPUs in a system. */
+#define NV_MAX_NPUS 8
+
typedef struct {
mm_context_id_t id;
u16 user_psize; /* page size index */
+ /* NPU NMMU context */
+ struct npu_context *npu_context;
+
#ifdef CONFIG_PPC_MM_SLICES
u64 low_slices_psize; /* SLB page size encodings */
unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+ unsigned long addr_limit;
#else
u16 sllp; /* SLB page size encoding */
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 8f4d41936e5a..fb72ff6b98e6 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -37,21 +37,47 @@
#define _RPAGE_RSV3 0x0400000000000000UL
#define _RPAGE_RSV4 0x0200000000000000UL
-#ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */
-#else
-#define _PAGE_SOFT_DIRTY 0x00000
-#endif
-#define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */
+#define _PAGE_PTE 0x4000000000000000UL /* distinguishes PTEs from pointers */
+#define _PAGE_PRESENT 0x8000000000000000UL /* pte contains a translation */
/*
- * For P9 DD1 only, we need to track whether the pte's huge.
+ * Top and bottom bits of RPN which can be used by hash
+ * translation mode, because we expect them to be zero
+ * otherwise.
*/
-#define _PAGE_LARGE _RPAGE_RSV1
+#define _RPAGE_RPN0 0x01000
+#define _RPAGE_RPN1 0x02000
+#define _RPAGE_RPN44 0x0100000000000000UL
+#define _RPAGE_RPN43 0x0080000000000000UL
+#define _RPAGE_RPN42 0x0040000000000000UL
+#define _RPAGE_RPN41 0x0020000000000000UL
+
+/* Max physical address bit as per radix table */
+#define _RPAGE_PA_MAX 57
+/*
+ * Max physical address bit we will use for now.
+ *
+ * This is mostly a hardware limitation and for now Power9 has
+ * a 51 bit limit.
+ *
+ * This is different from the number of physical bit required to address
+ * the last byte of memory. That is defined by MAX_PHYSMEM_BITS.
+ * MAX_PHYSMEM_BITS is a linux limitation imposed by the maximum
+ * number of sections we can support (SECTIONS_SHIFT).
+ *
+ * This is different from Radix page table limitation above and
+ * should always be less than that. The limit is done such that
+ * we can overload the bits between _RPAGE_PA_MAX and _PAGE_PA_MAX
+ * for hash linux page table specific bits.
+ *
+ * In order to be compatible with future hardware generations we keep
+ * some offsets and limit this for now to 53
+ */
+#define _PAGE_PA_MAX 53
-#define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */
-#define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */
+#define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */
+#define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */
/*
* Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
* Instead of fixing all of them, add an alternate define which
@@ -59,10 +85,11 @@
*/
#define _PAGE_NO_CACHE _PAGE_TOLERANT
/*
- * We support 57 bit real address in pte. Clear everything above 57, and
- * every thing below PAGE_SHIFT;
+ * We support a _RPAGE_PA_MAX bit real address in the pte. On the linux side
+ * we are limited by _PAGE_PA_MAX. Clear everything above _PAGE_PA_MAX
+ * and everything below PAGE_SHIFT;
*/
-#define PTE_RPN_MASK (((1UL << 57) - 1) & (PAGE_MASK))
+#define PTE_RPN_MASK (((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK))
/*
* set of bits not changed in pmd_modify. Even though we have hash specific bits
* in here, on radix we expect them to be zero.
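
For reference, with _PAGE_PA_MAX = 53 and a 64K PAGE_SHIFT (an assumed configuration), the new PTE_RPN_MASK keeps PTE bits 16..52 and nothing else, leaving the bits above it free for the hash-specific flags defined earlier in this hunk:

/* Illustration only: the value PTE_RPN_MASK takes for a 64K page size. */
#include <stdio.h>

#define PAGE_SHIFT	16
#define PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))
#define _PAGE_PA_MAX	53
#define PTE_RPN_MASK	(((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK))

int main(void)
{
	printf("PTE_RPN_MASK = 0x%016lx\n", PTE_RPN_MASK);	/* 0x001fffffffff0000 */
	return 0;
}
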
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 9e0bb7cd6e22..ac16d1943022 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -11,6 +11,12 @@
#include <asm/book3s/64/radix-4k.h>
#endif
+/*
+ * For P9 DD1 only, we need to track whether the pte's huge.
+ */
+#define R_PAGE_LARGE _RPAGE_RSV1
+
+
#ifndef __ASSEMBLY__
#include <asm/book3s/64/tlbflush-radix.h>
#include <asm/cpu_has_feature.h>
@@ -252,7 +258,7 @@ static inline int radix__pmd_trans_huge(pmd_t pmd)
static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
{
if (cpu_has_feature(CPU_FTR_POWER9_DD1))
- return __pmd(pmd_val(pmd) | _PAGE_PTE | _PAGE_LARGE);
+ return __pmd(pmd_val(pmd) | _PAGE_PTE | R_PAGE_LARGE);
return __pmd(pmd_val(pmd) | _PAGE_PTE);
}
static inline void radix__pmdp_huge_split_prepare(struct vm_area_struct *vma,
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 155731557c9b..4649ca0d28e3 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -46,6 +46,7 @@ extern u32 pnv_fastsleep_workaround_at_exit[];
extern u64 pnv_first_deep_stop_state;
+unsigned long pnv_cpu_offline(unsigned int cpu);
int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags);
static inline void report_invalid_psscr_val(u64 psscr_val, int err)
{
diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index 86308f177f2d..5d5af3fddfd8 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -8,8 +8,6 @@
struct pt_regs;
-extern struct dentry *powerpc_debugfs_root;
-
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
extern int (*__debugger)(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/debugfs.h b/arch/powerpc/include/asm/debugfs.h
new file mode 100644
index 000000000000..4f3b39f3e3d2
--- /dev/null
+++ b/arch/powerpc/include/asm/debugfs.h
@@ -0,0 +1,17 @@
+#ifndef _ASM_POWERPC_DEBUGFS_H
+#define _ASM_POWERPC_DEBUGFS_H
+
+/*
+ * Copyright 2017, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/debugfs.h>
+
+extern struct dentry *powerpc_debugfs_root;
+
+#endif /* _ASM_POWERPC_DEBUGFS_H */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 3cc12a86ef5d..d73755fafbb0 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -377,16 +377,6 @@ long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...);
long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...);
long plpar_hcall9_raw(unsigned long opcode, unsigned long *retbuf, ...);
-/* For hcall instrumentation. One structure per-hcall, per-CPU */
-struct hcall_stats {
- unsigned long num_calls; /* number of calls (on this CPU) */
- unsigned long tb_total; /* total wall time (mftb) of calls. */
- unsigned long purr_total; /* total cpu time (PURR) of calls. */
- unsigned long tb_start;
- unsigned long purr_start;
-};
-#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1)
-
struct hvcall_mpp_data {
unsigned long entitled_mem;
unsigned long mapped_mem;
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index d9b48f5bb606..d55c7f881ce7 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -49,8 +49,6 @@ static inline bool kvm_is_radix(struct kvm *kvm)
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
-#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
-
/*
* We use a lock bit in HPTE dword 0 to synchronize updates and
* accesses to each HPTE, and another bit to indicate non-present
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index ed62efe01e49..81eff8631434 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -24,97 +24,6 @@
#include <linux/bitops.h>
-/*
- * Machine Check bits on power7 and power8
- */
-#define P7_SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) /* P8 too */
-
-/* SRR1 bits for machine check (On Power7 and Power8) */
-#define P7_SRR1_MC_IFETCH(srr1) ((srr1) & PPC_BITMASK(43, 45)) /* P8 too */
-
-#define P7_SRR1_MC_IFETCH_UE (0x1 << PPC_BITLSHIFT(45)) /* P8 too */
-#define P7_SRR1_MC_IFETCH_SLB_PARITY (0x2 << PPC_BITLSHIFT(45)) /* P8 too */
-#define P7_SRR1_MC_IFETCH_SLB_MULTIHIT (0x3 << PPC_BITLSHIFT(45)) /* P8 too */
-#define P7_SRR1_MC_IFETCH_SLB_BOTH (0x4 << PPC_BITLSHIFT(45))
-#define P7_SRR1_MC_IFETCH_TLB_MULTIHIT (0x5 << PPC_BITLSHIFT(45)) /* P8 too */
-#define P7_SRR1_MC_IFETCH_UE_TLB_RELOAD (0x6 << PPC_BITLSHIFT(45)) /* P8 too */
-#define P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL (0x7 << PPC_BITLSHIFT(45))
-
-/* SRR1 bits for machine check (On Power8) */
-#define P8_SRR1_MC_IFETCH_ERAT_MULTIHIT (0x4 << PPC_BITLSHIFT(45))
-
-/* DSISR bits for machine check (On Power7 and Power8) */
-#define P7_DSISR_MC_UE (PPC_BIT(48)) /* P8 too */
-#define P7_DSISR_MC_UE_TABLEWALK (PPC_BIT(49)) /* P8 too */
-#define P7_DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52)) /* P8 too */
-#define P7_DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53)) /* P8 too */
-#define P7_DSISR_MC_SLB_PARITY_MFSLB (PPC_BIT(55)) /* P8 too */
-#define P7_DSISR_MC_SLB_MULTIHIT (PPC_BIT(56)) /* P8 too */
-#define P7_DSISR_MC_SLB_MULTIHIT_PARITY (PPC_BIT(57)) /* P8 too */
-
-/*
- * DSISR bits for machine check (Power8) in addition to above.
- * Secondary DERAT Multihit
- */
-#define P8_DSISR_MC_ERAT_MULTIHIT_SEC (PPC_BIT(54))
-
-/* SLB error bits */
-#define P7_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_ERAT_MULTIHIT | \
- P7_DSISR_MC_SLB_PARITY_MFSLB | \
- P7_DSISR_MC_SLB_MULTIHIT | \
- P7_DSISR_MC_SLB_MULTIHIT_PARITY)
-
-#define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \
- P8_DSISR_MC_ERAT_MULTIHIT_SEC)
-
-/*
- * Machine Check bits on power9
- */
-#define P9_SRR1_MC_LOADSTORE(srr1) (((srr1) >> PPC_BITLSHIFT(42)) & 1)
-
-#define P9_SRR1_MC_IFETCH(srr1) ( \
- PPC_BITEXTRACT(srr1, 45, 0) | \
- PPC_BITEXTRACT(srr1, 44, 1) | \
- PPC_BITEXTRACT(srr1, 43, 2) | \
- PPC_BITEXTRACT(srr1, 36, 3) )
-
-/* 0 is reserved */
-#define P9_SRR1_MC_IFETCH_UE 1
-#define P9_SRR1_MC_IFETCH_SLB_PARITY 2
-#define P9_SRR1_MC_IFETCH_SLB_MULTIHIT 3
-#define P9_SRR1_MC_IFETCH_ERAT_MULTIHIT 4
-#define P9_SRR1_MC_IFETCH_TLB_MULTIHIT 5
-#define P9_SRR1_MC_IFETCH_UE_TLB_RELOAD 6
-/* 7 is reserved */
-#define P9_SRR1_MC_IFETCH_LINK_TIMEOUT 8
-#define P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT 9
-/* 10 ? */
-#define P9_SRR1_MC_IFETCH_RA 11
-#define P9_SRR1_MC_IFETCH_RA_TABLEWALK 12
-#define P9_SRR1_MC_IFETCH_RA_ASYNC_STORE 13
-#define P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT 14
-#define P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN 15
-
-/* DSISR bits for machine check (On Power9) */
-#define P9_DSISR_MC_UE (PPC_BIT(48))
-#define P9_DSISR_MC_UE_TABLEWALK (PPC_BIT(49))
-#define P9_DSISR_MC_LINK_LOAD_TIMEOUT (PPC_BIT(50))
-#define P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT (PPC_BIT(51))
-#define P9_DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52))
-#define P9_DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53))
-#define P9_DSISR_MC_USER_TLBIE (PPC_BIT(54))
-#define P9_DSISR_MC_SLB_PARITY_MFSLB (PPC_BIT(55))
-#define P9_DSISR_MC_SLB_MULTIHIT_MFSLB (PPC_BIT(56))
-#define P9_DSISR_MC_RA_LOAD (PPC_BIT(57))
-#define P9_DSISR_MC_RA_TABLEWALK (PPC_BIT(58))
-#define P9_DSISR_MC_RA_TABLEWALK_FOREIGN (PPC_BIT(59))
-#define P9_DSISR_MC_RA_FOREIGN (PPC_BIT(60))
-
-/* SLB error bits */
-#define P9_DSISR_MC_SLB_ERRORS (P9_DSISR_MC_ERAT_MULTIHIT | \
- P9_DSISR_MC_SLB_PARITY_MFSLB | \
- P9_DSISR_MC_SLB_MULTIHIT_MFSLB)
-
enum MCE_Version {
MCE_V1 = 1,
};
@@ -298,7 +207,8 @@ extern void save_mce_event(struct pt_regs *regs, long handled,
extern int get_mce_event(struct machine_check_event *mce, bool release);
extern void release_mce_event(void);
extern void machine_check_queue_event(void);
-extern void machine_check_print_event_info(struct machine_check_event *evt);
+extern void machine_check_print_event_info(struct machine_check_event *evt,
+ bool user_mode);
extern uint64_t get_mce_fault_addr(struct machine_check_event *evt);
#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index b62a8d43a06c..7ca8d8e80ffa 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -229,11 +229,6 @@ typedef struct {
unsigned int id;
unsigned int active;
unsigned long vdso_base;
-#ifdef CONFIG_PPC_MM_SLICES
- u64 low_slices_psize; /* SLB page size encodings */
- u64 high_slices_psize; /* 4 bits per slice for now */
- u16 user_psize; /* page size index */
-#endif
#ifdef CONFIG_PPC_64K_PAGES
/* for 4K PTE fragment support */
void *pte_frag;
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 065e762fae85..78260409dc9c 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -29,6 +29,10 @@
*/
/*
+ * Support for 68 bit VA space. We added that from ISA 2.05
+ */
+#define MMU_FTR_68_BIT_VA ASM_CONST(0x00002000)
+/*
* Kernel read only support.
* We added the ppp value 0b110 in ISA 2.04.
*/
@@ -109,10 +113,10 @@
#define MMU_FTRS_POWER4 MMU_FTRS_DEFAULT_HPTE_ARCH_V2
#define MMU_FTRS_PPC970 MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA
#define MMU_FTRS_POWER5 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
-#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
-#define MMU_FTRS_POWER8 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
-#define MMU_FTRS_POWER9 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
+#define MMU_FTRS_POWER6 MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA
+#define MMU_FTRS_POWER7 MMU_FTRS_POWER6
+#define MMU_FTRS_POWER8 MMU_FTRS_POWER6
+#define MMU_FTRS_POWER9 MMU_FTRS_POWER6
#define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
MMU_FTR_CI_LARGE_PAGE
#define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
@@ -136,7 +140,7 @@ enum {
MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL |
MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE |
MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA |
- MMU_FTR_KERNEL_RO |
+ MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA |
#ifdef CONFIG_PPC_RADIX_MMU
MMU_FTR_TYPE_RADIX |
#endif
@@ -290,7 +294,10 @@ static inline bool early_radix_enabled(void)
#define MMU_PAGE_16G 14
#define MMU_PAGE_64G 15
-/* N.B. we need to change the type of hpte_page_sizes if this gets to be > 16 */
+/*
+ * N.B. we need to change the type of hpte_page_sizes if this gets to be > 16
+ * Also we need to change the type of mm_context.low/high_slices_psize.
+ */
#define MMU_PAGE_COUNT 16
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index b9e3f0aca261..78803a7ebdd9 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -51,7 +51,8 @@ static inline void switch_mmu_context(struct mm_struct *prev,
return switch_slb(tsk, next);
}
-extern int __init_new_context(void);
+extern int hash__alloc_context_id(void);
+extern void hash__reserve_context_id(int id);
extern void __destroy_context(int context_id);
static inline void mmu_context_init(void) { }
#else
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index c7f927e67d14..f0ff384d4ca5 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -88,11 +88,6 @@
#include <asm/nohash/pte-book3e.h>
#include <asm/pte-common.h>
-#ifdef CONFIG_PPC_MM_SLICES
-#define HAVE_ARCH_UNMAPPED_AREA
-#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#endif /* CONFIG_PPC_MM_SLICES */
-
#ifndef __ASSEMBLY__
/* pte_clear moved to later in this file */
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index bc8ac3c0e649..cb3e6242a78c 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -187,7 +187,10 @@
#define OPAL_XIVE_DUMP 142
#define OPAL_XIVE_RESERVED3 143
#define OPAL_XIVE_RESERVED4 144
-#define OPAL_LAST 144
+#define OPAL_NPU_INIT_CONTEXT 146
+#define OPAL_NPU_DESTROY_CONTEXT 147
+#define OPAL_NPU_MAP_LPAR 148
+#define OPAL_LAST 148
/* Device tree flags */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index cb7d6078b03a..588fb1c23af9 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -29,6 +29,11 @@ extern struct device_node *opal_node;
/* API functions */
int64_t opal_invalid_call(void);
+int64_t opal_npu_destroy_context(uint64_t phb_id, uint64_t pid, uint64_t bdf);
+int64_t opal_npu_init_context(uint64_t phb_id, int pasid, uint64_t msr,
+ uint64_t bdf);
+int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
+ uint64_t lpcr);
int64_t opal_console_write(int64_t term_number, __be64 *length,
const uint8_t *buffer);
int64_t opal_console_read(int64_t term_number, __be64 *length,
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 708c3e592eeb..140ddb9ae5a8 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -139,6 +139,7 @@ struct paca_struct {
#ifdef CONFIG_PPC_MM_SLICES
u64 mm_ctx_low_slices_psize;
unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
+ unsigned long addr_limit;
#else
u16 mm_ctx_user_psize;
u16 mm_ctx_sllp;
@@ -172,6 +173,11 @@ struct paca_struct {
u8 thread_mask;
/* Mask to denote subcore sibling threads */
u8 subcore_sibling_mask;
+ /*
+	 * Pointer to an array which contains pointers
+	 * to the sibling threads' pacas.
+ */
+ struct paca_struct **thread_sibling_pacas;
#endif
#ifdef CONFIG_PPC_BOOK3S_64
@@ -206,23 +212,7 @@ struct paca_struct {
#endif
};
-#ifdef CONFIG_PPC_BOOK3S
-static inline void copy_mm_to_paca(mm_context_t *context)
-{
- get_paca()->mm_ctx_id = context->id;
-#ifdef CONFIG_PPC_MM_SLICES
- get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
- memcpy(&get_paca()->mm_ctx_high_slices_psize,
- &context->high_slices_psize, SLICE_ARRAY_SIZE);
-#else
- get_paca()->mm_ctx_user_psize = context->user_psize;
- get_paca()->mm_ctx_sllp = context->sllp;
-#endif
-}
-#else
-static inline void copy_mm_to_paca(mm_context_t *context){}
-#endif
-
+extern void copy_mm_to_paca(struct mm_struct *mm);
extern struct paca_struct *paca;
extern void initialise_paca(struct paca_struct *new_paca, int cpu);
extern void setup_paca(struct paca_struct *new_paca);
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 3e83d2a20b6f..c4d9654bd637 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -98,21 +98,7 @@ extern u64 ppc64_pft_size;
#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
-/*
- * 1 bit per slice and we have one slice per 1TB
- * Right now we support only 64TB.
- * IF we change this we will have to change the type
- * of high_slices
- */
-#define SLICE_MASK_SIZE 8
-
#ifndef __ASSEMBLY__
-
-struct slice_mask {
- u16 low_slices;
- u64 high_slices;
-};
-
struct mm_struct;
extern unsigned long slice_get_unmapped_area(unsigned long addr,
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
index 0e9c2402dd20..f62797702300 100644
--- a/arch/powerpc/include/asm/powernv.h
+++ b/arch/powerpc/include/asm/powernv.h
@@ -11,9 +11,31 @@
#define _ASM_POWERNV_H
#ifdef CONFIG_PPC_POWERNV
+#define NPU2_WRITE 1
extern void powernv_set_nmmu_ptcr(unsigned long ptcr);
+extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
+ unsigned long flags,
+ struct npu_context *(*cb)(struct npu_context *, void *),
+ void *priv);
+extern void pnv_npu2_destroy_context(struct npu_context *context,
+ struct pci_dev *gpdev);
+extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
+ unsigned long *flags, unsigned long *status,
+ int count);
#else
static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { }
+static inline struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
+ unsigned long flags,
+ struct npu_context *(*cb)(struct npu_context *, void *),
+ void *priv) { return ERR_PTR(-ENODEV); }
+static inline void pnv_npu2_destroy_context(struct npu_context *context,
+ struct pci_dev *gpdev) { }
+
+static inline int pnv_npu2_handle_fault(struct npu_context *context,
+ uintptr_t *ea, unsigned long *flags,
+ unsigned long *status, int count) {
+ return -ENODEV;
+}
#endif
#endif /* _ASM_POWERNV_H */
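
A hypothetical sketch of how a device driver might consume the NPU2 context API declared above. The function names and signatures come from this hunk; the driver structure, the callback semantics, and the use of NPU2_WRITE as the flags argument are assumptions:

/* Hypothetical driver-side usage sketch, not part of this patch. */
#include <linux/pci.h>
#include <linux/err.h>
#include <asm/powernv.h>

static struct npu_context *example_npu_cb(struct npu_context *ctx, void *priv)
{
	/* Platform code invokes this callback; exact semantics live in npu-dma.c. */
	return ctx;
}

static int example_bind_gpu(struct pci_dev *gpdev)
{
	struct npu_context *ctx;

	ctx = pnv_npu2_init_context(gpdev, NPU2_WRITE, example_npu_cb, NULL);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);	/* -ENODEV when PowerNV/NPU2 support is absent */

	/* ... later, pnv_npu2_handle_fault() could service address translation faults ... */

	pnv_npu2_destroy_context(ctx, gpdev);
	return 0;
}
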
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index e0fecbcea2a2..a4b1d8d6b793 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -102,11 +102,25 @@ void release_thread(struct task_struct *);
#endif
#ifdef CONFIG_PPC64
-/* 64-bit user address space is 46-bits (64TB user VM) */
-#define TASK_SIZE_USER64 (0x0000400000000000UL)
+/*
+ * 64-bit user address space can have multiple limits
+ * For now supported values are:
+ */
+#define TASK_SIZE_64TB (0x0000400000000000UL)
+#define TASK_SIZE_128TB (0x0000800000000000UL)
+#define TASK_SIZE_512TB (0x0002000000000000UL)
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Max value currently used:
+ */
+#define TASK_SIZE_USER64 TASK_SIZE_512TB
+#else
+#define TASK_SIZE_USER64 TASK_SIZE_64TB
+#endif
-/*
- * 32-bit user address space is 4GB - 1 page
+/*
+ * 32-bit user address space is 4GB - 1 page
* (this 1 page is needed so referencing of 0xFFFFFFFF generates EFAULT
*/
#define TASK_SIZE_USER32 (0x0000000100000000UL - (1*PAGE_SIZE))
@@ -114,26 +128,37 @@ void release_thread(struct task_struct *);
#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
TASK_SIZE_USER32 : TASK_SIZE_USER64)
#define TASK_SIZE TASK_SIZE_OF(current)
-
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
-#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_USER64 / 4))
+#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_128TB / 4))
#define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \
TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
#endif
+/*
+ * Initial task size value for user applications. For book3s 64 we start
+ * with 128TB and conditionally enable up to 512TB
+ */
+#ifdef CONFIG_PPC_BOOK3S_64
+#define DEFAULT_MAP_WINDOW ((is_32bit_task()) ? \
+ TASK_SIZE_USER32 : TASK_SIZE_128TB)
+#else
+#define DEFAULT_MAP_WINDOW TASK_SIZE
+#endif
+
#ifdef __powerpc64__
-#define STACK_TOP_USER64 TASK_SIZE_USER64
+/* Limit stack to 128TB */
+#define STACK_TOP_USER64 TASK_SIZE_128TB
#define STACK_TOP_USER32 TASK_SIZE_USER32
#define STACK_TOP (is_32bit_task() ? \
STACK_TOP_USER32 : STACK_TOP_USER64)
-#define STACK_TOP_MAX STACK_TOP_USER64
+#define STACK_TOP_MAX TASK_SIZE_USER64
#else /* __powerpc64__ */
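
The split between DEFAULT_MAP_WINDOW (128TB) and TASK_SIZE_USER64 (512TB) above means new mappings stay below 128TB unless a process asks for more. A userspace sketch of the opt-in, under the assumption that passing an mmap() hint at or above 128TB is what extends the window (kernels without this support simply place the mapping below 128TB):

/* Illustration only: request an anonymous mapping at the 128TB default window. */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	void *hint = (void *)0x0000800000000000UL;	/* the 128TB boundary */
	void *p = mmap(hint, 1UL << 20, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		perror("mmap");
	else
		printf("mapped at %p\n", p);
	return 0;
}
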
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 87e4b2d8dcd4..2e17d668c472 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -10,15 +10,7 @@
#ifdef __KERNEL__
-/* We have 8k stacks on ppc32 and 16k on ppc64 */
-
-#if defined(CONFIG_PPC64)
-#define THREAD_SHIFT 14
-#elif defined(CONFIG_PPC_256K_PAGES)
-#define THREAD_SHIFT 15
-#else
-#define THREAD_SHIFT 13
-#endif
+#define THREAD_SHIFT CONFIG_THREAD_SHIFT
#define THREAD_SIZE (1 << THREAD_SHIFT)
diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index 03c06ba7464f..ab45cc2f3101 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -29,4 +29,20 @@
#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
+/*
+ * When MAP_HUGETLB is set, bits [26:31] of the flags argument to mmap(2),
+ * encode the log2 of the huge page size. A value of zero indicates that the
+ * default huge page size should be used. To use a non-default huge page size,
+ * one of these defines can be used, or the size can be encoded by hand. Note
+ * that on most systems only a subset, or possibly none, of these sizes will be
+ * available.
+ */
+#define MAP_HUGE_512KB (19 << MAP_HUGE_SHIFT) /* 512KB HugeTLB Page */
+#define MAP_HUGE_1MB (20 << MAP_HUGE_SHIFT) /* 1MB HugeTLB Page */
+#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) /* 2MB HugeTLB Page */
+#define MAP_HUGE_8MB (23 << MAP_HUGE_SHIFT) /* 8MB HugeTLB Page */
+#define MAP_HUGE_16MB (24 << MAP_HUGE_SHIFT) /* 16MB HugeTLB Page */
+#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) /* 1GB HugeTLB Page */
+#define MAP_HUGE_16GB (34 << MAP_HUGE_SHIFT) /* 16GB HugeTLB Page */
+
#endif /* _UAPI_ASM_POWERPC_MMAN_H */
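
A userspace sketch of the encoding described in the comment above, requesting one of the powerpc huge page sizes explicitly (this assumes 16MB huge pages are actually configured on the running system):

/* Illustration only: select a 16MB huge page via the MAP_HUGE_* encoding. */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

#ifndef MAP_HUGE_16MB
#define MAP_HUGE_SHIFT	26
#define MAP_HUGE_16MB	(24 << MAP_HUGE_SHIFT)	/* log2(16MB) == 24 */
#endif

int main(void)
{
	size_t len = 16UL << 20;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_16MB,
		       -1, 0);

	if (p == MAP_FAILED)
		perror("mmap(MAP_HUGETLB | MAP_HUGE_16MB)");
	else
		munmap(p, len);
	return 0;
}
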
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4367e7df51a1..e7c8229a8812 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -185,6 +185,7 @@ int main(void)
#ifdef CONFIG_PPC_MM_SLICES
OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
+ DEFINE(PACA_ADDR_LIMIT, offsetof(struct paca_struct, addr_limit));
DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
#endif /* CONFIG_PPC_MM_SLICES */
#endif
@@ -727,6 +728,7 @@ int main(void)
OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state);
OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask);
OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
+ OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
#endif
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 9de7f79e702b..63992b2d8e15 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -22,7 +22,6 @@
*/
#include <linux/delay.h>
-#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
@@ -37,7 +36,7 @@
#include <linux/of.h>
#include <linux/atomic.h>
-#include <asm/debug.h>
+#include <asm/debugfs.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8ff0dd4e77a7..243dbef7e926 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -30,17 +30,16 @@
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/delay.h>
-#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
+#include <asm/debugfs.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/fadump.h>
-#include <asm/debug.h>
#include <asm/setup.h>
static struct fw_dump fw_dump;
@@ -319,15 +318,34 @@ int __init fadump_reserve_mem(void)
pr_debug("fadumphdr_addr = %p\n",
(void *) fw_dump.fadumphdr_addr);
} else {
- /* Reserve the memory at the top of memory. */
size = get_fadump_area_size();
- base = memory_boundary - size;
- memblock_reserve(base, size);
- printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
- "for firmware-assisted dump\n",
- (unsigned long)(size >> 20),
- (unsigned long)(base >> 20));
+
+ /*
+ * Reserve memory at an offset closer to bottom of the RAM to
+ * minimize the impact of memory hot-remove operation. We can't
+ * use memblock_find_in_range() here since it doesn't allocate
+ * from bottom to top.
+ */
+ for (base = fw_dump.boot_memory_size;
+ base <= (memory_boundary - size);
+ base += size) {
+ if (memblock_is_region_memory(base, size) &&
+ !memblock_is_region_reserved(base, size))
+ break;
+ }
+ if ((base > (memory_boundary - size)) ||
+ memblock_reserve(base, size)) {
+ pr_err("Failed to reserve memory\n");
+ return 0;
+ }
+
+ pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
+ "assisted dump (System RAM: %ldMB)\n",
+ (unsigned long)(size >> 20),
+ (unsigned long)(base >> 20),
+ (unsigned long)(memblock_phys_mem_size() >> 20));
}
+
fw_dump.reserve_dump_area_start = base;
fw_dump.reserve_dump_area_size = size;
return 1;
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 5c9f50c1aa99..32509de6ce4c 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -21,6 +21,7 @@
#include <linux/init.h>
#include <linux/list.h>
+#include <asm/asm-prototypes.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
#include <asm/ftrace.h>
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 1607be7c0ef2..e22734278458 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -735,11 +735,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE)
EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE)
EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE)
-
- .globl mol_trampoline
- .set mol_trampoline, i0x2f00
- EXPORT_SYMBOL(mol_trampoline)
+ EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE)
. = 0x3000
@@ -1278,16 +1274,6 @@ EXPORT_SYMBOL(empty_zero_page)
swapper_pg_dir:
.space PGD_TABLE_SIZE
- .globl intercept_table
-intercept_table:
- .long 0, 0, i0x200, i0x300, i0x400, 0, i0x600, i0x700
- .long i0x800, 0, 0, 0, 0, i0xd00, 0, 0
- .long 0, 0, 0, i0x1300, 0, 0, 0, 0
- .long 0, 0, 0, 0, 0, 0, 0, 0
- .long 0, 0, 0, 0, 0, 0, 0, 0
- .long 0, 0, 0, 0, 0, 0, 0, 0
-EXPORT_SYMBOL(intercept_table)
-
/* Room for two PTE pointers, usually the kernel and current user pointers
* to their respective root page table.
*/
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 1dc5eae2ced3..0ddc602b33a4 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -949,7 +949,8 @@ start_here_multiplatform:
LOAD_REG_ADDR(r3,init_thread_union)
/* set up a stack pointer */
- addi r1,r3,THREAD_SIZE
+ LOAD_REG_IMMEDIATE(r1,THREAD_SIZE)
+ add r1,r3,r1
li r0,0
stdu r0,-STACK_FRAME_OVERHEAD(r1)
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 995728736677..24717a73b6bb 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -375,6 +375,46 @@ _GLOBAL(power9_idle_stop)
li r4,1
b pnv_powersave_common
/* No return */
+
+
+/*
+ * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
+ * HSPRG0 will be set to the HSPRG0 value of one of the
+ * threads in this core. Thus the value we have in r13
+ * may not be this thread's paca pointer.
+ *
+ * Fortunately, the TIR remains invariant. Since this thread's
+ * paca pointer is recorded in all its siblings' pacas, we can
+ * correctly recover this thread's paca pointer if we
+ * know the index of this thread in the core.
+ *
+ * This index can be obtained from the TIR.
+ *
+ * i.e, thread's position in the core = TIR.
+ * If this value is i, then this thread's paca is
+ * paca->thread_sibling_pacas[i].
+ */
+power9_dd1_recover_paca:
+ mfspr r4, SPRN_TIR
+ /*
+ * Since each entry in thread_sibling_pacas is 8 bytes
+ * we need to left-shift by 3 bits. Thus r4 = i * 8
+ */
+ sldi r4, r4, 3
+ /* Get &paca->thread_sibling_pacas[0] in r5 */
+ ld r5, PACA_SIBLING_PACA_PTRS(r13)
+ /* Load paca->thread_sibling_pacas[i] into r13 */
+ ldx r13, r4, r5
+ SET_PACA(r13)
+ ld r2, PACATOC(r13)
+ /*
+ * Indicate that we have lost NVGPR state
+ * which needs to be restored from the stack.
+ */
+ li r3, 1
+ stb r3,PACA_NAPSTATELOST(r13)
+ blr
+
/*
* Called from reset vector. Check whether we have woken up with
* hypervisor state loss. If yes, restore hypervisor state and return
@@ -385,7 +425,13 @@ _GLOBAL(power9_idle_stop)
*/
_GLOBAL(pnv_restore_hyp_resource)
BEGIN_FTR_SECTION
- ld r2,PACATOC(r13);
+BEGIN_FTR_SECTION_NESTED(70)
+ mflr r6
+ bl power9_dd1_recover_paca
+ mtlr r6
+FTR_SECTION_ELSE_NESTED(70)
+ ld r2, PACATOC(r13)
+ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 70)
/*
* POWER ISA 3. Use PSSCR to determine if we
* are waking up from deep idle state
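
In C terms, the DD1 recovery path above boils down to a single array lookup; a rough sketch for readability only (the real code has to run in assembly before the stack and TOC can be trusted, and mfspr/SPRN_TIR/struct paca_struct are the kernel's own definitions):

/* Rough C equivalent of power9_dd1_recover_paca, illustration only. */
#include <asm/paca.h>
#include <asm/reg.h>

static struct paca_struct *recover_paca(struct paca_struct *possibly_wrong_paca)
{
	unsigned long tir = mfspr(SPRN_TIR);	/* this thread's index within the core */

	/* Every sibling's paca holds the same thread_sibling_pacas[] array,
	 * so indexing it with TIR from any of them yields our own paca. */
	return possibly_wrong_paca->thread_sibling_pacas[tir];
}
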
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 8ee7b44450eb..5c291df30fe3 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -65,7 +65,6 @@
#include <asm/machdep.h>
#include <asm/udbg.h>
#include <asm/smp.h>
-#include <asm/debug.h>
#include <asm/livepatch.h>
#include <asm/asm-prototypes.h>
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index a1475e6aef3a..16eb0b508761 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -228,12 +228,13 @@ static void machine_check_process_queued_event(struct irq_work *work)
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
machine_check_print_event_info(
- this_cpu_ptr(&mce_event_queue[index]));
+ this_cpu_ptr(&mce_event_queue[index]), false);
__this_cpu_dec(mce_queue_count);
}
}
-void machine_check_print_event_info(struct machine_check_event *evt)
+void machine_check_print_event_info(struct machine_check_event *evt,
+ bool user_mode)
{
const char *level, *sevstr, *subtype;
static const char *mc_ue_types[] = {
@@ -310,7 +311,16 @@ void machine_check_print_event_info(struct machine_check_event *evt)
printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
evt->disposition == MCE_DISPOSITION_RECOVERED ?
- "Recovered" : "[Not recovered");
+ "Recovered" : "Not recovered");
+
+ if (user_mode) {
+ printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
+ evt->srr0, current->pid, current->comm);
+ } else {
+ printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
+ (void *)evt->srr0);
+ }
+
printk("%s Initiator: %s\n", level,
evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
switch (evt->error_type) {
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 763d6f58caa8..de242b4bbd20 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -147,159 +147,365 @@ static int mce_flush(int what)
return 0;
}
-static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat)
-{
- if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB))
- dsisr &= ~slb;
- if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT))
- dsisr &= ~erat;
- if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB))
- dsisr &= ~tlb;
- /* Any other errors we don't understand? */
- if (dsisr)
- return 0;
- return 1;
-}
-
-static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
+#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
+
+struct mce_ierror_table {
+ unsigned long srr1_mask;
+ unsigned long srr1_value;
+ bool nip_valid; /* nip is a valid indicator of faulting address */
+ unsigned int error_type;
+ unsigned int error_subtype;
+ unsigned int initiator;
+ unsigned int severity;
+};
+
+static const struct mce_ierror_table mce_p7_ierror_table[] = {
+{ 0x00000000001c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000001c0000, 0x00000000001c0000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, 0, 0, 0, 0, 0 } };
+
+static const struct mce_ierror_table mce_p8_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000001c0000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008000000, true,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008040000, true,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, 0, 0, 0, 0, 0 } };
+
+static const struct mce_ierror_table mce_p9_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008000000, true,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008040000, true,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000080c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008100000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000008140000, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x0000000008180000, false,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x00000000081c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, 0, 0, 0, 0, 0 } };
+
+struct mce_derror_table {
+ unsigned long dsisr_value;
+ bool dar_valid; /* dar is a valid indicator of faulting address */
+ unsigned int error_type;
+ unsigned int error_subtype;
+ unsigned int initiator;
+ unsigned int severity;
+};
+
+static const struct mce_derror_table mce_p7_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000040, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, false, 0, 0, 0, 0 } };
+
+static const struct mce_derror_table mce_p8_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00002000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00001000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000200, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, false, 0, 0, 0, 0 } };
+
+static const struct mce_derror_table mce_p9_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00002000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00001000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000200, false,
+ MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000040, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000020, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000010, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000008, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0, false, 0, 0, 0, 0 } };
+
+static int mce_handle_ierror(struct pt_regs *regs,
+ const struct mce_ierror_table table[],
+ struct mce_error_info *mce_err, uint64_t *addr)
{
- long handled = 1;
+ uint64_t srr1 = regs->msr;
+ int handled = 0;
+ int i;
+
+ *addr = 0;
+
+ for (i = 0; table[i].srr1_mask; i++) {
+ if ((srr1 & table[i].srr1_mask) != table[i].srr1_value)
+ continue;
+
+ /* attempt to correct the error */
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_SLB:
+ handled = mce_flush(MCE_FLUSH_SLB);
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ handled = mce_flush(MCE_FLUSH_ERAT);
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ handled = mce_flush(MCE_FLUSH_TLB);
+ break;
+ }
- /*
- * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
- * reset the error bits whenever we handle them so that at the end
- * we can check whether we handled all of them or not.
- * */
-#ifdef CONFIG_PPC_STD_MMU_64
- if (dsisr & slb_error_bits) {
- flush_and_reload_slb();
- /* reset error bits */
- dsisr &= ~(slb_error_bits);
- }
- if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
- cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
- /* reset error bits */
- dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
+ /* now fill in mce_error_info */
+ mce_err->error_type = table[i].error_type;
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_UE:
+ mce_err->u.ue_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ mce_err->u.slb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ mce_err->u.erat_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ mce_err->u.tlb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_USER:
+ mce_err->u.user_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_RA:
+ mce_err->u.ra_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ mce_err->u.link_error_type = table[i].error_subtype;
+ break;
+ }
+ mce_err->severity = table[i].severity;
+ mce_err->initiator = table[i].initiator;
+ if (table[i].nip_valid)
+ *addr = regs->nip;
+ return handled;
}
-#endif
- /* Any other errors we don't understand? */
- if (dsisr & 0xffffffffUL)
- handled = 0;
- return handled;
-}
+ mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
+ mce_err->severity = MCE_SEV_ERROR_SYNC;
+ mce_err->initiator = MCE_INITIATOR_CPU;
-static long mce_handle_derror_p7(uint64_t dsisr)
-{
- return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
+ return 0;
}
-static long mce_handle_common_ierror(uint64_t srr1)
+static int mce_handle_derror(struct pt_regs *regs,
+ const struct mce_derror_table table[],
+ struct mce_error_info *mce_err, uint64_t *addr)
{
- long handled = 0;
-
- switch (P7_SRR1_MC_IFETCH(srr1)) {
- case 0:
- break;
-#ifdef CONFIG_PPC_STD_MMU_64
- case P7_SRR1_MC_IFETCH_SLB_PARITY:
- case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
- /* flush and reload SLBs for SLB errors. */
- flush_and_reload_slb();
- handled = 1;
- break;
- case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
- cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
- handled = 1;
+ uint64_t dsisr = regs->dsisr;
+ int handled = 0;
+ int found = 0;
+ int i;
+
+ *addr = 0;
+
+ for (i = 0; table[i].dsisr_value; i++) {
+ if (!(dsisr & table[i].dsisr_value))
+ continue;
+
+ /* attempt to correct the error */
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_SLB:
+ if (mce_flush(MCE_FLUSH_SLB))
+ handled = 1;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ if (mce_flush(MCE_FLUSH_ERAT))
+ handled = 1;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ if (mce_flush(MCE_FLUSH_TLB))
+ handled = 1;
+ break;
}
- break;
-#endif
- default:
- break;
- }
-
- return handled;
-}
-
-static long mce_handle_ierror_p7(uint64_t srr1)
-{
- long handled = 0;
- handled = mce_handle_common_ierror(srr1);
+ /*
+ * Attempt to handle multiple conditions, but only return
+ * one. Ensure uncorrectable errors are first in the table
+ * to match.
+ */
+ if (found)
+ continue;
+
+ /* now fill in mce_error_info */
+ mce_err->error_type = table[i].error_type;
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_UE:
+ mce_err->u.ue_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ mce_err->u.slb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ mce_err->u.erat_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ mce_err->u.tlb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_USER:
+ mce_err->u.user_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_RA:
+ mce_err->u.ra_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ mce_err->u.link_error_type = table[i].error_subtype;
+ break;
+ }
+ mce_err->severity = table[i].severity;
+ mce_err->initiator = table[i].initiator;
+ if (table[i].dar_valid)
+ *addr = regs->dar;
-#ifdef CONFIG_PPC_STD_MMU_64
- if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
- flush_and_reload_slb();
- handled = 1;
+ found = 1;
}
-#endif
- return handled;
-}
-static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
-{
- switch (P7_SRR1_MC_IFETCH(srr1)) {
- case P7_SRR1_MC_IFETCH_SLB_PARITY:
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
- break;
- case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
- break;
- case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_TLB;
- mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
- break;
- case P7_SRR1_MC_IFETCH_UE:
- case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
- break;
- case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type =
- MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
- break;
- }
-}
+ if (found)
+ return handled;
-static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
-{
- mce_get_common_ierror(mce_err, srr1);
- if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
- }
-}
+ mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
+ mce_err->severity = MCE_SEV_ERROR_SYNC;
+ mce_err->initiator = MCE_INITIATOR_CPU;
-static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
-{
- if (dsisr & P7_DSISR_MC_UE) {
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
- } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type =
- MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
- } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
- } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
- } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
- mce_err->error_type = MCE_ERROR_TYPE_TLB;
- mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
- } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
- }
+ return 0;
}
static long mce_handle_ue_error(struct pt_regs *regs)
@@ -320,292 +526,42 @@ static long mce_handle_ue_error(struct pt_regs *regs)
return handled;
}
-long __machine_check_early_realmode_p7(struct pt_regs *regs)
+static long mce_handle_error(struct pt_regs *regs,
+ const struct mce_derror_table dtable[],
+ const struct mce_ierror_table itable[])
{
- uint64_t srr1, nip, addr;
- long handled = 1;
- struct mce_error_info mce_error_info = { 0 };
-
- mce_error_info.severity = MCE_SEV_ERROR_SYNC;
- mce_error_info.initiator = MCE_INITIATOR_CPU;
-
- srr1 = regs->msr;
- nip = regs->nip;
+ struct mce_error_info mce_err = { 0 };
+ uint64_t addr;
+ uint64_t srr1 = regs->msr;
+ long handled;
- /*
- * Handle memory errors depending whether this was a load/store or
- * ifetch exception. Also, populate the mce error_type and
- * type-specific error_type from either SRR1 or DSISR, depending
- * whether this was a load/store or ifetch exception
- */
- if (P7_SRR1_MC_LOADSTORE(srr1)) {
- handled = mce_handle_derror_p7(regs->dsisr);
- mce_get_derror_p7(&mce_error_info, regs->dsisr);
- addr = regs->dar;
- } else {
- handled = mce_handle_ierror_p7(srr1);
- mce_get_ierror_p7(&mce_error_info, srr1);
- addr = regs->nip;
- }
+ if (SRR1_MC_LOADSTORE(srr1))
+ handled = mce_handle_derror(regs, dtable, &mce_err, &addr);
+ else
+ handled = mce_handle_ierror(regs, itable, &mce_err, &addr);
- /* Handle UE error. */
- if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+ if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
handled = mce_handle_ue_error(regs);
- save_mce_event(regs, handled, &mce_error_info, nip, addr);
- return handled;
-}
-
-static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
-{
- mce_get_common_ierror(mce_err, srr1);
- if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- }
-}
-
-static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
-{
- mce_get_derror_p7(mce_err, dsisr);
- if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- }
-}
-
-static long mce_handle_ierror_p8(uint64_t srr1)
-{
- long handled = 0;
-
- handled = mce_handle_common_ierror(srr1);
-
-#ifdef CONFIG_PPC_STD_MMU_64
- if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
- flush_and_reload_slb();
- handled = 1;
- }
-#endif
- return handled;
-}
-
-static long mce_handle_derror_p8(uint64_t dsisr)
-{
- return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
-}
-
-long __machine_check_early_realmode_p8(struct pt_regs *regs)
-{
- uint64_t srr1, nip, addr;
- long handled = 1;
- struct mce_error_info mce_error_info = { 0 };
-
- mce_error_info.severity = MCE_SEV_ERROR_SYNC;
- mce_error_info.initiator = MCE_INITIATOR_CPU;
-
- srr1 = regs->msr;
- nip = regs->nip;
-
- if (P7_SRR1_MC_LOADSTORE(srr1)) {
- handled = mce_handle_derror_p8(regs->dsisr);
- mce_get_derror_p8(&mce_error_info, regs->dsisr);
- addr = regs->dar;
- } else {
- handled = mce_handle_ierror_p8(srr1);
- mce_get_ierror_p8(&mce_error_info, srr1);
- addr = regs->nip;
- }
-
- /* Handle UE error. */
- if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
- handled = mce_handle_ue_error(regs);
+ save_mce_event(regs, handled, &mce_err, regs->nip, addr);
- save_mce_event(regs, handled, &mce_error_info, nip, addr);
return handled;
}
-static int mce_handle_derror_p9(struct pt_regs *regs)
-{
- uint64_t dsisr = regs->dsisr;
-
- return mce_handle_flush_derrors(dsisr,
- P9_DSISR_MC_SLB_PARITY_MFSLB |
- P9_DSISR_MC_SLB_MULTIHIT_MFSLB,
-
- P9_DSISR_MC_TLB_MULTIHIT_MFTLB,
-
- P9_DSISR_MC_ERAT_MULTIHIT);
-}
-
-static int mce_handle_ierror_p9(struct pt_regs *regs)
-{
- uint64_t srr1 = regs->msr;
-
- switch (P9_SRR1_MC_IFETCH(srr1)) {
- case P9_SRR1_MC_IFETCH_SLB_PARITY:
- case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
- return mce_flush(MCE_FLUSH_SLB);
- case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
- return mce_flush(MCE_FLUSH_TLB);
- case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
- return mce_flush(MCE_FLUSH_ERAT);
- default:
- return 0;
- }
-}
-
-static void mce_get_derror_p9(struct pt_regs *regs,
- struct mce_error_info *mce_err, uint64_t *addr)
+long __machine_check_early_realmode_p7(struct pt_regs *regs)
{
- uint64_t dsisr = regs->dsisr;
-
- mce_err->severity = MCE_SEV_ERROR_SYNC;
- mce_err->initiator = MCE_INITIATOR_CPU;
+ /* P7 DD1 leaves top bits of DSISR undefined */
+ regs->dsisr &= 0x0000ffff;
- if (dsisr & P9_DSISR_MC_USER_TLBIE)
- *addr = regs->nip;
- else
- *addr = regs->dar;
-
- if (dsisr & P9_DSISR_MC_UE) {
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
- } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) {
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
- } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) {
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT;
- } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) {
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT;
- } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) {
- mce_err->error_type = MCE_ERROR_TYPE_TLB;
- mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
- } else if (dsisr & P9_DSISR_MC_USER_TLBIE) {
- mce_err->error_type = MCE_ERROR_TYPE_USER;
- mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE;
- } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
- } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
- } else if (dsisr & P9_DSISR_MC_RA_LOAD) {
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD;
- } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) {
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
- } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) {
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
- } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) {
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN;
- }
+ return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table);
}
-static void mce_get_ierror_p9(struct pt_regs *regs,
- struct mce_error_info *mce_err, uint64_t *addr)
+long __machine_check_early_realmode_p8(struct pt_regs *regs)
{
- uint64_t srr1 = regs->msr;
-
- switch (P9_SRR1_MC_IFETCH(srr1)) {
- case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
- case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
- mce_err->severity = MCE_SEV_FATAL;
- break;
- default:
- mce_err->severity = MCE_SEV_ERROR_SYNC;
- break;
- }
-
- mce_err->initiator = MCE_INITIATOR_CPU;
-
- *addr = regs->nip;
-
- switch (P9_SRR1_MC_IFETCH(srr1)) {
- case P9_SRR1_MC_IFETCH_UE:
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
- break;
- case P9_SRR1_MC_IFETCH_SLB_PARITY:
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
- break;
- case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
- break;
- case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- break;
- case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_TLB;
- mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
- break;
- case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD:
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
- break;
- case P9_SRR1_MC_IFETCH_LINK_TIMEOUT:
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT;
- break;
- case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT:
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT;
- break;
- case P9_SRR1_MC_IFETCH_RA:
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH;
- break;
- case P9_SRR1_MC_IFETCH_RA_TABLEWALK:
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH;
- break;
- case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_STORE;
- break;
- case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
- mce_err->error_type = MCE_ERROR_TYPE_LINK;
- mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT;
- break;
- case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN:
- mce_err->error_type = MCE_ERROR_TYPE_RA;
- mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN;
- break;
- default:
- break;
- }
+ return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table);
}
long __machine_check_early_realmode_p9(struct pt_regs *regs)
{
- uint64_t nip, addr;
- long handled;
- struct mce_error_info mce_error_info = { 0 };
-
- nip = regs->nip;
-
- if (P9_SRR1_MC_LOADSTORE(regs->msr)) {
- handled = mce_handle_derror_p9(regs);
- mce_get_derror_p9(regs, &mce_error_info, &addr);
- } else {
- handled = mce_handle_ierror_p9(regs);
- mce_get_ierror_p9(regs, &mce_error_info, &addr);
- }
-
- /* Handle UE error. */
- if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
- handled = mce_handle_ue_error(regs);
-
- save_mce_event(regs, handled, &mce_error_info, nip, addr);
- return handled;
+ return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
}
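
The rewrite above replaces the per-CPU decode functions with mask/value tables walked by generic routines: on the instruction side the first row whose masked SRR1 matches decides the error, while the data side tests individual DSISR bits, may attempt several flushes, but reports only the first match. A compressed sketch of the instruction-side lookup, with simplified field names rather than the kernel's structs:

#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>

struct ierror_row {
	uint64_t srr1_mask;	/* which SRR1 bits identify this error class */
	uint64_t srr1_value;	/* required value of those bits */
	bool	 nip_valid;	/* NIP points at the faulting address */
	int	 error_type;	/* e.g. SLB / ERAT / TLB / UE ... */
};

/* First matching row wins; a zero mask terminates the table. */
static const struct ierror_row *decode_ierror(const struct ierror_row *table,
					      uint64_t srr1)
{
	size_t i;

	for (i = 0; table[i].srr1_mask; i++)
		if ((srr1 & table[i].srr1_mask) == table[i].srr1_value)
			return &table[i];
	return NULL;		/* fall back to MCE_ERROR_TYPE_UNKNOWN */
}
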
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index dfc479df9634..8d63627e067f 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -245,3 +245,24 @@ void __init free_unused_pacas(void)
free_lppacas();
}
+
+void copy_mm_to_paca(struct mm_struct *mm)
+{
+#ifdef CONFIG_PPC_BOOK3S
+ mm_context_t *context = &mm->context;
+
+ get_paca()->mm_ctx_id = context->id;
+#ifdef CONFIG_PPC_MM_SLICES
+ VM_BUG_ON(!mm->context.addr_limit);
+ get_paca()->addr_limit = mm->context.addr_limit;
+ get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+ memcpy(&get_paca()->mm_ctx_high_slices_psize,
+ &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
+#else /* CONFIG_PPC_MM_SLICES */
+ get_paca()->mm_ctx_user_psize = context->user_psize;
+ get_paca()->mm_ctx_sllp = context->sllp;
+#endif
+#else /* CONFIG_PPC_BOOK3S */
+ return;
+#endif
+}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index f5d399e46193..d2f0afeae5a0 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -55,7 +55,6 @@
#include <asm/kexec.h>
#include <asm/opal.h>
#include <asm/fadump.h>
-#include <asm/debug.h>
#include <asm/epapr_hcalls.h>
#include <asm/firmware.h>
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 1c1b44ec7642..dd8a04f3053a 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -815,7 +815,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.virt_base = cpu_to_be32(0xffffffff),
.virt_size = cpu_to_be32(0xffffffff),
.load_base = cpu_to_be32(0xffffffff),
- .min_rma = cpu_to_be32(256), /* 256MB min RMA */
+ .min_rma = cpu_to_be32(512), /* 512MB min RMA */
.min_load = cpu_to_be32(0xffffffff), /* full client load */
.min_rma_percent = 0, /* min RMA percentage of total RAM */
.max_pft_size = 48, /* max log_2(hash table size) */
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 4697da895133..5c10b5925ac2 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -31,11 +31,11 @@
#include <linux/unistd.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
-#include <linux/debugfs.h>
#include <linux/percpu.h>
#include <linux/memblock.h>
#include <linux/of_platform.h>
#include <linux/hugetlb.h>
+#include <asm/debugfs.h>
#include <asm/io.h>
#include <asm/paca.h>
#include <asm/prom.h>
@@ -920,6 +920,15 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = klimit;
+
+#ifdef CONFIG_PPC_MM_SLICES
+#ifdef CONFIG_PPC64
+ init_mm.context.addr_limit = TASK_SIZE_128TB;
+#else
+#error "context.addr_limit not initialized."
+#endif
+#endif
+
#ifdef CONFIG_PPC_64K_PAGES
init_mm.context.pte_frag = NULL;
#endif
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 9cfaa8b69b5f..729e990a019d 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -230,8 +230,8 @@ static void cpu_ready_for_interrupts(void)
* If we are not in hypervisor mode the job is done once for
* the whole partition in configure_exceptions().
*/
- if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
- early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ if (cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_ARCH_207S)) {
unsigned long lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
}
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 66711958493c..d534ed901538 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -59,7 +59,14 @@ EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
- save_context_stack(trace, tsk->thread.ksp, tsk, 0);
+ unsigned long sp;
+
+ if (tsk == current)
+ sp = current_stack_pointer();
+ else
+ sp = tsk->thread.ksp;
+
+ save_context_stack(trace, sp, tsk, 0);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c
index 6ae9bd5086a4..0050b2d2ff7a 100644
--- a/arch/powerpc/kernel/swsusp.c
+++ b/arch/powerpc/kernel/swsusp.c
@@ -10,6 +10,7 @@
*/
#include <linux/sched.h>
+#include <linux/suspend.h>
#include <asm/current.h>
#include <asm/mmu_context.h>
#include <asm/switch_to.h>
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index ff365f9de27a..354946236c61 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -35,13 +35,13 @@
#include <linux/backlight.h>
#include <linux/bug.h>
#include <linux/kdebug.h>
-#include <linux/debugfs.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
+#include <asm/debugfs.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index a587e8f4fd26..74b0153780e3 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -229,6 +229,7 @@ void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
{
+ unsigned long vsid_bits = VSID_BITS_65_256M;
struct kvmppc_sid_map *map;
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
u16 sid_map_mask;
@@ -257,7 +258,12 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
kvmppc_mmu_pte_flush(vcpu, 0, 0);
kvmppc_mmu_flush_segments(vcpu);
}
- map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++, 256M);
+
+ if (mmu_has_feature(MMU_FTR_68_BIT_VA))
+ vsid_bits = VSID_BITS_256M;
+
+ map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++,
+ VSID_MULTIPLIER_256M, vsid_bits);
map->guest_vsid = gvsid;
map->valid = true;
@@ -390,7 +396,7 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
int err;
- err = __init_new_context();
+ err = hash__alloc_context_id();
if (err < 0)
return -1;
vcpu3s->context_id[0] = err;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 3a1a463a039a..ffde4507ddfd 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -16,7 +16,6 @@
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
-#include <asm/debug.h>
#include <asm/synch.h>
#include <asm/cputhreads.h>
#include <asm/pgtable.h>
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index ef4fd528c193..459b72cb617a 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -19,10 +19,9 @@
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
-#include <asm/debug.h>
+#include <asm/debugfs.h>
#include <asm/time.h>
-#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include "book3s_xics.h"
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 51def8a515be..3a7d580fdc59 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -120,8 +120,6 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address,
siginfo_t info;
unsigned int lsb = 0;
- up_read(&current->mm->mmap_sem);
-
if (!user_mode(regs))
return MM_FAULT_ERR(SIGBUS);
@@ -154,13 +152,6 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
* continue the pagefault.
*/
if (fatal_signal_pending(current)) {
- /*
- * If we have retry set, the mmap semaphore will have
- * already been released in __lock_page_or_retry(). Else
- * we release it now.
- */
- if (!(fault & VM_FAULT_RETRY))
- up_read(&current->mm->mmap_sem);
/* Coming from kernel, we need to deal with uaccess fixups */
if (user_mode(regs))
return MM_FAULT_RETURN;
@@ -173,8 +164,6 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
/* Out of memory */
if (fault & VM_FAULT_OOM) {
- up_read(&current->mm->mmap_sem);
-
/*
* We ran out of memory, or some other thing happened to us that
* made us unable to handle the page fault gracefully.
@@ -298,7 +287,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* can result in fault, which will cause a deadlock when called with
* mmap_sem held
*/
- if (user_mode(regs))
+ if (!is_exec && user_mode(regs))
store_update_sp = store_updates_sp(regs);
if (user_mode(regs))
@@ -458,9 +447,30 @@ good_area:
* the fault.
*/
fault = handle_mm_fault(vma, address, flags);
+
+ /*
+ * Handle the retry right now, the mmap_sem has been released in that
+ * case.
+ */
+ if (unlikely(fault & VM_FAULT_RETRY)) {
+ /* We retry only once */
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ /*
+ * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
+ * of starvation.
+ */
+ flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
+ if (!fatal_signal_pending(current))
+ goto retry;
+ }
+ /* We will enter mm_fault_error() below */
+ } else
+ up_read(&current->mm->mmap_sem);
+
if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
if (fault & VM_FAULT_SIGSEGV)
- goto bad_area;
+ goto bad_area_nosemaphore;
rc = mm_fault_error(regs, address, fault);
if (rc >= MM_FAULT_RETURN)
goto bail;
@@ -469,41 +479,29 @@ good_area:
}
/*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
+ * Major/minor page fault accounting.
*/
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
+ if (fault & VM_FAULT_MAJOR) {
+ current->maj_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+ regs, address);
#ifdef CONFIG_PPC_SMLPAR
- if (firmware_has_feature(FW_FEATURE_CMO)) {
- u32 page_ins;
-
- preempt_disable();
- page_ins = be32_to_cpu(get_lppaca()->page_ins);
- page_ins += 1 << PAGE_FACTOR;
- get_lppaca()->page_ins = cpu_to_be32(page_ins);
- preempt_enable();
- }
-#endif /* CONFIG_PPC_SMLPAR */
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- }
- if (fault & VM_FAULT_RETRY) {
- /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
- * of starvation. */
- flags &= ~FAULT_FLAG_ALLOW_RETRY;
- flags |= FAULT_FLAG_TRIED;
- goto retry;
+ if (firmware_has_feature(FW_FEATURE_CMO)) {
+ u32 page_ins;
+
+ preempt_disable();
+ page_ins = be32_to_cpu(get_lppaca()->page_ins);
+ page_ins += 1 << PAGE_FACTOR;
+ get_lppaca()->page_ins = cpu_to_be32(page_ins);
+ preempt_enable();
}
+#endif /* CONFIG_PPC_SMLPAR */
+ } else {
+ current->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+ regs, address);
}
- up_read(&mm->mmap_sem);
goto bail;
bad_area:
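
The fault.c rework above centralises mmap_sem handling: on VM_FAULT_RETRY the core MM has already dropped the semaphore and the fault is retried at most once (clearing FAULT_FLAG_ALLOW_RETRY, setting FAULT_FLAG_TRIED); otherwise the fault path releases it itself, and accounting now runs for every completed fault. A reduced sketch of that decision, with the flag values as illustrative stand-ins for the kernel's:

#include <stdbool.h>

#define VM_FAULT_RETRY		0x0400	/* illustrative values only */
#define FAULT_FLAG_ALLOW_RETRY	0x0004
#define FAULT_FLAG_TRIED	0x0020

/* Returns true if the caller should loop back and retry the fault. */
static bool fault_should_retry(unsigned int fault, unsigned int *flags,
			       bool fatal_signal_pending)
{
	if (!(fault & VM_FAULT_RETRY))
		return false;		/* completed: caller drops mmap_sem */

	/* mmap_sem was already released by the core MM on the retry path */
	if ((*flags & FAULT_FLAG_ALLOW_RETRY) && !fatal_signal_pending) {
		*flags &= ~FAULT_FLAG_ALLOW_RETRY;	/* only one retry */
		*flags |= FAULT_FLAG_TRIED;
		return true;
	}
	return false;
}
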
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index 09cc50c8dace..6f962e5cb5e1 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -31,10 +31,8 @@
#ifdef CONFIG_SMP
.section .bss
.align 2
- .globl mmu_hash_lock
mmu_hash_lock:
.space 4
-EXPORT_SYMBOL(mmu_hash_lock)
#endif /* CONFIG_SMP */
/*
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index c554768b1fa2..f2095ce9d4b0 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -35,9 +35,8 @@
#include <linux/memblock.h>
#include <linux/context_tracking.h>
#include <linux/libfdt.h>
-#include <linux/debugfs.h>
-#include <asm/debug.h>
+#include <asm/debugfs.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
@@ -927,11 +926,6 @@ static void __init htab_initialize(void)
}
#endif /* CONFIG_DEBUG_PAGEALLOC */
- /* On U3 based machines, we need to reserve the DART area and
- * _NOT_ map it to avoid cache paradoxes as it's remapped non
- * cacheable later on
- */
-
/* create bolted the linear mapping in the hash table */
for_each_memblock(memory, reg) {
base = (unsigned long)__va(reg->base);
@@ -981,6 +975,19 @@ void __init hash__early_init_devtree(void)
void __init hash__early_init_mmu(void)
{
+ /*
+ * We have code in __hash_page_64K() and elsewhere, which assumes it can
+ * do the following:
+ * new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+ *
+ * Where the slot number is between 0-15, and values of 8-15 indicate
+ * the secondary bucket. For that code to work H_PAGE_F_SECOND and
+ * H_PAGE_F_GIX must occupy four contiguous bits in the PTE, and
+ * H_PAGE_F_SECOND must be placed above H_PAGE_F_GIX. Assert that here
+ * with a BUILD_BUG_ON().
+ */
+ BUILD_BUG_ON(H_PAGE_F_SECOND != (1ul << (H_PAGE_F_GIX_SHIFT + 3)));
+
htab_init_page_sizes();
/*
@@ -1120,7 +1127,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
copro_flush_all_slbs(mm);
if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
- copy_mm_to_paca(&mm->context);
+ copy_mm_to_paca(mm);
slb_flush_and_rebolt();
}
}
@@ -1192,7 +1199,7 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
{
if (user_region) {
if (psize != get_paca_psize(ea)) {
- copy_mm_to_paca(&mm->context);
+ copy_mm_to_paca(mm);
slb_flush_and_rebolt();
}
} else if (get_paca()->vmalloc_sllp !=
@@ -1855,5 +1862,4 @@ static int __init hash64_debugfs(void)
return 0;
}
machine_device_initcall(pseries, hash64_debugfs);
-
#endif /* CONFIG_DEBUG_FS */
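
The new BUILD_BUG_ON() in hash__early_init_mmu() guards the PTE idiom quoted in the comment: the 4-bit hash slot number (8..15 meaning the secondary bucket) must land on four contiguous PTE bits, three for H_PAGE_F_GIX and the next one up for H_PAGE_F_SECOND. A stand-alone check of that layout; the bit positions below are illustrative, the real ones come from the hash headers and vary by page size.

#include <stdint.h>
#include <assert.h>

#define H_PAGE_F_GIX_SHIFT	56				/* illustrative */
#define H_PAGE_F_GIX		(7ULL << H_PAGE_F_GIX_SHIFT)	/* 3 bits */
#define H_PAGE_F_SECOND		(1ULL << (H_PAGE_F_GIX_SHIFT + 3))

int main(void)
{
	uint64_t pte = 0;
	unsigned int slot = 11;		/* slots 8..15 are the secondary bucket */

	/* the idiom the BUILD_BUG_ON() protects */
	pte |= ((uint64_t)slot << H_PAGE_F_GIX_SHIFT) &
	       (H_PAGE_F_SECOND | H_PAGE_F_GIX);

	assert(((pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT) == (slot & 7));
	assert(pte & H_PAGE_F_SECOND);	/* secondary bit set because slot >= 8 */
	return 0;
}
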
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index 83a8be791e06..bfe4e8526b2d 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -148,16 +148,9 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
mm = vma->vm_mm;
-#ifdef CONFIG_PPC_MM_SLICES
- psize = get_slice_psize(mm, ea);
- tsize = mmu_get_tsize(psize);
- shift = mmu_psize_defs[psize].shift;
-#else
psize = vma_mmu_pagesize(vma);
shift = __ilog2(psize);
tsize = shift - 10;
-#endif
-
/*
* We can't be interrupted while we're setting up the MAS
* registers or after we've confirmed that no tlb exists.
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
index 35254a678456..0aa9cade422f 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -50,9 +50,12 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct hstate *h = hstate_file(file);
struct vm_unmapped_area_info info;
+ if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
+ mm->context.addr_limit = TASK_SIZE;
+
if (len & ~huge_page_mask(h))
return -EINVAL;
- if (len > TASK_SIZE)
+ if (len > mm->context.addr_limit)
return -ENOMEM;
if (flags & MAP_FIXED) {
@@ -64,7 +67,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr) {
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr &&
+ if (mm->context.addr_limit - len >= addr &&
(!vma || addr + len <= vma->vm_start))
return addr;
}
@@ -78,5 +81,9 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
info.high_limit = current->mm->mmap_base;
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
+
+ if (addr > DEFAULT_MAP_WINDOW)
+ info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
+
return vm_unmapped_area(&info);
}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 8c3389cbcd12..a4f33de4008e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -753,6 +753,24 @@ static int __init add_huge_page_size(unsigned long long size)
if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
return -EINVAL;
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * We need to make sure that for different page sizes reported by
+ * firmware we only add hugetlb support for page sizes that can be
+ * supported by linux page table layout.
+ * For now we have
+ * Radix: 2M
+ * Hash: 16M and 16G
+ */
+ if (radix_enabled()) {
+ if (mmu_psize != MMU_PAGE_2M)
+ return -EINVAL;
+ } else {
+ if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
+ return -EINVAL;
+ }
+#endif
+
BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);
/* Return if huge page size has already been setup */
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 9be992083d2a..8f6f2a173e47 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -71,10 +71,6 @@
#if H_PGTABLE_RANGE > USER_VSID_RANGE
#warning Limited user VSID range means pagetable space is wasted
#endif
-
-#if (TASK_SIZE_USER64 < H_PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
-#warning TASK_SIZE is smaller than it needs to be.
-#endif
#endif /* CONFIG_PPC_STD_MMU_64 */
phys_addr_t memstart_addr = ~0;
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index a5d9ef59debe..b2111baa0da6 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -79,7 +79,7 @@ static inline unsigned long mmap_base(unsigned long rnd)
else if (gap > MAX_GAP)
gap = MAX_GAP;
- return PAGE_ALIGN(TASK_SIZE - gap - rnd);
+ return PAGE_ALIGN(DEFAULT_MAP_WINDOW - gap - rnd);
}
#ifdef CONFIG_PPC_RADIX_MMU
@@ -97,7 +97,10 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct vm_area_struct *vma;
struct vm_unmapped_area_info info;
- if (len > TASK_SIZE - mmap_min_addr)
+ if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
+ mm->context.addr_limit = TASK_SIZE;
+
+ if (len > mm->context.addr_limit - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED)
@@ -106,7 +109,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+ if (mm->context.addr_limit - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vma->vm_start))
return addr;
}
@@ -114,8 +117,13 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_base;
- info.high_limit = TASK_SIZE;
info.align_mask = 0;
+
+ if (unlikely(addr > DEFAULT_MAP_WINDOW))
+ info.high_limit = mm->context.addr_limit;
+ else
+ info.high_limit = DEFAULT_MAP_WINDOW;
+
return vm_unmapped_area(&info);
}
@@ -131,8 +139,11 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
+ if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
+ mm->context.addr_limit = TASK_SIZE;
+
/* requested length too big for entire address space */
- if (len > TASK_SIZE - mmap_min_addr)
+ if (len > mm->context.addr_limit - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED)
@@ -142,7 +153,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+ if (mm->context.addr_limit - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vma->vm_start))
return addr;
}
@@ -152,7 +163,14 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
info.high_limit = mm->mmap_base;
info.align_mask = 0;
+
+ if (addr > DEFAULT_MAP_WINDOW)
+ info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
+
addr = vm_unmapped_area(&info);
+ if (!(addr & ~PAGE_MASK))
+ return addr;
+ VM_BUG_ON(addr != -ENOMEM);
/*
* A failed mmap() very likely causes application failure,
@@ -160,15 +178,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
* can happen with large stack limits and large mmap()
* allocations.
*/
- if (addr & ~PAGE_MASK) {
- VM_BUG_ON(addr != -ENOMEM);
- info.flags = 0;
- info.low_limit = TASK_UNMAPPED_BASE;
- info.high_limit = TASK_SIZE;
- addr = vm_unmapped_area(&info);
- }
-
- return addr;
+ return radix__arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
}
static void radix__arch_pick_mmap_layout(struct mm_struct *mm,
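
The mmap.c changes keep new mappings inside a 128TB default window unless the caller passed an address hint above it, in which case the search limit grows to the mm's full addr_limit. A one-function sketch of that limit selection; DEFAULT_MAP_WINDOW is given an illustrative value here rather than the kernel's definition.

#include <stdint.h>

#define DEFAULT_MAP_WINDOW	(128ULL << 40)	/* 128TB, illustrative */

static uint64_t mmap_search_limit(uint64_t hint_addr, uint64_t addr_limit)
{
	/* only a hint above the window opens up the expanded address space */
	if (hint_addr > DEFAULT_MAP_WINDOW)
		return addr_limit;
	return DEFAULT_MAP_WINDOW;
}
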
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 73bf6e14c3aa..c6dca2ae78ef 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -30,17 +30,16 @@
static DEFINE_SPINLOCK(mmu_context_lock);
static DEFINE_IDA(mmu_context_ida);
-int __init_new_context(void)
+static int alloc_context_id(int min_id, int max_id)
{
- int index;
- int err;
+ int index, err;
again:
if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
return -ENOMEM;
spin_lock(&mmu_context_lock);
- err = ida_get_new_above(&mmu_context_ida, 1, &index);
+ err = ida_get_new_above(&mmu_context_ida, min_id, &index);
spin_unlock(&mmu_context_lock);
if (err == -EAGAIN)
@@ -48,7 +47,7 @@ again:
else if (err)
return err;
- if (index > MAX_USER_CONTEXT) {
+ if (index > max_id) {
spin_lock(&mmu_context_lock);
ida_remove(&mmu_context_ida, index);
spin_unlock(&mmu_context_lock);
@@ -57,48 +56,105 @@ again:
return index;
}
-EXPORT_SYMBOL_GPL(__init_new_context);
-static int radix__init_new_context(struct mm_struct *mm, int index)
+
+void hash__reserve_context_id(int id)
+{
+ int rc, result = 0;
+
+ do {
+ if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
+ break;
+
+ spin_lock(&mmu_context_lock);
+ rc = ida_get_new_above(&mmu_context_ida, id, &result);
+ spin_unlock(&mmu_context_lock);
+ } while (rc == -EAGAIN);
+
+ WARN(result != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, result);
+}
+
+int hash__alloc_context_id(void)
+{
+ unsigned long max;
+
+ if (mmu_has_feature(MMU_FTR_68_BIT_VA))
+ max = MAX_USER_CONTEXT;
+ else
+ max = MAX_USER_CONTEXT_65BIT_VA;
+
+ return alloc_context_id(MIN_USER_CONTEXT, max);
+}
+EXPORT_SYMBOL_GPL(hash__alloc_context_id);
+
+static int hash__init_new_context(struct mm_struct *mm)
+{
+ int index;
+
+ index = hash__alloc_context_id();
+ if (index < 0)
+ return index;
+
+ /*
+ * We do switch_slb() early in fork, even before we set up the
+ * mm->context.addr_limit. Default to max task size so that we copy the
+ * default values to paca which will help us to handle slb miss early.
+ */
+ mm->context.addr_limit = TASK_SIZE_128TB;
+
+ /*
+ * The old code would re-promote on fork, we don't do that when using
+ * slices as it could cause problem promoting slices that have been
+ * forced down to 4K.
+ *
+ * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
+ * explicitly against context.id == 0. This ensures that we properly
+ * initialize context slice details for newly allocated mm's (which will
+ * have id == 0) and don't alter context slice inherited via fork (which
+ * will have id != 0).
+ *
+ * We should not be calling init_new_context() on init_mm. Hence a
+ * check against 0 is OK.
+ */
+ if (mm->context.id == 0)
+ slice_set_user_psize(mm, mmu_virtual_psize);
+
+ subpage_prot_init_new_context(mm);
+
+ return index;
+}
+
+static int radix__init_new_context(struct mm_struct *mm)
{
unsigned long rts_field;
+ int index;
+
+ index = alloc_context_id(1, PRTB_ENTRIES - 1);
+ if (index < 0)
+ return index;
/*
* set the process table entry,
*/
rts_field = radix__get_tree_size();
process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
- return 0;
+
+ mm->context.npu_context = NULL;
+
+ return index;
}
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
int index;
- index = __init_new_context();
+ if (radix_enabled())
+ index = radix__init_new_context(mm);
+ else
+ index = hash__init_new_context(mm);
+
if (index < 0)
return index;
- if (radix_enabled()) {
- radix__init_new_context(mm, index);
- } else {
-
- /* The old code would re-promote on fork, we don't do that
- * when using slices as it could cause problem promoting slices
- * that have been forced down to 4K
- *
- * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
- * explicitly against context.id == 0. This ensures that we
- * properly initialize context slice details for newly allocated
- * mm's (which will have id == 0) and don't alter context slice
- * inherited via fork (which will have id != 0).
- *
- * We should not be calling init_new_context() on init_mm. Hence a
- * check against 0 is ok.
- */
- if (mm->context.id == 0)
- slice_set_user_psize(mm, mmu_virtual_psize);
- subpage_prot_init_new_context(mm);
- }
mm->context.id = index;
#ifdef CONFIG_PPC_ICSWX
mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
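
init_new_context() now splits cleanly by MMU type, and the id allocator takes an explicit range: radix is bounded by the process table size, hash by the VSID space, which shrinks further when only 65-bit VAs are available. A sketch of that range selection; the numeric limits below are placeholders, not the kernel's constants.

/* Placeholder limits; the real ones come from mmu.h and the MMU features. */
enum {
	MIN_USER_CONTEXT	  = 2,
	MAX_USER_CONTEXT	  = 0x7ffff,
	MAX_USER_CONTEXT_65BIT_VA = 0xffff,
	PRTB_ENTRIES		  = 4096,
};

static void context_id_range(int radix, int va_68bit, int *min_id, int *max_id)
{
	if (radix) {			/* bounded by the process table */
		*min_id = 1;
		*max_id = PRTB_ENTRIES - 1;
	} else {			/* bounded by the VSID space */
		*min_id = MIN_USER_CONTEXT;
		*max_id = va_68bit ? MAX_USER_CONTEXT : MAX_USER_CONTEXT_65BIT_VA;
	}
}
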
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index c491f2c8f2b9..4554d6527682 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -333,11 +333,6 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
mm->context.id = MMU_NO_CONTEXT;
mm->context.active = 0;
-
-#ifdef CONFIG_PPC_MM_SLICES
- slice_set_user_psize(mm, mmu_virtual_psize);
-#endif
-
return 0;
}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9befaee237d6..371792e4418f 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -875,13 +875,6 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
void *nd;
int tnid;
- if (spanned_pages)
- pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
- nid, start_pfn << PAGE_SHIFT,
- (end_pfn << PAGE_SHIFT) - 1);
- else
- pr_info("Initmem setup node %d\n", nid);
-
nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
nd = __va(nd_pa);
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 5e01b2ece1d0..98ae810b8c21 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -229,7 +229,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
asm volatile("slbie %0" : : "r" (slbie_data));
get_paca()->slb_cache_ptr = 0;
- copy_mm_to_paca(&mm->context);
+ copy_mm_to_paca(mm);
/*
* preload some userspace segments into the SLB.
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index a85e06ea6c20..1519617aab36 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -23,6 +23,48 @@
#include <asm/pgtable.h>
#include <asm/firmware.h>
+/*
+ * This macro generates asm code to compute the VSID scramble
+ * function. Used in slb_allocate() and do_stab_bolted. The function
+ * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
+ *
+ * rt = register containing the proto-VSID and into which the
+ * VSID will be stored
+ * rx = scratch register (clobbered)
+ * rf = flags
+ *
+ * - rt and rx must be different registers
+ * - The answer will end up in the low VSID_BITS bits of rt. The higher
+ * bits may contain other garbage, so you may need to mask the
+ * result.
+ */
+#define ASM_VSID_SCRAMBLE(rt, rx, rf, size) \
+ lis rx,VSID_MULTIPLIER_##size@h; \
+ ori rx,rx,VSID_MULTIPLIER_##size@l; \
+ mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \
+/* \
+ * powermac gets SLB faults before feature fixups are applied, so make \
+ * the 65-bit variant the default side of the feature section \
+ */ \
+BEGIN_MMU_FTR_SECTION \
+ srdi rx,rt,VSID_BITS_65_##size; \
+ clrldi rt,rt,(64-VSID_BITS_65_##size); \
+ add rt,rt,rx; \
+ addi rx,rt,1; \
+ srdi rx,rx,VSID_BITS_65_##size; \
+ add rt,rt,rx; \
+ rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \
+MMU_FTR_SECTION_ELSE \
+ srdi rx,rt,VSID_BITS_##size; \
+ clrldi rt,rt,(64-VSID_BITS_##size); \
+ add rt,rt,rx; /* add high and low bits */ \
+ addi rx,rt,1; \
+ srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \
+ add rt,rt,rx; \
+ rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
+
+
/* void slb_allocate_realmode(unsigned long ea);
*
* Create an SLB entry for the given EA (user or kernel).
@@ -45,13 +87,6 @@ _GLOBAL(slb_allocate_realmode)
/* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */
blt cr7,0f /* user or kernel? */
- /* kernel address: proto-VSID = ESID */
- /* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but
- * this code will generate the protoVSID 0xfffffffff for the
- * top segment. That's ok, the scramble below will translate
- * it to VSID 0, which is reserved as a bad VSID - one which
- * will never have any pages in it. */
-
/* Check if hitting the linear mapping or some other kernel space
*/
bne cr7,1f
@@ -63,12 +98,10 @@ _GLOBAL(slb_allocate_realmode)
slb_miss_kernel_load_linear:
li r11,0
/*
- * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
+ * context = (ea >> 60) - (0xc - 1)
* r9 = region id.
*/
- addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha
- addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l
-
+ subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET
BEGIN_FTR_SECTION
b .Lslb_finish_load
@@ -77,9 +110,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
1:
#ifdef CONFIG_SPARSEMEM_VMEMMAP
- /* Check virtual memmap region. To be patches at kernel boot */
cmpldi cr0,r9,0xf
bne 1f
+/* Check virtual memmap region. To be patched at kernel boot */
.globl slb_miss_kernel_load_vmemmap
slb_miss_kernel_load_vmemmap:
li r11,0
@@ -102,11 +135,10 @@ slb_miss_kernel_load_io:
li r11,0
6:
/*
- * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
+ * context = (ea >> 60) - (0xc - 1)
* r9 = region id.
*/
- addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha
- addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l
+ subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET
BEGIN_FTR_SECTION
b .Lslb_finish_load
@@ -117,7 +149,13 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
* For userspace addresses, make sure this is region 0.
*/
cmpdi r9, 0
- bne 8f
+ bne- 8f
+ /*
+ * For user space, make sure we are within the allowed limit
+ */
+ ld r11,PACA_ADDR_LIMIT(r13)
+ cmpld r3,r11
+ bge- 8f
/* when using slices, we extract the psize off the slice bitmaps
* and then we need to get the sllp encoding off the mmu_psize_defs
@@ -189,13 +227,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
*/
.Lslb_finish_load:
rldimi r10,r9,ESID_BITS,0
- ASM_VSID_SCRAMBLE(r10,r9,256M)
- /*
- * bits above VSID_BITS_256M need to be ignored from r10
- * also combine VSID and flags
- */
- rldimi r11,r10,SLB_VSID_SHIFT,(64 - (SLB_VSID_SHIFT + VSID_BITS_256M))
-
+ ASM_VSID_SCRAMBLE(r10,r9,r11,256M)
/* r3 = EA, r11 = VSID data */
/*
* Find a slot, round robin. Previously we tried to find a
@@ -259,12 +291,12 @@ slb_compare_rr_to_size:
.Lslb_finish_load_1T:
srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */
rldimi r10,r9,ESID_BITS_1T,0
- ASM_VSID_SCRAMBLE(r10,r9,1T)
+ ASM_VSID_SCRAMBLE(r10,r9,r11,1T)
/*
* bits above VSID_BITS_1T need to be ignored from r10
* also combine VSID and flags
*/
- rldimi r11,r10,SLB_VSID_SHIFT_1T,(64 - (SLB_VSID_SHIFT_1T + VSID_BITS_1T))
+
li r10,MMU_SEGSIZE_1T
rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 2b27458902ee..251b6bae7023 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -36,38 +36,29 @@
#include <asm/copro.h>
#include <asm/hugetlb.h>
-/* some sanity checks */
-#if (H_PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE
-#error H_PGTABLE_RANGE exceeds slice_mask high_slices size
-#endif
-
static DEFINE_SPINLOCK(slice_convert_lock);
-
+/*
+ * One bit per slice. The low slices cover 256MB segments up to the 4GB
+ * boundary, which gives us 16 low slices. Above that we track slices in
+ * 1TB units.
+ */
+struct slice_mask {
+ u64 low_slices;
+ DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
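The slice index of an address follows directly from the shifts behind the existing GET_LOW_SLICE_INDEX/GET_HIGH_SLICE_INDEX macros (256MB low slices, 1TB high slices). A small illustrative sketch, with EX_-prefixed names so it does not shadow the real macros from page_64.h:

    /* Illustrative only: which slice an address falls in */
    #define EX_SLICE_LOW_SHIFT      28      /* 256MB low slices */
    #define EX_SLICE_HIGH_SHIFT     40      /* 1TB high slices  */

    static inline unsigned int ex_low_slice(unsigned long addr)
    {
            return addr >> EX_SLICE_LOW_SHIFT;      /* e.g. 0x30000000 -> 3 */
    }

    static inline unsigned int ex_high_slice(unsigned long addr)
    {
            return addr >> EX_SLICE_HIGH_SHIFT;     /* e.g. 3TB -> 3 */
    }

So a 512MB mapping starting at 0x30000000 spans low slices 3 and 4, and slice_range_to_mask() below sets exactly those two bits in low_slices.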
#ifdef DEBUG
int _slice_debug = 1;
static void slice_print_mask(const char *label, struct slice_mask mask)
{
- char *p, buf[16 + 3 + 64 + 1];
- int i;
-
if (!_slice_debug)
return;
- p = buf;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- *(p++) = (mask.low_slices & (1 << i)) ? '1' : '0';
- *(p++) = ' ';
- *(p++) = '-';
- *(p++) = ' ';
- for (i = 0; i < SLICE_NUM_HIGH; i++)
- *(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0';
- *(p++) = 0;
-
- printk(KERN_DEBUG "%s:%s\n", label, buf);
+ pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices);
+ pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices);
}
-#define slice_dbg(fmt...) do { if (_slice_debug) pr_debug(fmt); } while(0)
+#define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0)
#else
@@ -76,25 +67,28 @@ static void slice_print_mask(const char *label, struct slice_mask mask) {}
#endif
-static struct slice_mask slice_range_to_mask(unsigned long start,
- unsigned long len)
+static void slice_range_to_mask(unsigned long start, unsigned long len,
+ struct slice_mask *ret)
{
unsigned long end = start + len - 1;
- struct slice_mask ret = { 0, 0 };
+
+ ret->low_slices = 0;
+ bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
if (start < SLICE_LOW_TOP) {
- unsigned long mend = min(end, SLICE_LOW_TOP);
- unsigned long mstart = min(start, SLICE_LOW_TOP);
+ unsigned long mend = min(end, (SLICE_LOW_TOP - 1));
- ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
- - (1u << GET_LOW_SLICE_INDEX(mstart));
+ ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
+ - (1u << GET_LOW_SLICE_INDEX(start));
}
- if ((start + len) > SLICE_LOW_TOP)
- ret.high_slices = (1ul << (GET_HIGH_SLICE_INDEX(end) + 1))
- - (1ul << GET_HIGH_SLICE_INDEX(start));
+ if ((start + len) > SLICE_LOW_TOP) {
+ unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
+ unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
+ unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
- return ret;
+ bitmap_set(ret->high_slices, start_index, count);
+ }
}
static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
@@ -128,53 +122,60 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
return !slice_area_is_free(mm, start, end - start);
}
-static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
+static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
{
- struct slice_mask ret = { 0, 0 };
unsigned long i;
+ ret->low_slices = 0;
+ bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+
for (i = 0; i < SLICE_NUM_LOW; i++)
if (!slice_low_has_vma(mm, i))
- ret.low_slices |= 1u << i;
+ ret->low_slices |= 1u << i;
if (mm->task_size <= SLICE_LOW_TOP)
- return ret;
+ return;
- for (i = 0; i < SLICE_NUM_HIGH; i++)
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++)
if (!slice_high_has_vma(mm, i))
- ret.high_slices |= 1ul << i;
-
- return ret;
+ __set_bit(i, ret->high_slices);
}
-static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
+static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret)
{
unsigned char *hpsizes;
int index, mask_index;
- struct slice_mask ret = { 0, 0 };
unsigned long i;
u64 lpsizes;
+ ret->low_slices = 0;
+ bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+
lpsizes = mm->context.low_slices_psize;
for (i = 0; i < SLICE_NUM_LOW; i++)
if (((lpsizes >> (i * 4)) & 0xf) == psize)
- ret.low_slices |= 1u << i;
+ ret->low_slices |= 1u << i;
hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < SLICE_NUM_HIGH; i++) {
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) {
mask_index = i & 0x1;
index = i >> 1;
if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
- ret.high_slices |= 1ul << i;
+ __set_bit(i, ret->high_slices);
}
-
- return ret;
}
-static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
+static int slice_check_fit(struct mm_struct *mm,
+ struct slice_mask mask, struct slice_mask available)
{
+ DECLARE_BITMAP(result, SLICE_NUM_HIGH);
+ unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.addr_limit);
+
+ bitmap_and(result, mask.high_slices,
+ available.high_slices, slice_count);
+
return (mask.low_slices & available.low_slices) == mask.low_slices &&
- (mask.high_slices & available.high_slices) == mask.high_slices;
+ bitmap_equal(result, mask.high_slices, slice_count);
}
static void slice_flush_segments(void *parm)
@@ -185,7 +186,7 @@ static void slice_flush_segments(void *parm)
if (mm != current->active_mm)
return;
- copy_mm_to_paca(&current->active_mm->context);
+ copy_mm_to_paca(current->active_mm);
local_irq_save(flags);
slb_flush_and_rebolt();
@@ -218,18 +219,18 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
mm->context.low_slices_psize = lpsizes;
hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < SLICE_NUM_HIGH; i++) {
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) {
mask_index = i & 0x1;
index = i >> 1;
- if (mask.high_slices & (1ul << i))
+ if (test_bit(i, mask.high_slices))
hpsizes[index] = (hpsizes[index] &
~(0xf << (mask_index * 4))) |
(((unsigned long)psize) << (mask_index * 4));
}
slice_dbg(" lsps=%lx, hsps=%lx\n",
- mm->context.low_slices_psize,
- mm->context.high_slices_psize);
+ (unsigned long)mm->context.low_slices_psize,
+ (unsigned long)mm->context.high_slices_psize);
spin_unlock_irqrestore(&slice_convert_lock, flags);
@@ -257,14 +258,14 @@ static bool slice_scan_available(unsigned long addr,
slice = GET_HIGH_SLICE_INDEX(addr);
*boundary_addr = (slice + end) ?
((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
- return !!(available.high_slices & (1ul << slice));
+ return !!test_bit(slice, available.high_slices);
}
}
static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
unsigned long len,
struct slice_mask available,
- int psize)
+ int psize, unsigned long high_limit)
{
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
unsigned long addr, found, next_end;
@@ -276,7 +277,10 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
info.align_offset = 0;
addr = TASK_UNMAPPED_BASE;
- while (addr < TASK_SIZE) {
+ /*
+ * Check up to the maximum value allowed for this mmap request
+ */
+ while (addr < high_limit) {
info.low_limit = addr;
if (!slice_scan_available(addr, available, 1, &addr))
continue;
@@ -288,8 +292,8 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
* Check if we need to reduce the range, or if we can
* extend it to cover the next available slice.
*/
- if (addr >= TASK_SIZE)
- addr = TASK_SIZE;
+ if (addr >= mm->context.addr_limit)
+ addr = mm->context.addr_limit;
else if (slice_scan_available(addr, available, 1, &next_end)) {
addr = next_end;
goto next_slice;
@@ -307,7 +311,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
static unsigned long slice_find_area_topdown(struct mm_struct *mm,
unsigned long len,
struct slice_mask available,
- int psize)
+ int psize, unsigned long high_limit)
{
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
unsigned long addr, found, prev;
@@ -319,6 +323,15 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
info.align_offset = 0;
addr = mm->mmap_base;
+ /*
+ * If we are trying to allocate above DEFAULT_MAP_WINDOW,
+ * add the difference to mmap_base. This only applies to
+ * requests whose high_limit is above DEFAULT_MAP_WINDOW.
+ */
+ if (high_limit > DEFAULT_MAP_WINDOW)
+ addr += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
+
while (addr > PAGE_SIZE) {
info.high_limit = addr;
if (!slice_scan_available(addr - 1, available, 0, &addr))
@@ -350,29 +363,38 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
* can happen with large stack limits and large mmap()
* allocations.
*/
- return slice_find_area_bottomup(mm, len, available, psize);
+ return slice_find_area_bottomup(mm, len, available, psize, high_limit);
}
static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
struct slice_mask mask, int psize,
- int topdown)
+ int topdown, unsigned long high_limit)
{
if (topdown)
- return slice_find_area_topdown(mm, len, mask, psize);
+ return slice_find_area_topdown(mm, len, mask, psize, high_limit);
else
- return slice_find_area_bottomup(mm, len, mask, psize);
+ return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
}
-#define or_mask(dst, src) do { \
- (dst).low_slices |= (src).low_slices; \
- (dst).high_slices |= (src).high_slices; \
-} while (0)
+static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
+{
+ DECLARE_BITMAP(result, SLICE_NUM_HIGH);
+
+ dst->low_slices |= src->low_slices;
+ bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
+ bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+}
-#define andnot_mask(dst, src) do { \
- (dst).low_slices &= ~(src).low_slices; \
- (dst).high_slices &= ~(src).high_slices; \
-} while (0)
+static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
+{
+ DECLARE_BITMAP(result, SLICE_NUM_HIGH);
+
+ dst->low_slices &= ~src->low_slices;
+
+ bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
+ bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+}
#ifdef CONFIG_PPC_64K_PAGES
#define MMU_PAGE_BASE MMU_PAGE_64K
@@ -384,14 +406,42 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
unsigned long flags, unsigned int psize,
int topdown)
{
- struct slice_mask mask = {0, 0};
+ struct slice_mask mask;
struct slice_mask good_mask;
- struct slice_mask potential_mask = {0,0} /* silence stupid warning */;
- struct slice_mask compat_mask = {0, 0};
+ struct slice_mask potential_mask;
+ struct slice_mask compat_mask;
int fixed = (flags & MAP_FIXED);
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
struct mm_struct *mm = current->mm;
unsigned long newaddr;
+ unsigned long high_limit;
+
+ /*
+ * Check if we need to expand the slice area.
+ */
+ if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE)) {
+ mm->context.addr_limit = TASK_SIZE;
+ on_each_cpu(slice_flush_segments, mm, 1);
+ }
+ /*
+ * This mmap request can allocate up to 512TB
+ */
+ if (addr > DEFAULT_MAP_WINDOW)
+ high_limit = mm->context.addr_limit;
+ else
+ high_limit = DEFAULT_MAP_WINDOW;
+ /*
+ * init different masks
+ */
+ mask.low_slices = 0;
+ bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
+
+ /* silence stupid warning */
+ potential_mask.low_slices = 0;
+ bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
+
+ compat_mask.low_slices = 0;
+ bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
/* Sanity checks */
BUG_ON(mm->task_size == 0);
@@ -423,7 +473,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* First make up a "good" mask of slices that have the right size
* already
*/
- good_mask = slice_mask_for_size(mm, psize);
+ slice_mask_for_size(mm, psize, &good_mask);
slice_print_mask(" good_mask", good_mask);
/*
@@ -448,22 +498,22 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
#ifdef CONFIG_PPC_64K_PAGES
/* If we support combo pages, we can allow 64k pages in 4k slices */
if (psize == MMU_PAGE_64K) {
- compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
+ slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask);
if (fixed)
- or_mask(good_mask, compat_mask);
+ slice_or_mask(&good_mask, &compat_mask);
}
#endif
/* First check hint if it's valid or if we have MAP_FIXED */
if (addr != 0 || fixed) {
/* Build a mask for the requested range */
- mask = slice_range_to_mask(addr, len);
+ slice_range_to_mask(addr, len, &mask);
slice_print_mask(" mask", mask);
/* Check if we fit in the good mask. If we do, we just return,
* nothing else to do
*/
- if (slice_check_fit(mask, good_mask)) {
+ if (slice_check_fit(mm, mask, good_mask)) {
slice_dbg(" fits good !\n");
return addr;
}
@@ -471,7 +521,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* Now let's see if we can find something in the existing
* slices for that size
*/
- newaddr = slice_find_area(mm, len, good_mask, psize, topdown);
+ newaddr = slice_find_area(mm, len, good_mask,
+ psize, topdown, high_limit);
if (newaddr != -ENOMEM) {
/* Found within the good mask, we don't have to setup,
* we thus return directly
@@ -484,11 +535,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* We don't fit in the good mask, check what other slices are
* empty and thus can be converted
*/
- potential_mask = slice_mask_for_free(mm);
- or_mask(potential_mask, good_mask);
+ slice_mask_for_free(mm, &potential_mask);
+ slice_or_mask(&potential_mask, &good_mask);
slice_print_mask(" potential", potential_mask);
- if ((addr != 0 || fixed) && slice_check_fit(mask, potential_mask)) {
+ if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) {
slice_dbg(" fits potential !\n");
goto convert;
}
@@ -503,7 +554,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* anywhere in the good area.
*/
if (addr) {
- addr = slice_find_area(mm, len, good_mask, psize, topdown);
+ addr = slice_find_area(mm, len, good_mask,
+ psize, topdown, high_limit);
if (addr != -ENOMEM) {
slice_dbg(" found area at 0x%lx\n", addr);
return addr;
@@ -513,28 +565,29 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* Now let's see if we can find something in the existing slices
* for that size plus free slices
*/
- addr = slice_find_area(mm, len, potential_mask, psize, topdown);
+ addr = slice_find_area(mm, len, potential_mask,
+ psize, topdown, high_limit);
#ifdef CONFIG_PPC_64K_PAGES
if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
/* retry the search with 4k-page slices included */
- or_mask(potential_mask, compat_mask);
- addr = slice_find_area(mm, len, potential_mask, psize,
- topdown);
+ slice_or_mask(&potential_mask, &compat_mask);
+ addr = slice_find_area(mm, len, potential_mask,
+ psize, topdown, high_limit);
}
#endif
if (addr == -ENOMEM)
return -ENOMEM;
- mask = slice_range_to_mask(addr, len);
+ slice_range_to_mask(addr, len, &mask);
slice_dbg(" found potential area at 0x%lx\n", addr);
slice_print_mask(" mask", mask);
convert:
- andnot_mask(mask, good_mask);
- andnot_mask(mask, compat_mask);
- if (mask.low_slices || mask.high_slices) {
+ slice_andnot_mask(&mask, &good_mask);
+ slice_andnot_mask(&mask, &compat_mask);
+ if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
slice_convert(mm, mask, psize);
if (psize > MMU_PAGE_BASE)
on_each_cpu(slice_flush_segments, mm, 1);
@@ -649,8 +702,8 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
slice_dbg(" lsps=%lx, hsps=%lx\n",
- mm->context.low_slices_psize,
- mm->context.high_slices_psize);
+ (unsigned long)mm->context.low_slices_psize,
+ (unsigned long)mm->context.high_slices_psize);
bail:
spin_unlock_irqrestore(&slice_convert_lock, flags);
@@ -659,9 +712,11 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long len, unsigned int psize)
{
- struct slice_mask mask = slice_range_to_mask(start, len);
+ struct slice_mask mask;
VM_BUG_ON(radix_enabled());
+
+ slice_range_to_mask(start, len, &mask);
slice_convert(mm, mask, psize);
}
@@ -694,14 +749,14 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
if (radix_enabled())
return 0;
- mask = slice_range_to_mask(addr, len);
- available = slice_mask_for_size(mm, psize);
+ slice_range_to_mask(addr, len, &mask);
+ slice_mask_for_size(mm, psize, &available);
#ifdef CONFIG_PPC_64K_PAGES
/* We need to account for 4k slices too */
if (psize == MMU_PAGE_64K) {
struct slice_mask compat_mask;
- compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
- or_mask(available, compat_mask);
+ slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask);
+ slice_or_mask(&available, &compat_mask);
}
#endif
@@ -711,6 +766,6 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
slice_print_mask(" mask", mask);
slice_print_mask(" available", available);
#endif
- return !slice_check_fit(mask, available);
+ return !slice_check_fit(mm, mask, available);
}
#endif
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 94210940112f..a409f78d206b 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -197,7 +197,8 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
/* Check parameters */
if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) ||
- addr >= TASK_SIZE || len >= TASK_SIZE || addr + len > TASK_SIZE)
+ addr >= mm->context.addr_limit || len >= mm->context.addr_limit ||
+ addr + len > mm->context.addr_limit)
return -EINVAL;
if (is_hugepage_only_range(mm, addr, len))
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 952713d6cf04..b68b5219cf45 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -34,10 +34,8 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
prs = 1; /* process scoped */
r = 1; /* raidx format */
- asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- asm volatile("ptesync": : :"memory");
}
/*
@@ -47,9 +45,11 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
int set;
+ asm volatile("ptesync": : :"memory");
for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) {
__tlbiel_pid(pid, set, ric);
}
+ asm volatile("ptesync": : :"memory");
asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}
@@ -129,6 +129,12 @@ void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
unsigned long pid;
struct mm_struct *mm = tlb->mm;
+ /*
+ * If we are doing a full mm flush, we will do a tlb flush
+ * with RIC_FLUSH_ALL later.
+ */
+ if (tlb->fullmm)
+ return;
preempt_disable();
@@ -195,6 +201,12 @@ void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
unsigned long pid;
struct mm_struct *mm = tlb->mm;
+ /*
+ * If we are doing a full mm flush, we will do a tlb flush
+ * with RIC_FLUSH_ALL later.
+ */
+ if (tlb->fullmm)
+ return;
preempt_disable();
pid = mm->context.id;
@@ -437,7 +449,7 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
return;
}
- if (old_pte & _PAGE_LARGE)
+ if (old_pte & R_PAGE_LARGE)
radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M);
else
radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index ba28fcb98597..bfc4a0869609 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -770,7 +770,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
* avoid going over total available memory just in case...
*/
#ifdef CONFIG_PPC_FSL_BOOK3E
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+ if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
unsigned long linear_sz;
unsigned int num_cams;
diff --git a/arch/powerpc/platforms/44x/sam440ep.c b/arch/powerpc/platforms/44x/sam440ep.c
index 688ffeab0699..55fed5e4de14 100644
--- a/arch/powerpc/platforms/44x/sam440ep.c
+++ b/arch/powerpc/platforms/44x/sam440ep.c
@@ -70,7 +70,7 @@ static struct i2c_board_info sam440ep_rtc_info = {
.irq = -1,
};
-static int sam440ep_setup_rtc(void)
+static int __init sam440ep_setup_rtc(void)
{
return i2c_register_board_info(0, &sam440ep_rtc_info, 1);
}
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 9b25cded03e9..ef4c4b8fc547 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -359,7 +359,7 @@ config PPC_BOOK3E_MMU
config PPC_MM_SLICES
bool
- default y if (!PPC_FSL_BOOK3E && PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES)
+ default y if PPC_STD_MMU_64
default n
config PPC_HAVE_PMU_SUPPORT
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 8b55c5f19d4c..8d3ae2cc52bf 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -15,9 +15,9 @@
#include <linux/msi.h>
#include <linux/export.h>
#include <linux/of_platform.h>
-#include <linux/debugfs.h>
#include <linux/slab.h>
+#include <asm/debugfs.h>
#include <asm/dcr.h>
#include <asm/machdep.h>
#include <asm/prom.h>
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 9689a6272995..2489805e79f1 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -20,6 +20,7 @@ config PPC_POWERNV
select CPU_FREQ_GOV_ONDEMAND
select CPU_FREQ_GOV_CONSERVATIVE
select PPC_DOORBELL
+ select MMU_NOTIFIER
default y
config OPAL_PRD
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 4ee837e6391a..b369e39aa392 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -122,9 +122,12 @@ static void pnv_alloc_idle_core_states(void)
for (i = 0; i < nr_cores; i++) {
int first_cpu = i * threads_per_core;
int node = cpu_to_node(first_cpu);
+ size_t paca_ptr_array_size;
core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
*core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
+ paca_ptr_array_size = (threads_per_core *
+ sizeof(struct paca_struct *));
for (j = 0; j < threads_per_core; j++) {
int cpu = first_cpu + j;
@@ -132,6 +135,11 @@ static void pnv_alloc_idle_core_states(void)
paca[cpu].core_idle_state_ptr = core_idle_state;
paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
paca[cpu].thread_mask = 1 << j;
+ if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
+ continue;
+ paca[cpu].thread_sibling_pacas =
+ kmalloc_node(paca_ptr_array_size,
+ GFP_KERNEL, node);
}
}
@@ -147,7 +155,6 @@ u32 pnv_get_supported_cpuidle_states(void)
}
EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
-
static void pnv_fastsleep_workaround_apply(void *info)
{
@@ -241,8 +248,9 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
* The default stop state that will be used by ppc_md.power_save
* function on platforms that support stop instruction.
*/
-u64 pnv_default_stop_val;
-u64 pnv_default_stop_mask;
+static u64 pnv_default_stop_val;
+static u64 pnv_default_stop_mask;
+static bool default_stop_found;
/*
* Used for ppc_md.power_save which needs a function with no parameters
@@ -262,8 +270,42 @@ u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
* psscr value and mask of the deepest stop idle state.
* Used when a cpu is offlined.
*/
-u64 pnv_deepest_stop_psscr_val;
-u64 pnv_deepest_stop_psscr_mask;
+static u64 pnv_deepest_stop_psscr_val;
+static u64 pnv_deepest_stop_psscr_mask;
+static bool deepest_stop_found;
+
+/*
+ * pnv_cpu_offline: Puts the CPU into the deepest available platform
+ * idle state when the CPU is taken offline.
+ */
+unsigned long pnv_cpu_offline(unsigned int cpu)
+{
+ unsigned long srr1;
+
+ u32 idle_states = pnv_get_supported_cpuidle_states();
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
+ srr1 = power9_idle_stop(pnv_deepest_stop_psscr_val,
+ pnv_deepest_stop_psscr_mask);
+ } else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
+ srr1 = power7_winkle();
+ } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
+ (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+ srr1 = power7_sleep();
+ } else if (idle_states & OPAL_PM_NAP_ENABLED) {
+ srr1 = power7_nap(1);
+ } else {
+ /* This is the fallback method. We emulate snooze */
+ while (!generic_check_cpu_restart(cpu)) {
+ HMT_low();
+ HMT_very_low();
+ }
+ srr1 = 0;
+ HMT_medium();
+ }
+
+ return srr1;
+}
/*
* Power ISA 3.0 idle initialization.
@@ -352,7 +394,6 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
u32 *residency_ns = NULL;
u64 max_residency_ns = 0;
int rc = 0, i;
- bool default_stop_found = false, deepest_stop_found = false;
psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL);
psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL);
@@ -432,21 +473,24 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
}
}
- if (!default_stop_found) {
- pnv_default_stop_val = PSSCR_HV_DEFAULT_VAL;
- pnv_default_stop_mask = PSSCR_HV_DEFAULT_MASK;
- pr_warn("Setting default stop psscr val=0x%016llx,mask=0x%016llx\n",
+ if (unlikely(!default_stop_found)) {
+ pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
+ } else {
+ ppc_md.power_save = power9_idle;
+ pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
pnv_default_stop_val, pnv_default_stop_mask);
}
- if (!deepest_stop_found) {
- pnv_deepest_stop_psscr_val = PSSCR_HV_DEFAULT_VAL;
- pnv_deepest_stop_psscr_mask = PSSCR_HV_DEFAULT_MASK;
- pr_warn("Setting default stop psscr val=0x%016llx,mask=0x%016llx\n",
+ if (unlikely(!deepest_stop_found)) {
+ pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait");
+ } else {
+ pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
pnv_deepest_stop_psscr_val,
pnv_deepest_stop_psscr_mask);
}
+ pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
+ pnv_first_deep_stop_state);
out:
kfree(psscr_val);
kfree(psscr_mask);
@@ -524,10 +568,30 @@ static int __init pnv_init_idle_states(void)
pnv_alloc_idle_core_states();
+ /*
+ * For each CPU, record its PACA address in each of its
+ * sibling thread's PACA at the slot corresponding to this
+ * CPU's index in the core.
+ */
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+ int cpu;
+
+ pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n");
+ for_each_possible_cpu(cpu) {
+ int base_cpu = cpu_first_thread_sibling(cpu);
+ int idx = cpu_thread_in_core(cpu);
+ int i;
+
+ for (i = 0; i < threads_per_core; i++) {
+ int j = base_cpu + i;
+
+ paca[j].thread_sibling_pacas[idx] = &paca[cpu];
+ }
+ }
+ }
+
if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
ppc_md.power_save = power7_idle;
- else if (supported_cpuidle_states & OPAL_PM_STOP_INST_FAST)
- ppc_md.power_save = power9_idle;
out:
return 0;
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 1c383f38031d..4c88c3e6ec9e 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -9,11 +9,20 @@
* License as published by the Free Software Foundation.
*/
+#include <linux/slab.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mmu_context.h>
+#include <linux/of.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <linux/memblock.h>
#include <linux/iommu.h>
+#include <asm/tlb.h>
+#include <asm/powernv.h>
+#include <asm/reg.h>
+#include <asm/opal.h>
+#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/pnv-pci.h>
#include <asm/msi_bitmap.h>
@@ -22,6 +31,8 @@
#include "powernv.h"
#include "pci.h"
+#define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
+
/*
* Other types of TCE cache invalidation are not functional in the
* hardware.
@@ -37,6 +48,12 @@ struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev)
struct device_node *dn;
struct pci_dev *gpdev;
+ if (WARN_ON(!npdev))
+ return NULL;
+
+ if (WARN_ON(!npdev->dev.of_node))
+ return NULL;
+
/* Get associated PCI device */
dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0);
if (!dn)
@@ -55,6 +72,12 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
struct device_node *dn;
struct pci_dev *npdev;
+ if (WARN_ON(!gpdev))
+ return NULL;
+
+ if (WARN_ON(!gpdev->dev.of_node))
+ return NULL;
+
/* Get associated PCI device */
dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index);
if (!dn)
@@ -359,3 +382,442 @@ struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
return gpe;
}
+
+/* Maximum number of nvlinks per npu */
+#define NV_MAX_LINKS 6
+
+/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */
+static int max_npu2_index;
+
+struct npu_context {
+ struct mm_struct *mm;
+ struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS];
+ struct mmu_notifier mn;
+ struct kref kref;
+
+ /* Callback to stop translation requests on a given GPU */
+ struct npu_context *(*release_cb)(struct npu_context *, void *);
+
+ /*
+ * Private pointer passed to the above callback for usage by
+ * device drivers.
+ */
+ void *priv;
+};
+
+/*
+ * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC
+ * if none are available.
+ */
+static int get_mmio_atsd_reg(struct npu *npu)
+{
+ int i;
+
+ for (i = 0; i < npu->mmio_atsd_count; i++) {
+ if (!test_and_set_bit(i, &npu->mmio_atsd_usage))
+ return i;
+ }
+
+ return -ENOSPC;
+}
+
+static void put_mmio_atsd_reg(struct npu *npu, int reg)
+{
+ clear_bit(reg, &npu->mmio_atsd_usage);
+}
+
+/* MMIO ATSD register offsets */
+#define XTS_ATSD_AVA 1
+#define XTS_ATSD_STAT 2
+
+static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
+ unsigned long va)
+{
+ int mmio_atsd_reg;
+
+ do {
+ mmio_atsd_reg = get_mmio_atsd_reg(npu);
+ cpu_relax();
+ } while (mmio_atsd_reg < 0);
+
+ __raw_writeq(cpu_to_be64(va),
+ npu->mmio_atsd_regs[mmio_atsd_reg] + XTS_ATSD_AVA);
+ eieio();
+ __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[mmio_atsd_reg]);
+
+ return mmio_atsd_reg;
+}
+
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+{
+ unsigned long launch;
+
+ /* IS set to invalidate matching PID */
+ launch = PPC_BIT(12);
+
+ /* PRS set to process-scoped */
+ launch |= PPC_BIT(13);
+
+ /* AP */
+ launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+
+ /* PID */
+ launch |= pid << PPC_BITLSHIFT(38);
+
+ /* Invalidating the entire process doesn't use a va */
+ return mmio_launch_invalidate(npu, launch, 0);
+}
+
+static int mmio_invalidate_va(struct npu *npu, unsigned long va,
+ unsigned long pid)
+{
+ unsigned long launch;
+
+ /* IS set to invalidate target VA */
+ launch = 0;
+
+ /* PRS set to process scoped */
+ launch |= PPC_BIT(13);
+
+ /* AP */
+ launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+
+ /* PID */
+ launch |= pid << PPC_BITLSHIFT(38);
+
+ return mmio_launch_invalidate(npu, launch, va);
+}
+
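The launch-register fields above are built with IBM (MSB = bit 0) bit numbering. A minimal reminder of what the PPC_BIT helpers expand to, using EX_-prefixed names since the real definitions already live in the kernel headers:

    /* IBM bit numbering: bit 0 is the most significant bit of a 64-bit word */
    #define EX_PPC_BITLSHIFT(be)    (63 - (be))
    #define EX_PPC_BIT(bit)         (1UL << EX_PPC_BITLSHIFT(bit))

    /*
     * So PPC_BIT(12) is 1UL << 51, and "pid << PPC_BITLSHIFT(38)" places
     * the PID field so its least significant bit lands at IBM bit 38.
     */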
+#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
+
+/*
+ * Invalidate either a single address or an entire PID depending on
+ * the value of va.
+ */
+static void mmio_invalidate(struct npu_context *npu_context, int va,
+ unsigned long address)
+{
+ int i, j, reg;
+ struct npu *npu;
+ struct pnv_phb *nphb;
+ struct pci_dev *npdev;
+ struct {
+ struct npu *npu;
+ int reg;
+ } mmio_atsd_reg[NV_MAX_NPUS];
+ unsigned long pid = npu_context->mm->context.id;
+
+ /*
+ * Loop over all the NPUs this process is active on and launch
+ * an invalidate.
+ */
+ for (i = 0; i <= max_npu2_index; i++) {
+ mmio_atsd_reg[i].reg = -1;
+ for (j = 0; j < NV_MAX_LINKS; j++) {
+ npdev = npu_context->npdev[i][j];
+ if (!npdev)
+ continue;
+
+ nphb = pci_bus_to_host(npdev->bus)->private_data;
+ npu = &nphb->npu;
+ mmio_atsd_reg[i].npu = npu;
+
+ if (va)
+ mmio_atsd_reg[i].reg =
+ mmio_invalidate_va(npu, address, pid);
+ else
+ mmio_atsd_reg[i].reg =
+ mmio_invalidate_pid(npu, pid);
+
+ /*
+ * The NPU hardware forwards the shootdown to all GPUs
+ * so we only have to launch one shootdown per NPU.
+ */
+ break;
+ }
+ }
+
+ /*
+ * Unfortunately the nest mmu does not support flushing specific
+ * addresses so we have to flush the whole mm.
+ */
+ flush_tlb_mm(npu_context->mm);
+
+ /* Wait for all invalidations to complete */
+ for (i = 0; i <= max_npu2_index; i++) {
+ if (mmio_atsd_reg[i].reg < 0)
+ continue;
+
+ /* Wait for completion */
+ npu = mmio_atsd_reg[i].npu;
+ reg = mmio_atsd_reg[i].reg;
+ while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+ cpu_relax();
+ put_mmio_atsd_reg(npu, reg);
+ }
+}
+
+static void pnv_npu2_mn_release(struct mmu_notifier *mn,
+ struct mm_struct *mm)
+{
+ struct npu_context *npu_context = mn_to_npu_context(mn);
+
+ /* Call into device driver to stop requests to the NMMU */
+ if (npu_context->release_cb)
+ npu_context->release_cb(npu_context, npu_context->priv);
+
+ /*
+ * There should be no more translation requests for this PID, but we
+ * need to ensure any entries for it are removed from the TLB.
+ */
+ mmio_invalidate(npu_context, 0, 0);
+}
+
+static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address,
+ pte_t pte)
+{
+ struct npu_context *npu_context = mn_to_npu_context(mn);
+
+ mmio_invalidate(npu_context, 1, address);
+}
+
+static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address)
+{
+ struct npu_context *npu_context = mn_to_npu_context(mn);
+
+ mmio_invalidate(npu_context, 1, address);
+}
+
+static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct npu_context *npu_context = mn_to_npu_context(mn);
+ unsigned long address;
+
+ for (address = start; address <= end; address += PAGE_SIZE)
+ mmio_invalidate(npu_context, 1, address);
+}
+
+static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
+ .release = pnv_npu2_mn_release,
+ .change_pte = pnv_npu2_mn_change_pte,
+ .invalidate_page = pnv_npu2_mn_invalidate_page,
+ .invalidate_range = pnv_npu2_mn_invalidate_range,
+};
+
+/*
+ * Call into OPAL to setup the nmmu context for the current task in
+ * the NPU. This must be called to setup the context tables before the
+ * GPU issues ATRs. pdev should be a pointed to PCIe GPU device.
+ *
+ * A release callback should be registered to allow a device driver to
+ * be notified that it should not launch any new translation requests
+ * as the final TLB invalidate is about to occur.
+ *
+ * Returns an error if no contexts are currently available, or an
+ * npu_context which should be passed to pnv_npu2_handle_fault().
+ *
+ * mmap_sem must be held in write mode.
+ */
+struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
+ unsigned long flags,
+ struct npu_context *(*cb)(struct npu_context *, void *),
+ void *priv)
+{
+ int rc;
+ u32 nvlink_index;
+ struct device_node *nvlink_dn;
+ struct mm_struct *mm = current->mm;
+ struct pnv_phb *nphb;
+ struct npu *npu;
+ struct npu_context *npu_context;
+
+ /*
+ * At present we don't support GPUs connected to multiple NPUs and I'm
+ * not sure the hardware does either.
+ */
+ struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
+
+ if (!firmware_has_feature(FW_FEATURE_OPAL))
+ return ERR_PTR(-ENODEV);
+
+ if (!npdev)
+ /* No nvlink associated with this GPU device */
+ return ERR_PTR(-ENODEV);
+
+ if (!mm) {
+ /* kernel thread contexts are not supported */
+ return ERR_PTR(-EINVAL);
+ }
+
+ nphb = pci_bus_to_host(npdev->bus)->private_data;
+ npu = &nphb->npu;
+
+ /*
+ * Setup the NPU context table for a particular GPU. These need to be
+ * per-GPU as we need the tables to filter ATSDs when there are no
+ * active contexts on a particular GPU.
+ */
+ rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags,
+ PCI_DEVID(gpdev->bus->number, gpdev->devfn));
+ if (rc < 0)
+ return ERR_PTR(-ENOSPC);
+
+ /*
+ * We store the npu pci device so we can more easily get at the
+ * associated npus.
+ */
+ npu_context = mm->context.npu_context;
+ if (!npu_context) {
+ npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
+ if (!npu_context)
+ return ERR_PTR(-ENOMEM);
+
+ mm->context.npu_context = npu_context;
+ npu_context->mm = mm;
+ npu_context->mn.ops = &nv_nmmu_notifier_ops;
+ __mmu_notifier_register(&npu_context->mn, mm);
+ kref_init(&npu_context->kref);
+ } else {
+ kref_get(&npu_context->kref);
+ }
+
+ npu_context->release_cb = cb;
+ npu_context->priv = priv;
+ nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
+ if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
+ &nvlink_index)))
+ return ERR_PTR(-ENODEV);
+ npu_context->npdev[npu->index][nvlink_index] = npdev;
+
+ return npu_context;
+}
+EXPORT_SYMBOL(pnv_npu2_init_context);
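A minimal, hypothetical driver-side usage sketch. Nothing below is part of this patch: my_release_cb, my_attach, gpdev and my_priv are placeholders, and the flags argument is left at 0 purely for illustration.

    static struct npu_context *my_release_cb(struct npu_context *ctx, void *priv)
    {
            /* tell the device to stop issuing new translation requests */
            return ctx;
    }

    static int my_attach(struct pci_dev *gpdev, void *my_priv)
    {
            struct npu_context *ctx;

            down_write(&current->mm->mmap_sem);
            ctx = pnv_npu2_init_context(gpdev, 0, my_release_cb, my_priv);
            up_write(&current->mm->mmap_sem);
            if (IS_ERR(ctx))
                    return PTR_ERR(ctx);

            /* ... use the context; later tear it down with: */
            /* pnv_npu2_destroy_context(ctx, gpdev); */
            return 0;
    }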
+
+static void pnv_npu2_release_context(struct kref *kref)
+{
+ struct npu_context *npu_context =
+ container_of(kref, struct npu_context, kref);
+
+ npu_context->mm->context.npu_context = NULL;
+ mmu_notifier_unregister(&npu_context->mn,
+ npu_context->mm);
+
+ kfree(npu_context);
+}
+
+void pnv_npu2_destroy_context(struct npu_context *npu_context,
+ struct pci_dev *gpdev)
+{
+ struct pnv_phb *nphb, *phb;
+ struct npu *npu;
+ struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
+ struct device_node *nvlink_dn;
+ u32 nvlink_index;
+
+ if (WARN_ON(!npdev))
+ return;
+
+ if (!firmware_has_feature(FW_FEATURE_OPAL))
+ return;
+
+ nphb = pci_bus_to_host(npdev->bus)->private_data;
+ npu = &nphb->npu;
+ phb = pci_bus_to_host(gpdev->bus)->private_data;
+ nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
+ if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
+ &nvlink_index)))
+ return;
+ npu_context->npdev[npu->index][nvlink_index] = NULL;
+ opal_npu_destroy_context(phb->opal_id, npu_context->mm->context.id,
+ PCI_DEVID(gpdev->bus->number, gpdev->devfn));
+ kref_put(&npu_context->kref, pnv_npu2_release_context);
+}
+EXPORT_SYMBOL(pnv_npu2_destroy_context);
+
+/*
+ * Assumes mmap_sem is held for the context's associated mm.
+ */
+int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
+ unsigned long *flags, unsigned long *status, int count)
+{
+ u64 rc = 0, result = 0;
+ int i, is_write;
+ struct page *page[1];
+
+ /* mmap_sem should be held so the mm_struct must still be present */
+ struct mm_struct *mm = context->mm;
+
+ if (!firmware_has_feature(FW_FEATURE_OPAL))
+ return -ENODEV;
+
+ WARN_ON(!rwsem_is_locked(&mm->mmap_sem));
+
+ for (i = 0; i < count; i++) {
+ is_write = flags[i] & NPU2_WRITE;
+ rc = get_user_pages_remote(NULL, mm, ea[i], 1,
+ is_write ? FOLL_WRITE : 0,
+ page, NULL, NULL);
+
+ /*
+ * To support virtualised environments we will have to do an
+ * access to the page to ensure it gets faulted into the
+ * hypervisor. For the moment virtualisation is not supported in
+ * other areas so leave the access out.
+ */
+ if (rc != 1) {
+ status[i] = rc;
+ result = -EFAULT;
+ continue;
+ }
+
+ status[i] = 0;
+ put_page(page[0]);
+ }
+
+ return result;
+}
+EXPORT_SYMBOL(pnv_npu2_handle_fault);
+
+int pnv_npu2_init(struct pnv_phb *phb)
+{
+ unsigned int i;
+ u64 mmio_atsd;
+ struct device_node *dn;
+ struct pci_dev *gpdev;
+ static int npu_index;
+ uint64_t rc = 0;
+
+ for_each_child_of_node(phb->hose->dn, dn) {
+ gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn));
+ if (gpdev) {
+ rc = opal_npu_map_lpar(phb->opal_id,
+ PCI_DEVID(gpdev->bus->number, gpdev->devfn),
+ 0, 0);
+ if (rc)
+ dev_err(&gpdev->dev,
+ "Error %lld mapping device to LPAR\n",
+ rc);
+ }
+ }
+
+ for (i = 0; !of_property_read_u64_index(phb->hose->dn, "ibm,mmio-atsd",
+ i, &mmio_atsd); i++)
+ phb->npu.mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
+
+ pr_info("NPU%lld: Found %d MMIO ATSD registers", phb->opal_id, i);
+ phb->npu.mmio_atsd_count = i;
+ phb->npu.mmio_atsd_usage = 0;
+ npu_index++;
+ if (WARN_ON(npu_index >= NV_MAX_NPUS))
+ return -ENOSPC;
+ max_npu2_index = npu_index;
+ phb->npu.index = npu_index;
+
+ return 0;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c
index a91d7876fae2..6c7ad1d8b32e 100644
--- a/arch/powerpc/platforms/powernv/opal-lpc.c
+++ b/arch/powerpc/platforms/powernv/opal-lpc.c
@@ -12,7 +12,6 @@
#include <linux/kernel.h>
#include <linux/of.h>
#include <linux/bug.h>
-#include <linux/debugfs.h>
#include <linux/io.h>
#include <linux/slab.h>
@@ -21,7 +20,7 @@
#include <asm/opal.h>
#include <asm/prom.h>
#include <linux/uaccess.h>
-#include <asm/debug.h>
+#include <asm/debugfs.h>
#include <asm/isa-bridge.h>
static int opal_lpc_chip_id = -1;
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
index 308efd170c27..aa267f120033 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -64,6 +64,10 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
*sensor_data = be32_to_cpu(data);
break;
+ case OPAL_WRONG_STATE:
+ ret = -EIO;
+ break;
+
default:
ret = opal_error_code(ret);
break;
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 085605a73168..f620572f891f 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -50,21 +50,13 @@ END_FTR_SECTION(0, 1); \
#define OPAL_BRANCH(LABEL)
#endif
-/* TODO:
- *
- * - Trace irqs in/off (needs saving/restoring all args, argh...)
- * - Get r11 feed up by Dave so I can have better register usage
+/*
+ * DO_OPAL_CALL assumes:
+ * r0 = opal call token
+ * r12 = msr
+ * LR has been saved
*/
-
-#define OPAL_CALL(name, token) \
- _GLOBAL_TOC(name); \
- mfmsr r12; \
- mflr r0; \
- andi. r11,r12,MSR_IR|MSR_DR; \
- std r0,PPC_LR_STKOFF(r1); \
- li r0,token; \
- beq opal_real_call; \
- OPAL_BRANCH(opal_tracepoint_entry) \
+#define DO_OPAL_CALL() \
mfcr r11; \
stw r11,8(r1); \
li r11,0; \
@@ -83,6 +75,18 @@ END_FTR_SECTION(0, 1); \
mtspr SPRN_HSRR0,r12; \
hrfid
+#define OPAL_CALL(name, token) \
+ _GLOBAL_TOC(name); \
+ mfmsr r12; \
+ mflr r0; \
+ andi. r11,r12,MSR_IR|MSR_DR; \
+ std r0,PPC_LR_STKOFF(r1); \
+ li r0,token; \
+ beq opal_real_call; \
+ OPAL_BRANCH(opal_tracepoint_entry) \
+ DO_OPAL_CALL()
+
+
opal_return:
/*
* Fixup endian on OPAL return... we should be able to simplify
@@ -148,26 +152,13 @@ opal_tracepoint_entry:
ld r8,STK_REG(R29)(r1)
ld r9,STK_REG(R30)(r1)
ld r10,STK_REG(R31)(r1)
+
+ /* setup LR so we return via tracepoint_return */
LOAD_REG_ADDR(r11,opal_tracepoint_return)
- mfcr r12
std r11,16(r1)
- stw r12,8(r1)
- li r11,0
+
mfmsr r12
- ori r11,r11,MSR_EE
- std r12,PACASAVEDMSR(r13)
- andc r12,r12,r11
- mtmsrd r12,1
- LOAD_REG_ADDR(r11,opal_return)
- mtlr r11
- li r11,MSR_DR|MSR_IR|MSR_LE
- andc r12,r12,r11
- mtspr SPRN_HSRR1,r12
- LOAD_REG_ADDR(r11,opal)
- ld r12,8(r11)
- ld r2,0(r11)
- mtspr SPRN_HSRR0,r12
- hrfid
+ DO_OPAL_CALL()
opal_tracepoint_return:
std r3,STK_REG(R31)(r1)
@@ -316,3 +307,6 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
+OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
+OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
+OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index d0ac535cf5d7..28651fb25417 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -73,25 +73,32 @@ static int opal_xscom_err_xlate(int64_t rc)
static u64 opal_scom_unmangle(u64 addr)
{
+ u64 tmp;
+
/*
- * XSCOM indirect addresses have the top bit set. Additionally
- * the rest of the top 3 nibbles is always 0.
+ * XSCOM addresses use the top nibble to set indirect mode and
+ * its form. Bits 4-11 are always 0.
*
* Because the debugfs interface uses signed offsets and shifts
* the address left by 3, we basically cannot use the top 4 bits
* of the 64-bit address, and thus cannot use the indirect bit.
*
- * To deal with that, we support the indirect bit being in bit
- * 4 (IBM notation) instead of bit 0 in this API, we do the
- * conversion here. To leave room for further xscom address
- * expansion, we only clear out the top byte
+ * To deal with that, we support the indirect bits being in
+ * bits 4-7 (IBM notation) instead of bits 0-3 in this API, and we
+ * do the conversion here.
*
- * For in-kernel use, we also support the real indirect bit, so
- * we test for any of the top 5 bits
+ * For in-kernel use, we don't need to do this mangling: callers in
+ * the kernel will never have bits 4-7 set.
+ *
+ * So:
+ *   debugfs always has bits 0-3 clear and may set bits 4-7
+ *   the kernel always has bits 4-7 clear and may set bits 0-3
*/
- if (addr & (0x1full << 59))
- addr = (addr & ~(0xffull << 56)) | (1ull << 63);
+ tmp = addr;
+ tmp &= 0x0f00000000000000;
+ addr &= 0xf0ffffffffffffff;
+ addr |= tmp << 4;
+
return addr;
}
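A worked example of the nibble move (the value is hypothetical): if debugfs passes the indirect flag in IBM bits 4-7, say addr = 0x0800000012345678, then

    tmp   = addr & 0x0f00000000000000   /* = 0x0800000000000000            */
    addr &= 0xf0ffffffffffffff          /* = 0x0000000012345678            */
    addr |= tmp << 4                    /* = 0x8000000012345678,           */
                                        /*   indirect flag now in IBM bit 0 */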
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index e0f856bfbfe8..76e153fc1f93 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -435,7 +435,7 @@ int opal_machine_check(struct pt_regs *regs)
evt.version);
return 0;
}
- machine_check_print_event_info(&evt);
+ machine_check_print_event_info(&evt, user_mode(regs));
if (opal_recover_mce(regs, &evt))
return 1;
@@ -595,6 +595,79 @@ static void opal_export_symmap(void)
pr_warn("Error %d creating OPAL symbols file\n", rc);
}
+static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf,
+ loff_t off, size_t count)
+{
+ return memory_read_from_buffer(buf, count, &off, bin_attr->private,
+ bin_attr->size);
+}
+
+/*
+ * opal_export_attrs: creates a sysfs node for each property listed in
+ * the device-tree under /ibm,opal/firmware/exports/
+ * All new sysfs nodes are created under /opal/exports/.
+ * This allows for reserved memory regions (e.g. HDAT) to be read.
+ * The new sysfs nodes are only readable by root.
+ */
+static void opal_export_attrs(void)
+{
+ struct bin_attribute *attr;
+ struct device_node *np;
+ struct property *prop;
+ struct kobject *kobj;
+ u64 vals[2];
+ int rc;
+
+ np = of_find_node_by_path("/ibm,opal/firmware/exports");
+ if (!np)
+ return;
+
+ /* Create new 'exports' directory - /sys/firmware/opal/exports */
+ kobj = kobject_create_and_add("exports", opal_kobj);
+ if (!kobj) {
+ pr_warn("kobject_create_and_add() of exports failed\n");
+ return;
+ }
+
+ for_each_property_of_node(np, prop) {
+ if (!strcmp(prop->name, "name") || !strcmp(prop->name, "phandle"))
+ continue;
+
+ if (of_property_read_u64_array(np, prop->name, &vals[0], 2))
+ continue;
+
+ attr = kmalloc(sizeof(*attr), GFP_KERNEL);
+
+ if (attr == NULL) {
+ pr_warn("Failed kmalloc for bin_attribute!");
+ continue;
+ }
+
+ attr->attr.name = kstrdup(prop->name, GFP_KERNEL);
+ attr->attr.mode = 0400;
+ attr->read = export_attr_read;
+ attr->private = __va(vals[0]);
+ attr->size = vals[1];
+
+ if (attr->attr.name == NULL) {
+ pr_warn("Failed kstrdup for bin_attribute attr.name");
+ kfree(attr);
+ continue;
+ }
+
+ rc = sysfs_create_bin_file(kobj, attr);
+ if (rc) {
+ pr_warn("Error %d creating OPAL sysfs exports/%s file\n",
+ rc, prop->name);
+ kfree(attr->attr.name);
+ kfree(attr);
+ }
+ }
+
+ of_node_put(np);
+}
+
static void __init opal_dump_region_init(void)
{
void *addr;
@@ -733,6 +806,9 @@ static int __init opal_init(void)
opal_msglog_sysfs_init();
}
+ /* Export all properties */
+ opal_export_attrs();
+
/* Initialize platform devices: IPMI backend, PRD & flash interface */
opal_pdev_init("ibm,opal-ipmi");
opal_pdev_init("ibm,opal-flash");
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index e36738291c32..7eebc76721ea 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -14,7 +14,6 @@
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/crash_dump.h>
-#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
@@ -38,7 +37,7 @@
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/xics.h>
-#include <asm/debug.h>
+#include <asm/debugfs.h>
#include <asm/firmware.h>
#include <asm/pnv-pci.h>
#include <asm/mmzone.h>
@@ -1262,6 +1261,8 @@ static void pnv_pci_ioda_setup_PEs(void)
/* PE#0 is needed for error reporting */
pnv_ioda_reserve_pe(phb, 0);
pnv_ioda_setup_npu_PEs(hose->bus);
+ if (phb->model == PNV_PHB_MODEL_NPU2)
+ pnv_npu2_init(phb);
}
}
}
@@ -2735,9 +2736,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
if (rc)
return;
- if (pe->flags & PNV_IODA_PE_DEV)
- iommu_add_device(&pe->pdev->dev);
- else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
+ if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
}
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index eb835e977e33..a43f22dc069e 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -758,7 +758,7 @@ void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
{
- return *(pnv_tce(tbl, index - tbl->it_offset));
+ return be64_to_cpu(*(pnv_tce(tbl, index - tbl->it_offset)));
}
struct iommu_table *pnv_pci_table_alloc(int nid)
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index e1d3e5526b54..4eab713136d1 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -7,6 +7,9 @@
struct pci_dn;
+/* Maximum possible number of ATSD MMIO registers per NPU */
+#define NV_NMMU_ATSD_REGS 8
+
enum pnv_phb_type {
PNV_PHB_IODA1 = 0,
PNV_PHB_IODA2 = 1,
@@ -174,6 +177,16 @@ struct pnv_phb {
struct OpalIoP7IOCErrorData hub_diag;
} diag;
+ /* Nvlink2 data */
+ struct npu {
+ int index;
+ __be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS];
+ unsigned int mmio_atsd_count;
+
+ /* Bitmask for MMIO register usage */
+ unsigned long mmio_atsd_usage;
+ } npu;
+
#ifdef CONFIG_CXL_BASE
struct cxl_afu *cxl_afu;
#endif
@@ -236,7 +249,7 @@ extern long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
extern long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num);
extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe);
extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe);
-
+extern int pnv_npu2_init(struct pnv_phb *phb);
/* cxl functions */
extern bool pnv_cxl_enable_device_hook(struct pci_dev *dev);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 613052232475..6dbc0a1da1f6 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -18,8 +18,6 @@ static inline void pnv_pci_shutdown(void) { }
#endif
extern u32 pnv_get_supported_cpuidle_states(void);
-extern u64 pnv_deepest_stop_psscr_val;
-extern u64 pnv_deepest_stop_psscr_mask;
extern void pnv_lpc_init(void);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index adceac978d18..2dc7e5fb86c3 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -98,6 +98,10 @@ static void pnv_show_cpuinfo(struct seq_file *m)
else
seq_printf(m, "firmware\t: BML\n");
of_node_put(root);
+ if (radix_enabled())
+ seq_printf(m, "MMU\t\t: Radix\n");
+ else
+ seq_printf(m, "MMU\t\t: Hash\n");
}
static void pnv_prepare_going_down(void)
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index f57195588c6c..39296bf7009e 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -36,6 +36,7 @@
#include <asm/dbell.h>
#include <asm/kvm_ppc.h>
#include <asm/ppc-opcode.h>
+#include <asm/cpuidle.h>
#include "powernv.h"
@@ -146,7 +147,6 @@ static void pnv_smp_cpu_kill_self(void)
{
unsigned int cpu;
unsigned long srr1, wmask;
- u32 idle_states;
/* Standard hot unplug procedure */
local_irq_disable();
@@ -161,8 +161,6 @@ static void pnv_smp_cpu_kill_self(void)
if (cpu_has_feature(CPU_FTR_ARCH_207S))
wmask = SRR1_WAKEMASK_P8;
- idle_states = pnv_get_supported_cpuidle_states();
-
/* We don't want to take decrementer interrupts while we are offline,
* so clear LPCR:PECE1. We keep PECE2 (and LPCR_PECE_HVEE on P9)
* enabled as to let IPIs in.
@@ -190,19 +188,7 @@ static void pnv_smp_cpu_kill_self(void)
kvmppc_set_host_ipi(cpu, 0);
ppc64_runlatch_off();
-
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- srr1 = power9_idle_stop(pnv_deepest_stop_psscr_val,
- pnv_deepest_stop_psscr_mask);
- } else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
- srr1 = power7_winkle();
- } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
- (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
- srr1 = power7_sleep();
- } else {
- srr1 = power7_nap(1);
- }
-
+ srr1 = pnv_cpu_offline(cpu);
ppc64_runlatch_on();
/*
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 6b04e3f0f982..18014cdeb590 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -21,13 +21,12 @@
*/
#include <linux/slab.h>
-#include <linux/debugfs.h>
#include <linux/spinlock.h>
#include <asm/smp.h>
#include <linux/uaccess.h>
#include <asm/firmware.h>
#include <asm/lppaca.h>
-#include <asm/debug.h>
+#include <asm/debugfs.h>
#include <asm/plpar_wrappers.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index f02ec3ab428c..957ae347b0b3 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -29,6 +29,16 @@
#include <asm/trace.h>
#include <asm/machdep.h>
+/* For hcall instrumentation. One structure per-hcall, per-CPU */
+struct hcall_stats {
+ unsigned long num_calls; /* number of calls (on this CPU) */
+ unsigned long tb_total; /* total wall time (mftb) of calls. */
+ unsigned long purr_total; /* total cpu time (PURR) of calls. */
+ unsigned long tb_start;
+ unsigned long purr_start;
+};
+#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1)
+
DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
/*
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 8b1fe895daa3..6541d0b03e4c 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -958,3 +958,64 @@ int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
return rc;
}
+
+static unsigned long vsid_unscramble(unsigned long vsid, int ssize)
+{
+ unsigned long protovsid;
+ unsigned long va_bits = VA_BITS;
+ unsigned long modinv, vsid_modulus;
+ unsigned long max_mod_inv, tmp_modinv;
+
+ if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
+ va_bits = 65;
+
+ if (ssize == MMU_SEGSIZE_256M) {
+ modinv = VSID_MULINV_256M;
+ vsid_modulus = ((1UL << (va_bits - SID_SHIFT)) - 1);
+ } else {
+ modinv = VSID_MULINV_1T;
+ vsid_modulus = ((1UL << (va_bits - SID_SHIFT_1T)) - 1);
+ }
+
+ /*
+ * vsid outside our range.
+ */
+ if (vsid >= vsid_modulus)
+ return 0;
+
+ /*
+ * If modinv is the modular multiplicative inverse of (x % vsid_modulus)
+ * and vsid = (protovsid * x) % vsid_modulus, then we say:
+ * protovsid = (vsid * modinv) % vsid_modulus
+ */
+
+ /* Check if (vsid * modinv) overflows 63 bits */
+ max_mod_inv = 0x7fffffffffffffffull / vsid;
+ if (modinv < max_mod_inv)
+ return (vsid * modinv) % vsid_modulus;
+
+ tmp_modinv = modinv/max_mod_inv;
+ modinv %= max_mod_inv;
+
+ protovsid = (((vsid * max_mod_inv) % vsid_modulus) * tmp_modinv) % vsid_modulus;
+ protovsid = (protovsid + vsid * modinv) % vsid_modulus;
+
+ return protovsid;
+}
+
+static int __init reserve_vrma_context_id(void)
+{
+ unsigned long protovsid;
+
+ /*
+ * Reserve context ids which map to reserved virtual addresses. For now
+ * we only reserve the context id which maps to the VRMA VSID. We ignore
+ * the addresses in "ibm,adjunct-virtual-addresses" because we don't
+ * enable adjunct support via the "ibm,client-architecture-support"
+ * interface.
+ */
+ protovsid = vsid_unscramble(VRMA_VSID, MMU_SEGSIZE_1T);
+ hash__reserve_context_id(protovsid >> ESID_BITS_1T);
+ return 0;
+}
+machine_device_initcall(pseries, reserve_vrma_context_id);
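vsid_unscramble() inverts the VSID scramble: with vsid = (protovsid * x) % vsid_modulus, multiplying by modinv (the modular multiplicative inverse of x) recovers protovsid. The only subtlety is that vsid * modinv can exceed 64 bits, so modinv is split against max_mod_inv = 0x7fffffffffffffff / vsid, keeping every intermediate product within 63 bits; reserve_vrma_context_id() then reserves the context id that backs the fixed VRMA VSID. The standalone sketch below reproduces the same overflow-safe multiply with toy, VSID-sized operands; it is not kernel code, and __int128 is used only to cross-check the result:

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Illustrative userspace version of the trick in vsid_unscramble():
     * compute (a * b) % m in 64-bit arithmetic.  Assumes a, b < m and m is
     * well under 2^63 (the VSID modulus is at most about 2^40), so every
     * intermediate product stays below 2^63.
     */
    static uint64_t mulmod_63bit(uint64_t a, uint64_t b, uint64_t m)
    {
    	uint64_t max, q, r, res;

    	if (a == 0)
    		return 0;

    	max = 0x7fffffffffffffffULL / a;	/* largest co-factor safe against a */
    	if (b < max)
    		return (a * b) % m;		/* direct multiply cannot overflow */

    	q = b / max;				/* split b = q * max + r */
    	r = b % max;

    	res = (((a * max) % m) * q) % m;	/* a * (q * max)  mod m */
    	res = (res + (a * r) % m) % m;		/* ... plus a * r mod m */
    	return res;
    }

    int main(void)
    {
    	uint64_t m = (1ULL << 40) - 87;		/* toy modulus, roughly VSID-sized */
    	uint64_t a = 0xfffffffffULL;		/* stands in for "vsid" */
    	uint64_t b = 0xabcdef123ULL;		/* stands in for "modinv" */
    	uint64_t ref = (uint64_t)(((unsigned __int128)a * b) % m);

    	printf("split multiply: %llu, 128-bit reference: %llu\n",
    	       (unsigned long long)mulmod_63bit(a, b, m),
    	       (unsigned long long)ref);
    	return 0;
    }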
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index b4d362ed03a1..b5d86426e97b 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -87,6 +87,10 @@ static void pSeries_show_cpuinfo(struct seq_file *m)
model = of_get_property(root, "model", NULL);
seq_printf(m, "machine\t\t: CHRP %s\n", model);
of_node_put(root);
+ if (radix_enabled())
+ seq_printf(m, "MMU\t\t: Radix\n");
+ else
+ seq_printf(m, "MMU\t\t: Hash\n");
}
/* Initialize firmware assisted non-maskable interrupts if
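With the extra seq_printf() calls, /proc/cpuinfo on pseries now reports which MMU mode the kernel is actually using, printed exactly as formatted above and depending on radix_enabled():

    MMU		: Radix

or "MMU		: Hash" on a hash-MMU guest.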
diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
index d0e9f178a324..76ea32c1b664 100644
--- a/arch/powerpc/sysdev/scom.c
+++ b/arch/powerpc/sysdev/scom.c
@@ -19,10 +19,9 @@
*/
#include <linux/kernel.h>
-#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/export.h>
-#include <asm/debug.h>
+#include <asm/debugfs.h>
#include <asm/prom.h>
#include <asm/scom.h>
#include <linux/uaccess.h>
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 67435b9bf98d..f77a104abf9f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -29,6 +29,7 @@
#include <linux/nmi.h>
#include <linux/ctype.h>
+#include <asm/debugfs.h>
#include <asm/ptrace.h>
#include <asm/smp.h>
#include <asm/string.h>
@@ -77,6 +78,7 @@ static int xmon_gate;
#endif /* CONFIG_SMP */
static unsigned long in_xmon __read_mostly = 0;
+static int xmon_on = IS_ENABLED(CONFIG_XMON_DEFAULT);
static unsigned long adrs;
static int size = 1;
@@ -185,8 +187,6 @@ static void dump_tlb_44x(void);
static void dump_tlb_book3e(void);
#endif
-static int xmon_no_auto_backtrace;
-
#ifdef CONFIG_PPC64
#define REG "%.16lx"
#else
@@ -891,10 +891,7 @@ cmds(struct pt_regs *excp)
last_cmd = NULL;
xmon_regs = excp;
- if (!xmon_no_auto_backtrace) {
- xmon_no_auto_backtrace = 1;
- xmon_show_stack(excp->gpr[1], excp->link, excp->nip);
- }
+ xmon_show_stack(excp->gpr[1], excp->link, excp->nip);
for(;;) {
#ifdef CONFIG_SMP
@@ -3392,6 +3389,8 @@ static void sysrq_handle_xmon(int key)
/* ensure xmon is enabled */
xmon_init(1);
debugger(get_irq_regs());
+ if (!xmon_on)
+ xmon_init(0);
}
static struct sysrq_key_op sysrq_xmon_op = {
@@ -3405,10 +3404,37 @@ static int __init setup_xmon_sysrq(void)
register_sysrq_key('x', &sysrq_xmon_op);
return 0;
}
-__initcall(setup_xmon_sysrq);
+device_initcall(setup_xmon_sysrq);
#endif /* CONFIG_MAGIC_SYSRQ */
-static int __initdata xmon_early, xmon_off;
+#ifdef CONFIG_DEBUG_FS
+static int xmon_dbgfs_set(void *data, u64 val)
+{
+ xmon_on = !!val;
+ xmon_init(xmon_on);
+
+ return 0;
+}
+
+static int xmon_dbgfs_get(void *data, u64 *val)
+{
+ *val = xmon_on;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(xmon_dbgfs_ops, xmon_dbgfs_get,
+ xmon_dbgfs_set, "%llu\n");
+
+static int __init setup_xmon_dbgfs(void)
+{
+ debugfs_create_file("xmon", 0600, powerpc_debugfs_root, NULL,
+ &xmon_dbgfs_ops);
+ return 0;
+}
+device_initcall(setup_xmon_dbgfs);
+#endif /* CONFIG_DEBUG_FS */
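Together with the new asm/debugfs.h, this gives a runtime switch for xmon. Assuming debugfs is mounted at /sys/kernel/debug and powerpc_debugfs_root is the usual "powerpc" directory there, writing 1 or 0 to /sys/kernel/debug/powerpc/xmon calls xmon_init() to install or remove the debugger hooks, and reading the file returns the current xmon_on value; for example, echo 1 > /sys/kernel/debug/powerpc/xmon enables xmon on a kernel that booted with it off.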
+
+static int xmon_early __initdata;
static int __init early_parse_xmon(char *p)
{
@@ -3416,12 +3442,12 @@ static int __init early_parse_xmon(char *p)
/* just "xmon" is equivalent to "xmon=early" */
xmon_init(1);
xmon_early = 1;
- } else if (strncmp(p, "on", 2) == 0)
+ xmon_on = 1;
+ } else if (strncmp(p, "on", 2) == 0) {
xmon_init(1);
- else if (strncmp(p, "off", 3) == 0)
- xmon_off = 1;
- else if (strncmp(p, "nobt", 4) == 0)
- xmon_no_auto_backtrace = 1;
+ xmon_on = 1;
+ } else if (strncmp(p, "off", 3) == 0)
+ xmon_on = 0;
else
return 1;
@@ -3431,10 +3457,8 @@ early_param("xmon", early_parse_xmon);
void __init xmon_setup(void)
{
-#ifdef CONFIG_XMON_DEFAULT
- if (!xmon_off)
+ if (xmon_on)
xmon_init(1);
-#endif
if (xmon_early)
debugger(NULL);
}
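For reference, the command line values accepted by early_parse_xmon() after this change: "xmon" alone behaves like "xmon=early", installing the debugger hooks and dropping into xmon from xmon_setup() via debugger(NULL); "xmon=on" installs the hooks at boot without the early break; "xmon=off" leaves them uninstalled, though xmon can still be enabled later through sysrq-x or the debugfs file; the old "xmon=nobt" option is gone because the backtrace on entry is now printed unconditionally.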