aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig100
-rw-r--r--arch/powerpc/Makefile5
-rw-r--r--arch/powerpc/Makefile.postlink2
-rw-r--r--arch/powerpc/boot/.gitignore2
-rw-r--r--arch/powerpc/boot/Makefile16
-rw-r--r--arch/powerpc/boot/main.c41
-rw-r--r--arch/powerpc/boot/ops.h2
-rw-r--r--arch/powerpc/boot/serial.c1
-rwxr-xr-xarch/powerpc/boot/wrapper43
-rw-r--r--arch/powerpc/boot/xz_config.h20
-rw-r--r--arch/powerpc/boot/zImage.lds.S8
-rw-r--r--arch/powerpc/configs/40x/acadia_defconfig1
-rw-r--r--arch/powerpc/configs/40x/ep405_defconfig1
-rw-r--r--arch/powerpc/configs/40x/kilauea_defconfig1
-rw-r--r--arch/powerpc/configs/40x/klondike_defconfig1
-rw-r--r--arch/powerpc/configs/40x/makalu_defconfig1
-rw-r--r--arch/powerpc/configs/40x/obs600_defconfig1
-rw-r--r--arch/powerpc/configs/40x/virtex_defconfig1
-rw-r--r--arch/powerpc/configs/40x/walnut_defconfig1
-rw-r--r--arch/powerpc/configs/44x/akebono_defconfig1
-rw-r--r--arch/powerpc/configs/44x/arches_defconfig1
-rw-r--r--arch/powerpc/configs/44x/bamboo_defconfig1
-rw-r--r--arch/powerpc/configs/44x/bluestone_defconfig1
-rw-r--r--arch/powerpc/configs/44x/canyonlands_defconfig1
-rw-r--r--arch/powerpc/configs/44x/currituck_defconfig1
-rw-r--r--arch/powerpc/configs/44x/ebony_defconfig1
-rw-r--r--arch/powerpc/configs/44x/eiger_defconfig1
-rw-r--r--arch/powerpc/configs/44x/fsp2_defconfig1
-rw-r--r--arch/powerpc/configs/44x/icon_defconfig1
-rw-r--r--arch/powerpc/configs/44x/iss476-smp_defconfig1
-rw-r--r--arch/powerpc/configs/44x/katmai_defconfig1
-rw-r--r--arch/powerpc/configs/44x/rainier_defconfig1
-rw-r--r--arch/powerpc/configs/44x/redwood_defconfig1
-rw-r--r--arch/powerpc/configs/44x/sam440ep_defconfig1
-rw-r--r--arch/powerpc/configs/44x/sequoia_defconfig1
-rw-r--r--arch/powerpc/configs/44x/taishan_defconfig1
-rw-r--r--arch/powerpc/configs/44x/virtex5_defconfig1
-rw-r--r--arch/powerpc/configs/44x/warp_defconfig1
-rw-r--r--arch/powerpc/configs/52xx/cm5200_defconfig1
-rw-r--r--arch/powerpc/configs/52xx/lite5200b_defconfig1
-rw-r--r--arch/powerpc/configs/52xx/motionpro_defconfig1
-rw-r--r--arch/powerpc/configs/52xx/pcm030_defconfig1
-rw-r--r--arch/powerpc/configs/52xx/tqm5200_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/asp8347_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc8313_rdb_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc8315_rdb_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc832x_mds_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc832x_rdb_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc834x_itx_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc834x_mds_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc836x_mds_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc836x_rdk_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc837x_mds_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc837x_rdb_defconfig1
-rw-r--r--arch/powerpc/configs/85xx/ge_imp3a_defconfig1
-rw-r--r--arch/powerpc/configs/85xx/ksi8560_defconfig1
-rw-r--r--arch/powerpc/configs/85xx/mpc8540_ads_defconfig1
-rw-r--r--arch/powerpc/configs/85xx/mpc8560_ads_defconfig1
-rw-r--r--arch/powerpc/configs/85xx/mpc85xx_cds_defconfig1
-rw-r--r--arch/powerpc/configs/85xx/sbc8548_defconfig1
-rw-r--r--arch/powerpc/configs/85xx/stx_gp3_defconfig1
-rw-r--r--arch/powerpc/configs/85xx/tqm8548_defconfig1
-rw-r--r--arch/powerpc/configs/85xx/xes_mpc85xx_defconfig1
-rw-r--r--arch/powerpc/configs/adder875_defconfig1
-rw-r--r--arch/powerpc/configs/amigaone_defconfig1
-rw-r--r--arch/powerpc/configs/cell_defconfig1
-rw-r--r--arch/powerpc/configs/chrp32_defconfig1
-rw-r--r--arch/powerpc/configs/ep8248e_defconfig1
-rw-r--r--arch/powerpc/configs/ep88xc_defconfig1
-rw-r--r--arch/powerpc/configs/fsl-emb-nonhw.config1
-rw-r--r--arch/powerpc/configs/g5_defconfig2
-rw-r--r--arch/powerpc/configs/gamecube_defconfig2
-rw-r--r--arch/powerpc/configs/holly_defconfig1
-rw-r--r--arch/powerpc/configs/linkstation_defconfig1
-rw-r--r--arch/powerpc/configs/maple_defconfig2
-rw-r--r--arch/powerpc/configs/mgcoge_defconfig1
-rw-r--r--arch/powerpc/configs/mpc512x_defconfig1
-rw-r--r--arch/powerpc/configs/mpc5200_defconfig1
-rw-r--r--arch/powerpc/configs/mpc7448_hpc2_defconfig1
-rw-r--r--arch/powerpc/configs/mpc8272_ads_defconfig1
-rw-r--r--arch/powerpc/configs/mpc83xx_defconfig1
-rw-r--r--arch/powerpc/configs/mpc885_ads_defconfig1
-rw-r--r--arch/powerpc/configs/mvme5100_defconfig1
-rw-r--r--arch/powerpc/configs/pasemi_defconfig1
-rw-r--r--arch/powerpc/configs/pmac32_defconfig3
-rw-r--r--arch/powerpc/configs/powernv_defconfig4
-rw-r--r--arch/powerpc/configs/ppc40x_defconfig2
-rw-r--r--arch/powerpc/configs/ppc44x_defconfig1
-rw-r--r--arch/powerpc/configs/ppc64_defconfig5
-rw-r--r--arch/powerpc/configs/ppc64e_defconfig2
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig4
-rw-r--r--arch/powerpc/configs/pq2fads_defconfig1
-rw-r--r--arch/powerpc/configs/ps3_defconfig1
-rw-r--r--arch/powerpc/configs/pseries_defconfig3
-rw-r--r--arch/powerpc/configs/skiroot_defconfig2
-rw-r--r--arch/powerpc/configs/storcenter_defconfig1
-rw-r--r--arch/powerpc/configs/tqm8xx_defconfig1
-rw-r--r--arch/powerpc/configs/wii_defconfig2
-rw-r--r--arch/powerpc/include/asm/Kbuild1
-rw-r--r--arch/powerpc/include/asm/asm-prototypes.h14
-rw-r--r--arch/powerpc/include/asm/atomic.h44
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h18
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h6
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h39
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h12
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush.h9
-rw-r--r--arch/powerpc/include/asm/book3s/pgtable.h11
-rw-r--r--arch/powerpc/include/asm/bug.h8
-rw-r--r--arch/powerpc/include/asm/cache.h26
-rw-r--r--arch/powerpc/include/asm/cacheflush.h46
-rw-r--r--arch/powerpc/include/asm/cputable.h20
-rw-r--r--arch/powerpc/include/asm/current.h3
-rw-r--r--arch/powerpc/include/asm/eeh.h40
-rw-r--r--arch/powerpc/include/asm/elfnote.h24
-rw-r--r--arch/powerpc/include/asm/error-injection.h13
-rw-r--r--arch/powerpc/include/asm/exception-64s.h609
-rw-r--r--arch/powerpc/include/asm/fadump-internal.h169
-rw-r--r--arch/powerpc/include/asm/fadump.h194
-rw-r--r--arch/powerpc/include/asm/firmware.h5
-rw-r--r--arch/powerpc/include/asm/ftrace.h2
-rw-r--r--arch/powerpc/include/asm/futex.h3
-rw-r--r--arch/powerpc/include/asm/head-64.h243
-rw-r--r--arch/powerpc/include/asm/hugetlb.h3
-rw-r--r--arch/powerpc/include/asm/hvcall.h11
-rw-r--r--arch/powerpc/include/asm/hw_breakpoint.h21
-rw-r--r--arch/powerpc/include/asm/io-workarounds.h20
-rw-r--r--arch/powerpc/include/asm/io.h16
-rw-r--r--arch/powerpc/include/asm/iommu.h36
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h23
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h1
-rw-r--r--arch/powerpc/include/asm/lppaca.h40
-rw-r--r--arch/powerpc/include/asm/machdep.h7
-rw-r--r--arch/powerpc/include/asm/mce.h10
-rw-r--r--arch/powerpc/include/asm/mem_encrypt.h26
-rw-r--r--arch/powerpc/include/asm/mmu.h2
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h18
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/nohash/pgtable.h13
-rw-r--r--arch/powerpc/include/asm/opal-api.h46
-rw-r--r--arch/powerpc/include/asm/opal.h9
-rw-r--r--arch/powerpc/include/asm/paca.h2
-rw-r--r--arch/powerpc/include/asm/page.h14
-rw-r--r--arch/powerpc/include/asm/page_32.h4
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h1
-rw-r--r--arch/powerpc/include/asm/pci.h2
-rw-r--r--arch/powerpc/include/asm/pgalloc.h2
-rw-r--r--arch/powerpc/include/asm/pgtable.h53
-rw-r--r--arch/powerpc/include/asm/plpar_wrappers.h6
-rw-r--r--arch/powerpc/include/asm/pmc.h5
-rw-r--r--arch/powerpc/include/asm/pnv-ocxl.h2
-rw-r--r--arch/powerpc/include/asm/pnv-pci.h6
-rw-r--r--arch/powerpc/include/asm/powernv.h22
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h20
-rw-r--r--arch/powerpc/include/asm/ppc-pci.h7
-rw-r--r--arch/powerpc/include/asm/ppc4xx_ocm.h31
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h80
-rw-r--r--arch/powerpc/include/asm/processor.h2
-rw-r--r--arch/powerpc/include/asm/ps3stor.h2
-rw-r--r--arch/powerpc/include/asm/pte-walk.h28
-rw-r--r--arch/powerpc/include/asm/ptrace.h35
-rw-r--r--arch/powerpc/include/asm/reg.h3
-rw-r--r--arch/powerpc/include/asm/scom.h154
-rw-r--r--arch/powerpc/include/asm/sections.h11
-rw-r--r--arch/powerpc/include/asm/setjmp.h4
-rw-r--r--arch/powerpc/include/asm/spinlock.h62
-rw-r--r--arch/powerpc/include/asm/string.h2
-rw-r--r--arch/powerpc/include/asm/svm.h31
-rw-r--r--arch/powerpc/include/asm/syscall.h10
-rw-r--r--arch/powerpc/include/asm/time.h6
-rw-r--r--arch/powerpc/include/asm/timex.h34
-rw-r--r--arch/powerpc/include/asm/topology.h6
-rw-r--r--arch/powerpc/include/asm/uaccess.h15
-rw-r--r--arch/powerpc/include/asm/ultravisor-api.h33
-rw-r--r--arch/powerpc/include/asm/ultravisor.h49
-rw-r--r--arch/powerpc/include/asm/unistd.h1
-rw-r--r--arch/powerpc/include/asm/vas.h10
-rw-r--r--arch/powerpc/include/asm/xive.h10
-rw-r--r--arch/powerpc/include/uapi/asm/bpf_perf_event.h2
-rw-r--r--arch/powerpc/include/uapi/asm/kvm_para.h2
-rw-r--r--arch/powerpc/include/uapi/asm/mman.h6
-rw-r--r--arch/powerpc/kernel/.gitignore1
-rw-r--r--arch/powerpc/kernel/Makefile24
-rw-r--r--arch/powerpc/kernel/align.c4
-rw-r--r--arch/powerpc/kernel/asm-offsets.c3
-rw-r--r--arch/powerpc/kernel/cacheinfo.c21
-rw-r--r--arch/powerpc/kernel/cacheinfo.h4
-rw-r--r--arch/powerpc/kernel/cputable.c6
-rw-r--r--arch/powerpc/kernel/dawr.c101
-rw-r--r--arch/powerpc/kernel/dma-iommu.c53
-rw-r--r--arch/powerpc/kernel/eeh.c296
-rw-r--r--arch/powerpc/kernel/eeh_cache.c40
-rw-r--r--arch/powerpc/kernel/eeh_dev.c2
-rw-r--r--arch/powerpc/kernel/eeh_driver.c280
-rw-r--r--arch/powerpc/kernel/eeh_event.c34
-rw-r--r--arch/powerpc/kernel/eeh_pe.c145
-rw-r--r--arch/powerpc/kernel/entry_32.S48
-rw-r--r--arch/powerpc/kernel/entry_64.S26
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S22
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S2065
-rw-r--r--arch/powerpc/kernel/fadump.c1340
-rw-r--r--arch/powerpc/kernel/head_32.S55
-rw-r--r--arch/powerpc/kernel/head_32.h21
-rw-r--r--arch/powerpc/kernel/head_64.S10
-rw-r--r--arch/powerpc/kernel/head_8xx.S28
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c133
-rw-r--r--arch/powerpc/kernel/io-workarounds.c13
-rw-r--r--arch/powerpc/kernel/iommu.c97
-rw-r--r--arch/powerpc/kernel/irq.c6
-rw-r--r--arch/powerpc/kernel/kexec_elf_64.c545
-rw-r--r--arch/powerpc/kernel/kvm.c58
-rw-r--r--arch/powerpc/kernel/kvm_emul.S16
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c9
-rw-r--r--arch/powerpc/kernel/mce.c71
-rw-r--r--arch/powerpc/kernel/mce_power.c53
-rw-r--r--arch/powerpc/kernel/misc_32.S36
-rw-r--r--arch/powerpc/kernel/misc_64.S52
-rw-r--r--arch/powerpc/kernel/module_32.c24
-rw-r--r--arch/powerpc/kernel/module_64.c62
-rw-r--r--arch/powerpc/kernel/note.S40
-rw-r--r--arch/powerpc/kernel/paca.c52
-rw-r--r--arch/powerpc/kernel/pci-common.c4
-rw-r--r--arch/powerpc/kernel/pci-hotplug.c7
-rw-r--r--arch/powerpc/kernel/pci_32.c4
-rw-r--r--arch/powerpc/kernel/pci_64.c12
-rw-r--r--arch/powerpc/kernel/pci_dn.c21
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c76
-rw-r--r--arch/powerpc/kernel/process.c79
-rw-r--r--arch/powerpc/kernel/prom.c8
-rw-r--r--arch/powerpc/kernel/prom_init.c127
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh2
-rw-r--r--arch/powerpc/kernel/ptrace.c1
-rw-r--r--arch/powerpc/kernel/rtas.c25
-rw-r--r--arch/powerpc/kernel/security.c19
-rw-r--r--arch/powerpc/kernel/setup-common.c14
-rw-r--r--arch/powerpc/kernel/setup_32.c2
-rw-r--r--arch/powerpc/kernel/signal_32.c9
-rw-r--r--arch/powerpc/kernel/signal_64.c7
-rw-r--r--arch/powerpc/kernel/stacktrace.c2
-rw-r--r--arch/powerpc/kernel/suspend.c1
-rw-r--r--arch/powerpc/kernel/swsusp_32.S73
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/powerpc/kernel/sysfs.c20
-rw-r--r--arch/powerpc/kernel/tm.S4
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c9
-rw-r--r--arch/powerpc/kernel/trace/ftrace_32.S1
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S1
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_pg.S1
-rw-r--r--arch/powerpc/kernel/traps.c5
-rw-r--r--arch/powerpc/kernel/ucall.S14
-rw-r--r--arch/powerpc/kernel/vdso.c22
-rw-r--r--arch/powerpc/kernel/vdso32/datapage.S2
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32.lds.S4
-rw-r--r--arch/powerpc/kvm/Kconfig7
-rw-r--r--arch/powerpc/kvm/book3s.c8
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu.c1
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c12
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c77
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c42
-rw-r--r--arch/powerpc/kvm/book3s_hv.c56
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c6
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S77
-rw-r--r--arch/powerpc/kvm/book3s_hv_tm.c6
-rw-r--r--arch/powerpc/kvm/book3s_xics.c2
-rw-r--r--arch/powerpc/kvm/book3s_xive.c64
-rw-r--r--arch/powerpc/kvm/book3s_xive.h2
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c27
-rw-r--r--arch/powerpc/kvm/e500.c3
-rw-r--r--arch/powerpc/kvm/emulate.c1
-rw-r--r--arch/powerpc/kvm/emulate_loadstore.c6
-rw-r--r--arch/powerpc/kvm/powerpc.c22
-rw-r--r--arch/powerpc/lib/Makefile7
-rw-r--r--arch/powerpc/lib/ldstfp.S4
-rw-r--r--arch/powerpc/lib/locks.c6
-rw-r--r--arch/powerpc/lib/memcpy_mcsafe_64.S242
-rw-r--r--arch/powerpc/lib/pmem.c8
-rw-r--r--arch/powerpc/mm/Makefile2
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c60
-rw-r--r--arch/powerpc/mm/book3s64/Makefile1
-rw-r--r--arch/powerpc/mm/book3s64/hash_native.c6
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c105
-rw-r--r--arch/powerpc/mm/book3s64/iommu_api.c48
-rw-r--r--arch/powerpc/mm/book3s64/mmu_context.c1
-rw-r--r--arch/powerpc/mm/book3s64/pgtable.c114
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c194
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c338
-rw-r--r--arch/powerpc/mm/book3s64/subpage_prot.c12
-rw-r--r--arch/powerpc/mm/book3s64/vphn.h16
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c312
-rw-r--r--arch/powerpc/mm/fault.c28
-rw-r--r--arch/powerpc/mm/hugetlbpage.c99
-rw-r--r--arch/powerpc/mm/init_64.c5
-rw-r--r--arch/powerpc/mm/ioremap.c99
-rw-r--r--arch/powerpc/mm/ioremap_32.c92
-rw-r--r--arch/powerpc/mm/ioremap_64.c113
-rw-r--r--arch/powerpc/mm/kasan/kasan_init_32.c30
-rw-r--r--arch/powerpc/mm/mem.c84
-rw-r--r--arch/powerpc/mm/mmu_decl.h7
-rw-r--r--arch/powerpc/mm/nohash/book3e_hugetlbpage.c16
-rw-r--r--arch/powerpc/mm/nohash/tlb.c3
-rw-r--r--arch/powerpc/mm/numa.c61
-rw-r--r--arch/powerpc/mm/pgtable-frag.c6
-rw-r--r--arch/powerpc/mm/pgtable.c16
-rw-r--r--arch/powerpc/mm/pgtable_32.c157
-rw-r--r--arch/powerpc/mm/pgtable_64.c202
-rw-r--r--arch/powerpc/mm/ptdump/bats.c2
-rw-r--r--arch/powerpc/mm/ptdump/hashpagetable.c24
-rw-r--r--arch/powerpc/mm/ptdump/ptdump.c43
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c36
-rw-r--r--arch/powerpc/perf/hv-24x7.c2
-rw-r--r--arch/powerpc/perf/imc-pmu.c43
-rw-r--r--arch/powerpc/platforms/40x/Kconfig7
-rw-r--r--arch/powerpc/platforms/44x/Kconfig18
-rw-r--r--arch/powerpc/platforms/4xx/Makefile1
-rw-r--r--arch/powerpc/platforms/4xx/ocm.c390
-rw-r--r--arch/powerpc/platforms/4xx/uic.c1
-rw-r--r--arch/powerpc/platforms/85xx/Kconfig8
-rw-r--r--arch/powerpc/platforms/86xx/Kconfig6
-rw-r--r--arch/powerpc/platforms/8xx/Kconfig7
-rw-r--r--arch/powerpc/platforms/8xx/Makefile2
-rw-r--r--arch/powerpc/platforms/8xx/cpm1.c (renamed from arch/powerpc/sysdev/cpm1.c)24
-rw-r--r--arch/powerpc/platforms/8xx/micropatch.c378
-rw-r--r--arch/powerpc/platforms/Kconfig3
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype18
-rw-r--r--arch/powerpc/platforms/cell/iommu.c2
-rw-r--r--arch/powerpc/platforms/cell/spufs/fault.c9
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c2
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c207
-rw-r--r--arch/powerpc/platforms/cell/spufs/run.c2
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c2
-rw-r--r--arch/powerpc/platforms/maple/Kconfig2
-rw-r--r--arch/powerpc/platforms/pasemi/iommu.c2
-rw-r--r--arch/powerpc/platforms/powermac/sleep.S68
-rw-r--r--arch/powerpc/platforms/powernv/Kconfig5
-rw-r--r--arch/powerpc/platforms/powernv/Makefile6
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c101
-rw-r--r--arch/powerpc/platforms/powernv/idle.c12
-rw-r--r--arch/powerpc/platforms/powernv/memtrace.c23
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c672
-rw-r--r--arch/powerpc/platforms/powernv/opal-call.c6
-rw-r--r--arch/powerpc/platforms/powernv/opal-core.c636
-rw-r--r--arch/powerpc/platforms/powernv/opal-fadump.c716
-rw-r--r--arch/powerpc/platforms/powernv/opal-fadump.h146
-rw-r--r--arch/powerpc/platforms/powernv/opal-hmi.c40
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c12
-rw-r--r--arch/powerpc/platforms/powernv/opal-msglog.c57
-rw-r--r--arch/powerpc/platforms/powernv/opal-prd.c8
-rw-r--r--arch/powerpc/platforms/powernv/opal-xscom.c213
-rw-r--r--arch/powerpc/platforms/powernv/opal.c65
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda-tce.c38
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c112
-rw-r--r--arch/powerpc/platforms/powernv/pci.c148
-rw-r--r--arch/powerpc/platforms/powernv/pci.h8
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h5
-rw-r--r--arch/powerpc/platforms/powernv/setup.c9
-rw-r--r--arch/powerpc/platforms/powernv/ultravisor.c69
-rw-r--r--arch/powerpc/platforms/powernv/vas-window.c19
-rw-r--r--arch/powerpc/platforms/powernv/vas.h20
-rw-r--r--arch/powerpc/platforms/ps3/spu.c10
-rw-r--r--arch/powerpc/platforms/ps3/system-bus.c11
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig33
-rw-r--r--arch/powerpc/platforms/pseries/Makefile3
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c12
-rw-r--r--arch/powerpc/platforms/pseries/dtl.c23
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c68
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c29
-rw-r--r--arch/powerpc/platforms/pseries/hvconsole.c2
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c4
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c24
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c623
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c28
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c196
-rw-r--r--arch/powerpc/platforms/pseries/pci.c3
-rw-r--r--arch/powerpc/platforms/pseries/ras.c460
-rw-r--r--arch/powerpc/platforms/pseries/rtas-fadump.c550
-rw-r--r--arch/powerpc/platforms/pseries/rtas-fadump.h114
-rw-r--r--arch/powerpc/platforms/pseries/setup.c71
-rw-r--r--arch/powerpc/platforms/pseries/smp.c3
-rw-r--r--arch/powerpc/platforms/pseries/svm.c85
-rw-r--r--arch/powerpc/platforms/pseries/vio.c8
-rw-r--r--arch/powerpc/platforms/pseries/vphn.c (renamed from arch/powerpc/mm/book3s64/vphn.c)20
-rw-r--r--arch/powerpc/sysdev/Kconfig9
-rw-r--r--arch/powerpc/sysdev/Makefile4
-rw-r--r--arch/powerpc/sysdev/dart_iommu.c4
-rw-r--r--arch/powerpc/sysdev/micropatch.c749
-rw-r--r--arch/powerpc/sysdev/scom.c223
-rw-r--r--arch/powerpc/sysdev/xics/Kconfig13
-rw-r--r--arch/powerpc/sysdev/xive/common.c153
-rw-r--r--arch/powerpc/sysdev/xive/native.c33
-rw-r--r--arch/powerpc/sysdev/xive/spapr.c109
-rw-r--r--arch/powerpc/sysdev/xive/xive-internal.h2
-rw-r--r--arch/powerpc/xmon/xmon.c65
395 files changed, 11373 insertions, 9207 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8c1c636308c8..3e56c9c2f16e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -48,7 +48,7 @@ config ARCH_MMAP_RND_COMPAT_BITS_MAX
# Allow randomisation to consume up to 512MB of address space (2^29).
default 11 if PPC_256K_PAGES # 11 = 29 (512MB) - 18 (256K)
default 13 if PPC_64K_PAGES # 13 = 29 (512MB) - 16 (64K)
- default 15 if PPC_16K_PAGES # 15 = 29 (512MB) - 14 (16K)
+ default 15 if PPC_16K_PAGES # 15 = 29 (512MB) - 14 (16K)
default 17 # 17 = 29 (512MB) - 12 (4K)
config ARCH_MMAP_RND_COMPAT_BITS_MIN
@@ -125,17 +125,19 @@ config PPC
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV
+ select ARCH_HAS_HUGEPD if HUGETLB_PAGE
select ARCH_HAS_MMIOWB if PPC64
select ARCH_HAS_PHYS_TO_DMA
- select ARCH_HAS_PMEM_API if PPC64
+ select ARCH_HAS_PMEM_API
+ select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_MEMBARRIER_CALLBACKS
- select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
+ select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
- select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
+ select ARCH_HAS_UACCESS_FLUSHCACHE
+ select ARCH_HAS_UACCESS_MCSAFE if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
- select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
select ARCH_MIGHT_HAVE_PC_PARPORT
@@ -167,6 +169,7 @@ config PPC
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if PPC32
select HAVE_ARCH_KGDB
@@ -175,27 +178,32 @@ config PPC
select HAVE_ARCH_NVRAM_OPS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
+ select HAVE_ASM_MODVERSIONS
+ select HAVE_C_RECORDMCOUNT
select HAVE_CBPF_JIT if !PPC64
select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_CONTEXT_TRACKING if PPC64
+ select HAVE_COPY_THREAD_TLS
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL
select HAVE_EBPF_JIT if PPC64
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
+ select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC
- select HAVE_GENERIC_GUP
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IDE
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK
select HAVE_KERNEL_GZIP
+ select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE
+ select HAVE_KERNEL_LZO if DEFAULT_UIMAGE
select HAVE_KERNEL_XZ if PPC_BOOK3S || 44x
select HAVE_KPROBES
select HAVE_KPROBES_ON_FTRACE
@@ -234,6 +242,7 @@ config PPC
select OLD_SIGSUSPEND
select PCI_DOMAINS if PCI
select PCI_SYSCALL if PCI
+ select PPC_DAWR if PPC64
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
@@ -244,9 +253,9 @@ config PPC
#
config PPC_BARRIER_NOSPEC
- bool
- default y
- depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
+ bool
+ default y
+ depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
config EARLY_PRINTK
bool
@@ -370,6 +379,9 @@ config PPC_ADV_DEBUG_DAC_RANGE
depends on PPC_ADV_DEBUG_REGS && 44x
default y
+config PPC_DAWR
+ bool
+
config ZONE_DMA
bool
default y if PPC_BOOK3E_64
@@ -398,7 +410,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE
config MATH_EMULATION
bool "Math emulation"
depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE
- ---help---
+ help
Some PowerPC chips designed for embedded applications do not have
a floating-point unit and therefore do not implement the
floating-point instructions in the PowerPC instruction set. If you
@@ -417,27 +429,27 @@ choice
config MATH_EMULATION_FULL
bool "Emulate all the floating point instructions"
- ---help---
+ help
Select this option will enable the kernel to support to emulate
all the floating point instructions. If your SoC doesn't have
a FPU, you should select this.
config MATH_EMULATION_HW_UNIMPLEMENTED
bool "Just emulate the FPU unimplemented instructions"
- ---help---
+ help
Select this if you know there does have a hardware FPU on your
SoC, but some floating point instructions are not implemented by that.
endchoice
config PPC_TRANSACTIONAL_MEM
- bool "Transactional Memory support for POWERPC"
- depends on PPC_BOOK3S_64
- depends on SMP
- select ALTIVEC
- select VSX
- ---help---
- Support user-mode Transactional Memory on POWERPC.
+ bool "Transactional Memory support for POWERPC"
+ depends on PPC_BOOK3S_64
+ depends on SMP
+ select ALTIVEC
+ select VSX
+ help
+ Support user-mode Transactional Memory on POWERPC.
config LD_HEAD_STUB_CATCH
bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT
@@ -457,7 +469,7 @@ config HOTPLUG_CPU
bool "Support for enabling/disabling CPUs"
depends on SMP && (PPC_PSERIES || \
PPC_PMAC || PPC_POWERNV || FSL_SOC_BOOKE)
- ---help---
+ help
Say Y here to be able to disable and re-enable individual
CPUs at runtime on SMP machines.
@@ -502,6 +514,7 @@ config KEXEC_FILE
select KEXEC_CORE
select HAVE_IMA_KEXEC
select BUILD_BIN2C
+ select KEXEC_ELF
depends on PPC64
depends on CRYPTO=y
depends on CRYPTO_SHA256=y
@@ -557,7 +570,7 @@ config CRASH_DUMP
config FA_DUMP
bool "Firmware-assisted dump"
- depends on PPC64 && PPC_RTAS
+ depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
select CRASH_CORE
select CRASH_DUMP
help
@@ -568,7 +581,26 @@ config FA_DUMP
is meant to be a kdump replacement offering robustness and
speed not possible without system firmware assistance.
- If unsure, say "N"
+ If unsure, say "y". Only special kernels like petitboot may
+ need to say "N" here.
+
+config PRESERVE_FA_DUMP
+ bool "Preserve Firmware-assisted dump"
+ depends on PPC64 && PPC_POWERNV && !FA_DUMP
+ help
+ On a kernel with FA_DUMP disabled, this option helps to preserve
+ crash data from a previously crash'ed kernel. Useful when the next
+ memory preserving kernel boot would process this crash data.
+ Petitboot kernel is the typical usecase for this option.
+
+config OPAL_CORE
+ bool "Export OPAL memory as /sys/firmware/opal/core"
+ depends on PPC64 && PPC_POWERNV
+ help
+ This option uses the MPIPL support in firmware to provide an
+ ELF core of OPAL memory after a crash. The ELF core is exported
+ as /sys/firmware/opal/core file which is helpful in debugging
+ OPAL crashes using GDB.
config IRQ_ALL_CPUS
bool "Distribute interrupts on all CPUs by default"
@@ -825,7 +857,7 @@ config PPC_DENORMALISATION
bool "PowerPC denormalisation exception handling"
depends on PPC_BOOK3S_64
default "y" if PPC_POWERNV
- ---help---
+ help
Add support for handling denormalisation of single precision
values. Useful for bare metal only. If unsure say Y here.
@@ -898,7 +930,7 @@ config PPC_MEM_KEYS
page-based protections, but without requiring modification of the
page tables when an application changes protection domains.
- For details, see Documentation/vm/protection-keys.rst
+ For details, see Documentation/core-api/protection-keys.rst
If unsure, say y.
@@ -938,7 +970,7 @@ config FSL_SOC
bool
config FSL_PCI
- bool
+ bool
select ARCH_HAS_DMA_SET_MASK
select PPC_INDIRECT_PCI
select PCI_QUIRKS
@@ -986,7 +1018,7 @@ config FSL_RIO
bool "Freescale Embedded SRIO Controller support"
depends on RAPIDIO = y && HAVE_RAPIDIO
default "n"
- ---help---
+ help
Include support for RapidIO controller on Freescale embedded
processors (MPC8548, MPC8641, etc).
@@ -1050,14 +1082,14 @@ config DYNAMIC_MEMSTART
select NONSTATIC_KERNEL
help
This option enables the kernel to be loaded at any page aligned
- physical address. The kernel creates a mapping from KERNELBASE to
+ physical address. The kernel creates a mapping from KERNELBASE to
the address where the kernel is loaded. The page size here implies
the TLB page size of the mapping for kernel on the particular platform.
Please refer to the init code for finding the TLB page size.
DYNAMIC_MEMSTART is an easy way of implementing pseudo-RELOCATABLE
kernel image, where the only restriction is the page aligned kernel
- load address. When this option is enabled, the compile time physical
+ load address. When this option is enabled, the compile time physical
address CONFIG_PHYSICAL_START is ignored.
This option is overridden by CONFIG_RELOCATABLE
@@ -1129,18 +1161,6 @@ config TASK_SIZE
default "0x80000000" if PPC_8xx
default "0xc0000000"
-config CONSISTENT_SIZE_BOOL
- bool "Set custom consistent memory pool size"
- depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
- help
- This option allows you to set the size of the
- consistent memory pool. This pool of virtual memory
- is used to make consistent memory allocations.
-
-config CONSISTENT_SIZE
- hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL
- default "0x00200000" if NOT_COHERENT_CACHE
-
config PIN_TLB
bool "Pinned Kernel TLBs (860 ONLY)"
depends on ADVANCED_OPTIONS && PPC_8xx && \
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index c345b79414a9..83522c9fc7b6 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -39,13 +39,11 @@ endif
uname := $(shell uname -m)
KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64)_defconfig
-ifdef CONFIG_PPC64
new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
ifeq ($(new_nm),y)
NM := $(NM) --synthetic
endif
-endif
# BITS is used as extension for files which are available in a 32 bit
# and a 64 bit version to simplify shared Makefiles.
@@ -67,7 +65,7 @@ UTS_MACHINE := $(subst $(space),,$(machine-y))
ifdef CONFIG_PPC32
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
else
-KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/powerpc/kernel/module.lds
+KBUILD_LDS_MODULE += $(srctree)/arch/powerpc/kernel/module.lds
ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
# Have the linker provide sfpr if possible.
# There is a corresponding test in arch/powerpc/lib/Makefile
@@ -112,7 +110,6 @@ ifeq ($(HAS_BIARCH),y)
KBUILD_CFLAGS += -m$(BITS)
KBUILD_AFLAGS += -m$(BITS) -Wl,-a$(BITS)
KBUILD_LDFLAGS += -m elf$(BITS)$(LDEMULATION)
-KBUILD_ARFLAGS += --target=elf$(BITS)-$(GNUTARGET)
endif
cflags-$(CONFIG_STACKPROTECTOR) += -mstack-protector-guard=tls
diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink
index 83f8e5ba2722..134f12f89b92 100644
--- a/arch/powerpc/Makefile.postlink
+++ b/arch/powerpc/Makefile.postlink
@@ -18,7 +18,7 @@ quiet_cmd_relocs_check = CHKREL $@
ifdef CONFIG_PPC_BOOK3S_64
cmd_relocs_check = \
$(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@" ; \
- $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@"
+ $(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@"
else
cmd_relocs_check = \
$(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@"
diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore
index 32034a0cc554..6610665fcf5e 100644
--- a/arch/powerpc/boot/.gitignore
+++ b/arch/powerpc/boot/.gitignore
@@ -44,5 +44,3 @@ fdt_sw.c
fdt_wip.c
libfdt.h
libfdt_internal.h
-autoconf.h
-
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 73d1f3562978..6841bd52738b 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -20,9 +20,6 @@
all: $(obj)/zImage
-compress-$(CONFIG_KERNEL_GZIP) := CONFIG_KERNEL_GZIP
-compress-$(CONFIG_KERNEL_XZ) := CONFIG_KERNEL_XZ
-
ifdef CROSS32_COMPILE
BOOTCC := $(CROSS32_COMPILE)gcc
BOOTAR := $(CROSS32_COMPILE)ar
@@ -34,7 +31,7 @@ endif
BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \
-pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
- -D$(compress-y)
+ $(LINUXINCLUDE)
ifdef CONFIG_PPC64_BOOT_WRAPPER
BOOTCFLAGS += -m64
@@ -51,7 +48,7 @@ BOOTCFLAGS += -mlittle-endian
BOOTCFLAGS += $(call cc-option,-mabi=elfv2)
endif
-BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
+BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -nostdinc
BOOTARFLAGS := -cr$(KBUILD_ARFLAGS)
@@ -202,14 +199,9 @@ $(obj)/empty.c:
$(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S
$(Q)cp $< $@
-$(srctree)/$(src)/serial.c: $(obj)/autoconf.h
-
-$(obj)/autoconf.h: $(obj)/%: $(objtree)/include/generated/%
- $(Q)cp $< $@
-
clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \
$(zlib-decomp-) $(libfdt) $(libfdtheader) \
- autoconf.h empty.c zImage.coff.lds zImage.ps3.lds zImage.lds
+ empty.c zImage.coff.lds zImage.ps3.lds zImage.lds
quiet_cmd_bootcc = BOOTCC $@
cmd_bootcc = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $<
@@ -257,6 +249,8 @@ endif
compressor-$(CONFIG_KERNEL_GZIP) := gz
compressor-$(CONFIG_KERNEL_XZ) := xz
+compressor-$(CONFIG_KERNEL_LZMA) := lzma
+compressor-$(CONFIG_KERNEL_LZO) := lzo
# args (to if_changed): 1 = (this rule), 2 = platform, 3 = dts 4=dtb 5=initrd
quiet_cmd_wrap = WRAP $@
diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index 102cc546444d..a9d209135975 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -146,6 +146,46 @@ static struct addr_range prep_initrd(struct addr_range vmlinux, void *chosen,
return (struct addr_range){(void *)initrd_addr, initrd_size};
}
+#ifdef __powerpc64__
+static void prep_esm_blob(struct addr_range vmlinux, void *chosen)
+{
+ unsigned long esm_blob_addr, esm_blob_size;
+
+ /* Do we have an ESM (Enter Secure Mode) blob? */
+ if (_esm_blob_end <= _esm_blob_start)
+ return;
+
+ printf("Attached ESM blob at 0x%p-0x%p\n\r",
+ _esm_blob_start, _esm_blob_end);
+ esm_blob_addr = (unsigned long)_esm_blob_start;
+ esm_blob_size = _esm_blob_end - _esm_blob_start;
+
+ /*
+ * If the ESM blob is too low it will be clobbered when the
+ * kernel relocates to its final location. In this case,
+ * allocate a safer place and move it.
+ */
+ if (esm_blob_addr < vmlinux.size) {
+ void *old_addr = (void *)esm_blob_addr;
+
+ printf("Allocating 0x%lx bytes for esm_blob ...\n\r",
+ esm_blob_size);
+ esm_blob_addr = (unsigned long)malloc(esm_blob_size);
+ if (!esm_blob_addr)
+ fatal("Can't allocate memory for ESM blob !\n\r");
+ printf("Relocating ESM blob 0x%lx <- 0x%p (0x%lx bytes)\n\r",
+ esm_blob_addr, old_addr, esm_blob_size);
+ memmove((void *)esm_blob_addr, old_addr, esm_blob_size);
+ }
+
+ /* Tell the kernel ESM blob address via device tree. */
+ setprop_val(chosen, "linux,esm-blob-start", (u32)(esm_blob_addr));
+ setprop_val(chosen, "linux,esm-blob-end", (u32)(esm_blob_addr + esm_blob_size));
+}
+#else
+static inline void prep_esm_blob(struct addr_range vmlinux, void *chosen) { }
+#endif
+
/* A buffer that may be edited by tools operating on a zImage binary so as to
* edit the command line passed to vmlinux (by setting /chosen/bootargs).
* The buffer is put in it's own section so that tools may locate it easier.
@@ -214,6 +254,7 @@ void start(void)
vmlinux = prep_kernel();
initrd = prep_initrd(vmlinux, chosen,
loader_info.initrd_addr, loader_info.initrd_size);
+ prep_esm_blob(vmlinux, chosen);
prep_cmdline(chosen);
printf("Finalizing device tree...");
diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h
index cd043726ed88..e0606766480f 100644
--- a/arch/powerpc/boot/ops.h
+++ b/arch/powerpc/boot/ops.h
@@ -251,6 +251,8 @@ extern char _initrd_start[];
extern char _initrd_end[];
extern char _dtb_start[];
extern char _dtb_end[];
+extern char _esm_blob_start[];
+extern char _esm_blob_end[];
static inline __attribute__((const))
int __ilog2_u32(u32 n)
diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c
index b0491b8c0199..9457863147f9 100644
--- a/arch/powerpc/boot/serial.c
+++ b/arch/powerpc/boot/serial.c
@@ -18,7 +18,6 @@
#include "stdio.h"
#include "io.h"
#include "ops.h"
-#include "autoconf.h"
static int serial_open(void)
{
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 532d45833396..ed6266367bc0 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -13,6 +13,7 @@
# -i initrd specify initrd file
# -d devtree specify device-tree blob
# -s tree.dts specify device-tree source file (needs dtc installed)
+# -e esm_blob specify ESM blob for secure images
# -c cache $kernel.strip.gz (use if present & newer, else make)
# -C prefix specify command prefix for cross-building tools
# (strip, objcopy, ld)
@@ -37,9 +38,11 @@ platform=of
initrd=
dtb=
dts=
+esm_blob=
cacheit=
binary=
compression=.gz
+uboot_comp=gzip
pie=
format=
@@ -59,9 +62,9 @@ tmpdir=.
usage() {
echo 'Usage: wrapper [-o output] [-p platform] [-i initrd]' >&2
- echo ' [-d devtree] [-s tree.dts] [-c] [-C cross-prefix]' >&2
- echo ' [-D datadir] [-W workingdir] [-Z (gz|xz|none)]' >&2
- echo ' [--no-compression] [vmlinux]' >&2
+ echo ' [-d devtree] [-s tree.dts] [-e esm_blob]' >&2
+ echo ' [-c] [-C cross-prefix] [-D datadir] [-W workingdir]' >&2
+ echo ' [-Z (gz|xz|none)] [--no-compression] [vmlinux]' >&2
exit 1
}
@@ -104,6 +107,11 @@ while [ "$#" -gt 0 ]; do
[ "$#" -gt 0 ] || usage
dtb="$1"
;;
+ -e)
+ shift
+ [ "$#" -gt 0 ] || usage
+ esm_blob="$1"
+ ;;
-s)
shift
[ "$#" -gt 0 ] || usage
@@ -130,22 +138,29 @@ while [ "$#" -gt 0 ]; do
;;
-z)
compression=.gz
+ uboot_comp=gzip
;;
-Z)
shift
[ "$#" -gt 0 ] || usage
- [ "$1" != "gz" -o "$1" != "xz" -o "$1" != "none" ] || usage
+ [ "$1" != "gz" -o "$1" != "xz" -o "$1" != "lzma" -o "$1" != "lzo" -o "$1" != "none" ] || usage
compression=".$1"
+ uboot_comp=$1
if [ $compression = ".none" ]; then
compression=
+ uboot_comp=none
fi
+ if [ $uboot_comp = "gz" ]; then
+ uboot_comp=gzip
+ fi
;;
--no-gzip)
# a "feature" of the the wrapper script is that it can be used outside
# the kernel tree. So keeping this around for backwards compatibility.
compression=
+ uboot_comp=none
;;
-?)
usage
@@ -210,9 +225,16 @@ objflags=-S
tmp=$tmpdir/zImage.$$.o
ksection=.kernel:vmlinux.strip
isection=.kernel:initrd
+esection=.kernel:esm_blob
link_address='0x400000'
make_space=y
+
+if [ -n "$esm_blob" -a "$platform" != "pseries" ]; then
+ echo "ESM blob not support on non-pseries platforms" >&2
+ exit 1
+fi
+
case "$platform" in
of)
platformo="$object/of.o $object/epapr.o"
@@ -365,9 +387,16 @@ if [ -z "$cacheit" -o ! -f "$vmz$compression" -o "$vmz$compression" -ot "$kernel
.gz)
gzip -n -f -9 "$vmz.$$"
;;
+ .lzma)
+ xz --format=lzma -f -6 "$vmz.$$"
+ ;;
+ .lzo)
+ lzop -f -9 "$vmz.$$"
+ ;;
*)
# drop the compression suffix so the stripped vmlinux is used
compression=
+ uboot_comp=none
;;
esac
@@ -411,7 +440,7 @@ membase=`${CROSS}objdump -p "$kernel" | grep -m 1 LOAD | awk '{print $7}'`
case "$platform" in
uboot)
rm -f "$ofile"
- ${MKIMAGE} -A ppc -O linux -T kernel -C gzip -a $membase -e $membase \
+ ${MKIMAGE} -A ppc -O linux -T kernel -C $uboot_comp -a $membase -e $membase \
$uboot_version -d "$vmz" "$ofile"
if [ -z "$cacheit" ]; then
rm -f "$vmz"
@@ -462,6 +491,10 @@ if [ -n "$dtb" ]; then
fi
fi
+if [ -n "$esm_blob" ]; then
+ addsec $tmp "$esm_blob" $esection
+fi
+
if [ "$platform" != "miboot" ]; then
if [ -n "$link_address" ] ; then
text_start="-Ttext $link_address"
diff --git a/arch/powerpc/boot/xz_config.h b/arch/powerpc/boot/xz_config.h
index e22e5b3770dd..ebfadd39e192 100644
--- a/arch/powerpc/boot/xz_config.h
+++ b/arch/powerpc/boot/xz_config.h
@@ -20,10 +20,30 @@ static inline uint32_t swab32p(void *p)
#ifdef __LITTLE_ENDIAN__
#define get_le32(p) (*((uint32_t *) (p)))
+#define cpu_to_be32(x) swab32(x)
+static inline u32 be32_to_cpup(const u32 *p)
+{
+ return swab32p((u32 *)p);
+}
#else
#define get_le32(p) swab32p(p)
+#define cpu_to_be32(x) (x)
+static inline u32 be32_to_cpup(const u32 *p)
+{
+ return *p;
+}
#endif
+static inline uint32_t get_unaligned_be32(const void *p)
+{
+ return be32_to_cpup(p);
+}
+
+static inline void put_unaligned_be32(u32 val, void *p)
+{
+ *((u32 *)p) = cpu_to_be32(val);
+}
+
#define memeq(a, b, size) (memcmp(a, b, size) == 0)
#define memzero(buf, size) memset(buf, 0, size)
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index 4ac1e36edfe7..a21f3a76e06f 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -68,6 +68,14 @@ SECTIONS
_initrd_end = .;
}
+ . = ALIGN(4096);
+ .kernel:esm_blob :
+ {
+ _esm_blob_start = .;
+ *(.kernel:esm_blob)
+ _esm_blob_end = .;
+ }
+
#ifdef CONFIG_PPC64_BOOT_WRAPPER
. = ALIGN(256);
.got :
diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig
index e57344c3b0d7..5a75e4f14273 100644
--- a/arch/powerpc/configs/40x/acadia_defconfig
+++ b/arch/powerpc/configs/40x/acadia_defconfig
@@ -22,7 +22,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig
index 0f66f8a87be8..e2691c5db766 100644
--- a/arch/powerpc/configs/40x/ep405_defconfig
+++ b/arch/powerpc/configs/40x/ep405_defconfig
@@ -21,7 +21,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig
index 3da091f651d6..949989ef2322 100644
--- a/arch/powerpc/configs/40x/kilauea_defconfig
+++ b/arch/powerpc/configs/40x/kilauea_defconfig
@@ -24,7 +24,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/40x/klondike_defconfig b/arch/powerpc/configs/40x/klondike_defconfig
index caab658d1da1..4347a87088dc 100644
--- a/arch/powerpc/configs/40x/klondike_defconfig
+++ b/arch/powerpc/configs/40x/klondike_defconfig
@@ -14,7 +14,6 @@ CONFIG_APM8018X=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_MATH_EMULATION=y
# CONFIG_SUSPEND is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
CONFIG_SCSI=y
diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig
index e0b1489b7c7b..90b759bbf426 100644
--- a/arch/powerpc/configs/40x/makalu_defconfig
+++ b/arch/powerpc/configs/40x/makalu_defconfig
@@ -21,7 +21,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/40x/obs600_defconfig b/arch/powerpc/configs/40x/obs600_defconfig
index 38d3d7769a2f..881c300c011d 100644
--- a/arch/powerpc/configs/40x/obs600_defconfig
+++ b/arch/powerpc/configs/40x/obs600_defconfig
@@ -24,7 +24,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/40x/virtex_defconfig b/arch/powerpc/configs/40x/virtex_defconfig
index a2b2770eee8f..5e7c61d1d7d0 100644
--- a/arch/powerpc/configs/40x/virtex_defconfig
+++ b/arch/powerpc/configs/40x/virtex_defconfig
@@ -31,7 +31,6 @@ CONFIG_NETFILTER=y
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_MANGLE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=8192
diff --git a/arch/powerpc/configs/40x/walnut_defconfig b/arch/powerpc/configs/40x/walnut_defconfig
index 6faa03cd661c..0ed46704b9fa 100644
--- a/arch/powerpc/configs/40x/walnut_defconfig
+++ b/arch/powerpc/configs/40x/walnut_defconfig
@@ -19,7 +19,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig
index 9fcd361607e2..2fa553ebfdc9 100644
--- a/arch/powerpc/configs/44x/akebono_defconfig
+++ b/arch/powerpc/configs/44x/akebono_defconfig
@@ -33,7 +33,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_CONNECTOR=y
diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig
index 6bba1a55b827..5a1b9ee18075 100644
--- a/arch/powerpc/configs/44x/arches_defconfig
+++ b/arch/powerpc/configs/44x/arches_defconfig
@@ -24,7 +24,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig
index 6f3a6ecc81e7..22e1ef5272ab 100644
--- a/arch/powerpc/configs/44x/bamboo_defconfig
+++ b/arch/powerpc/configs/44x/bamboo_defconfig
@@ -22,7 +22,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig
index 6b77aea79b6c..8006a5728afd 100644
--- a/arch/powerpc/configs/44x/bluestone_defconfig
+++ b/arch/powerpc/configs/44x/bluestone_defconfig
@@ -20,7 +20,6 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig
index d427cee027a6..86f34ea4173a 100644
--- a/arch/powerpc/configs/44x/canyonlands_defconfig
+++ b/arch/powerpc/configs/44x/canyonlands_defconfig
@@ -24,7 +24,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig
index 5f1df5fe4453..ce3ec5a2cd15 100644
--- a/arch/powerpc/configs/44x/currituck_defconfig
+++ b/arch/powerpc/configs/44x/currituck_defconfig
@@ -31,7 +31,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_CONNECTOR=y
diff --git a/arch/powerpc/configs/44x/ebony_defconfig b/arch/powerpc/configs/44x/ebony_defconfig
index e2b6578993d5..f67447c92e6f 100644
--- a/arch/powerpc/configs/44x/ebony_defconfig
+++ b/arch/powerpc/configs/44x/ebony_defconfig
@@ -20,7 +20,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig
index f593258806ad..5dbd83a1c11b 100644
--- a/arch/powerpc/configs/44x/eiger_defconfig
+++ b/arch/powerpc/configs/44x/eiger_defconfig
@@ -25,7 +25,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig
index bae6b26bcfba..e49114f0e526 100644
--- a/arch/powerpc/configs/44x/fsp2_defconfig
+++ b/arch/powerpc/configs/44x/fsp2_defconfig
@@ -44,7 +44,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_VLAN_8021Q=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_CONNECTOR=y
diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig
index 4453a4590b1a..fa5378af44f9 100644
--- a/arch/powerpc/configs/44x/icon_defconfig
+++ b/arch/powerpc/configs/44x/icon_defconfig
@@ -24,7 +24,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig
index d24bfa6ecd62..aae879c21239 100644
--- a/arch/powerpc/configs/44x/iss476-smp_defconfig
+++ b/arch/powerpc/configs/44x/iss476-smp_defconfig
@@ -33,7 +33,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig
index 5d3f685a7af8..56eddca998c6 100644
--- a/arch/powerpc/configs/44x/katmai_defconfig
+++ b/arch/powerpc/configs/44x/katmai_defconfig
@@ -22,7 +22,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig
index 7b8355a5698d..369bfd2e451d 100644
--- a/arch/powerpc/configs/44x/rainier_defconfig
+++ b/arch/powerpc/configs/44x/rainier_defconfig
@@ -23,7 +23,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig
index 918cfb63f0c8..8be95f6fe3a7 100644
--- a/arch/powerpc/configs/44x/redwood_defconfig
+++ b/arch/powerpc/configs/44x/redwood_defconfig
@@ -25,7 +25,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig
index 63302fbd184d..974a4f038cda 100644
--- a/arch/powerpc/configs/44x/sam440ep_defconfig
+++ b/arch/powerpc/configs/44x/sam440ep_defconfig
@@ -27,7 +27,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig
index f34fee9464e5..10e517b69fa4 100644
--- a/arch/powerpc/configs/44x/sequoia_defconfig
+++ b/arch/powerpc/configs/44x/sequoia_defconfig
@@ -24,7 +24,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig
index 42cc7b4ed95f..cd08f3ddd609 100644
--- a/arch/powerpc/configs/44x/taishan_defconfig
+++ b/arch/powerpc/configs/44x/taishan_defconfig
@@ -22,7 +22,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/44x/virtex5_defconfig b/arch/powerpc/configs/44x/virtex5_defconfig
index 99cc3dc02df1..1f74079e1703 100644
--- a/arch/powerpc/configs/44x/virtex5_defconfig
+++ b/arch/powerpc/configs/44x/virtex5_defconfig
@@ -30,7 +30,6 @@ CONFIG_NETFILTER=y
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_MANGLE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=8192
diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig
index 6ae88d4879bf..af66c69c49fe 100644
--- a/arch/powerpc/configs/44x/warp_defconfig
+++ b/arch/powerpc/configs/44x/warp_defconfig
@@ -26,7 +26,6 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
CONFIG_VLAN_8021Q=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/52xx/cm5200_defconfig b/arch/powerpc/configs/52xx/cm5200_defconfig
index 73948e88ac82..2412a6bf7ee6 100644
--- a/arch/powerpc/configs/52xx/cm5200_defconfig
+++ b/arch/powerpc/configs/52xx/cm5200_defconfig
@@ -23,7 +23,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/52xx/lite5200b_defconfig b/arch/powerpc/configs/52xx/lite5200b_defconfig
index 6fc7f786c83c..63368e677506 100644
--- a/arch/powerpc/configs/52xx/lite5200b_defconfig
+++ b/arch/powerpc/configs/52xx/lite5200b_defconfig
@@ -26,7 +26,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig
index ae2a1f74103b..72762da94846 100644
--- a/arch/powerpc/configs/52xx/motionpro_defconfig
+++ b/arch/powerpc/configs/52xx/motionpro_defconfig
@@ -23,7 +23,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/52xx/pcm030_defconfig b/arch/powerpc/configs/52xx/pcm030_defconfig
index 1554de6968ca..303600ff1fdb 100644
--- a/arch/powerpc/configs/52xx/pcm030_defconfig
+++ b/arch/powerpc/configs/52xx/pcm030_defconfig
@@ -36,7 +36,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig
index 0777e6efd22d..a3c8ca74032c 100644
--- a/arch/powerpc/configs/52xx/tqm5200_defconfig
+++ b/arch/powerpc/configs/52xx/tqm5200_defconfig
@@ -27,7 +27,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/83xx/asp8347_defconfig b/arch/powerpc/configs/83xx/asp8347_defconfig
index dd884df32dfd..10192410b33c 100644
--- a/arch/powerpc/configs/83xx/asp8347_defconfig
+++ b/arch/powerpc/configs/83xx/asp8347_defconfig
@@ -27,7 +27,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_REDBOOT_PARTS=y
diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
index 9dffb2e7f735..16a42e2267fb 100644
--- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
@@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
index a42232732c6d..80d40ae668eb 100644
--- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
@@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig
index 4f914906ee4b..e94555452fb2 100644
--- a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig
+++ b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig
@@ -26,7 +26,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
index a484eb8401e8..1715ff547442 100644
--- a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
@@ -27,7 +27,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
index 37f4d93b3f81..e65c0057147f 100644
--- a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
+++ b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
@@ -25,7 +25,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_CFI=y
diff --git a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
index 7adb6708a761..17714bf0ed40 100644
--- a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
+++ b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
@@ -25,7 +25,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_CFI=y
diff --git a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig
index d7ce3551529d..e2ff684d8792 100644
--- a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig
+++ b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig
@@ -26,7 +26,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig
index 92134cee3f37..3eceb6db2982 100644
--- a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig
+++ b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig
@@ -25,7 +25,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig
index 97f7ea5f205f..093df33f9455 100644
--- a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig
+++ b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig
@@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig
index ee7510a33d06..3f5e5d10789f 100644
--- a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig
+++ b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig
@@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
index 8966a9af4230..dad53ef86b49 100644
--- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
@@ -26,7 +26,6 @@ CONFIG_SYN_COOKIES=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
index d70b60314dad..920f37316fdb 100644
--- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig
+++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
@@ -65,7 +65,6 @@ CONFIG_INET6_AH=m
CONFIG_INET6_IPCOMP=m
CONFIG_IPV6_TUNNEL=m
CONFIG_NET_PKTGEN=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
diff --git a/arch/powerpc/configs/85xx/ksi8560_defconfig b/arch/powerpc/configs/85xx/ksi8560_defconfig
index 9ce6f48cfb61..9cb211fb6d1e 100644
--- a/arch/powerpc/configs/85xx/ksi8560_defconfig
+++ b/arch/powerpc/configs/85xx/ksi8560_defconfig
@@ -23,7 +23,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
index 5fbc3f904046..618e03e0706d 100644
--- a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
@@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
index ff981d7905c7..9bc6283f2fb2 100644
--- a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
@@ -23,7 +23,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
index 974f0706d777..0683d8c292a8 100644
--- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
+++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
@@ -25,7 +25,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/powerpc/configs/85xx/sbc8548_defconfig b/arch/powerpc/configs/85xx/sbc8548_defconfig
index 7e3e84a842e4..258881727119 100644
--- a/arch/powerpc/configs/85xx/sbc8548_defconfig
+++ b/arch/powerpc/configs/85xx/sbc8548_defconfig
@@ -22,7 +22,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig
index 5b9cc01b9098..ecbcc853307d 100644
--- a/arch/powerpc/configs/85xx/stx_gp3_defconfig
+++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig
@@ -22,7 +22,6 @@ CONFIG_NETFILTER=y
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_FILTER=m
CONFIG_NET_PKTGEN=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_PARPORT=m
CONFIG_PARPORT_PC=m
diff --git a/arch/powerpc/configs/85xx/tqm8548_defconfig b/arch/powerpc/configs/85xx/tqm8548_defconfig
index 1c63cbdc3211..afa1b9b633f8 100644
--- a/arch/powerpc/configs/85xx/tqm8548_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8548_defconfig
@@ -29,7 +29,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_CFI=y
diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
index 78f5beb2928c..d50aca608736 100644
--- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
+++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
@@ -54,7 +54,6 @@ CONFIG_IP_PIMSM_V2=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
CONFIG_MTD_REDBOOT_PARTS=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig
index 935ea3ade7de..f7a803ab2285 100644
--- a/arch/powerpc/configs/adder875_defconfig
+++ b/arch/powerpc/configs/adder875_defconfig
@@ -26,7 +26,6 @@ CONFIG_SYN_COOKIES=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig
index 12f397d403c6..cf94d28d0e31 100644
--- a/arch/powerpc/configs/amigaone_defconfig
+++ b/arch/powerpc/configs/amigaone_defconfig
@@ -37,7 +37,6 @@ CONFIG_NETFILTER=y
# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set
# CONFIG_NETFILTER_XT_MATCH_STATE is not set
# CONFIG_IP_NF_MANGLE is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
CONFIG_PARPORT=y
CONFIG_PARPORT_PC=y
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 560a93a84efe..2dd1b58a18ae 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -102,7 +102,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=131072
diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig
index a203b1cf67d3..9ff493dd8439 100644
--- a/arch/powerpc/configs/chrp32_defconfig
+++ b/arch/powerpc/configs/chrp32_defconfig
@@ -38,7 +38,6 @@ CONFIG_NETFILTER=y
# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set
# CONFIG_NETFILTER_XT_MATCH_STATE is not set
# CONFIG_IP_NF_MANGLE is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
CONFIG_BLK_DEV_FD=y
CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig
index 2e6c8a45ae88..6e08d9502d89 100644
--- a/arch/powerpc/configs/ep8248e_defconfig
+++ b/arch/powerpc/configs/ep8248e_defconfig
@@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
CONFIG_NETFILTER=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig
index 7cb590e8f8fd..b20bd0cf3543 100644
--- a/arch/powerpc/configs/ep88xc_defconfig
+++ b/arch/powerpc/configs/ep88xc_defconfig
@@ -28,7 +28,6 @@ CONFIG_SYN_COOKIES=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config
index d592ba27b122..3c7dad19a691 100644
--- a/arch/powerpc/configs/fsl-emb-nonhw.config
+++ b/arch/powerpc/configs/fsl-emb-nonhw.config
@@ -118,7 +118,6 @@ CONFIG_SYSVIPC=y
CONFIG_TMPFS=y
CONFIG_UBIFS_FS=y
CONFIG_UDF_FS=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_UFS_FS=m
CONFIG_UIO=y
CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index ceb3c770786f..fbfcc85e4dc0 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -52,7 +52,6 @@ CONFIG_NF_CONNTRACK_IRC=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_BLK_DEV_LOOP=y
@@ -244,7 +243,6 @@ CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_MUTEXES=y
-CONFIG_LATENCYTOP=y
CONFIG_BOOTX_TEXT=y
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
index 805b0f87653c..85e73c3bd859 100644
--- a/arch/powerpc/configs/gamecube_defconfig
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -35,7 +35,6 @@ CONFIG_IP_PNP_RARP=y
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
# CONFIG_FW_LOADER is not set
CONFIG_BLK_DEV_LOOP=y
@@ -91,7 +90,6 @@ CONFIG_CRC_CCITT=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
-CONFIG_LATENCYTOP=y
CONFIG_SCHED_TRACER=y
CONFIG_DMA_API_DEBUG=y
CONFIG_PPC_EARLY_DEBUG=y
diff --git a/arch/powerpc/configs/holly_defconfig b/arch/powerpc/configs/holly_defconfig
index 71d8d2430b6c..067f433c8f5e 100644
--- a/arch/powerpc/configs/holly_defconfig
+++ b/arch/powerpc/configs/holly_defconfig
@@ -27,7 +27,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig
index 477794c41d50..ea59f3d146df 100644
--- a/arch/powerpc/configs/linkstation_defconfig
+++ b/arch/powerpc/configs/linkstation_defconfig
@@ -48,7 +48,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig
index c5f2005005d3..2975e64629aa 100644
--- a/arch/powerpc/configs/maple_defconfig
+++ b/arch/powerpc/configs/maple_defconfig
@@ -36,7 +36,6 @@ CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=8192
# CONFIG_SCSI_PROC_FS is not set
@@ -104,7 +103,6 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LATENCYTOP=y
CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
CONFIG_BOOTX_TEXT=y
diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig
index 5d5f08e5b8d9..6ce4f206eac7 100644
--- a/arch/powerpc/configs/mgcoge_defconfig
+++ b/arch/powerpc/configs/mgcoge_defconfig
@@ -30,7 +30,6 @@ CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
CONFIG_TIPC=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig
index e4bf8aa87e60..6203c1093a3a 100644
--- a/arch/powerpc/configs/mpc512x_defconfig
+++ b/arch/powerpc/configs/mpc512x_defconfig
@@ -35,7 +35,6 @@ CONFIG_CAN_VCAN=y
CONFIG_CAN_MSCAN=y
CONFIG_CAN_DEBUG_DEVICES=y
# CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig
index 7a2b2aa37def..6f87a5c74960 100644
--- a/arch/powerpc/configs/mpc5200_defconfig
+++ b/arch/powerpc/configs/mpc5200_defconfig
@@ -27,7 +27,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/mpc7448_hpc2_defconfig b/arch/powerpc/configs/mpc7448_hpc2_defconfig
index 4b14c02b437c..19406a6c2648 100644
--- a/arch/powerpc/configs/mpc7448_hpc2_defconfig
+++ b/arch/powerpc/configs/mpc7448_hpc2_defconfig
@@ -25,7 +25,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
diff --git a/arch/powerpc/configs/mpc8272_ads_defconfig b/arch/powerpc/configs/mpc8272_ads_defconfig
index b1e88b64536b..00a4d2bf43b2 100644
--- a/arch/powerpc/configs/mpc8272_ads_defconfig
+++ b/arch/powerpc/configs/mpc8272_ads_defconfig
@@ -23,7 +23,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
CONFIG_NETFILTER=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig
index 005d00020fb9..be125729635c 100644
--- a/arch/powerpc/configs/mpc83xx_defconfig
+++ b/arch/powerpc/configs/mpc83xx_defconfig
@@ -37,7 +37,6 @@ CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
CONFIG_INET_ESP=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
# CONFIG_FW_LOADER is not set
diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
index ec3fcc2bf737..285d506c5a76 100644
--- a/arch/powerpc/configs/mpc885_ads_defconfig
+++ b/arch/powerpc/configs/mpc885_ads_defconfig
@@ -27,7 +27,6 @@ CONFIG_SYN_COOKIES=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig
index 63e38c7220f1..0a0d046fc445 100644
--- a/arch/powerpc/configs/mvme5100_defconfig
+++ b/arch/powerpc/configs/mvme5100_defconfig
@@ -58,7 +58,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_LAPB=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=2
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index c0423b2cf7c0..4b6d31d4474e 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -44,7 +44,6 @@ CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=y
CONFIG_INET_ESP=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_MTD=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 50b610b48914..4e6e95f92646 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -20,7 +20,6 @@ CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_PMAC=y
-CONFIG_PPC601_SYNC_FIX=y
CONFIG_GEN_RTC=y
CONFIG_HIGHMEM=y
CONFIG_BINFMT_MISC=m
@@ -112,7 +111,6 @@ CONFIG_BT_HCIBFUSB=m
CONFIG_CFG80211=m
CONFIG_MAC80211=m
CONFIG_MAC80211_LEDS=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
CONFIG_CONNECTOR=y
CONFIG_MAC_FLOPPY=m
@@ -293,7 +291,6 @@ CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-CONFIG_LATENCYTOP=y
CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
CONFIG_BOOTX_TEXT=y
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index ef2ef98d3f28..6658cceb928c 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -38,7 +38,7 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_PARTITION_ADVANCED=y
-CONFIG_SCOM_DEBUGFS=y
+# CONFIG_SCOM_DEBUGFS is not set
CONFIG_OPAL_PRD=y
CONFIG_PPC_MEMTRACE=y
# CONFIG_PPC_PSERIES is not set
@@ -98,7 +98,6 @@ CONFIG_NET_ACT_BPF=m
CONFIG_DNS_RESOLVER=y
CONFIG_BPF_JIT=y
# CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_MTD=y
@@ -317,7 +316,6 @@ CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_HARDLOCKUP_DETECTOR=y
-CONFIG_LATENCYTOP=y
CONFIG_FUNCTION_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig
index 689d7e276769..a5f683aed328 100644
--- a/arch/powerpc/configs/ppc40x_defconfig
+++ b/arch/powerpc/configs/ppc40x_defconfig
@@ -25,7 +25,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
@@ -85,4 +84,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-CONFIG_PPC4xx_OCM=y
diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig
index db48039e0b11..67952819593e 100644
--- a/arch/powerpc/configs/ppc44x_defconfig
+++ b/arch/powerpc/configs/ppc44x_defconfig
@@ -36,7 +36,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
CONFIG_BRIDGE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 91fdb619b484..b250e6f5a7ca 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -29,6 +29,7 @@ CONFIG_DTL=y
CONFIG_SCANLOG=m
CONFIG_PPC_SMLPAR=y
CONFIG_IBMEBUS=y
+CONFIG_PPC_SVM=y
CONFIG_PPC_MAPLE=y
CONFIG_PPC_PASEMI=y
CONFIG_PPC_PASEMI_IOMMU=y
@@ -89,7 +90,7 @@ CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_IPCOMP=m
-# CONFIG_IPV6 is not set
+CONFIG_IPV6=y
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_ADVANCED is not set
CONFIG_BRIDGE=m
@@ -98,7 +99,6 @@ CONFIG_NET_CLS_BPF=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_BPF=m
CONFIG_BPF_JIT=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_BLK_DEV_FD=y
@@ -367,7 +367,6 @@ CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_DEBUG_MUTEXES=y
-CONFIG_LATENCYTOP=y
CONFIG_FUNCTION_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
index 41d85cb3c9a2..0d746774c2bd 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -50,7 +50,6 @@ CONFIG_INET_IPCOMP=m
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_ADVANCED is not set
CONFIG_BRIDGE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_BLK_DEV_FD=y
@@ -223,7 +222,6 @@ CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_MUTEXES=y
-CONFIG_LATENCYTOP=y
CONFIG_IRQSOFF_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 7c6baf6df139..9dca4cffa623 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -301,7 +301,6 @@ CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_NET_CLS_IND=y
CONFIG_IRDA=m
CONFIG_IRLAN=m
CONFIG_IRNET=m
@@ -346,7 +345,6 @@ CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211_DEBUGFS=y
CONFIG_NET_9P=m
CONFIG_NET_9P_VIRTIO=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEBUG_DEVRES=y
CONFIG_CONNECTOR=y
CONFIG_PARPORT=m
@@ -1124,6 +1122,7 @@ CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
CONFIG_DEBUG_INFO=y
CONFIG_UNUSED_SYMBOLS=y
+CONFIG_HEADERS_INSTALL=y
CONFIG_HEADERS_CHECK=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
@@ -1148,7 +1147,6 @@ CONFIG_FAIL_MAKE_REQUEST=y
CONFIG_FAIL_IO_TIMEOUT=y
CONFIG_FAULT_INJECTION_DEBUG_FS=y
CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
-CONFIG_LATENCYTOP=y
CONFIG_SCHED_TRACER=y
CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
diff --git a/arch/powerpc/configs/pq2fads_defconfig b/arch/powerpc/configs/pq2fads_defconfig
index 0ededa8c837d..9d8a76857c6f 100644
--- a/arch/powerpc/configs/pq2fads_defconfig
+++ b/arch/powerpc/configs/pq2fads_defconfig
@@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
CONFIG_NETFILTER=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index cf8d55f67272..314c63939816 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -63,7 +63,6 @@ CONFIG_CFG80211=m
CONFIG_CFG80211_WEXT=y
CONFIG_MAC80211=m
# CONFIG_MAC80211_RC_MINSTREL is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=65535
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 62e12f61a3b2..26126b4d4de3 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -42,6 +42,7 @@ CONFIG_DTL=y
CONFIG_SCANLOG=m
CONFIG_PPC_SMLPAR=y
CONFIG_IBMEBUS=y
+CONFIG_PPC_SVM=y
# CONFIG_PPC_PMAC is not set
CONFIG_RTAS_FLASH=m
CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
@@ -83,7 +84,6 @@ CONFIG_NET_CLS_BPF=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_BPF=m
CONFIG_BPF_JIT=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_PARPORT=m
@@ -290,7 +290,6 @@ CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_HARDLOCKUP_DETECTOR=y
-CONFIG_LATENCYTOP=y
CONFIG_FUNCTION_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index a887616e35a2..1253482a67c0 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -68,7 +68,6 @@ CONFIG_SYN_COOKIES=y
# CONFIG_INET_XFRM_MODE_BEET is not set
CONFIG_DNS_RESOLVER=y
# CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_MTD=m
@@ -214,6 +213,7 @@ CONFIG_IPMI_WATCHDOG=y
CONFIG_HW_RANDOM=y
CONFIG_TCG_TPM=y
CONFIG_TCG_TIS_I2C_NUVOTON=y
+# CONFIG_DEVPORT is not set
CONFIG_I2C=y
# CONFIG_I2C_COMPAT is not set
CONFIG_I2C_CHARDEV=y
diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig
index 74bca2eccd0f..6c39c52b8e4a 100644
--- a/arch/powerpc/configs/storcenter_defconfig
+++ b/arch/powerpc/configs/storcenter_defconfig
@@ -26,7 +26,6 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig
index cd72193fac0a..7493f36dd6e9 100644
--- a/arch/powerpc/configs/tqm8xx_defconfig
+++ b/arch/powerpc/configs/tqm8xx_defconfig
@@ -32,7 +32,6 @@ CONFIG_SYN_COOKIES=y
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index f5c366b02828..5a04448ad6b5 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -41,7 +41,6 @@ CONFIG_BT_BNEP_MC_FILTER=y
CONFIG_BT_HIDP=y
CONFIG_CFG80211=y
CONFIG_MAC80211=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
@@ -123,7 +122,6 @@ CONFIG_PRINTK_TIME=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
-CONFIG_LATENCYTOP=y
CONFIG_SCHED_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_DMA_API_DEBUG=y
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 9a1d2fc6ceb7..64870c7be4a3 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -11,4 +11,3 @@ generic-y += mcs_spinlock.h
generic-y += preempt.h
generic-y += vtime.h
generic-y += msi.h
-generic-y += simd.h
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index ec1c97a8e8cb..8561498e653c 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -15,6 +15,7 @@
#include <asm/epapr_hcalls.h>
#include <asm/dcr.h>
#include <asm/mmu_context.h>
+#include <asm/ultravisor-api.h>
#include <uapi/asm/ucontext.h>
@@ -34,6 +35,16 @@ extern struct static_key hcall_tracepoint_key;
void __trace_hcall_entry(unsigned long opcode, unsigned long *args);
void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf);
+/* Ultravisor */
+#if defined(CONFIG_PPC_POWERNV) || defined(CONFIG_PPC_SVM)
+long ucall_norets(unsigned long opcode, ...);
+#else
+static inline long ucall_norets(unsigned long opcode, ...)
+{
+ return U_NOT_AVAILABLE;
+}
+#endif
+
/* OPAL */
int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
int64_t a4, int64_t a5, int64_t a6, int64_t a7,
@@ -123,7 +134,8 @@ extern int __ucmpdi2(u64, u64);
/* tracing */
void _mcount(void);
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+ unsigned long sp);
void pnv_power9_force_smt4_catch(void);
void pnv_power9_force_smt4_release(void);
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 52eafaf74054..31c231ea56b7 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -297,24 +297,24 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
#define ATOMIC64_INIT(i) { (i) }
-static __inline__ long atomic64_read(const atomic64_t *v)
+static __inline__ s64 atomic64_read(const atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
return t;
}
-static __inline__ void atomic64_set(atomic64_t *v, long i)
+static __inline__ void atomic64_set(atomic64_t *v, s64 i)
{
__asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
}
#define ATOMIC64_OP(op, asm_op) \
-static __inline__ void atomic64_##op(long a, atomic64_t *v) \
+static __inline__ void atomic64_##op(s64 a, atomic64_t *v) \
{ \
- long t; \
+ s64 t; \
\
__asm__ __volatile__( \
"1: ldarx %0,0,%3 # atomic64_" #op "\n" \
@@ -327,10 +327,10 @@ static __inline__ void atomic64_##op(long a, atomic64_t *v) \
}
#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
-static inline long \
-atomic64_##op##_return_relaxed(long a, atomic64_t *v) \
+static inline s64 \
+atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
{ \
- long t; \
+ s64 t; \
\
__asm__ __volatile__( \
"1: ldarx %0,0,%3 # atomic64_" #op "_return_relaxed\n" \
@@ -345,10 +345,10 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v) \
}
#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \
-static inline long \
-atomic64_fetch_##op##_relaxed(long a, atomic64_t *v) \
+static inline s64 \
+atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
{ \
- long res, t; \
+ s64 res, t; \
\
__asm__ __volatile__( \
"1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \
@@ -396,7 +396,7 @@ ATOMIC64_OPS(xor, xor)
static __inline__ void atomic64_inc(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
"1: ldarx %0,0,%2 # atomic64_inc\n\
@@ -409,9 +409,9 @@ static __inline__ void atomic64_inc(atomic64_t *v)
}
#define atomic64_inc atomic64_inc
-static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
+static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
"1: ldarx %0,0,%2 # atomic64_inc_return_relaxed\n"
@@ -427,7 +427,7 @@ static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
static __inline__ void atomic64_dec(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
"1: ldarx %0,0,%2 # atomic64_dec\n\
@@ -440,9 +440,9 @@ static __inline__ void atomic64_dec(atomic64_t *v)
}
#define atomic64_dec atomic64_dec
-static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
+static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
"1: ldarx %0,0,%2 # atomic64_dec_return_relaxed\n"
@@ -463,9 +463,9 @@ static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
* Atomically test *v and decrement if it is greater than 0.
* The function returns the old value of *v minus 1.
*/
-static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
+static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
PPC_ATOMIC_ENTRY_BARRIER
@@ -502,9 +502,9 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
* Atomically adds @a to @v, so long as it was not @u.
* Returns the old value of @v.
*/
-static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
+static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
- long t;
+ s64 t;
__asm__ __volatile__ (
PPC_ATOMIC_ENTRY_BARRIER
@@ -534,7 +534,7 @@ static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
*/
static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
{
- long t1, t2;
+ s64 t1, t2;
__asm__ __volatile__ (
PPC_ATOMIC_ENTRY_BARRIER
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 838de59f6754..0796533d37dd 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -148,23 +148,21 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
*/
#include <asm/fixmap.h>
-#ifdef CONFIG_HIGHMEM
-#define KVIRT_TOP PKMAP_BASE
-#else
-#define KVIRT_TOP FIXADDR_START
-#endif
-
/*
* ioremap_bot starts at that address. Early ioremaps move down from there,
* until mem_init() at which point this becomes the top of the vmalloc
* and ioremap space
*/
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
+#ifdef CONFIG_HIGHMEM
+#define IOREMAP_TOP PKMAP_BASE
#else
-#define IOREMAP_TOP KVIRT_TOP
+#define IOREMAP_TOP FIXADDR_START
#endif
+/* PPC32 shares vmalloc area with ioremap */
+#define IOREMAP_START VMALLOC_START
+#define IOREMAP_END VMALLOC_END
+
/*
* Just any arbitrary offset to the start of the vmalloc VM area: the
* current 16MB value just means that there will be a 64MB "hole" after the
@@ -201,8 +199,6 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
#include <linux/sched.h>
#include <linux/threads.h>
-extern unsigned long ioremap_bot;
-
/* Bits to mask out from a PGD to get to the PUD page */
#define PGD_MASKED_BITS 0
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 74d24201fc4f..bb3deb76c951 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -116,8 +116,6 @@ typedef struct {
/* Number of users of the external (Nest) MMU */
atomic_t copros;
- /* NPU NMMU context */
- struct npu_context *npu_context;
struct hash_mm_context *hash_context;
unsigned long vdso_base;
@@ -208,7 +206,6 @@ extern int mmu_io_psize;
void mmu_early_init_devtree(void);
void hash__early_init_devtree(void);
void radix__early_init_devtree(void);
-extern void radix_init_native(void);
extern void hash__early_init_mmu(void);
extern void radix__early_init_mmu(void);
static inline void early_init_mmu(void)
@@ -240,9 +237,6 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
first_memblock_size);
}
-extern int (*register_process_table)(unsigned long base, unsigned long page_size,
- unsigned long tbl_size);
-
#ifdef CONFIG_PPC_PSERIES
extern void radix_init_pseries(void);
#else
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index ccf00a8b98c6..b01624e5c467 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -90,7 +90,6 @@
#define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */
#define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */
#define _PAGE_DEVMAP _RPAGE_SW1 /* software: ZONE_DEVICE page */
-#define __HAVE_ARCH_PTE_DEVMAP
/*
* Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
@@ -274,8 +273,15 @@ extern unsigned long __vmalloc_end;
#define VMALLOC_START __vmalloc_start
#define VMALLOC_END __vmalloc_end
+static inline unsigned int ioremap_max_order(void)
+{
+ if (radix_enabled())
+ return PUD_SHIFT;
+ return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */
+}
+#define IOREMAP_MAX_ORDER ioremap_max_order()
+
extern unsigned long __kernel_virt_start;
-extern unsigned long __kernel_virt_size;
extern unsigned long __kernel_io_start;
extern unsigned long __kernel_io_end;
#define KERN_VIRT_START __kernel_virt_start
@@ -283,7 +289,6 @@ extern unsigned long __kernel_io_end;
#define KERN_IO_END __kernel_io_end
extern struct page *vmemmap;
-extern unsigned long ioremap_bot;
extern unsigned long pci_io_base;
#endif /* __ASSEMBLY__ */
@@ -311,6 +316,7 @@ extern unsigned long pci_io_base;
#define PHB_IO_BASE (ISA_IO_END)
#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
#define IOREMAP_BASE (PHB_IO_END)
+#define IOREMAP_START (ioremap_bot)
#define IOREMAP_END (KERN_IO_END)
/* Advertise special mapping type for AGP */
@@ -602,8 +608,10 @@ static inline bool pte_access_permitted(pte_t pte, bool write)
*/
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
{
- return __pte((((pte_basic_t)(pfn) << PAGE_SHIFT) & PTE_RPN_MASK) |
- pgprot_val(pgprot));
+ VM_BUG_ON(pfn >> (64 - PAGE_SHIFT));
+ VM_BUG_ON((pfn << PAGE_SHIFT) & ~PTE_RPN_MASK);
+
+ return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot));
}
static inline unsigned long pte_pfn(pte_t pte)
@@ -1343,5 +1351,26 @@ static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_va
return false;
}
+/*
+ * Like pmd_huge() and pmd_large(), but works regardless of config options
+ */
+#define pmd_is_leaf pmd_is_leaf
+static inline bool pmd_is_leaf(pmd_t pmd)
+{
+ return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
+}
+
+#define pud_is_leaf pud_is_leaf
+static inline bool pud_is_leaf(pud_t pud)
+{
+ return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
+}
+
+#define pgd_is_leaf pgd_is_leaf
+static inline bool pgd_is_leaf(pgd_t pgd)
+{
+ return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE));
+}
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 05147cecb8df..4ce795d30377 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -17,8 +17,8 @@ extern void radix__flush_tlb_lpid_page(unsigned int lpid,
unsigned long addr,
unsigned long page_size);
extern void radix__flush_pwc_lpid(unsigned int lpid);
-extern void radix__flush_tlb_lpid(unsigned int lpid);
-extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
+extern void radix__flush_all_lpid(unsigned int lpid);
+extern void radix__flush_all_lpid_guest(unsigned int lpid);
#else
static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); };
static inline void radix__flush_tlb_lpid_page(unsigned int lpid,
@@ -31,11 +31,7 @@ static inline void radix__flush_pwc_lpid(unsigned int lpid)
{
WARN_ON(1);
}
-static inline void radix__flush_tlb_lpid(unsigned int lpid)
-{
- WARN_ON(1);
-}
-static inline void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
+static inline void radix__flush_all_lpid(unsigned int lpid)
{
WARN_ON(1);
}
@@ -73,6 +69,4 @@ extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr);
extern void radix__flush_tlb_all(void);
-extern void radix__local_flush_tlb_lpid(unsigned int lpid);
-
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index ebf572ea621e..7aa8195b6cff 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -162,4 +162,13 @@ static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long addre
radix__flush_tlb_pwc(tlb, address);
}
+
+extern bool tlbie_capable;
+extern bool tlbie_enabled;
+
+static inline bool cputlb_use_tlbie(void)
+{
+ return tlbie_enabled;
+}
+
#endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index 6436b65ac7bc..0e1263455d73 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -26,5 +26,16 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot);
#define __HAVE_PHYS_MEM_ACCESS_PROT
+/*
+ * This gets called at the end of handling a page fault, when
+ * the kernel has put a new PTE into the page table for the process.
+ * We use it to ensure coherency between the i-cache and d-cache
+ * for the page which has just been mapped in.
+ * On machines which use an MMU hash table, we use this to put a
+ * corresponding HPTE into the hash table ahead of time, instead of
+ * waiting for the inevitable extra hash-table miss exception.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index fed7e6241349..f47e6ff6554d 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -5,14 +5,6 @@
#include <asm/asm-compat.h>
-/*
- * Define an illegal instr to trap on the bug.
- * We don't use 0 because that marks the end of a function
- * in the ELF ABI. That's "Boo Boo" in case you wonder...
- */
-#define BUG_OPCODE .long 0x00b00b00 /* For asm */
-#define BUG_ILLEGAL_INSTR "0x00b00b00" /* For BUG macro */
-
#ifdef CONFIG_BUG
#ifdef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 40ea5b3781c6..45e3137ccd71 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -33,7 +33,8 @@
#define IFETCH_ALIGN_BYTES (1 << IFETCH_ALIGN_SHIFT)
-#if defined(__powerpc64__) && !defined(__ASSEMBLY__)
+#if !defined(__ASSEMBLY__)
+#ifdef CONFIG_PPC64
struct ppc_cache_info {
u32 size;
@@ -53,7 +54,28 @@ struct ppc64_caches {
};
extern struct ppc64_caches ppc64_caches;
-#endif /* __powerpc64__ && ! __ASSEMBLY__ */
+
+static inline u32 l1_cache_shift(void)
+{
+ return ppc64_caches.l1d.log_block_size;
+}
+
+static inline u32 l1_cache_bytes(void)
+{
+ return ppc64_caches.l1d.block_size;
+}
+#else
+static inline u32 l1_cache_shift(void)
+{
+ return L1_CACHE_SHIFT;
+}
+
+static inline u32 l1_cache_bytes(void)
+{
+ return L1_CACHE_BYTES;
+}
+#endif
+#endif /* ! __ASSEMBLY__ */
#if defined(__ASSEMBLY__)
/*
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index 74d60cfe8ce5..eef388f2659f 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -29,9 +29,12 @@
* not expect this type of fault. flush_cache_vmap is not exactly the right
* place to put this, but it seems to work well enough.
*/
-#define flush_cache_vmap(start, end) do { asm volatile("ptesync" ::: "memory"); } while (0)
+static inline void flush_cache_vmap(unsigned long start, unsigned long end)
+{
+ asm volatile("ptesync" ::: "memory");
+}
#else
-#define flush_cache_vmap(start, end) do { } while (0)
+static inline void flush_cache_vmap(unsigned long start, unsigned long end) { }
#endif
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
@@ -54,20 +57,29 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr)
}
#endif
-#ifdef CONFIG_PPC32
/*
* Write any modified data cache blocks out to memory and invalidate them.
* Does not invalidate the corresponding instruction cache blocks.
*/
static inline void flush_dcache_range(unsigned long start, unsigned long stop)
{
- void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
- unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+ unsigned long shift = l1_cache_shift();
+ unsigned long bytes = l1_cache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
- for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+ if (IS_ENABLED(CONFIG_PPC64)) {
+ mb(); /* sync */
+ isync();
+ }
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
dcbf(addr);
mb(); /* sync */
+
+ if (IS_ENABLED(CONFIG_PPC64))
+ isync();
}
/*
@@ -77,11 +89,13 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
*/
static inline void clean_dcache_range(unsigned long start, unsigned long stop)
{
- void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
- unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+ unsigned long shift = l1_cache_shift();
+ unsigned long bytes = l1_cache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
- for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+ for (i = 0; i < size >> shift; i++, addr += bytes)
dcbst(addr);
mb(); /* sync */
}
@@ -94,21 +108,17 @@ static inline void clean_dcache_range(unsigned long start, unsigned long stop)
static inline void invalidate_dcache_range(unsigned long start,
unsigned long stop)
{
- void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
- unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+ unsigned long shift = l1_cache_shift();
+ unsigned long bytes = l1_cache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
unsigned long i;
- for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+ for (i = 0; i < size >> shift; i++, addr += bytes)
dcbi(addr);
mb(); /* sync */
}
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC64
-extern void flush_dcache_range(unsigned long start, unsigned long stop);
-extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
-#endif
-
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
do { \
memcpy(dst, src, len); \
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index d05f0c28e515..a1ebcbc3931f 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -145,12 +145,10 @@ static inline void cpu_feature_keys_init(void) { }
/* Definitions for features that only exist on 32-bit chips */
#ifdef CONFIG_PPC32
-#define CPU_FTR_601 ASM_CONST(0x00001000)
#define CPU_FTR_L2CR ASM_CONST(0x00002000)
#define CPU_FTR_SPEC7450 ASM_CONST(0x00004000)
#define CPU_FTR_TAU ASM_CONST(0x00008000)
#define CPU_FTR_CAN_DOZE ASM_CONST(0x00010000)
-#define CPU_FTR_USE_RTC ASM_CONST(0x00020000)
#define CPU_FTR_L3CR ASM_CONST(0x00040000)
#define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x00080000)
#define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x00100000)
@@ -160,14 +158,12 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_NEED_COHERENT ASM_CONST(0x01000000)
#define CPU_FTR_NO_BTIC ASM_CONST(0x02000000)
#define CPU_FTR_PPC_LE ASM_CONST(0x04000000)
-#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x08000000)
#define CPU_FTR_SPE ASM_CONST(0x10000000)
#define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x20000000)
#define CPU_FTR_INDEXED_DCR ASM_CONST(0x40000000)
#else /* CONFIG_PPC32 */
/* Define these to 0 for the sake of tests in common code */
-#define CPU_FTR_601 (0)
#define CPU_FTR_PPC_LE (0)
#endif
@@ -294,8 +290,8 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_MAYBE_CAN_NAP 0
#endif
-#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | \
- CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_USE_RTC)
+#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | \
+ CPU_FTR_COHERENT_ICACHE)
#define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE | CPU_FTR_NOEXECUTE)
#define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_PPC_LE)
@@ -386,7 +382,7 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTRS_47X (CPU_FTRS_440x6)
#define CPU_FTRS_E200 (CPU_FTR_SPE_COMP | \
CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
- CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \
+ CPU_FTR_NOEXECUTE | \
CPU_FTR_DEBUG_LVL_EXC)
#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
@@ -498,7 +494,9 @@ static inline void cpu_feature_keys_init(void) { }
#else
enum {
CPU_FTRS_POSSIBLE =
-#ifdef CONFIG_PPC_BOOK3S_32
+#ifdef CONFIG_PPC_BOOK3S_601
+ CPU_FTRS_PPC601 |
+#elif defined(CONFIG_PPC_BOOK3S_32)
CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU |
CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 |
CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX |
@@ -574,8 +572,10 @@ enum {
#else
enum {
CPU_FTRS_ALWAYS =
-#ifdef CONFIG_PPC_BOOK3S_32
- CPU_FTRS_PPC601 & CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU &
+#ifdef CONFIG_PPC_BOOK3S_601
+ CPU_FTRS_PPC601 &
+#elif defined(CONFIG_PPC_BOOK3S_32)
+ CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU &
CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 &
CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX &
CPU_FTRS_7400_NOTAU & CPU_FTRS_7400 & CPU_FTRS_7450_20 &
diff --git a/arch/powerpc/include/asm/current.h b/arch/powerpc/include/asm/current.h
index 297827b76169..bbfb94800415 100644
--- a/arch/powerpc/include/asm/current.h
+++ b/arch/powerpc/include/asm/current.h
@@ -16,7 +16,8 @@ static inline struct task_struct *get_current(void)
{
struct task_struct *task;
- __asm__ __volatile__("ld %0,%1(13)"
+ /* get_current can be cached by the compiler, so no volatile */
+ asm ("ld %0,%1(13)"
: "=r" (task)
: "i" (offsetof(struct paca_struct, __current)));
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 8aa7c76c2130..6f9b2a12540a 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -88,6 +88,19 @@ struct eeh_pe {
struct list_head child_list; /* List of PEs below this PE */
struct list_head child; /* Memb. child_list/eeh_phb_pe */
struct list_head edevs; /* List of eeh_dev in this PE */
+
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Saved stack trace. When we find a PE freeze in eeh_dev_check_failure
+ * the stack trace is saved here so we can print it in the recovery
+ * thread if it turns out to due to a real problem rather than
+ * a hot-remove.
+ *
+ * A max of 64 entries might be overkill, but it also might not be.
+ */
+ unsigned long stack_trace[64];
+ int trace_entries;
+#endif /* CONFIG_STACKTRACE */
};
#define eeh_pe_for_each_dev(pe, edev, tmp) \
@@ -121,6 +134,8 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe)
struct eeh_dev {
int mode; /* EEH mode */
int class_code; /* Class code of the device */
+ int bdfn; /* bdfn of device (for cfg ops) */
+ struct pci_controller *controller;
int pe_config_addr; /* PE config address */
u32 config_space[16]; /* Saved PCI config space */
int pcix_cap; /* Saved PCIx capability */
@@ -136,6 +151,17 @@ struct eeh_dev {
struct pci_dev *physfn; /* Associated SRIOV PF */
};
+/* "fmt" must be a simple literal string */
+#define EEH_EDEV_PRINT(level, edev, fmt, ...) \
+ pr_##level("PCI %04x:%02x:%02x.%x#%04x: EEH: " fmt, \
+ (edev)->controller->global_number, PCI_BUSNO((edev)->bdfn), \
+ PCI_SLOT((edev)->bdfn), PCI_FUNC((edev)->bdfn), \
+ ((edev)->pe ? (edev)->pe_config_addr : 0xffff), ##__VA_ARGS__)
+#define eeh_edev_dbg(edev, fmt, ...) EEH_EDEV_PRINT(debug, (edev), fmt, ##__VA_ARGS__)
+#define eeh_edev_info(edev, fmt, ...) EEH_EDEV_PRINT(info, (edev), fmt, ##__VA_ARGS__)
+#define eeh_edev_warn(edev, fmt, ...) EEH_EDEV_PRINT(warn, (edev), fmt, ##__VA_ARGS__)
+#define eeh_edev_err(edev, fmt, ...) EEH_EDEV_PRINT(err, (edev), fmt, ##__VA_ARGS__)
+
static inline struct pci_dn *eeh_dev_to_pdn(struct eeh_dev *edev)
{
return edev ? edev->pdn : NULL;
@@ -247,7 +273,7 @@ static inline bool eeh_state_active(int state)
== (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
}
-typedef void *(*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag);
+typedef void (*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag);
typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag);
void eeh_set_pe_aux_size(int size);
int eeh_phb_pe_create(struct pci_controller *phb);
@@ -261,20 +287,20 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
void eeh_pe_update_time_stamp(struct eeh_pe *pe);
void *eeh_pe_traverse(struct eeh_pe *root,
eeh_pe_traverse_func fn, void *flag);
-void *eeh_pe_dev_traverse(struct eeh_pe *root,
- eeh_edev_traverse_func fn, void *flag);
+void eeh_pe_dev_traverse(struct eeh_pe *root,
+ eeh_edev_traverse_func fn, void *flag);
void eeh_pe_restore_bars(struct eeh_pe *pe);
const char *eeh_pe_loc_get(struct eeh_pe *pe);
struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
struct eeh_dev *eeh_dev_init(struct pci_dn *pdn);
void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
-void eeh_probe_devices(void);
+void eeh_show_enabled(void);
int __init eeh_ops_register(struct eeh_ops *ops);
int __exit eeh_ops_unregister(const char *name);
int eeh_check_failure(const volatile void __iomem *token);
int eeh_dev_check_failure(struct eeh_dev *edev);
-void eeh_addr_cache_build(void);
+void eeh_addr_cache_init(void);
void eeh_add_device_early(struct pci_dn *);
void eeh_add_device_tree_early(struct pci_dn *);
void eeh_add_device_late(struct pci_dev *);
@@ -316,7 +342,7 @@ static inline bool eeh_enabled(void)
return false;
}
-static inline void eeh_probe_devices(void) { }
+static inline void eeh_show_enabled(void) { }
static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
{
@@ -332,7 +358,7 @@ static inline int eeh_check_failure(const volatile void __iomem *token)
#define eeh_dev_check_failure(x) (0)
-static inline void eeh_addr_cache_build(void) { }
+static inline void eeh_addr_cache_init(void) { }
static inline void eeh_add_device_early(struct pci_dn *pdn) { }
diff --git a/arch/powerpc/include/asm/elfnote.h b/arch/powerpc/include/asm/elfnote.h
new file mode 100644
index 000000000000..a201b6e9ae44
--- /dev/null
+++ b/arch/powerpc/include/asm/elfnote.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PowerPC ELF notes.
+ *
+ * Copyright 2019, IBM Corporation
+ */
+
+#ifndef __ASM_POWERPC_ELFNOTE_H__
+#define __ASM_POWERPC_ELFNOTE_H__
+
+/*
+ * These note types should live in a SHT_NOTE segment and have
+ * "PowerPC" in the name field.
+ */
+
+/*
+ * The capabilities supported/required by this kernel (bitmap).
+ *
+ * This type uses a bitmap as "desc" field. Each bit is described
+ * in arch/powerpc/kernel/note.S
+ */
+#define PPC_ELFNOTE_CAPABILITIES 1
+
+#endif /* __ASM_POWERPC_ELFNOTE_H__ */
diff --git a/arch/powerpc/include/asm/error-injection.h b/arch/powerpc/include/asm/error-injection.h
deleted file mode 100644
index 62fd24739852..000000000000
--- a/arch/powerpc/include/asm/error-injection.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-
-#ifndef _ASM_ERROR_INJECTION_H
-#define _ASM_ERROR_INJECTION_H
-
-#include <linux/compiler.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-#include <asm-generic/error-injection.h>
-
-void override_function_with_return(struct pt_regs *regs);
-
-#endif /* _ASM_ERROR_INJECTION_H */
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 841a0be6c1b2..33f4f72eb035 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -30,25 +30,13 @@
* exception handlers (including pSeries LPAR) and iSeries LPAR
* implementations as possible.
*/
-#include <asm/head-64.h>
#include <asm/feature-fixups.h>
-/* PACA save area offsets (exgen, exmc, etc) */
-#define EX_R9 0
-#define EX_R10 8
-#define EX_R11 16
-#define EX_R12 24
-#define EX_R13 32
-#define EX_DAR 40
-#define EX_DSISR 48
-#define EX_CCR 52
-#define EX_CFAR 56
-#define EX_PPR 64
+/* PACA save area size in u64 units (exgen, exmc, etc) */
#if defined(CONFIG_RELOCATABLE)
-#define EX_CTR 72
-#define EX_SIZE 10 /* size in u64 units */
+#define EX_SIZE 10
#else
-#define EX_SIZE 9 /* size in u64 units */
+#define EX_SIZE 9
#endif
/*
@@ -56,12 +44,7 @@
*/
#define MAX_MCE_DEPTH 4
-/*
- * EX_R3 is only used by the bad_stack handler. bad_stack reloads and
- * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap
- * with EX_DAR.
- */
-#define EX_R3 EX_DAR
+#ifdef __ASSEMBLY__
#define STF_ENTRY_BARRIER_SLOT \
STF_ENTRY_BARRIER_FIXUP_SECTION; \
@@ -144,588 +127,6 @@
hrfid; \
b hrfi_flush_fallback
-#ifdef CONFIG_RELOCATABLE
-#define __EXCEPTION_PROLOG_2_RELON(label, h) \
- mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
- LOAD_HANDLER(r12,label); \
- mtctr r12; \
- mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
- li r10,MSR_RI; \
- mtmsrd r10,1; /* Set RI (EE=0) */ \
- bctr;
-#else
-/* If not relocatable, we can jump directly -- and save messing with LR */
-#define __EXCEPTION_PROLOG_2_RELON(label, h) \
- mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
- mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
- li r10,MSR_RI; \
- mtmsrd r10,1; /* Set RI (EE=0) */ \
- b label;
-#endif
-#define EXCEPTION_PROLOG_2_RELON(label, h) \
- __EXCEPTION_PROLOG_2_RELON(label, h)
-
-/*
- * As EXCEPTION_PROLOG(), except we've already got relocation on so no need to
- * rfid. Save LR in case we're CONFIG_RELOCATABLE, in which case
- * EXCEPTION_PROLOG_2_RELON will be using LR.
- */
-#define EXCEPTION_RELON_PROLOG(area, label, h, extra, vec) \
- SET_SCRATCH0(r13); /* save r13 */ \
- EXCEPTION_PROLOG_0(area); \
- EXCEPTION_PROLOG_1(area, extra, vec); \
- EXCEPTION_PROLOG_2_RELON(label, h)
-
-/*
- * We're short on space and time in the exception prolog, so we can't
- * use the normal LOAD_REG_IMMEDIATE macro to load the address of label.
- * Instead we get the base of the kernel from paca->kernelbase and or in the low
- * part of label. This requires that the label be within 64KB of kernelbase, and
- * that kernelbase be 64K aligned.
- */
-#define LOAD_HANDLER(reg, label) \
- ld reg,PACAKBASE(r13); /* get high part of &label */ \
- ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label);
-
-#define __LOAD_HANDLER(reg, label) \
- ld reg,PACAKBASE(r13); \
- ori reg,reg,(ABS_ADDR(label))@l;
-
-/*
- * Branches from unrelocated code (e.g., interrupts) to labels outside
- * head-y require >64K offsets.
- */
-#define __LOAD_FAR_HANDLER(reg, label) \
- ld reg,PACAKBASE(r13); \
- ori reg,reg,(ABS_ADDR(label))@l; \
- addis reg,reg,(ABS_ADDR(label))@h;
-
-/* Exception register prefixes */
-#define EXC_HV H
-#define EXC_STD
-
-#if defined(CONFIG_RELOCATABLE)
-/*
- * If we support interrupts with relocation on AND we're a relocatable kernel,
- * we need to use CTR to get to the 2nd level handler. So, save/restore it
- * when required.
- */
-#define SAVE_CTR(reg, area) mfctr reg ; std reg,area+EX_CTR(r13)
-#define GET_CTR(reg, area) ld reg,area+EX_CTR(r13)
-#define RESTORE_CTR(reg, area) ld reg,area+EX_CTR(r13) ; mtctr reg
-#else
-/* ...else CTR is unused and in register. */
-#define SAVE_CTR(reg, area)
-#define GET_CTR(reg, area) mfctr reg
-#define RESTORE_CTR(reg, area)
-#endif
-
-/*
- * PPR save/restore macros used in exceptions_64s.S
- * Used for P7 or later processors
- */
-#define SAVE_PPR(area, ra) \
-BEGIN_FTR_SECTION_NESTED(940) \
- ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \
- std ra,_PPR(r1); \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
-
-#define RESTORE_PPR_PACA(area, ra) \
-BEGIN_FTR_SECTION_NESTED(941) \
- ld ra,area+EX_PPR(r13); \
- mtspr SPRN_PPR,ra; \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
-
-/*
- * Get an SPR into a register if the CPU has the given feature
- */
-#define OPT_GET_SPR(ra, spr, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- mfspr ra,spr; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Set an SPR from a register if the CPU has the given feature
- */
-#define OPT_SET_SPR(ra, spr, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- mtspr spr,ra; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Save a register to the PACA if the CPU has the given feature
- */
-#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- std ra,offset(r13); \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-#define EXCEPTION_PROLOG_0(area) \
- GET_PACA(r13); \
- std r9,area+EX_R9(r13); /* save r9 */ \
- OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \
- HMT_MEDIUM; \
- std r10,area+EX_R10(r13); /* save r10 - r12 */ \
- OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
-
-#define __EXCEPTION_PROLOG_1_PRE(area) \
- OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \
- OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \
- INTERRUPT_TO_KERNEL; \
- SAVE_CTR(r10, area); \
- mfcr r9;
-
-#define __EXCEPTION_PROLOG_1_POST(area) \
- std r11,area+EX_R11(r13); \
- std r12,area+EX_R12(r13); \
- GET_SCRATCH0(r10); \
- std r10,area+EX_R13(r13)
-
-/*
- * This version of the EXCEPTION_PROLOG_1 will carry
- * addition parameter called "bitmask" to support
- * checking of the interrupt maskable level in the SOFTEN_TEST.
- * Intended to be used in MASKABLE_EXCPETION_* macros.
- */
-#define MASKABLE_EXCEPTION_PROLOG_1(area, extra, vec, bitmask) \
- __EXCEPTION_PROLOG_1_PRE(area); \
- extra(vec, bitmask); \
- __EXCEPTION_PROLOG_1_POST(area);
-
-/*
- * This version of the EXCEPTION_PROLOG_1 is intended
- * to be used in STD_EXCEPTION* macros
- */
-#define _EXCEPTION_PROLOG_1(area, extra, vec) \
- __EXCEPTION_PROLOG_1_PRE(area); \
- extra(vec); \
- __EXCEPTION_PROLOG_1_POST(area);
-
-#define EXCEPTION_PROLOG_1(area, extra, vec) \
- _EXCEPTION_PROLOG_1(area, extra, vec)
-
-#define __EXCEPTION_PROLOG_2(label, h) \
- ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \
- mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
- LOAD_HANDLER(r12,label) \
- mtspr SPRN_##h##SRR0,r12; \
- mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
- mtspr SPRN_##h##SRR1,r10; \
- h##RFI_TO_KERNEL; \
- b . /* prevent speculative execution */
-#define EXCEPTION_PROLOG_2(label, h) \
- __EXCEPTION_PROLOG_2(label, h)
-
-/* _NORI variant keeps MSR_RI clear */
-#define __EXCEPTION_PROLOG_2_NORI(label, h) \
- ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \
- xori r10,r10,MSR_RI; /* Clear MSR_RI */ \
- mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
- LOAD_HANDLER(r12,label) \
- mtspr SPRN_##h##SRR0,r12; \
- mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
- mtspr SPRN_##h##SRR1,r10; \
- h##RFI_TO_KERNEL; \
- b . /* prevent speculative execution */
-
-#define EXCEPTION_PROLOG_2_NORI(label, h) \
- __EXCEPTION_PROLOG_2_NORI(label, h)
-
-#define EXCEPTION_PROLOG(area, label, h, extra, vec) \
- SET_SCRATCH0(r13); /* save r13 */ \
- EXCEPTION_PROLOG_0(area); \
- EXCEPTION_PROLOG_1(area, extra, vec); \
- EXCEPTION_PROLOG_2(label, h);
-
-#define __KVMTEST(h, n) \
- lbz r10,HSTATE_IN_GUEST(r13); \
- cmpwi r10,0; \
- bne do_kvm_##h##n
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-/*
- * If hv is possible, interrupts come into to the hv version
- * of the kvmppc_interrupt code, which then jumps to the PR handler,
- * kvmppc_interrupt_pr, if the guest is a PR guest.
- */
-#define kvmppc_interrupt kvmppc_interrupt_hv
-#else
-#define kvmppc_interrupt kvmppc_interrupt_pr
-#endif
-
-/*
- * Branch to label using its 0xC000 address. This results in instruction
- * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
- * on using mtmsr rather than rfid.
- *
- * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than
- * load KBASE for a slight optimisation.
- */
-#define BRANCH_TO_C000(reg, label) \
- __LOAD_HANDLER(reg, label); \
- mtctr reg; \
- bctr
-
-#ifdef CONFIG_RELOCATABLE
-#define BRANCH_TO_COMMON(reg, label) \
- __LOAD_HANDLER(reg, label); \
- mtctr reg; \
- bctr
-
-#define BRANCH_LINK_TO_FAR(label) \
- __LOAD_FAR_HANDLER(r12, label); \
- mtctr r12; \
- bctrl
-
-/*
- * KVM requires __LOAD_FAR_HANDLER.
- *
- * __BRANCH_TO_KVM_EXIT branches are also a special case because they
- * explicitly use r9 then reload it from PACA before branching. Hence
- * the double-underscore.
- */
-#define __BRANCH_TO_KVM_EXIT(area, label) \
- mfctr r9; \
- std r9,HSTATE_SCRATCH1(r13); \
- __LOAD_FAR_HANDLER(r9, label); \
- mtctr r9; \
- ld r9,area+EX_R9(r13); \
- bctr
-
-#else
-#define BRANCH_TO_COMMON(reg, label) \
- b label
-
-#define BRANCH_LINK_TO_FAR(label) \
- bl label
-
-#define __BRANCH_TO_KVM_EXIT(area, label) \
- ld r9,area+EX_R9(r13); \
- b label
-
-#endif
-
-/* Do not enable RI */
-#define EXCEPTION_PROLOG_NORI(area, label, h, extra, vec) \
- EXCEPTION_PROLOG_0(area); \
- EXCEPTION_PROLOG_1(area, extra, vec); \
- EXCEPTION_PROLOG_2_NORI(label, h);
-
-
-#define __KVM_HANDLER(area, h, n) \
- BEGIN_FTR_SECTION_NESTED(947) \
- ld r10,area+EX_CFAR(r13); \
- std r10,HSTATE_CFAR(r13); \
- END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947); \
- BEGIN_FTR_SECTION_NESTED(948) \
- ld r10,area+EX_PPR(r13); \
- std r10,HSTATE_PPR(r13); \
- END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
- ld r10,area+EX_R10(r13); \
- std r12,HSTATE_SCRATCH0(r13); \
- sldi r12,r9,32; \
- ori r12,r12,(n); \
- /* This reloads r9 before branching to kvmppc_interrupt */ \
- __BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt)
-
-#define __KVM_HANDLER_SKIP(area, h, n) \
- cmpwi r10,KVM_GUEST_MODE_SKIP; \
- beq 89f; \
- BEGIN_FTR_SECTION_NESTED(948) \
- ld r10,area+EX_PPR(r13); \
- std r10,HSTATE_PPR(r13); \
- END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
- ld r10,area+EX_R10(r13); \
- std r12,HSTATE_SCRATCH0(r13); \
- sldi r12,r9,32; \
- ori r12,r12,(n); \
- /* This reloads r9 before branching to kvmppc_interrupt */ \
- __BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt); \
-89: mtocrf 0x80,r9; \
- ld r9,area+EX_R9(r13); \
- ld r10,area+EX_R10(r13); \
- b kvmppc_skip_##h##interrupt
-
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#define KVMTEST(h, n) __KVMTEST(h, n)
-#define KVM_HANDLER(area, h, n) __KVM_HANDLER(area, h, n)
-#define KVM_HANDLER_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n)
-
-#else
-#define KVMTEST(h, n)
-#define KVM_HANDLER(area, h, n)
-#define KVM_HANDLER_SKIP(area, h, n)
-#endif
-
-#define NOTEST(n)
-
-#define EXCEPTION_PROLOG_COMMON_1() \
- std r9,_CCR(r1); /* save CR in stackframe */ \
- std r11,_NIP(r1); /* save SRR0 in stackframe */ \
- std r12,_MSR(r1); /* save SRR1 in stackframe */ \
- std r10,0(r1); /* make stack chain pointer */ \
- std r0,GPR0(r1); /* save r0 in stackframe */ \
- std r10,GPR1(r1); /* save r1 in stackframe */ \
-
-
-/*
- * The common exception prolog is used for all except a few exceptions
- * such as a segment miss on a kernel address. We have to be prepared
- * to take another exception from the point where we first touch the
- * kernel stack onwards.
- *
- * On entry r13 points to the paca, r9-r13 are saved in the paca,
- * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
- * SRR1, and relocation is on.
- */
-#define EXCEPTION_PROLOG_COMMON(n, area) \
- andi. r10,r12,MSR_PR; /* See if coming from user */ \
- mr r10,r1; /* Save r1 */ \
- subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \
- beq- 1f; \
- ld r1,PACAKSAVE(r13); /* kernel stack to use */ \
-1: cmpdi cr1,r1,-INT_FRAME_SIZE; /* check if r1 is in userspace */ \
- blt+ cr1,3f; /* abort if it is */ \
- li r1,(n); /* will be reloaded later */ \
- sth r1,PACA_TRAP_SAVE(r13); \
- std r3,area+EX_R3(r13); \
- addi r3,r13,area; /* r3 -> where regs are saved*/ \
- RESTORE_CTR(r1, area); \
- b bad_stack; \
-3: EXCEPTION_PROLOG_COMMON_1(); \
- kuap_save_amr_and_lock r9, r10, cr1, cr0; \
- beq 4f; /* if from kernel mode */ \
- ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \
- SAVE_PPR(area, r9); \
-4: EXCEPTION_PROLOG_COMMON_2(area) \
- EXCEPTION_PROLOG_COMMON_3(n) \
- ACCOUNT_STOLEN_TIME
-
-/* Save original regs values from save area to stack frame. */
-#define EXCEPTION_PROLOG_COMMON_2(area) \
- ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \
- ld r10,area+EX_R10(r13); \
- std r9,GPR9(r1); \
- std r10,GPR10(r1); \
- ld r9,area+EX_R11(r13); /* move r11 - r13 to stackframe */ \
- ld r10,area+EX_R12(r13); \
- ld r11,area+EX_R13(r13); \
- std r9,GPR11(r1); \
- std r10,GPR12(r1); \
- std r11,GPR13(r1); \
- BEGIN_FTR_SECTION_NESTED(66); \
- ld r10,area+EX_CFAR(r13); \
- std r10,ORIG_GPR3(r1); \
- END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \
- GET_CTR(r10, area); \
- std r10,_CTR(r1);
-
-#define EXCEPTION_PROLOG_COMMON_3(n) \
- std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- mflr r9; /* Get LR, later save to stack */ \
- ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
- std r9,_LINK(r1); \
- lbz r10,PACAIRQSOFTMASK(r13); \
- mfspr r11,SPRN_XER; /* save XER in stackframe */ \
- std r10,SOFTE(r1); \
- std r11,_XER(r1); \
- li r9,(n)+1; \
- std r9,_TRAP(r1); /* set trap number */ \
- li r10,0; \
- ld r11,exception_marker@toc(r2); \
- std r10,RESULT(r1); /* clear regs->result */ \
- std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */
-
-/*
- * Exception vectors.
- */
-#define STD_EXCEPTION(vec, label) \
- EXCEPTION_PROLOG(PACA_EXGEN, label, EXC_STD, KVMTEST_PR, vec);
-
-/* Version of above for when we have to branch out-of-line */
-#define __OOL_EXCEPTION(vec, label, hdlr) \
- SET_SCRATCH0(r13) \
- EXCEPTION_PROLOG_0(PACA_EXGEN) \
- b hdlr;
-
-#define STD_EXCEPTION_OOL(vec, label) \
- EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \
- EXCEPTION_PROLOG_2(label, EXC_STD)
-
-#define STD_EXCEPTION_HV(loc, vec, label) \
- EXCEPTION_PROLOG(PACA_EXGEN, label, EXC_HV, KVMTEST_HV, vec);
-
-#define STD_EXCEPTION_HV_OOL(vec, label) \
- EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \
- EXCEPTION_PROLOG_2(label, EXC_HV)
-
-#define STD_RELON_EXCEPTION(loc, vec, label) \
- /* No guest interrupts come through here */ \
- EXCEPTION_RELON_PROLOG(PACA_EXGEN, label, EXC_STD, NOTEST, vec);
-
-#define STD_RELON_EXCEPTION_OOL(vec, label) \
- EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \
- EXCEPTION_PROLOG_2_RELON(label, EXC_STD)
-
-#define STD_RELON_EXCEPTION_HV(loc, vec, label) \
- EXCEPTION_RELON_PROLOG(PACA_EXGEN, label, EXC_HV, KVMTEST_HV, vec);
-
-#define STD_RELON_EXCEPTION_HV_OOL(vec, label) \
- EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \
- EXCEPTION_PROLOG_2_RELON(label, EXC_HV)
-
-/* This associate vector numbers with bits in paca->irq_happened */
-#define SOFTEN_VALUE_0x500 PACA_IRQ_EE
-#define SOFTEN_VALUE_0x900 PACA_IRQ_DEC
-#define SOFTEN_VALUE_0x980 PACA_IRQ_DEC
-#define SOFTEN_VALUE_0xa00 PACA_IRQ_DBELL
-#define SOFTEN_VALUE_0xe80 PACA_IRQ_DBELL
-#define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI
-#define SOFTEN_VALUE_0xea0 PACA_IRQ_EE
-#define SOFTEN_VALUE_0xf00 PACA_IRQ_PMI
-
-#define __SOFTEN_TEST(h, vec, bitmask) \
- lbz r10,PACAIRQSOFTMASK(r13); \
- andi. r10,r10,bitmask; \
- li r10,SOFTEN_VALUE_##vec; \
- bne masked_##h##interrupt
-
-#define _SOFTEN_TEST(h, vec, bitmask) __SOFTEN_TEST(h, vec, bitmask)
-
-#define SOFTEN_TEST_PR(vec, bitmask) \
- KVMTEST(EXC_STD, vec); \
- _SOFTEN_TEST(EXC_STD, vec, bitmask)
-
-#define SOFTEN_TEST_HV(vec, bitmask) \
- KVMTEST(EXC_HV, vec); \
- _SOFTEN_TEST(EXC_HV, vec, bitmask)
-
-#define KVMTEST_PR(vec) \
- KVMTEST(EXC_STD, vec)
-
-#define KVMTEST_HV(vec) \
- KVMTEST(EXC_HV, vec)
-
-#define SOFTEN_NOTEST_PR(vec, bitmask) _SOFTEN_TEST(EXC_STD, vec, bitmask)
-#define SOFTEN_NOTEST_HV(vec, bitmask) _SOFTEN_TEST(EXC_HV, vec, bitmask)
-
-#define __MASKABLE_EXCEPTION(vec, label, h, extra, bitmask) \
- SET_SCRATCH0(r13); /* save r13 */ \
- EXCEPTION_PROLOG_0(PACA_EXGEN); \
- MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \
- EXCEPTION_PROLOG_2(label, h);
-
-#define MASKABLE_EXCEPTION(vec, label, bitmask) \
- __MASKABLE_EXCEPTION(vec, label, EXC_STD, SOFTEN_TEST_PR, bitmask)
-
-#define MASKABLE_EXCEPTION_OOL(vec, label, bitmask) \
- MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\
- EXCEPTION_PROLOG_2(label, EXC_STD)
-
-#define MASKABLE_EXCEPTION_HV(vec, label, bitmask) \
- __MASKABLE_EXCEPTION(vec, label, EXC_HV, SOFTEN_TEST_HV, bitmask)
-
-#define MASKABLE_EXCEPTION_HV_OOL(vec, label, bitmask) \
- MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
- EXCEPTION_PROLOG_2(label, EXC_HV)
-
-#define __MASKABLE_RELON_EXCEPTION(vec, label, h, extra, bitmask) \
- SET_SCRATCH0(r13); /* save r13 */ \
- EXCEPTION_PROLOG_0(PACA_EXGEN); \
- MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \
- EXCEPTION_PROLOG_2_RELON(label, h)
-
-#define MASKABLE_RELON_EXCEPTION(vec, label, bitmask) \
- __MASKABLE_RELON_EXCEPTION(vec, label, EXC_STD, SOFTEN_NOTEST_PR, bitmask)
-
-#define MASKABLE_RELON_EXCEPTION_OOL(vec, label, bitmask) \
- MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, bitmask);\
- EXCEPTION_PROLOG_2(label, EXC_STD);
-
-#define MASKABLE_RELON_EXCEPTION_HV(vec, label, bitmask) \
- __MASKABLE_RELON_EXCEPTION(vec, label, EXC_HV, SOFTEN_TEST_HV, bitmask)
-
-#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \
- MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
- EXCEPTION_PROLOG_2_RELON(label, EXC_HV)
-
-/*
- * Our exception common code can be passed various "additions"
- * to specify the behaviour of interrupts, whether to kick the
- * runlatch, etc...
- */
-
-/*
- * This addition reconciles our actual IRQ state with the various software
- * flags that track it. This may call C code.
- */
-#define ADD_RECONCILE RECONCILE_IRQ_STATE(r10,r11)
-
-#define ADD_NVGPRS \
- bl save_nvgprs
-
-#define RUNLATCH_ON \
-BEGIN_FTR_SECTION \
- ld r3, PACA_THREAD_INFO(r13); \
- ld r4,TI_LOCAL_FLAGS(r3); \
- andi. r0,r4,_TLF_RUNLATCH; \
- beql ppc64_runlatch_on_trampoline; \
-END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
-
-#define EXCEPTION_COMMON(area, trap, label, hdlr, ret, additions) \
- EXCEPTION_PROLOG_COMMON(trap, area); \
- /* Volatile regs are potentially clobbered here */ \
- additions; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- bl hdlr; \
- b ret
-
-/*
- * Exception where stack is already set in r1, r1 is saved in r10, and it
- * continues rather than returns.
- */
-#define EXCEPTION_COMMON_NORET_STACK(area, trap, label, hdlr, additions) \
- EXCEPTION_PROLOG_COMMON_1(); \
- kuap_save_amr_and_lock r9, r10, cr1; \
- EXCEPTION_PROLOG_COMMON_2(area); \
- EXCEPTION_PROLOG_COMMON_3(trap); \
- /* Volatile regs are potentially clobbered here */ \
- additions; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- bl hdlr
-
-#define STD_EXCEPTION_COMMON(trap, label, hdlr) \
- EXCEPTION_COMMON(PACA_EXGEN, trap, label, hdlr, \
- ret_from_except, ADD_NVGPRS;ADD_RECONCILE)
-
-/*
- * Like STD_EXCEPTION_COMMON, but for exceptions that can occur
- * in the idle task and therefore need the special idle handling
- * (finish nap and runlatch)
- */
-#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \
- EXCEPTION_COMMON(PACA_EXGEN, trap, label, hdlr, \
- ret_from_except_lite, FINISH_NAP;ADD_RECONCILE;RUNLATCH_ON)
-
-/*
- * When the idle code in power4_idle puts the CPU into NAP mode,
- * it has to do so in a loop, and relies on the external interrupt
- * and decrementer interrupt entry code to get it out of the loop.
- * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags
- * to signal that it is in the loop and needs help to get out.
- */
-#ifdef CONFIG_PPC_970_NAP
-#define FINISH_NAP \
-BEGIN_FTR_SECTION \
- ld r11, PACA_THREAD_INFO(r13); \
- ld r9,TI_LOCAL_FLAGS(r11); \
- andi. r10,r9,_TLF_NAPPING; \
- bnel power4_fixup_nap; \
-END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
-#else
-#define FINISH_NAP
-#endif
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_EXCEPTION_H */
diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h
new file mode 100644
index 000000000000..c814a2b55389
--- /dev/null
+++ b/arch/powerpc/include/asm/fadump-internal.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Firmware-Assisted Dump internal code.
+ *
+ * Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_FADUMP_INTERNAL_H
+#define _ASM_POWERPC_FADUMP_INTERNAL_H
+
+/* Maximum number of memory regions kernel supports */
+#define FADUMP_MAX_MEM_REGS 128
+
+#ifndef CONFIG_PRESERVE_FA_DUMP
+
+/* The upper limit percentage for user specified boot memory size (25%) */
+#define MAX_BOOT_MEM_RATIO 4
+
+#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
+
+/* Alignment per CMA requirement. */
+#define FADUMP_CMA_ALIGNMENT (PAGE_SIZE << \
+ max_t(unsigned long, MAX_ORDER - 1, \
+ pageblock_order))
+
+/* FAD commands */
+#define FADUMP_REGISTER 1
+#define FADUMP_UNREGISTER 2
+#define FADUMP_INVALIDATE 3
+
+/*
+ * Copy the ascii values for first 8 characters from a string into u64
+ * variable at their respective indexes.
+ * e.g.
+ * The string "FADMPINF" will be converted into 0x4641444d50494e46
+ */
+static inline u64 fadump_str_to_u64(const char *str)
+{
+ u64 val = 0;
+ int i;
+
+ for (i = 0; i < sizeof(val); i++)
+ val = (*str) ? (val << 8) | *str++ : val << 8;
+ return val;
+}
+
+#define FADUMP_CPU_UNKNOWN (~((u32)0))
+
+#define FADUMP_CRASH_INFO_MAGIC fadump_str_to_u64("FADMPINF")
+
+/* fadump crash info structure */
+struct fadump_crash_info_header {
+ u64 magic_number;
+ u64 elfcorehdr_addr;
+ u32 crashing_cpu;
+ struct pt_regs regs;
+ struct cpumask online_mask;
+};
+
+struct fadump_memory_range {
+ u64 base;
+ u64 size;
+};
+
+/* fadump memory ranges info */
+struct fadump_mrange_info {
+ char name[16];
+ struct fadump_memory_range *mem_ranges;
+ u32 mem_ranges_sz;
+ u32 mem_range_cnt;
+ u32 max_mem_ranges;
+};
+
+/* Platform specific callback functions */
+struct fadump_ops;
+
+/* Firmware-assisted dump configuration details. */
+struct fw_dump {
+ unsigned long reserve_dump_area_start;
+ unsigned long reserve_dump_area_size;
+ /* cmd line option during boot */
+ unsigned long reserve_bootvar;
+
+ unsigned long cpu_state_data_size;
+ u64 cpu_state_dest_vaddr;
+ u32 cpu_state_data_version;
+ u32 cpu_state_entry_size;
+
+ unsigned long hpte_region_size;
+
+ unsigned long boot_memory_size;
+ u64 boot_mem_dest_addr;
+ u64 boot_mem_addr[FADUMP_MAX_MEM_REGS];
+ u64 boot_mem_sz[FADUMP_MAX_MEM_REGS];
+ u64 boot_mem_top;
+ u64 boot_mem_regs_cnt;
+
+ unsigned long fadumphdr_addr;
+ unsigned long cpu_notes_buf_vaddr;
+ unsigned long cpu_notes_buf_size;
+
+ /*
+ * Maximum size supported by firmware to copy from source to
+ * destination address per entry.
+ */
+ u64 max_copy_size;
+ u64 kernel_metadata;
+
+ int ibm_configure_kernel_dump;
+
+ unsigned long fadump_enabled:1;
+ unsigned long fadump_supported:1;
+ unsigned long dump_active:1;
+ unsigned long dump_registered:1;
+ unsigned long nocma:1;
+
+ struct fadump_ops *ops;
+};
+
+struct fadump_ops {
+ u64 (*fadump_init_mem_struct)(struct fw_dump *fadump_conf);
+ u64 (*fadump_get_metadata_size)(void);
+ int (*fadump_setup_metadata)(struct fw_dump *fadump_conf);
+ u64 (*fadump_get_bootmem_min)(void);
+ int (*fadump_register)(struct fw_dump *fadump_conf);
+ int (*fadump_unregister)(struct fw_dump *fadump_conf);
+ int (*fadump_invalidate)(struct fw_dump *fadump_conf);
+ void (*fadump_cleanup)(struct fw_dump *fadump_conf);
+ int (*fadump_process)(struct fw_dump *fadump_conf);
+ void (*fadump_region_show)(struct fw_dump *fadump_conf,
+ struct seq_file *m);
+ void (*fadump_trigger)(struct fadump_crash_info_header *fdh,
+ const char *msg);
+};
+
+/* Helper functions */
+s32 fadump_setup_cpu_notes_buf(u32 num_cpus);
+void fadump_free_cpu_notes_buf(void);
+u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs);
+void fadump_update_elfcore_header(char *bufp);
+bool is_fadump_boot_mem_contiguous(void);
+bool is_fadump_reserved_mem_contiguous(void);
+
+#else /* !CONFIG_PRESERVE_FA_DUMP */
+
+/* Firmware-assisted dump configuration details. */
+struct fw_dump {
+ u64 boot_mem_top;
+ u64 dump_active;
+};
+
+#endif /* CONFIG_PRESERVE_FA_DUMP */
+
+#ifdef CONFIG_PPC_PSERIES
+extern void rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node);
+#else
+static inline void
+rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { }
+#endif
+
+#ifdef CONFIG_PPC_POWERNV
+extern void opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node);
+#else
+static inline void
+opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { }
+#endif
+
+#endif /* _ASM_POWERPC_FADUMP_INTERNAL_H */
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 17d9b6acaf63..526a6a647312 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -6,196 +6,14 @@
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
*/
-#ifndef __PPC64_FA_DUMP_H__
-#define __PPC64_FA_DUMP_H__
+#ifndef _ASM_POWERPC_FADUMP_H
+#define _ASM_POWERPC_FADUMP_H
#ifdef CONFIG_FA_DUMP
-/*
- * The RMA region will be saved for later dumping when kernel crashes.
- * RMA is Real Mode Area, the first block of logical memory address owned
- * by logical partition, containing the storage that may be accessed with
- * translate off.
- */
-#define RMA_START 0x0
-#define RMA_END (ppc64_rma_size)
-
-/*
- * On some Power systems where RMO is 128MB, it still requires minimum of
- * 256MB for kernel to boot successfully. When kdump infrastructure is
- * configured to save vmcore over network, we run into OOM issue while
- * loading modules related to network setup. Hence we need aditional 64M
- * of memory to avoid OOM issue.
- */
-#define MIN_BOOT_MEM (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \
- + (0x1UL << 26))
-
-/* The upper limit percentage for user specified boot memory size (25%) */
-#define MAX_BOOT_MEM_RATIO 4
-
-#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
-
-/* Alignement per CMA requirement. */
-#define FADUMP_CMA_ALIGNMENT (PAGE_SIZE << \
- max_t(unsigned long, MAX_ORDER - 1, pageblock_order))
-
-/* Firmware provided dump sections */
-#define FADUMP_CPU_STATE_DATA 0x0001
-#define FADUMP_HPTE_REGION 0x0002
-#define FADUMP_REAL_MODE_REGION 0x0011
-
-/* Dump request flag */
-#define FADUMP_REQUEST_FLAG 0x00000001
-
-/* FAD commands */
-#define FADUMP_REGISTER 1
-#define FADUMP_UNREGISTER 2
-#define FADUMP_INVALIDATE 3
-
-/* Dump status flag */
-#define FADUMP_ERROR_FLAG 0x2000
-
-#define FADUMP_CPU_ID_MASK ((1UL << 32) - 1)
-
-#define CPU_UNKNOWN (~((u32)0))
-
-/* Utility macros */
-#define SKIP_TO_NEXT_CPU(reg_entry) \
-({ \
- while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) \
- reg_entry++; \
- reg_entry++; \
-})
-
extern int crashing_cpu;
-/* Kernel Dump section info */
-struct fadump_section {
- __be32 request_flag;
- __be16 source_data_type;
- __be16 error_flags;
- __be64 source_address;
- __be64 source_len;
- __be64 bytes_dumped;
- __be64 destination_address;
-};
-
-/* ibm,configure-kernel-dump header. */
-struct fadump_section_header {
- __be32 dump_format_version;
- __be16 dump_num_sections;
- __be16 dump_status_flag;
- __be32 offset_first_dump_section;
-
- /* Fields for disk dump option. */
- __be32 dd_block_size;
- __be64 dd_block_offset;
- __be64 dd_num_blocks;
- __be32 dd_offset_disk_path;
-
- /* Maximum time allowed to prevent an automatic dump-reboot. */
- __be32 max_time_auto;
-};
-
-/*
- * Firmware Assisted dump memory structure. This structure is required for
- * registering future kernel dump with power firmware through rtas call.
- *
- * No disk dump option. Hence disk dump path string section is not included.
- */
-struct fadump_mem_struct {
- struct fadump_section_header header;
-
- /* Kernel dump sections */
- struct fadump_section cpu_state_data;
- struct fadump_section hpte_region;
- struct fadump_section rmr_region;
-};
-
-/* Firmware-assisted dump configuration details. */
-struct fw_dump {
- unsigned long cpu_state_data_size;
- unsigned long hpte_region_size;
- unsigned long boot_memory_size;
- unsigned long reserve_dump_area_start;
- unsigned long reserve_dump_area_size;
- /* cmd line option during boot */
- unsigned long reserve_bootvar;
-
- unsigned long fadumphdr_addr;
- unsigned long cpu_notes_buf;
- unsigned long cpu_notes_buf_size;
-
- int ibm_configure_kernel_dump;
-
- unsigned long fadump_enabled:1;
- unsigned long fadump_supported:1;
- unsigned long dump_active:1;
- unsigned long dump_registered:1;
- unsigned long nocma:1;
-};
-
-/*
- * Copy the ascii values for first 8 characters from a string into u64
- * variable at their respective indexes.
- * e.g.
- * The string "FADMPINF" will be converted into 0x4641444d50494e46
- */
-static inline u64 str_to_u64(const char *str)
-{
- u64 val = 0;
- int i;
-
- for (i = 0; i < sizeof(val); i++)
- val = (*str) ? (val << 8) | *str++ : val << 8;
- return val;
-}
-#define STR_TO_HEX(x) str_to_u64(x)
-#define REG_ID(x) str_to_u64(x)
-
-#define FADUMP_CRASH_INFO_MAGIC STR_TO_HEX("FADMPINF")
-#define REGSAVE_AREA_MAGIC STR_TO_HEX("REGSAVE")
-
-/* The firmware-assisted dump format.
- *
- * The register save area is an area in the partition's memory used to preserve
- * the register contents (CPU state data) for the active CPUs during a firmware
- * assisted dump. The dump format contains register save area header followed
- * by register entries. Each list of registers for a CPU starts with
- * "CPUSTRT" and ends with "CPUEND".
- */
-
-/* Register save area header. */
-struct fadump_reg_save_area_header {
- __be64 magic_number;
- __be32 version;
- __be32 num_cpu_offset;
-};
-
-/* Register entry. */
-struct fadump_reg_entry {
- __be64 reg_id;
- __be64 reg_value;
-};
-
-/* fadump crash info structure */
-struct fadump_crash_info_header {
- u64 magic_number;
- u64 elfcorehdr_addr;
- u32 crashing_cpu;
- struct pt_regs regs;
- struct cpumask online_mask;
-};
-
-struct fad_crash_memory_ranges {
- unsigned long long base;
- unsigned long long size;
-};
-
extern int is_fadump_memory_area(u64 addr, ulong size);
-extern int early_init_dt_scan_fw_dump(unsigned long node,
- const char *uname, int depth, void *data);
-extern int fadump_reserve_mem(void);
extern int setup_fadump(void);
extern int is_fadump_active(void);
extern int should_fadump_crash(void);
@@ -207,5 +25,11 @@ static inline int is_fadump_active(void) { return 0; }
static inline int should_fadump_crash(void) { return 0; }
static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
static inline void fadump_cleanup(void) { }
+#endif /* !CONFIG_FA_DUMP */
+
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
+extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data);
+extern int fadump_reserve_mem(void);
#endif
-#endif
+#endif /* _ASM_POWERPC_FADUMP_H */
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index faeca8b76c8c..b3e214a97f3a 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -50,6 +50,7 @@
#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000)
#define FW_FEATURE_BLOCK_REMOVE ASM_CONST(0x0000001000000000)
#define FW_FEATURE_PAPR_SCM ASM_CONST(0x0000002000000000)
+#define FW_FEATURE_ULTRAVISOR ASM_CONST(0x0000004000000000)
#ifndef __ASSEMBLY__
@@ -68,9 +69,9 @@ enum {
FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
- FW_FEATURE_PAPR_SCM,
+ FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR,
FW_FEATURE_PSERIES_ALWAYS = 0,
- FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
+ FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR,
FW_FEATURE_POWERNV_ALWAYS = 0,
FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 3dfb80b86561..f54a08a2cd70 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -8,6 +8,8 @@
#define MCOUNT_ADDR ((unsigned long)(_mcount))
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
#ifdef __ASSEMBLY__
/* Based off of objdump optput from glibc */
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index 3a6aa57b9d90..eea28ca679db 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -60,8 +60,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
pagefault_enable();
- if (!ret)
- *oval = oldval;
+ *oval = oldval;
prevent_write_to_user(uaddr, sizeof(*uaddr));
return ret;
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
index a4f947888744..2dabcf668292 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -169,249 +169,6 @@ name:
#define ABS_ADDR(label) (label - fs_label + fs_start)
-/*
- * Following are the BOOK3S exception handler helper macros.
- * Handlers come in a number of types, and each type has a number of varieties.
- *
- * EXC_REAL_* - real, unrelocated exception vectors
- * EXC_VIRT_* - virt (AIL), unrelocated exception vectors
- * TRAMP_REAL_* - real, unrelocated helpers (virt can call these)
- * TRAMP_VIRT_* - virt, unreloc helpers (in practice, real can use)
- * TRAMP_KVM - KVM handlers that get put into real, unrelocated
- * EXC_COMMON - virt, relocated common handlers
- *
- * The EXC handlers are given a name, and branch to name_common, or the
- * appropriate KVM or masking function. Vector handler verieties are as
- * follows:
- *
- * EXC_{REAL|VIRT}_BEGIN/END - used to open-code the exception
- *
- * EXC_{REAL|VIRT} - standard exception
- *
- * EXC_{REAL|VIRT}_suffix
- * where _suffix is:
- * - _MASKABLE - maskable exception
- * - _OOL - out of line with trampoline to common handler
- * - _HV - HV exception
- *
- * There can be combinations, e.g., EXC_VIRT_OOL_MASKABLE_HV
- *
- * The one unusual case is __EXC_REAL_OOL_HV_DIRECT, which is
- * an OOL vector that branches to a specified handler rather than the usual
- * trampoline that goes to common. It, and other underscore macros, should
- * be used with care.
- *
- * KVM handlers come in the following verieties:
- * TRAMP_KVM
- * TRAMP_KVM_SKIP
- * TRAMP_KVM_HV
- * TRAMP_KVM_HV_SKIP
- *
- * COMMON handlers come in the following verieties:
- * EXC_COMMON_BEGIN/END - used to open-code the handler
- * EXC_COMMON
- * EXC_COMMON_ASYNC
- *
- * TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM
- * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers.
- */
-
-#define EXC_REAL_BEGIN(name, start, size) \
- FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
-
-#define EXC_REAL_END(name, start, size) \
- FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
-
-#define EXC_VIRT_BEGIN(name, start, size) \
- FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
-
-#define EXC_VIRT_END(name, start, size) \
- FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
-
-#define EXC_COMMON_BEGIN(name) \
- USE_TEXT_SECTION(); \
- .balign IFETCH_ALIGN_BYTES; \
- .global name; \
- _ASM_NOKPROBE_SYMBOL(name); \
- DEFINE_FIXED_SYMBOL(name); \
-name:
-
-#define TRAMP_REAL_BEGIN(name) \
- FIXED_SECTION_ENTRY_BEGIN(real_trampolines, name)
-
-#define TRAMP_VIRT_BEGIN(name) \
- FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
-
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#define TRAMP_KVM_BEGIN(name) \
- TRAMP_VIRT_BEGIN(name)
-#else
-#define TRAMP_KVM_BEGIN(name)
-#endif
-
-#define EXC_REAL_NONE(start, size) \
- FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
- FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
-
-#define EXC_VIRT_NONE(start, size) \
- FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); \
- FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size);
-
-
-#define EXC_REAL(name, start, size) \
- EXC_REAL_BEGIN(name, start, size); \
- STD_EXCEPTION(start, name##_common); \
- EXC_REAL_END(name, start, size);
-
-#define EXC_VIRT(name, start, size, realvec) \
- EXC_VIRT_BEGIN(name, start, size); \
- STD_RELON_EXCEPTION(start, realvec, name##_common); \
- EXC_VIRT_END(name, start, size);
-
-#define EXC_REAL_MASKABLE(name, start, size, bitmask) \
- EXC_REAL_BEGIN(name, start, size); \
- MASKABLE_EXCEPTION(start, name##_common, bitmask); \
- EXC_REAL_END(name, start, size);
-
-#define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \
- EXC_VIRT_BEGIN(name, start, size); \
- MASKABLE_RELON_EXCEPTION(realvec, name##_common, bitmask); \
- EXC_VIRT_END(name, start, size);
-
-#define EXC_REAL_HV(name, start, size) \
- EXC_REAL_BEGIN(name, start, size); \
- STD_EXCEPTION_HV(start, start, name##_common); \
- EXC_REAL_END(name, start, size);
-
-#define EXC_VIRT_HV(name, start, size, realvec) \
- EXC_VIRT_BEGIN(name, start, size); \
- STD_RELON_EXCEPTION_HV(start, realvec, name##_common); \
- EXC_VIRT_END(name, start, size);
-
-#define __EXC_REAL_OOL(name, start, size) \
- EXC_REAL_BEGIN(name, start, size); \
- __OOL_EXCEPTION(start, label, tramp_real_##name); \
- EXC_REAL_END(name, start, size);
-
-#define __TRAMP_REAL_OOL(name, vec) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- STD_EXCEPTION_OOL(vec, name##_common);
-
-#define EXC_REAL_OOL(name, start, size) \
- __EXC_REAL_OOL(name, start, size); \
- __TRAMP_REAL_OOL(name, start);
-
-#define __EXC_REAL_OOL_MASKABLE(name, start, size) \
- __EXC_REAL_OOL(name, start, size);
-
-#define __TRAMP_REAL_OOL_MASKABLE(name, vec, bitmask) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- MASKABLE_EXCEPTION_OOL(vec, name##_common, bitmask);
-
-#define EXC_REAL_OOL_MASKABLE(name, start, size, bitmask) \
- __EXC_REAL_OOL_MASKABLE(name, start, size); \
- __TRAMP_REAL_OOL_MASKABLE(name, start, bitmask);
-
-#define __EXC_REAL_OOL_HV_DIRECT(name, start, size, handler) \
- EXC_REAL_BEGIN(name, start, size); \
- __OOL_EXCEPTION(start, label, handler); \
- EXC_REAL_END(name, start, size);
-
-#define __EXC_REAL_OOL_HV(name, start, size) \
- __EXC_REAL_OOL(name, start, size);
-
-#define __TRAMP_REAL_OOL_HV(name, vec) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- STD_EXCEPTION_HV_OOL(vec, name##_common); \
-
-#define EXC_REAL_OOL_HV(name, start, size) \
- __EXC_REAL_OOL_HV(name, start, size); \
- __TRAMP_REAL_OOL_HV(name, start);
-
-#define __EXC_REAL_OOL_MASKABLE_HV(name, start, size) \
- __EXC_REAL_OOL(name, start, size);
-
-#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec, bitmask) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- MASKABLE_EXCEPTION_HV_OOL(vec, name##_common, bitmask); \
-
-#define EXC_REAL_OOL_MASKABLE_HV(name, start, size, bitmask) \
- __EXC_REAL_OOL_MASKABLE_HV(name, start, size); \
- __TRAMP_REAL_OOL_MASKABLE_HV(name, start, bitmask);
-
-#define __EXC_VIRT_OOL(name, start, size) \
- EXC_VIRT_BEGIN(name, start, size); \
- __OOL_EXCEPTION(start, label, tramp_virt_##name); \
- EXC_VIRT_END(name, start, size);
-
-#define __TRAMP_VIRT_OOL(name, realvec) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- STD_RELON_EXCEPTION_OOL(realvec, name##_common);
-
-#define EXC_VIRT_OOL(name, start, size, realvec) \
- __EXC_VIRT_OOL(name, start, size); \
- __TRAMP_VIRT_OOL(name, realvec);
-
-#define __EXC_VIRT_OOL_MASKABLE(name, start, size) \
- __EXC_VIRT_OOL(name, start, size);
-
-#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- MASKABLE_RELON_EXCEPTION_OOL(realvec, name##_common, bitmask);
-
-#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec, bitmask) \
- __EXC_VIRT_OOL_MASKABLE(name, start, size); \
- __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask);
-
-#define __EXC_VIRT_OOL_HV(name, start, size) \
- __EXC_VIRT_OOL(name, start, size);
-
-#define __TRAMP_VIRT_OOL_HV(name, realvec) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- STD_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \
-
-#define EXC_VIRT_OOL_HV(name, start, size, realvec) \
- __EXC_VIRT_OOL_HV(name, start, size); \
- __TRAMP_VIRT_OOL_HV(name, realvec);
-
-#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size) \
- __EXC_VIRT_OOL(name, start, size);
-
-#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common, bitmask);\
-
-#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec, bitmask) \
- __EXC_VIRT_OOL_MASKABLE_HV(name, start, size); \
- __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask);
-
-#define TRAMP_KVM(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_##n); \
- KVM_HANDLER(area, EXC_STD, n); \
-
-#define TRAMP_KVM_SKIP(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_##n); \
- KVM_HANDLER_SKIP(area, EXC_STD, n); \
-
-/*
- * HV variant exceptions get the 0x2 bit added to their trap number.
- */
-#define TRAMP_KVM_HV(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_H##n); \
- KVM_HANDLER(area, EXC_HV, n + 0x2); \
-
-#define TRAMP_KVM_HV_SKIP(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_H##n); \
- KVM_HANDLER_SKIP(area, EXC_HV, n + 0x2); \
-
-#define EXC_COMMON(name, realvec, hdlr) \
- EXC_COMMON_BEGIN(name); \
- STD_EXCEPTION_COMMON(realvec, name, hdlr); \
-
-#define EXC_COMMON_ASYNC(name, realvec, hdlr) \
- EXC_COMMON_BEGIN(name); \
- STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr); \
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_HEAD_64_H */
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 20a101046cff..bd6504c28c2f 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -31,9 +31,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
return 0;
}
-void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
- pte_t pte);
-
#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor,
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 463c63a9fcf1..11112023e327 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -302,9 +302,14 @@
#define H_SCM_UNBIND_MEM 0x3F0
#define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4
#define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8
-#define H_SCM_MEM_QUERY 0x3FC
-#define H_SCM_BLOCK_CLEAR 0x400
-#define MAX_HCALL_OPCODE H_SCM_BLOCK_CLEAR
+#define H_SCM_UNBIND_ALL 0x3FC
+#define H_SCM_HEALTH 0x400
+#define H_SCM_PERFORMANCE_STATS 0x418
+#define MAX_HCALL_OPCODE H_SCM_PERFORMANCE_STATS
+
+/* Scope args for H_SCM_UNBIND_ALL */
+#define H_UNBIND_SCOPE_ALL (0x1)
+#define H_UNBIND_SCOPE_DRC (0x2)
/* H_VIOCTL functions */
#define H_GET_VIOA_DUMP_SIZE 0x01
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index 78202d5fb13a..67e2da195eae 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -76,18 +76,25 @@ static inline void hw_breakpoint_disable(void)
extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
int hw_breakpoint_handler(struct die_args *args);
-extern int set_dawr(struct arch_hw_breakpoint *brk);
+#else /* CONFIG_HAVE_HW_BREAKPOINT */
+static inline void hw_breakpoint_disable(void) { }
+static inline void thread_change_pc(struct task_struct *tsk,
+ struct pt_regs *regs) { }
+
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+
+#ifdef CONFIG_PPC_DAWR
extern bool dawr_force_enable;
static inline bool dawr_enabled(void)
{
return dawr_force_enable;
}
-
-#else /* CONFIG_HAVE_HW_BREAKPOINT */
-static inline void hw_breakpoint_disable(void) { }
-static inline void thread_change_pc(struct task_struct *tsk,
- struct pt_regs *regs) { }
+int set_dawr(struct arch_hw_breakpoint *brk);
+#else
static inline bool dawr_enabled(void) { return false; }
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+static inline int set_dawr(struct arch_hw_breakpoint *brk) { return -1; }
+#endif
+
#endif /* __KERNEL__ */
#endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */
diff --git a/arch/powerpc/include/asm/io-workarounds.h b/arch/powerpc/include/asm/io-workarounds.h
index 01567ea4ceaf..3cce499fbe27 100644
--- a/arch/powerpc/include/asm/io-workarounds.h
+++ b/arch/powerpc/include/asm/io-workarounds.h
@@ -8,6 +8,7 @@
#ifndef _IO_WORKAROUNDS_H
#define _IO_WORKAROUNDS_H
+#ifdef CONFIG_PPC_IO_WORKAROUNDS
#include <linux/io.h>
#include <asm/pci-bridge.h>
@@ -32,4 +33,23 @@ extern int spiderpci_iowa_init(struct iowa_bus *, void *);
#define SPIDER_PCI_DUMMY_READ 0x0810
#define SPIDER_PCI_DUMMY_READ_BASE 0x0814
+#endif
+
+#if defined(CONFIG_PPC_IO_WORKAROUNDS) && defined(CONFIG_PPC_INDIRECT_MMIO)
+extern bool io_workaround_inited;
+
+static inline bool iowa_is_active(void)
+{
+ return unlikely(io_workaround_inited);
+}
+#else
+static inline bool iowa_is_active(void)
+{
+ return false;
+}
+#endif
+
+void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
+ pgprot_t prot, void *caller);
+
#endif /* _IO_WORKAROUNDS_H */
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 23e5d5d16c7e..a63ec938636d 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -705,16 +705,9 @@ static inline void iosync(void)
* create hand-made mappings for use only by the PCI code and cannot
* currently be hooked. Must be page aligned.
*
- * * __ioremap is the low level implementation used by ioremap and
- * ioremap_prot and cannot be hooked (but can be used by a hook on one
- * of the previous ones)
- *
* * __ioremap_caller is the same as above but takes an explicit caller
* reference rather than using __builtin_return_address(0)
*
- * * __iounmap, is the low level implementation used by iounmap and cannot
- * be hooked (but can be used by a hook on iounmap)
- *
*/
extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
@@ -729,13 +722,14 @@ void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size);
extern void iounmap(volatile void __iomem *addr);
-extern void __iomem *__ioremap(phys_addr_t, unsigned long size,
- unsigned long flags);
+int early_ioremap_range(unsigned long ea, phys_addr_t pa,
+ unsigned long size, pgprot_t prot);
+void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
+ pgprot_t prot, void *caller);
+
extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size,
pgprot_t prot, void *caller);
-extern void __iounmap(volatile void __iomem *addr);
-
extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea,
unsigned long size, pgprot_t prot);
extern void __iounmap_at(void *ea, unsigned long size);
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 2c1845e5e851..350101e11ddb 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -48,15 +48,16 @@ struct iommu_table_ops {
* returns old TCE and DMA direction mask.
* @tce is a physical address.
*/
- int (*exchange)(struct iommu_table *tbl,
+ int (*xchg_no_kill)(struct iommu_table *tbl,
long index,
unsigned long *hpa,
- enum dma_data_direction *direction);
- /* Real mode */
- int (*exchange_rm)(struct iommu_table *tbl,
- long index,
- unsigned long *hpa,
- enum dma_data_direction *direction);
+ enum dma_data_direction *direction,
+ bool realmode);
+
+ void (*tce_kill)(struct iommu_table *tbl,
+ unsigned long index,
+ unsigned long pages,
+ bool realmode);
__be64 *(*useraddrptr)(struct iommu_table *tbl, long index, bool alloc);
#endif
@@ -111,6 +112,8 @@ struct iommu_table {
struct iommu_table_ops *it_ops;
struct kref it_kref;
int it_nid;
+ unsigned long it_reserved_start; /* Start of not-DMA-able (MMIO) area */
+ unsigned long it_reserved_end;
};
#define IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry) \
@@ -149,8 +152,9 @@ extern int iommu_tce_table_put(struct iommu_table *tbl);
/* Initializes an iommu_table based in values set in the passed-in
* structure
*/
-extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
- int nid);
+extern struct iommu_table *iommu_init_table(struct iommu_table *tbl,
+ int nid, unsigned long res_start, unsigned long res_end);
+
#define IOMMU_TABLE_GROUP_MAX_TABLES 2
struct iommu_table_group;
@@ -206,6 +210,12 @@ extern void iommu_del_device(struct device *dev);
extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction);
+extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
+ struct iommu_table *tbl,
+ unsigned long entry, unsigned long *hpa,
+ enum dma_data_direction *direction);
+extern void iommu_tce_kill(struct iommu_table *tbl,
+ unsigned long entry, unsigned long pages);
#else
static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
@@ -314,13 +324,5 @@ extern bool iommu_fixed_is_weak;
extern const struct dma_map_ops dma_iommu_ops;
-static inline unsigned long device_to_mask(struct device *dev)
-{
- if (dev->dma_mask && *dev->dma_mask)
- return *dev->dma_mask;
- /* Assume devices without mask can take 32 bit addresses */
- return 0xfffffffful;
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_IOMMU_H */
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index bb7c8cc77f1a..04b2b927bb5a 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -535,7 +535,7 @@ static inline void note_hpte_modification(struct kvm *kvm,
*/
static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
{
- return rcu_dereference_raw_notrace(kvm->memslots[0]);
+ return rcu_dereference_raw_check(kvm->memslots[0]);
}
extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e6e5f59aaa97..6fe6ad64cba5 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -232,11 +232,25 @@ struct revmap_entry {
};
/*
- * We use the top bit of each memslot->arch.rmap entry as a lock bit,
- * and bit 32 as a present flag. The bottom 32 bits are the
- * index in the guest HPT of a HPTE that points to the page.
+ * The rmap array of size number of guest pages is allocated for each memslot.
+ * This array is used to store usage specific information about the guest page.
+ * Below are the encodings of the various possible usage types.
*/
-#define KVMPPC_RMAP_LOCK_BIT 63
+/* Free bits which can be used to define a new usage */
+#define KVMPPC_RMAP_TYPE_MASK 0xff00000000000000
+#define KVMPPC_RMAP_NESTED 0xc000000000000000 /* Nested rmap array */
+#define KVMPPC_RMAP_HPT 0x0100000000000000 /* HPT guest */
+
+/*
+ * rmap usage definition for a hash page table (hpt) guest:
+ * 0x0000080000000000 Lock bit
+ * 0x0000018000000000 RC bits
+ * 0x0000000100000000 Present bit
+ * 0x00000000ffffffff HPT index bits
+ * The bottom 32 bits are the index in the guest HPT of a HPTE that points to
+ * the page.
+ */
+#define KVMPPC_RMAP_LOCK_BIT 43
#define KVMPPC_RMAP_RC_SHIFT 32
#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_PRESENT 0x100000000ul
@@ -283,6 +297,7 @@ struct kvm_arch {
cpumask_t cpu_in_guest;
u8 radix;
u8 fwnmi_enabled;
+ u8 secure_guest;
bool threads_indep;
bool nested_enable;
pgd_t *pgtable;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2484e6a8f5ca..8e8514efb124 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -598,6 +598,7 @@ extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
union kvmppc_one_reg *val);
extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
union kvmppc_one_reg *val);
+extern bool kvmppc_xive_native_supported(void);
#else
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 806494283e2a..3b4b305796ae 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -5,6 +5,29 @@
*/
#ifndef _ASM_POWERPC_LPPACA_H
#define _ASM_POWERPC_LPPACA_H
+
+/*
+ * The below VPHN macros are outside the __KERNEL__ check since these are
+ * used for compiling the vphn selftest in userspace
+ */
+
+/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */
+#define VPHN_REGISTER_COUNT 6
+
+/*
+ * 6 64-bit registers unpacked into up to 24 be32 associativity values. To
+ * form the complete property we have to add the length in the first cell.
+ */
+#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1)
+
+/*
+ * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags:
+ * 1 for retrieving associativity information for a guest cpu
+ * 2 for retrieving associativity information for a host/hypervisor cpu
+ */
+#define VPHN_FLAG_VCPU 1
+#define VPHN_FLAG_PCPU 2
+
#ifdef __KERNEL__
/*
@@ -19,6 +42,7 @@
*/
#include <linux/cache.h>
#include <linux/threads.h>
+#include <linux/spinlock_types.h>
#include <asm/types.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
@@ -141,7 +165,19 @@ struct dtl_entry {
#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */
#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
+/*
+ * Dispatch trace log event enable mask:
+ * 0x1: voluntary virtual processor waits
+ * 0x2: time-slice preempts
+ * 0x4: virtual partition memory page faults
+ */
+#define DTL_LOG_CEDE 0x1
+#define DTL_LOG_PREEMPT 0x2
+#define DTL_LOG_FAULT 0x4
+#define DTL_LOG_ALL (DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT)
+
extern struct kmem_cache *dtl_cache;
+extern rwlock_t dtl_access_lock;
/*
* When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
@@ -151,6 +187,10 @@ extern struct kmem_cache *dtl_cache;
*/
extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
+extern void register_dtl_buffer(int cpu);
+extern void alloc_dtl_buffers(unsigned long *time_limit);
+extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
+
#endif /* CONFIG_PPC_BOOK3S */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_LPPACA_H */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index c43d6eca9edd..7bcb64444a39 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -3,9 +3,6 @@
#define _ASM_POWERPC_MACHDEP_H
#ifdef __KERNEL__
-/*
- */
-
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/dma-mapping.h>
@@ -31,10 +28,6 @@ struct pci_host_bridge;
struct machdep_calls {
char *name;
#ifdef CONFIG_PPC64
- void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size,
- pgprot_t prot, void *caller);
- void (*iounmap)(volatile void __iomem *token);
-
#ifdef CONFIG_PM
void (*iommu_save)(void);
void (*iommu_restore)(void);
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index a4c6a74ad2fb..6a6ddaabdb34 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -30,6 +30,10 @@ enum MCE_Disposition {
enum MCE_Initiator {
MCE_INITIATOR_UNKNOWN = 0,
MCE_INITIATOR_CPU = 1,
+ MCE_INITIATOR_PCI = 2,
+ MCE_INITIATOR_ISA = 3,
+ MCE_INITIATOR_MEMORY= 4,
+ MCE_INITIATOR_POWERMGM = 5,
};
enum MCE_ErrorType {
@@ -41,6 +45,8 @@ enum MCE_ErrorType {
MCE_ERROR_TYPE_USER = 5,
MCE_ERROR_TYPE_RA = 6,
MCE_ERROR_TYPE_LINK = 7,
+ MCE_ERROR_TYPE_DCACHE = 8,
+ MCE_ERROR_TYPE_ICACHE = 9,
};
enum MCE_ErrorClass {
@@ -122,7 +128,8 @@ struct machine_check_event {
enum MCE_UeErrorType ue_error_type:8;
u8 effective_address_provided;
u8 physical_address_provided;
- u8 reserved_1[5];
+ u8 ignore_event;
+ u8 reserved_1[4];
u64 effective_address;
u64 physical_address;
u8 reserved_2[8];
@@ -193,6 +200,7 @@ struct mce_error_info {
enum MCE_Initiator initiator:8;
enum MCE_ErrorClass error_class:8;
bool sync_error;
+ bool ignore_event;
};
#define MAX_MC_EVT 100
diff --git a/arch/powerpc/include/asm/mem_encrypt.h b/arch/powerpc/include/asm/mem_encrypt.h
new file mode 100644
index 000000000000..ba9dab07c1be
--- /dev/null
+++ b/arch/powerpc/include/asm/mem_encrypt.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * SVM helper functions
+ *
+ * Copyright 2018 IBM Corporation
+ */
+
+#ifndef _ASM_POWERPC_MEM_ENCRYPT_H
+#define _ASM_POWERPC_MEM_ENCRYPT_H
+
+#include <asm/svm.h>
+
+static inline bool mem_encrypt_active(void)
+{
+ return is_secure_guest();
+}
+
+static inline bool force_dma_unencrypted(struct device *dev)
+{
+ return is_secure_guest();
+}
+
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
+
+#endif /* _ASM_POWERPC_MEM_ENCRYPT_H */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index ba94ce8c22d7..0699cfeeb8c9 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -257,7 +257,7 @@ extern void radix__mmu_cleanup_all(void);
/* Functions for creating and updating partition table on POWER9 */
extern void mmu_partition_table_init(void);
extern void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
- unsigned long dw1);
+ unsigned long dw1, bool flush);
#endif /* CONFIG_PPC64 */
struct mm_struct;
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 0284f8f5305f..552b96eef0c8 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -11,8 +11,6 @@
#include <asm/mmu.h> /* For sub-arch specific PPC_PIN_SIZE */
#include <asm/asm-405.h>
-extern unsigned long ioremap_bot;
-
#ifdef CONFIG_44x
extern int icache_44x_need_flush;
#endif
@@ -78,23 +76,21 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
*/
#include <asm/fixmap.h>
-#ifdef CONFIG_HIGHMEM
-#define KVIRT_TOP PKMAP_BASE
-#else
-#define KVIRT_TOP FIXADDR_START
-#endif
-
/*
* ioremap_bot starts at that address. Early ioremaps move down from there,
* until mem_init() at which point this becomes the top of the vmalloc
* and ioremap space
*/
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
+#ifdef CONFIG_HIGHMEM
+#define IOREMAP_TOP PKMAP_BASE
#else
-#define IOREMAP_TOP KVIRT_TOP
+#define IOREMAP_TOP FIXADDR_START
#endif
+/* PPC32 shares vmalloc area with ioremap */
+#define IOREMAP_START VMALLOC_START
+#define IOREMAP_END VMALLOC_END
+
/*
* Just any arbitrary offset to the start of the vmalloc VM area: the
* current 16MB value just means that there will be a 64MB "hole" after the
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index b9f66cf15c31..9a33b8bd842d 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -53,6 +53,7 @@
#define PHB_IO_BASE (ISA_IO_END)
#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
#define IOREMAP_BASE (PHB_IO_END)
+#define IOREMAP_START (ioremap_bot)
#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE)
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 1ca1c1864b32..7fed9dc0f147 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -293,5 +293,18 @@ static inline int pgd_huge(pgd_t pgd)
#define is_hugepd(hpd) (hugepd_ok(hpd))
#endif
+/*
+ * This gets called at the end of handling a page fault, when
+ * the kernel has put a new PTE into the page table for the process.
+ * We use it to ensure coherency between the i-cache and d-cache
+ * for the page which has just been mapped in.
+ */
+#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_HUGETLB_PAGE)
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
+#else
+static inline
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) {}
+#endif
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 09a8553833d1..378e3997845a 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -208,7 +208,10 @@
#define OPAL_HANDLE_HMI2 166
#define OPAL_NX_COPROC_INIT 167
#define OPAL_XIVE_GET_VP_STATE 170
-#define OPAL_LAST 170
+#define OPAL_MPIPL_UPDATE 173
+#define OPAL_MPIPL_REGISTER_TAG 174
+#define OPAL_MPIPL_QUERY_TAG 175
+#define OPAL_LAST 175
#define QUIESCE_HOLD 1 /* Spin all calls at entry */
#define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */
@@ -453,6 +456,7 @@ enum opal_msg_type {
OPAL_MSG_DPO = 5,
OPAL_MSG_PRD = 6,
OPAL_MSG_OCC = 7,
+ OPAL_MSG_PRD2 = 8,
OPAL_MSG_TYPE_MAX,
};
@@ -564,6 +568,7 @@ enum OpalHMI_XstopType {
CHECKSTOP_TYPE_UNKNOWN = 0,
CHECKSTOP_TYPE_CORE = 1,
CHECKSTOP_TYPE_NX = 2,
+ CHECKSTOP_TYPE_NPU = 3
};
enum OpalHMI_CoreXstopReason {
@@ -1058,6 +1063,7 @@ enum {
OPAL_REBOOT_NORMAL = 0,
OPAL_REBOOT_PLATFORM_ERROR = 1,
OPAL_REBOOT_FULL_IPL = 2,
+ OPAL_REBOOT_MPIPL = 3,
};
/* Argument to OPAL_PCI_TCE_KILL */
@@ -1134,6 +1140,44 @@ enum {
#define OPAL_PCI_P2P_LOAD 0x2
#define OPAL_PCI_P2P_STORE 0x4
+/* MPIPL update operations */
+enum opal_mpipl_ops {
+ OPAL_MPIPL_ADD_RANGE = 0,
+ OPAL_MPIPL_REMOVE_RANGE = 1,
+ OPAL_MPIPL_REMOVE_ALL = 2,
+ OPAL_MPIPL_FREE_PRESERVED_MEMORY = 3,
+};
+
+/* Tag will point to various metadata area. Kernel will
+ * use tag to get metadata value.
+ */
+enum opal_mpipl_tags {
+ OPAL_MPIPL_TAG_CPU = 0,
+ OPAL_MPIPL_TAG_OPAL = 1,
+ OPAL_MPIPL_TAG_KERNEL = 2,
+ OPAL_MPIPL_TAG_BOOT_MEM = 3,
+};
+
+/* Preserved memory details */
+struct opal_mpipl_region {
+ __be64 src;
+ __be64 dest;
+ __be64 size;
+};
+
+/* Structure version */
+#define OPAL_MPIPL_VERSION 0x01
+
+struct opal_mpipl_fadump {
+ u8 version;
+ u8 reserved[7];
+ __be32 crashing_pir; /* OPAL crashing CPU PIR */
+ __be32 cpu_data_version;
+ __be32 cpu_data_size;
+ __be32 region_cnt;
+ struct opal_mpipl_region region[];
+} __packed;
+
#endif /* __ASSEMBLY__ */
#endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 4ed5d57f2359..a0cf8fba4d12 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -39,6 +39,7 @@ int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
uint64_t PE_handle);
int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap,
uint64_t rate_phys, uint32_t size);
+
int64_t opal_console_write(int64_t term_number, __be64 *length,
const uint8_t *buffer);
int64_t opal_console_read(int64_t term_number, __be64 *length,
@@ -272,7 +273,7 @@ int64_t opal_xive_get_vp_info(uint64_t vp,
int64_t opal_xive_set_vp_info(uint64_t vp,
uint64_t flags,
uint64_t report_cl_pair);
-int64_t opal_xive_allocate_irq(uint32_t chip_id);
+int64_t opal_xive_allocate_irq_raw(uint32_t chip_id);
int64_t opal_xive_free_irq(uint32_t girq);
int64_t opal_xive_sync(uint32_t type, uint32_t id);
int64_t opal_xive_dump(uint32_t type, uint32_t id);
@@ -283,8 +284,6 @@ int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio,
uint32_t qtoggle,
uint32_t qindex);
int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01);
-int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target,
- uint64_t desc, uint16_t pe_number);
int64_t opal_imc_counters_init(uint32_t type, uint64_t address,
uint64_t cpu_pir);
@@ -299,6 +298,10 @@ int opal_sensor_group_clear(u32 group_hndl, int token);
int opal_sensor_group_enable(u32 group_hndl, int token, bool enable);
int opal_nx_coproc_init(uint32_t chip_id, uint32_t ct);
+s64 opal_mpipl_update(enum opal_mpipl_ops op, u64 src, u64 dest, u64 size);
+s64 opal_mpipl_register_tag(enum opal_mpipl_tags tag, u64 addr);
+s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, u64 *addr);
+
s64 opal_signal_system_reset(s32 cpu);
s64 opal_quiesce(u64 shutdown_type, s32 cpu);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 9bd2326bef6f..e3cc9eb9204d 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -166,7 +166,9 @@ struct paca_struct {
u64 kstack; /* Saved Kernel stack addr */
u64 saved_r1; /* r1 save for RTAS calls or PM or EE=0 */
u64 saved_msr; /* MSR saved here by enter_rtas */
+#ifdef CONFIG_PPC_BOOK3E
u16 trap_save; /* Used when bad stack is encountered */
+#endif
u8 irq_soft_mask; /* mask for irq soft masking */
u8 irq_happened; /* irq happened while soft-disabled */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 0d52f57fca04..c8bb14ff4713 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -215,9 +215,19 @@ static inline bool pfn_valid(unsigned long pfn)
/*
* gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET
* with -mcmodel=medium, so we use & and | instead of - and + on 64-bit.
+ * This also results in better code generation.
*/
-#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET))
-#define __pa(x) ((unsigned long)(x) & 0x0fffffffffffffffUL)
+#define __va(x) \
+({ \
+ VIRTUAL_BUG_ON((unsigned long)(x) >= PAGE_OFFSET); \
+ (void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET); \
+})
+
+#define __pa(x) \
+({ \
+ VIRTUAL_BUG_ON((unsigned long)(x) < PAGE_OFFSET); \
+ (unsigned long)(x) & 0x0fffffffffffffffUL; \
+})
#else /* 32-bit, non book E */
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + PAGE_OFFSET - MEMORY_START))
diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
index 683dfbc67ca8..d64dfe3ac712 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -40,6 +40,8 @@ typedef unsigned long long pte_basic_t;
typedef unsigned long pte_basic_t;
#endif
+#include <asm/bug.h>
+
/*
* Clear page using the dcbz instruction, which doesn't cause any
* memory traffic (except to write out any cache lines which get
@@ -49,6 +51,8 @@ static inline void clear_page(void *addr)
{
unsigned int i;
+ WARN_ON((unsigned long)addr & (L1_CACHE_BYTES - 1));
+
for (i = 0; i < PAGE_SIZE / L1_CACHE_BYTES; i++, addr += L1_CACHE_BYTES)
dcbz(addr);
}
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 8dad1fdf4bd2..ea6ec65970ef 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -183,6 +183,7 @@ struct iommu_table;
struct pci_dn {
int flags;
#define PCI_DN_FLAG_IOV_VF 0x01
+#define PCI_DN_FLAG_DEAD 0x02 /* Device has been hot-removed */
int busno; /* pci bus number */
int devfn; /* pci device and function number */
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 2372d35533ad..327567b8f7d6 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -112,8 +112,6 @@ extern pgprot_t pci_phys_mem_access_prot(struct file *file,
unsigned long size,
pgprot_t prot);
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
-
extern resource_size_t pcibios_io_space_offset(struct pci_controller *hose);
extern void pcibios_setup_bus_devices(struct pci_bus *bus);
extern void pcibios_setup_bus_self(struct pci_bus *bus);
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index 2b2c60a1a66d..6dd78a2dc03a 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -64,8 +64,6 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
extern struct kmem_cache *pgtable_cache[];
#define PGT_CACHE(shift) pgtable_cache[shift]
-static inline void check_pgt_cache(void) { }
-
#ifdef CONFIG_PPC_BOOK3S
#include <asm/book3s/pgalloc.h>
#else
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 3f53be60fb01..4053b2ab427c 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -68,6 +68,8 @@ extern pgd_t swapper_pg_dir[];
extern void paging_init(void);
+extern unsigned long ioremap_bot;
+
/*
* kern_addr_valid is intended to indicate whether an address is a valid
* kernel address. Most 32-bit archs define it as always true (like this)
@@ -77,18 +79,6 @@ extern void paging_init(void);
#include <asm-generic/pgtable.h>
-
-/*
- * This gets called at the end of handling a page fault, when
- * the kernel has put a new PTE into the page table for the process.
- * We use it to ensure coherency between the i-cache and d-cache
- * for the page which has just been mapped in.
- * On machines which use an MMU hash table, we use this to put a
- * corresponding HPTE into the hash table ahead of time, instead of
- * waiting for the inevitable extra hash-table miss exception.
- */
-extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
-
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_large(pmd) 0
#endif
@@ -97,7 +87,6 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
unsigned long vmalloc_to_phys(void *vmalloc_addr);
void pgtable_cache_add(unsigned int shift);
-void pgtable_cache_init(void);
#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32)
void mark_initmem_nx(void);
@@ -140,6 +129,44 @@ static inline void pte_frag_set(mm_context_t *ctx, void *p)
}
#endif
+#ifndef pmd_is_leaf
+#define pmd_is_leaf pmd_is_leaf
+static inline bool pmd_is_leaf(pmd_t pmd)
+{
+ return false;
+}
+#endif
+
+#ifndef pud_is_leaf
+#define pud_is_leaf pud_is_leaf
+static inline bool pud_is_leaf(pud_t pud)
+{
+ return false;
+}
+#endif
+
+#ifndef pgd_is_leaf
+#define pgd_is_leaf pgd_is_leaf
+static inline bool pgd_is_leaf(pgd_t pgd)
+{
+ return false;
+}
+#endif
+
+#ifdef CONFIG_PPC64
+#define is_ioremap_addr is_ioremap_addr
+static inline bool is_ioremap_addr(const void *x)
+{
+#ifdef CONFIG_MMU
+ unsigned long addr = (unsigned long)x;
+
+ return addr >= IOREMAP_BASE && addr < IOREMAP_END;
+#else
+ return false;
+#endif
+}
+#endif /* CONFIG_PPC64 */
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index cff5a411e595..4497c8afb573 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -340,6 +340,12 @@ static inline long plpar_set_ciabr(unsigned long ciabr)
{
return 0;
}
+
+static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex,
+ unsigned long *ptes)
+{
+ return 0;
+}
#endif /* CONFIG_PPC_PSERIES */
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
index dc9a1ca70edf..c6bbe9778d3c 100644
--- a/arch/powerpc/include/asm/pmc.h
+++ b/arch/powerpc/include/asm/pmc.h
@@ -27,11 +27,10 @@ static inline void ppc_set_pmu_inuse(int inuse)
#ifdef CONFIG_PPC_PSERIES
get_lppaca()->pmcregs_in_use = inuse;
#endif
- } else {
+ }
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- get_paca()->pmcregs_in_use = inuse;
+ get_paca()->pmcregs_in_use = inuse;
#endif
- }
#endif
}
diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
index 208b5503f4ed..7de82647e761 100644
--- a/arch/powerpc/include/asm/pnv-ocxl.h
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
// Copyright 2017 IBM Corp.
#ifndef _ASM_PNV_OCXL_H
#define _ASM_PNV_OCXL_H
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index b5a85f1bb305..edcb1fc50aeb 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -22,15 +22,9 @@ extern int pnv_pci_get_presence_state(uint64_t id, uint8_t *state);
extern int pnv_pci_get_power_state(uint64_t id, uint8_t *state);
extern int pnv_pci_set_power_state(uint64_t id, uint8_t state,
struct opal_msg *msg);
-extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
- u64 desc);
-extern int pnv_pci_enable_tunnel(struct pci_dev *dev, uint64_t *asnind);
-extern int pnv_pci_disable_tunnel(struct pci_dev *dev);
extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
int enable);
-extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid,
- u32 *pid, u32 *tid);
int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
unsigned int virq);
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
index bc69ed2d952c..e1a858718716 100644
--- a/arch/powerpc/include/asm/powernv.h
+++ b/arch/powerpc/include/asm/powernv.h
@@ -7,35 +7,13 @@
#define _ASM_POWERNV_H
#ifdef CONFIG_PPC_POWERNV
-#define NPU2_WRITE 1
extern void powernv_set_nmmu_ptcr(unsigned long ptcr);
-extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
- unsigned long flags,
- void (*cb)(struct npu_context *, void *),
- void *priv);
-extern void pnv_npu2_destroy_context(struct npu_context *context,
- struct pci_dev *gpdev);
-extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
- unsigned long *flags, unsigned long *status,
- int count);
void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val);
void pnv_tm_init(void);
#else
static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { }
-static inline struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
- unsigned long flags,
- struct npu_context *(*cb)(struct npu_context *, void *),
- void *priv) { return ERR_PTR(-ENODEV); }
-static inline void pnv_npu2_destroy_context(struct npu_context *context,
- struct pci_dev *gpdev) { }
-
-static inline int pnv_npu2_handle_fault(struct npu_context *context,
- uintptr_t *ea, unsigned long *flags,
- unsigned long *status, int count) {
- return -ENODEV;
-}
static inline void pnv_tm_init(void) { }
#endif
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 2291daf39cd1..c1df75edde44 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -410,6 +410,15 @@
#define __PPC_RC21 (0x1 << 10)
/*
+ * Both low and high 16 bits are added as SIGNED additions, so if low 16 bits
+ * has high bit set, high 16 bits must be adjusted. These macros do that (stolen
+ * from binutils).
+ */
+#define PPC_LO(v) ((v) & 0xffff)
+#define PPC_HI(v) (((v) >> 16) & 0xffff)
+#define PPC_HA(v) PPC_HI((v) + 0x8000)
+
+/*
* Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
* larx with EH set as an illegal instruction.
*/
@@ -588,7 +597,16 @@
#define PPC_SLBIA(IH) stringify_in_c(.long PPC_INST_SLBIA | \
((IH & 0x7) << 21))
-#define PPC_INVALIDATE_ERAT PPC_SLBIA(7)
+
+/*
+ * These may only be used on ISA v3.0 or later (aka. CPU_FTR_ARCH_300, radix
+ * implies CPU_FTR_ARCH_300). USER/GUEST invalidates may only be used by radix
+ * mode (on HPT these would also invalidate various SLBEs which may not be
+ * desired).
+ */
+#define PPC_ISA_3_0_INVALIDATE_ERAT PPC_SLBIA(7)
+#define PPC_RADIX_INVALIDATE_ERAT_USER PPC_SLBIA(3)
+#define PPC_RADIX_INVALIDATE_ERAT_GUEST PPC_SLBIA(6)
#define VCMPEQUD_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUD | \
___PPC_RT(vrt) | ___PPC_RA(vra) | \
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index cec2d6409515..7f4be5a05eb3 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -62,11 +62,6 @@ void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
void eeh_sysfs_add_device(struct pci_dev *pdev);
void eeh_sysfs_remove_device(struct pci_dev *pdev);
-static inline const char *eeh_pci_name(struct pci_dev *pdev)
-{
- return pdev ? pci_name(pdev) : "<null>";
-}
-
static inline const char *eeh_driver_name(struct pci_dev *pdev)
{
return (pdev && pdev->driver) ? pdev->driver->name : "<null>";
@@ -74,6 +69,8 @@ static inline const char *eeh_driver_name(struct pci_dev *pdev)
#endif /* CONFIG_EEH */
+#define PCI_BUSNO(bdfn) ((bdfn >> 8) & 0xff)
+
#else /* CONFIG_PCI */
static inline void init_pci_config_tokens(void) { }
#endif /* !CONFIG_PCI */
diff --git a/arch/powerpc/include/asm/ppc4xx_ocm.h b/arch/powerpc/include/asm/ppc4xx_ocm.h
deleted file mode 100644
index fc4db6dcde84..000000000000
--- a/arch/powerpc/include/asm/ppc4xx_ocm.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * PowerPC 4xx OCM memory allocation support
- *
- * (C) Copyright 2009, Applied Micro Circuits Corporation
- * Victor Gallardo (vgallardo@amcc.com)
- *
- * See file CREDITS for list of people who contributed to this
- * project.
- */
-
-#ifndef __ASM_POWERPC_PPC4XX_OCM_H__
-#define __ASM_POWERPC_PPC4XX_OCM_H__
-
-#define PPC4XX_OCM_NON_CACHED 0
-#define PPC4XX_OCM_CACHED 1
-
-#if defined(CONFIG_PPC4xx_OCM)
-
-void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
- int flags, const char *owner);
-void ppc4xx_ocm_free(const void *virt);
-
-#else
-
-#define ppc4xx_ocm_alloc(phys, size, align, flags, owner) NULL
-#define ppc4xx_ocm_free(addr) ((void)0)
-
-#endif /* CONFIG_PPC4xx_OCM */
-
-#endif /* __ASM_POWERPC_PPC4XX_OCM_H__ */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index e0637730a8e7..6b03dff61a05 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -311,18 +311,48 @@ n:
addis reg,reg,(name - 0b)@ha; \
addi reg,reg,(name - 0b)@l;
-#ifdef __powerpc64__
-#ifdef HAVE_AS_ATHIGH
+#if defined(__powerpc64__) && defined(HAVE_AS_ATHIGH)
#define __AS_ATHIGH high
#else
#define __AS_ATHIGH h
#endif
-#define LOAD_REG_IMMEDIATE(reg,expr) \
- lis reg,(expr)@highest; \
- ori reg,reg,(expr)@higher; \
- rldicr reg,reg,32,31; \
- oris reg,reg,(expr)@__AS_ATHIGH; \
- ori reg,reg,(expr)@l;
+
+.macro __LOAD_REG_IMMEDIATE_32 r, x
+ .if (\x) >= 0x8000 || (\x) < -0x8000
+ lis \r, (\x)@__AS_ATHIGH
+ .if (\x) & 0xffff != 0
+ ori \r, \r, (\x)@l
+ .endif
+ .else
+ li \r, (\x)@l
+ .endif
+.endm
+
+.macro __LOAD_REG_IMMEDIATE r, x
+ .if (\x) >= 0x80000000 || (\x) < -0x80000000
+ __LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
+ sldi \r, \r, 32
+ .if (\x) & 0xffff0000 != 0
+ oris \r, \r, (\x)@__AS_ATHIGH
+ .endif
+ .if (\x) & 0xffff != 0
+ ori \r, \r, (\x)@l
+ .endif
+ .else
+ __LOAD_REG_IMMEDIATE_32 \r, \x
+ .endif
+.endm
+
+#ifdef __powerpc64__
+
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg, tmp, expr) \
+ lis tmp, (expr)@highest; \
+ lis reg, (expr)@__AS_ATHIGH; \
+ ori tmp, tmp, (expr)@higher; \
+ ori reg, reg, (expr)@l; \
+ rldimi reg, tmp, 32, 0
#define LOAD_REG_ADDR(reg,name) \
ld reg,name@got(r2)
@@ -335,11 +365,13 @@ n:
#else /* 32-bit */
-#define LOAD_REG_IMMEDIATE(reg,expr) \
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE_32 reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg,expr) \
lis reg,(expr)@ha; \
addi reg,reg,(expr)@l;
-#define LOAD_REG_ADDR(reg,name) LOAD_REG_IMMEDIATE(reg, name)
+#define LOAD_REG_ADDR(reg,name) LOAD_REG_IMMEDIATE_SYM(reg, name)
#define LOAD_REG_ADDRBASE(reg, name) lis reg,name@ha
#define ADDROFF(name) name@l
@@ -351,19 +383,9 @@ n:
/* various errata or part fixups */
#ifdef CONFIG_PPC601_SYNC_FIX
-#define SYNC \
-BEGIN_FTR_SECTION \
- sync; \
- isync; \
-END_FTR_SECTION_IFSET(CPU_FTR_601)
-#define SYNC_601 \
-BEGIN_FTR_SECTION \
- sync; \
-END_FTR_SECTION_IFSET(CPU_FTR_601)
-#define ISYNC_601 \
-BEGIN_FTR_SECTION \
- isync; \
-END_FTR_SECTION_IFSET(CPU_FTR_601)
+#define SYNC sync; isync
+#define SYNC_601 sync
+#define ISYNC_601 isync
#else
#define SYNC
#define SYNC_601
@@ -389,15 +411,11 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
#define MFTBU(dest) mfspr dest, SPRN_TBRU
#endif
-#ifndef CONFIG_SMP
-#define TLBSYNC
-#else /* CONFIG_SMP */
/* tlbsync is not implemented on 601 */
-#define TLBSYNC \
-BEGIN_FTR_SECTION \
- tlbsync; \
- sync; \
-END_FTR_SECTION_IFCLR(CPU_FTR_601)
+#if !defined(CONFIG_SMP) || defined(CONFIG_PPC_BOOK3S_601)
+#define TLBSYNC
+#else
+#define TLBSYNC tlbsync; sync
#endif
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index ef573fe9873e..a9993e7a443b 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -346,8 +346,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
#define spin_cpu_relax() barrier()
-#define spin_cpu_yield() spin_cpu_relax()
-
#define spin_end() HMT_medium()
#define spin_until_cond(cond) \
diff --git a/arch/powerpc/include/asm/ps3stor.h b/arch/powerpc/include/asm/ps3stor.h
index d9f6589bc107..1d8279014f22 100644
--- a/arch/powerpc/include/asm/ps3stor.h
+++ b/arch/powerpc/include/asm/ps3stor.h
@@ -39,7 +39,7 @@ struct ps3_storage_device {
unsigned int num_regions;
unsigned long accessible_regions;
unsigned int region_idx; /* first accessible region */
- struct ps3_storage_region regions[0]; /* Must be last */
+ struct ps3_storage_region regions[]; /* Must be last */
};
static inline struct ps3_storage_device *to_ps3_storage_device(struct device *dev)
diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h
index 2d633e9d686c..33fa5dd8ee6a 100644
--- a/arch/powerpc/include/asm/pte-walk.h
+++ b/arch/powerpc/include/asm/pte-walk.h
@@ -10,8 +10,20 @@ extern pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea,
bool *is_thp, unsigned *hshift)
{
+ pte_t *pte;
+
VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
- return __find_linux_pte(pgdir, ea, is_thp, hshift);
+ pte = __find_linux_pte(pgdir, ea, is_thp, hshift);
+
+#if defined(CONFIG_DEBUG_VM) && \
+ !(defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE))
+ /*
+ * We should not find huge page if these configs are not enabled.
+ */
+ if (hshift)
+ WARN_ON(*hshift);
+#endif
+ return pte;
}
static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
@@ -26,10 +38,22 @@ static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea,
bool *is_thp, unsigned *hshift)
{
+ pte_t *pte;
+
VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
VM_WARN(pgdir != current->mm->pgd,
"%s lock less page table lookup called on wrong mm\n", __func__);
- return __find_linux_pte(pgdir, ea, is_thp, hshift);
+ pte = __find_linux_pte(pgdir, ea, is_thp, hshift);
+
+#if defined(CONFIG_DEBUG_VM) && \
+ !(defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE))
+ /*
+ * We should not find huge page if these configs are not enabled.
+ */
+ if (hshift)
+ WARN_ON(*hshift);
+#endif
+ return pte;
}
#endif /* _ASM_POWERPC_PTE_WALK_H */
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index faa5a338ac5a..ee3ada66deb5 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -111,18 +111,33 @@ struct pt_regs
#ifndef __ASSEMBLY__
-#define GET_IP(regs) ((regs)->nip)
-#define GET_USP(regs) ((regs)->gpr[1])
-#define GET_FP(regs) (0)
-#define SET_FP(regs, val)
+static inline unsigned long instruction_pointer(struct pt_regs *regs)
+{
+ return regs->nip;
+}
+
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ regs->nip = val;
+}
+
+static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+{
+ return regs->gpr[1];
+}
+
+static inline unsigned long frame_pointer(struct pt_regs *regs)
+{
+ return 0;
+}
#ifdef CONFIG_SMP
extern unsigned long profile_pc(struct pt_regs *regs);
-#define profile_pc profile_pc
+#else
+#define profile_pc(regs) instruction_pointer(regs)
#endif
-#include <asm-generic/ptrace.h>
-
#define kernel_stack_pointer(regs) ((regs)->gpr[1])
static inline int is_syscall_success(struct pt_regs *regs)
{
@@ -188,7 +203,11 @@ do { \
#endif /* __powerpc64__ */
#define arch_has_single_step() (1)
-#define arch_has_block_step() (!cpu_has_feature(CPU_FTR_601))
+#ifndef CONFIG_BOOK3S_601
+#define arch_has_block_step() (true)
+#else
+#define arch_has_block_step() (false)
+#endif
#define ARCH_HAS_USER_SINGLE_STEP_REPORT
/*
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 10caa145f98b..ec3714cf0989 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -38,6 +38,7 @@
#define MSR_TM_LG 32 /* Trans Mem Available */
#define MSR_VEC_LG 25 /* Enable AltiVec */
#define MSR_VSX_LG 23 /* Enable VSX */
+#define MSR_S_LG 22 /* Secure state */
#define MSR_POW_LG 18 /* Enable Power Management */
#define MSR_WE_LG 18 /* Wait State Enable */
#define MSR_TGPR_LG 17 /* TLB Update registers in use */
@@ -71,11 +72,13 @@
#define MSR_SF __MASK(MSR_SF_LG) /* Enable 64 bit mode */
#define MSR_ISF __MASK(MSR_ISF_LG) /* Interrupt 64b mode valid on 630 */
#define MSR_HV __MASK(MSR_HV_LG) /* Hypervisor state */
+#define MSR_S __MASK(MSR_S_LG) /* Secure state */
#else
/* so tests for these bits fail on 32-bit */
#define MSR_SF 0
#define MSR_ISF 0
#define MSR_HV 0
+#define MSR_S 0
#endif
/*
diff --git a/arch/powerpc/include/asm/scom.h b/arch/powerpc/include/asm/scom.h
deleted file mode 100644
index 08c44396e54a..000000000000
--- a/arch/powerpc/include/asm/scom.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
- * <benh@kernel.crashing.org>
- * and David Gibson, IBM Corporation.
- */
-
-#ifndef _ASM_POWERPC_SCOM_H
-#define _ASM_POWERPC_SCOM_H
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_PPC_SCOM
-
-/*
- * The SCOM bus is a sideband bus used for accessing various internal
- * registers of the processor or the chipset. The implementation details
- * differ between processors and platforms, and the access method as
- * well.
- *
- * This API allows to "map" ranges of SCOM register numbers associated
- * with a given SCOM controller. The later must be represented by a
- * device node, though some implementations might support NULL if there
- * is no possible ambiguity
- *
- * Then, scom_read/scom_write can be used to accesses registers inside
- * that range. The argument passed is a register number relative to
- * the beginning of the range mapped.
- */
-
-typedef void *scom_map_t;
-
-/* Value for an invalid SCOM map */
-#define SCOM_MAP_INVALID (NULL)
-
-/* The scom_controller data structure is what the platform passes
- * to the core code in scom_init, it provides the actual implementation
- * of all the SCOM functions
- */
-struct scom_controller {
- scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count);
- void (*unmap)(scom_map_t map);
-
- int (*read)(scom_map_t map, u64 reg, u64 *value);
- int (*write)(scom_map_t map, u64 reg, u64 value);
-};
-
-extern const struct scom_controller *scom_controller;
-
-/**
- * scom_init - Initialize the SCOM backend, called by the platform
- * @controller: The platform SCOM controller
- */
-static inline void scom_init(const struct scom_controller *controller)
-{
- scom_controller = controller;
-}
-
-/**
- * scom_map_ok - Test is a SCOM mapping is successful
- * @map: The result of scom_map to test
- */
-static inline int scom_map_ok(scom_map_t map)
-{
- return map != SCOM_MAP_INVALID;
-}
-
-/**
- * scom_map - Map a block of SCOM registers
- * @ctrl_dev: Device node of the SCOM controller
- * some implementations allow NULL here
- * @reg: first SCOM register to map
- * @count: Number of SCOM registers to map
- */
-
-static inline scom_map_t scom_map(struct device_node *ctrl_dev,
- u64 reg, u64 count)
-{
- return scom_controller->map(ctrl_dev, reg, count);
-}
-
-/**
- * scom_find_parent - Find the SCOM controller for a device
- * @dev: OF node of the device
- *
- * This is not meant for general usage, but in combination with
- * scom_map() allows to map registers not represented by the
- * device own scom-reg property. Useful for applying HW workarounds
- * on things not properly represented in the device-tree for example.
- */
-struct device_node *scom_find_parent(struct device_node *dev);
-
-
-/**
- * scom_map_device - Map a device's block of SCOM registers
- * @dev: OF node of the device
- * @index: Register bank index (index in "scom-reg" property)
- *
- * This function will use the device-tree binding for SCOM which
- * is to follow "scom-parent" properties until it finds a node with
- * a "scom-controller" property to find the controller. It will then
- * use the "scom-reg" property which is made of reg/count pairs,
- * each of them having a size defined by the controller's #scom-cells
- * property
- */
-extern scom_map_t scom_map_device(struct device_node *dev, int index);
-
-
-/**
- * scom_unmap - Unmap a block of SCOM registers
- * @map: Result of scom_map is to be unmapped
- */
-static inline void scom_unmap(scom_map_t map)
-{
- if (scom_map_ok(map))
- scom_controller->unmap(map);
-}
-
-/**
- * scom_read - Read a SCOM register
- * @map: Result of scom_map
- * @reg: Register index within that map
- * @value: Updated with the value read
- *
- * Returns 0 (success) or a negative error code
- */
-static inline int scom_read(scom_map_t map, u64 reg, u64 *value)
-{
- int rc;
-
- rc = scom_controller->read(map, reg, value);
- if (rc)
- *value = 0xfffffffffffffffful;
- return rc;
-}
-
-/**
- * scom_write - Write to a SCOM register
- * @map: Result of scom_map
- * @reg: Register index within that map
- * @value: Value to write
- *
- * Returns 0 (success) or a negative error code
- */
-static inline int scom_write(scom_map_t map, u64 reg, u64 value)
-{
- return scom_controller->write(map, reg, value);
-}
-
-
-#endif /* CONFIG_PPC_SCOM */
-#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_SCOM_H */
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 4a1664a8658d..5a9b6eb651b6 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -61,17 +61,6 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end)
(unsigned long)_stext < end;
}
-static inline int overlaps_kvm_tmp(unsigned long start, unsigned long end)
-{
-#ifdef CONFIG_KVM_GUEST
- extern char kvm_tmp[];
- return start < (unsigned long)kvm_tmp &&
- (unsigned long)&kvm_tmp[1024 * 1024] < end;
-#else
- return 0;
-#endif
-}
-
#ifdef PPC64_ELF_ABI_v1
#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
diff --git a/arch/powerpc/include/asm/setjmp.h b/arch/powerpc/include/asm/setjmp.h
index d995061f5f86..e9f81bb3f83b 100644
--- a/arch/powerpc/include/asm/setjmp.h
+++ b/arch/powerpc/include/asm/setjmp.h
@@ -7,7 +7,7 @@
#define JMP_BUF_LEN 23
-extern long setjmp(long *);
-extern void longjmp(long *, long);
+extern long setjmp(long *) __attribute__((returns_twice));
+extern void longjmp(long *, long) __attribute__((noreturn));
#endif /* _ASM_POWERPC_SETJMP_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index a47f827bc5f1..e9a960e28f3c 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -101,15 +101,43 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
#if defined(CONFIG_PPC_SPLPAR)
/* We only yield to the hypervisor if we are in shared processor mode */
-#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
-extern void __spin_yield(arch_spinlock_t *lock);
-extern void __rw_yield(arch_rwlock_t *lock);
+void splpar_spin_yield(arch_spinlock_t *lock);
+void splpar_rw_yield(arch_rwlock_t *lock);
#else /* SPLPAR */
-#define __spin_yield(x) barrier()
-#define __rw_yield(x) barrier()
-#define SHARED_PROCESSOR 0
+static inline void splpar_spin_yield(arch_spinlock_t *lock) {};
+static inline void splpar_rw_yield(arch_rwlock_t *lock) {};
#endif
+static inline bool is_shared_processor(void)
+{
+/*
+ * LPPACA is only available on Pseries so guard anything LPPACA related to
+ * allow other platforms (which include this common header) to compile.
+ */
+#ifdef CONFIG_PPC_PSERIES
+ return (IS_ENABLED(CONFIG_PPC_SPLPAR) &&
+ lppaca_shared_proc(local_paca->lppaca_ptr));
+#else
+ return false;
+#endif
+}
+
+static inline void spin_yield(arch_spinlock_t *lock)
+{
+ if (is_shared_processor())
+ splpar_spin_yield(lock);
+ else
+ barrier();
+}
+
+static inline void rw_yield(arch_rwlock_t *lock)
+{
+ if (is_shared_processor())
+ splpar_rw_yield(lock);
+ else
+ barrier();
+}
+
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
while (1) {
@@ -117,8 +145,8 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
break;
do {
HMT_low();
- if (SHARED_PROCESSOR)
- __spin_yield(lock);
+ if (is_shared_processor())
+ splpar_spin_yield(lock);
} while (unlikely(lock->slock != 0));
HMT_medium();
}
@@ -136,8 +164,8 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
local_irq_restore(flags);
do {
HMT_low();
- if (SHARED_PROCESSOR)
- __spin_yield(lock);
+ if (is_shared_processor())
+ splpar_spin_yield(lock);
} while (unlikely(lock->slock != 0));
HMT_medium();
local_irq_restore(flags_dis);
@@ -226,8 +254,8 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
break;
do {
HMT_low();
- if (SHARED_PROCESSOR)
- __rw_yield(rw);
+ if (is_shared_processor())
+ splpar_rw_yield(rw);
} while (unlikely(rw->lock < 0));
HMT_medium();
}
@@ -240,8 +268,8 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
break;
do {
HMT_low();
- if (SHARED_PROCESSOR)
- __rw_yield(rw);
+ if (is_shared_processor())
+ splpar_rw_yield(rw);
} while (unlikely(rw->lock != 0));
HMT_medium();
}
@@ -281,9 +309,9 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
rw->lock = 0;
}
-#define arch_spin_relax(lock) __spin_yield(lock)
-#define arch_read_relax(lock) __rw_yield(lock)
-#define arch_write_relax(lock) __rw_yield(lock)
+#define arch_spin_relax(lock) spin_yield(lock)
+#define arch_read_relax(lock) rw_yield(lock)
+#define arch_write_relax(lock) rw_yield(lock)
/* See include/linux/spinlock.h */
#define smp_mb__after_spinlock() smp_mb()
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index 9bf6dffb4090..b72692702f35 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -53,7 +53,9 @@ void *__memmove(void *to, const void *from, __kernel_size_t n);
#ifndef CONFIG_KASAN
#define __HAVE_ARCH_MEMSET32
#define __HAVE_ARCH_MEMSET64
+#define __HAVE_ARCH_MEMCPY_MCSAFE
+extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
diff --git a/arch/powerpc/include/asm/svm.h b/arch/powerpc/include/asm/svm.h
new file mode 100644
index 000000000000..85580b30aba4
--- /dev/null
+++ b/arch/powerpc/include/asm/svm.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * SVM helper functions
+ *
+ * Copyright 2018 Anshuman Khandual, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_SVM_H
+#define _ASM_POWERPC_SVM_H
+
+#ifdef CONFIG_PPC_SVM
+
+static inline bool is_secure_guest(void)
+{
+ return mfmsr() & MSR_S;
+}
+
+void dtl_cache_ctor(void *addr);
+#define get_dtl_cache_ctor() (is_secure_guest() ? dtl_cache_ctor : NULL)
+
+#else /* CONFIG_PPC_SVM */
+
+static inline bool is_secure_guest(void)
+{
+ return false;
+}
+
+#define get_dtl_cache_ctor() NULL
+
+#endif /* CONFIG_PPC_SVM */
+#endif /* _ASM_POWERPC_SVM_H */
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 81abcf6a737b..38d62acfdce7 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -35,6 +35,16 @@ static inline void syscall_rollback(struct task_struct *task,
regs->gpr[3] = regs->orig_gpr3;
}
+static inline long syscall_get_error(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ /*
+ * If the system call failed,
+ * regs->gpr[3] contains a positive ERRORCODE.
+ */
+ return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0;
+}
+
static inline long syscall_get_return_value(struct task_struct *task,
struct pt_regs *regs)
{
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 54f4ec1f9fab..08dbe3e6831c 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -41,11 +41,7 @@ struct div_result {
/* Accessor functions for the timebase (RTC on 601) registers. */
/* If one day CONFIG_POWER is added just define __USE_RTC as 1 */
-#ifdef CONFIG_PPC_BOOK3S_32
-#define __USE_RTC() (cpu_has_feature(CPU_FTR_USE_RTC))
-#else
-#define __USE_RTC() 0
-#endif
+#define __USE_RTC() (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h
index 926b9f91a3ef..d2d2c4bd8435 100644
--- a/arch/powerpc/include/asm/timex.h
+++ b/arch/powerpc/include/asm/timex.h
@@ -17,38 +17,10 @@ typedef unsigned long cycles_t;
static inline cycles_t get_cycles(void)
{
-#ifdef __powerpc64__
+ if (IS_ENABLED(CONFIG_BOOK3S_601))
+ return 0;
+
return mftb();
-#else
- cycles_t ret;
-
- /*
- * For the "cycle" counter we use the timebase lower half.
- * Currently only used on SMP.
- */
-
- ret = 0;
-
- __asm__ __volatile__(
-#ifdef CONFIG_PPC_8xx
- "97: mftb %0\n"
-#else
- "97: mfspr %0, %2\n"
-#endif
- "99:\n"
- ".section __ftr_fixup,\"a\"\n"
- ".align 2\n"
- "98:\n"
- " .long %1\n"
- " .long 0\n"
- " .long 97b-98b\n"
- " .long 99b-98b\n"
- " .long 0\n"
- " .long 0\n"
- ".previous"
- : "=r" (ret) : "i" (CPU_FTR_601), "i" (SPRN_TBRL));
- return ret;
-#endif
}
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index f85e2b01c3df..2f7e1ea5089e 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -35,6 +35,7 @@ static inline int pcibus_to_node(struct pci_bus *bus)
cpu_all_mask : \
cpumask_of_node(pcibus_to_node(bus)))
+extern int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc);
extern int __node_distance(int, int);
#define node_distance(a, b) __node_distance(a, b)
@@ -84,6 +85,11 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
+static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+ return 0;
+}
+
#endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 76f34346b642..15002b51ff18 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -312,6 +312,7 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
unsigned long ret;
+ barrier_nospec();
allow_user_access(to, from, n);
ret = __copy_tofrom_user(to, from, n);
prevent_user_access(to, from, n);
@@ -386,6 +387,20 @@ static inline unsigned long raw_copy_to_user(void __user *to,
return ret;
}
+static __always_inline unsigned long __must_check
+copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
+{
+ if (likely(check_copy_size(from, n, true))) {
+ if (access_ok(to, n)) {
+ allow_write_to_user(to, n);
+ n = memcpy_mcsafe((void *)to, from, n);
+ prevent_write_to_user(to, n);
+ }
+ }
+
+ return n;
+}
+
extern unsigned long __clear_user(void __user *addr, unsigned long size);
static inline unsigned long clear_user(void __user *addr, unsigned long size)
diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h
new file mode 100644
index 000000000000..4fcda1d5793d
--- /dev/null
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ultravisor API.
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#ifndef _ASM_POWERPC_ULTRAVISOR_API_H
+#define _ASM_POWERPC_ULTRAVISOR_API_H
+
+#include <asm/hvcall.h>
+
+/* Return codes */
+#define U_BUSY H_BUSY
+#define U_FUNCTION H_FUNCTION
+#define U_NOT_AVAILABLE H_NOT_AVAILABLE
+#define U_P2 H_P2
+#define U_P3 H_P3
+#define U_P4 H_P4
+#define U_P5 H_P5
+#define U_PARAMETER H_PARAMETER
+#define U_PERMISSION H_PERMISSION
+#define U_SUCCESS H_SUCCESS
+
+/* opcodes */
+#define UV_WRITE_PATE 0xF104
+#define UV_RETURN 0xF11C
+#define UV_ESM 0xF110
+#define UV_SHARE_PAGE 0xF130
+#define UV_UNSHARE_PAGE 0xF134
+#define UV_UNSHARE_ALL_PAGES 0xF140
+
+#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h
new file mode 100644
index 000000000000..b1bc2e043ed4
--- /dev/null
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ultravisor definitions
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#ifndef _ASM_POWERPC_ULTRAVISOR_H
+#define _ASM_POWERPC_ULTRAVISOR_H
+
+#include <asm/asm-prototypes.h>
+#include <asm/ultravisor-api.h>
+#include <asm/firmware.h>
+
+int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
+ int depth, void *data);
+
+/*
+ * In ultravisor enabled systems, PTCR becomes ultravisor privileged only for
+ * writing and an attempt to write to it will cause a Hypervisor Emulation
+ * Assistance interrupt.
+ */
+static inline void set_ptcr_when_no_uv(u64 val)
+{
+ if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+ mtspr(SPRN_PTCR, val);
+}
+
+static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1)
+{
+ return ucall_norets(UV_WRITE_PATE, lpid, dw0, dw1);
+}
+
+static inline int uv_share_page(u64 pfn, u64 npages)
+{
+ return ucall_norets(UV_SHARE_PAGE, pfn, npages);
+}
+
+static inline int uv_unshare_page(u64 pfn, u64 npages)
+{
+ return ucall_norets(UV_UNSHARE_PAGE, pfn, npages);
+}
+
+static inline int uv_unshare_all_pages(void)
+{
+ return ucall_norets(UV_UNSHARE_ALL_PAGES);
+}
+
+#endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 68473c3c471c..b0720c7c3fcf 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -49,6 +49,7 @@
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_CLONE3
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index da0b19870570..f93e6b0f5c84 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -163,14 +163,4 @@ int vas_copy_crb(void *crb, int offset);
*/
int vas_paste_crb(struct vas_window *win, int offset, bool re);
-/*
- * Return a system-wide unique id for the VAS window @win.
- */
-extern u32 vas_win_id(struct vas_window *win);
-
-/*
- * Return the power bus paste address associated with @win so the caller
- * can map that address into their address space.
- */
-extern u64 vas_win_paste_addr(struct vas_window *win);
#endif /* __ASM_POWERPC_VAS_H */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index e4016985764e..24cdf97376c4 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -46,7 +46,15 @@ struct xive_irq_data {
/* Setup/used by frontend */
int target;
+ /*
+ * saved_p means that there is a queue entry for this interrupt
+ * in some CPU's queue (not including guest vcpu queues), even
+ * if P is not set in the source ESB.
+ * stale_p means that there is no queue entry for this interrupt
+ * in some CPU's queue, even if P is set in the source ESB.
+ */
bool saved_p;
+ bool stale_p;
};
#define XIVE_IRQ_FLAG_STORE_EOI 0x01
#define XIVE_IRQ_FLAG_LSI 0x02
@@ -91,6 +99,7 @@ extern void xive_flush_interrupt(void);
/* xmon hook */
extern void xmon_xive_do_dump(int cpu);
+extern int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d);
/* APIs used by KVM */
extern u32 xive_native_default_eq_shift(void);
@@ -127,6 +136,7 @@ extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle,
extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
u32 qindex);
extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
+extern bool xive_native_has_queue_state_support(void);
#else
diff --git a/arch/powerpc/include/uapi/asm/bpf_perf_event.h b/arch/powerpc/include/uapi/asm/bpf_perf_event.h
index b551b741653d..5e1e648aeec4 100644
--- a/arch/powerpc/include/uapi/asm/bpf_perf_event.h
+++ b/arch/powerpc/include/uapi/asm/bpf_perf_event.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
#define _UAPI__ASM_BPF_PERF_EVENT_H__
diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h
index 01555c6ae0f5..be48c2215fa2 100644
--- a/arch/powerpc/include/uapi/asm/kvm_para.h
+++ b/arch/powerpc/include/uapi/asm/kvm_para.h
@@ -31,7 +31,7 @@
* Struct fields are always 32 or 64 bit aligned, depending on them being 32
* or 64 bit wide respectively.
*
- * See Documentation/virtual/kvm/ppc-pv.txt
+ * See Documentation/virt/kvm/ppc-pv.txt
*/
struct kvm_vcpu_arch_shared {
__u64 scratch1;
diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index 65065ce32814..c0c737215b00 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -21,15 +21,11 @@
#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
+
#define MCL_CURRENT 0x2000 /* lock all currently mapped pages */
#define MCL_FUTURE 0x4000 /* lock all additions to address space */
#define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */
-#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
-#define MAP_NONBLOCK 0x10000 /* do not block on IO */
-#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
-#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
-
/* Override any generic PKEY permission defines */
#define PKEY_DISABLE_EXECUTE 0x4
#undef PKEY_ACCESS_MASK
diff --git a/arch/powerpc/kernel/.gitignore b/arch/powerpc/kernel/.gitignore
index c5f676c3c224..67ebd3003c05 100644
--- a/arch/powerpc/kernel/.gitignore
+++ b/arch/powerpc/kernel/.gitignore
@@ -1 +1,2 @@
+prom_init_check
vmlinux.lds
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 0ea6c4aa3a20..a7ca8fe62368 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -52,10 +52,11 @@ obj-y := cputable.o ptrace.o syscalls.o \
of_platform.o prom_parse.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
- paca.o nvram_64.o firmware.o
+ paca.o nvram_64.o firmware.o note.o
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_PPC_DAWR) += dawr.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
@@ -77,7 +78,9 @@ obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
-obj-$(CONFIG_FA_DUMP) += fadump.o
+ifneq ($(CONFIG_FA_DUMP)$(CONFIG_PRESERVE_FA_DUMP),)
+obj-y += fadump.o
+endif
ifdef CONFIG_PPC32
obj-$(CONFIG_E500) += idle_e500.o
endif
@@ -154,6 +157,9 @@ endif
obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
+ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),)
+obj-y += ucall.o
+endif
# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_prom_init.o := n
@@ -183,15 +189,13 @@ extra-$(CONFIG_ALTIVEC) += vector.o
extra-$(CONFIG_PPC64) += entry_64.o
extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
-ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
-$(obj)/built-in.a: prom_init_check
+extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check
-quiet_cmd_prom_init_check = CALL $<
- cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o"
+quiet_cmd_prom_init_check = PROMCHK $@
+ cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" $(obj)/prom_init.o; touch $@
-PHONY += prom_init_check
-prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o
- $(call cmd,prom_init_check)
-endif
+$(obj)/prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o FORCE
+ $(call if_changed,prom_init_check)
+targets += prom_init_check
clean-files := vmlinux.lds
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 7107ad86de65..92045ed64976 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -176,9 +176,11 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
ret |= __get_user_inatomic(temp.v[1], p++);
ret |= __get_user_inatomic(temp.v[2], p++);
ret |= __get_user_inatomic(temp.v[3], p++);
+ /* fall through */
case 4:
ret |= __get_user_inatomic(temp.v[4], p++);
ret |= __get_user_inatomic(temp.v[5], p++);
+ /* fall through */
case 2:
ret |= __get_user_inatomic(temp.v[6], p++);
ret |= __get_user_inatomic(temp.v[7], p++);
@@ -259,9 +261,11 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
ret |= __put_user_inatomic(data.v[1], p++);
ret |= __put_user_inatomic(data.v[2], p++);
ret |= __put_user_inatomic(data.v[3], p++);
+ /* fall through */
case 4:
ret |= __put_user_inatomic(data.v[4], p++);
ret |= __put_user_inatomic(data.v[5], p++);
+ /* fall through */
case 2:
ret |= __put_user_inatomic(data.v[6], p++);
ret |= __put_user_inatomic(data.v[7], p++);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 31dc7e64cbfc..484f54dab247 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -266,7 +266,9 @@ int main(void)
OFFSET(ACCOUNT_STARTTIME_USER, paca_struct, accounting.starttime_user);
OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime);
OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime);
+#ifdef CONFIG_PPC_BOOK3E
OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
+#endif
OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
#else /* CONFIG_PPC64 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
@@ -504,6 +506,7 @@ int main(void)
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
OFFSET(KVM_RADIX, kvm, arch.radix);
OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
+ OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 9fbb9d12e0c0..470336277c67 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -891,4 +891,25 @@ void cacheinfo_cpu_offline(unsigned int cpu_id)
if (cache)
cache_cpu_clear(cache, cpu_id);
}
+
+void cacheinfo_teardown(void)
+{
+ unsigned int cpu;
+
+ lockdep_assert_cpus_held();
+
+ for_each_online_cpu(cpu)
+ cacheinfo_cpu_offline(cpu);
+}
+
+void cacheinfo_rebuild(void)
+{
+ unsigned int cpu;
+
+ lockdep_assert_cpus_held();
+
+ for_each_online_cpu(cpu)
+ cacheinfo_cpu_online(cpu);
+}
+
#endif /* (CONFIG_PPC_PSERIES && CONFIG_SUSPEND) || CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h
index 955f5e999f1b..52bd3fc6642d 100644
--- a/arch/powerpc/kernel/cacheinfo.h
+++ b/arch/powerpc/kernel/cacheinfo.h
@@ -6,4 +6,8 @@
extern void cacheinfo_cpu_online(unsigned int cpu_id);
extern void cacheinfo_cpu_offline(unsigned int cpu_id);
+/* Allow migration/suspend to tear down and rebuild the hierarchy. */
+extern void cacheinfo_teardown(void);
+extern void cacheinfo_rebuild(void);
+
#endif /* _PPC_CACHEINFO_H */
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index bfe5f4a2886b..e745abc5457a 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -569,7 +569,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC32
-#ifdef CONFIG_PPC_BOOK3S_32
+#ifdef CONFIG_PPC_BOOK3S_601
{ /* 601 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00010000,
@@ -583,6 +583,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc601",
},
+#endif /* CONFIG_PPC_BOOK3S_601 */
+#ifdef CONFIG_PPC_BOOK3S_6xx
{ /* 603 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00030000,
@@ -1212,7 +1214,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc603",
},
-#endif /* CONFIG_PPC_BOOK3S_32 */
+#endif /* CONFIG_PPC_BOOK3S_6xx */
#ifdef CONFIG_PPC_8xx
{ /* 8xx */
.pvr_mask = 0xffff0000,
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
new file mode 100644
index 000000000000..5f66b95b6858
--- /dev/null
+++ b/arch/powerpc/kernel/dawr.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * DAWR infrastructure
+ *
+ * Copyright 2019, Michael Neuling, IBM Corporation.
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <asm/debugfs.h>
+#include <asm/machdep.h>
+#include <asm/hvcall.h>
+
+bool dawr_force_enable;
+EXPORT_SYMBOL_GPL(dawr_force_enable);
+
+int set_dawr(struct arch_hw_breakpoint *brk)
+{
+ unsigned long dawr, dawrx, mrd;
+
+ dawr = brk->address;
+
+ dawrx = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE))
+ << (63 - 58);
+ dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) << (63 - 59);
+ dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) >> 3;
+ /*
+ * DAWR length is stored in field MDR bits 48:53. Matches range in
+ * doublewords (64 bits) baised by -1 eg. 0b000000=1DW and
+ * 0b111111=64DW.
+ * brk->len is in bytes.
+ * This aligns up to double word size, shifts and does the bias.
+ */
+ mrd = ((brk->len + 7) >> 3) - 1;
+ dawrx |= (mrd & 0x3f) << (63 - 53);
+
+ if (ppc_md.set_dawr)
+ return ppc_md.set_dawr(dawr, dawrx);
+
+ mtspr(SPRN_DAWR, dawr);
+ mtspr(SPRN_DAWRX, dawrx);
+
+ return 0;
+}
+
+static void set_dawr_cb(void *info)
+{
+ set_dawr(info);
+}
+
+static ssize_t dawr_write_file_bool(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct arch_hw_breakpoint null_brk = {0, 0, 0};
+ size_t rc;
+
+ /* Send error to user if they hypervisor won't allow us to write DAWR */
+ if (!dawr_force_enable &&
+ firmware_has_feature(FW_FEATURE_LPAR) &&
+ set_dawr(&null_brk) != H_SUCCESS)
+ return -ENODEV;
+
+ rc = debugfs_write_file_bool(file, user_buf, count, ppos);
+ if (rc)
+ return rc;
+
+ /* If we are clearing, make sure all CPUs have the DAWR cleared */
+ if (!dawr_force_enable)
+ smp_call_function(set_dawr_cb, &null_brk, 0);
+
+ return rc;
+}
+
+static const struct file_operations dawr_enable_fops = {
+ .read = debugfs_read_file_bool,
+ .write = dawr_write_file_bool,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
+static int __init dawr_force_setup(void)
+{
+ if (cpu_has_feature(CPU_FTR_DAWR)) {
+ /* Don't setup sysfs file for user control on P8 */
+ dawr_force_enable = true;
+ return 0;
+ }
+
+ if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
+ /* Turn DAWR off by default, but allow admin to turn it on */
+ debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
+ powerpc_debugfs_root,
+ &dawr_force_enable,
+ &dawr_enable_fops);
+ }
+ return 0;
+}
+arch_initcall(dawr_force_setup);
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 09231ef06d01..e486d1d78de2 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -71,7 +71,7 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
return dma_direct_map_page(dev, page, offset, size, direction,
attrs);
return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
- size, device_to_mask(dev), direction, attrs);
+ size, dma_get_mask(dev), direction, attrs);
}
@@ -82,6 +82,8 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
if (!dma_iommu_map_bypass(dev, attrs))
iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size,
direction, attrs);
+ else
+ dma_direct_unmap_page(dev, dma_handle, size, direction, attrs);
}
@@ -92,7 +94,7 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
if (dma_iommu_map_bypass(dev, attrs))
return dma_direct_map_sg(dev, sglist, nelems, direction, attrs);
return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
- device_to_mask(dev), direction, attrs);
+ dma_get_mask(dev), direction, attrs);
}
static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
@@ -102,6 +104,8 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
if (!dma_iommu_map_bypass(dev, attrs))
ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
direction, attrs);
+ else
+ dma_direct_unmap_sg(dev, sglist, nelems, direction, attrs);
}
static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
@@ -118,18 +122,17 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
{
struct iommu_table *tbl = get_iommu_table_base(dev);
- if (!tbl) {
- dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx"
- ", table unavailable\n", mask);
- return 0;
- }
-
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
dev->archdata.iommu_bypass = true;
dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
return 1;
}
+ if (!tbl) {
+ dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask);
+ return 0;
+ }
+
if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
@@ -163,6 +166,34 @@ u64 dma_iommu_get_required_mask(struct device *dev)
return mask;
}
+static void dma_iommu_sync_for_cpu(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir)
+{
+ if (dma_iommu_alloc_bypass(dev))
+ dma_direct_sync_single_for_cpu(dev, addr, size, dir);
+}
+
+static void dma_iommu_sync_for_device(struct device *dev, dma_addr_t addr,
+ size_t sz, enum dma_data_direction dir)
+{
+ if (dma_iommu_alloc_bypass(dev))
+ dma_direct_sync_single_for_device(dev, addr, sz, dir);
+}
+
+extern void dma_iommu_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+ if (dma_iommu_alloc_bypass(dev))
+ dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir);
+}
+
+extern void dma_iommu_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+ if (dma_iommu_alloc_bypass(dev))
+ dma_direct_sync_sg_for_device(dev, sgl, nents, dir);
+}
+
const struct dma_map_ops dma_iommu_ops = {
.alloc = dma_iommu_alloc_coherent,
.free = dma_iommu_free_coherent,
@@ -172,4 +203,10 @@ const struct dma_map_ops dma_iommu_ops = {
.map_page = dma_iommu_map_page,
.unmap_page = dma_iommu_unmap_page,
.get_required_mask = dma_iommu_get_required_mask,
+ .sync_single_for_cpu = dma_iommu_sync_for_cpu,
+ .sync_single_for_device = dma_iommu_sync_for_device,
+ .sync_sg_for_cpu = dma_iommu_sync_sg_for_cpu,
+ .sync_sg_for_device = dma_iommu_sync_sg_for_device,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
};
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index f192d57db47d..0a91dee51245 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -150,6 +150,16 @@ static int __init eeh_setup(char *str)
}
__setup("eeh=", eeh_setup);
+void eeh_show_enabled(void)
+{
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
+ pr_info("EEH: Recovery disabled by kernel parameter.\n");
+ else if (eeh_has_flag(EEH_ENABLED))
+ pr_info("EEH: Capable adapter found: recovery enabled.\n");
+ else
+ pr_info("EEH: No capable adapters found: recovery disabled.\n");
+}
+
/*
* This routine captures assorted PCI configuration space data
* for the indicated PCI device, and puts them into a buffer
@@ -354,10 +364,19 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
ptep = find_init_mm_pte(token, &hugepage_shift);
if (!ptep)
return token;
- WARN_ON(hugepage_shift);
- pa = pte_pfn(*ptep) << PAGE_SHIFT;
- return pa | (token & (PAGE_SIZE-1));
+ pa = pte_pfn(*ptep);
+
+ /* On radix we can do hugepage mappings for io, so handle that */
+ if (hugepage_shift) {
+ pa <<= hugepage_shift;
+ pa |= token & ((1ul << hugepage_shift) - 1);
+ } else {
+ pa <<= PAGE_SHIFT;
+ pa |= token & (PAGE_SIZE - 1);
+ }
+
+ return pa;
}
/*
@@ -401,11 +420,9 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
eeh_pe_mark_isolated(phb_pe);
eeh_serialize_unlock(flags);
- pr_err("EEH: PHB#%x failure detected, location: %s\n",
+ pr_debug("EEH: PHB#%x failure detected, location: %s\n",
phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
- dump_stack();
eeh_send_failure_event(phb_pe);
-
return 1;
out:
eeh_serialize_unlock(flags);
@@ -432,7 +449,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
unsigned long flags;
struct device_node *dn;
struct pci_dev *dev;
- struct eeh_pe *pe, *parent_pe, *phb_pe;
+ struct eeh_pe *pe, *parent_pe;
int rc = 0;
const char *location = NULL;
@@ -451,8 +468,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
/* Access to IO BARs might get this far and still not want checking. */
if (!pe) {
eeh_stats.ignored_check++;
- pr_debug("EEH: Ignored check for %s\n",
- eeh_pci_name(dev));
+ eeh_edev_dbg(edev, "Ignored check\n");
return 0;
}
@@ -492,12 +508,11 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
if (dn)
location = of_get_property(dn, "ibm,loc-code",
NULL);
- printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
- "location=%s driver=%s pci addr=%s\n",
+ eeh_edev_err(edev, "%d reads ignored for recovering device at location=%s driver=%s\n",
pe->check_count,
location ? location : "unknown",
- eeh_driver_name(dev), eeh_pci_name(dev));
- printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
+ eeh_driver_name(dev));
+ eeh_edev_err(edev, "Might be infinite loop in %s driver\n",
eeh_driver_name(dev));
dump_stack();
}
@@ -564,13 +579,8 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* a stack trace will help the device-driver authors figure
* out what happened. So print that out.
*/
- phb_pe = eeh_phb_pe_get(pe->phb);
- pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
- pe->phb->global_number, pe->addr);
- pr_err("EEH: PE location: %s, PHB location: %s\n",
- eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
- dump_stack();
-
+ pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
+ __func__, pe->phb->global_number, pe->addr);
eeh_send_failure_event(pe);
return 1;
@@ -688,7 +698,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
return rc;
}
-static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
+static void eeh_disable_and_save_dev_state(struct eeh_dev *edev,
void *userdata)
{
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
@@ -699,7 +709,7 @@ static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
* state for the specified device
*/
if (!pdev || pdev == dev)
- return NULL;
+ return;
/* Ensure we have D0 power state */
pci_set_power_state(pdev, PCI_D0);
@@ -712,18 +722,16 @@ static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
* interrupt from the device
*/
pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
-
- return NULL;
}
-static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
+static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
struct pci_dev *dev = userdata;
if (!pdev)
- return NULL;
+ return;
/* Apply customization from firmware */
if (pdn && eeh_ops->restore_config)
@@ -732,8 +740,6 @@ static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
/* The caller should restore state for the specified device */
if (pdev != dev)
pci_restore_state(pdev);
-
- return NULL;
}
int eeh_restore_vf_config(struct pci_dn *pdn)
@@ -859,7 +865,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
* the indicated device and its children so that the bunch of the
* devices could be reset properly.
*/
-static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
+static void eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
{
struct pci_dev *dev;
unsigned int *freset = (unsigned int *)flag;
@@ -867,8 +873,6 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
dev = eeh_dev_to_pci_dev(edev);
if (dev)
*freset |= dev->needs_freset;
-
- return NULL;
}
static void eeh_pe_refreeze_passed(struct eeh_pe *root)
@@ -1054,23 +1058,6 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
};
-void eeh_probe_devices(void)
-{
- struct pci_controller *hose, *tmp;
- struct pci_dn *pdn;
-
- /* Enable EEH for all adapters */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
- pdn = hose->pci_data;
- traverse_pci_dn(pdn, eeh_ops->probe, NULL);
- }
- if (eeh_enabled())
- pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
- else
- pr_info("EEH: No capable adapters found\n");
-
-}
-
/**
* eeh_init - EEH initialization
*
@@ -1111,6 +1098,8 @@ static int eeh_init(void)
list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
eeh_dev_phb_init_dynamic(hose);
+ eeh_addr_cache_init();
+
/* Initialize EEH event */
return eeh_event_init();
}
@@ -1181,15 +1170,14 @@ void eeh_add_device_late(struct pci_dev *dev)
struct pci_dn *pdn;
struct eeh_dev *edev;
- if (!dev || !eeh_enabled())
+ if (!dev)
return;
- pr_debug("EEH: Adding device %s\n", pci_name(dev));
-
pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
edev = pdn_to_eeh_dev(pdn);
+ eeh_edev_dbg(edev, "Adding device\n");
if (edev->pdev == dev) {
- pr_debug("EEH: Already referenced !\n");
+ eeh_edev_dbg(edev, "Device already referenced!\n");
return;
}
@@ -1237,6 +1225,8 @@ void eeh_add_device_tree_late(struct pci_bus *bus)
{
struct pci_dev *dev;
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
+ return;
list_for_each_entry(dev, &bus->devices, bus_list) {
eeh_add_device_late(dev);
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
@@ -1290,10 +1280,10 @@ void eeh_remove_device(struct pci_dev *dev)
edev = pci_dev_to_eeh_dev(dev);
/* Unregister the device with the EEH/PCI address search system */
- pr_debug("EEH: Removing device %s\n", pci_name(dev));
+ dev_dbg(&dev->dev, "EEH: Removing device\n");
if (!edev || !edev->pdev || !edev->pe) {
- pr_debug("EEH: Not referenced !\n");
+ dev_dbg(&dev->dev, "EEH: Device not referenced!\n");
return;
}
@@ -1881,6 +1871,198 @@ static const struct file_operations eeh_force_recover_fops = {
.llseek = no_llseek,
.write = eeh_force_recover_write,
};
+
+static ssize_t eeh_debugfs_dev_usage(struct file *filp,
+ char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ static const char usage[] = "input format: <domain>:<bus>:<dev>.<fn>\n";
+
+ return simple_read_from_buffer(user_buf, count, ppos,
+ usage, sizeof(usage) - 1);
+}
+
+static ssize_t eeh_dev_check_write(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ uint32_t domain, bus, dev, fn;
+ struct pci_dev *pdev;
+ struct eeh_dev *edev;
+ char buf[20];
+ int ret;
+
+ memset(buf, 0, sizeof(buf));
+ ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+ if (!ret)
+ return -EFAULT;
+
+ ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
+ if (ret != 4) {
+ pr_err("%s: expected 4 args, got %d\n", __func__, ret);
+ return -EINVAL;
+ }
+
+ pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
+ if (!pdev)
+ return -ENODEV;
+
+ edev = pci_dev_to_eeh_dev(pdev);
+ if (!edev) {
+ pci_err(pdev, "No eeh_dev for this device!\n");
+ pci_dev_put(pdev);
+ return -ENODEV;
+ }
+
+ ret = eeh_dev_check_failure(edev);
+ pci_info(pdev, "eeh_dev_check_failure(%04x:%02x:%02x.%01x) = %d\n",
+ domain, bus, dev, fn, ret);
+
+ pci_dev_put(pdev);
+
+ return count;
+}
+
+static const struct file_operations eeh_dev_check_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_dev_check_write,
+ .read = eeh_debugfs_dev_usage,
+};
+
+static int eeh_debugfs_break_device(struct pci_dev *pdev)
+{
+ struct resource *bar = NULL;
+ void __iomem *mapped;
+ u16 old, bit;
+ int i, pos;
+
+ /* Do we have an MMIO BAR to disable? */
+ for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+ struct resource *r = &pdev->resource[i];
+
+ if (!r->flags || !r->start)
+ continue;
+ if (r->flags & IORESOURCE_IO)
+ continue;
+ if (r->flags & IORESOURCE_UNSET)
+ continue;
+
+ bar = r;
+ break;
+ }
+
+ if (!bar) {
+ pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
+ return -ENXIO;
+ }
+
+ pci_err(pdev, "Going to break: %pR\n", bar);
+
+ if (pdev->is_virtfn) {
+#ifndef CONFIG_IOV
+ return -ENXIO;
+#else
+ /*
+ * VFs don't have a per-function COMMAND register, so the best
+ * we can do is clear the Memory Space Enable bit in the PF's
+ * SRIOV control reg.
+ *
+ * Unfortunately, this requires that we have a PF (i.e doesn't
+ * work for a passed-through VF) and it has the potential side
+ * effect of also causing an EEH on every other VF under the
+ * PF. Oh well.
+ */
+ pdev = pdev->physfn;
+ if (!pdev)
+ return -ENXIO; /* passed through VFs have no PF */
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+ pos += PCI_SRIOV_CTRL;
+ bit = PCI_SRIOV_CTRL_MSE;
+#endif /* !CONFIG_IOV */
+ } else {
+ bit = PCI_COMMAND_MEMORY;
+ pos = PCI_COMMAND;
+ }
+
+ /*
+ * Process here is:
+ *
+ * 1. Disable Memory space.
+ *
+ * 2. Perform an MMIO to the device. This should result in an error
+ * (CA / UR) being raised by the device which results in an EEH
+ * PE freeze. Using the in_8() accessor skips the eeh detection hook
+ * so the freeze hook so the EEH Detection machinery won't be
+ * triggered here. This is to match the usual behaviour of EEH
+ * where the HW will asyncronously freeze a PE and it's up to
+ * the kernel to notice and deal with it.
+ *
+ * 3. Turn Memory space back on. This is more important for VFs
+ * since recovery will probably fail if we don't. For normal
+ * the COMMAND register is reset as a part of re-initialising
+ * the device.
+ *
+ * Breaking stuff is the point so who cares if it's racy ;)
+ */
+ pci_read_config_word(pdev, pos, &old);
+
+ mapped = ioremap(bar->start, PAGE_SIZE);
+ if (!mapped) {
+ pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
+ return -ENXIO;
+ }
+
+ pci_write_config_word(pdev, pos, old & ~bit);
+ in_8(mapped);
+ pci_write_config_word(pdev, pos, old);
+
+ iounmap(mapped);
+
+ return 0;
+}
+
+static ssize_t eeh_dev_break_write(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ uint32_t domain, bus, dev, fn;
+ struct pci_dev *pdev;
+ char buf[20];
+ int ret;
+
+ memset(buf, 0, sizeof(buf));
+ ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+ if (!ret)
+ return -EFAULT;
+
+ ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
+ if (ret != 4) {
+ pr_err("%s: expected 4 args, got %d\n", __func__, ret);
+ return -EINVAL;
+ }
+
+ pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
+ if (!pdev)
+ return -ENODEV;
+
+ ret = eeh_debugfs_break_device(pdev);
+ pci_dev_put(pdev);
+
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static const struct file_operations eeh_dev_break_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_dev_break_write,
+ .read = eeh_debugfs_dev_usage,
+};
+
#endif
static int __init eeh_init_proc(void)
@@ -1896,6 +2078,12 @@ static int __init eeh_init_proc(void)
debugfs_create_bool("eeh_disable_recovery", 0600,
powerpc_debugfs_root,
&eeh_debugfs_no_recover);
+ debugfs_create_file_unsafe("eeh_dev_check", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_dev_check_fops);
+ debugfs_create_file_unsafe("eeh_dev_break", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_dev_break_fops);
debugfs_create_file_unsafe("eeh_force_recover", 0600,
powerpc_debugfs_root, NULL,
&eeh_force_recover_fops);
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 320472373122..cf11277ebd02 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -18,6 +18,8 @@
/**
+ * DOC: Overview
+ *
* The pci address cache subsystem. This subsystem places
* PCI device address resources into a red-black tree, sorted
* according to the address range, so that given only an i/o
@@ -34,6 +36,7 @@
* than any hash algo I could think of for this problem, even
* with the penalty of slow pointer chases for d-cache misses).
*/
+
struct pci_io_addr_range {
struct rb_node rb_node;
resource_size_t addr_lo;
@@ -145,8 +148,8 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
piar->pcidev = dev;
piar->flags = flags;
- pr_debug("PIAR: insert range=[%pap:%pap] dev=%s\n",
- &alo, &ahi, pci_name(dev));
+ eeh_edev_dbg(piar->edev, "PIAR: insert range=[%pap:%pap]\n",
+ &alo, &ahi);
rb_link_node(&piar->rb_node, parent, p);
rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
@@ -226,8 +229,8 @@ restart:
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
if (piar->pcidev == dev) {
- pr_debug("PIAR: remove range=[%pap:%pap] dev=%s\n",
- &piar->addr_lo, &piar->addr_hi, pci_name(dev));
+ eeh_edev_dbg(piar->edev, "PIAR: remove range=[%pap:%pap]\n",
+ &piar->addr_lo, &piar->addr_hi);
rb_erase(n, &pci_io_addr_cache_root.rb_root);
kfree(piar);
goto restart;
@@ -255,37 +258,14 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
}
/**
- * eeh_addr_cache_build - Build a cache of I/O addresses
+ * eeh_addr_cache_init - Initialize a cache of I/O addresses
*
- * Build a cache of pci i/o addresses. This cache will be used to
+ * Initialize a cache of pci i/o addresses. This cache will be used to
* find the pci device that corresponds to a given address.
- * This routine scans all pci busses to build the cache.
- * Must be run late in boot process, after the pci controllers
- * have been scanned for devices (after all device resources are known).
*/
-void eeh_addr_cache_build(void)
+void eeh_addr_cache_init(void)
{
- struct pci_dn *pdn;
- struct eeh_dev *edev;
- struct pci_dev *dev = NULL;
-
spin_lock_init(&pci_io_addr_cache_root.piar_lock);
-
- for_each_pci_dev(dev) {
- pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
- if (!pdn)
- continue;
-
- edev = pdn_to_eeh_dev(pdn);
- if (!edev)
- continue;
-
- dev->dev.archdata.edev = edev;
- edev->pdev = dev;
-
- eeh_addr_cache_insert_dev(dev);
- eeh_sysfs_add_device(dev);
- }
}
static int eeh_addr_cache_show(struct seq_file *s, void *v)
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index c4317c452d98..7370185c7a05 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -47,6 +47,8 @@ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
/* Associate EEH device with OF node */
pdn->edev = edev;
edev->pdn = pdn;
+ edev->bdfn = (pdn->busno << 8) | pdn->devfn;
+ edev->controller = pdn->phb;
return edev;
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 89623962c727..d9279d0ee9f5 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -27,6 +27,7 @@
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/ppc-pci.h>
@@ -81,23 +82,6 @@ static const char *pci_ers_result_name(enum pci_ers_result result)
}
};
-static __printf(2, 3) void eeh_edev_info(const struct eeh_dev *edev,
- const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- printk(KERN_INFO "EEH: PE#%x (PCI %s): %pV\n", edev->pe_config_addr,
- edev->pdev ? dev_name(&edev->pdev->dev) : "none", &vaf);
-
- va_end(args);
-}
-
static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
enum pci_ers_result new)
{
@@ -113,8 +97,16 @@ static bool eeh_dev_removed(struct eeh_dev *edev)
static bool eeh_edev_actionable(struct eeh_dev *edev)
{
- return (edev->pdev && !eeh_dev_removed(edev) &&
- !eeh_pe_passed(edev->pe));
+ if (!edev->pdev)
+ return false;
+ if (edev->pdev->error_state == pci_channel_io_perm_failure)
+ return false;
+ if (eeh_dev_removed(edev))
+ return false;
+ if (eeh_pe_passed(edev->pe))
+ return false;
+
+ return true;
}
/**
@@ -214,12 +206,12 @@ static void eeh_enable_irq(struct eeh_dev *edev)
}
}
-static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
+static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
if (!edev)
- return NULL;
+ return;
/*
* We cannot access the config space on some adapters.
@@ -229,14 +221,13 @@ static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
* device is created.
*/
if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
- return NULL;
+ return;
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
- return NULL;
+ return;
pci_save_state(pdev);
- return NULL;
}
static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
@@ -274,20 +265,27 @@ static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
}
typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
+ struct pci_dev *,
struct pci_driver *);
static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
enum pci_ers_result *result)
{
+ struct pci_dev *pdev;
struct pci_driver *driver;
enum pci_ers_result new_result;
- if (!edev->pdev) {
+ pci_lock_rescan_remove();
+ pdev = edev->pdev;
+ if (pdev)
+ get_device(&pdev->dev);
+ pci_unlock_rescan_remove();
+ if (!pdev) {
eeh_edev_info(edev, "no device");
return;
}
- device_lock(&edev->pdev->dev);
+ device_lock(&pdev->dev);
if (eeh_edev_actionable(edev)) {
- driver = eeh_pcid_get(edev->pdev);
+ driver = eeh_pcid_get(pdev);
if (!driver)
eeh_edev_info(edev, "no driver");
@@ -296,7 +294,7 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
else if (edev->mode & EEH_DEV_NO_HANDLER)
eeh_edev_info(edev, "driver bound too late");
else {
- new_result = fn(edev, driver);
+ new_result = fn(edev, pdev, driver);
eeh_edev_info(edev, "%s driver reports: '%s'",
driver->name,
pci_ers_result_name(new_result));
@@ -305,12 +303,15 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
new_result);
}
if (driver)
- eeh_pcid_put(edev->pdev);
+ eeh_pcid_put(pdev);
} else {
- eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!edev->pdev,
+ eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!pdev,
!eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
}
- device_unlock(&edev->pdev->dev);
+ device_unlock(&pdev->dev);
+ if (edev->pdev != pdev)
+ eeh_edev_warn(edev, "Device changed during processing!\n");
+ put_device(&pdev->dev);
}
static void eeh_pe_report(const char *name, struct eeh_pe *root,
@@ -337,20 +338,20 @@ static void eeh_pe_report(const char *name, struct eeh_pe *root,
* Report an EEH error to each device driver.
*/
static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
enum pci_ers_result rc;
- struct pci_dev *dev = edev->pdev;
if (!driver->err_handler->error_detected)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
driver->name);
- rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
+ rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen);
edev->in_error = true;
- pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE);
return rc;
}
@@ -363,12 +364,13 @@ static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
* are now enabled.
*/
static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->mmio_enabled)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
- return driver->err_handler->mmio_enabled(edev->pdev);
+ return driver->err_handler->mmio_enabled(pdev);
}
/**
@@ -382,20 +384,21 @@ static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
* driver can work again while the device is recovered.
*/
static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->slot_reset || !edev->in_error)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
- return driver->err_handler->slot_reset(edev->pdev);
+ return driver->err_handler->slot_reset(pdev);
}
-static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
+static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
if (!edev)
- return NULL;
+ return;
/*
* The content in the config space isn't saved because
@@ -407,15 +410,14 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
if (list_is_last(&edev->entry, &edev->pe->edevs))
eeh_pe_restore_bars(edev->pe);
- return NULL;
+ return;
}
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
- return NULL;
+ return;
pci_restore_state(pdev);
- return NULL;
}
/**
@@ -428,13 +430,14 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
* to make the recovered device work again.
*/
static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->resume || !edev->in_error)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
- driver->err_handler->resume(edev->pdev);
+ driver->err_handler->resume(pdev);
pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
#ifdef CONFIG_PCI_IOV
@@ -453,6 +456,7 @@ static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
* dead, and that no further recovery attempts will be made on it.
*/
static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
enum pci_ers_result rc;
@@ -462,10 +466,10 @@ static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
driver->name);
- rc = driver->err_handler->error_detected(edev->pdev,
+ rc = driver->err_handler->error_detected(pdev,
pci_channel_io_perm_failure);
- pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_DISCONNECT);
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
return rc;
}
@@ -473,12 +477,9 @@ static void *eeh_add_virt_device(struct eeh_dev *edev)
{
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
if (!(edev->physfn)) {
- pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
- __func__, pdn->phb->global_number, pdn->busno,
- PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+ eeh_edev_warn(edev, "Not for VF\n");
return NULL;
}
@@ -492,12 +493,12 @@ static void *eeh_add_virt_device(struct eeh_dev *edev)
}
#ifdef CONFIG_PCI_IOV
- pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
+ pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index);
#endif
return NULL;
}
-static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
+static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
{
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
@@ -512,7 +513,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
*/
if (!eeh_edev_actionable(edev) ||
(dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
- return NULL;
+ return;
if (rmv_data) {
driver = eeh_pcid_get(dev);
@@ -521,7 +522,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
driver->err_handler->error_detected &&
driver->err_handler->slot_reset) {
eeh_pcid_put(dev);
- return NULL;
+ return;
}
eeh_pcid_put(dev);
}
@@ -554,8 +555,6 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
pci_stop_and_remove_bus_device(dev);
pci_unlock_rescan_remove();
}
-
- return NULL;
}
static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
@@ -744,6 +743,99 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
*/
#define MAX_WAIT_FOR_RECOVERY 300
+
+/* Walks the PE tree after processing an event to remove any stale PEs.
+ *
+ * NB: This needs to be recursive to ensure the leaf PEs get removed
+ * before their parents do. Although this is possible to do recursively
+ * we don't since this is easier to read and we need to garantee
+ * the leaf nodes will be handled first.
+ */
+static void eeh_pe_cleanup(struct eeh_pe *pe)
+{
+ struct eeh_pe *child_pe, *tmp;
+
+ list_for_each_entry_safe(child_pe, tmp, &pe->child_list, child)
+ eeh_pe_cleanup(child_pe);
+
+ if (pe->state & EEH_PE_KEEP)
+ return;
+
+ if (!(pe->state & EEH_PE_INVALID))
+ return;
+
+ if (list_empty(&pe->edevs) && list_empty(&pe->child_list)) {
+ list_del(&pe->child);
+ kfree(pe);
+ }
+}
+
+/**
+ * eeh_check_slot_presence - Check if a device is still present in a slot
+ * @pdev: pci_dev to check
+ *
+ * This function may return a false positive if we can't determine the slot's
+ * presence state. This might happen for for PCIe slots if the PE containing
+ * the upstream bridge is also frozen, or the bridge is part of the same PE
+ * as the device.
+ *
+ * This shouldn't happen often, but you might see it if you hotplug a PCIe
+ * switch.
+ */
+static bool eeh_slot_presence_check(struct pci_dev *pdev)
+{
+ const struct hotplug_slot_ops *ops;
+ struct pci_slot *slot;
+ u8 state;
+ int rc;
+
+ if (!pdev)
+ return false;
+
+ if (pdev->error_state == pci_channel_io_perm_failure)
+ return false;
+
+ slot = pdev->slot;
+ if (!slot || !slot->hotplug)
+ return true;
+
+ ops = slot->hotplug->ops;
+ if (!ops || !ops->get_adapter_status)
+ return true;
+
+ /* set the attention indicator while we've got the slot ops */
+ if (ops->set_attention_status)
+ ops->set_attention_status(slot->hotplug, 1);
+
+ rc = ops->get_adapter_status(slot->hotplug, &state);
+ if (rc)
+ return true;
+
+ return !!state;
+}
+
+static void eeh_clear_slot_attention(struct pci_dev *pdev)
+{
+ const struct hotplug_slot_ops *ops;
+ struct pci_slot *slot;
+
+ if (!pdev)
+ return;
+
+ if (pdev->error_state == pci_channel_io_perm_failure)
+ return;
+
+ slot = pdev->slot;
+ if (!slot || !slot->hotplug)
+ return;
+
+ ops = slot->hotplug->ops;
+ if (!ops || !ops->set_attention_status)
+ return;
+
+ ops->set_attention_status(slot->hotplug, 0);
+}
+
/**
* eeh_handle_normal_event - Handle EEH events on a specific PE
* @pe: EEH PE - which should not be used after we return, as it may
@@ -774,6 +866,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
struct eeh_rmv_data rmv_data =
{LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
+ int devices = 0;
bus = eeh_pe_bus_get(pe);
if (!bus) {
@@ -782,7 +875,59 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
return;
}
- eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ /*
+ * When devices are hot-removed we might get an EEH due to
+ * a driver attempting to touch the MMIO space of a removed
+ * device. In this case we don't have a device to recover
+ * so suppress the event if we can't find any present devices.
+ *
+ * The hotplug driver should take care of tearing down the
+ * device itself.
+ */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ if (eeh_slot_presence_check(edev->pdev))
+ devices++;
+
+ if (!devices) {
+ pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n",
+ pe->phb->global_number, pe->addr);
+ goto out; /* nothing to recover */
+ }
+
+ /* Log the event */
+ if (pe->type & EEH_PE_PHB) {
+ pr_err("EEH: PHB#%x failure detected, location: %s\n",
+ pe->phb->global_number, eeh_pe_loc_get(pe));
+ } else {
+ struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
+
+ pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+ pe->phb->global_number, pe->addr);
+ pr_err("EEH: PE location: %s, PHB location: %s\n",
+ eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
+ }
+
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Print the saved stack trace now that we've verified there's
+ * something to recover.
+ */
+ if (pe->trace_entries) {
+ void **ptrs = (void **) pe->stack_trace;
+ int i;
+
+ pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+ pe->phb->global_number, pe->addr);
+
+ /* FIXME: Use the same format as dump_stack() */
+ pr_err("EEH: Call Trace:\n");
+ for (i = 0; i < pe->trace_entries; i++)
+ pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]);
+
+ pe->trace_entries = 0;
+ }
+#endif /* CONFIG_STACKTRACE */
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
@@ -793,6 +938,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
result = PCI_ERS_RESULT_DISCONNECT;
}
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
* to accomplish the reset. Each child gets a report of the
@@ -969,6 +1118,19 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
return;
}
}
+
+out:
+ /*
+ * Clean up any PEs without devices. While marked as EEH_PE_RECOVERYING
+ * we don't want to modify the PE tree structure so we do it here.
+ */
+ eeh_pe_cleanup(pe);
+
+ /* clear the slot attention LED for all recovered devices */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ eeh_clear_slot_attention(edev->pdev);
+
eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
}
@@ -981,7 +1143,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
*/
void eeh_handle_special_event(void)
{
- struct eeh_pe *pe, *phb_pe;
+ struct eeh_pe *pe, *phb_pe, *tmp_pe;
+ struct eeh_dev *edev, *tmp_edev;
struct pci_bus *bus;
struct pci_controller *hose;
unsigned long flags;
@@ -1040,6 +1203,7 @@ void eeh_handle_special_event(void)
*/
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
eeh_handle_normal_event(pe);
} else {
pci_lock_rescan_remove();
@@ -1050,6 +1214,10 @@ void eeh_handle_special_event(void)
(phb_pe->state & EEH_PE_RECOVERING))
continue;
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
/* Notify all devices to be down */
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
eeh_set_channel_state(pe, pci_channel_io_perm_failure);
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index 64cfbe41174b..a7a8dc182efb 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -40,7 +40,6 @@ static int eeh_event_handler(void * dummy)
{
unsigned long flags;
struct eeh_event *event;
- struct eeh_pe *pe;
while (!kthread_should_stop()) {
if (wait_for_completion_interruptible(&eeh_eventlist_event))
@@ -59,19 +58,10 @@ static int eeh_event_handler(void * dummy)
continue;
/* We might have event without binding PE */
- pe = event->pe;
- if (pe) {
- if (pe->type & EEH_PE_PHB)
- pr_info("EEH: Detected error on PHB#%x\n",
- pe->phb->global_number);
- else
- pr_info("EEH: Detected PCI bus error on "
- "PHB#%x-PE#%x\n",
- pe->phb->global_number, pe->addr);
- eeh_handle_normal_event(pe);
- } else {
+ if (event->pe)
+ eeh_handle_normal_event(event->pe);
+ else
eeh_handle_special_event();
- }
kfree(event);
}
@@ -121,6 +111,24 @@ int __eeh_send_failure_event(struct eeh_pe *pe)
}
event->pe = pe;
+ /*
+ * Mark the PE as recovering before inserting it in the queue.
+ * This prevents the PE from being free()ed by a hotplug driver
+ * while the PE is sitting in the event queue.
+ */
+ if (pe) {
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Save the current stack trace so we can dump it from the
+ * event handler thread.
+ */
+ pe->trace_entries = stack_trace_save(pe->stack_trace,
+ ARRAY_SIZE(pe->stack_trace), 0);
+#endif /* CONFIG_STACKTRACE */
+
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ }
+
/* We may or may not be called in an interrupt context */
spin_lock_irqsave(&eeh_eventlist_lock, flags);
list_add(&event->list, &eeh_eventlist);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 854cef7b18f4..177852e39a25 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -231,29 +231,22 @@ void *eeh_pe_traverse(struct eeh_pe *root,
* The function is used to traverse the devices of the specified
* PE and its child PEs.
*/
-void *eeh_pe_dev_traverse(struct eeh_pe *root,
+void eeh_pe_dev_traverse(struct eeh_pe *root,
eeh_edev_traverse_func fn, void *flag)
{
struct eeh_pe *pe;
struct eeh_dev *edev, *tmp;
- void *ret;
if (!root) {
pr_warn("%s: Invalid PE %p\n",
__func__, root);
- return NULL;
+ return;
}
/* Traverse root PE */
- eeh_for_each_pe(root, pe) {
- eeh_pe_for_each_dev(pe, edev, tmp) {
- ret = fn(edev, flag);
- if (ret)
- return ret;
- }
- }
-
- return NULL;
+ eeh_for_each_pe(root, pe)
+ eeh_pe_for_each_dev(pe, edev, tmp)
+ fn(edev, flag);
}
/**
@@ -379,8 +372,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/* Check if the PE number is valid */
if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
- pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n",
- __func__, config_addr, pdn->phb->global_number);
+ eeh_edev_err(edev, "PE#0 is invalid for this PHB!\n");
return -EINVAL;
}
@@ -391,42 +383,34 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* components.
*/
pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr);
- if (pe && !(pe->type & EEH_PE_INVALID)) {
- /* Mark the PE as type of PCI bus */
- pe->type = EEH_PE_BUS;
- edev->pe = pe;
-
- /* Put the edev to PE */
- list_add_tail(&edev->entry, &pe->edevs);
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr);
- return 0;
- } else if (pe && (pe->type & EEH_PE_INVALID)) {
- list_add_tail(&edev->entry, &pe->edevs);
- edev->pe = pe;
- /*
- * We're running to here because of PCI hotplug caused by
- * EEH recovery. We need clear EEH_PE_INVALID until the top.
- */
- parent = pe;
- while (parent) {
- if (!(parent->type & EEH_PE_INVALID))
- break;
- parent->type &= ~EEH_PE_INVALID;
- parent = parent->parent;
- }
+ if (pe) {
+ if (pe->type & EEH_PE_INVALID) {
+ list_add_tail(&edev->entry, &pe->edevs);
+ edev->pe = pe;
+ /*
+ * We're running to here because of PCI hotplug caused by
+ * EEH recovery. We need clear EEH_PE_INVALID until the top.
+ */
+ parent = pe;
+ while (parent) {
+ if (!(parent->type & EEH_PE_INVALID))
+ break;
+ parent->type &= ~EEH_PE_INVALID;
+ parent = parent->parent;
+ }
+
+ eeh_edev_dbg(edev,
+ "Added to device PE (parent: PE#%x)\n",
+ pe->parent->addr);
+ } else {
+ /* Mark the PE as type of PCI bus */
+ pe->type = EEH_PE_BUS;
+ edev->pe = pe;
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device "
- "PE#%x, Parent PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr, pe->parent->addr);
+ /* Put the edev to PE */
+ list_add_tail(&edev->entry, &pe->edevs);
+ eeh_edev_dbg(edev, "Added to bus PE\n");
+ }
return 0;
}
@@ -468,13 +452,8 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
list_add_tail(&pe->child, &parent->child_list);
list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
- "Device PE#%x, Parent PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr, pe->parent->addr);
+ eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n",
+ pe->parent->addr);
return 0;
}
@@ -491,16 +470,12 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent, *child;
+ bool keep, recover;
int cnt;
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
pe = eeh_dev_to_pe(edev);
if (!pe) {
- pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
- __func__, pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn));
+ eeh_edev_dbg(edev, "No PE found for device.\n");
return -EEXIST;
}
@@ -516,10 +491,21 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
*/
while (1) {
parent = pe->parent;
+
+ /* PHB PEs should never be removed */
if (pe->type & EEH_PE_PHB)
break;
- if (!(pe->state & EEH_PE_KEEP)) {
+ /*
+ * XXX: KEEP is set while resetting a PE. I don't think it's
+ * ever set without RECOVERING also being set. I could
+ * be wrong though so catch that with a WARN.
+ */
+ keep = !!(pe->state & EEH_PE_KEEP);
+ recover = !!(pe->state & EEH_PE_RECOVERING);
+ WARN_ON(keep && !recover);
+
+ if (!keep && !recover) {
if (list_empty(&pe->edevs) &&
list_empty(&pe->child_list)) {
list_del(&pe->child);
@@ -528,6 +514,15 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
break;
}
} else {
+ /*
+ * Mark the PE as invalid. At the end of the recovery
+ * process any invalid PEs will be garbage collected.
+ *
+ * We need to delay the free()ing of them since we can
+ * remove edev's while traversing the PE tree which
+ * might trigger the removal of a PE and we can't
+ * deal with that (yet).
+ */
if (list_empty(&pe->edevs)) {
cnt = 0;
list_for_each_entry(child, &pe->child_list, child) {
@@ -623,13 +618,11 @@ void eeh_pe_mark_isolated(struct eeh_pe *root)
}
EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated);
-static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
+static void __eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
{
int mode = *((int *)flag);
edev->mode |= mode;
-
- return NULL;
}
/**
@@ -717,17 +710,13 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
return;
- pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
- __func__, pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn));
+ eeh_edev_dbg(edev, "Checking PCIe link...\n");
/* Check slot status */
cap = edev->pcie_cap;
eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val);
if (!(val & PCI_EXP_SLTSTA_PDS)) {
- pr_debug(" No card in the slot (0x%04x) !\n", val);
+ eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val);
return;
}
@@ -736,7 +725,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
if (val & PCI_EXP_SLTCAP_PCP) {
eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val);
if (val & PCI_EXP_SLTCTL_PCC) {
- pr_debug(" In power-off state, power it on ...\n");
+ eeh_edev_dbg(edev, "In power-off state, power it on ...\n");
val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val);
@@ -752,7 +741,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
/* Check link */
eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val);
if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
- pr_debug(" No link reporting capability (0x%08x) \n", val);
+ eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val);
msleep(1000);
return;
}
@@ -769,10 +758,10 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
}
if (val & PCI_EXP_LNKSTA_DLLLA)
- pr_debug(" Link up (%s)\n",
+ eeh_edev_dbg(edev, "Link up (%s)\n",
(val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
else
- pr_debug(" Link not ready (0x%04x)\n", val);
+ eeh_edev_dbg(edev, "Link not ready (0x%04x)\n", val);
}
#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
@@ -852,7 +841,7 @@ static void eeh_restore_device_bars(struct eeh_dev *edev)
* the expansion ROM base address, the latency timer, and etc.
* from the saved values in the device node.
*/
-static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
+static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
@@ -864,8 +853,6 @@ static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
if (eeh_ops->restore_config && pdn)
eeh_ops->restore_config(pdn);
-
- return NULL;
}
/**
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 85fdb6d879f1..d60908ea37fb 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -230,7 +230,7 @@ transfer_to_handler_cont:
*/
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
- LOAD_MSR_KERNEL(r0, MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r0
SYNC
@@ -304,7 +304,7 @@ stack_ovf:
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
@@ -324,7 +324,7 @@ trace_syscall_entry_irq_off:
bl trace_hardirqs_on
/* Now enable for real */
- LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
mtmsr r10
REST_GPR(0, r1)
@@ -394,7 +394,7 @@ ret_from_syscall:
#endif
mr r6,r3
/* disable interrupts so current_thread_info()->flags can't change */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
SYNC
MTMSRD(r10)
@@ -597,6 +597,14 @@ ppc_clone:
stw r0,_TRAP(r1) /* register set saved */
b sys_clone
+ .globl ppc_clone3
+ppc_clone3:
+ SAVE_NVGPRS(r1)
+ lwz r0,_TRAP(r1)
+ rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
+ stw r0,_TRAP(r1) /* register set saved */
+ b sys_clone3
+
.globl ppc_swapcontext
ppc_swapcontext:
SAVE_NVGPRS(r1)
@@ -769,11 +777,19 @@ fast_exception_return:
1: lis r3,exc_exit_restart_end@ha
addi r3,r3,exc_exit_restart_end@l
cmplw r12,r3
+#if CONFIG_PPC_BOOK3S_601
+ bge 2b
+#else
bge 3f
+#endif
lis r4,exc_exit_restart@ha
addi r4,r4,exc_exit_restart@l
cmplw r12,r4
+#if CONFIG_PPC_BOOK3S_601
+ blt 2b
+#else
blt 3f
+#endif
lis r3,fee_restarts@ha
tophys(r3,r3)
lwz r5,fee_restarts@l(r3)
@@ -792,9 +808,6 @@ fee_restarts:
/* aargh, we don't know which trap this is */
/* but the 601 doesn't implement the RI bit, so assume it's OK */
3:
-BEGIN_FTR_SECTION
- b 2b
-END_FTR_SECTION_IFSET(CPU_FTR_601)
li r10,-1
stw r10,_TRAP(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -816,7 +829,7 @@ ret_from_except:
* can't change between when we test it and when we return
* from the interrupt. */
/* Note: We don't bother telling lockdep about it */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC /* Some chip revs have problems here... */
MTMSRD(r10) /* disable interrupts */
@@ -983,7 +996,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
* can restart the exception exit path at the label
* exc_exit_restart below. -- paulus
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
SYNC
MTMSRD(r10) /* clear the RI bit */
.globl exc_exit_restart
@@ -1058,7 +1071,7 @@ exc_exit_restart_end:
REST_NVGPRS(r1); \
lwz r3,_MSR(r1); \
andi. r3,r3,MSR_PR; \
- LOAD_MSR_KERNEL(r10,MSR_KERNEL); \
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \
bne user_exc_return; \
lwz r0,GPR0(r1); \
lwz r2,GPR2(r1); \
@@ -1228,7 +1241,7 @@ recheck:
* neither. Those disable/enable cycles used to peek at
* TI_FLAGS aren't advertised.
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC
MTMSRD(r10) /* disable interrupts */
lwz r9,TI_FLAGS(r2)
@@ -1262,11 +1275,19 @@ nonrecoverable:
lis r10,exc_exit_restart_end@ha
addi r10,r10,exc_exit_restart_end@l
cmplw r12,r10
+#ifdef CONFIG_PPC_BOOK3S_601
+ bgelr
+#else
bge 3f
+#endif
lis r11,exc_exit_restart@ha
addi r11,r11,exc_exit_restart@l
cmplw r12,r11
+#ifdef CONFIG_PPC_BOOK3S_601
+ bltlr
+#else
blt 3f
+#endif
lis r10,ee_restarts@ha
lwz r12,ee_restarts@l(r10)
addi r12,r12,1
@@ -1275,9 +1296,6 @@ nonrecoverable:
blr
3: /* OK, we can't recover, kill this process */
/* but the 601 doesn't implement the RI bit, so assume it's OK */
-BEGIN_FTR_SECTION
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_601)
lwz r3,_TRAP(r1)
andi. r0,r3,1
beq 5f
@@ -1321,7 +1339,7 @@ _GLOBAL(enter_rtas)
lwz r4,RTASBASE(r4)
mfmsr r9
stw r9,8(r1)
- LOAD_MSR_KERNEL(r0,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r0,MSR_KERNEL)
SYNC /* disable interrupts so SRR0/1 */
MTMSRD(r0) /* don't get trashed */
li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d9105fcf4021..6467bdab8d40 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -69,24 +69,20 @@ BEGIN_FTR_SECTION
bne .Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
- andi. r10,r12,MSR_PR
mr r10,r1
- addi r1,r1,-INT_FRAME_SIZE
- beq- 1f
ld r1,PACAKSAVE(r13)
-1: std r10,0(r1)
+ std r10,0(r1)
std r11,_NIP(r1)
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
- beq 2f /* if from kernel mode */
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
#endif
ACCOUNT_CPU_USER_ENTRY(r13, r10, r11)
-2: std r2,GPR2(r1)
+ std r2,GPR2(r1)
std r3,GPR3(r1)
mfcr r2
std r4,GPR4(r1)
@@ -122,14 +118,13 @@ END_BTB_FLUSH_SECTION
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
- beq 33f
- /* if from user, see if there are any DTL entries to process */
+ /* see if there are any DTL entries to process */
ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
ld r11,PACA_DTL_RIDX(r13) /* get log read index */
addi r10,r10,LPPACA_DTLIDX
LDX_BE r10,0,r10 /* get log write index */
- cmpd cr1,r11,r10
- beq+ cr1,33f
+ cmpd r11,r10
+ beq+ 33f
bl accumulate_stolen_time
REST_GPR(0,r1)
REST_4GPRS(3,r1)
@@ -203,6 +198,7 @@ system_call: /* label this so stack traces look sane */
mtctr r12
bctrl /* Call handler */
+ /* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */
.Lsyscall_exit:
std r3,RESULT(r1)
@@ -216,11 +212,6 @@ system_call: /* label this so stack traces look sane */
ld r12, PACA_THREAD_INFO(r13)
ld r8,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3S
- /* No MSR:RI on BookE */
- andi. r10,r8,MSR_RI
- beq- .Lunrecov_restore
-#endif
/*
* This is a few instructions into the actual syscall exit path (which actually
@@ -487,6 +478,11 @@ _GLOBAL(ppc_clone)
bl sys_clone
b .Lsyscall_exit
+_GLOBAL(ppc_clone3)
+ bl save_nvgprs
+ bl sys_clone3
+ b .Lsyscall_exit
+
_GLOBAL(ppc32_swapcontext)
bl save_nvgprs
bl compat_sys_swapcontext
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 1cfb3da4a84a..829950b96d29 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -750,12 +750,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
ld r15,PACATOC(r13)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
-#else
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,__end_interrupts)
-#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+ cmpld cr0, r10, r14
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, __end_interrupts)
+ cmpld cr1, r10, r14
+#endif
blt+ cr0,1f
bge+ cr1,1f
@@ -820,12 +822,14 @@ kernel_dbg_exc:
ld r15,PACATOC(r13)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
-#else
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,__end_interrupts)
-#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+ cmpld cr0, r10, r14
+ LOAD_REG_IMMEDIATE_SYM(r14, r15,__end_interrupts)
+ cmpld cr1, r10, r14
+#endif
blt+ cr0,1f
bge+ cr1,1f
@@ -1449,7 +1453,7 @@ a2_tlbinit_code_start:
a2_tlbinit_after_linear_map:
/* Now we branch the new virtual address mapped by this entry */
- LOAD_REG_IMMEDIATE(r3,1f)
+ LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
mtctr r3
bctr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 73ba246ca11d..d0018dd17e0a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -21,6 +21,677 @@
#include <asm/feature-fixups.h>
#include <asm/kup.h>
+/* PACA save area offsets (exgen, exmc, etc) */
+#define EX_R9 0
+#define EX_R10 8
+#define EX_R11 16
+#define EX_R12 24
+#define EX_R13 32
+#define EX_DAR 40
+#define EX_DSISR 48
+#define EX_CCR 52
+#define EX_CFAR 56
+#define EX_PPR 64
+#if defined(CONFIG_RELOCATABLE)
+#define EX_CTR 72
+.if EX_SIZE != 10
+ .error "EX_SIZE is wrong"
+.endif
+#else
+.if EX_SIZE != 9
+ .error "EX_SIZE is wrong"
+.endif
+#endif
+
+/*
+ * Following are fixed section helper macros.
+ *
+ * EXC_REAL_BEGIN/END - real, unrelocated exception vectors
+ * EXC_VIRT_BEGIN/END - virt (AIL), unrelocated exception vectors
+ * TRAMP_REAL_BEGIN - real, unrelocated helpers (virt may call these)
+ * TRAMP_VIRT_BEGIN - virt, unreloc helpers (in practice, real can use)
+ * TRAMP_KVM_BEGIN - KVM handlers, these are put into real, unrelocated
+ * EXC_COMMON - After switching to virtual, relocated mode.
+ */
+
+#define EXC_REAL_BEGIN(name, start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
+
+#define EXC_REAL_END(name, start, size) \
+ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
+
+#define EXC_VIRT_BEGIN(name, start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
+
+#define EXC_VIRT_END(name, start, size) \
+ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
+
+#define EXC_COMMON_BEGIN(name) \
+ USE_TEXT_SECTION(); \
+ .balign IFETCH_ALIGN_BYTES; \
+ .global name; \
+ _ASM_NOKPROBE_SYMBOL(name); \
+ DEFINE_FIXED_SYMBOL(name); \
+name:
+
+#define TRAMP_REAL_BEGIN(name) \
+ FIXED_SECTION_ENTRY_BEGIN(real_trampolines, name)
+
+#define TRAMP_VIRT_BEGIN(name) \
+ FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#define TRAMP_KVM_BEGIN(name) \
+ TRAMP_VIRT_BEGIN(name)
+#else
+#define TRAMP_KVM_BEGIN(name)
+#endif
+
+#define EXC_REAL_NONE(start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
+ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
+
+#define EXC_VIRT_NONE(start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); \
+ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size)
+
+/*
+ * We're short on space and time in the exception prolog, so we can't
+ * use the normal LOAD_REG_IMMEDIATE macro to load the address of label.
+ * Instead we get the base of the kernel from paca->kernelbase and or in the low
+ * part of label. This requires that the label be within 64KB of kernelbase, and
+ * that kernelbase be 64K aligned.
+ */
+#define LOAD_HANDLER(reg, label) \
+ ld reg,PACAKBASE(r13); /* get high part of &label */ \
+ ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label)
+
+#define __LOAD_HANDLER(reg, label) \
+ ld reg,PACAKBASE(r13); \
+ ori reg,reg,(ABS_ADDR(label))@l
+
+/*
+ * Branches from unrelocated code (e.g., interrupts) to labels outside
+ * head-y require >64K offsets.
+ */
+#define __LOAD_FAR_HANDLER(reg, label) \
+ ld reg,PACAKBASE(r13); \
+ ori reg,reg,(ABS_ADDR(label))@l; \
+ addis reg,reg,(ABS_ADDR(label))@h
+
+/* Exception register prefixes */
+#define EXC_HV_OR_STD 2 /* depends on HVMODE */
+#define EXC_HV 1
+#define EXC_STD 0
+
+#if defined(CONFIG_RELOCATABLE)
+/*
+ * If we support interrupts with relocation on AND we're a relocatable kernel,
+ * we need to use CTR to get to the 2nd level handler. So, save/restore it
+ * when required.
+ */
+#define SAVE_CTR(reg, area) mfctr reg ; std reg,area+EX_CTR(r13)
+#define GET_CTR(reg, area) ld reg,area+EX_CTR(r13)
+#define RESTORE_CTR(reg, area) ld reg,area+EX_CTR(r13) ; mtctr reg
+#else
+/* ...else CTR is unused and in register. */
+#define SAVE_CTR(reg, area)
+#define GET_CTR(reg, area) mfctr reg
+#define RESTORE_CTR(reg, area)
+#endif
+
+/*
+ * PPR save/restore macros used in exceptions-64s.S
+ * Used for P7 or later processors
+ */
+#define SAVE_PPR(area, ra) \
+BEGIN_FTR_SECTION_NESTED(940) \
+ ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \
+ std ra,_PPR(r1); \
+END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
+
+#define RESTORE_PPR_PACA(area, ra) \
+BEGIN_FTR_SECTION_NESTED(941) \
+ ld ra,area+EX_PPR(r13); \
+ mtspr SPRN_PPR,ra; \
+END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
+
+/*
+ * Get an SPR into a register if the CPU has the given feature
+ */
+#define OPT_GET_SPR(ra, spr, ftr) \
+BEGIN_FTR_SECTION_NESTED(943) \
+ mfspr ra,spr; \
+END_FTR_SECTION_NESTED(ftr,ftr,943)
+
+/*
+ * Set an SPR from a register if the CPU has the given feature
+ */
+#define OPT_SET_SPR(ra, spr, ftr) \
+BEGIN_FTR_SECTION_NESTED(943) \
+ mtspr spr,ra; \
+END_FTR_SECTION_NESTED(ftr,ftr,943)
+
+/*
+ * Save a register to the PACA if the CPU has the given feature
+ */
+#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \
+BEGIN_FTR_SECTION_NESTED(943) \
+ std ra,offset(r13); \
+END_FTR_SECTION_NESTED(ftr,ftr,943)
+
+/*
+ * Branch to label using its 0xC000 address. This results in instruction
+ * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
+ * on using mtmsr rather than rfid.
+ *
+ * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than
+ * load KBASE for a slight optimisation.
+ */
+#define BRANCH_TO_C000(reg, label) \
+ __LOAD_FAR_HANDLER(reg, label); \
+ mtctr reg; \
+ bctr
+
+.macro INT_KVM_HANDLER name, vec, hsrr, area, skip
+ TRAMP_KVM_BEGIN(\name\()_kvm)
+ KVM_HANDLER \vec, \hsrr, \area, \skip
+.endm
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * If hv is possible, interrupts come into to the hv version
+ * of the kvmppc_interrupt code, which then jumps to the PR handler,
+ * kvmppc_interrupt_pr, if the guest is a PR guest.
+ */
+#define kvmppc_interrupt kvmppc_interrupt_hv
+#else
+#define kvmppc_interrupt kvmppc_interrupt_pr
+#endif
+
+.macro KVMTEST name, hsrr, n
+ lbz r10,HSTATE_IN_GUEST(r13)
+ cmpwi r10,0
+ bne \name\()_kvm
+.endm
+
+.macro KVM_HANDLER vec, hsrr, area, skip
+ .if \skip
+ cmpwi r10,KVM_GUEST_MODE_SKIP
+ beq 89f
+ .else
+BEGIN_FTR_SECTION_NESTED(947)
+ ld r10,\area+EX_CFAR(r13)
+ std r10,HSTATE_CFAR(r13)
+END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947)
+ .endif
+
+BEGIN_FTR_SECTION_NESTED(948)
+ ld r10,\area+EX_PPR(r13)
+ std r10,HSTATE_PPR(r13)
+END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
+ ld r10,\area+EX_R10(r13)
+ std r12,HSTATE_SCRATCH0(r13)
+ sldi r12,r9,32
+ /* HSRR variants have the 0x2 bit added to their trap number */
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ ori r12,r12,(\vec + 0x2)
+ FTR_SECTION_ELSE
+ ori r12,r12,(\vec)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ ori r12,r12,(\vec + 0x2)
+ .else
+ ori r12,r12,(\vec)
+ .endif
+
+#ifdef CONFIG_RELOCATABLE
+ /*
+ * KVM requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
+ * outside the head section. CONFIG_RELOCATABLE KVM expects CTR
+ * to be saved in HSTATE_SCRATCH1.
+ */
+ mfctr r9
+ std r9,HSTATE_SCRATCH1(r13)
+ __LOAD_FAR_HANDLER(r9, kvmppc_interrupt)
+ mtctr r9
+ ld r9,\area+EX_R9(r13)
+ bctr
+#else
+ ld r9,\area+EX_R9(r13)
+ b kvmppc_interrupt
+#endif
+
+
+ .if \skip
+89: mtocrf 0x80,r9
+ ld r9,\area+EX_R9(r13)
+ ld r10,\area+EX_R10(r13)
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ b kvmppc_skip_Hinterrupt
+ FTR_SECTION_ELSE
+ b kvmppc_skip_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ b kvmppc_skip_Hinterrupt
+ .else
+ b kvmppc_skip_interrupt
+ .endif
+ .endif
+.endm
+
+#else
+.macro KVMTEST name, hsrr, n
+.endm
+.macro KVM_HANDLER name, vec, hsrr, area, skip
+.endm
+#endif
+
+.macro INT_SAVE_SRR_AND_JUMP label, hsrr, set_ri
+ ld r10,PACAKMSR(r13) /* get MSR value for kernel */
+ .if ! \set_ri
+ xori r10,r10,MSR_RI /* Clear MSR_RI */
+ .endif
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ mtspr SPRN_HSRR1,r10
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ mtspr SPRN_SRR1,r10
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ mtspr SPRN_HSRR1,r10
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ mtspr SPRN_SRR1,r10
+ .endif
+ LOAD_HANDLER(r10, \label\())
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mtspr SPRN_HSRR0,r10
+ HRFI_TO_KERNEL
+ FTR_SECTION_ELSE
+ mtspr SPRN_SRR0,r10
+ RFI_TO_KERNEL
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mtspr SPRN_HSRR0,r10
+ HRFI_TO_KERNEL
+ .else
+ mtspr SPRN_SRR0,r10
+ RFI_TO_KERNEL
+ .endif
+ b . /* prevent speculative execution */
+.endm
+
+/* INT_SAVE_SRR_AND_JUMP works for real or virt, this is faster but virt only */
+.macro INT_VIRT_SAVE_SRR_AND_JUMP label, hsrr
+#ifdef CONFIG_RELOCATABLE
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ .endif
+ LOAD_HANDLER(r12, \label\())
+ mtctr r12
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ FTR_SECTION_ELSE
+ mfspr r12,SPRN_SRR1 /* and HSRR1 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ .else
+ mfspr r12,SPRN_SRR1 /* and HSRR1 */
+ .endif
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set RI (EE=0) */
+ bctr
+#else
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ .endif
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set RI (EE=0) */
+ b \label
+#endif
+.endm
+
+/*
+ * This is the BOOK3S interrupt entry code macro.
+ *
+ * This can result in one of several things happening:
+ * - Branch to the _common handler, relocated, in virtual mode.
+ * These are normal interrupts (synchronous and asynchronous) handled by
+ * the kernel.
+ * - Branch to KVM, relocated but real mode interrupts remain in real mode.
+ * These occur when HSTATE_IN_GUEST is set. The interrupt may be caused by
+ * / intended for host or guest kernel, but KVM must always be involved
+ * because the machine state is set for guest execution.
+ * - Branch to the masked handler, unrelocated.
+ * These occur when maskable asynchronous interrupts are taken with the
+ * irq_soft_mask set.
+ * - Branch to an "early" handler in real mode but relocated.
+ * This is done if early=1. MCE and HMI use these to handle errors in real
+ * mode.
+ * - Fall through and continue executing in real, unrelocated mode.
+ * This is done if early=2.
+ */
+.macro INT_HANDLER name, vec, ool=0, early=0, virt=0, hsrr=0, area=PACA_EXGEN, ri=1, dar=0, dsisr=0, bitmask=0, kvm=0
+ SET_SCRATCH0(r13) /* save r13 */
+ GET_PACA(r13)
+ std r9,\area\()+EX_R9(r13) /* save r9 */
+ OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
+ HMT_MEDIUM
+ std r10,\area\()+EX_R10(r13) /* save r10 - r12 */
+ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+ .if \ool
+ .if !\virt
+ b tramp_real_\name
+ .pushsection .text
+ TRAMP_REAL_BEGIN(tramp_real_\name)
+ .else
+ b tramp_virt_\name
+ .pushsection .text
+ TRAMP_VIRT_BEGIN(tramp_virt_\name)
+ .endif
+ .endif
+
+ OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
+ OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
+ INTERRUPT_TO_KERNEL
+ SAVE_CTR(r10, \area\())
+ mfcr r9
+ .if \kvm
+ KVMTEST \name \hsrr \vec
+ .endif
+ .if \bitmask
+ lbz r10,PACAIRQSOFTMASK(r13)
+ andi. r10,r10,\bitmask
+ /* Associate vector numbers with bits in paca->irq_happened */
+ .if \vec == 0x500 || \vec == 0xea0
+ li r10,PACA_IRQ_EE
+ .elseif \vec == 0x900
+ li r10,PACA_IRQ_DEC
+ .elseif \vec == 0xa00 || \vec == 0xe80
+ li r10,PACA_IRQ_DBELL
+ .elseif \vec == 0xe60
+ li r10,PACA_IRQ_HMI
+ .elseif \vec == 0xf00
+ li r10,PACA_IRQ_PMI
+ .else
+ .abort "Bad maskable vector"
+ .endif
+
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ bne masked_Hinterrupt
+ FTR_SECTION_ELSE
+ bne masked_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ bne masked_Hinterrupt
+ .else
+ bne masked_interrupt
+ .endif
+ .endif
+
+ std r11,\area\()+EX_R11(r13)
+ std r12,\area\()+EX_R12(r13)
+
+ /*
+ * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
+ * because a d-side MCE will clobber those registers so is
+ * not recoverable if they are live.
+ */
+ GET_SCRATCH0(r10)
+ std r10,\area\()+EX_R13(r13)
+ .if \dar
+ .if \hsrr
+ mfspr r10,SPRN_HDAR
+ .else
+ mfspr r10,SPRN_DAR
+ .endif
+ std r10,\area\()+EX_DAR(r13)
+ .endif
+ .if \dsisr
+ .if \hsrr
+ mfspr r10,SPRN_HDSISR
+ .else
+ mfspr r10,SPRN_DSISR
+ .endif
+ stw r10,\area\()+EX_DSISR(r13)
+ .endif
+
+ .if \early == 2
+ /* nothing more */
+ .elseif \early
+ mfctr r10 /* save ctr, even for !RELOCATABLE */
+ BRANCH_TO_C000(r11, \name\()_early_common)
+ .elseif !\virt
+ INT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr, \ri
+ .else
+ INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr
+ .endif
+ .if \ool
+ .popsection
+ .endif
+.endm
+
+/*
+ * On entry r13 points to the paca, r9-r13 are saved in the paca,
+ * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
+ * SRR1, and relocation is on.
+ *
+ * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
+ * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
+ */
+.macro INT_COMMON vec, area, stack, kaup, reconcile, dar, dsisr
+ .if \stack
+ andi. r10,r12,MSR_PR /* See if coming from user */
+ mr r10,r1 /* Save r1 */
+ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */
+ beq- 100f
+ ld r1,PACAKSAVE(r13) /* kernel stack to use */
+100: tdgei r1,-INT_FRAME_SIZE /* trap if r1 is in userspace */
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
+ .endif
+
+ std r9,_CCR(r1) /* save CR in stackframe */
+ std r11,_NIP(r1) /* save SRR0 in stackframe */
+ std r12,_MSR(r1) /* save SRR1 in stackframe */
+ std r10,0(r1) /* make stack chain pointer */
+ std r0,GPR0(r1) /* save r0 in stackframe */
+ std r10,GPR1(r1) /* save r1 in stackframe */
+
+ .if \stack
+ .if \kaup
+ kuap_save_amr_and_lock r9, r10, cr1, cr0
+ .endif
+ beq 101f /* if from kernel mode */
+ ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
+ SAVE_PPR(\area, r9)
+101:
+ .else
+ .if \kaup
+ kuap_save_amr_and_lock r9, r10, cr1
+ .endif
+ .endif
+
+ /* Save original regs values from save area to stack frame. */
+ ld r9,\area+EX_R9(r13) /* move r9, r10 to stackframe */
+ ld r10,\area+EX_R10(r13)
+ std r9,GPR9(r1)
+ std r10,GPR10(r1)
+ ld r9,\area+EX_R11(r13) /* move r11 - r13 to stackframe */
+ ld r10,\area+EX_R12(r13)
+ ld r11,\area+EX_R13(r13)
+ std r9,GPR11(r1)
+ std r10,GPR12(r1)
+ std r11,GPR13(r1)
+ .if \dar
+ .if \dar == 2
+ ld r10,_NIP(r1)
+ .else
+ ld r10,\area+EX_DAR(r13)
+ .endif
+ std r10,_DAR(r1)
+ .endif
+ .if \dsisr
+ .if \dsisr == 2
+ ld r10,_MSR(r1)
+ lis r11,DSISR_SRR1_MATCH_64S@h
+ and r10,r10,r11
+ .else
+ lwz r10,\area+EX_DSISR(r13)
+ .endif
+ std r10,_DSISR(r1)
+ .endif
+BEGIN_FTR_SECTION_NESTED(66)
+ ld r10,\area+EX_CFAR(r13)
+ std r10,ORIG_GPR3(r1)
+END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
+ GET_CTR(r10, \area)
+ std r10,_CTR(r1)
+ std r2,GPR2(r1) /* save r2 in stackframe */
+ SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */
+ SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */
+ mflr r9 /* Get LR, later save to stack */
+ ld r2,PACATOC(r13) /* get kernel TOC into r2 */
+ std r9,_LINK(r1)
+ lbz r10,PACAIRQSOFTMASK(r13)
+ mfspr r11,SPRN_XER /* save XER in stackframe */
+ std r10,SOFTE(r1)
+ std r11,_XER(r1)
+ li r9,(\vec)+1
+ std r9,_TRAP(r1) /* set trap number */
+ li r10,0
+ ld r11,exception_marker@toc(r2)
+ std r10,RESULT(r1) /* clear regs->result */
+ std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */
+
+ .if \stack
+ ACCOUNT_STOLEN_TIME
+ .endif
+
+ .if \reconcile
+ RECONCILE_IRQ_STATE(r10, r11)
+ .endif
+.endm
+
+/*
+ * Restore all registers including H/SRR0/1 saved in a stack frame of a
+ * standard exception.
+ */
+.macro EXCEPTION_RESTORE_REGS hsrr
+ /* Move original SRR0 and SRR1 into the respective regs */
+ ld r9,_MSR(r1)
+ .if \hsrr == EXC_HV_OR_STD
+ .error "EXC_HV_OR_STD Not implemented for EXCEPTION_RESTORE_REGS"
+ .endif
+ .if \hsrr
+ mtspr SPRN_HSRR1,r9
+ .else
+ mtspr SPRN_SRR1,r9
+ .endif
+ ld r9,_NIP(r1)
+ .if \hsrr
+ mtspr SPRN_HSRR0,r9
+ .else
+ mtspr SPRN_SRR0,r9
+ .endif
+ ld r9,_CTR(r1)
+ mtctr r9
+ ld r9,_XER(r1)
+ mtxer r9
+ ld r9,_LINK(r1)
+ mtlr r9
+ ld r9,_CCR(r1)
+ mtcr r9
+ REST_8GPRS(2, r1)
+ REST_4GPRS(10, r1)
+ REST_GPR(0, r1)
+ /* restore original r1. */
+ ld r1,GPR1(r1)
+.endm
+
+#define RUNLATCH_ON \
+BEGIN_FTR_SECTION \
+ ld r3, PACA_THREAD_INFO(r13); \
+ ld r4,TI_LOCAL_FLAGS(r3); \
+ andi. r0,r4,_TLF_RUNLATCH; \
+ beql ppc64_runlatch_on_trampoline; \
+END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
+
+/*
+ * When the idle code in power4_idle puts the CPU into NAP mode,
+ * it has to do so in a loop, and relies on the external interrupt
+ * and decrementer interrupt entry code to get it out of the loop.
+ * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags
+ * to signal that it is in the loop and needs help to get out.
+ */
+#ifdef CONFIG_PPC_970_NAP
+#define FINISH_NAP \
+BEGIN_FTR_SECTION \
+ ld r11, PACA_THREAD_INFO(r13); \
+ ld r9,TI_LOCAL_FLAGS(r11); \
+ andi. r10,r9,_TLF_NAPPING; \
+ bnel power4_fixup_nap; \
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
+#else
+#define FINISH_NAP
+#endif
+
+#define EXC_COMMON(name, realvec, hdlr) \
+ EXC_COMMON_BEGIN(name); \
+ INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
+ bl save_nvgprs; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ bl hdlr; \
+ b ret_from_except
+
+/*
+ * Like EXC_COMMON, but for exceptions that can occur in the idle task and
+ * therefore need the special idle handling (finish nap and runlatch)
+ */
+#define EXC_COMMON_ASYNC(name, realvec, hdlr) \
+ EXC_COMMON_BEGIN(name); \
+ INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
+ FINISH_NAP; \
+ RUNLATCH_ON; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ bl hdlr; \
+ b ret_from_except_lite
+
+
/*
* There are a few constraints to be concerned with.
* - Real mode exceptions code/data must be located at their physical location.
@@ -107,6 +778,7 @@ __start_interrupts:
EXC_VIRT_NONE(0x4000, 0x100)
+EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
#ifdef CONFIG_PPC_P7_NAP
/*
* If running native on arch 2.06 or later, check if we are waking up
@@ -114,60 +786,67 @@ EXC_VIRT_NONE(0x4000, 0x100)
* bits 46:47. A non-0 value indicates that we are coming from a power
* saving state. The idle wakeup handler initially runs in real mode,
* but we branch to the 0xc000... address so we can turn on relocation
- * with mtmsr.
+ * with mtmsrd later, after SPRs are restored.
+ *
+ * Careful to minimise cost for the fast path (idle wakeup) while
+ * also avoiding clobbering CFAR for the debug path (non-idle).
+ *
+ * For the idle wake case volatile registers can be clobbered, which
+ * is why we use those initially. If it turns out to not be an idle
+ * wake, carefully put everything back the way it was, so we can use
+ * common exception macros to handle it.
*/
-#define IDLETEST(n) \
- BEGIN_FTR_SECTION ; \
- mfspr r10,SPRN_SRR1 ; \
- rlwinm. r10,r10,47-31,30,31 ; \
- beq- 1f ; \
- cmpwi cr1,r10,2 ; \
- mfspr r3,SPRN_SRR1 ; \
- bltlr cr1 ; /* no state loss, return to idle caller */ \
- BRANCH_TO_C000(r10, system_reset_idle_common) ; \
-1: \
- KVMTEST_PR(n) ; \
- END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#else
-#define IDLETEST NOTEST
+BEGIN_FTR_SECTION
+ SET_SCRATCH0(r13)
+ GET_PACA(r13)
+ std r3,PACA_EXNMI+0*8(r13)
+ std r4,PACA_EXNMI+1*8(r13)
+ std r5,PACA_EXNMI+2*8(r13)
+ mfspr r3,SPRN_SRR1
+ mfocrf r4,0x80
+ rlwinm. r5,r3,47-31,30,31
+ bne+ system_reset_idle_wake
+ /* Not powersave wakeup. Restore regs for regular interrupt handler. */
+ mtocrf 0x80,r4
+ ld r3,PACA_EXNMI+0*8(r13)
+ ld r4,PACA_EXNMI+1*8(r13)
+ ld r5,PACA_EXNMI+2*8(r13)
+ GET_SCRATCH0(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif
-EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
- SET_SCRATCH0(r13)
+ INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0, kvm=1
/*
* MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
* being used, so a nested NMI exception would corrupt it.
+ *
+ * In theory, we should not enable relocation here if it was disabled
+ * in SRR1, because the MMU may not be configured to support it (e.g.,
+ * SLB may have been cleared). In practice, there should only be a few
+ * small windows where that's the case, and sreset is considered to
+ * be dangerous anyway.
*/
- EXCEPTION_PROLOG_NORI(PACA_EXNMI, system_reset_common, EXC_STD,
- IDLETEST, 0x100)
-
EXC_REAL_END(system_reset, 0x100, 0x100)
EXC_VIRT_NONE(0x4100, 0x100)
-TRAMP_KVM(PACA_EXNMI, 0x100)
+INT_KVM_HANDLER system_reset 0x100, EXC_STD, PACA_EXNMI, 0
#ifdef CONFIG_PPC_P7_NAP
-EXC_COMMON_BEGIN(system_reset_idle_common)
- /*
- * This must be a direct branch (without linker branch stub) because
- * we can not use TOC at this point as r2 may not be restored yet.
- */
- b idle_return_gpr_loss
+TRAMP_REAL_BEGIN(system_reset_idle_wake)
+ /* We are waking up from idle, so may clobber any volatile register */
+ cmpwi cr1,r5,2
+ bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */
+ BRANCH_TO_C000(r12, DOTSYM(idle_return_gpr_loss))
#endif
+#ifdef CONFIG_PPC_PSERIES
/*
- * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
- * the right thing. We do not want to reconcile because that goes
- * through irq tracing which we don't want in NMI.
- *
- * Save PACAIRQHAPPENED because some code will do a hard disable
- * (e.g., xmon). So we want to restore this back to where it was
- * when we return. DAR is unused in the stack, so save it there.
+ * Vectors for the FWNMI option. Share common code.
*/
-#define ADD_RECONCILE_NMI \
- li r10,IRQS_ALL_DISABLED; \
- stb r10,PACAIRQSOFTMASK(r13); \
- lbz r10,PACAIRQHAPPENED(r13); \
- std r10,_DAR(r1)
+TRAMP_REAL_BEGIN(system_reset_fwnmi)
+ /* See comment at system_reset exception, don't turn on RI */
+ INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0
+
+#endif /* CONFIG_PPC_PSERIES */
EXC_COMMON_BEGIN(system_reset_common)
/*
@@ -185,15 +864,27 @@ EXC_COMMON_BEGIN(system_reset_common)
mr r10,r1
ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
- EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100,
- system_reset, system_reset_exception,
- ADD_NVGPRS;ADD_RECONCILE_NMI)
+ INT_COMMON 0x100, PACA_EXNMI, 0, 1, 0, 0, 0
+ bl save_nvgprs
+ /*
+ * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
+ * the right thing. We do not want to reconcile because that goes
+ * through irq tracing which we don't want in NMI.
+ *
+ * Save PACAIRQHAPPENED because some code will do a hard disable
+ * (e.g., xmon). So we want to restore this back to where it was
+ * when we return. DAR is unused in the stack, so save it there.
+ */
+ li r10,IRQS_ALL_DISABLED
+ stb r10,PACAIRQSOFTMASK(r13)
+ lbz r10,PACAIRQHAPPENED(r13)
+ std r10,_DAR(r1)
+
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl system_reset_exception
- /* This (and MCE) can be simplified with mtmsrd L=1 */
/* Clear MSR_RI before setting SRR0 and SRR1. */
- li r0,MSR_RI
- mfmsr r9
- andc r9,r9,r0
+ li r9,0
mtmsrd r9,1
/*
@@ -211,67 +902,44 @@ EXC_COMMON_BEGIN(system_reset_common)
ld r10,SOFTE(r1)
stb r10,PACAIRQSOFTMASK(r13)
- /*
- * Keep below code in synch with MACHINE_CHECK_HANDLER_WINDUP.
- * Should share common bits...
- */
-
- /* Move original SRR0 and SRR1 into the respective regs */
- ld r9,_MSR(r1)
- mtspr SPRN_SRR1,r9
- ld r3,_NIP(r1)
- mtspr SPRN_SRR0,r3
- ld r9,_CTR(r1)
- mtctr r9
- ld r9,_XER(r1)
- mtxer r9
- ld r9,_LINK(r1)
- mtlr r9
- REST_GPR(0, r1)
- REST_8GPRS(2, r1)
- REST_GPR(10, r1)
- ld r11,_CCR(r1)
- mtcr r11
- REST_GPR(11, r1)
- REST_2GPRS(12, r1)
- /* restore original r1. */
- ld r1,GPR1(r1)
+ EXCEPTION_RESTORE_REGS EXC_STD
RFI_TO_USER_OR_KERNEL
-#ifdef CONFIG_PPC_PSERIES
-/*
- * Vectors for the FWNMI option. Share common code.
- */
-TRAMP_REAL_BEGIN(system_reset_fwnmi)
- SET_SCRATCH0(r13) /* save r13 */
- /* See comment at system_reset exception */
- EXCEPTION_PROLOG_NORI(PACA_EXNMI, system_reset_common, EXC_STD,
- NOTEST, 0x100)
-#endif /* CONFIG_PPC_PSERIES */
-
EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
- /* This is moved out of line as it can be patched by FW, but
- * some code path might still want to branch into the original
- * vector
+ INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+ /*
+ * MSR_RI is not enabled, because PACA_EXMC is being used, so a
+ * nested machine check corrupts it. machine_check_common enables
+ * MSR_RI.
*/
- SET_SCRATCH0(r13) /* save r13 */
- EXCEPTION_PROLOG_0(PACA_EXMC)
-BEGIN_FTR_SECTION
- b machine_check_common_early
-FTR_SECTION_ELSE
- b machine_check_pSeries_0
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
EXC_REAL_END(machine_check, 0x200, 0x100)
EXC_VIRT_NONE(0x4200, 0x100)
-TRAMP_REAL_BEGIN(machine_check_common_early)
- EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
+
+#ifdef CONFIG_PPC_PSERIES
+TRAMP_REAL_BEGIN(machine_check_fwnmi)
+ /* See comment at machine_check exception, don't turn on RI */
+ INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+#endif
+
+INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1
+
+#define MACHINE_CHECK_HANDLER_WINDUP \
+ /* Clear MSR_RI before setting SRR0 and SRR1. */\
+ li r9,0; \
+ mtmsrd r9,1; /* Clear MSR_RI */ \
+ /* Decrement paca->in_mce now RI is clear. */ \
+ lhz r12,PACA_IN_MCE(r13); \
+ subi r12,r12,1; \
+ sth r12,PACA_IN_MCE(r13); \
+ EXCEPTION_RESTORE_REGS EXC_STD
+
+EXC_COMMON_BEGIN(machine_check_early_common)
+ mtctr r10 /* Restore ctr */
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
+
/*
- * Register contents:
- * R13 = PACA
- * R9 = CR
- * Original R9 to R13 is saved on PACA_EXMC
- *
* Switch to mc_emergency stack and handle re-entrancy (we limit
* the nested MCE upto level 4 to avoid stack overflow).
* Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
@@ -292,129 +960,127 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
* the machine check is handled then the idle wakeup code is called
* to restore state.
*/
- mr r11,r1 /* Save r1 */
lhz r10,PACA_IN_MCE(r13)
cmpwi r10,0 /* Are we in nested machine check */
- bne 0f /* Yes, we are. */
- /* First machine check entry */
- ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
-0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ cmpwi cr1,r10,MAX_MCE_DEPTH /* Are we at maximum nesting */
addi r10,r10,1 /* increment paca->in_mce */
sth r10,PACA_IN_MCE(r13)
- /* Limit nested MCE to level 4 to avoid stack overflow */
- cmpwi r10,MAX_MCE_DEPTH
- bgt 2f /* Check if we hit limit of 4 */
- std r11,GPR1(r1) /* Save r1 on the stack. */
- std r11,0(r1) /* make stack chain pointer */
- mfspr r11,SPRN_SRR0 /* Save SRR0 */
- std r11,_NIP(r1)
- mfspr r11,SPRN_SRR1 /* Save SRR1 */
- std r11,_MSR(r1)
- mfspr r11,SPRN_DAR /* Save DAR */
- std r11,_DAR(r1)
- mfspr r11,SPRN_DSISR /* Save DSISR */
- std r11,_DSISR(r1)
- std r9,_CCR(r1) /* Save CR in stackframe */
+
+ mr r10,r1 /* Save r1 */
+ bne 1f
+ /* First machine check entry */
+ ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
+1: /* Limit nested MCE to level 4 to avoid stack overflow */
+ bgt cr1,unrecoverable_mce /* Check if we hit limit of 4 */
+ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+
/* We don't touch AMR here, we never go to virtual mode */
- /* Save r9 through r13 from EXMC save area to stack frame. */
- EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
- mfmsr r11 /* get MSR value */
+ INT_COMMON 0x200, PACA_EXMC, 0, 0, 0, 1, 1
+
BEGIN_FTR_SECTION
- ori r11,r11,MSR_ME /* turn on ME bit */
+ bl enable_machine_check
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- ori r11,r11,MSR_RI /* turn on RI bit */
- LOAD_HANDLER(r12, machine_check_handle_early)
-1: mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r11
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-2:
- /* Stack overflow. Stay on emergency stack and panic.
- * Keep the ME bit off while panic-ing, so that if we hit
- * another machine check we checkstop.
- */
- addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */
- ld r11,PACAKMSR(r13)
- LOAD_HANDLER(r12, unrecover_mce)
- li r10,MSR_ME
- andc r11,r11,r10 /* Turn off MSR_ME */
- b 1b
- b . /* prevent speculative execution */
+ li r10,MSR_RI
+ mtmsrd r10,1
-TRAMP_REAL_BEGIN(machine_check_pSeries)
- .globl machine_check_fwnmi
-machine_check_fwnmi:
- SET_SCRATCH0(r13) /* save r13 */
- EXCEPTION_PROLOG_0(PACA_EXMC)
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_early
+ std r3,RESULT(r1) /* Save result */
+ ld r12,_MSR(r1)
+
+#ifdef CONFIG_PPC_P7_NAP
+ /*
+ * Check if thread was in power saving mode. We come here when any
+ * of the following is true:
+ * a. thread wasn't in power saving mode
+ * b. thread was in power saving mode with no state loss,
+ * supervisor state loss or hypervisor state loss.
+ *
+ * Go back to nap/sleep/winkle mode again if (b) is true.
+ */
BEGIN_FTR_SECTION
- b machine_check_common_early
-END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
-machine_check_pSeries_0:
- EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
+ rlwinm. r11,r12,47-31,30,31
+ bne machine_check_idle_common
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
- * MSR_RI is not enabled, because PACA_EXMC is being used, so a
- * nested machine check corrupts it. machine_check_common enables
- * MSR_RI.
+ * Check if we are coming from guest. If yes, then run the normal
+ * exception handler which will take the
+ * machine_check_kvm->kvmppc_interrupt branch to deliver the MC event
+ * to guest.
+ */
+ lbz r11,HSTATE_IN_GUEST(r13)
+ cmpwi r11,0 /* Check if coming from guest */
+ bne mce_deliver /* continue if we are. */
+#endif
+
+ /*
+ * Check if we are coming from userspace. If yes, then run the normal
+ * exception handler which will deliver the MC event to this kernel.
+ */
+ andi. r11,r12,MSR_PR /* See if coming from user. */
+ bne mce_deliver /* continue in V mode if we are. */
+
+ /*
+ * At this point we are coming from kernel context.
+ * Queue up the MCE event and return from the interrupt.
+ * But before that, check if this is an un-recoverable exception.
+ * If yes, then stay on emergency stack and panic.
*/
- EXCEPTION_PROLOG_2_NORI(machine_check_common, EXC_STD)
+ andi. r11,r12,MSR_RI
+ beq unrecoverable_mce
-TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
+ /*
+ * Check if we have successfully handled/recovered from error, if not
+ * then stay on emergency stack and panic.
+ */
+ ld r3,RESULT(r1) /* Load result */
+ cmpdi r3,0 /* see if we handled MCE successfully */
+ beq unrecoverable_mce /* if !handled then panic */
+
+ /*
+ * Return from MC interrupt.
+ * Queue up the MCE event so that we can log it later, while
+ * returning from kernel or opal call.
+ */
+ bl machine_check_queue_event
+ MACHINE_CHECK_HANDLER_WINDUP
+ RFI_TO_KERNEL
+
+mce_deliver:
+ /*
+ * This is a host user or guest MCE. Restore all registers, then
+ * run the "late" handler. For host user, this will run the
+ * machine_check_exception handler in virtual mode like a normal
+ * interrupt handler. For guest, this will trigger the KVM test
+ * and branch to the KVM interrupt similarly to other interrupts.
+ */
+BEGIN_FTR_SECTION
+ ld r10,ORIG_GPR3(r1)
+ mtspr SPRN_CFAR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ MACHINE_CHECK_HANDLER_WINDUP
+ /* See comment at machine_check exception, don't turn on RI */
+ INT_HANDLER machine_check, 0x200, area=PACA_EXMC, ri=0, dar=1, dsisr=1, kvm=1
EXC_COMMON_BEGIN(machine_check_common)
/*
* Machine check is different because we use a different
* save area: PACA_EXMC instead of PACA_EXGEN.
*/
- mfspr r10,SPRN_DAR
- std r10,PACA_EXMC+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXMC+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
+ INT_COMMON 0x200, PACA_EXMC, 1, 1, 1, 1, 1
FINISH_NAP
- RECONCILE_IRQ_STATE(r10, r11)
- ld r3,PACA_EXMC+EX_DAR(r13)
- lwz r4,PACA_EXMC+EX_DSISR(r13)
/* Enable MSR_RI when finished with PACA_EXMC */
li r10,MSR_RI
mtmsrd r10,1
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
b ret_from_except
-#define MACHINE_CHECK_HANDLER_WINDUP \
- /* Clear MSR_RI before setting SRR0 and SRR1. */\
- li r0,MSR_RI; \
- mfmsr r9; /* get MSR value */ \
- andc r9,r9,r0; \
- mtmsrd r9,1; /* Clear MSR_RI */ \
- /* Move original SRR0 and SRR1 into the respective regs */ \
- ld r9,_MSR(r1); \
- mtspr SPRN_SRR1,r9; \
- ld r3,_NIP(r1); \
- mtspr SPRN_SRR0,r3; \
- ld r9,_CTR(r1); \
- mtctr r9; \
- ld r9,_XER(r1); \
- mtxer r9; \
- ld r9,_LINK(r1); \
- mtlr r9; \
- REST_GPR(0, r1); \
- REST_8GPRS(2, r1); \
- REST_GPR(10, r1); \
- ld r11,_CCR(r1); \
- mtcr r11; \
- /* Decrement paca->in_mce. */ \
- lhz r12,PACA_IN_MCE(r13); \
- subi r12,r12,1; \
- sth r12,PACA_IN_MCE(r13); \
- REST_GPR(11, r1); \
- REST_2GPRS(12, r1); \
- /* restore original r1. */ \
- ld r1,GPR1(r1)
-
#ifdef CONFIG_PPC_P7_NAP
/*
* This is an idle wakeup. Low level machine check has already been
@@ -446,72 +1112,8 @@ EXC_COMMON_BEGIN(machine_check_idle_common)
bltlr cr1 /* no state loss, return to idle caller */
b idle_return_gpr_loss
#endif
- /*
- * Handle machine check early in real mode. We come here with
- * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
- */
-EXC_COMMON_BEGIN(machine_check_handle_early)
- std r0,GPR0(r1) /* Save r0 */
- EXCEPTION_PROLOG_COMMON_3(0x200)
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_early
- std r3,RESULT(r1) /* Save result */
- ld r12,_MSR(r1)
-BEGIN_FTR_SECTION
- b 4f
-END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
-#ifdef CONFIG_PPC_P7_NAP
- /*
- * Check if thread was in power saving mode. We come here when any
- * of the following is true:
- * a. thread wasn't in power saving mode
- * b. thread was in power saving mode with no state loss,
- * supervisor state loss or hypervisor state loss.
- *
- * Go back to nap/sleep/winkle mode again if (b) is true.
- */
- BEGIN_FTR_SECTION
- rlwinm. r11,r12,47-31,30,31
- bne machine_check_idle_common
- END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif
-
- /*
- * Check if we are coming from hypervisor userspace. If yes then we
- * continue in host kernel in V mode to deliver the MC event.
- */
- rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
- beq 5f
-4: andi. r11,r12,MSR_PR /* See if coming from user. */
- bne 9f /* continue in V mode if we are. */
-
-5:
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-BEGIN_FTR_SECTION
- /*
- * We are coming from kernel context. Check if we are coming from
- * guest. if yes, then we can continue. We will fall through
- * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest.
- */
- lbz r11,HSTATE_IN_GUEST(r13)
- cmpwi r11,0 /* Check if coming from guest */
- bne 9f /* continue if we are. */
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-#endif
- /*
- * At this point we are not sure about what context we come from.
- * Queue up the MCE event and return from the interrupt.
- * But before that, check if this is an un-recoverable exception.
- * If yes, then stay on emergency stack and panic.
- */
- andi. r11,r12,MSR_RI
- bne 2f
-1: mfspr r11,SPRN_SRR0
- LOAD_HANDLER(r10,unrecover_mce)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
+EXC_COMMON_BEGIN(unrecoverable_mce)
/*
* We are going down. But there are chances that we might get hit by
* another MCE during panic path and we may run into unstable state
@@ -519,100 +1121,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
* when another MCE is hit during panic path, system will checkstop
* and hypervisor will get restarted cleanly by SP.
*/
- li r3,MSR_ME
- andc r10,r10,r3 /* Turn off MSR_ME */
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
-2:
- /*
- * Check if we have successfully handled/recovered from error, if not
- * then stay on emergency stack and panic.
- */
- ld r3,RESULT(r1) /* Load result */
- cmpdi r3,0 /* see if we handled MCE successfully */
-
- beq 1b /* if !handled then panic */
BEGIN_FTR_SECTION
- /*
- * Return from MC interrupt.
- * Queue up the MCE event so that we can log it later, while
- * returning from kernel or opal call.
- */
- bl machine_check_queue_event
- MACHINE_CHECK_HANDLER_WINDUP
- RFI_TO_USER_OR_KERNEL
-FTR_SECTION_ELSE
- /*
- * pSeries: Return from MC interrupt. Before that stay on emergency
- * stack and call machine_check_exception to log the MCE event.
- */
- LOAD_HANDLER(r10,mce_return)
- mtspr SPRN_SRR0,r10
+ li r10,0 /* clear MSR_RI */
+ mtmsrd r10,1
+ bl disable_machine_check
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-9:
- /* Deliver the machine check to host kernel in V mode. */
- MACHINE_CHECK_HANDLER_WINDUP
- SET_SCRATCH0(r13) /* save r13 */
- EXCEPTION_PROLOG_0(PACA_EXMC)
- b machine_check_pSeries_0
+ li r3,MSR_ME
+ andc r10,r10,r3
+ mtmsrd r10
-EXC_COMMON_BEGIN(unrecover_mce)
/* Invoke machine_check_exception to print MCE event and panic. */
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
+
/*
- * We will not reach here. Even if we did, there is no way out. Call
- * unrecoverable_exception and die.
+ * We will not reach here. Even if we did, there is no way out.
+ * Call unrecoverable_exception and die.
*/
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b 1b
-
-EXC_COMMON_BEGIN(mce_return)
- /* Invoke machine_check_exception to print MCE event and return. */
addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_exception
- MACHINE_CHECK_HANDLER_WINDUP
- RFI_TO_KERNEL
+ bl unrecoverable_exception
b .
+
EXC_REAL_BEGIN(data_access, 0x300, 0x80)
-SET_SCRATCH0(r13) /* save r13 */
-EXCEPTION_PROLOG_0(PACA_EXGEN)
- b tramp_real_data_access
+ INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1
EXC_REAL_END(data_access, 0x300, 0x80)
-
-TRAMP_REAL_BEGIN(tramp_real_data_access)
-EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, 0x300)
- /*
- * DAR/DSISR must be read before setting MSR[RI], because
- * a d-side MCE will clobber those registers so is not
- * recoverable if they are live.
- */
- mfspr r10,SPRN_DAR
- mfspr r11,SPRN_DSISR
- std r10,PACA_EXGEN+EX_DAR(r13)
- stw r11,PACA_EXGEN+EX_DSISR(r13)
-EXCEPTION_PROLOG_2(data_access_common, EXC_STD)
-
EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
-SET_SCRATCH0(r13) /* save r13 */
-EXCEPTION_PROLOG_0(PACA_EXGEN)
-EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x300)
- mfspr r10,SPRN_DAR
- mfspr r11,SPRN_DSISR
- std r10,PACA_EXGEN+EX_DAR(r13)
- stw r11,PACA_EXGEN+EX_DSISR(r13)
-EXCEPTION_PROLOG_2_RELON(data_access_common, EXC_STD)
+ INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1
EXC_VIRT_END(data_access, 0x4300, 0x80)
-
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x300)
-
+INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON_BEGIN(data_access_common)
/*
* Here r13 points to the paca, r9 contains the saved CR,
@@ -620,15 +1158,12 @@ EXC_COMMON_BEGIN(data_access_common)
* r9 - r13 are saved in paca->exgen.
* EX_DAR and EX_DSISR have saved DAR/DSISR
*/
- EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
- RECONCILE_IRQ_STATE(r10, r11)
- ld r12,_MSR(r1)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- li r5,0x300
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+ INT_COMMON 0x300, PACA_EXGEN, 1, 1, 1, 1, 1
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
BEGIN_MMU_FTR_SECTION
+ ld r6,_MSR(r1)
+ li r3,0x300
b do_hash_page /* Try to handle as hpte fault */
MMU_FTR_SECTION_ELSE
b handle_page_fault
@@ -636,32 +1171,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
-SET_SCRATCH0(r13) /* save r13 */
-EXCEPTION_PROLOG_0(PACA_EXSLB)
- b tramp_real_data_access_slb
+ INT_HANDLER data_access_slb, 0x380, ool=1, area=PACA_EXSLB, dar=1, kvm=1
EXC_REAL_END(data_access_slb, 0x380, 0x80)
-
-TRAMP_REAL_BEGIN(tramp_real_data_access_slb)
-EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
- mfspr r10,SPRN_DAR
- std r10,PACA_EXSLB+EX_DAR(r13)
-EXCEPTION_PROLOG_2(data_access_slb_common, EXC_STD)
-
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
-SET_SCRATCH0(r13) /* save r13 */
-EXCEPTION_PROLOG_0(PACA_EXSLB)
-EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
- mfspr r10,SPRN_DAR
- std r10,PACA_EXSLB+EX_DAR(r13)
-EXCEPTION_PROLOG_2_RELON(data_access_slb_common, EXC_STD)
+ INT_HANDLER data_access_slb, 0x380, virt=1, area=PACA_EXSLB, dar=1
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
-
-TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
-
+INT_KVM_HANDLER data_access_slb, 0x380, EXC_STD, PACA_EXSLB, 1
EXC_COMMON_BEGIN(data_access_slb_common)
- EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
- ld r4,PACA_EXSLB+EX_DAR(r13)
- std r4,_DAR(r1)
+ INT_COMMON 0x380, PACA_EXSLB, 1, 1, 0, 1, 0
+ ld r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
@@ -684,20 +1202,20 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
b ret_from_except
-EXC_REAL(instruction_access, 0x400, 0x80)
-EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400)
-TRAMP_KVM(PACA_EXGEN, 0x400)
-
+EXC_REAL_BEGIN(instruction_access, 0x400, 0x80)
+ INT_HANDLER instruction_access, 0x400, kvm=1
+EXC_REAL_END(instruction_access, 0x400, 0x80)
+EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)
+ INT_HANDLER instruction_access, 0x400, virt=1
+EXC_VIRT_END(instruction_access, 0x4400, 0x80)
+INT_KVM_HANDLER instruction_access, 0x400, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(instruction_access_common)
- EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
- RECONCILE_IRQ_STATE(r10, r11)
- ld r12,_MSR(r1)
- ld r3,_NIP(r1)
- andis. r4,r12,DSISR_SRR1_MATCH_64S@h
- li r5,0x400
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+ INT_COMMON 0x400, PACA_EXGEN, 1, 1, 1, 2, 2
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
BEGIN_MMU_FTR_SECTION
+ ld r6,_MSR(r1)
+ li r3,0x400
b do_hash_page /* Try to handle as hpte fault */
MMU_FTR_SECTION_ELSE
b handle_page_fault
@@ -705,18 +1223,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
-EXCEPTION_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, KVMTEST_PR, 0x480);
+ INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1
EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
-
EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
-EXCEPTION_RELON_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, NOTEST, 0x480);
+ INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
-
-TRAMP_KVM(PACA_EXSLB, 0x480)
-
+INT_KVM_HANDLER instruction_access_slb, 0x480, EXC_STD, PACA_EXSLB, 0
EXC_COMMON_BEGIN(instruction_access_slb_common)
- EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB)
- ld r4,_NIP(r1)
+ INT_COMMON 0x480, PACA_EXSLB, 1, 1, 0, 2, 0
+ ld r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
@@ -732,79 +1247,44 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
- ld r4,_NIP(r1)
+ ld r4,_DAR(r1)
ld r5,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
b ret_from_except
-
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
- .globl hardware_interrupt_hv;
-hardware_interrupt_hv:
- BEGIN_FTR_SECTION
- MASKABLE_EXCEPTION_HV(0x500, hardware_interrupt_common, IRQS_DISABLED)
- FTR_SECTION_ELSE
- MASKABLE_EXCEPTION(0x500, hardware_interrupt_common, IRQS_DISABLED)
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ INT_HANDLER hardware_interrupt, 0x500, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
-
EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
- .globl hardware_interrupt_relon_hv;
-hardware_interrupt_relon_hv:
- BEGIN_FTR_SECTION
- MASKABLE_RELON_EXCEPTION_HV(0x500, hardware_interrupt_common,
- IRQS_DISABLED)
- FTR_SECTION_ELSE
- __MASKABLE_RELON_EXCEPTION(0x500, hardware_interrupt_common,
- EXC_STD, SOFTEN_TEST_PR, IRQS_DISABLED)
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+ INT_HANDLER hardware_interrupt, 0x500, virt=1, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
-
-TRAMP_KVM(PACA_EXGEN, 0x500)
-TRAMP_KVM_HV(PACA_EXGEN, 0x500)
+INT_KVM_HANDLER hardware_interrupt, 0x500, EXC_HV_OR_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
EXC_REAL_BEGIN(alignment, 0x600, 0x100)
-SET_SCRATCH0(r13) /* save r13 */
-EXCEPTION_PROLOG_0(PACA_EXGEN)
-EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, 0x600)
- mfspr r10,SPRN_DAR
- mfspr r11,SPRN_DSISR
- std r10,PACA_EXGEN+EX_DAR(r13)
- stw r11,PACA_EXGEN+EX_DSISR(r13)
-EXCEPTION_PROLOG_2(alignment_common, EXC_STD)
+ INT_HANDLER alignment, 0x600, dar=1, dsisr=1, kvm=1
EXC_REAL_END(alignment, 0x600, 0x100)
-
EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
-SET_SCRATCH0(r13) /* save r13 */
-EXCEPTION_PROLOG_0(PACA_EXGEN)
-EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x600)
- mfspr r10,SPRN_DAR
- mfspr r11,SPRN_DSISR
- std r10,PACA_EXGEN+EX_DAR(r13)
- stw r11,PACA_EXGEN+EX_DSISR(r13)
-EXCEPTION_PROLOG_2_RELON(alignment_common, EXC_STD)
+ INT_HANDLER alignment, 0x600, virt=1, dar=1, dsisr=1
EXC_VIRT_END(alignment, 0x4600, 0x100)
-
-TRAMP_KVM(PACA_EXGEN, 0x600)
+INT_KVM_HANDLER alignment, 0x600, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(alignment_common)
- EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+ INT_COMMON 0x600, PACA_EXGEN, 1, 1, 1, 1, 1
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl alignment_exception
b ret_from_except
-EXC_REAL(program_check, 0x700, 0x100)
-EXC_VIRT(program_check, 0x4700, 0x100, 0x700)
-TRAMP_KVM(PACA_EXGEN, 0x700)
+EXC_REAL_BEGIN(program_check, 0x700, 0x100)
+ INT_HANDLER program_check, 0x700, kvm=1
+EXC_REAL_END(program_check, 0x700, 0x100)
+EXC_VIRT_BEGIN(program_check, 0x4700, 0x100)
+ INT_HANDLER program_check, 0x700, virt=1
+EXC_VIRT_END(program_check, 0x4700, 0x100)
+INT_KVM_HANDLER program_check, 0x700, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(program_check_common)
/*
* It's possible to receive a TM Bad Thing type program check with
@@ -814,39 +1294,49 @@ EXC_COMMON_BEGIN(program_check_common)
* we switch to the emergency stack if we're taking a TM Bad Thing from
* the kernel.
*/
- li r10,MSR_PR /* Build a mask of MSR_PR .. */
- oris r10,r10,0x200000@h /* .. and SRR1_PROGTM */
- and r10,r10,r12 /* Mask SRR1 with that. */
- srdi r10,r10,8 /* Shift it so we can compare */
- cmpldi r10,(0x200000 >> 8) /* .. with an immediate. */
- bne 1f /* If != go to normal path. */
-
- /* SRR1 had PR=0 and SRR1_PROGTM=1, so use the emergency stack */
- andi. r10,r12,MSR_PR; /* Set CR0 correctly for label */
+
+ andi. r10,r12,MSR_PR
+ bne 2f /* If userspace, go normal path */
+
+ andis. r10,r12,(SRR1_PROGTM)@h
+ bne 1f /* If TM, emergency */
+
+ cmpdi r1,-INT_FRAME_SIZE /* check if r1 is in userspace */
+ blt 2f /* normal path if not */
+
+ /* Use the emergency stack */
+1: andi. r10,r12,MSR_PR /* Set CR0 correctly for label */
/* 3 in EXCEPTION_PROLOG_COMMON */
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- b 3f /* Jump into the macro !! */
-1: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
+ INT_COMMON 0x700, PACA_EXGEN, 0, 1, 1, 0, 0
+ b 3f
+2:
+ INT_COMMON 0x700, PACA_EXGEN, 1, 1, 1, 0, 0
+3:
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl program_check_exception
b ret_from_except
-EXC_REAL(fp_unavailable, 0x800, 0x100)
-EXC_VIRT(fp_unavailable, 0x4800, 0x100, 0x800)
-TRAMP_KVM(PACA_EXGEN, 0x800)
+EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
+ INT_HANDLER fp_unavailable, 0x800, kvm=1
+EXC_REAL_END(fp_unavailable, 0x800, 0x100)
+EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100)
+ INT_HANDLER fp_unavailable, 0x800, virt=1
+EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
+INT_KVM_HANDLER fp_unavailable, 0x800, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(fp_unavailable_common)
- EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
+ INT_COMMON 0x800, PACA_EXGEN, 1, 1, 0, 0, 0
bne 1f /* if from user, just load it up */
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_fp_unavailable_exception
- BUG_OPCODE
+0: trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
1:
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
@@ -869,21 +1359,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
-EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
-EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0x900)
+EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
+ INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(decrementer, 0x900, 0x80)
+EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
+ INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED
+EXC_VIRT_END(decrementer, 0x4900, 0x80)
+INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
-EXC_REAL_HV(hdecrementer, 0x980, 0x80)
-EXC_VIRT_HV(hdecrementer, 0x4980, 0x80, 0x980)
-TRAMP_KVM_HV(PACA_EXGEN, 0x980)
+EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80)
+ INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(hdecrementer, 0x980, 0x80)
+EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)
+ INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
+INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
-EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100, IRQS_DISABLED)
-EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00, IRQS_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0xa00)
+EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
+ INT_HANDLER doorbell_super, 0xa00, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(doorbell_super, 0xa00, 0x100)
+EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
+ INT_HANDLER doorbell_super, 0xa00, virt=1, bitmask=IRQS_DISABLED
+EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
+INT_KVM_HANDLER doorbell_super, 0xa00, EXC_STD, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
#else
@@ -891,17 +1393,13 @@ EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception)
#endif
-EXC_REAL(trap_0b, 0xb00, 0x100)
-EXC_VIRT(trap_0b, 0x4b00, 0x100, 0xb00)
-TRAMP_KVM(PACA_EXGEN, 0xb00)
-EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
+EXC_REAL_NONE(0xb00, 0x100)
+EXC_VIRT_NONE(0x4b00, 0x100)
/*
* system call / hypercall (0xc00, 0x4c00)
*
* The system call exception is invoked with "sc 0" and does not alter HV bit.
- * There is support for kernel code to invoke system calls but there are no
- * in-tree users.
*
* The hypercall is invoked with "sc 1" and sets HV=1.
*
@@ -910,7 +1408,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
*
* Call convention:
*
- * syscall register convention is in Documentation/powerpc/syscall64-abi.txt
+ * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
*
* For hypercalls, the register convention is as follows:
* r0 volatile
@@ -932,6 +1430,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
* without saving, though xer is not a good idea to use, as hardware may
* interpret some bits so it may be costly to change them.
*/
+.macro SYSTEM_CALL virt
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
* There is a little bit of juggling to get syscall and hcall
@@ -941,95 +1440,66 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
* Userspace syscalls have already saved the PPR, hcalls must save
* it before setting HMT_MEDIUM.
*/
-#define SYSCALL_KVMTEST \
- mtctr r13; \
- GET_PACA(r13); \
- std r10,PACA_EXGEN+EX_R10(r13); \
- INTERRUPT_TO_KERNEL; \
- KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
- HMT_MEDIUM; \
- mfctr r9;
-
+ mtctr r13
+ GET_PACA(r13)
+ std r10,PACA_EXGEN+EX_R10(r13)
+ INTERRUPT_TO_KERNEL
+ KVMTEST system_call EXC_STD 0xc00 /* uses r10, branch to system_call_kvm */
+ mfctr r9
#else
-#define SYSCALL_KVMTEST \
- HMT_MEDIUM; \
- mr r9,r13; \
- GET_PACA(r13); \
- INTERRUPT_TO_KERNEL;
+ mr r9,r13
+ GET_PACA(r13)
+ INTERRUPT_TO_KERNEL
#endif
-
-#define LOAD_SYSCALL_HANDLER(reg) \
- __LOAD_HANDLER(reg, system_call_common)
-
-/*
- * After SYSCALL_KVMTEST, we reach here with PACA in r13, r13 in r9,
- * and HMT_MEDIUM.
- */
-#define SYSCALL_REAL \
- mfspr r11,SPRN_SRR0 ; \
- mfspr r12,SPRN_SRR1 ; \
- LOAD_SYSCALL_HANDLER(r10) ; \
- mtspr SPRN_SRR0,r10 ; \
- ld r10,PACAKMSR(r13) ; \
- mtspr SPRN_SRR1,r10 ; \
- RFI_TO_KERNEL ; \
- b . ; /* prevent speculative execution */
#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
-#define SYSCALL_FASTENDIAN_TEST \
-BEGIN_FTR_SECTION \
- cmpdi r0,0x1ebe ; \
- beq- 1f ; \
-END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
-
-#define SYSCALL_FASTENDIAN \
- /* Fast LE/BE switch system call */ \
-1: mfspr r12,SPRN_SRR1 ; \
- xori r12,r12,MSR_LE ; \
- mtspr SPRN_SRR1,r12 ; \
- mr r13,r9 ; \
- RFI_TO_USER ; /* return to userspace */ \
- b . ; /* prevent speculative execution */
-#else
-#define SYSCALL_FASTENDIAN_TEST
-#define SYSCALL_FASTENDIAN
-#endif /* CONFIG_PPC_FAST_ENDIAN_SWITCH */
+BEGIN_FTR_SECTION
+ cmpdi r0,0x1ebe
+ beq- 1f
+END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
+#endif
-#if defined(CONFIG_RELOCATABLE)
- /*
- * We can't branch directly so we do it via the CTR which
- * is volatile across system calls.
- */
-#define SYSCALL_VIRT \
- LOAD_SYSCALL_HANDLER(r10) ; \
- mtctr r10 ; \
- mfspr r11,SPRN_SRR0 ; \
- mfspr r12,SPRN_SRR1 ; \
- li r10,MSR_RI ; \
- mtmsrd r10,1 ; \
- bctr ;
+ /* We reach here with PACA in r13, r13 in r9. */
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
+
+ HMT_MEDIUM
+
+ .if ! \virt
+ __LOAD_HANDLER(r10, system_call_common)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ mtspr SPRN_SRR1,r10
+ RFI_TO_KERNEL
+ b . /* prevent speculative execution */
+ .else
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set RI (EE=0) */
+#ifdef CONFIG_RELOCATABLE
+ __LOAD_HANDLER(r10, system_call_common)
+ mtctr r10
+ bctr
#else
- /* We can branch directly */
-#define SYSCALL_VIRT \
- mfspr r11,SPRN_SRR0 ; \
- mfspr r12,SPRN_SRR1 ; \
- li r10,MSR_RI ; \
- mtmsrd r10,1 ; /* Set RI (EE=0) */ \
- b system_call_common ;
+ b system_call_common
#endif
+ .endif
+
+#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
+ /* Fast LE/BE switch system call */
+1: mfspr r12,SPRN_SRR1
+ xori r12,r12,MSR_LE
+ mtspr SPRN_SRR1,r12
+ mr r13,r9
+ RFI_TO_USER /* return to userspace */
+ b . /* prevent speculative execution */
+#endif
+.endm
EXC_REAL_BEGIN(system_call, 0xc00, 0x100)
- SYSCALL_KVMTEST /* loads PACA into r13, and saves r13 to r9 */
- SYSCALL_FASTENDIAN_TEST
- SYSCALL_REAL
- SYSCALL_FASTENDIAN
+ SYSTEM_CALL 0
EXC_REAL_END(system_call, 0xc00, 0x100)
-
EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
- SYSCALL_KVMTEST /* loads PACA into r13, and saves r13 to r9 */
- SYSCALL_FASTENDIAN_TEST
- SYSCALL_VIRT
- SYSCALL_FASTENDIAN
+ SYSTEM_CALL 1
EXC_VIRT_END(system_call, 0x4c00, 0x100)
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
@@ -1040,7 +1510,7 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100)
* ctr = orig r13
* orig r10 saved in PACA
*/
-TRAMP_KVM_BEGIN(do_kvm_0xc00)
+TRAMP_KVM_BEGIN(system_call_kvm)
/*
* Save the PPR (on systems that support it) before changing to
* HMT_MEDIUM. That allows the KVM code to save that value into the
@@ -1053,32 +1523,33 @@ TRAMP_KVM_BEGIN(do_kvm_0xc00)
SET_SCRATCH0(r10)
std r9,PACA_EXGEN+EX_R9(r13)
mfcr r9
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
+ KVM_HANDLER 0xc00, EXC_STD, PACA_EXGEN, 0
#endif
-EXC_REAL(single_step, 0xd00, 0x100)
-EXC_VIRT(single_step, 0x4d00, 0x100, 0xd00)
-TRAMP_KVM(PACA_EXGEN, 0xd00)
+EXC_REAL_BEGIN(single_step, 0xd00, 0x100)
+ INT_HANDLER single_step, 0xd00, kvm=1
+EXC_REAL_END(single_step, 0xd00, 0x100)
+EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
+ INT_HANDLER single_step, 0xd00, virt=1
+EXC_VIRT_END(single_step, 0x4d00, 0x100)
+INT_KVM_HANDLER single_step, 0xd00, EXC_STD, PACA_EXGEN, 0
EXC_COMMON(single_step_common, 0xd00, single_step_exception)
-EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0x20)
-EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x20, 0xe00)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00)
+
+EXC_REAL_BEGIN(h_data_storage, 0xe00, 0x20)
+ INT_HANDLER h_data_storage, 0xe00, ool=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+EXC_REAL_END(h_data_storage, 0xe00, 0x20)
+EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20)
+ INT_HANDLER h_data_storage, 0xe00, ool=1, virt=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+EXC_VIRT_END(h_data_storage, 0x4e00, 0x20)
+INT_KVM_HANDLER h_data_storage, 0xe00, EXC_HV, PACA_EXGEN, 1
EXC_COMMON_BEGIN(h_data_storage_common)
- mfspr r10,SPRN_HDAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_HDSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
+ INT_COMMON 0xe00, PACA_EXGEN, 1, 1, 1, 1, 1
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
- ld r4,PACA_EXGEN+EX_DAR(r13)
- lwz r5,PACA_EXGEN+EX_DSISR(r13)
- std r4,_DAR(r1)
- std r5,_DSISR(r1)
+ ld r4,_DAR(r1)
li r5,SIGSEGV
bl bad_page_fault
MMU_FTR_SECTION_ELSE
@@ -1087,15 +1558,23 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
b ret_from_except
-EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0x20)
-EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x20, 0xe20)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe20)
+EXC_REAL_BEGIN(h_instr_storage, 0xe20, 0x20)
+ INT_HANDLER h_instr_storage, 0xe20, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(h_instr_storage, 0xe20, 0x20)
+EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20)
+ INT_HANDLER h_instr_storage, 0xe20, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
+INT_KVM_HANDLER h_instr_storage, 0xe20, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
-EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0x20)
-EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x20, 0xe40)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe40)
+EXC_REAL_BEGIN(emulation_assist, 0xe40, 0x20)
+ INT_HANDLER emulation_assist, 0xe40, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(emulation_assist, 0xe40, 0x20)
+EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20)
+ INT_HANDLER emulation_assist, 0xe40, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
+INT_KVM_HANDLER emulation_assist, 0xe40, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
@@ -1104,69 +1583,55 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
* first, and then eventaully from there to the trampoline to get into virtual
* mode.
*/
-__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early)
-__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60, IRQS_DISABLED)
+EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20)
+ INT_HANDLER hmi_exception, 0xe60, ool=1, early=1, hsrr=EXC_HV, ri=0, kvm=1
+EXC_REAL_END(hmi_exception, 0xe60, 0x20)
EXC_VIRT_NONE(0x4e60, 0x20)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
-TRAMP_REAL_BEGIN(hmi_exception_early)
- EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60)
+INT_KVM_HANDLER hmi_exception, 0xe60, EXC_HV, PACA_EXGEN, 0
+EXC_COMMON_BEGIN(hmi_exception_early_common)
+ mtctr r10 /* Restore ctr */
+ mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
- EXCEPTION_PROLOG_COMMON_1()
+
/* We don't touch AMR here, we never go to virtual mode */
- EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
- EXCEPTION_PROLOG_COMMON_3(0xe60)
+ INT_COMMON 0xe60, PACA_EXGEN, 0, 0, 0, 0, 0
+
addi r3,r1,STACK_FRAME_OVERHEAD
- BRANCH_LINK_TO_FAR(DOTSYM(hmi_exception_realmode)) /* Function call ABI */
+ bl hmi_exception_realmode
cmpdi cr0,r3,0
-
- /* Windup the stack. */
- /* Move original HSRR0 and HSRR1 into the respective regs */
- ld r9,_MSR(r1)
- mtspr SPRN_HSRR1,r9
- ld r3,_NIP(r1)
- mtspr SPRN_HSRR0,r3
- ld r9,_CTR(r1)
- mtctr r9
- ld r9,_XER(r1)
- mtxer r9
- ld r9,_LINK(r1)
- mtlr r9
- REST_GPR(0, r1)
- REST_8GPRS(2, r1)
- REST_GPR(10, r1)
- ld r11,_CCR(r1)
- REST_2GPRS(12, r1)
bne 1f
- mtcr r11
- REST_GPR(11, r1)
- ld r1,GPR1(r1)
- HRFI_TO_USER_OR_KERNEL
-1: mtcr r11
- REST_GPR(11, r1)
- ld r1,GPR1(r1)
+ EXCEPTION_RESTORE_REGS EXC_HV
+ HRFI_TO_USER_OR_KERNEL
+1:
/*
* Go to virtual mode and pull the HMI event information from
* firmware.
*/
- .globl hmi_exception_after_realmode
-hmi_exception_after_realmode:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b tramp_real_hmi_exception
+ EXCEPTION_RESTORE_REGS EXC_HV
+ INT_HANDLER hmi_exception, 0xe60, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
EXC_COMMON_BEGIN(hmi_exception_common)
-EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception,
- ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
+ INT_COMMON 0xe60, PACA_EXGEN, 1, 1, 1, 0, 0
+ FINISH_NAP
+ RUNLATCH_ON
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl handle_hmi_exception
+ b ret_from_except
-EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED)
-EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe80)
+
+EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
+ INT_HANDLER h_doorbell, 0xe80, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(h_doorbell, 0xe80, 0x20)
+EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
+ INT_HANDLER h_doorbell, 0xe80, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
+INT_KVM_HANDLER h_doorbell, 0xe80, EXC_HV, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
#else
@@ -1174,9 +1639,13 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
#endif
-EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20, IRQS_DISABLED)
-EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0, IRQS_DISABLED)
-TRAMP_KVM_HV(PACA_EXGEN, 0xea0)
+EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
+ INT_HANDLER h_virt_irq, 0xea0, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(h_virt_irq, 0xea0, 0x20)
+EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
+ INT_HANDLER h_virt_irq, 0xea0, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
+INT_KVM_HANDLER h_virt_irq, 0xea0, EXC_HV, PACA_EXGEN, 0
EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
@@ -1186,17 +1655,25 @@ EXC_REAL_NONE(0xee0, 0x20)
EXC_VIRT_NONE(0x4ee0, 0x20)
-EXC_REAL_OOL_MASKABLE(performance_monitor, 0xf00, 0x20, IRQS_PMI_DISABLED)
-EXC_VIRT_OOL_MASKABLE(performance_monitor, 0x4f00, 0x20, 0xf00, IRQS_PMI_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0xf00)
+EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
+ INT_HANDLER performance_monitor, 0xf00, ool=1, bitmask=IRQS_PMI_DISABLED, kvm=1
+EXC_REAL_END(performance_monitor, 0xf00, 0x20)
+EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
+ INT_HANDLER performance_monitor, 0xf00, ool=1, virt=1, bitmask=IRQS_PMI_DISABLED
+EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
+INT_KVM_HANDLER performance_monitor, 0xf00, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
-EXC_REAL_OOL(altivec_unavailable, 0xf20, 0x20)
-EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x20, 0xf20)
-TRAMP_KVM(PACA_EXGEN, 0xf20)
+EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
+ INT_HANDLER altivec_unavailable, 0xf20, ool=1, kvm=1
+EXC_REAL_END(altivec_unavailable, 0xf20, 0x20)
+EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20)
+ INT_HANDLER altivec_unavailable, 0xf20, ool=1, virt=1
+EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20)
+INT_KVM_HANDLER altivec_unavailable, 0xf20, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(altivec_unavailable_common)
- EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
+ INT_COMMON 0xf20, PACA_EXGEN, 1, 1, 0, 0, 0
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
beq 1f
@@ -1229,11 +1706,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
b ret_from_except
-EXC_REAL_OOL(vsx_unavailable, 0xf40, 0x20)
-EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x20, 0xf40)
-TRAMP_KVM(PACA_EXGEN, 0xf40)
+EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
+ INT_HANDLER vsx_unavailable, 0xf40, ool=1, kvm=1
+EXC_REAL_END(vsx_unavailable, 0xf40, 0x20)
+EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20)
+ INT_HANDLER vsx_unavailable, 0xf40, ool=1, virt=1
+EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20)
+INT_KVM_HANDLER vsx_unavailable, 0xf40, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(vsx_unavailable_common)
- EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
+ INT_COMMON 0xf40, PACA_EXGEN, 1, 1, 0, 0, 0
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
beq 1f
@@ -1265,15 +1746,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
b ret_from_except
-EXC_REAL_OOL(facility_unavailable, 0xf60, 0x20)
-EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x20, 0xf60)
-TRAMP_KVM(PACA_EXGEN, 0xf60)
+EXC_REAL_BEGIN(facility_unavailable, 0xf60, 0x20)
+ INT_HANDLER facility_unavailable, 0xf60, ool=1, kvm=1
+EXC_REAL_END(facility_unavailable, 0xf60, 0x20)
+EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20)
+ INT_HANDLER facility_unavailable, 0xf60, ool=1, virt=1
+EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
+INT_KVM_HANDLER facility_unavailable, 0xf60, EXC_STD, PACA_EXGEN, 0
EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception)
-EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0x20)
-EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x20, 0xf80)
-TRAMP_KVM_HV(PACA_EXGEN, 0xf80)
+EXC_REAL_BEGIN(h_facility_unavailable, 0xf80, 0x20)
+ INT_HANDLER h_facility_unavailable, 0xf80, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20)
+EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20)
+ INT_HANDLER h_facility_unavailable, 0xf80, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
+INT_KVM_HANDLER h_facility_unavailable, 0xf80, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception)
@@ -1290,9 +1779,11 @@ EXC_REAL_NONE(0x1100, 0x100)
EXC_VIRT_NONE(0x5100, 0x100)
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_system_error, 0x1200, 0x100)
+EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100)
+ INT_HANDLER cbe_system_error, 0x1200, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_system_error, 0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1200)
+INT_KVM_HANDLER cbe_system_error, 0x1200, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1200, 0x100)
@@ -1300,38 +1791,43 @@ EXC_VIRT_NONE(0x5200, 0x100)
#endif
-EXC_REAL(instruction_breakpoint, 0x1300, 0x100)
-EXC_VIRT(instruction_breakpoint, 0x5300, 0x100, 0x1300)
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x1300)
+EXC_REAL_BEGIN(instruction_breakpoint, 0x1300, 0x100)
+ INT_HANDLER instruction_breakpoint, 0x1300, kvm=1
+EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100)
+EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100)
+ INT_HANDLER instruction_breakpoint, 0x1300, virt=1
+EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
+INT_KVM_HANDLER instruction_breakpoint, 0x1300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception)
+
EXC_REAL_NONE(0x1400, 0x100)
EXC_VIRT_NONE(0x5400, 0x100)
EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
- mtspr SPRN_SPRG_HSCRATCH0,r13
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500)
-
+ INT_HANDLER denorm_exception_hv, 0x1500, early=2, hsrr=EXC_HV
#ifdef CONFIG_PPC_DENORMALISATION
mfspr r10,SPRN_HSRR1
andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
bne+ denorm_assist
#endif
-
- KVMTEST_HV(0x1500)
- EXCEPTION_PROLOG_2(denorm_common, EXC_HV)
+ KVMTEST denorm_exception_hv, EXC_HV 0x1500
+ INT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV, 1
EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
#ifdef CONFIG_PPC_DENORMALISATION
EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100)
- b exc_real_0x1500_denorm_exception_hv
+ INT_HANDLER denorm_exception, 0x1500, 0, 2, 1, EXC_HV, PACA_EXGEN, 1, 0, 0, 0, 0
+ mfspr r10,SPRN_HSRR1
+ andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ bne+ denorm_assist
+ INT_VIRT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV
EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
#else
EXC_VIRT_NONE(0x5500, 0x100)
#endif
-TRAMP_KVM_HV(PACA_EXGEN, 0x1500)
+INT_KVM_HANDLER denorm_exception_hv, 0x1500, EXC_HV, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DENORMALISATION
TRAMP_REAL_BEGIN(denorm_assist)
@@ -1346,12 +1842,11 @@ BEGIN_FTR_SECTION
mtmsrd r10
sync
-#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1
-#define FMR4(n) FMR2(n) ; FMR2(n+2)
-#define FMR8(n) FMR4(n) ; FMR4(n+4)
-#define FMR16(n) FMR8(n) ; FMR8(n+8)
-#define FMR32(n) FMR16(n) ; FMR16(n+16)
- FMR32(0)
+ .Lreg=0
+ .rept 32
+ fmr .Lreg,.Lreg
+ .Lreg=.Lreg+1
+ .endr
FTR_SECTION_ELSE
/*
@@ -1363,12 +1858,11 @@ FTR_SECTION_ELSE
mtmsrd r10
sync
-#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1)
-#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2)
-#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4)
-#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8)
-#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16)
- XVCPSGNDP32(0)
+ .Lreg=0
+ .rept 32
+ XVCPSGNDP(.Lreg,.Lreg,.Lreg)
+ .Lreg=.Lreg+1
+ .endr
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206)
@@ -1379,7 +1873,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
* To denormalise we need to move a copy of the register to itself.
* For POWER8 we need to do that for all 64 VSX registers
*/
- XVCPSGNDP32(32)
+ .Lreg=32
+ .rept 32
+ XVCPSGNDP(.Lreg,.Lreg,.Lreg)
+ .Lreg=.Lreg+1
+ .endr
+
denorm_done:
mfspr r11,SPRN_HSRR0
subi r11,r11,4
@@ -1403,9 +1902,11 @@ EXC_COMMON(denorm_common, 0x1500, unknown_exception)
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_maintenance, 0x1600, 0x100)
+EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100)
+ INT_HANDLER cbe_maintenance, 0x1600, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_maintenance, 0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1600)
+INT_KVM_HANDLER cbe_maintenance, 0x1600, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1600, 0x100)
@@ -1413,9 +1914,13 @@ EXC_VIRT_NONE(0x5600, 0x100)
#endif
-EXC_REAL(altivec_assist, 0x1700, 0x100)
-EXC_VIRT(altivec_assist, 0x5700, 0x100, 0x1700)
-TRAMP_KVM(PACA_EXGEN, 0x1700)
+EXC_REAL_BEGIN(altivec_assist, 0x1700, 0x100)
+ INT_HANDLER altivec_assist, 0x1700, kvm=1
+EXC_REAL_END(altivec_assist, 0x1700, 0x100)
+EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100)
+ INT_HANDLER altivec_assist, 0x1700, virt=1
+EXC_VIRT_END(altivec_assist, 0x5700, 0x100)
+INT_KVM_HANDLER altivec_assist, 0x1700, EXC_STD, PACA_EXGEN, 0
#ifdef CONFIG_ALTIVEC
EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception)
#else
@@ -1424,15 +1929,18 @@ EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception)
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_thermal, 0x1800, 0x100)
+EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100)
+ INT_HANDLER cbe_thermal, 0x1800, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_thermal, 0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1800)
+INT_KVM_HANDLER cbe_thermal, 0x1800, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
#endif
+
#ifdef CONFIG_PPC_WATCHDOG
#define MASKED_DEC_HANDLER_LABEL 3f
@@ -1442,7 +1950,7 @@ EXC_VIRT_NONE(0x5800, 0x100)
std r12,PACA_EXGEN+EX_R12(r13); \
GET_SCRATCH0(r10); \
std r10,PACA_EXGEN+EX_R13(r13); \
- EXCEPTION_PROLOG_2(soft_nmi_common, _H)
+ INT_SAVE_SRR_AND_JUMP soft_nmi_common, _H, 1
/*
* Branch to soft_nmi_interrupt using the emergency stack. The emergency
@@ -1457,9 +1965,10 @@ EXC_COMMON_BEGIN(soft_nmi_common)
mr r10,r1
ld r1,PACAEMERGSP(r13)
subi r1,r1,INT_FRAME_SIZE
- EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900,
- system_reset, soft_nmi_interrupt,
- ADD_NVGPRS;ADD_RECONCILE)
+ INT_COMMON 0x900, PACA_EXGEN, 0, 1, 1, 0, 0
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl soft_nmi_interrupt
b ret_from_except
#else /* CONFIG_PPC_WATCHDOG */
@@ -1477,35 +1986,50 @@ EXC_COMMON_BEGIN(soft_nmi_common)
* - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
* This is called with r10 containing the value to OR to the paca field.
*/
-#define MASKED_INTERRUPT(_H) \
-masked_##_H##interrupt: \
- std r11,PACA_EXGEN+EX_R11(r13); \
- lbz r11,PACAIRQHAPPENED(r13); \
- or r11,r11,r10; \
- stb r11,PACAIRQHAPPENED(r13); \
- cmpwi r10,PACA_IRQ_DEC; \
- bne 1f; \
- lis r10,0x7fff; \
- ori r10,r10,0xffff; \
- mtspr SPRN_DEC,r10; \
- b MASKED_DEC_HANDLER_LABEL; \
-1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK; \
- beq 2f; \
- mfspr r10,SPRN_##_H##SRR1; \
- xori r10,r10,MSR_EE; /* clear MSR_EE */ \
- mtspr SPRN_##_H##SRR1,r10; \
- ori r11,r11,PACA_IRQ_HARD_DIS; \
- stb r11,PACAIRQHAPPENED(r13); \
-2: /* done */ \
- mtcrf 0x80,r9; \
- std r1,PACAR1(r13); \
- ld r9,PACA_EXGEN+EX_R9(r13); \
- ld r10,PACA_EXGEN+EX_R10(r13); \
- ld r11,PACA_EXGEN+EX_R11(r13); \
- /* returns to kernel where r13 must be set up, so don't restore it */ \
- ##_H##RFI_TO_KERNEL; \
- b .; \
- MASKED_DEC_HANDLER(_H)
+.macro MASKED_INTERRUPT hsrr
+ .if \hsrr
+masked_Hinterrupt:
+ .else
+masked_interrupt:
+ .endif
+ std r11,PACA_EXGEN+EX_R11(r13)
+ lbz r11,PACAIRQHAPPENED(r13)
+ or r11,r11,r10
+ stb r11,PACAIRQHAPPENED(r13)
+ cmpwi r10,PACA_IRQ_DEC
+ bne 1f
+ lis r10,0x7fff
+ ori r10,r10,0xffff
+ mtspr SPRN_DEC,r10
+ b MASKED_DEC_HANDLER_LABEL
+1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK
+ beq 2f
+ .if \hsrr
+ mfspr r10,SPRN_HSRR1
+ xori r10,r10,MSR_EE /* clear MSR_EE */
+ mtspr SPRN_HSRR1,r10
+ .else
+ mfspr r10,SPRN_SRR1
+ xori r10,r10,MSR_EE /* clear MSR_EE */
+ mtspr SPRN_SRR1,r10
+ .endif
+ ori r11,r11,PACA_IRQ_HARD_DIS
+ stb r11,PACAIRQHAPPENED(r13)
+2: /* done */
+ mtcrf 0x80,r9
+ std r1,PACAR1(r13)
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ /* returns to kernel where r13 must be set up, so don't restore it */
+ .if \hsrr
+ HRFI_TO_KERNEL
+ .else
+ RFI_TO_KERNEL
+ .endif
+ b .
+ MASKED_DEC_HANDLER(\hsrr\())
+.endm
TRAMP_REAL_BEGIN(stf_barrier_fallback)
std r9,PACA_EXRFI+EX_R9(r13)
@@ -1612,8 +2136,8 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
* cannot reach these if they are put there.
*/
USE_FIXED_SECTION(virt_trampolines)
- MASKED_INTERRUPT()
- MASKED_INTERRUPT(H)
+ MASKED_INTERRUPT EXC_STD
+ MASKED_INTERRUPT EXC_HV
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
@@ -1699,6 +2223,35 @@ CLOSE_FIXED_SECTION(virt_trampolines);
USE_TEXT_SECTION()
+/* MSR[RI] should be clear because this uses SRR[01] */
+enable_machine_check:
+ mflr r0
+ bcl 20,31,$+4
+0: mflr r3
+ addi r3,r3,(1f - 0b)
+ mtspr SPRN_SRR0,r3
+ mfmsr r3
+ ori r3,r3,MSR_ME
+ mtspr SPRN_SRR1,r3
+ RFI_TO_KERNEL
+1: mtlr r0
+ blr
+
+/* MSR[RI] should be clear because this uses SRR[01] */
+disable_machine_check:
+ mflr r0
+ bcl 20,31,$+4
+0: mflr r3
+ addi r3,r3,(1f - 0b)
+ mtspr SPRN_SRR0,r3
+ mfmsr r3
+ li r4,MSR_ME
+ andc r3,r3,r4
+ mtspr SPRN_SRR1,r3
+ RFI_TO_KERNEL
+1: mtlr r0
+ blr
+
/*
* Hash table stuff
*/
@@ -1707,7 +2260,7 @@ do_hash_page:
#ifdef CONFIG_PPC_BOOK3S_64
lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
ori r0,r0,DSISR_BAD_FAULT_64S@l
- and. r0,r4,r0 /* weird error? */
+ and. r0,r5,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
ld r11, PACA_THREAD_INFO(r13)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
@@ -1715,15 +2268,13 @@ do_hash_page:
bne 77f /* then don't call hash_page now */
/*
- * r3 contains the faulting address
- * r4 msr
- * r5 contains the trap number
- * r6 contains dsisr
+ * r3 contains the trap number
+ * r4 contains the faulting address
+ * r5 contains dsisr
+ * r6 msr
*
* at return r3 = 0 for success, 1 for page fault, negative for error
*/
- mr r4,r12
- ld r6,_DSISR(r1)
bl __hash_page /* build HPTE if possible */
cmpdi r3,0 /* see if __hash_page succeeded */
@@ -1733,24 +2284,23 @@ do_hash_page:
/* Error */
blt- 13f
- /* Reload DSISR into r4 for the DABR check below */
- ld r4,_DSISR(r1)
+ /* Reload DAR/DSISR into r4/r5 for the DABR check below */
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
#endif /* CONFIG_PPC_BOOK3S_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
-11: andis. r0,r4,DSISR_DABRMATCH@h
+11: andis. r0,r5,DSISR_DABRMATCH@h
bne- handle_dabr_fault
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault
cmpdi r3,0
- beq+ 12f
+ beq+ ret_from_except_lite
bl save_nvgprs
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
- lwz r4,_DAR(r1)
+ ld r4,_DAR(r1)
bl bad_page_fault
b ret_from_except
@@ -1761,7 +2311,12 @@ handle_dabr_fault:
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_break
-12: b ret_from_except_lite
+ /*
+ * do_break() may have changed the NV GPRS while handling a breakpoint.
+ * If so, we need to restore them with their updated values. Don't use
+ * ret_from_except_lite here.
+ */
+ b ret_from_except
#ifdef CONFIG_PPC_BOOK3S_64
@@ -1784,74 +2339,12 @@ handle_dabr_fault:
* the access, or panic if there isn't a handler.
*/
77: bl save_nvgprs
- mr r4,r3
addi r3,r1,STACK_FRAME_OVERHEAD
li r5,SIGSEGV
bl bad_page_fault
b ret_from_except
/*
- * Here we have detected that the kernel stack pointer is bad.
- * R9 contains the saved CR, r13 points to the paca,
- * r10 contains the (bad) kernel stack pointer,
- * r11 and r12 contain the saved SRR0 and SRR1.
- * We switch to using an emergency stack, save the registers there,
- * and call kernel_bad_stack(), which panics.
- */
-bad_stack:
- ld r1,PACAEMERGSP(r13)
- subi r1,r1,64+INT_FRAME_SIZE
- std r9,_CCR(r1)
- std r10,GPR1(r1)
- std r11,_NIP(r1)
- std r12,_MSR(r1)
- mfspr r11,SPRN_DAR
- mfspr r12,SPRN_DSISR
- std r11,_DAR(r1)
- std r12,_DSISR(r1)
- mflr r10
- mfctr r11
- mfxer r12
- std r10,_LINK(r1)
- std r11,_CTR(r1)
- std r12,_XER(r1)
- SAVE_GPR(0,r1)
- SAVE_GPR(2,r1)
- ld r10,EX_R3(r3)
- std r10,GPR3(r1)
- SAVE_GPR(4,r1)
- SAVE_4GPRS(5,r1)
- ld r9,EX_R9(r3)
- ld r10,EX_R10(r3)
- SAVE_2GPRS(9,r1)
- ld r9,EX_R11(r3)
- ld r10,EX_R12(r3)
- ld r11,EX_R13(r3)
- std r9,GPR11(r1)
- std r10,GPR12(r1)
- std r11,GPR13(r1)
-BEGIN_FTR_SECTION
- ld r10,EX_CFAR(r3)
- std r10,ORIG_GPR3(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
- SAVE_8GPRS(14,r1)
- SAVE_10GPRS(22,r1)
- lhz r12,PACA_TRAP_SAVE(r13)
- std r12,_TRAP(r1)
- addi r11,r1,INT_FRAME_SIZE
- std r11,0(r1)
- li r12,0
- std r12,0(r11)
- ld r2,PACATOC(r13)
- ld r11,exception_marker@toc(r2)
- std r12,RESULT(r1)
- std r11,STACK_FRAME_OVERHEAD-16(r1)
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl kernel_bad_stack
- b 1b
-_ASM_NOKPROBE_SYMBOL(bad_stack);
-
-/*
* When doorbell is triggered from system reset wakeup, the message is
* not cleared, so it would fire again when EE is enabled.
*
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 4eab97292cc2..ed59855430b9 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -28,24 +28,22 @@
#include <asm/debugfs.h>
#include <asm/page.h>
#include <asm/prom.h>
-#include <asm/rtas.h>
#include <asm/fadump.h>
+#include <asm/fadump-internal.h>
#include <asm/setup.h>
static struct fw_dump fw_dump;
-static struct fadump_mem_struct fdm;
-static const struct fadump_mem_struct *fdm_active;
-#ifdef CONFIG_CMA
-static struct cma *fadump_cma;
-#endif
+static void __init fadump_reserve_crash_area(u64 base);
+
+#ifndef CONFIG_PRESERVE_FA_DUMP
static DEFINE_MUTEX(fadump_mutex);
-struct fad_crash_memory_ranges *crash_memory_ranges;
-int crash_memory_ranges_size;
-int crash_mem_ranges;
-int max_crash_mem_ranges;
+struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 };
+struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 };
#ifdef CONFIG_CMA
+static struct cma *fadump_cma;
+
/*
* fadump_cma_init() - Initialize CMA area from a fadump reserved memory
*
@@ -107,84 +105,45 @@ static int __init fadump_cma_init(void) { return 1; }
#endif /* CONFIG_CMA */
/* Scan the Firmware Assisted dump configuration details. */
-int __init early_init_dt_scan_fw_dump(unsigned long node,
- const char *uname, int depth, void *data)
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data)
{
- const __be32 *sections;
- int i, num_sections;
- int size;
- const __be32 *token;
-
- if (depth != 1 || strcmp(uname, "rtas") != 0)
+ if (depth != 1)
return 0;
- /*
- * Check if Firmware Assisted dump is supported. if yes, check
- * if dump has been initiated on last reboot.
- */
- token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
- if (!token)
+ if (strcmp(uname, "rtas") == 0) {
+ rtas_fadump_dt_scan(&fw_dump, node);
return 1;
+ }
- fw_dump.fadump_supported = 1;
- fw_dump.ibm_configure_kernel_dump = be32_to_cpu(*token);
-
- /*
- * The 'ibm,kernel-dump' rtas node is present only if there is
- * dump data waiting for us.
- */
- fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
- if (fdm_active)
- fw_dump.dump_active = 1;
-
- /* Get the sizes required to store dump data for the firmware provided
- * dump sections.
- * For each dump section type supported, a 32bit cell which defines
- * the ID of a supported section followed by two 32 bit cells which
- * gives teh size of the section in bytes.
- */
- sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
- &size);
-
- if (!sections)
+ if (strcmp(uname, "ibm,opal") == 0) {
+ opal_fadump_dt_scan(&fw_dump, node);
return 1;
-
- num_sections = size / (3 * sizeof(u32));
-
- for (i = 0; i < num_sections; i++, sections += 3) {
- u32 type = (u32)of_read_number(sections, 1);
-
- switch (type) {
- case FADUMP_CPU_STATE_DATA:
- fw_dump.cpu_state_data_size =
- of_read_ulong(&sections[1], 2);
- break;
- case FADUMP_HPTE_REGION:
- fw_dump.hpte_region_size =
- of_read_ulong(&sections[1], 2);
- break;
- }
}
- return 1;
+ return 0;
}
/*
* If fadump is registered, check if the memory provided
* falls within boot memory area and reserved memory area.
*/
-int is_fadump_memory_area(u64 addr, ulong size)
+int is_fadump_memory_area(u64 addr, unsigned long size)
{
- u64 d_start = fw_dump.reserve_dump_area_start;
- u64 d_end = d_start + fw_dump.reserve_dump_area_size;
+ u64 d_start, d_end;
if (!fw_dump.dump_registered)
return 0;
+ if (!size)
+ return 0;
+
+ d_start = fw_dump.reserve_dump_area_start;
+ d_end = d_start + fw_dump.reserve_dump_area_size;
if (((addr + size) > d_start) && (addr <= d_end))
return 1;
- return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
+ return (addr <= fw_dump.boot_mem_top);
}
int should_fadump_crash(void)
@@ -200,31 +159,29 @@ int is_fadump_active(void)
}
/*
- * Returns 1, if there are no holes in boot memory area,
- * 0 otherwise.
+ * Returns true, if there are no holes in memory area between d_start to d_end,
+ * false otherwise.
*/
-static int is_boot_memory_area_contiguous(void)
+static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{
struct memblock_region *reg;
- unsigned long tstart, tend;
- unsigned long start_pfn = PHYS_PFN(RMA_START);
- unsigned long end_pfn = PHYS_PFN(RMA_START + fw_dump.boot_memory_size);
- unsigned int ret = 0;
+ bool ret = false;
+ u64 start, end;
for_each_memblock(memory, reg) {
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
- if (tstart < tend) {
- /* Memory hole from start_pfn to tstart */
- if (tstart > start_pfn)
+ start = max_t(u64, d_start, reg->base);
+ end = min_t(u64, d_end, (reg->base + reg->size));
+ if (d_start < end) {
+ /* Memory hole from d_start to start */
+ if (start > d_start)
break;
- if (tend == end_pfn) {
- ret = 1;
+ if (end == d_end) {
+ ret = true;
break;
}
- start_pfn = tend + 1;
+ d_start = end + 1;
}
}
@@ -232,37 +189,45 @@ static int is_boot_memory_area_contiguous(void)
}
/*
- * Returns true, if there are no holes in reserved memory area,
+ * Returns true, if there are no holes in boot memory area,
* false otherwise.
*/
-static bool is_reserved_memory_area_contiguous(void)
+bool is_fadump_boot_mem_contiguous(void)
{
- struct memblock_region *reg;
- unsigned long start, end;
- unsigned long d_start = fw_dump.reserve_dump_area_start;
- unsigned long d_end = d_start + fw_dump.reserve_dump_area_size;
-
- for_each_memblock(memory, reg) {
- start = max(d_start, (unsigned long)reg->base);
- end = min(d_end, (unsigned long)(reg->base + reg->size));
- if (d_start < end) {
- /* Memory hole from d_start to start */
- if (start > d_start)
- break;
+ unsigned long d_start, d_end;
+ bool ret = false;
+ int i;
- if (end == d_end)
- return true;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ d_start = fw_dump.boot_mem_addr[i];
+ d_end = d_start + fw_dump.boot_mem_sz[i];
- d_start = end + 1;
- }
+ ret = is_fadump_mem_area_contiguous(d_start, d_end);
+ if (!ret)
+ break;
}
- return false;
+ return ret;
+}
+
+/*
+ * Returns true, if there are no holes in reserved memory area,
+ * false otherwise.
+ */
+bool is_fadump_reserved_mem_contiguous(void)
+{
+ u64 d_start, d_end;
+
+ d_start = fw_dump.reserve_dump_area_start;
+ d_end = d_start + fw_dump.reserve_dump_area_size;
+ return is_fadump_mem_area_contiguous(d_start, d_end);
}
/* Print firmware assisted dump configurations for debugging purpose. */
static void fadump_show_config(void)
{
+ int i;
+
pr_debug("Support for firmware-assisted dump (fadump): %s\n",
(fw_dump.fadump_supported ? "present" : "no support"));
@@ -276,62 +241,13 @@ static void fadump_show_config(void)
pr_debug("Dump section sizes:\n");
pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
- pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size);
-}
-
-static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
- unsigned long addr)
-{
- if (!fdm)
- return 0;
-
- memset(fdm, 0, sizeof(struct fadump_mem_struct));
- addr = addr & PAGE_MASK;
-
- fdm->header.dump_format_version = cpu_to_be32(0x00000001);
- fdm->header.dump_num_sections = cpu_to_be16(3);
- fdm->header.dump_status_flag = 0;
- fdm->header.offset_first_dump_section =
- cpu_to_be32((u32)offsetof(struct fadump_mem_struct, cpu_state_data));
-
- /*
- * Fields for disk dump option.
- * We are not using disk dump option, hence set these fields to 0.
- */
- fdm->header.dd_block_size = 0;
- fdm->header.dd_block_offset = 0;
- fdm->header.dd_num_blocks = 0;
- fdm->header.dd_offset_disk_path = 0;
-
- /* set 0 to disable an automatic dump-reboot. */
- fdm->header.max_time_auto = 0;
-
- /* Kernel dump sections */
- /* cpu state data section. */
- fdm->cpu_state_data.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->cpu_state_data.source_data_type = cpu_to_be16(FADUMP_CPU_STATE_DATA);
- fdm->cpu_state_data.source_address = 0;
- fdm->cpu_state_data.source_len = cpu_to_be64(fw_dump.cpu_state_data_size);
- fdm->cpu_state_data.destination_address = cpu_to_be64(addr);
- addr += fw_dump.cpu_state_data_size;
-
- /* hpte region section */
- fdm->hpte_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->hpte_region.source_data_type = cpu_to_be16(FADUMP_HPTE_REGION);
- fdm->hpte_region.source_address = 0;
- fdm->hpte_region.source_len = cpu_to_be64(fw_dump.hpte_region_size);
- fdm->hpte_region.destination_address = cpu_to_be64(addr);
- addr += fw_dump.hpte_region_size;
-
- /* RMA region section */
- fdm->rmr_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->rmr_region.source_data_type = cpu_to_be16(FADUMP_REAL_MODE_REGION);
- fdm->rmr_region.source_address = cpu_to_be64(RMA_START);
- fdm->rmr_region.source_len = cpu_to_be64(fw_dump.boot_memory_size);
- fdm->rmr_region.destination_address = cpu_to_be64(addr);
- addr += fw_dump.boot_memory_size;
-
- return addr;
+ pr_debug(" Boot memory size : %lx\n", fw_dump.boot_memory_size);
+ pr_debug(" Boot memory top : %llx\n", fw_dump.boot_mem_top);
+ pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ pr_debug("[%03d] base = %llx, size = %llx\n", i,
+ fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]);
+ }
}
/**
@@ -349,10 +265,10 @@ static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
* that is required for a kernel to boot successfully.
*
*/
-static inline unsigned long fadump_calculate_reserve_size(void)
+static inline u64 fadump_calculate_reserve_size(void)
{
+ u64 base, size, bootmem_min;
int ret;
- unsigned long long base, size;
if (fw_dump.reserve_bootvar)
pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");
@@ -402,7 +318,8 @@ static inline unsigned long fadump_calculate_reserve_size(void)
if (memory_limit && size > memory_limit)
size = memory_limit;
- return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
+ bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
+ return (size > bootmem_min ? size : bootmem_min);
}
/*
@@ -423,57 +340,136 @@ static unsigned long get_fadump_area_size(void)
size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
size = PAGE_ALIGN(size);
+
+ /* This is to hold kernel metadata on platforms that support it */
+ size += (fw_dump.ops->fadump_get_metadata_size ?
+ fw_dump.ops->fadump_get_metadata_size() : 0);
return size;
}
-static void __init fadump_reserve_crash_area(unsigned long base,
- unsigned long size)
+static int __init add_boot_mem_region(unsigned long rstart,
+ unsigned long rsize)
+{
+ int i = fw_dump.boot_mem_regs_cnt++;
+
+ if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) {
+ fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS;
+ return 0;
+ }
+
+ pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
+ i, rstart, (rstart + rsize));
+ fw_dump.boot_mem_addr[i] = rstart;
+ fw_dump.boot_mem_sz[i] = rsize;
+ return 1;
+}
+
+/*
+ * Firmware usually has a hard limit on the data it can copy per region.
+ * Honour that by splitting a memory range into multiple regions.
+ */
+static int __init add_boot_mem_regions(unsigned long mstart,
+ unsigned long msize)
{
+ unsigned long rstart, rsize, max_size;
+ int ret = 1;
+
+ rstart = mstart;
+ max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize;
+ while (msize) {
+ if (msize > max_size)
+ rsize = max_size;
+ else
+ rsize = msize;
+
+ ret = add_boot_mem_region(rstart, rsize);
+ if (!ret)
+ break;
+
+ msize -= rsize;
+ rstart += rsize;
+ }
+
+ return ret;
+}
+
+static int __init fadump_get_boot_mem_regions(void)
+{
+ unsigned long base, size, cur_size, hole_size, last_end;
+ unsigned long mem_size = fw_dump.boot_memory_size;
struct memblock_region *reg;
- unsigned long mstart, mend, msize;
+ int ret = 1;
+
+ fw_dump.boot_mem_regs_cnt = 0;
+ last_end = 0;
+ hole_size = 0;
+ cur_size = 0;
for_each_memblock(memory, reg) {
- mstart = max_t(unsigned long, base, reg->base);
- mend = reg->base + reg->size;
- mend = min(base + size, mend);
-
- if (mstart < mend) {
- msize = mend - mstart;
- memblock_reserve(mstart, msize);
- pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n",
- (msize >> 20), mstart);
+ base = reg->base;
+ size = reg->size;
+ hole_size += (base - last_end);
+
+ if ((cur_size + size) >= mem_size) {
+ size = (mem_size - cur_size);
+ ret = add_boot_mem_regions(base, size);
+ break;
}
+
+ mem_size -= size;
+ cur_size += size;
+ ret = add_boot_mem_regions(base, size);
+ if (!ret)
+ break;
+
+ last_end = base + size;
}
+ fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
+
+ return ret;
}
int __init fadump_reserve_mem(void)
{
- unsigned long base, size, memory_boundary;
+ u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE;
+ bool is_memblock_bottom_up = memblock_bottom_up();
+ int ret = 1;
if (!fw_dump.fadump_enabled)
return 0;
if (!fw_dump.fadump_supported) {
- printk(KERN_INFO "Firmware-assisted dump is not supported on"
- " this hardware\n");
- fw_dump.fadump_enabled = 0;
- return 0;
+ pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
+ goto error_out;
}
+
/*
* Initialize boot memory size
* If dump is active then we have already calculated the size during
* first kernel.
*/
- if (fdm_active)
- fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len);
- else {
- fw_dump.boot_memory_size = fadump_calculate_reserve_size();
+ if (!fw_dump.dump_active) {
+ fw_dump.boot_memory_size =
+ PAGE_ALIGN(fadump_calculate_reserve_size());
#ifdef CONFIG_CMA
- if (!fw_dump.nocma)
+ if (!fw_dump.nocma) {
+ align = FADUMP_CMA_ALIGNMENT;
fw_dump.boot_memory_size =
- ALIGN(fw_dump.boot_memory_size,
- FADUMP_CMA_ALIGNMENT);
+ ALIGN(fw_dump.boot_memory_size, align);
+ }
#endif
+
+ bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
+ if (fw_dump.boot_memory_size < bootmem_min) {
+ pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
+ fw_dump.boot_memory_size, bootmem_min);
+ goto error_out;
+ }
+
+ if (!fadump_get_boot_mem_regions()) {
+ pr_err("Too many holes in boot memory area to enable fadump\n");
+ goto error_out;
+ }
}
/*
@@ -493,10 +489,13 @@ int __init fadump_reserve_mem(void)
" dump, now %#016llx\n", memory_limit);
}
if (memory_limit)
- memory_boundary = memory_limit;
+ mem_boundary = memory_limit;
else
- memory_boundary = memblock_end_of_DRAM();
+ mem_boundary = memblock_end_of_DRAM();
+ base = fw_dump.boot_mem_top;
+ size = get_fadump_area_size();
+ fw_dump.reserve_dump_area_size = size;
if (fw_dump.dump_active) {
pr_info("Firmware-assisted dump is active.\n");
@@ -510,58 +509,55 @@ int __init fadump_reserve_mem(void)
#endif
/*
* If last boot has crashed then reserve all the memory
- * above boot_memory_size so that we don't touch it until
+ * above boot memory size so that we don't touch it until
* dump is written to disk by userspace tool. This memory
- * will be released for general use once the dump is saved.
+ * can be released for general use by invalidating fadump.
*/
- base = fw_dump.boot_memory_size;
- size = memory_boundary - base;
- fadump_reserve_crash_area(base, size);
-
- fw_dump.fadumphdr_addr =
- be64_to_cpu(fdm_active->rmr_region.destination_address) +
- be64_to_cpu(fdm_active->rmr_region.source_len);
- pr_debug("fadumphdr_addr = %pa\n", &fw_dump.fadumphdr_addr);
- fw_dump.reserve_dump_area_start = base;
- fw_dump.reserve_dump_area_size = size;
- } else {
- size = get_fadump_area_size();
+ fadump_reserve_crash_area(base);
+ pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
+ pr_debug("Reserve dump area start address: 0x%lx\n",
+ fw_dump.reserve_dump_area_start);
+ } else {
/*
* Reserve memory at an offset closer to bottom of the RAM to
- * minimize the impact of memory hot-remove operation. We can't
- * use memblock_find_in_range() here since it doesn't allocate
- * from bottom to top.
+ * minimize the impact of memory hot-remove operation.
*/
- for (base = fw_dump.boot_memory_size;
- base <= (memory_boundary - size);
- base += size) {
- if (memblock_is_region_memory(base, size) &&
- !memblock_is_region_reserved(base, size))
- break;
+ memblock_set_bottom_up(true);
+ base = memblock_find_in_range(base, mem_boundary, size, align);
+
+ /* Restore the previous allocation mode */
+ memblock_set_bottom_up(is_memblock_bottom_up);
+
+ if (!base) {
+ pr_err("Failed to find memory chunk for reservation!\n");
+ goto error_out;
}
- if ((base > (memory_boundary - size)) ||
- memblock_reserve(base, size)) {
- pr_err("Failed to reserve memory\n");
- return 0;
+ fw_dump.reserve_dump_area_start = base;
+
+ /*
+ * Calculate the kernel metadata address and register it with
+ * f/w if the platform supports.
+ */
+ if (fw_dump.ops->fadump_setup_metadata &&
+ (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
+ goto error_out;
+
+ if (memblock_reserve(base, size)) {
+ pr_err("Failed to reserve memory!\n");
+ goto error_out;
}
- pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
- "assisted dump (System RAM: %ldMB)\n",
- (unsigned long)(size >> 20),
- (unsigned long)(base >> 20),
- (unsigned long)(memblock_phys_mem_size() >> 20));
+ pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
+ (size >> 20), base, (memblock_phys_mem_size() >> 20));
- fw_dump.reserve_dump_area_start = base;
- fw_dump.reserve_dump_area_size = size;
- return fadump_cma_init();
+ ret = fadump_cma_init();
}
- return 1;
-}
-unsigned long __init arch_reserved_kernel_pages(void)
-{
- return memblock_reserved_size() / PAGE_SIZE;
+ return ret;
+error_out:
+ fw_dump.fadump_enabled = 0;
+ return 0;
}
/* Look for fadump= cmdline option. */
@@ -596,61 +592,6 @@ static int __init early_fadump_reserve_mem(char *p)
}
early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-static int register_fw_dump(struct fadump_mem_struct *fdm)
-{
- int rc, err;
- unsigned int wait_time;
-
- pr_debug("Registering for firmware-assisted kernel dump...\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_REGISTER, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
-
- } while (wait_time);
-
- err = -EIO;
- switch (rc) {
- default:
- pr_err("Failed to register. Unknown Error(%d).\n", rc);
- break;
- case -1:
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
- " dump. Hardware Error(%d).\n", rc);
- break;
- case -3:
- if (!is_boot_memory_area_contiguous())
- pr_err("Can't have holes in boot memory area while registering fadump\n");
- else if (!is_reserved_memory_area_contiguous())
- pr_err("Can't have holes in reserved memory area while"
- " registering fadump\n");
-
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
- " dump. Parameter Error(%d).\n", rc);
- err = -EINVAL;
- break;
- case -9:
- printk(KERN_ERR "firmware-assisted kernel dump is already "
- " registered.");
- fw_dump.dump_registered = 1;
- err = -EEXIST;
- break;
- case 0:
- printk(KERN_INFO "firmware-assisted kernel dump registration"
- " is successful\n");
- fw_dump.dump_registered = 1;
- err = 0;
- break;
- }
- return err;
-}
-
void crash_fadump(struct pt_regs *regs, const char *str)
{
struct fadump_crash_info_header *fdh = NULL;
@@ -693,71 +634,10 @@ void crash_fadump(struct pt_regs *regs, const char *str)
fdh->online_mask = *cpu_online_mask;
- /* Call ibm,os-term rtas call to trigger firmware assisted dump */
- rtas_os_term((char *)str);
-}
-
-#define GPR_MASK 0xffffff0000000000
-static inline int fadump_gpr_index(u64 id)
-{
- int i = -1;
- char str[3];
-
- if ((id & GPR_MASK) == REG_ID("GPR")) {
- /* get the digits at the end */
- id &= ~GPR_MASK;
- id >>= 24;
- str[2] = '\0';
- str[1] = id & 0xff;
- str[0] = (id >> 8) & 0xff;
- sscanf(str, "%d", &i);
- if (i > 31)
- i = -1;
- }
- return i;
-}
-
-static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
- u64 reg_val)
-{
- int i;
-
- i = fadump_gpr_index(reg_id);
- if (i >= 0)
- regs->gpr[i] = (unsigned long)reg_val;
- else if (reg_id == REG_ID("NIA"))
- regs->nip = (unsigned long)reg_val;
- else if (reg_id == REG_ID("MSR"))
- regs->msr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("CTR"))
- regs->ctr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("LR"))
- regs->link = (unsigned long)reg_val;
- else if (reg_id == REG_ID("XER"))
- regs->xer = (unsigned long)reg_val;
- else if (reg_id == REG_ID("CR"))
- regs->ccr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("DAR"))
- regs->dar = (unsigned long)reg_val;
- else if (reg_id == REG_ID("DSISR"))
- regs->dsisr = (unsigned long)reg_val;
-}
-
-static struct fadump_reg_entry*
-fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
-{
- memset(regs, 0, sizeof(struct pt_regs));
-
- while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) {
- fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
- be64_to_cpu(reg_entry->reg_value));
- reg_entry++;
- }
- reg_entry++;
- return reg_entry;
+ fw_dump.ops->fadump_trigger(fdh, str);
}
-static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
struct elf_prstatus prstatus;
@@ -772,7 +652,7 @@ static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
return buf;
}
-static void fadump_update_elfcore_header(char *bufp)
+void fadump_update_elfcore_header(char *bufp)
{
struct elfhdr *elf;
struct elf_phdr *phdr;
@@ -784,7 +664,7 @@ static void fadump_update_elfcore_header(char *bufp)
phdr = (struct elf_phdr *)bufp;
if (phdr->p_type == PT_NOTE) {
- phdr->p_paddr = fw_dump.cpu_notes_buf;
+ phdr->p_paddr = __pa(fw_dump.cpu_notes_buf_vaddr);
phdr->p_offset = phdr->p_paddr;
phdr->p_filesz = fw_dump.cpu_notes_buf_size;
phdr->p_memsz = fw_dump.cpu_notes_buf_size;
@@ -792,228 +672,100 @@ static void fadump_update_elfcore_header(char *bufp)
return;
}
-static void *fadump_cpu_notes_buf_alloc(unsigned long size)
+static void *fadump_alloc_buffer(unsigned long size)
{
- void *vaddr;
+ unsigned long count, i;
struct page *page;
- unsigned long order, count, i;
+ void *vaddr;
- order = get_order(size);
- vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+ vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
if (!vaddr)
return NULL;
- count = 1 << order;
+ count = PAGE_ALIGN(size) / PAGE_SIZE;
page = virt_to_page(vaddr);
for (i = 0; i < count; i++)
- SetPageReserved(page + i);
+ mark_page_reserved(page + i);
return vaddr;
}
-static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
+static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
{
- struct page *page;
- unsigned long order, count, i;
-
- order = get_order(size);
- count = 1 << order;
- page = virt_to_page(vaddr);
- for (i = 0; i < count; i++)
- ClearPageReserved(page + i);
- __free_pages(page, order);
+ free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}
-/*
- * Read CPU state dump data and convert it into ELF notes.
- * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
- * used to access the data to allow for additional fields to be added without
- * affecting compatibility. Each list of registers for a CPU starts with
- * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
- * 8 Byte ASCII identifier and 8 Byte register value. The register entry
- * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
- * of register value. For more details refer to PAPR document.
- *
- * Only for the crashing cpu we ignore the CPU dump data and get exact
- * state from fadump crash info structure populated by first kernel at the
- * time of crash.
- */
-static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
+s32 fadump_setup_cpu_notes_buf(u32 num_cpus)
{
- struct fadump_reg_save_area_header *reg_header;
- struct fadump_reg_entry *reg_entry;
- struct fadump_crash_info_header *fdh = NULL;
- void *vaddr;
- unsigned long addr;
- u32 num_cpus, *note_buf;
- struct pt_regs regs;
- int i, rc = 0, cpu = 0;
-
- if (!fdm->cpu_state_data.bytes_dumped)
- return -EINVAL;
-
- addr = be64_to_cpu(fdm->cpu_state_data.destination_address);
- vaddr = __va(addr);
-
- reg_header = vaddr;
- if (be64_to_cpu(reg_header->magic_number) != REGSAVE_AREA_MAGIC) {
- printk(KERN_ERR "Unable to read register save area.\n");
- return -ENOENT;
- }
- pr_debug("--------CPU State Data------------\n");
- pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
- pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
-
- vaddr += be32_to_cpu(reg_header->num_cpu_offset);
- num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
- pr_debug("NumCpus : %u\n", num_cpus);
- vaddr += sizeof(u32);
- reg_entry = (struct fadump_reg_entry *)vaddr;
-
/* Allocate buffer to hold cpu crash notes. */
fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
- note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
- if (!note_buf) {
- printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
- "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf_vaddr =
+ (unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
+ if (!fw_dump.cpu_notes_buf_vaddr) {
+ pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
+ fw_dump.cpu_notes_buf_size);
return -ENOMEM;
}
- fw_dump.cpu_notes_buf = __pa(note_buf);
-
- pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
- (num_cpus * sizeof(note_buf_t)), note_buf);
- if (fw_dump.fadumphdr_addr)
- fdh = __va(fw_dump.fadumphdr_addr);
-
- for (i = 0; i < num_cpus; i++) {
- if (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUSTRT")) {
- printk(KERN_ERR "Unable to read CPU state data\n");
- rc = -ENOENT;
- goto error_out;
- }
- /* Lower 4 bytes of reg_value contains logical cpu id */
- cpu = be64_to_cpu(reg_entry->reg_value) & FADUMP_CPU_ID_MASK;
- if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
- SKIP_TO_NEXT_CPU(reg_entry);
- continue;
- }
- pr_debug("Reading register data for cpu %d...\n", cpu);
- if (fdh && fdh->crashing_cpu == cpu) {
- regs = fdh->regs;
- note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
- SKIP_TO_NEXT_CPU(reg_entry);
- } else {
- reg_entry++;
- reg_entry = fadump_read_registers(reg_entry, &regs);
- note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
- }
- }
- final_note(note_buf);
-
- if (fdh) {
- pr_debug("Updating elfcore header (%llx) with cpu notes\n",
- fdh->elfcorehdr_addr);
- fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
- }
+ pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
+ fw_dump.cpu_notes_buf_size,
+ fw_dump.cpu_notes_buf_vaddr);
return 0;
-
-error_out:
- fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
- fw_dump.cpu_notes_buf_size);
- fw_dump.cpu_notes_buf = 0;
- fw_dump.cpu_notes_buf_size = 0;
- return rc;
-
}
-/*
- * Validate and process the dump data stored by firmware before exporting
- * it through '/proc/vmcore'.
- */
-static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
+void fadump_free_cpu_notes_buf(void)
{
- struct fadump_crash_info_header *fdh;
- int rc = 0;
-
- if (!fdm_active || !fw_dump.fadumphdr_addr)
- return -EINVAL;
-
- /* Check if the dump data is valid. */
- if ((be16_to_cpu(fdm_active->header.dump_status_flag) == FADUMP_ERROR_FLAG) ||
- (fdm_active->cpu_state_data.error_flags != 0) ||
- (fdm_active->rmr_region.error_flags != 0)) {
- printk(KERN_ERR "Dump taken by platform is not valid\n");
- return -EINVAL;
- }
- if ((fdm_active->rmr_region.bytes_dumped !=
- fdm_active->rmr_region.source_len) ||
- !fdm_active->cpu_state_data.bytes_dumped) {
- printk(KERN_ERR "Dump taken by platform is incomplete\n");
- return -EINVAL;
- }
-
- /* Validate the fadump crash info header */
- fdh = __va(fw_dump.fadumphdr_addr);
- if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
- printk(KERN_ERR "Crash info header is not valid.\n");
- return -EINVAL;
- }
-
- rc = fadump_build_cpu_notes(fdm_active);
- if (rc)
- return rc;
-
- /*
- * We are done validating dump info and elfcore header is now ready
- * to be exported. set elfcorehdr_addr so that vmcore module will
- * export the elfcore header through '/proc/vmcore'.
- */
- elfcorehdr_addr = fdh->elfcorehdr_addr;
+ if (!fw_dump.cpu_notes_buf_vaddr)
+ return;
- return 0;
+ fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
+ fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf_vaddr = 0;
+ fw_dump.cpu_notes_buf_size = 0;
}
-static void free_crash_memory_ranges(void)
+static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
{
- kfree(crash_memory_ranges);
- crash_memory_ranges = NULL;
- crash_memory_ranges_size = 0;
- max_crash_mem_ranges = 0;
+ kfree(mrange_info->mem_ranges);
+ mrange_info->mem_ranges = NULL;
+ mrange_info->mem_ranges_sz = 0;
+ mrange_info->max_mem_ranges = 0;
}
/*
- * Allocate or reallocate crash memory ranges array in incremental units
+ * Allocate or reallocate mem_ranges array in incremental units
* of PAGE_SIZE.
*/
-static int allocate_crash_memory_ranges(void)
+static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
{
- struct fad_crash_memory_ranges *new_array;
+ struct fadump_memory_range *new_array;
u64 new_size;
- new_size = crash_memory_ranges_size + PAGE_SIZE;
- pr_debug("Allocating %llu bytes of memory for crash memory ranges\n",
- new_size);
+ new_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
+ pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
+ new_size, mrange_info->name);
- new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL);
+ new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL);
if (new_array == NULL) {
- pr_err("Insufficient memory for setting up crash memory ranges\n");
- free_crash_memory_ranges();
+ pr_err("Insufficient memory for setting up %s memory ranges\n",
+ mrange_info->name);
+ fadump_free_mem_ranges(mrange_info);
return -ENOMEM;
}
- crash_memory_ranges = new_array;
- crash_memory_ranges_size = new_size;
- max_crash_mem_ranges = (new_size /
- sizeof(struct fad_crash_memory_ranges));
+ mrange_info->mem_ranges = new_array;
+ mrange_info->mem_ranges_sz = new_size;
+ mrange_info->max_mem_ranges = (new_size /
+ sizeof(struct fadump_memory_range));
return 0;
}
-static inline int fadump_add_crash_memory(unsigned long long base,
- unsigned long long end)
+static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
+ u64 base, u64 end)
{
- u64 start, size;
+ struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
bool is_adjacent = false;
+ u64 start, size;
if (base == end)
return 0;
@@ -1022,38 +774,41 @@ static inline int fadump_add_crash_memory(unsigned long long base,
* Fold adjacent memory ranges to bring down the memory ranges/
* PT_LOAD segments count.
*/
- if (crash_mem_ranges) {
- start = crash_memory_ranges[crash_mem_ranges - 1].base;
- size = crash_memory_ranges[crash_mem_ranges - 1].size;
+ if (mrange_info->mem_range_cnt) {
+ start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
+ size = mem_ranges[mrange_info->mem_range_cnt - 1].size;
if ((start + size) == base)
is_adjacent = true;
}
if (!is_adjacent) {
/* resize the array on reaching the limit */
- if (crash_mem_ranges == max_crash_mem_ranges) {
+ if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
int ret;
- ret = allocate_crash_memory_ranges();
+ ret = fadump_alloc_mem_ranges(mrange_info);
if (ret)
return ret;
+
+ /* Update to the new resized array */
+ mem_ranges = mrange_info->mem_ranges;
}
start = base;
- crash_memory_ranges[crash_mem_ranges].base = start;
- crash_mem_ranges++;
+ mem_ranges[mrange_info->mem_range_cnt].base = start;
+ mrange_info->mem_range_cnt++;
}
- crash_memory_ranges[crash_mem_ranges - 1].size = (end - start);
- pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
- (crash_mem_ranges - 1), start, end - 1, (end - start));
+ mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
+ pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
+ mrange_info->name, (mrange_info->mem_range_cnt - 1),
+ start, end - 1, (end - start));
return 0;
}
-static int fadump_exclude_reserved_area(unsigned long long start,
- unsigned long long end)
+static int fadump_exclude_reserved_area(u64 start, u64 end)
{
- unsigned long long ra_start, ra_end;
+ u64 ra_start, ra_end;
int ret = 0;
ra_start = fw_dump.reserve_dump_area_start;
@@ -1061,18 +816,22 @@ static int fadump_exclude_reserved_area(unsigned long long start,
if ((ra_start < end) && (ra_end > start)) {
if ((start < ra_start) && (end > ra_end)) {
- ret = fadump_add_crash_memory(start, ra_start);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ start, ra_start);
if (ret)
return ret;
- ret = fadump_add_crash_memory(ra_end, end);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ ra_end, end);
} else if (start < ra_start) {
- ret = fadump_add_crash_memory(start, ra_start);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ start, ra_start);
} else if (ra_end < end) {
- ret = fadump_add_crash_memory(ra_end, end);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ ra_end, end);
}
} else
- ret = fadump_add_crash_memory(start, end);
+ ret = fadump_add_mem_range(&crash_mrange_info, start, end);
return ret;
}
@@ -1117,36 +876,36 @@ static int fadump_init_elfcore_header(char *bufp)
static int fadump_setup_crash_memory_ranges(void)
{
struct memblock_region *reg;
- unsigned long long start, end;
- int ret;
+ u64 start, end;
+ int i, ret;
pr_debug("Setup crash memory ranges.\n");
- crash_mem_ranges = 0;
+ crash_mrange_info.mem_range_cnt = 0;
/*
- * add the first memory chunk (RMA_START through boot_memory_size) as
- * a separate memory chunk. The reason is, at the time crash firmware
- * will move the content of this memory chunk to different location
- * specified during fadump registration. We need to create a separate
- * program header for this chunk with the correct offset.
+ * Boot memory region(s) registered with firmware are moved to
+ * different location at the time of crash. Create separate program
+ * header(s) for this memory chunk(s) with the correct offset.
*/
- ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
- if (ret)
- return ret;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ start = fw_dump.boot_mem_addr[i];
+ end = start + fw_dump.boot_mem_sz[i];
+ ret = fadump_add_mem_range(&crash_mrange_info, start, end);
+ if (ret)
+ return ret;
+ }
for_each_memblock(memory, reg) {
- start = (unsigned long long)reg->base;
- end = start + (unsigned long long)reg->size;
+ start = (u64)reg->base;
+ end = start + (u64)reg->size;
/*
- * skip the first memory chunk that is already added (RMA_START
- * through boot_memory_size). This logic needs a relook if and
- * when RMA_START changes to a non-zero value.
+ * skip the memory chunk that is already added
+ * (0 through boot_memory_top).
*/
- BUILD_BUG_ON(RMA_START != 0);
- if (start < fw_dump.boot_memory_size) {
- if (end > fw_dump.boot_memory_size)
- start = fw_dump.boot_memory_size;
+ if (start < fw_dump.boot_mem_top) {
+ if (end > fw_dump.boot_mem_top)
+ start = fw_dump.boot_mem_top;
else
continue;
}
@@ -1167,17 +926,35 @@ static int fadump_setup_crash_memory_ranges(void)
*/
static inline unsigned long fadump_relocate(unsigned long paddr)
{
- if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
- return be64_to_cpu(fdm.rmr_region.destination_address) + paddr;
- else
- return paddr;
+ unsigned long raddr, rstart, rend, rlast, hole_size;
+ int i;
+
+ hole_size = 0;
+ rlast = 0;
+ raddr = paddr;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ rstart = fw_dump.boot_mem_addr[i];
+ rend = rstart + fw_dump.boot_mem_sz[i];
+ hole_size += (rstart - rlast);
+
+ if (paddr >= rstart && paddr < rend) {
+ raddr += fw_dump.boot_mem_dest_addr - hole_size;
+ break;
+ }
+
+ rlast = rend;
+ }
+
+ pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
+ return raddr;
}
static int fadump_create_elfcore_headers(char *bufp)
{
- struct elfhdr *elf;
+ unsigned long long raddr, offset;
struct elf_phdr *phdr;
- int i;
+ struct elfhdr *elf;
+ int i, j;
fadump_init_elfcore_header(bufp);
elf = (struct elfhdr *)bufp;
@@ -1220,12 +997,14 @@ static int fadump_create_elfcore_headers(char *bufp)
(elf->e_phnum)++;
/* setup PT_LOAD sections. */
-
- for (i = 0; i < crash_mem_ranges; i++) {
- unsigned long long mbase, msize;
- mbase = crash_memory_ranges[i].base;
- msize = crash_memory_ranges[i].size;
-
+ j = 0;
+ offset = 0;
+ raddr = fw_dump.boot_mem_addr[0];
+ for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) {
+ u64 mbase, msize;
+
+ mbase = crash_mrange_info.mem_ranges[i].base;
+ msize = crash_mrange_info.mem_ranges[i].size;
if (!msize)
continue;
@@ -1235,13 +1014,17 @@ static int fadump_create_elfcore_headers(char *bufp)
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_offset = mbase;
- if (mbase == RMA_START) {
+ if (mbase == raddr) {
/*
- * The entire RMA region will be moved by firmware
- * to the specified destination_address. Hence set
- * the correct offset.
+ * The entire real memory region will be moved by
+ * firmware to the specified destination_address.
+ * Hence set the correct offset.
*/
- phdr->p_offset = be64_to_cpu(fdm.rmr_region.destination_address);
+ phdr->p_offset = fw_dump.boot_mem_dest_addr + offset;
+ if (j < (fw_dump.boot_mem_regs_cnt - 1)) {
+ offset += fw_dump.boot_mem_sz[j];
+ raddr = fw_dump.boot_mem_addr[++j];
+ }
}
phdr->p_paddr = mbase;
@@ -1263,7 +1046,6 @@ static unsigned long init_fadump_header(unsigned long addr)
if (!addr)
return 0;
- fw_dump.fadumphdr_addr = addr;
fdh = __va(addr);
addr += sizeof(struct fadump_crash_info_header);
@@ -1271,7 +1053,7 @@ static unsigned long init_fadump_header(unsigned long addr)
fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
fdh->elfcorehdr_addr = addr;
/* We will set the crashing cpu id in crash_fadump() during crash. */
- fdh->crashing_cpu = CPU_UNKNOWN;
+ fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;
return addr;
}
@@ -1293,7 +1075,8 @@ static int register_fadump(void)
if (ret)
return ret;
- addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len);
+ addr = fw_dump.fadumphdr_addr;
+
/* Initialize fadump crash info header. */
addr = init_fadump_header(addr);
vaddr = __va(addr);
@@ -1302,74 +1085,27 @@ static int register_fadump(void)
fadump_create_elfcore_headers(vaddr);
/* register the future kernel dump with firmware. */
- return register_fw_dump(&fdm);
-}
-
-static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
-{
- int rc = 0;
- unsigned int wait_time;
-
- pr_debug("Un-register firmware-assisted dump\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_UNREGISTER, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
- } while (wait_time);
-
- if (rc) {
- printk(KERN_ERR "Failed to un-register firmware-assisted dump."
- " unexpected error(%d).\n", rc);
- return rc;
- }
- fw_dump.dump_registered = 0;
- return 0;
-}
-
-static int fadump_invalidate_dump(const struct fadump_mem_struct *fdm)
-{
- int rc = 0;
- unsigned int wait_time;
-
- pr_debug("Invalidating firmware-assisted dump registration\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_INVALIDATE, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
- } while (wait_time);
-
- if (rc) {
- pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc);
- return rc;
- }
- fw_dump.dump_active = 0;
- fdm_active = NULL;
- return 0;
+ pr_debug("Registering for firmware-assisted kernel dump...\n");
+ return fw_dump.ops->fadump_register(&fw_dump);
}
void fadump_cleanup(void)
{
+ if (!fw_dump.fadump_supported)
+ return;
+
/* Invalidate the registration only if dump is active. */
if (fw_dump.dump_active) {
- /* pass the same memory dump structure provided by platform */
- fadump_invalidate_dump(fdm_active);
+ pr_debug("Invalidating firmware-assisted dump registration\n");
+ fw_dump.ops->fadump_invalidate(&fw_dump);
} else if (fw_dump.dump_registered) {
/* Un-register Firmware-assisted dump if it was registered. */
- fadump_unregister_dump(&fdm);
- free_crash_memory_ranges();
+ fw_dump.ops->fadump_unregister(&fw_dump);
+ fadump_free_mem_ranges(&crash_mrange_info);
}
+
+ if (fw_dump.ops->fadump_cleanup)
+ fw_dump.ops->fadump_cleanup(&fw_dump);
}
static void fadump_free_reserved_memory(unsigned long start_pfn,
@@ -1394,90 +1130,197 @@ static void fadump_free_reserved_memory(unsigned long start_pfn,
/*
* Skip memory holes and free memory that was actually reserved.
*/
-static void fadump_release_reserved_area(unsigned long start, unsigned long end)
+static void fadump_release_reserved_area(u64 start, u64 end)
{
+ u64 tstart, tend, spfn, epfn;
struct memblock_region *reg;
- unsigned long tstart, tend;
- unsigned long start_pfn = PHYS_PFN(start);
- unsigned long end_pfn = PHYS_PFN(end);
+ spfn = PHYS_PFN(start);
+ epfn = PHYS_PFN(end);
for_each_memblock(memory, reg) {
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
+ tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg));
+ tend = min_t(u64, epfn, memblock_region_memory_end_pfn(reg));
if (tstart < tend) {
fadump_free_reserved_memory(tstart, tend);
- if (tend == end_pfn)
+ if (tend == epfn)
break;
- start_pfn = tend + 1;
+ spfn = tend;
}
}
}
/*
- * Release the memory that was reserved in early boot to preserve the memory
- * contents. The released memory will be available for general use.
+ * Sort the mem ranges in-place and merge adjacent ranges
+ * to minimize the memory ranges count.
*/
-static void fadump_release_memory(unsigned long begin, unsigned long end)
+static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
{
- unsigned long ra_start, ra_end;
+ struct fadump_memory_range *mem_ranges;
+ struct fadump_memory_range tmp_range;
+ u64 base, size;
+ int i, j, idx;
+
+ if (!reserved_mrange_info.mem_range_cnt)
+ return;
+
+ /* Sort the memory ranges */
+ mem_ranges = mrange_info->mem_ranges;
+ for (i = 0; i < mrange_info->mem_range_cnt; i++) {
+ idx = i;
+ for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
+ if (mem_ranges[idx].base > mem_ranges[j].base)
+ idx = j;
+ }
+ if (idx != i) {
+ tmp_range = mem_ranges[idx];
+ mem_ranges[idx] = mem_ranges[i];
+ mem_ranges[i] = tmp_range;
+ }
+ }
+
+ /* Merge adjacent reserved ranges */
+ idx = 0;
+ for (i = 1; i < mrange_info->mem_range_cnt; i++) {
+ base = mem_ranges[i-1].base;
+ size = mem_ranges[i-1].size;
+ if (mem_ranges[i].base == (base + size))
+ mem_ranges[idx].size += mem_ranges[i].size;
+ else {
+ idx++;
+ if (i == idx)
+ continue;
+
+ mem_ranges[idx] = mem_ranges[i];
+ }
+ }
+ mrange_info->mem_range_cnt = idx + 1;
+}
+
+/*
+ * Scan reserved-ranges to consider them while reserving/releasing
+ * memory for FADump.
+ */
+static inline int fadump_scan_reserved_mem_ranges(void)
+{
+ struct device_node *root;
+ const __be32 *prop;
+ int len, ret = -1;
+ unsigned long i;
+
+ root = of_find_node_by_path("/");
+ if (!root)
+ return ret;
+
+ prop = of_get_property(root, "reserved-ranges", &len);
+ if (!prop)
+ return ret;
+
+ /*
+ * Each reserved range is an (address,size) pair, 2 cells each,
+ * totalling 4 cells per range.
+ */
+ for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+ u64 base, size;
+
+ base = of_read_number(prop + (i * 4) + 0, 2);
+ size = of_read_number(prop + (i * 4) + 2, 2);
+
+ if (size) {
+ ret = fadump_add_mem_range(&reserved_mrange_info,
+ base, base + size);
+ if (ret < 0) {
+ pr_warn("some reserved ranges are ignored!\n");
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Release the memory that was reserved during early boot to preserve the
+ * crash'ed kernel's memory contents except reserved dump area (permanent
+ * reservation) and reserved ranges used by F/W. The released memory will
+ * be available for general use.
+ */
+static void fadump_release_memory(u64 begin, u64 end)
+{
+ u64 ra_start, ra_end, tstart;
+ int i, ret;
+
+ fadump_scan_reserved_mem_ranges();
ra_start = fw_dump.reserve_dump_area_start;
ra_end = ra_start + fw_dump.reserve_dump_area_size;
/*
- * exclude the dump reserve area. Will reuse it for next
- * fadump registration.
+ * Add reserved dump area to reserved ranges list
+ * and exclude all these ranges while releasing memory.
*/
- if (begin < ra_end && end > ra_start) {
- if (begin < ra_start)
- fadump_release_reserved_area(begin, ra_start);
- if (end > ra_end)
- fadump_release_reserved_area(ra_end, end);
- } else
- fadump_release_reserved_area(begin, end);
+ ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
+ if (ret != 0) {
+ /*
+ * Not enough memory to setup reserved ranges but the system is
+ * running shortage of memory. So, release all the memory except
+ * Reserved dump area (reused for next fadump registration).
+ */
+ if (begin < ra_end && end > ra_start) {
+ if (begin < ra_start)
+ fadump_release_reserved_area(begin, ra_start);
+ if (end > ra_end)
+ fadump_release_reserved_area(ra_end, end);
+ } else
+ fadump_release_reserved_area(begin, end);
+
+ return;
+ }
+
+ /* Get the reserved ranges list in order first. */
+ sort_and_merge_mem_ranges(&reserved_mrange_info);
+
+ /* Exclude reserved ranges and release remaining memory */
+ tstart = begin;
+ for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
+ ra_start = reserved_mrange_info.mem_ranges[i].base;
+ ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;
+
+ if (tstart >= ra_end)
+ continue;
+
+ if (tstart < ra_start)
+ fadump_release_reserved_area(tstart, ra_start);
+ tstart = ra_end;
+ }
+
+ if (tstart < end)
+ fadump_release_reserved_area(tstart, end);
}
static void fadump_invalidate_release_mem(void)
{
- unsigned long reserved_area_start, reserved_area_end;
- unsigned long destination_address;
-
mutex_lock(&fadump_mutex);
if (!fw_dump.dump_active) {
mutex_unlock(&fadump_mutex);
return;
}
- destination_address = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
fadump_cleanup();
mutex_unlock(&fadump_mutex);
+ fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
+ fadump_free_cpu_notes_buf();
+
/*
- * Save the current reserved memory bounds we will require them
- * later for releasing the memory for general use.
- */
- reserved_area_start = fw_dump.reserve_dump_area_start;
- reserved_area_end = reserved_area_start +
- fw_dump.reserve_dump_area_size;
- /*
- * Setup reserve_dump_area_start and its size so that we can
- * reuse this reserved memory for Re-registration.
+ * Setup kernel metadata and initialize the kernel dump
+ * memory structure for FADump re-registration.
*/
- fw_dump.reserve_dump_area_start = destination_address;
- fw_dump.reserve_dump_area_size = get_fadump_area_size();
-
- fadump_release_memory(reserved_area_start, reserved_area_end);
- if (fw_dump.cpu_notes_buf) {
- fadump_cpu_notes_buf_free(
- (unsigned long)__va(fw_dump.cpu_notes_buf),
- fw_dump.cpu_notes_buf_size);
- fw_dump.cpu_notes_buf = 0;
- fw_dump.cpu_notes_buf_size = 0;
- }
- /* Initialize the kernel dump memory structure for FAD registration. */
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+ if (fw_dump.ops->fadump_setup_metadata &&
+ (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
+ pr_warn("Failed to setup kernel metadata!\n");
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}
static ssize_t fadump_release_memory_store(struct kobject *kobj,
@@ -1528,7 +1371,7 @@ static ssize_t fadump_register_store(struct kobject *kobj,
int ret = 0;
int input = -1;
- if (!fw_dump.fadump_enabled || fdm_active)
+ if (!fw_dump.fadump_enabled || fw_dump.dump_active)
return -EPERM;
if (kstrtoint(buf, 0, &input))
@@ -1541,13 +1384,15 @@ static ssize_t fadump_register_store(struct kobject *kobj,
if (fw_dump.dump_registered == 0) {
goto unlock_out;
}
+
/* Un-register Firmware-assisted dump */
- fadump_unregister_dump(&fdm);
+ pr_debug("Un-register firmware-assisted dump\n");
+ fw_dump.ops->fadump_unregister(&fw_dump);
break;
case 1:
if (fw_dump.dump_registered == 1) {
/* Un-register Firmware-assisted dump */
- fadump_unregister_dump(&fdm);
+ fw_dump.ops->fadump_unregister(&fw_dump);
}
/* Register Firmware-assisted dump */
ret = register_fadump();
@@ -1564,62 +1409,12 @@ unlock_out:
static int fadump_region_show(struct seq_file *m, void *private)
{
- const struct fadump_mem_struct *fdm_ptr;
-
if (!fw_dump.fadump_enabled)
return 0;
mutex_lock(&fadump_mutex);
- if (fdm_active)
- fdm_ptr = fdm_active;
- else {
- mutex_unlock(&fadump_mutex);
- fdm_ptr = &fdm;
- }
-
- seq_printf(m,
- "CPU : [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address),
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) +
- be64_to_cpu(fdm_ptr->cpu_state_data.source_len) - 1,
- be64_to_cpu(fdm_ptr->cpu_state_data.source_len),
- be64_to_cpu(fdm_ptr->cpu_state_data.bytes_dumped));
- seq_printf(m,
- "HPTE: [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->hpte_region.destination_address),
- be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
- be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
- be64_to_cpu(fdm_ptr->hpte_region.source_len),
- be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
- seq_printf(m,
- "DUMP: [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->rmr_region.destination_address),
- be64_to_cpu(fdm_ptr->rmr_region.destination_address) +
- be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1,
- be64_to_cpu(fdm_ptr->rmr_region.source_len),
- be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
-
- if (!fdm_active ||
- (fw_dump.reserve_dump_area_start ==
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address)))
- goto out;
-
- /* Dump is active. Show reserved memory region. */
- seq_printf(m,
- " : [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- (unsigned long long)fw_dump.reserve_dump_area_start,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
- fw_dump.reserve_dump_area_start,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
- fw_dump.reserve_dump_area_start);
-out:
- if (fdm_active)
- mutex_unlock(&fadump_mutex);
+ fw_dump.ops->fadump_region_show(&fw_dump, m);
+ mutex_unlock(&fadump_mutex);
return 0;
}
@@ -1690,14 +1485,77 @@ int __init setup_fadump(void)
* if dump process fails then invalidate the registration
* and release memory before proceeding for re-registration.
*/
- if (process_fadump(fdm_active) < 0)
+ if (fw_dump.ops->fadump_process(&fw_dump) < 0)
fadump_invalidate_release_mem();
}
/* Initialize the kernel dump memory structure for FAD registration. */
else if (fw_dump.reserve_dump_area_size)
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
+
fadump_init_files();
return 1;
}
subsys_initcall(setup_fadump);
+#else /* !CONFIG_PRESERVE_FA_DUMP */
+
+/* Scan the Firmware Assisted dump configuration details. */
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
+ return 0;
+
+ opal_fadump_dt_scan(&fw_dump, node);
+ return 1;
+}
+
+/*
+ * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
+ * preserve crash data. The subsequent memory preserving kernel boot
+ * is likely to process this crash data.
+ */
+int __init fadump_reserve_mem(void)
+{
+ if (fw_dump.dump_active) {
+ /*
+ * If last boot has crashed then reserve all the memory
+ * above boot memory to preserve crash data.
+ */
+ pr_info("Preserving crash data for processing in next boot.\n");
+ fadump_reserve_crash_area(fw_dump.boot_mem_top);
+ } else
+ pr_debug("FADump-aware kernel..\n");
+
+ return 1;
+}
+#endif /* CONFIG_PRESERVE_FA_DUMP */
+
+/* Preserve everything above the base address */
+static void __init fadump_reserve_crash_area(u64 base)
+{
+ struct memblock_region *reg;
+ u64 mstart, msize;
+
+ for_each_memblock(memory, reg) {
+ mstart = reg->base;
+ msize = reg->size;
+
+ if ((mstart + msize) < base)
+ continue;
+
+ if (mstart < base) {
+ msize -= (base - mstart);
+ mstart = base;
+ }
+
+ pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data",
+ (msize >> 20), mstart);
+ memblock_reserve(mstart, msize);
+ }
+}
+
+unsigned long __init arch_reserved_kernel_pages(void)
+{
+ return memblock_reserved_size() / PAGE_SIZE;
+}
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index f255e22184b4..4a24f8f026c7 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -34,7 +34,16 @@
#include "head_32.h"
-/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
+/* 601 only have IBAT */
+#ifdef CONFIG_PPC_BOOK3S_601
+#define LOAD_BAT(n, reg, RA, RB) \
+ li RA,0; \
+ mtspr SPRN_IBAT##n##U,RA; \
+ lwz RA,(n*16)+0(reg); \
+ lwz RB,(n*16)+4(reg); \
+ mtspr SPRN_IBAT##n##U,RA; \
+ mtspr SPRN_IBAT##n##L,RB
+#else
#define LOAD_BAT(n, reg, RA, RB) \
/* see the comment for clear_bats() -- Cort */ \
li RA,0; \
@@ -44,12 +53,11 @@
lwz RB,(n*16)+4(reg); \
mtspr SPRN_IBAT##n##U,RA; \
mtspr SPRN_IBAT##n##L,RB; \
- beq 1f; \
lwz RA,(n*16)+8(reg); \
lwz RB,(n*16)+12(reg); \
mtspr SPRN_DBAT##n##U,RA; \
- mtspr SPRN_DBAT##n##L,RB; \
-1:
+ mtspr SPRN_DBAT##n##L,RB
+#endif
__HEAD
.stabs "arch/powerpc/kernel/",N_SO,0,0,0f
@@ -557,9 +565,9 @@ DataStoreTLBMiss:
cmplw 0,r1,r3
mfspr r2, SPRN_SPRG_PGDIR
#ifdef CONFIG_SWAP
- li r1, _PAGE_RW | _PAGE_PRESENT | _PAGE_ACCESSED
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
#else
- li r1, _PAGE_RW | _PAGE_PRESENT
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
#endif
bge- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
@@ -820,9 +828,6 @@ load_up_mmu:
/* Load the BAT registers with the values set up by MMU_init.
MMU_init takes care of whether we're on a 601 or not. */
- mfpvr r3
- srwi r3,r3,16
- cmpwi r3,1
lis r3,BATS@ha
addi r3,r3,BATS@l
tophys(r3,r3)
@@ -897,9 +902,11 @@ start_here:
bl machine_init
bl __save_cpu_setup
bl MMU_init
+#ifdef CONFIG_KASAN
BEGIN_MMU_FTR_SECTION
bl MMU_init_hw_patch
END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
/*
* Go back to running unmapped so we can load up new values
@@ -996,11 +1003,8 @@ EXPORT_SYMBOL(switch_mmu_context)
*/
clear_bats:
li r10,0
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi r9, 1
- beq 1f
+#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT0U,r10
mtspr SPRN_DBAT0L,r10
mtspr SPRN_DBAT1U,r10
@@ -1009,7 +1013,7 @@ clear_bats:
mtspr SPRN_DBAT2L,r10
mtspr SPRN_DBAT3U,r10
mtspr SPRN_DBAT3L,r10
-1:
+#endif
mtspr SPRN_IBAT0U,r10
mtspr SPRN_IBAT0L,r10
mtspr SPRN_IBAT1U,r10
@@ -1104,10 +1108,7 @@ mmu_off:
*/
initial_bats:
lis r11,PAGE_OFFSET@h
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi 0,r9,1
- bne 4f
+#ifdef CONFIG_PPC_BOOK3S_601
ori r11,r11,4 /* set up BAT registers for 601 */
li r8,0x7f /* valid, block length = 8MB */
mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */
@@ -1120,10 +1121,8 @@ initial_bats:
addis r8,r8,0x800000@h
mtspr SPRN_IBAT2U,r11
mtspr SPRN_IBAT2L,r8
- isync
- blr
-
-4: tophys(r8,r11)
+#else
+ tophys(r8,r11)
#ifdef CONFIG_SMP
ori r8,r8,0x12 /* R/W access, M=1 */
#else
@@ -1135,10 +1134,10 @@ initial_bats:
mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
mtspr SPRN_IBAT0L,r8
mtspr SPRN_IBAT0U,r11
+#endif
isync
blr
-
#ifdef CONFIG_BOOTX_TEXT
setup_disp_bat:
/*
@@ -1153,15 +1152,13 @@ setup_disp_bat:
beqlr
lwz r11,0(r8)
lwz r8,4(r8)
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi 0,r9,1
- beq 1f
+#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT3L,r8
mtspr SPRN_DBAT3U,r11
- blr
-1: mtspr SPRN_IBAT3L,r8
+#else
+ mtspr SPRN_IBAT3L,r8
mtspr SPRN_IBAT3U,r11
+#endif
blr
#endif /* CONFIG_BOOTX_TEXT */
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 4a692553651f..8abc7783dbe5 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -5,19 +5,6 @@
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
/*
- * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
- */
-.macro __LOAD_MSR_KERNEL r, x
-.if \x >= 0x8000
- lis \r, (\x)@h
- ori \r, \r, (\x)@l
-.else
- li \r, (\x)
-.endif
-.endm
-#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x
-
-/*
* Exception entry code. This code runs with address translation
* turned off, i.e. using physical addresses.
* We assume sprg3 has the physical address of the current
@@ -92,7 +79,7 @@
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#else
- LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
MTMSRD(r10) /* (except for mach check in rtas) */
#endif
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
@@ -140,10 +127,10 @@
* otherwise we might risk taking an interrupt before we tell lockdep
* they are enabled.
*/
- LOAD_MSR_KERNEL(r10, MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL)
rlwimi r10, r9, 0, MSR_EE
#else
- LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
#endif
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
@@ -187,7 +174,7 @@ label:
#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
li r10,trap; \
stw r10,_TRAP(r11); \
- LOAD_MSR_KERNEL(r10, msr); \
+ LOAD_REG_IMMEDIATE(r10, msr); \
bl tfer; \
.long hdlr; \
.long ret
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index b5a5c6896019..ad79fddb974d 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -182,7 +182,8 @@ __secondary_hold:
isync
bctr
#else
- BUG_OPCODE
+0: trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
#endif
CLOSE_FIXED_SECTION(first_256B)
@@ -635,7 +636,7 @@ __after_prom_start:
sub r5,r5,r11
#else
/* just copy interrupts */
- LOAD_REG_IMMEDIATE(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
+ LOAD_REG_IMMEDIATE_SYM(r5, r11, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
#endif
b 5f
3:
@@ -900,6 +901,7 @@ p_toc: .8byte __toc_start + 0x8000 - 0b
/*
* This is where the main kernel code starts.
*/
+__REF
start_here_multiplatform:
/* set up the TOC */
bl relative_toc
@@ -975,6 +977,7 @@ start_here_multiplatform:
RFI
b . /* prevent speculative execution */
+ .previous
/* This is where all platforms converge execution */
start_here_common:
@@ -996,7 +999,8 @@ start_here_common:
bl start_kernel
/* Not reached */
- BUG_OPCODE
+ trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
/*
* We put a few things here that have to be page-aligned.
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 5ab9178c2347..19f583e18402 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -15,6 +15,7 @@
*/
#include <linux/init.h>
+#include <linux/magic.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -574,8 +575,6 @@ InstructionBreakpoint:
* by decoding the registers used by the dcbx instruction and adding them.
* DAR is set to the calculated address.
*/
- /* define if you don't want to use self modifying code */
-#define NO_SELF_MODIFYING_CODE
FixupDAR:/* Entry point for dcbx workaround. */
mtspr SPRN_M_TW, r10
/* fetch instruction from memory. */
@@ -639,27 +638,6 @@ FixupDAR:/* Entry point for dcbx workaround. */
rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
mtspr SPRN_DSISR, r10
142: /* continue, it was a dcbx, dcbi instruction. */
-#ifndef NO_SELF_MODIFYING_CODE
- andis. r10,r11,0x1f /* test if reg RA is r0 */
- li r10,modified_instr@l
- dcbtst r0,r10 /* touch for store */
- rlwinm r11,r11,0,0,20 /* Zero lower 10 bits */
- oris r11,r11,640 /* Transform instr. to a "add r10,RA,RB" */
- ori r11,r11,532
- stw r11,0(r10) /* store add/and instruction */
- dcbf 0,r10 /* flush new instr. to memory. */
- icbi 0,r10 /* invalidate instr. cache line */
- mfspr r11, SPRN_SPRG_SCRATCH1 /* restore r11 */
- mfspr r10, SPRN_SPRG_SCRATCH0 /* restore r10 */
- isync /* Wait until new instr is loaded from memory */
-modified_instr:
- .space 4 /* this is where the add instr. is stored */
- bne+ 143f
- subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */
-143: mtdar r10 /* store faulting EA in DAR */
- mfspr r10,SPRN_M_TW
- b DARFixed /* Go back to normal TLB handling */
-#else
mfctr r10
mtdar r10 /* save ctr reg in DAR */
rlwinm r10, r11, 24, 24, 28 /* offset into jump table for reg RB */
@@ -723,7 +701,6 @@ modified_instr:
add r10, r10, r11 /* add it */
mfctr r11 /* restore r11 */
b 151b
-#endif
/*
* This is where the main kernel code starts.
@@ -741,6 +718,9 @@ start_here:
/* stack */
lis r1,init_thread_union@ha
addi r1,r1,init_thread_union@l
+ lis r0, STACK_END_MAGIC@h
+ ori r0, r0, STACK_END_MAGIC@l
+ stw r0, 0(r1)
li r0,0
stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index a293a53b4365..1007ec36b4cb 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -195,18 +195,63 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
tsk->thread.last_hit_ubp = NULL;
}
+static bool is_larx_stcx_instr(struct pt_regs *regs, unsigned int instr)
+{
+ int ret, type;
+ struct instruction_op op;
+
+ ret = analyse_instr(&op, regs, instr);
+ type = GETTYPE(op.type);
+ return (!ret && (type == LARX || type == STCX));
+}
+
/*
* Handle debug exception notifications.
*/
+static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp,
+ unsigned long addr)
+{
+ unsigned int instr = 0;
+
+ if (__get_user_inatomic(instr, (unsigned int *)regs->nip))
+ goto fail;
+
+ if (is_larx_stcx_instr(regs, instr)) {
+ printk_ratelimited("Breakpoint hit on instruction that can't be emulated."
+ " Breakpoint at 0x%lx will be disabled.\n", addr);
+ goto disable;
+ }
+
+ /* Do not emulate user-space instructions, instead single-step them */
+ if (user_mode(regs)) {
+ current->thread.last_hit_ubp = bp;
+ regs->msr |= MSR_SE;
+ return false;
+ }
+
+ if (!emulate_step(regs, instr))
+ goto fail;
+
+ return true;
+
+fail:
+ /*
+ * We've failed in reliably handling the hw-breakpoint. Unregister
+ * it and throw a warning message to let the user know about it.
+ */
+ WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
+ "0x%lx will be disabled.", addr);
+
+disable:
+ perf_event_disable_inatomic(bp);
+ return false;
+}
+
int hw_breakpoint_handler(struct die_args *args)
{
int rc = NOTIFY_STOP;
struct perf_event *bp;
struct pt_regs *regs = args->regs;
-#ifndef CONFIG_PPC_8xx
- int stepped = 1;
- unsigned int instr;
-#endif
struct arch_hw_breakpoint *info;
unsigned long dar = regs->dar;
@@ -251,31 +296,9 @@ int hw_breakpoint_handler(struct die_args *args)
(dar - bp->attr.bp_addr < bp->attr.bp_len)))
info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
-#ifndef CONFIG_PPC_8xx
- /* Do not emulate user-space instructions, instead single-step them */
- if (user_mode(regs)) {
- current->thread.last_hit_ubp = bp;
- regs->msr |= MSR_SE;
+ if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info->address))
goto out;
- }
- stepped = 0;
- instr = 0;
- if (!__get_user_inatomic(instr, (unsigned int *) regs->nip))
- stepped = emulate_step(regs, instr);
-
- /*
- * emulate_step() could not execute it. We've failed in reliably
- * handling the hw-breakpoint. Unregister it and throw a warning
- * message to let the user know about it.
- */
- if (!stepped) {
- WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
- "0x%lx will be disabled.", info->address);
- perf_event_disable_inatomic(bp);
- goto out;
- }
-#endif
/*
* As a policy, the callback is invoked in a 'trigger-after-execute'
* fashion
@@ -366,59 +389,3 @@ void hw_breakpoint_pmu_read(struct perf_event *bp)
{
/* TODO */
}
-
-bool dawr_force_enable;
-EXPORT_SYMBOL_GPL(dawr_force_enable);
-
-static ssize_t dawr_write_file_bool(struct file *file,
- const char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct arch_hw_breakpoint null_brk = {0, 0, 0};
- size_t rc;
-
- /* Send error to user if they hypervisor won't allow us to write DAWR */
- if ((!dawr_force_enable) &&
- (firmware_has_feature(FW_FEATURE_LPAR)) &&
- (set_dawr(&null_brk) != H_SUCCESS))
- return -1;
-
- rc = debugfs_write_file_bool(file, user_buf, count, ppos);
- if (rc)
- return rc;
-
- /* If we are clearing, make sure all CPUs have the DAWR cleared */
- if (!dawr_force_enable)
- smp_call_function((smp_call_func_t)set_dawr, &null_brk, 0);
-
- return rc;
-}
-
-static const struct file_operations dawr_enable_fops = {
- .read = debugfs_read_file_bool,
- .write = dawr_write_file_bool,
- .open = simple_open,
- .llseek = default_llseek,
-};
-
-static int __init dawr_force_setup(void)
-{
- dawr_force_enable = false;
-
- if (cpu_has_feature(CPU_FTR_DAWR)) {
- /* Don't setup sysfs file for user control on P8 */
- dawr_force_enable = true;
- return 0;
- }
-
- if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
- /* Turn DAWR off by default, but allow admin to turn it on */
- dawr_force_enable = false;
- debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
- powerpc_debugfs_root,
- &dawr_force_enable,
- &dawr_enable_fops);
- }
- return 0;
-}
-arch_initcall(dawr_force_setup);
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index fbd2d0007c52..0276bc8c8969 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -149,8 +149,8 @@ static const struct ppc_pci_io iowa_pci_io = {
};
#ifdef CONFIG_PPC_INDIRECT_MMIO
-static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
- pgprot_t prot, void *caller)
+void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
+ pgprot_t prot, void *caller)
{
struct iowa_bus *bus;
void __iomem *res = __ioremap_caller(addr, size, prot, caller);
@@ -163,20 +163,17 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
}
return res;
}
-#else /* CONFIG_PPC_INDIRECT_MMIO */
-#define iowa_ioremap NULL
#endif /* !CONFIG_PPC_INDIRECT_MMIO */
+bool io_workaround_inited;
+
/* Enable IO workaround */
static void io_workaround_init(void)
{
- static int io_workaround_inited;
-
if (io_workaround_inited)
return;
ppc_pci_io = iowa_pci_io;
- ppc_md.ioremap = iowa_ioremap;
- io_workaround_inited = 1;
+ io_workaround_inited = true;
}
/* Register new bus to support workaround */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 0a67ce9f827e..9704f3f76e63 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -633,11 +633,54 @@ static void iommu_table_clear(struct iommu_table *tbl)
#endif
}
+static void iommu_table_reserve_pages(struct iommu_table *tbl,
+ unsigned long res_start, unsigned long res_end)
+{
+ int i;
+
+ WARN_ON_ONCE(res_end < res_start);
+ /*
+ * Reserve page 0 so it will not be used for any mappings.
+ * This avoids buggy drivers that consider page 0 to be invalid
+ * to crash the machine or even lose data.
+ */
+ if (tbl->it_offset == 0)
+ set_bit(0, tbl->it_map);
+
+ tbl->it_reserved_start = res_start;
+ tbl->it_reserved_end = res_end;
+
+ /* Check if res_start..res_end isn't empty and overlaps the table */
+ if (res_start && res_end &&
+ (tbl->it_offset + tbl->it_size < res_start ||
+ res_end < tbl->it_offset))
+ return;
+
+ for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+ set_bit(i - tbl->it_offset, tbl->it_map);
+}
+
+static void iommu_table_release_pages(struct iommu_table *tbl)
+{
+ int i;
+
+ /*
+ * In case we have reserved the first bit, we should not emit
+ * the warning below.
+ */
+ if (tbl->it_offset == 0)
+ clear_bit(0, tbl->it_map);
+
+ for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+ clear_bit(i - tbl->it_offset, tbl->it_map);
+}
+
/*
* Build a iommu_table structure. This contains a bit map which
* is used to manage allocation of the tce space.
*/
-struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
+struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
+ unsigned long res_start, unsigned long res_end)
{
unsigned long sz;
static int welcomed = 0;
@@ -656,13 +699,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
tbl->it_map = page_address(page);
memset(tbl->it_map, 0, sz);
- /*
- * Reserve page 0 so it will not be used for any mappings.
- * This avoids buggy drivers that consider page 0 to be invalid
- * to crash the machine or even lose data.
- */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ iommu_table_reserve_pages(tbl, res_start, res_end);
/* We only split the IOMMU table if we have 1GB or more of space */
if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
@@ -714,12 +751,7 @@ static void iommu_table_free(struct kref *kref)
return;
}
- /*
- * In case we have reserved the first bit, we should not emit
- * the warning below.
- */
- if (tbl->it_offset == 0)
- clear_bit(0, tbl->it_map);
+ iommu_table_release_pages(tbl);
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
@@ -981,29 +1013,32 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
-long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
+extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
+ struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
{
long ret;
unsigned long size = 0;
- ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
-
+ ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL)) &&
!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
&size))
SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
- /* if (unlikely(ret))
- pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
- __func__, hwaddr, entry << tbl->it_page_shift,
- hwaddr, ret); */
-
return ret;
}
-EXPORT_SYMBOL_GPL(iommu_tce_xchg);
+EXPORT_SYMBOL_GPL(iommu_tce_xchg_no_kill);
+
+void iommu_tce_kill(struct iommu_table *tbl,
+ unsigned long entry, unsigned long pages)
+{
+ if (tbl->it_ops->tce_kill)
+ tbl->it_ops->tce_kill(tbl, entry, pages, false);
+}
+EXPORT_SYMBOL_GPL(iommu_tce_kill);
int iommu_take_ownership(struct iommu_table *tbl)
{
@@ -1017,22 +1052,21 @@ int iommu_take_ownership(struct iommu_table *tbl)
* requires exchange() callback defined so if it is not
* implemented, we disallow taking ownership over the table.
*/
- if (!tbl->it_ops->exchange)
+ if (!tbl->it_ops->xchg_no_kill)
return -EINVAL;
spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++)
spin_lock(&tbl->pools[i].lock);
- if (tbl->it_offset == 0)
- clear_bit(0, tbl->it_map);
+ iommu_table_release_pages(tbl);
if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
pr_err("iommu_tce: it_map is not empty");
ret = -EBUSY;
- /* Restore bit#0 set by iommu_init_table() */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ /* Undo iommu_table_release_pages, i.e. restore bit#0, etc */
+ iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
+ tbl->it_reserved_end);
} else {
memset(tbl->it_map, 0xff, sz);
}
@@ -1055,9 +1089,8 @@ void iommu_release_ownership(struct iommu_table *tbl)
memset(tbl->it_map, 0, sz);
- /* Restore bit#0 set by iommu_init_table() */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
+ tbl->it_reserved_end);
for (i = 0; i < tbl->nr_pools; i++)
spin_unlock(&tbl->pools[i].lock);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index bc68c53af67c..5645bc9cbc09 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -255,7 +255,7 @@ notrace void arch_local_irq_restore(unsigned long mask)
irq_happened = get_irq_happened();
if (!irq_happened) {
#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
- WARN_ON(!(mfmsr() & MSR_EE));
+ WARN_ON_ONCE(!(mfmsr() & MSR_EE));
#endif
return;
}
@@ -268,7 +268,7 @@ notrace void arch_local_irq_restore(unsigned long mask)
*/
if (!(irq_happened & PACA_IRQ_HARD_DIS)) {
#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
- WARN_ON(!(mfmsr() & MSR_EE));
+ WARN_ON_ONCE(!(mfmsr() & MSR_EE));
#endif
__hard_irq_disable();
#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
@@ -279,7 +279,7 @@ notrace void arch_local_irq_restore(unsigned long mask)
* warn if we are wrong. Only do that when IRQ tracing
* is enabled as mfmsr() can be costly.
*/
- if (WARN_ON(mfmsr() & MSR_EE))
+ if (WARN_ON_ONCE(mfmsr() & MSR_EE))
__hard_irq_disable();
#endif
}
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
index 83cf7b852876..3072fd6dbe94 100644
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -23,541 +23,6 @@
#include <linux/slab.h>
#include <linux/types.h>
-#define PURGATORY_STACK_SIZE (16 * 1024)
-
-#define elf_addr_to_cpu elf64_to_cpu
-
-#ifndef Elf_Rel
-#define Elf_Rel Elf64_Rel
-#endif /* Elf_Rel */
-
-struct elf_info {
- /*
- * Where the ELF binary contents are kept.
- * Memory managed by the user of the struct.
- */
- const char *buffer;
-
- const struct elfhdr *ehdr;
- const struct elf_phdr *proghdrs;
- struct elf_shdr *sechdrs;
-};
-
-static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
-{
- return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
-}
-
-static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le64_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be64_to_cpu(value);
-
- return value;
-}
-
-static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le16_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be16_to_cpu(value);
-
- return value;
-}
-
-static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le32_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be32_to_cpu(value);
-
- return value;
-}
-
-/**
- * elf_is_ehdr_sane - check that it is safe to use the ELF header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len)
-{
- if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) {
- pr_debug("Bad program header size.\n");
- return false;
- } else if (ehdr->e_shnum > 0 &&
- ehdr->e_shentsize != sizeof(struct elf_shdr)) {
- pr_debug("Bad section header size.\n");
- return false;
- } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
- ehdr->e_version != EV_CURRENT) {
- pr_debug("Unknown ELF version.\n");
- return false;
- }
-
- if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
- size_t phdr_size;
-
- /*
- * e_phnum is at most 65535 so calculating the size of the
- * program header cannot overflow.
- */
- phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
-
- /* Sanity check the program header table location. */
- if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) {
- pr_debug("Program headers at invalid location.\n");
- return false;
- } else if (ehdr->e_phoff + phdr_size > buf_len) {
- pr_debug("Program headers truncated.\n");
- return false;
- }
- }
-
- if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
- size_t shdr_size;
-
- /*
- * e_shnum is at most 65536 so calculating
- * the size of the section header cannot overflow.
- */
- shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum;
-
- /* Sanity check the section header table location. */
- if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) {
- pr_debug("Section headers at invalid location.\n");
- return false;
- } else if (ehdr->e_shoff + shdr_size > buf_len) {
- pr_debug("Section headers truncated.\n");
- return false;
- }
- }
-
- return true;
-}
-
-static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr)
-{
- struct elfhdr *buf_ehdr;
-
- if (len < sizeof(*buf_ehdr)) {
- pr_debug("Buffer is too small to hold ELF header.\n");
- return -ENOEXEC;
- }
-
- memset(ehdr, 0, sizeof(*ehdr));
- memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident));
- if (!elf_is_elf_file(ehdr)) {
- pr_debug("No ELF header magic.\n");
- return -ENOEXEC;
- }
-
- if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) {
- pr_debug("Not a supported ELF class.\n");
- return -ENOEXEC;
- } else if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
- ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
- pr_debug("Not a supported ELF data format.\n");
- return -ENOEXEC;
- }
-
- buf_ehdr = (struct elfhdr *) buf;
- if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) {
- pr_debug("Bad ELF header size.\n");
- return -ENOEXEC;
- }
-
- ehdr->e_type = elf16_to_cpu(ehdr, buf_ehdr->e_type);
- ehdr->e_machine = elf16_to_cpu(ehdr, buf_ehdr->e_machine);
- ehdr->e_version = elf32_to_cpu(ehdr, buf_ehdr->e_version);
- ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry);
- ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff);
- ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff);
- ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags);
- ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize);
- ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum);
- ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize);
- ehdr->e_shnum = elf16_to_cpu(ehdr, buf_ehdr->e_shnum);
- ehdr->e_shstrndx = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx);
-
- return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_is_phdr_sane - check that it is safe to use the program header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len)
-{
-
- if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) {
- pr_debug("ELF segment location wraps around.\n");
- return false;
- } else if (phdr->p_offset + phdr->p_filesz > buf_len) {
- pr_debug("ELF segment not in file.\n");
- return false;
- } else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) {
- pr_debug("ELF segment address wraps around.\n");
- return false;
- }
-
- return true;
-}
-
-static int elf_read_phdr(const char *buf, size_t len, struct elf_info *elf_info,
- int idx)
-{
- /* Override the const in proghdrs, we are the ones doing the loading. */
- struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx];
- const char *pbuf;
- struct elf_phdr *buf_phdr;
-
- pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr));
- buf_phdr = (struct elf_phdr *) pbuf;
-
- phdr->p_type = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type);
- phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset);
- phdr->p_paddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr);
- phdr->p_vaddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr);
- phdr->p_flags = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags);
-
- /*
- * The following fields have a type equivalent to Elf_Addr
- * both in 32 bit and 64 bit ELF.
- */
- phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz);
- phdr->p_memsz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz);
- phdr->p_align = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align);
-
- return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_phdrs - read the program headers from the buffer
- *
- * This function assumes that the program header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_phdrs(const char *buf, size_t len,
- struct elf_info *elf_info)
-{
- size_t phdr_size, i;
- const struct elfhdr *ehdr = elf_info->ehdr;
-
- /*
- * e_phnum is at most 65535 so calculating the size of the
- * program header cannot overflow.
- */
- phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
-
- elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL);
- if (!elf_info->proghdrs)
- return -ENOMEM;
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- int ret;
-
- ret = elf_read_phdr(buf, len, elf_info, i);
- if (ret) {
- kfree(elf_info->proghdrs);
- elf_info->proghdrs = NULL;
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_is_shdr_sane - check that it is safe to use the section header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len)
-{
- bool size_ok;
-
- /* SHT_NULL headers have undefined values, so we can't check them. */
- if (shdr->sh_type == SHT_NULL)
- return true;
-
- /* Now verify sh_entsize */
- switch (shdr->sh_type) {
- case SHT_SYMTAB:
- size_ok = shdr->sh_entsize == sizeof(Elf_Sym);
- break;
- case SHT_RELA:
- size_ok = shdr->sh_entsize == sizeof(Elf_Rela);
- break;
- case SHT_DYNAMIC:
- size_ok = shdr->sh_entsize == sizeof(Elf_Dyn);
- break;
- case SHT_REL:
- size_ok = shdr->sh_entsize == sizeof(Elf_Rel);
- break;
- case SHT_NOTE:
- case SHT_PROGBITS:
- case SHT_HASH:
- case SHT_NOBITS:
- default:
- /*
- * This is a section whose entsize requirements
- * I don't care about. If I don't know about
- * the section I can't care about it's entsize
- * requirements.
- */
- size_ok = true;
- break;
- }
-
- if (!size_ok) {
- pr_debug("ELF section with wrong entry size.\n");
- return false;
- } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) {
- pr_debug("ELF section address wraps around.\n");
- return false;
- }
-
- if (shdr->sh_type != SHT_NOBITS) {
- if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) {
- pr_debug("ELF section location wraps around.\n");
- return false;
- } else if (shdr->sh_offset + shdr->sh_size > buf_len) {
- pr_debug("ELF section not in file.\n");
- return false;
- }
- }
-
- return true;
-}
-
-static int elf_read_shdr(const char *buf, size_t len, struct elf_info *elf_info,
- int idx)
-{
- struct elf_shdr *shdr = &elf_info->sechdrs[idx];
- const struct elfhdr *ehdr = elf_info->ehdr;
- const char *sbuf;
- struct elf_shdr *buf_shdr;
-
- sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr);
- buf_shdr = (struct elf_shdr *) sbuf;
-
- shdr->sh_name = elf32_to_cpu(ehdr, buf_shdr->sh_name);
- shdr->sh_type = elf32_to_cpu(ehdr, buf_shdr->sh_type);
- shdr->sh_addr = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr);
- shdr->sh_offset = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset);
- shdr->sh_link = elf32_to_cpu(ehdr, buf_shdr->sh_link);
- shdr->sh_info = elf32_to_cpu(ehdr, buf_shdr->sh_info);
-
- /*
- * The following fields have a type equivalent to Elf_Addr
- * both in 32 bit and 64 bit ELF.
- */
- shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags);
- shdr->sh_size = elf_addr_to_cpu(ehdr, buf_shdr->sh_size);
- shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign);
- shdr->sh_entsize = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize);
-
- return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_shdrs - read the section headers from the buffer
- *
- * This function assumes that the section header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_shdrs(const char *buf, size_t len,
- struct elf_info *elf_info)
-{
- size_t shdr_size, i;
-
- /*
- * e_shnum is at most 65536 so calculating
- * the size of the section header cannot overflow.
- */
- shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum;
-
- elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL);
- if (!elf_info->sechdrs)
- return -ENOMEM;
-
- for (i = 0; i < elf_info->ehdr->e_shnum; i++) {
- int ret;
-
- ret = elf_read_shdr(buf, len, elf_info, i);
- if (ret) {
- kfree(elf_info->sechdrs);
- elf_info->sechdrs = NULL;
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_read_from_buffer - read ELF file and sets up ELF header and ELF info
- * @buf: Buffer to read ELF file from.
- * @len: Size of @buf.
- * @ehdr: Pointer to existing struct which will be populated.
- * @elf_info: Pointer to existing struct which will be populated.
- *
- * This function allows reading ELF files with different byte order than
- * the kernel, byte-swapping the fields as needed.
- *
- * Return:
- * On success returns 0, and the caller should call elf_free_info(elf_info) to
- * free the memory allocated for the section and program headers.
- */
-int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr,
- struct elf_info *elf_info)
-{
- int ret;
-
- ret = elf_read_ehdr(buf, len, ehdr);
- if (ret)
- return ret;
-
- elf_info->buffer = buf;
- elf_info->ehdr = ehdr;
- if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
- ret = elf_read_phdrs(buf, len, elf_info);
- if (ret)
- return ret;
- }
- if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
- ret = elf_read_shdrs(buf, len, elf_info);
- if (ret) {
- kfree(elf_info->proghdrs);
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_free_info - free memory allocated by elf_read_from_buffer
- */
-void elf_free_info(struct elf_info *elf_info)
-{
- kfree(elf_info->proghdrs);
- kfree(elf_info->sechdrs);
- memset(elf_info, 0, sizeof(*elf_info));
-}
-/**
- * build_elf_exec_info - read ELF executable and check that we can use it
- */
-static int build_elf_exec_info(const char *buf, size_t len, struct elfhdr *ehdr,
- struct elf_info *elf_info)
-{
- int i;
- int ret;
-
- ret = elf_read_from_buffer(buf, len, ehdr, elf_info);
- if (ret)
- return ret;
-
- /* Big endian vmlinux has type ET_DYN. */
- if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) {
- pr_err("Not an ELF executable.\n");
- goto error;
- } else if (!elf_info->proghdrs) {
- pr_err("No ELF program header.\n");
- goto error;
- }
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- /*
- * Kexec does not support loading interpreters.
- * In addition this check keeps us from attempting
- * to kexec ordinay executables.
- */
- if (elf_info->proghdrs[i].p_type == PT_INTERP) {
- pr_err("Requires an ELF interpreter.\n");
- goto error;
- }
- }
-
- return 0;
-error:
- elf_free_info(elf_info);
- return -ENOEXEC;
-}
-
-static int elf64_probe(const char *buf, unsigned long len)
-{
- struct elfhdr ehdr;
- struct elf_info elf_info;
- int ret;
-
- ret = build_elf_exec_info(buf, len, &ehdr, &elf_info);
- if (ret)
- return ret;
-
- elf_free_info(&elf_info);
-
- return elf_check_arch(&ehdr) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_exec_load - load ELF executable image
- * @lowest_load_addr: On return, will be the address where the first PT_LOAD
- * section will be loaded in memory.
- *
- * Return:
- * 0 on success, negative value on failure.
- */
-static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr,
- struct elf_info *elf_info,
- unsigned long *lowest_load_addr)
-{
- unsigned long base = 0, lowest_addr = UINT_MAX;
- int ret;
- size_t i;
- struct kexec_buf kbuf = { .image = image, .buf_max = ppc64_rma_size,
- .top_down = false };
-
- /* Read in the PT_LOAD segments. */
- for (i = 0; i < ehdr->e_phnum; i++) {
- unsigned long load_addr;
- size_t size;
- const struct elf_phdr *phdr;
-
- phdr = &elf_info->proghdrs[i];
- if (phdr->p_type != PT_LOAD)
- continue;
-
- size = phdr->p_filesz;
- if (size > phdr->p_memsz)
- size = phdr->p_memsz;
-
- kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
- kbuf.bufsz = size;
- kbuf.memsz = phdr->p_memsz;
- kbuf.buf_align = phdr->p_align;
- kbuf.buf_min = phdr->p_paddr + base;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- goto out;
- load_addr = kbuf.mem;
-
- if (load_addr < lowest_addr)
- lowest_addr = load_addr;
- }
-
- /* Update entry point to reflect new load address. */
- ehdr->e_entry += base;
-
- *lowest_load_addr = lowest_addr;
- ret = 0;
- out:
- return ret;
-}
-
static void *elf64_load(struct kimage *image, char *kernel_buf,
unsigned long kernel_len, char *initrd,
unsigned long initrd_len, char *cmdline,
@@ -570,18 +35,18 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
void *fdt;
const void *slave_code;
struct elfhdr ehdr;
- struct elf_info elf_info;
+ struct kexec_elf_info elf_info;
struct kexec_buf kbuf = { .image = image, .buf_min = 0,
.buf_max = ppc64_rma_size };
struct kexec_buf pbuf = { .image = image, .buf_min = 0,
.buf_max = ppc64_rma_size, .top_down = true,
.mem = KEXEC_BUF_MEM_UNKNOWN };
- ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+ ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
if (ret)
goto out;
- ret = elf_exec_load(image, &ehdr, &elf_info, &kernel_load_addr);
+ ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
if (ret)
goto out;
@@ -648,13 +113,13 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
pr_err("Error setting up the purgatory.\n");
out:
- elf_free_info(&elf_info);
+ kexec_free_elf_info(&elf_info);
/* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
return ret ? ERR_PTR(ret) : fdt;
}
const struct kexec_file_ops kexec_elf64_ops = {
- .probe = elf64_probe,
+ .probe = kexec_elf_probe,
.load = elf64_load,
};
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index b7b3a5e4e224..617eba82531c 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -64,16 +64,17 @@
#define KVM_INST_MTSRIN 0x7c0001e4
static bool kvm_patching_worked = true;
-char kvm_tmp[1024 * 1024];
+extern char kvm_tmp[];
+extern char kvm_tmp_end[];
static int kvm_tmp_index;
-static inline void kvm_patch_ins(u32 *inst, u32 new_inst)
+static void __init kvm_patch_ins(u32 *inst, u32 new_inst)
{
*inst = new_inst;
flush_icache_range((ulong)inst, (ulong)inst + 4);
}
-static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@@ -82,7 +83,7 @@ static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@@ -91,12 +92,12 @@ static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff));
}
-static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc));
@@ -105,17 +106,17 @@ static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc));
}
-static void kvm_patch_ins_nop(u32 *inst)
+static void __init kvm_patch_ins_nop(u32 *inst)
{
kvm_patch_ins(inst, KVM_INST_NOP);
}
-static void kvm_patch_ins_b(u32 *inst, int addr)
+static void __init kvm_patch_ins_b(u32 *inst, int addr)
{
#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S)
/* On relocatable kernels interrupts handlers and our code
@@ -128,11 +129,11 @@ static void kvm_patch_ins_b(u32 *inst, int addr)
kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK));
}
-static u32 *kvm_alloc(int len)
+static u32 * __init kvm_alloc(int len)
{
u32 *p;
- if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) {
+ if ((kvm_tmp_index + len) > (kvm_tmp_end - kvm_tmp)) {
printk(KERN_ERR "KVM: No more space (%d + %d)\n",
kvm_tmp_index, len);
kvm_patching_worked = false;
@@ -151,7 +152,7 @@ extern u32 kvm_emulate_mtmsrd_orig_ins_offs;
extern u32 kvm_emulate_mtmsrd_len;
extern u32 kvm_emulate_mtmsrd[];
-static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
+static void __init kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@@ -204,7 +205,7 @@ extern u32 kvm_emulate_mtmsr_orig_ins_offs;
extern u32 kvm_emulate_mtmsr_len;
extern u32 kvm_emulate_mtmsr[];
-static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
+static void __init kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@@ -265,7 +266,7 @@ extern u32 kvm_emulate_wrtee_orig_ins_offs;
extern u32 kvm_emulate_wrtee_len;
extern u32 kvm_emulate_wrtee[];
-static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
+static void __init kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
{
u32 *p;
int distance_start;
@@ -322,7 +323,7 @@ extern u32 kvm_emulate_wrteei_0_branch_offs;
extern u32 kvm_emulate_wrteei_0_len;
extern u32 kvm_emulate_wrteei_0[];
-static void kvm_patch_ins_wrteei_0(u32 *inst)
+static void __init kvm_patch_ins_wrteei_0(u32 *inst)
{
u32 *p;
int distance_start;
@@ -363,7 +364,7 @@ extern u32 kvm_emulate_mtsrin_orig_ins_offs;
extern u32 kvm_emulate_mtsrin_len;
extern u32 kvm_emulate_mtsrin[];
-static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
+static void __init kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
{
u32 *p;
int distance_start;
@@ -399,7 +400,7 @@ static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
#endif
-static void kvm_map_magic_page(void *data)
+static void __init kvm_map_magic_page(void *data)
{
u32 *features = data;
@@ -414,7 +415,7 @@ static void kvm_map_magic_page(void *data)
*features = out[0];
}
-static void kvm_check_ins(u32 *inst, u32 features)
+static void __init kvm_check_ins(u32 *inst, u32 features)
{
u32 _inst = *inst;
u32 inst_no_rt = _inst & ~KVM_MASK_RT;
@@ -658,7 +659,7 @@ static void kvm_check_ins(u32 *inst, u32 features)
extern u32 kvm_template_start[];
extern u32 kvm_template_end[];
-static void kvm_use_magic_page(void)
+static void __init kvm_use_magic_page(void)
{
u32 *p;
u32 *start, *end;
@@ -699,25 +700,13 @@ static void kvm_use_magic_page(void)
kvm_patching_worked ? "worked" : "failed");
}
-static __init void kvm_free_tmp(void)
-{
- /*
- * Inform kmemleak about the hole in the .bss section since the
- * corresponding pages will be unmapped with DEBUG_PAGEALLOC=y.
- */
- kmemleak_free_part(&kvm_tmp[kvm_tmp_index],
- ARRAY_SIZE(kvm_tmp) - kvm_tmp_index);
- free_reserved_area(&kvm_tmp[kvm_tmp_index],
- &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
-}
-
static int __init kvm_guest_init(void)
{
if (!kvm_para_available())
- goto free_tmp;
+ return 0;
if (!epapr_paravirt_enabled)
- goto free_tmp;
+ return 0;
if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
kvm_use_magic_page();
@@ -727,9 +716,6 @@ static int __init kvm_guest_init(void)
powersave_nap = 1;
#endif
-free_tmp:
- kvm_free_tmp();
-
return 0;
}
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
index eb2568f583ae..7af6f8b50c5d 100644
--- a/arch/powerpc/kernel/kvm_emul.S
+++ b/arch/powerpc/kernel/kvm_emul.S
@@ -192,6 +192,8 @@ kvm_emulate_mtmsr_orig_ins_offs:
kvm_emulate_mtmsr_len:
.long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
+#ifdef CONFIG_BOOKE
+
/* also used for wrteei 1 */
.global kvm_emulate_wrtee
kvm_emulate_wrtee:
@@ -285,6 +287,10 @@ kvm_emulate_wrteei_0_branch_offs:
kvm_emulate_wrteei_0_len:
.long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4
+#endif /* CONFIG_BOOKE */
+
+#ifdef CONFIG_PPC_BOOK3S_32
+
.global kvm_emulate_mtsrin
kvm_emulate_mtsrin:
@@ -334,5 +340,15 @@ kvm_emulate_mtsrin_orig_ins_offs:
kvm_emulate_mtsrin_len:
.long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
+#endif /* CONFIG_PPC_BOOK3S_32 */
+
+ .balign 4
+ .global kvm_tmp
+kvm_tmp:
+ .space (64 * 1024)
+
+.global kvm_tmp_end
+kvm_tmp_end:
+
.global kvm_template_end
kvm_template_end:
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 18481b0e2788..04a7cba58eff 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -29,6 +29,8 @@
#include <asm/smp.h>
#include <asm/hw_breakpoint.h>
#include <asm/asm-prototypes.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
int default_machine_kexec_prepare(struct kimage *image)
{
@@ -327,6 +329,13 @@ void default_machine_kexec(struct kimage *image)
#ifdef CONFIG_PPC_PSERIES
kexec_paca.lppaca_ptr = NULL;
#endif
+
+ if (is_secure_guest() && !(image->preserve_context ||
+ image->type == KEXEC_TYPE_CRASH)) {
+ uv_unshare_all_pages();
+ printk("kexec: Unshared all shared pages.\n");
+ }
+
paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
setup_paca(&kexec_paca);
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index b18df633eae9..34c1001e9e8b 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -33,13 +33,18 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
mce_ue_event_queue);
static void machine_check_process_queued_event(struct irq_work *work);
-void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_check_ue_irq_work(struct irq_work *work);
+static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);
static struct irq_work mce_event_process_work = {
.func = machine_check_process_queued_event,
};
+static struct irq_work mce_ue_event_irq_work = {
+ .func = machine_check_ue_irq_work,
+};
+
DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
static void mce_set_error_info(struct machine_check_event *mce,
@@ -144,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
if (phys_addr != ULONG_MAX) {
mce->u.ue_error.physical_address_provided = true;
mce->u.ue_error.physical_address = phys_addr;
+ mce->u.ue_error.ignore_event = mce_err->ignore_event;
machine_check_ue_event(mce);
}
}
@@ -199,11 +205,15 @@ void release_mce_event(void)
get_mce_event(NULL, true);
}
+static void machine_check_ue_irq_work(struct irq_work *work)
+{
+ schedule_work(&mce_ue_event_work);
+}
/*
* Queue up the MCE event which then can be handled later.
*/
-void machine_check_ue_event(struct machine_check_event *evt)
+static void machine_check_ue_event(struct machine_check_event *evt)
{
int index;
@@ -216,7 +226,7 @@ void machine_check_ue_event(struct machine_check_event *evt)
memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
/* Queue work to process this event later. */
- schedule_work(&mce_ue_event_work);
+ irq_work_queue(&mce_ue_event_irq_work);
}
/*
@@ -257,8 +267,17 @@ static void machine_process_ue_event(struct work_struct *work)
/*
* This should probably queued elsewhere, but
* oh! well
+ *
+ * Don't report this machine check because the caller has a
+ * asked us to ignore the event, it has a fixup handler which
+ * will do the appropriate error handling and reporting.
*/
if (evt->error_type == MCE_ERROR_TYPE_UE) {
+ if (evt->u.ue_error.ignore_event) {
+ __this_cpu_dec(mce_ue_count);
+ continue;
+ }
+
if (evt->u.ue_error.physical_address_provided) {
unsigned long pfn;
@@ -292,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work)
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
evt = this_cpu_ptr(&mce_event_queue[index]);
+
+ if (evt->error_type == MCE_ERROR_TYPE_UE &&
+ evt->u.ue_error.ignore_event) {
+ __this_cpu_dec(mce_queue_count);
+ continue;
+ }
machine_check_print_event_info(evt, false, false);
__this_cpu_dec(mce_queue_count);
}
@@ -300,7 +325,7 @@ static void machine_check_process_queued_event(struct irq_work *work)
void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode, bool in_guest)
{
- const char *level, *sevstr, *subtype, *err_type;
+ const char *level, *sevstr, *subtype, *err_type, *initiator;
uint64_t ea = 0, pa = 0;
int n = 0;
char dar_str[50];
@@ -385,6 +410,28 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
}
+ switch(evt->initiator) {
+ case MCE_INITIATOR_CPU:
+ initiator = "CPU";
+ break;
+ case MCE_INITIATOR_PCI:
+ initiator = "PCI";
+ break;
+ case MCE_INITIATOR_ISA:
+ initiator = "ISA";
+ break;
+ case MCE_INITIATOR_MEMORY:
+ initiator = "Memory";
+ break;
+ case MCE_INITIATOR_POWERMGM:
+ initiator = "Power Management";
+ break;
+ case MCE_INITIATOR_UNKNOWN:
+ default:
+ initiator = "Unknown";
+ break;
+ }
+
switch (evt->error_type) {
case MCE_ERROR_TYPE_UE:
err_type = "UE";
@@ -451,6 +498,14 @@ void machine_check_print_event_info(struct machine_check_event *evt,
if (evt->u.link_error.effective_address_provided)
ea = evt->u.link_error.effective_address;
break;
+ case MCE_ERROR_TYPE_DCACHE:
+ err_type = "D-Cache";
+ subtype = "Unknown";
+ break;
+ case MCE_ERROR_TYPE_ICACHE:
+ err_type = "I-Cache";
+ subtype = "Unknown";
+ break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
err_type = "Unknown";
@@ -483,9 +538,17 @@ void machine_check_print_event_info(struct machine_check_event *evt,
level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
}
+ printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);
+
subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
mc_error_class[evt->error_class] : "Unknown";
printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Display faulty slb contents for SLB errors. */
+ if (evt->error_type == MCE_ERROR_TYPE_SLB)
+ slb_dump_contents(local_paca->mce_faulty_slbs);
+#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index e39536aad30d..1cbf7f1a4e3d 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -11,6 +11,7 @@
#include <linux/types.h>
#include <linux/ptrace.h>
+#include <linux/extable.h>
#include <asm/mmu.h>
#include <asm/mce.h>
#include <asm/machdep.h>
@@ -18,6 +19,7 @@
#include <asm/pte-walk.h>
#include <asm/sstep.h>
#include <asm/exception-64s.h>
+#include <asm/extable.h>
/*
* Convert an address related to an mm to a PFN. NOTE: we are in real
@@ -26,7 +28,8 @@
unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
{
pte_t *ptep;
- unsigned long flags;
+ unsigned int shift;
+ unsigned long pfn, flags;
struct mm_struct *mm;
if (user_mode(regs))
@@ -35,14 +38,23 @@ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
mm = &init_mm;
local_irq_save(flags);
- if (mm == current->mm)
- ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
- else
- ptep = find_init_mm_pte(addr, NULL);
+ ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift);
+
+ if (!ptep || pte_special(*ptep)) {
+ pfn = ULONG_MAX;
+ goto out;
+ }
+
+ if (shift <= PAGE_SHIFT)
+ pfn = pte_pfn(*ptep);
+ else {
+ unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+ pfn = pte_pfn(__pte(pte_val(*ptep) | (addr & rpnmask)));
+ }
+
+out:
local_irq_restore(flags);
- if (!ptep || pte_special(*ptep))
- return ULONG_MAX;
- return pte_pfn(*ptep);
+ return pfn;
}
/* flush SLBs and reload */
@@ -82,8 +94,7 @@ static void flush_erat(void)
return;
}
#endif
- /* PPC_INVALIDATE_ERAT can only be used on ISA v3 and newer */
- asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
}
#define MCE_FLUSH_SLB 1
@@ -345,7 +356,7 @@ static const struct mce_derror_table mce_p9_derror_table[] = {
MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0, false, 0, 0, 0, 0, 0 } };
-static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
+static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
uint64_t *phys_addr)
{
/*
@@ -398,6 +409,8 @@ static int mce_handle_ierror(struct pt_regs *regs,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
handled = mce_flush(MCE_FLUSH_SLB);
break;
case MCE_ERROR_TYPE_ERAT:
@@ -483,6 +496,8 @@ static int mce_handle_derror(struct pt_regs *regs,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
if (mce_flush(MCE_FLUSH_SLB))
handled = 1;
break;
@@ -542,7 +557,8 @@ static int mce_handle_derror(struct pt_regs *regs,
* kernel/exception-64s.h
*/
if (get_paca()->in_mce < MAX_MCE_DEPTH)
- mce_find_instr_ea_and_pfn(regs, addr, phys_addr);
+ mce_find_instr_ea_and_phys(regs, addr,
+ phys_addr);
}
found = 1;
}
@@ -559,9 +575,18 @@ static int mce_handle_derror(struct pt_regs *regs,
return 0;
}
-static long mce_handle_ue_error(struct pt_regs *regs)
+static long mce_handle_ue_error(struct pt_regs *regs,
+ struct mce_error_info *mce_err)
{
long handled = 0;
+ const struct exception_table_entry *entry;
+
+ entry = search_kernel_exception_table(regs->nip);
+ if (entry) {
+ mce_err->ignore_event = true;
+ regs->nip = extable_fixup(entry);
+ return 1;
+ }
/*
* On specific SCOM read via MMIO we may get a machine check
@@ -594,7 +619,7 @@ static long mce_handle_error(struct pt_regs *regs,
&phys_addr);
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
- handled = mce_handle_ue_error(regs);
+ handled = mce_handle_ue_error(regs, &mce_err);
save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index fe4bd321730e..82df4b09e79f 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -292,22 +292,20 @@ _GLOBAL(flush_instruction_cache)
iccci 0,r3
#endif
#elif defined(CONFIG_FSL_BOOKE)
-BEGIN_FTR_SECTION
+#ifdef CONFIG_E200
mfspr r3,SPRN_L1CSR0
ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC
/* msync; isync recommended here */
mtspr SPRN_L1CSR0,r3
isync
blr
-END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
+#endif
mfspr r3,SPRN_L1CSR1
ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
mtspr SPRN_L1CSR1,r3
+#elif defined(CONFIG_PPC_BOOK3S_601)
+ blr /* for 601, do nothing */
#else
- mfspr r3,SPRN_PVR
- rlwinm r3,r3,16,16,31
- cmpwi 0,r3,1
- beqlr /* for 601, do nothing */
/* 603/604 processor - use invalidate-all bit in HID0 */
mfspr r3,SPRN_HID0
ori r3,r3,HID0_ICFI
@@ -326,10 +324,10 @@ EXPORT_SYMBOL(flush_instruction_cache)
* flush_icache_range(unsigned long start, unsigned long stop)
*/
_GLOBAL(flush_icache_range)
-BEGIN_FTR_SECTION
+#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
PURGE_PREFETCHED_INS
- blr /* for 601, do nothing */
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ blr /* for 601 and e200, do nothing */
+#else
rlwinm r3,r3,0,0,31 - L1_CACHE_SHIFT
subf r4,r3,r4
addi r4,r4,L1_CACHE_BYTES - 1
@@ -355,6 +353,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
sync /* additional sync needed on g4 */
isync
blr
+#endif
_ASM_NOKPROBE_SYMBOL(flush_icache_range)
EXPORT_SYMBOL(flush_icache_range)
@@ -362,15 +361,15 @@ EXPORT_SYMBOL(flush_icache_range)
* Flush a particular page from the data cache to RAM.
* Note: this is necessary because the instruction cache does *not*
* snoop from the data cache.
- * This is a no-op on the 601 which has a unified cache.
+ * This is a no-op on the 601 and e200 which have a unified cache.
*
* void __flush_dcache_icache(void *page)
*/
_GLOBAL(__flush_dcache_icache)
-BEGIN_FTR_SECTION
+#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
PURGE_PREFETCHED_INS
blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+#else
rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */
mtctr r4
@@ -398,6 +397,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
sync
isync
blr
+#endif
#ifndef CONFIG_BOOKE
/*
@@ -409,10 +409,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
* void __flush_dcache_icache_phys(unsigned long physaddr)
*/
_GLOBAL(__flush_dcache_icache_phys)
-BEGIN_FTR_SECTION
+#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
PURGE_PREFETCHED_INS
- blr /* for 601, do nothing */
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ blr /* for 601 and e200, do nothing */
+#else
mfmsr r10
rlwinm r0,r10,0,28,26 /* clear DR */
mtmsr r0
@@ -433,6 +433,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
mtmsr r10 /* restore DR */
isync
blr
+#endif
#endif /* CONFIG_BOOKE */
/*
@@ -452,7 +453,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
stwu r9,16(r3)
_GLOBAL(copy_page)
+ rlwinm r5, r3, 0, L1_CACHE_BYTES - 1
addi r3,r3,-4
+
+0: twnei r5, 0 /* WARN if r3 is not cache aligned */
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+
addi r4,r4,-4
li r5,4
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 1ad4089dd110..b55a7b4cb543 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -110,58 +110,6 @@ _ASM_NOKPROBE_SYMBOL(flush_icache_range)
EXPORT_SYMBOL(flush_icache_range)
/*
- * Like above, but only do the D-cache.
- *
- * flush_dcache_range(unsigned long start, unsigned long stop)
- *
- * flush all bytes from start to stop-1 inclusive
- */
-_GLOBAL_TOC(flush_dcache_range)
-
-/*
- * Flush the data cache to memory
- *
- * Different systems have different cache line sizes
- */
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mtctr r8
-0: dcbst 0,r6
- add r6,r6,r7
- bdnz 0b
- sync
- blr
-EXPORT_SYMBOL(flush_dcache_range)
-
-_GLOBAL(flush_inval_dcache_range)
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- sync
- isync
- mtctr r8
-0: dcbf 0,r6
- add r6,r6,r7
- bdnz 0b
- sync
- isync
- blr
-
-
-/*
* Flush a particular page from the data cache to RAM.
* Note: this is necessary because the instruction cache does *not*
* snoop from the data cache.
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 991d396fb50d..d7134c614c16 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -160,10 +160,12 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr,
static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
{
- if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16)
- && entry->jump[1] == 0x398c0000 + (val & 0xffff))
- return 1;
- return 0;
+ if (entry->jump[0] != (PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val)))
+ return 0;
+ if (entry->jump[1] != (PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) |
+ PPC_LO(val)))
+ return 0;
+ return 1;
}
/* Set up a trampoline in the PLT to bounce us to the distant function */
@@ -188,10 +190,16 @@ static uint32_t do_plt_call(void *location,
entry++;
}
- entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */
- entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/
- entry->jump[2] = 0x7d8903a6; /* mtctr r12 */
- entry->jump[3] = 0x4e800420; /* bctr */
+ /*
+ * lis r12, sym@ha
+ * addi r12, r12, sym@l
+ * mtctr r12
+ * bctr
+ */
+ entry->jump[0] = PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val);
+ entry->jump[1] = PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | PPC_LO(val);
+ entry->jump[2] = PPC_INST_MTCTR | __PPC_RS(R12);
+ entry->jump[3] = PPC_INST_BCTR;
pr_debug("Initialized plt for 0x%x at %p\n", val, entry);
return (uint32_t)entry;
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index a93b10c48000..007606a48fd9 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -121,20 +121,27 @@ struct ppc64_stub_entry
* the stub, but it's significantly shorter to put these values at the
* end of the stub code, and patch the stub address (32-bits relative
* to the TOC ptr, r2) into the stub.
+ *
+ * addis r11,r2, <high>
+ * addi r11,r11, <low>
+ * std r2,R2_STACK_OFFSET(r1)
+ * ld r12,32(r11)
+ * ld r2,40(r11)
+ * mtctr r12
+ * bctr
*/
-
static u32 ppc64_stub_insns[] = {
- 0x3d620000, /* addis r11,r2, <high> */
- 0x396b0000, /* addi r11,r11, <low> */
+ PPC_INST_ADDIS | __PPC_RT(R11) | __PPC_RA(R2),
+ PPC_INST_ADDI | __PPC_RT(R11) | __PPC_RA(R11),
/* Save current r2 value in magic place on the stack. */
- 0xf8410000|R2_STACK_OFFSET, /* std r2,R2_STACK_OFFSET(r1) */
- 0xe98b0020, /* ld r12,32(r11) */
+ PPC_INST_STD | __PPC_RS(R2) | __PPC_RA(R1) | R2_STACK_OFFSET,
+ PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R11) | 32,
#ifdef PPC64_ELF_ABI_v1
/* Set up new r2 from function descriptor */
- 0xe84b0028, /* ld r2,40(r11) */
+ PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R11) | 40,
#endif
- 0x7d8903a6, /* mtctr r12 */
- 0x4e800420 /* bctr */
+ PPC_INST_MTCTR | __PPC_RS(R12),
+ PPC_INST_BCTR,
};
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -388,13 +395,6 @@ static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me)
return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000;
}
-/* Both low and high 16 bits are added as SIGNED additions, so if low
- 16 bits has high bit set, high 16 bits must be adjusted. These
- macros do that (stolen from binutils). */
-#define PPC_LO(v) ((v) & 0xffff)
-#define PPC_HI(v) (((v) >> 16) & 0xffff)
-#define PPC_HA(v) PPC_HI ((v) + 0x8000)
-
/* Patch stub to reference function and correct r2 value. */
static inline int create_stub(const Elf64_Shdr *sechdrs,
struct ppc64_stub_entry *entry,
@@ -699,18 +699,21 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
* ld r2, ...(r12)
* add r2, r2, r12
*/
- if ((((uint32_t *)location)[0] & ~0xfffc)
- != 0xe84c0000)
+ if ((((uint32_t *)location)[0] & ~0xfffc) !=
+ (PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R12)))
break;
- if (((uint32_t *)location)[1] != 0x7c426214)
+ if (((uint32_t *)location)[1] !=
+ (PPC_INST_ADD | __PPC_RT(R2) | __PPC_RA(R2) | __PPC_RB(R12)))
break;
/*
* If found, replace it with:
* addis r2, r12, (.TOC.-func)@ha
- * addi r2, r12, (.TOC.-func)@l
+ * addi r2, r2, (.TOC.-func)@l
*/
- ((uint32_t *)location)[0] = 0x3c4c0000 + PPC_HA(value);
- ((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value);
+ ((uint32_t *)location)[0] = PPC_INST_ADDIS | __PPC_RT(R2) |
+ __PPC_RA(R12) | PPC_HA(value);
+ ((uint32_t *)location)[1] = PPC_INST_ADDI | __PPC_RT(R2) |
+ __PPC_RA(R2) | PPC_LO(value);
break;
case R_PPC64_REL16_HA:
@@ -764,12 +767,19 @@ static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs,
{
struct ppc64_stub_entry *entry;
unsigned int i, num_stubs;
+ /*
+ * ld r12,PACATOC(r13)
+ * addis r12,r12,<high>
+ * addi r12,r12,<low>
+ * mtctr r12
+ * bctr
+ */
static u32 stub_insns[] = {
- 0xe98d0000 | PACATOC, /* ld r12,PACATOC(r13) */
- 0x3d8c0000, /* addis r12,r12,<high> */
- 0x398c0000, /* addi r12,r12,<low> */
- 0x7d8903a6, /* mtctr r12 */
- 0x4e800420, /* bctr */
+ PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC,
+ PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12),
+ PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12),
+ PPC_INST_MTCTR | __PPC_RS(R12),
+ PPC_INST_BCTR,
};
long reladdr;
diff --git a/arch/powerpc/kernel/note.S b/arch/powerpc/kernel/note.S
new file mode 100644
index 000000000000..bcdad15395dd
--- /dev/null
+++ b/arch/powerpc/kernel/note.S
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PowerPC ELF notes.
+ *
+ * Copyright 2019, IBM Corporation
+ */
+
+#include <linux/elfnote.h>
+#include <asm/elfnote.h>
+
+/*
+ * Ultravisor-capable bit (PowerNV only).
+ *
+ * Bit 0 indicates that the powerpc kernel binary knows how to run in an
+ * ultravisor-enabled system.
+ *
+ * In an ultravisor-enabled system, some machine resources are now controlled
+ * by the ultravisor. If the kernel is not ultravisor-capable, but it ends up
+ * being run on a machine with ultravisor, the kernel will probably crash
+ * trying to access ultravisor resources. For instance, it may crash in early
+ * boot trying to set the partition table entry 0.
+ *
+ * In an ultravisor-enabled system, a bootloader could warn the user or prevent
+ * the kernel from being run if the PowerPC ultravisor capability doesn't exist
+ * or the Ultravisor-capable bit is not set.
+ */
+#ifdef CONFIG_PPC_POWERNV
+#define PPCCAP_ULTRAVISOR_BIT (1 << 0)
+#else
+#define PPCCAP_ULTRAVISOR_BIT 0
+#endif
+
+/*
+ * Add the PowerPC Capabilities in the binary ELF note. It is a bitmap that
+ * can be used to advertise kernel capabilities to userland.
+ */
+#define PPC_CAPABILITIES_BITMAP (PPCCAP_ULTRAVISOR_BIT)
+
+ELFNOTE(PowerPC, PPC_ELFNOTE_CAPABILITIES,
+ .long PPC_CAPABILITIES_BITMAP)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index e3ad8aa4730d..949eceb254d8 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -14,6 +14,8 @@
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/kexec.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
#include "setup.h"
@@ -52,6 +54,43 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align,
#ifdef CONFIG_PPC_PSERIES
+#define LPPACA_SIZE 0x400
+
+static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align,
+ unsigned long limit, int cpu)
+{
+ size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE);
+ static unsigned long shared_lppaca_size;
+ static void *shared_lppaca;
+ void *ptr;
+
+ if (!shared_lppaca) {
+ memblock_set_bottom_up(true);
+
+ shared_lppaca =
+ memblock_alloc_try_nid(shared_lppaca_total_size,
+ PAGE_SIZE, MEMBLOCK_LOW_LIMIT,
+ limit, NUMA_NO_NODE);
+ if (!shared_lppaca)
+ panic("cannot allocate shared data");
+
+ memblock_set_bottom_up(false);
+ uv_share_page(PHYS_PFN(__pa(shared_lppaca)),
+ shared_lppaca_total_size >> PAGE_SHIFT);
+ }
+
+ ptr = shared_lppaca + shared_lppaca_size;
+ shared_lppaca_size += size;
+
+ /*
+ * This is very early in boot, so no harm done if the kernel crashes at
+ * this point.
+ */
+ BUG_ON(shared_lppaca_size >= shared_lppaca_total_size);
+
+ return ptr;
+}
+
/*
* See asm/lppaca.h for more detail.
*
@@ -65,7 +104,7 @@ static inline void init_lppaca(struct lppaca *lppaca)
*lppaca = (struct lppaca) {
.desc = cpu_to_be32(0xd397d781), /* "LpPa" */
- .size = cpu_to_be16(0x400),
+ .size = cpu_to_be16(LPPACA_SIZE),
.fpregs_in_use = 1,
.slb_count = cpu_to_be16(64),
.vmxregs_in_use = 0,
@@ -75,19 +114,22 @@ static inline void init_lppaca(struct lppaca *lppaca)
static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
{
struct lppaca *lp;
- size_t size = 0x400;
- BUILD_BUG_ON(size < sizeof(struct lppaca));
+ BUILD_BUG_ON(sizeof(struct lppaca) > LPPACA_SIZE);
if (early_cpu_has_feature(CPU_FTR_HVMODE))
return NULL;
- lp = alloc_paca_data(size, 0x400, limit, cpu);
+ if (is_secure_guest())
+ lp = alloc_shared_lppaca(LPPACA_SIZE, 0x400, limit, cpu);
+ else
+ lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu);
+
init_lppaca(lp);
return lp;
}
-#endif /* CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_PPC_PSERIES */
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index f627e15bb43c..1c448cf25506 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1379,10 +1379,6 @@ void __init pcibios_resource_survey(void)
pr_debug("PCI: Assigning unassigned resources...\n");
pci_assign_unassigned_resources();
}
-
- /* Call machine dependent fixup */
- if (ppc_md.pcibios_fixup)
- ppc_md.pcibios_fixup();
}
/* This is used by the PCI hotplug driver to allocate resource
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 0b0cf8168b47..fc62c4bc47b1 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -55,11 +55,18 @@ EXPORT_SYMBOL_GPL(pci_find_bus_by_node);
void pcibios_release_device(struct pci_dev *dev)
{
struct pci_controller *phb = pci_bus_to_host(dev->bus);
+ struct pci_dn *pdn = pci_get_pdn(dev);
eeh_remove_device(dev);
if (phb->controller_ops.release_device)
phb->controller_ops.release_device(dev);
+
+ /* free()ing the pci_dn has been deferred to us, do it now */
+ if (pdn && (pdn->flags & PCI_DN_FLAG_DEAD)) {
+ pci_dbg(dev, "freeing dead pdn\n");
+ kfree(pdn);
+ }
}
/**
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 50942a1d1a5f..b49e1060a3bf 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -263,6 +263,10 @@ static int __init pcibios_init(void)
/* Call common code to handle resource allocation */
pcibios_resource_survey();
+ /* Call machine dependent fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
/* Call machine dependent post-init code */
if (ppc_md.pcibios_after_init)
ppc_md.pcibios_after_init();
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index b7030b1189d0..f83d1f69b1dd 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -54,14 +54,20 @@ static int __init pcibios_init(void)
pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
/* Scan all of the recorded PCI controllers. */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
pcibios_scan_phb(hose);
- pci_bus_add_devices(hose->bus);
- }
/* Call common code to handle resource allocation */
pcibios_resource_survey();
+ /* Add devices. */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ pci_bus_add_devices(hose->bus);
+
+ /* Call machine dependent fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
return 0;
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index c4c8c237a106..9524009ca1ae 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -323,6 +323,7 @@ void pci_remove_device_node_info(struct device_node *dn)
{
struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
struct device_node *parent;
+ struct pci_dev *pdev;
#ifdef CONFIG_EEH
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -336,12 +337,28 @@ void pci_remove_device_node_info(struct device_node *dn)
WARN_ON(!list_empty(&pdn->child_list));
list_del(&pdn->list);
+ /* Drop the parent pci_dn's ref to our backing dt node */
parent = of_get_parent(dn);
if (parent)
of_node_put(parent);
- dn->data = NULL;
- kfree(pdn);
+ /*
+ * At this point we *might* still have a pci_dev that was
+ * instantiated from this pci_dn. So defer free()ing it until
+ * the pci_dev's release function is called.
+ */
+ pdev = pci_get_domain_bus_and_slot(pdn->phb->global_number,
+ pdn->busno, pdn->devfn);
+ if (pdev) {
+ /* NB: pdev has a ref to dn */
+ pci_dbg(pdev, "marked pdn (from %pOF) as dead\n", dn);
+ pdn->flags |= PCI_DN_FLAG_DEAD;
+ } else {
+ dn->data = NULL;
+ kfree(pdn);
+ }
+
+ pci_dev_put(pdev);
}
EXPORT_SYMBOL_GPL(pci_remove_device_node_info);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 24522aa37665..f91d7e94872e 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -34,29 +34,75 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
* pci_parse_of_flags - Parse the flags cell of a device tree PCI address
* @addr0: value of 1st cell of a device tree PCI address.
* @bridge: Set this flag if the address is from a bridge 'ranges' property
+ *
+ * PCI Bus Binding to IEEE Std 1275-1994
+ *
+ * Bit# 33222222 22221111 11111100 00000000
+ * 10987654 32109876 54321098 76543210
+ * phys.hi cell: npt000ss bbbbbbbb dddddfff rrrrrrrr
+ * phys.mid cell: hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh
+ * phys.lo cell: llllllll llllllll llllllll llllllll
+ *
+ * where:
+ * n is 0 if the address is relocatable, 1 otherwise
+ * p is 1 if the addressable region is "prefetchable", 0 otherwise
+ * t is 1 if the address is aliased (for non-relocatable I/O),
+ * below 1 MB (for Memory),or below 64 KB (for relocatable I/O).
+ * ss is the space code, denoting the address space:
+ * 00 denotes Configuration Space
+ * 01 denotes I/O Space
+ * 10 denotes 32-bit-address Memory Space
+ * 11 denotes 64-bit-address Memory Space
+ * bbbbbbbb is the 8-bit Bus Number
+ * ddddd is the 5-bit Device Number
+ * fff is the 3-bit Function Number
+ * rrrrrrrr is the 8-bit Register Number
*/
+#define OF_PCI_ADDR0_SPACE(ss) (((ss)&3)<<24)
+#define OF_PCI_ADDR0_SPACE_CFG OF_PCI_ADDR0_SPACE(0)
+#define OF_PCI_ADDR0_SPACE_IO OF_PCI_ADDR0_SPACE(1)
+#define OF_PCI_ADDR0_SPACE_MMIO32 OF_PCI_ADDR0_SPACE(2)
+#define OF_PCI_ADDR0_SPACE_MMIO64 OF_PCI_ADDR0_SPACE(3)
+#define OF_PCI_ADDR0_SPACE_MASK OF_PCI_ADDR0_SPACE(3)
+#define OF_PCI_ADDR0_RELOC (1UL<<31)
+#define OF_PCI_ADDR0_PREFETCH (1UL<<30)
+#define OF_PCI_ADDR0_ALIAS (1UL<<29)
+#define OF_PCI_ADDR0_BUS 0x00FF0000UL
+#define OF_PCI_ADDR0_DEV 0x0000F800UL
+#define OF_PCI_ADDR0_FN 0x00000700UL
+#define OF_PCI_ADDR0_BARREG 0x000000FFUL
+
unsigned int pci_parse_of_flags(u32 addr0, int bridge)
{
- unsigned int flags = 0;
+ unsigned int flags = 0, as = addr0 & OF_PCI_ADDR0_SPACE_MASK;
- if (addr0 & 0x02000000) {
+ if (as == OF_PCI_ADDR0_SPACE_MMIO32 || as == OF_PCI_ADDR0_SPACE_MMIO64) {
flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
- flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
- flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
- if (addr0 & 0x40000000)
- flags |= IORESOURCE_PREFETCH
- | PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+ if (as == OF_PCI_ADDR0_SPACE_MMIO64)
+ flags |= PCI_BASE_ADDRESS_MEM_TYPE_64 | IORESOURCE_MEM_64;
+
+ if (addr0 & OF_PCI_ADDR0_ALIAS)
+ flags |= PCI_BASE_ADDRESS_MEM_TYPE_1M;
+
+ if (addr0 & OF_PCI_ADDR0_PREFETCH)
+ flags |= IORESOURCE_PREFETCH |
+ PCI_BASE_ADDRESS_MEM_PREFETCH;
+
/* Note: We don't know whether the ROM has been left enabled
* by the firmware or not. We mark it as disabled (ie, we do
* not set the IORESOURCE_ROM_ENABLE flag) for now rather than
* do a config space read, it will be force-enabled if needed
*/
- if (!bridge && (addr0 & 0xff) == 0x30)
+ if (!bridge && (addr0 & OF_PCI_ADDR0_BARREG) == PCI_ROM_ADDRESS)
flags |= IORESOURCE_READONLY;
- } else if (addr0 & 0x01000000)
+
+ } else if (as == OF_PCI_ADDR0_SPACE_IO)
flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+
if (flags)
flags |= IORESOURCE_SIZEALIGN;
+
return flags;
}
@@ -77,10 +123,16 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
const __be32 *addrs;
u32 i;
int proplen;
+ bool mark_unset = false;
addrs = of_get_property(node, "assigned-addresses", &proplen);
- if (!addrs)
- return;
+ if (!addrs || !proplen) {
+ addrs = of_get_property(node, "reg", &proplen);
+ if (!addrs || !proplen)
+ return;
+ mark_unset = true;
+ }
+
pr_debug(" parse addresses (%d bytes) @ %p\n", proplen, addrs);
for (; proplen >= 20; proplen -= 20, addrs += 5) {
flags = pci_parse_of_flags(of_read_number(addrs, 1), 0);
@@ -105,6 +157,8 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
continue;
}
res->flags = flags;
+ if (mark_unset)
+ res->flags |= IORESOURCE_UNSET;
res->name = pci_name(dev);
region.start = base;
region.end = base + size - 1;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index f0fbbf6a6a1f..639ceae7da9d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -101,21 +101,8 @@ static void check_if_tm_restore_required(struct task_struct *tsk)
}
}
-static bool tm_active_with_fp(struct task_struct *tsk)
-{
- return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
- (tsk->thread.ckpt_regs.msr & MSR_FP);
-}
-
-static bool tm_active_with_altivec(struct task_struct *tsk)
-{
- return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
- (tsk->thread.ckpt_regs.msr & MSR_VEC);
-}
#else
static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
-static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; }
-static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
bool strict_msr_control;
@@ -252,7 +239,7 @@ EXPORT_SYMBOL(enable_kernel_fp);
static int restore_fp(struct task_struct *tsk)
{
- if (tsk->thread.load_fp || tm_active_with_fp(tsk)) {
+ if (tsk->thread.load_fp) {
load_fp_state(&current->thread.fp_state);
current->thread.load_fp++;
return 1;
@@ -334,8 +321,7 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
static int restore_altivec(struct task_struct *tsk)
{
- if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
- (tsk->thread.load_vec || tm_active_with_altivec(tsk))) {
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) {
load_vr_state(&tsk->thread.vr_state);
tsk->thread.used_vr = 1;
tsk->thread.load_vec++;
@@ -497,13 +483,14 @@ void giveup_all(struct task_struct *tsk)
if (!tsk->thread.regs)
return;
+ check_if_tm_restore_required(tsk);
+
usermsr = tsk->thread.regs->msr;
if ((usermsr & msr_all_available) == 0)
return;
msr_check_and_set(msr_all_available);
- check_if_tm_restore_required(tsk);
WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
@@ -639,7 +626,7 @@ void do_break (struct pt_regs *regs, unsigned long address,
hw_breakpoint_disable();
/* Deliver the signal to userspace */
- force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address, current);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address);
}
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -793,34 +780,6 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk)
return __set_dabr(dabr, dabrx);
}
-int set_dawr(struct arch_hw_breakpoint *brk)
-{
- unsigned long dawr, dawrx, mrd;
-
- dawr = brk->address;
-
- dawrx = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE)) \
- << (63 - 58); //* read/write bits */
- dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) \
- << (63 - 59); //* translate */
- dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) \
- >> 3; //* PRIM bits */
- /* dawr length is stored in field MDR bits 48:53. Matches range in
- doublewords (64 bits) baised by -1 eg. 0b000000=1DW and
- 0b111111=64DW.
- brk->len is in bytes.
- This aligns up to double word size, shifts and does the bias.
- */
- mrd = ((brk->len + 7) >> 3) - 1;
- dawrx |= (mrd & 0x3f) << (63 - 53);
-
- if (ppc_md.set_dawr)
- return ppc_md.set_dawr(dawr, dawrx);
- mtspr(SPRN_DAWR, dawr);
- mtspr(SPRN_DAWRX, dawrx);
- return 0;
-}
-
void __set_breakpoint(struct arch_hw_breakpoint *brk)
{
memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
@@ -1628,8 +1587,9 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
/*
* Copy architecture-specific thread state
*/
-int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long kthread_arg, struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
+ unsigned long kthread_arg, struct task_struct *p,
+ unsigned long tls)
{
struct pt_regs *childregs, *kregs;
extern void ret_from_fork(void);
@@ -1670,10 +1630,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_PPC64
if (!is_32bit_task())
- childregs->gpr[13] = childregs->gpr[6];
+ childregs->gpr[13] = tls;
else
#endif
- childregs->gpr[2] = childregs->gpr[6];
+ childregs->gpr[2] = tls;
}
f = ret_from_fork;
@@ -2074,10 +2034,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
int count = 0;
int firstframe = 1;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- struct ftrace_ret_stack *ret_stack;
- extern void return_to_handler(void);
- unsigned long rth = (unsigned long)return_to_handler;
- int curr_frame = 0;
+ unsigned long ret_addr;
+ int ftrace_idx = 0;
#endif
if (tsk == NULL)
@@ -2106,15 +2064,10 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
if (!firstframe || ip != lr) {
printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- if ((ip == rth) && curr_frame >= 0) {
- ret_stack = ftrace_graph_get_ret_stack(current,
- curr_frame++);
- if (ret_stack)
- pr_cont(" (%pS)",
- (void *)ret_stack->ret);
- else
- curr_frame = -1;
- }
+ ret_addr = ftrace_graph_ret_addr(current,
+ &ftrace_idx, ip, stack);
+ if (ret_addr != ip)
+ pr_cont(" (%pS)", (void *)ret_addr);
#endif
if (firstframe)
pr_cont(" (unreliable)");
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 7159e791a70d..6620f37abe73 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -55,6 +55,7 @@
#include <asm/firmware.h>
#include <asm/dt_cpu_ftrs.h>
#include <asm/drmem.h>
+#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
@@ -702,9 +703,12 @@ void __init early_init_devtree(void *params)
#ifdef CONFIG_PPC_POWERNV
/* Some machines might need OPAL info for debugging, grab it now. */
of_scan_flat_dt(early_init_dt_scan_opal, NULL);
+
+ /* Scan tree for ultravisor feature */
+ of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL);
#endif
-#ifdef CONFIG_FA_DUMP
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/* scan tree to see if dump is active during last boot */
of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
#endif
@@ -731,7 +735,7 @@ void __init early_init_devtree(void *params)
if (PHYSICAL_START > MEMORY_START)
memblock_reserve(MEMORY_START, 0x8000);
reserve_kdump_trampoline();
-#ifdef CONFIG_FA_DUMP
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/*
* If we fail to reserve memory for firmware-assisted dump then
* fallback to kexec based kdump.
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index ed446b7ea164..a4e7762dd286 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -40,6 +40,7 @@
#include <asm/sections.h>
#include <asm/machdep.h>
#include <asm/asm-prototypes.h>
+#include <asm/ultravisor-api.h>
#include <linux/linux_logo.h>
@@ -94,7 +95,7 @@ static int of_workarounds __prombss;
#define PROM_BUG() do { \
prom_printf("kernel BUG at %s line 0x%x!\n", \
__FILE__, __LINE__); \
- __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \
+ __builtin_trap(); \
} while (0)
#ifdef DEBUG_PROM
@@ -168,6 +169,11 @@ static unsigned long __prombss prom_tce_alloc_end;
#ifdef CONFIG_PPC_PSERIES
static bool __prombss prom_radix_disable;
+static bool __prombss prom_xive_disable;
+#endif
+
+#ifdef CONFIG_PPC_SVM
+static bool __prombss prom_svm_enable;
#endif
struct platform_support {
@@ -804,7 +810,24 @@ static void __init early_cmdline_parse(void)
}
if (prom_radix_disable)
prom_debug("Radix disabled from cmdline\n");
+
+ opt = prom_strstr(prom_cmd_line, "xive=off");
+ if (opt) {
+ prom_xive_disable = true;
+ prom_debug("XIVE disabled from cmdline\n");
+ }
#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef CONFIG_PPC_SVM
+ opt = prom_strstr(prom_cmd_line, "svm=");
+ if (opt) {
+ bool val;
+
+ opt += sizeof("svm=") - 1;
+ if (!prom_strtobool(opt, &val))
+ prom_svm_enable = val;
+ }
+#endif /* CONFIG_PPC_SVM */
}
#ifdef CONFIG_PPC_PSERIES
@@ -1212,10 +1235,17 @@ static void __init prom_parse_xive_model(u8 val,
switch (val) {
case OV5_FEAT(OV5_XIVE_EITHER): /* Either Available */
prom_debug("XIVE - either mode supported\n");
- support->xive = true;
+ support->xive = !prom_xive_disable;
break;
case OV5_FEAT(OV5_XIVE_EXPLOIT): /* Only Exploitation mode */
prom_debug("XIVE - exploitation mode supported\n");
+ if (prom_xive_disable) {
+ /*
+ * If we __have__ to do XIVE, we're better off ignoring
+ * the command line rather than not booting.
+ */
+ prom_printf("WARNING: Ignoring cmdline option xive=off\n");
+ }
support->xive = true;
break;
case OV5_FEAT(OV5_XIVE_LEGACY): /* Only Legacy mode */
@@ -1562,9 +1592,6 @@ static void __init reserve_mem(u64 base, u64 size)
static void __init prom_init_mem(void)
{
phandle node;
-#ifdef DEBUG_PROM
- char *path;
-#endif
char type[64];
unsigned int plen;
cell_t *p, *endp;
@@ -1586,9 +1613,6 @@ static void __init prom_init_mem(void)
prom_debug("root_size_cells: %x\n", rsc);
prom_debug("scanning memory:\n");
-#ifdef DEBUG_PROM
- path = prom_scratch;
-#endif
for (node = 0; prom_next_node(&node); ) {
type[0] = 0;
@@ -1613,9 +1637,10 @@ static void __init prom_init_mem(void)
endp = p + (plen / sizeof(cell_t));
#ifdef DEBUG_PROM
- memset(path, 0, sizeof(prom_scratch));
- call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1);
- prom_debug(" node %s :\n", path);
+ memset(prom_scratch, 0, sizeof(prom_scratch));
+ call_prom("package-to-path", 3, 1, node, prom_scratch,
+ sizeof(prom_scratch) - 1);
+ prom_debug(" node %s :\n", prom_scratch);
#endif /* DEBUG_PROM */
while ((endp - p) >= (rac + rsc)) {
@@ -1703,6 +1728,43 @@ static void __init prom_close_stdin(void)
}
}
+#ifdef CONFIG_PPC_SVM
+static int prom_rtas_hcall(uint64_t args)
+{
+ register uint64_t arg1 asm("r3") = H_RTAS;
+ register uint64_t arg2 asm("r4") = args;
+
+ asm volatile("sc 1\n" : "=r" (arg1) :
+ "r" (arg1),
+ "r" (arg2) :);
+ return arg1;
+}
+
+static struct rtas_args __prombss os_term_args;
+
+static void __init prom_rtas_os_term(char *str)
+{
+ phandle rtas_node;
+ __be32 val;
+ u32 token;
+
+ prom_debug("%s: start...\n", __func__);
+ rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas"));
+ prom_debug("rtas_node: %x\n", rtas_node);
+ if (!PHANDLE_VALID(rtas_node))
+ return;
+
+ val = 0;
+ prom_getprop(rtas_node, "ibm,os-term", &val, sizeof(val));
+ token = be32_to_cpu(val);
+ prom_debug("ibm,os-term: %x\n", token);
+ if (token == 0)
+ prom_panic("Could not get token for ibm,os-term\n");
+ os_term_args.token = cpu_to_be32(token);
+ prom_rtas_hcall((uint64_t)&os_term_args);
+}
+#endif /* CONFIG_PPC_SVM */
+
/*
* Allocate room for and instantiate RTAS
*/
@@ -3159,6 +3221,46 @@ static void unreloc_toc(void)
#endif
#endif
+#ifdef CONFIG_PPC_SVM
+/*
+ * Perform the Enter Secure Mode ultracall.
+ */
+static int enter_secure_mode(unsigned long kbase, unsigned long fdt)
+{
+ register unsigned long r3 asm("r3") = UV_ESM;
+ register unsigned long r4 asm("r4") = kbase;
+ register unsigned long r5 asm("r5") = fdt;
+
+ asm volatile("sc 2" : "+r"(r3) : "r"(r4), "r"(r5));
+
+ return r3;
+}
+
+/*
+ * Call the Ultravisor to transfer us to secure memory if we have an ESM blob.
+ */
+static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
+{
+ int ret;
+
+ if (!prom_svm_enable)
+ return;
+
+ /* Switch to secure mode. */
+ prom_printf("Switching to secure mode.\n");
+
+ ret = enter_secure_mode(kbase, fdt);
+ if (ret != U_SUCCESS) {
+ prom_printf("Returned %d from switching to secure mode.\n", ret);
+ prom_rtas_os_term("Switch to secure mode failed.\n");
+ }
+}
+#else
+static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
+{
+}
+#endif /* CONFIG_PPC_SVM */
+
/*
* We enter here early on, when the Open Firmware prom is still
* handling exceptions and the MMU hash table for us.
@@ -3357,6 +3459,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
unreloc_toc();
#endif
+ /* Move to secure memory if we're supposed to be secure guests. */
+ setup_secure_guest(kbase, hdr);
+
__start(hdr, kbase, 0, 0, 0, 0, 0);
return 0;
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 160bef0d553d..78bab17b1396 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -33,7 +33,7 @@ OBJ="$2"
ERROR=0
-function check_section()
+check_section()
{
file=$1
section=$2
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 684b0b315c32..8c92febf5f44 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -2521,7 +2521,6 @@ void ptrace_disable(struct task_struct *child)
{
/* make sure the single step bit is not set. */
user_disable_single_step(child);
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
}
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index b824f4c69622..c5fa251b8950 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -16,6 +16,7 @@
#include <linux/capability.h>
#include <linux/delay.h>
#include <linux/cpu.h>
+#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
@@ -871,15 +872,17 @@ static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
return 0;
for_each_cpu(cpu, cpus) {
+ struct device *dev = get_cpu_device(cpu);
+
switch (state) {
case DOWN:
- cpuret = cpu_down(cpu);
+ cpuret = device_offline(dev);
break;
case UP:
- cpuret = cpu_up(cpu);
+ cpuret = device_online(dev);
break;
}
- if (cpuret) {
+ if (cpuret < 0) {
pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
__func__,
((state == UP) ? "up" : "down"),
@@ -896,6 +899,7 @@ static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
cpumask_clear_cpu(cpu, cpus);
}
}
+ cond_resched();
}
return ret;
@@ -922,13 +926,11 @@ int rtas_online_cpus_mask(cpumask_var_t cpus)
return ret;
}
-EXPORT_SYMBOL(rtas_online_cpus_mask);
int rtas_offline_cpus_mask(cpumask_var_t cpus)
{
return rtas_cpu_state_change_mask(DOWN, cpus);
}
-EXPORT_SYMBOL(rtas_offline_cpus_mask);
int rtas_ibm_suspend_me(u64 handle)
{
@@ -968,6 +970,8 @@ int rtas_ibm_suspend_me(u64 handle)
data.token = rtas_token("ibm,suspend-me");
data.complete = &done;
+ lock_device_hotplug();
+
/* All present CPUs must be online */
cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
cpuret = rtas_online_cpus_mask(offline_mask);
@@ -980,18 +984,16 @@ int rtas_ibm_suspend_me(u64 handle)
cpu_hotplug_disable();
/* Check if we raced with a CPU-Offline Operation */
- if (unlikely(!cpumask_equal(cpu_present_mask, cpu_online_mask))) {
- pr_err("%s: Raced against a concurrent CPU-Offline\n",
- __func__);
- atomic_set(&data.error, -EBUSY);
+ if (!cpumask_equal(cpu_present_mask, cpu_online_mask)) {
+ pr_info("%s: Raced against a concurrent CPU-Offline\n", __func__);
+ atomic_set(&data.error, -EAGAIN);
goto out_hotplug_enable;
}
/* Call function on all CPUs. One of us will make the
* rtas call
*/
- if (on_each_cpu(rtas_percpu_suspend_me, &data, 0))
- atomic_set(&data.error, -EINVAL);
+ on_each_cpu(rtas_percpu_suspend_me, &data, 0);
wait_for_completion(&done);
@@ -1008,6 +1010,7 @@ out_hotplug_enable:
__func__);
out:
+ unlock_device_hotplug();
free_cpumask_var(offline_mask);
return atomic_read(&data.error);
}
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index e1c9cf079503..7cfcb294b11c 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -28,7 +28,7 @@ static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NO
bool barrier_nospec_enabled;
static bool no_nospec;
static bool btb_flush_enabled;
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
static bool no_spectrev2;
#endif
@@ -114,7 +114,7 @@ static __init int security_feature_debugfs_init(void)
device_initcall(security_feature_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
static int __init handle_nospectre_v2(char *p)
{
no_spectrev2 = true;
@@ -122,6 +122,9 @@ static int __init handle_nospectre_v2(char *p)
return 0;
}
early_param("nospectre_v2", handle_nospectre_v2);
+#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
void setup_spectre_v2(void)
{
if (no_spectrev2 || cpu_mitigations_off())
@@ -399,7 +402,17 @@ static void toggle_count_cache_flush(bool enable)
void setup_count_cache_flush(void)
{
- toggle_count_cache_flush(true);
+ bool enable = true;
+
+ if (no_spectrev2 || cpu_mitigations_off()) {
+ if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) ||
+ security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED))
+ pr_warn("Spectre v2 mitigations not under software control, can't disable\n");
+
+ enable = false;
+ }
+
+ toggle_count_cache_flush(enable);
}
#ifdef CONFIG_DEBUG_FS
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 1f8db666468d..25aaa3903000 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -778,12 +778,6 @@ void ppc_printk_progress(char *s, unsigned short hex)
pr_info("%s\n", s);
}
-void arch_setup_pdev_archdata(struct platform_device *pdev)
-{
- pdev->archdata.dma_mask = DMA_BIT_MASK(32);
- pdev->dev.dma_mask = &pdev->archdata.dma_mask;
-}
-
static __init void print_system_info(void)
{
pr_info("-----------------------------------------------------\n");
@@ -806,9 +800,15 @@ static __init void print_system_info(void)
pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features);
#ifdef CONFIG_PPC64
pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
+#ifdef CONFIG_PPC_BOOK3S
+ pr_info("vmalloc start = 0x%lx\n", KERN_VIRT_START);
+ pr_info("IO start = 0x%lx\n", KERN_IO_START);
+ pr_info("vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
+#endif
#endif
- print_system_hash_info();
+ if (!early_radix_enabled())
+ print_system_hash_info();
if (PHYSICAL_START > 0)
pr_info("physical_start = 0x%llx\n",
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 94517e4a2723..a7541edf0cdb 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -206,6 +206,6 @@ __init void initialize_cache_info(void)
dcache_bsize = cur_cpu_spec->dcache_bsize;
icache_bsize = cur_cpu_spec->icache_bsize;
ucache_bsize = 0;
- if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601) || IS_ENABLED(CONFIG_E200))
ucache_bsize = icache_bsize = dcache_bsize;
}
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index a2b74e057904..98600b276f76 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1198,6 +1198,9 @@ SYSCALL_DEFINE0(rt_sigreturn)
goto bad;
if (MSR_TM_ACTIVE(msr_hi<<32)) {
+ /* Trying to start TM on non TM system */
+ if (!cpu_has_feature(CPU_FTR_TM))
+ goto bad;
/* We only recheckpoint on return if we're
* transaction.
*/
@@ -1245,7 +1248,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
current->comm, current->pid,
rt_sf, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -1334,7 +1337,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
current->comm, current->pid,
ctx, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
goto out;
}
@@ -1512,6 +1515,6 @@ badframe:
current->comm, current->pid,
addr, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 4292ea39baa4..117515564ec7 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -771,6 +771,11 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (MSR_TM_ACTIVE(msr)) {
/* We recheckpoint on return. */
struct ucontext __user *uc_transact;
+
+ /* Trying to start TM on non TM system */
+ if (!cpu_has_feature(CPU_FTR_TM))
+ goto badframe;
+
if (__get_user(uc_transact, &uc->uc_link))
goto badframe;
if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
@@ -808,7 +813,7 @@ badframe:
current->comm, current->pid, "rt_sigreturn",
(long)uc, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 1e2276963f6d..e2a46cfed5fd 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -182,7 +182,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
* FIXME: IMHO these tests do not belong in
* arch-dependent code, they are generic.
*/
- ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, NULL);
+ ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack);
#ifdef CONFIG_KPROBES
/*
* Mark stacktraces with kretprobed functions on them
diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c
index c612d50c9d18..b84992c10854 100644
--- a/arch/powerpc/kernel/suspend.c
+++ b/arch/powerpc/kernel/suspend.c
@@ -7,6 +7,7 @@
*/
#include <linux/mm.h>
+#include <linux/suspend.h>
#include <asm/page.h>
#include <asm/sections.h>
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
index 7a919e9a3400..cbdf86228eaa 100644
--- a/arch/powerpc/kernel/swsusp_32.S
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -25,11 +25,19 @@
#define SL_IBAT2 0x48
#define SL_DBAT3 0x50
#define SL_IBAT3 0x58
-#define SL_TB 0x60
-#define SL_R2 0x68
-#define SL_CR 0x6c
-#define SL_LR 0x70
-#define SL_R12 0x74 /* r12 to r31 */
+#define SL_DBAT4 0x60
+#define SL_IBAT4 0x68
+#define SL_DBAT5 0x70
+#define SL_IBAT5 0x78
+#define SL_DBAT6 0x80
+#define SL_IBAT6 0x88
+#define SL_DBAT7 0x90
+#define SL_IBAT7 0x98
+#define SL_TB 0xa0
+#define SL_R2 0xa8
+#define SL_CR 0xac
+#define SL_LR 0xb0
+#define SL_R12 0xb4 /* r12 to r31 */
#define SL_SIZE (SL_R12 + 80)
.section .data
@@ -114,6 +122,41 @@ _GLOBAL(swsusp_arch_suspend)
mfibatl r4,3
stw r4,SL_IBAT3+4(r11)
+BEGIN_MMU_FTR_SECTION
+ mfspr r4,SPRN_DBAT4U
+ stw r4,SL_DBAT4(r11)
+ mfspr r4,SPRN_DBAT4L
+ stw r4,SL_DBAT4+4(r11)
+ mfspr r4,SPRN_DBAT5U
+ stw r4,SL_DBAT5(r11)
+ mfspr r4,SPRN_DBAT5L
+ stw r4,SL_DBAT5+4(r11)
+ mfspr r4,SPRN_DBAT6U
+ stw r4,SL_DBAT6(r11)
+ mfspr r4,SPRN_DBAT6L
+ stw r4,SL_DBAT6+4(r11)
+ mfspr r4,SPRN_DBAT7U
+ stw r4,SL_DBAT7(r11)
+ mfspr r4,SPRN_DBAT7L
+ stw r4,SL_DBAT7+4(r11)
+ mfspr r4,SPRN_IBAT4U
+ stw r4,SL_IBAT4(r11)
+ mfspr r4,SPRN_IBAT4L
+ stw r4,SL_IBAT4+4(r11)
+ mfspr r4,SPRN_IBAT5U
+ stw r4,SL_IBAT5(r11)
+ mfspr r4,SPRN_IBAT5L
+ stw r4,SL_IBAT5+4(r11)
+ mfspr r4,SPRN_IBAT6U
+ stw r4,SL_IBAT6(r11)
+ mfspr r4,SPRN_IBAT6L
+ stw r4,SL_IBAT6+4(r11)
+ mfspr r4,SPRN_IBAT7U
+ stw r4,SL_IBAT7(r11)
+ mfspr r4,SPRN_IBAT7L
+ stw r4,SL_IBAT7+4(r11)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+
#if 0
/* Backup various CPU config stuffs */
bl __save_cpu_setup
@@ -279,27 +322,41 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
mtibatu 3,r4
lwz r4,SL_IBAT3+4(r11)
mtibatl 3,r4
-#endif
-
BEGIN_MMU_FTR_SECTION
- li r4,0
+ lwz r4,SL_DBAT4(r11)
mtspr SPRN_DBAT4U,r4
+ lwz r4,SL_DBAT4+4(r11)
mtspr SPRN_DBAT4L,r4
+ lwz r4,SL_DBAT5(r11)
mtspr SPRN_DBAT5U,r4
+ lwz r4,SL_DBAT5+4(r11)
mtspr SPRN_DBAT5L,r4
+ lwz r4,SL_DBAT6(r11)
mtspr SPRN_DBAT6U,r4
+ lwz r4,SL_DBAT6+4(r11)
mtspr SPRN_DBAT6L,r4
+ lwz r4,SL_DBAT7(r11)
mtspr SPRN_DBAT7U,r4
+ lwz r4,SL_DBAT7+4(r11)
mtspr SPRN_DBAT7L,r4
+ lwz r4,SL_IBAT4(r11)
mtspr SPRN_IBAT4U,r4
+ lwz r4,SL_IBAT4+4(r11)
mtspr SPRN_IBAT4L,r4
+ lwz r4,SL_IBAT5(r11)
mtspr SPRN_IBAT5U,r4
+ lwz r4,SL_IBAT5+4(r11)
mtspr SPRN_IBAT5L,r4
+ lwz r4,SL_IBAT6(r11)
mtspr SPRN_IBAT6U,r4
+ lwz r4,SL_IBAT6+4(r11)
mtspr SPRN_IBAT6L,r4
+ lwz r4,SL_IBAT7(r11)
mtspr SPRN_IBAT7U,r4
+ lwz r4,SL_IBAT7+4(r11)
mtspr SPRN_IBAT7L,r4
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+#endif
/* Flush all TLBs */
lis r4,0x1000
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 103655d84b4b..43f736ed47f2 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -515,3 +515,5 @@
431 common fsconfig sys_fsconfig
432 common fsmount sys_fsmount
433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
+435 nospu clone3 ppc_clone3
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index e2147d7c9e72..80a676da11cb 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -19,6 +19,7 @@
#include <asm/smp.h>
#include <asm/pmc.h>
#include <asm/firmware.h>
+#include <asm/svm.h>
#include "cacheinfo.h"
#include "setup.h"
@@ -715,6 +716,23 @@ static struct device_attribute pa6t_attrs[] = {
#endif /* HAS_PPC_PMC_PA6T */
#endif /* HAS_PPC_PMC_CLASSIC */
+#ifdef CONFIG_PPC_SVM
+static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", is_secure_guest());
+}
+static DEVICE_ATTR(svm, 0444, show_svm, NULL);
+
+static void create_svm_file(void)
+{
+ device_create_file(cpu_subsys.dev_root, &dev_attr_svm);
+}
+#else
+static void create_svm_file(void)
+{
+}
+#endif /* CONFIG_PPC_SVM */
+
static int register_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -1058,6 +1076,8 @@ static int __init topology_init(void)
sysfs_create_dscr_default();
#endif /* CONFIG_PPC64 */
+ create_svm_file();
+
return 0;
}
subsys_initcall(topology_init);
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 9fabdce255cd..6ba0fdd1e7f8 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -148,7 +148,7 @@ _GLOBAL(tm_reclaim)
/* Stash the stack pointer away for use after reclaim */
std r1, PACAR1(r13)
- /* Clear MSR RI since we are about to change r1, EE is already off. */
+ /* Clear MSR RI since we are about to use SCRATCH0, EE is already off */
li r5, 0
mtmsrd r5, 1
@@ -474,7 +474,7 @@ restore_gprs:
REST_GPR(7, r7)
- /* Clear MSR RI since we are about to change r1. EE is already off */
+ /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */
li r5, 0
mtmsrd r5, 1
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 517662a56bdc..7ea0ca044b65 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -866,10 +866,6 @@ void arch_ftrace_update_code(int command)
#ifdef CONFIG_PPC64
#define PACATOC offsetof(struct paca_struct, kernel_toc)
-#define PPC_LO(v) ((v) & 0xffff)
-#define PPC_HI(v) (((v) >> 16) & 0xffff)
-#define PPC_HA(v) PPC_HI ((v) + 0x8000)
-
extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
int __init ftrace_dyn_arch_init(void)
@@ -948,7 +944,8 @@ int ftrace_disable_ftrace_graph_caller(void)
* Hook the return address and push it in the stack of return addrs
* in current thread info. Return the address we want to divert to.
*/
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+ unsigned long sp)
{
unsigned long return_hooker;
@@ -960,7 +957,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
return_hooker = ppc_function_entry(return_to_handler);
- if (!function_graph_enter(parent, ip, 0, NULL))
+ if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
parent = return_hooker;
out:
return parent;
diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S
index 183f608efb81..e023ae59c429 100644
--- a/arch/powerpc/kernel/trace/ftrace_32.S
+++ b/arch/powerpc/kernel/trace/ftrace_32.S
@@ -50,6 +50,7 @@ _GLOBAL(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
+ addi r5, r1, 48
/* load r4 with local address */
lwz r4, 44(r1)
subi r4, r4, MCOUNT_INSN_SIZE
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 74acbf16a666..f9fd5f743eba 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -294,6 +294,7 @@ _GLOBAL(ftrace_graph_caller)
std r2, 24(r1)
ld r2, PACATOC(r13) /* get kernel TOC in r2 */
+ addi r5, r1, 112
mfctr r4 /* ftrace_caller has moved local addr here */
std r4, 40(r1)
mflr r3 /* ftrace_caller has restored LR from stack */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S
index e41a7d13c99c..6708e24db0ab 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S
@@ -41,6 +41,7 @@ _GLOBAL(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
+ addi r5, r1, 112
/* load r4 with local address */
ld r4, 128(r1)
subi r4, r4, MCOUNT_INSN_SIZE
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 47df30982de1..82f43535e686 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -297,7 +297,7 @@ NOKPROBE_SYMBOL(die);
void user_single_step_report(struct pt_regs *regs)
{
- force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip, current);
+ force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip);
}
static void show_signal_msg(int signr, struct pt_regs *regs, int code,
@@ -363,7 +363,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
if (!exception_common(signr, regs, code, addr))
return;
- force_sig_fault(signr, code, (void __user *)addr, current);
+ force_sig_fault(signr, code, (void __user *)addr);
}
/*
@@ -472,6 +472,7 @@ void system_reset_exception(struct pt_regs *regs)
if (debugger(regs))
goto out;
+ kmsg_dump(KMSG_DUMP_OOPS);
/*
* A system reset is a request to dump, so we always send
* it through the crashdump code (if fadump or kdump are
diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S
new file mode 100644
index 000000000000..07296bc39166
--- /dev/null
+++ b/arch/powerpc/kernel/ucall.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Generic code to perform an ultravisor call.
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#include <asm/ppc_asm.h>
+#include <asm/export.h>
+
+_GLOBAL(ucall_norets)
+EXPORT_SYMBOL_GPL(ucall_norets)
+ sc 2 /* Invoke the ultravisor */
+ blr /* Return r3 = status */
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index d60598113a9f..eae9ddaecbcf 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -94,28 +94,6 @@ static struct vdso_patch_def vdso_patches[] = {
CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
"__kernel_sync_dicache", "__kernel_sync_dicache_p5"
},
-#ifdef CONFIG_PPC32
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_gettimeofday", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_clock_gettime", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_clock_getres", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_get_tbfreq", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_time", NULL
- },
-#endif
};
/*
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index 6984125b9fc0..6c7401bd284e 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -70,6 +70,7 @@ V_FUNCTION_END(__kernel_get_syscall_map)
*
* returns the timebase frequency in HZ
*/
+#ifndef CONFIG_PPC_BOOK3S_601
V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
@@ -82,3 +83,4 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_tbfreq)
+#endif
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 099a6db14e67..00c025ba4a92 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -144,10 +144,13 @@ VERSION
__kernel_datapage_offset;
__kernel_get_syscall_map;
+#ifndef CONFIG_PPC_BOOK3S_601
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
+ __kernel_time;
__kernel_get_tbfreq;
+#endif
__kernel_sync_dicache;
__kernel_sync_dicache_p5;
__kernel_sigtramp32;
@@ -155,7 +158,6 @@ VERSION
#ifdef CONFIG_PPC64
__kernel_getcpu;
#endif
- __kernel_time;
local: *;
};
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index f53997a8ca62..711fca9bc6f0 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -38,6 +38,7 @@ config KVM_BOOK3S_32_HANDLER
config KVM_BOOK3S_64_HANDLER
bool
select KVM_BOOK3S_HANDLER
+ select PPC_DAWR_FORCE_ENABLE
config KVM_BOOK3S_PR_POSSIBLE
bool
@@ -183,9 +184,9 @@ config KVM_MPIC
select HAVE_KVM_MSI
help
Enable support for emulating MPIC devices inside the
- host kernel, rather than relying on userspace to emulate.
- Currently, support is limited to certain versions of
- Freescale's MPIC implementation.
+ host kernel, rather than relying on userspace to emulate.
+ Currently, support is limited to certain versions of
+ Freescale's MPIC implementation.
config KVM_XICS
bool "KVM in-kernel XICS emulation"
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 9524d92bc45d..d7fcdfa7fee4 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -1083,9 +1083,11 @@ static int kvmppc_book3s_init(void)
if (xics_on_xive()) {
kvmppc_xive_init_module();
kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
- kvmppc_xive_native_init_module();
- kvm_register_device_ops(&kvm_xive_native_ops,
- KVM_DEV_TYPE_XIVE);
+ if (kvmppc_xive_native_supported()) {
+ kvmppc_xive_native_init_module();
+ kvm_register_device_ops(&kvm_xive_native_ops,
+ KVM_DEV_TYPE_XIVE);
+ }
} else
#endif
kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 653936177857..18f244aad7aa 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -239,6 +239,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
case 2:
case 6:
pte->may_write = true;
+ /* fall through */
case 3:
case 5:
case 7:
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 08b2dfbc5305..2d415c36a61d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -361,12 +361,6 @@ static void kvmppc_pte_free(pte_t *ptep)
kmem_cache_free(kvm_pte_cache, ptep);
}
-/* Like pmd_huge() and pmd_large(), but works regardless of config options */
-static inline int pmd_is_leaf(pmd_t pmd)
-{
- return !!(pmd_val(pmd) & _PAGE_PTE);
-}
-
static pmd_t *kvmppc_pmd_alloc(void)
{
return kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL);
@@ -487,7 +481,7 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud,
for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) {
if (!pud_present(*p))
continue;
- if (pud_huge(*p)) {
+ if (pud_is_leaf(*p)) {
pud_clear(p);
} else {
pmd_t *pmd;
@@ -586,7 +580,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
new_pud = pud_alloc_one(kvm->mm, gpa);
pmd = NULL;
- if (pud && pud_present(*pud) && !pud_huge(*pud))
+ if (pud && pud_present(*pud) && !pud_is_leaf(*pud))
pmd = pmd_offset(pud, gpa);
else if (level <= 1)
new_pmd = kvmppc_pmd_alloc();
@@ -609,7 +603,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
new_pud = NULL;
}
pud = pud_offset(pgd, gpa);
- if (pud_huge(*pud)) {
+ if (pud_is_leaf(*pud)) {
unsigned long hgpa = gpa & PUD_MASK;
/* Check if we raced and someone else has set the same thing */
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 5bf05cc774e2..5834db0a54c6 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -19,6 +19,7 @@
#include <linux/anon_inodes.h>
#include <linux/iommu.h>
#include <linux/file.h>
+#include <linux/mm.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -45,43 +46,6 @@ static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
}
-static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
-{
- long ret = 0;
-
- if (!current || !current->mm)
- return ret; /* process exited */
-
- down_write(&current->mm->mmap_sem);
-
- if (inc) {
- unsigned long locked, lock_limit;
-
- locked = current->mm->locked_vm + stt_pages;
- lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- if (locked > lock_limit && !capable(CAP_IPC_LOCK))
- ret = -ENOMEM;
- else
- current->mm->locked_vm += stt_pages;
- } else {
- if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
- stt_pages = current->mm->locked_vm;
-
- current->mm->locked_vm -= stt_pages;
- }
-
- pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
- inc ? '+' : '-',
- stt_pages << PAGE_SHIFT,
- current->mm->locked_vm << PAGE_SHIFT,
- rlimit(RLIMIT_MEMLOCK),
- ret ? " - exceeded" : "");
-
- up_write(&current->mm->mmap_sem);
-
- return ret;
-}
-
static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
{
struct kvmppc_spapr_tce_iommu_table *stit = container_of(head,
@@ -291,7 +255,7 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
kvm_put_kvm(stt->kvm);
- kvmppc_account_memlimit(
+ account_locked_vm(current->mm,
kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
call_rcu(&stt->rcu, release_spapr_tce_table);
@@ -316,7 +280,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
return -EINVAL;
npages = kvmppc_tce_pages(size);
- ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
+ ret = account_locked_vm(current->mm, kvmppc_stt_pages(npages), true);
if (ret)
return ret;
@@ -362,7 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
kfree(stt);
fail_acct:
- kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
+ account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);
return ret;
}
@@ -452,7 +416,7 @@ static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long hpa = 0;
enum dma_data_direction dir = DMA_NONE;
- iommu_tce_xchg(mm, tbl, entry, &hpa, &dir);
+ iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir);
}
static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -483,7 +447,8 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
unsigned long hpa = 0;
long ret;
- if (WARN_ON_ONCE(iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir)))
+ if (WARN_ON_ONCE(iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa,
+ &dir)))
return H_TOO_HARD;
if (dir == DMA_NONE)
@@ -491,7 +456,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
if (ret != H_SUCCESS)
- iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
+ iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir);
return ret;
}
@@ -537,7 +502,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (mm_iommu_mapped_inc(mem))
return H_TOO_HARD;
- ret = iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
+ ret = iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir);
if (WARN_ON_ONCE(ret)) {
mm_iommu_mapped_dec(mem);
return H_TOO_HARD;
@@ -615,6 +580,8 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
entry, ua, dir);
+ iommu_tce_kill(stit->tbl, entry, 1);
+
if (ret != H_SUCCESS) {
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
goto unlock_exit;
@@ -692,12 +659,14 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
*/
if (get_user(tce, tces + i)) {
ret = H_TOO_HARD;
- goto unlock_exit;
+ goto invalidate_exit;
}
tce = be64_to_cpu(tce);
- if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua))
- return H_PARAMETER;
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
+ ret = H_PARAMETER;
+ goto invalidate_exit;
+ }
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
ret = kvmppc_tce_iommu_map(vcpu->kvm, stt,
@@ -707,13 +676,17 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (ret != H_SUCCESS) {
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
entry);
- goto unlock_exit;
+ goto invalidate_exit;
}
}
kvmppc_tce_put(stt, entry + i, tce);
}
+invalidate_exit:
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
+ iommu_tce_kill(stit->tbl, entry, npages);
+
unlock_exit:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -752,7 +725,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
continue;
if (ret == H_TOO_HARD)
- return ret;
+ goto invalidate_exit;
WARN_ON_ONCE(1);
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
@@ -762,6 +735,10 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
- return H_SUCCESS;
+invalidate_exit:
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
+ iommu_tce_kill(stit->tbl, ioba >> stt->page_shift, npages);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index f50bbeedfc66..ab6eeb8e753e 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -218,13 +218,14 @@ static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS;
}
-static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
+static long iommu_tce_xchg_no_kill_rm(struct mm_struct *mm,
+ struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
{
long ret;
- ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+ ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, true);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL))) {
@@ -240,13 +241,20 @@ static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
return ret;
}
+extern void iommu_tce_kill_rm(struct iommu_table *tbl,
+ unsigned long entry, unsigned long pages)
+{
+ if (tbl->it_ops->tce_kill)
+ tbl->it_ops->tce_kill(tbl, entry, pages, true);
+}
+
static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
unsigned long entry)
{
unsigned long hpa = 0;
enum dma_data_direction dir = DMA_NONE;
- iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+ iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
}
static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -278,7 +286,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
unsigned long hpa = 0;
long ret;
- if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir))
+ if (iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir))
/*
* real mode xchg can fail if struct page crosses
* a page boundary
@@ -290,7 +298,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
if (ret)
- iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+ iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
return ret;
}
@@ -336,7 +344,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
return H_TOO_HARD;
- ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
+ ret = iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
if (ret) {
mm_iommu_mapped_dec(mem);
/*
@@ -417,6 +425,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
stit->tbl, entry, ua, dir);
+ iommu_tce_kill_rm(stit->tbl, entry, 1);
+
if (ret != H_SUCCESS) {
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
return ret;
@@ -556,8 +566,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ua = 0;
- if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
- return H_PARAMETER;
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
+ ret = H_PARAMETER;
+ goto invalidate_exit;
+ }
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
@@ -567,13 +579,17 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (ret != H_SUCCESS) {
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl,
entry);
- goto unlock_exit;
+ goto invalidate_exit;
}
}
kvmppc_rm_tce_put(stt, entry + i, tce);
}
+invalidate_exit:
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
+ iommu_tce_kill_rm(stit->tbl, entry, npages);
+
unlock_exit:
if (rmap)
unlock_rmap(rmap);
@@ -616,7 +632,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
continue;
if (ret == H_TOO_HARD)
- return ret;
+ goto invalidate_exit;
WARN_ON_ONCE_RM(1);
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
@@ -626,7 +642,11 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
- return H_SUCCESS;
+invalidate_exit:
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
+ iommu_tce_kill_rm(stit->tbl, ioba >> stt->page_shift, npages);
+
+ return ret;
}
/* This can be called in either virtual mode or real mode */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 76b1801aa44a..efd8f93bc9dc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1678,7 +1678,14 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, vcpu->arch.pspb);
break;
case KVM_REG_PPC_DPDES:
- *val = get_reg_val(id, vcpu->arch.vcore->dpdes);
+ /*
+ * On POWER9, where we are emulating msgsndp etc.,
+ * we return 1 bit for each vcpu, which can come from
+ * either vcore->dpdes or doorbell_request.
+ * On POWER8, doorbell_request is 0.
+ */
+ *val = get_reg_val(id, vcpu->arch.vcore->dpdes |
+ vcpu->arch.doorbell_request);
break;
case KVM_REG_PPC_VTB:
*val = get_reg_val(id, vcpu->arch.vcore->vtb);
@@ -2860,7 +2867,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
if (!spin_trylock(&pvc->lock))
continue;
prepare_threads(pvc);
- if (!pvc->n_runnable) {
+ if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) {
list_del_init(&pvc->preempt_list);
if (pvc->runner == NULL) {
pvc->vcore_state = VCORE_INACTIVE;
@@ -2881,15 +2888,20 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
spin_unlock(&lp->lock);
}
-static bool recheck_signals(struct core_info *cip)
+static bool recheck_signals_and_mmu(struct core_info *cip)
{
int sub, i;
struct kvm_vcpu *vcpu;
+ struct kvmppc_vcore *vc;
- for (sub = 0; sub < cip->n_subcores; ++sub)
- for_each_runnable_thread(i, vcpu, cip->vc[sub])
+ for (sub = 0; sub < cip->n_subcores; ++sub) {
+ vc = cip->vc[sub];
+ if (!vc->kvm->arch.mmu_ready)
+ return true;
+ for_each_runnable_thread(i, vcpu, vc)
if (signal_pending(vcpu->arch.run_task))
return true;
+ }
return false;
}
@@ -3119,7 +3131,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
local_irq_disable();
hard_irq_disable();
if (lazy_irq_pending() || need_resched() ||
- recheck_signals(&core_info) || !vc->kvm->arch.mmu_ready) {
+ recheck_signals_and_mmu(&core_info)) {
local_irq_enable();
vc->vcore_state = VCORE_INACTIVE;
/* Unlock all except the primary vcore */
@@ -3569,9 +3581,18 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
if (kvmhv_on_pseries()) {
+ /*
+ * We need to save and restore the guest visible part of the
+ * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
+ * doesn't do this for us. Note only required if pseries since
+ * this is done in kvmhv_load_hv_regs_and_go() below otherwise.
+ */
+ unsigned long host_psscr;
/* call our hypervisor to load up HV regs and go */
struct hv_guest_state hvregs;
+ host_psscr = mfspr(SPRN_PSSCR_PR);
+ mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
kvmhv_save_hv_regs(vcpu, &hvregs);
hvregs.lpcr = lpcr;
vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
@@ -3590,6 +3611,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+ vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
+ mtspr(SPRN_PSSCR_PR, host_psscr);
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
@@ -3603,6 +3626,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.slb_max = 0;
dec = mfspr(SPRN_DEC);
+ if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
+ dec = (s32) dec;
tb = mftb();
vcpu->arch.dec_expires = dec + tb;
vcpu->cpu = -1;
@@ -3652,6 +3677,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.vpa.dirty = 1;
save_pmu = lp->pmcregs_in_use;
}
+ /* Must save pmu if this guest is capable of running nested guests */
+ save_pmu |= nesting_enabled(vcpu->kvm);
kvmhv_save_guest_pmu(vcpu, save_pmu);
@@ -4122,8 +4149,15 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
preempt_enable();
- /* cancel pending decrementer exception if DEC is now positive */
- if (get_tb() < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu))
+ /*
+ * cancel pending decrementer exception if DEC is now positive, or if
+ * entering a nested guest in which case the decrementer is now owned
+ * by L2 and the L1 decrementer is provided in hdec_expires
+ */
+ if (kvmppc_core_pending_dec(vcpu) &&
+ ((get_tb() < vcpu->arch.dec_expires) ||
+ (trap == BOOK3S_INTERRUPT_SYSCALL &&
+ kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED)))
kvmppc_core_dequeue_dec(vcpu);
trace_kvm_guest_exit(vcpu);
@@ -5440,6 +5474,12 @@ static int kvmppc_radix_possible(void)
static int kvmppc_book3s_init_hv(void)
{
int r;
+
+ if (!tlbie_capable) {
+ pr_err("KVM-HV: Host does not support TLBIE\n");
+ return -ENODEV;
+ }
+
/*
* FIXME!! Do we need to check on all cpus ?
*/
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index cb05ccc8bc6a..7c1909657b55 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -820,6 +820,8 @@ static void flush_guest_tlb(struct kvm *kvm)
: : "r" (rb), "i" (1), "i" (1), "i" (0),
"r" (0) : "memory");
}
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
} else {
for (set = 0; set < kvm->arch.tlb_sets; ++set) {
/* R=0 PRS=0 RIC=0 */
@@ -828,9 +830,9 @@ static void flush_guest_tlb(struct kvm *kvm)
"r" (0) : "memory");
rb += PPC_BIT(51); /* increment set number */
}
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
}
- asm volatile("ptesync": : :"memory");
- asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
}
void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 735e0ac6f5b2..fff90f2c3de2 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -398,7 +398,7 @@ static void kvmhv_flush_lpid(unsigned int lpid)
long rc;
if (!kvmhv_on_pseries()) {
- radix__flush_tlb_lpid(lpid);
+ radix__flush_all_lpid(lpid);
return;
}
@@ -411,7 +411,7 @@ static void kvmhv_flush_lpid(unsigned int lpid)
void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
{
if (!kvmhv_on_pseries()) {
- mmu_partition_table_set_entry(lpid, dw0, dw1);
+ mmu_partition_table_set_entry(lpid, dw0, dw1, true);
return;
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 63e0ce91e29d..7186c65c61c9 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -99,7 +99,7 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
} else {
rev->forw = rev->back = pte_index;
*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
- pte_index | KVMPPC_RMAP_PRESENT;
+ pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT;
}
unlock_rmap(rmap);
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 337e64468d78..9a05b0d932ef 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -29,6 +29,7 @@
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/cpuidle.h>
+#include <asm/ultravisor-api.h>
/* Sign-extend HDEC if not on POWER9 */
#define EXTEND_HDEC(reg) \
@@ -942,6 +943,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
ld r11, VCPU_XIVE_SAVED_STATE(r4)
li r9, TM_QW1_OS
lwz r8, VCPU_XIVE_CAM_WORD(r4)
+ cmpwi r8, 0
+ beq no_xive
li r7, TM_QW1_OS + TM_WORD2
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
@@ -1083,16 +1086,10 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r5, VCPU_LR(r4)
- ld r6, VCPU_CR(r4)
mtlr r5
- mtcr r6
ld r1, VCPU_GPR(R1)(r4)
- ld r2, VCPU_GPR(R2)(r4)
- ld r3, VCPU_GPR(R3)(r4)
ld r5, VCPU_GPR(R5)(r4)
- ld r6, VCPU_GPR(R6)(r4)
- ld r7, VCPU_GPR(R7)(r4)
ld r8, VCPU_GPR(R8)(r4)
ld r9, VCPU_GPR(R9)(r4)
ld r10, VCPU_GPR(R10)(r4)
@@ -1110,10 +1107,42 @@ BEGIN_FTR_SECTION
mtspr SPRN_HDSISR, r0
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ ld r6, VCPU_KVM(r4)
+ lbz r7, KVM_SECURE_GUEST(r6)
+ cmpdi r7, 0
+ ld r6, VCPU_GPR(R6)(r4)
+ ld r7, VCPU_GPR(R7)(r4)
+ bne ret_to_ultra
+
+ lwz r0, VCPU_CR(r4)
+ mtcr r0
+
ld r0, VCPU_GPR(R0)(r4)
+ ld r2, VCPU_GPR(R2)(r4)
+ ld r3, VCPU_GPR(R3)(r4)
ld r4, VCPU_GPR(R4)(r4)
HRFI_TO_GUEST
b .
+/*
+ * Use UV_RETURN ultracall to return control back to the Ultravisor after
+ * processing an hypercall or interrupt that was forwarded (a.k.a. reflected)
+ * to the Hypervisor.
+ *
+ * All registers have already been loaded, except:
+ * R0 = hcall result
+ * R2 = SRR1, so UV can detect a synthesized interrupt (if any)
+ * R3 = UV_RETURN
+ */
+ret_to_ultra:
+ lwz r0, VCPU_CR(r4)
+ mtcr r0
+
+ ld r0, VCPU_GPR(R3)(r4)
+ mfspr r2, SPRN_SRR1
+ li r3, 0
+ ori r3, r3, UV_RETURN
+ ld r4, VCPU_GPR(R4)(r4)
+ sc 2
/*
* Enter the guest on a P9 or later system where we have exactly
@@ -2831,29 +2860,39 @@ kvm_cede_prodded:
kvm_cede_exit:
ld r9, HSTATE_KVM_VCPU(r13)
#ifdef CONFIG_KVM_XICS
- /* Abort if we still have a pending escalation */
+ /* are we using XIVE with single escalation? */
+ ld r10, VCPU_XIVE_ESC_VADDR(r9)
+ cmpdi r10, 0
+ beq 3f
+ li r6, XIVE_ESB_SET_PQ_00
+ /*
+ * If we still have a pending escalation, abort the cede,
+ * and we must set PQ to 10 rather than 00 so that we don't
+ * potentially end up with two entries for the escalation
+ * interrupt in the XIVE interrupt queue. In that case
+ * we also don't want to set xive_esc_on to 1 here in
+ * case we race with xive_esc_irq().
+ */
lbz r5, VCPU_XIVE_ESC_ON(r9)
cmpwi r5, 0
- beq 1f
+ beq 4f
li r0, 0
stb r0, VCPU_CEDED(r9)
-1: /* Enable XIVE escalation */
- li r5, XIVE_ESB_SET_PQ_00
+ li r6, XIVE_ESB_SET_PQ_10
+ b 5f
+4: li r0, 1
+ stb r0, VCPU_XIVE_ESC_ON(r9)
+ /* make sure store to xive_esc_on is seen before xive_esc_irq runs */
+ sync
+5: /* Enable XIVE escalation */
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
beq 1f
- ld r10, VCPU_XIVE_ESC_VADDR(r9)
- cmpdi r10, 0
- beq 3f
- ldx r0, r10, r5
+ ldx r0, r10, r6
b 2f
1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
- cmpdi r10, 0
- beq 3f
- ldcix r0, r10, r5
+ ldcix r0, r10, r6
2: sync
- li r0, 1
- stb r0, VCPU_XIVE_ESC_ON(r9)
#endif /* CONFIG_KVM_XICS */
3: b guest_exit_cont
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
index 229496e2652e..0db937497169 100644
--- a/arch/powerpc/kvm/book3s_hv_tm.c
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -128,7 +128,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
}
/* Set CR0 to indicate previous transactional state */
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
- (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
+ (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
/* L=1 => tresume, L=0 => tsuspend */
if (instr & (1 << 21)) {
if (MSR_TM_SUSPENDED(msr))
@@ -172,7 +172,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
/* Set CR0 to indicate previous transactional state */
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
- (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
+ (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
return RESUME_GUEST;
@@ -202,7 +202,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
/* Set CR0 to indicate previous transactional state */
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
- (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
+ (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
vcpu->arch.shregs.msr = msr | MSR_TS_S;
return RESUME_GUEST;
}
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index e8276161872e..381bf8dea193 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -827,7 +827,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
*
* Note: If EOI is incorrectly used by SW to lower the CPPR
* value (ie more favored), we do not check for rejection of
- * a pending interrupt, this is a SW error and PAPR sepcifies
+ * a pending interrupt, this is a SW error and PAPR specifies
* that we don't have to deal with it.
*
* The sending of an EOI to the ICS is handled after the
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 6ca0d7376a9f..591bfb4bfd0f 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -67,8 +67,14 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
u64 pq;
- if (!tima)
+ /*
+ * Nothing to do if the platform doesn't have a XIVE
+ * or this vCPU doesn't have its own XIVE context
+ * (e.g. because it's not using an in-kernel interrupt controller).
+ */
+ if (!tima || !vcpu->arch.xive_cam_word)
return;
+
eieio();
__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
@@ -160,6 +166,9 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
*/
vcpu->arch.xive_esc_on = false;
+ /* This orders xive_esc_on = false vs. subsequent stale_p = true */
+ smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */
+
return IRQ_HANDLED;
}
@@ -1113,6 +1122,31 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
vcpu->arch.xive_esc_raddr = 0;
}
+/*
+ * In single escalation mode, the escalation interrupt is marked so
+ * that EOI doesn't re-enable it, but just sets the stale_p flag to
+ * indicate that the P bit has already been dealt with. However, the
+ * assembly code that enters the guest sets PQ to 00 without clearing
+ * stale_p (because it has no easy way to address it). Hence we have
+ * to adjust stale_p before shutting down the interrupt.
+ */
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
+ struct kvmppc_xive_vcpu *xc, int irq)
+{
+ struct irq_data *d = irq_get_irq_data(irq);
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+ /*
+ * This slightly odd sequence gives the right result
+ * (i.e. stale_p set if xive_esc_on is false) even if
+ * we race with xive_esc_irq() and xive_irq_eoi().
+ */
+ xd->stale_p = false;
+ smp_mb(); /* paired with smb_wmb in xive_esc_irq */
+ if (!vcpu->arch.xive_esc_on)
+ xd->stale_p = true;
+}
+
void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -1134,20 +1168,28 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
/* Mask the VP IPI */
xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
- /* Disable the VP */
- xive_native_disable_vp(xc->vp_id);
-
- /* Free the queues & associated interrupts */
+ /* Free escalations */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
- struct xive_q *q = &xc->queues[i];
-
- /* Free the escalation irq */
if (xc->esc_virq[i]) {
+ if (xc->xive->single_escalation)
+ xive_cleanup_single_escalation(vcpu, xc,
+ xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
}
- /* Free the queue */
+ }
+
+ /* Disable the VP */
+ xive_native_disable_vp(xc->vp_id);
+
+ /* Clear the cam word so guest entry won't try to push context */
+ vcpu->arch.xive_cam_word = 0;
+
+ /* Free the queues */
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ struct xive_q *q = &xc->queues[i];
+
xive_native_disable_queue(xc->vp_id, q, i);
if (q->qpage) {
free_pages((unsigned long)q->qpage,
@@ -1986,10 +2028,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
xive->single_escalation = xive_native_has_single_escalation();
- if (ret) {
- kfree(xive);
+ if (ret)
return ret;
- }
return 0;
}
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 50494d0ee375..955b820ffd6d 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -282,6 +282,8 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
bool single_escalation);
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
+ struct kvmppc_xive_vcpu *xc, int irq);
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 5596c8ec221a..248c1ea9e788 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -67,20 +67,28 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
xc->valid = false;
kvmppc_xive_disable_vcpu_interrupts(vcpu);
- /* Disable the VP */
- xive_native_disable_vp(xc->vp_id);
-
- /* Free the queues & associated interrupts */
+ /* Free escalations */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
/* Free the escalation irq */
if (xc->esc_virq[i]) {
+ if (xc->xive->single_escalation)
+ xive_cleanup_single_escalation(vcpu, xc,
+ xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
xc->esc_virq[i] = 0;
}
+ }
+
+ /* Disable the VP */
+ xive_native_disable_vp(xc->vp_id);
- /* Free the queue */
+ /* Clear the cam word so guest entry won't try to push context */
+ vcpu->arch.xive_cam_word = 0;
+
+ /* Free the queues */
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
kvmppc_xive_native_cleanup_queue(vcpu, i);
}
@@ -1090,9 +1098,9 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
xive->ops = &kvmppc_xive_native_ops;
if (ret)
- kfree(xive);
+ return ret;
- return ret;
+ return 0;
}
/*
@@ -1171,6 +1179,11 @@ int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
return 0;
}
+bool kvmppc_xive_native_supported(void)
+{
+ return xive_native_has_queue_state_support();
+}
+
static int xive_native_debug_show(struct seq_file *m, void *private)
{
struct kvmppc_xive *xive = m->private;
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index b5a848a55504..00649ca5fa9a 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -440,6 +440,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
struct kvm_vcpu *vcpu;
int err;
+ BUILD_BUG_ON_MSG(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0,
+ "struct kvm_vcpu must be at offset 0 for arch usercopy region");
+
vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu_e500) {
err = -ENOMEM;
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index bb4d09c1ad56..6fca38ca791f 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -271,6 +271,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
*/
if (inst == KVMPPC_INST_SW_BREAKPOINT) {
run->exit_reason = KVM_EXIT_DEBUG;
+ run->debug.arch.status = 0;
run->debug.arch.address = kvmppc_get_pc(vcpu);
emulated = EMULATE_EXIT_USER;
advance = 0;
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 9208c82ed08d..2e496eb86e94 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -89,12 +89,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
rs = get_rs(inst);
rt = get_rt(inst);
- /*
- * if mmio_vsx_tx_sx_enabled == 0, copy data between
- * VSR[0..31] and memory
- * if mmio_vsx_tx_sx_enabled == 1, copy data between
- * VSR[32..63] and memory
- */
vcpu->arch.mmio_vsx_copy_nums = 0;
vcpu->arch.mmio_vsx_offset = 0;
vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6d704ad2472b..3a77bb643452 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -50,6 +50,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
}
+bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
+{
+ return kvm_arch_vcpu_runnable(vcpu);
+}
+
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
return false;
@@ -414,9 +419,9 @@ int kvm_arch_hardware_setup(void)
return 0;
}
-void kvm_arch_check_processor_compat(void *rtn)
+int kvm_arch_check_processor_compat(void)
{
- *(int *)rtn = kvmppc_core_check_processor_compat();
+ return kvmppc_core_check_processor_compat();
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
@@ -452,16 +457,6 @@ err_out:
return -EINVAL;
}
-bool kvm_arch_has_vcpu_debugfs(void)
-{
- return false;
-}
-
-int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
void kvm_arch_destroy_vm(struct kvm *kvm)
{
unsigned int i;
@@ -566,7 +561,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
* a POWER9 processor) and the PowerNV platform, as
* nested is not yet supported.
*/
- r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE);
+ r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE) &&
+ kvmppc_xive_native_supported();
break;
#endif
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index c55f9c27bf79..b8de3be10eb4 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -16,7 +16,7 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
endif
-obj-y += alloc.o code-patching.o feature-fixups.o
+obj-y += alloc.o code-patching.o feature-fixups.o pmem.o
ifndef CONFIG_KASAN
obj-y += string.o memcmp_$(BITS).o
@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
memcpy_power7.o
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
- memcpy_64.o pmem.o
+ memcpy_64.o memcpy_mcsafe_64.o
obj64-$(CONFIG_SMP) += locks.o
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
@@ -49,7 +49,8 @@ obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \
obj-y += checksum_$(BITS).o checksum_wrappers.o \
string_$(BITS).o
-obj-y += sstep.o ldstfp.o quad.o
+obj-y += sstep.o
+obj-$(CONFIG_PPC_FPU) += ldstfp.o
obj64-y += quad.o
obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S
index e32f477d4426..e00abeabc54d 100644
--- a/arch/powerpc/lib/ldstfp.S
+++ b/arch/powerpc/lib/ldstfp.S
@@ -14,8 +14,6 @@
#include <asm/asm-compat.h>
#include <linux/errno.h>
-#ifdef CONFIG_PPC_FPU
-
#define STKFRM (PPC_MIN_STKFRM + 16)
/* Get the contents of frN into *p; N is in r3 and p is in r4. */
@@ -237,5 +235,3 @@ _GLOBAL(conv_dp_to_sp)
MTMSRD(r6)
isync
blr
-
-#endif /* CONFIG_PPC_FPU */
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 6550b9e5ce5f..6440d5943c00 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -18,7 +18,7 @@
#include <asm/hvcall.h>
#include <asm/smp.h>
-void __spin_yield(arch_spinlock_t *lock)
+void splpar_spin_yield(arch_spinlock_t *lock)
{
unsigned int lock_value, holder_cpu, yield_count;
@@ -36,14 +36,14 @@ void __spin_yield(arch_spinlock_t *lock)
plpar_hcall_norets(H_CONFER,
get_hard_smp_processor_id(holder_cpu), yield_count);
}
-EXPORT_SYMBOL_GPL(__spin_yield);
+EXPORT_SYMBOL_GPL(splpar_spin_yield);
/*
* Waiting for a read lock or a write lock on a rwlock...
* This turns out to be the same for read and write locks, since
* we only know the holder if it is write-locked.
*/
-void __rw_yield(arch_rwlock_t *rw)
+void splpar_rw_yield(arch_rwlock_t *rw)
{
int lock_value;
unsigned int holder_cpu, yield_count;
diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/memcpy_mcsafe_64.S
new file mode 100644
index 000000000000..cb882d9a6d8a
--- /dev/null
+++ b/arch/powerpc/lib/memcpy_mcsafe_64.S
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) IBM Corporation, 2011
+ * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
+ * Author - Balbir Singh <bsingharora@gmail.com>
+ */
+#include <asm/ppc_asm.h>
+#include <asm/errno.h>
+#include <asm/export.h>
+
+ .macro err1
+100:
+ EX_TABLE(100b,.Ldo_err1)
+ .endm
+
+ .macro err2
+200:
+ EX_TABLE(200b,.Ldo_err2)
+ .endm
+
+ .macro err3
+300: EX_TABLE(300b,.Ldone)
+ .endm
+
+.Ldo_err2:
+ ld r22,STK_REG(R22)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r14,STK_REG(R14)(r1)
+ addi r1,r1,STACKFRAMESIZE
+.Ldo_err1:
+ /* Do a byte by byte copy to get the exact remaining size */
+ mtctr r7
+46:
+err3; lbz r0,0(r4)
+ addi r4,r4,1
+err3; stb r0,0(r3)
+ addi r3,r3,1
+ bdnz 46b
+ li r3,0
+ blr
+
+.Ldone:
+ mfctr r3
+ blr
+
+
+_GLOBAL(memcpy_mcsafe)
+ mr r7,r5
+ cmpldi r5,16
+ blt .Lshort_copy
+
+.Lcopy:
+ /* Get the source 8B aligned */
+ neg r6,r4
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ bf cr7*4+3,1f
+err1; lbz r0,0(r4)
+ addi r4,r4,1
+err1; stb r0,0(r3)
+ addi r3,r3,1
+ subi r7,r7,1
+
+1: bf cr7*4+2,2f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+2: bf cr7*4+1,3f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+3: sub r5,r5,r6
+ cmpldi r5,128
+
+ mflr r0
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+ std r21,STK_REG(R21)(r1)
+ std r22,STK_REG(R22)(r1)
+ std r0,STACKFRAMESIZE+16(r1)
+
+ blt 5f
+ srdi r6,r5,7
+ mtctr r6
+
+ /* Now do cacheline (128B) sized loads and stores. */
+ .align 5
+4:
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+err2; ld r15,64(r4)
+err2; ld r16,72(r4)
+err2; ld r17,80(r4)
+err2; ld r18,88(r4)
+err2; ld r19,96(r4)
+err2; ld r20,104(r4)
+err2; ld r21,112(r4)
+err2; ld r22,120(r4)
+ addi r4,r4,128
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+err2; std r15,64(r3)
+err2; std r16,72(r3)
+err2; std r17,80(r3)
+err2; std r18,88(r3)
+err2; std r19,96(r3)
+err2; std r20,104(r3)
+err2; std r21,112(r3)
+err2; std r22,120(r3)
+ addi r3,r3,128
+ subi r7,r7,128
+ bdnz 4b
+
+ clrldi r5,r5,(64-7)
+
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+ addi r4,r4,64
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+ addi r3,r3,64
+ subi r7,r7,64
+
+7: ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r22,STK_REG(R22)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ /* Up to 63B to go */
+ bf cr7*4+2,8f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+err1; ld r8,16(r4)
+err1; ld r9,24(r4)
+ addi r4,r4,32
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+err1; std r8,16(r3)
+err1; std r9,24(r3)
+ addi r3,r3,32
+ subi r7,r7,32
+
+ /* Up to 31B to go */
+8: bf cr7*4+3,9f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+ addi r4,r4,16
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+ addi r3,r3,16
+ subi r7,r7,16
+
+9: clrldi r5,r5,(64-4)
+
+ /* Up to 15B to go */
+.Lshort_copy:
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
+err1; lwz r6,4(r4)
+ addi r4,r4,8
+err1; stw r0,0(r3)
+err1; stw r6,4(r3)
+ addi r3,r3,8
+ subi r7,r7,8
+
+12: bf cr7*4+1,13f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+13: bf cr7*4+2,14f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+14: bf cr7*4+3,15f
+err1; lbz r0,0(r4)
+err1; stb r0,0(r3)
+
+15: li r3,0
+ blr
+
+EXPORT_SYMBOL_GPL(memcpy_mcsafe);
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 3c6c134224f8..377712e85605 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -15,14 +15,14 @@
void arch_wb_cache_pmem(void *addr, size_t size)
{
unsigned long start = (unsigned long) addr;
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
}
EXPORT_SYMBOL(arch_wb_cache_pmem);
void arch_invalidate_pmem(void *addr, size_t size)
{
unsigned long start = (unsigned long) addr;
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
}
EXPORT_SYMBOL(arch_invalidate_pmem);
@@ -35,7 +35,7 @@ long __copy_from_user_flushcache(void *dest, const void __user *src,
unsigned long copied, start = (unsigned long) dest;
copied = __copy_from_user(dest, src, size);
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
return copied;
}
@@ -45,7 +45,7 @@ void *memcpy_flushcache(void *dest, const void *src, size_t size)
unsigned long start = (unsigned long) dest;
memcpy(dest, src, size);
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
return dest;
}
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 0f499db315d6..5e147986400d 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -7,7 +7,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(BITS).o pgtable_$(BITS).o \
- pgtable-frag.o \
+ pgtable-frag.o ioremap.o ioremap_$(BITS).o \
init-common.o mmu_context.o drmem.o
obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/
obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index e249fbf6b9c3..84d5fab94f8f 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -74,7 +74,7 @@ static int find_free_bat(void)
{
int b;
- if (cpu_has_feature(CPU_FTR_601)) {
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) {
for (b = 0; b < 4; b++) {
struct ppc_bat *bat = BATS[b];
@@ -106,7 +106,7 @@ static int find_free_bat(void)
*/
static unsigned int block_size(unsigned long base, unsigned long top)
{
- unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 8 : 256) << 20;
+ unsigned int max_size = IS_ENABLED(CONFIG_PPC_BOOK3S_601) ? SZ_8M : SZ_256M;
unsigned int base_shift = (ffs(base) - 1) & 31;
unsigned int block_shift = (fls(top - base) - 1) & 31;
@@ -189,7 +189,7 @@ void mmu_mark_initmem_nx(void)
unsigned long top = (unsigned long)_etext - PAGE_OFFSET;
unsigned long size;
- if (cpu_has_feature(CPU_FTR_601))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
return;
for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) {
@@ -227,7 +227,7 @@ void mmu_mark_rodata_ro(void)
int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
int i;
- if (cpu_has_feature(CPU_FTR_601))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
return;
for (i = 0; i < nb; i++) {
@@ -259,7 +259,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
flags &= ~_PAGE_COHERENT;
bl = (size >> 17) - 1;
- if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_601)) {
/* 603, 604, etc. */
/* Do DBAT first */
wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
@@ -297,8 +297,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
/*
* Preload a translation in the hash table
*/
-void hash_preload(struct mm_struct *mm, unsigned long ea,
- bool is_exec, unsigned long trap)
+void hash_preload(struct mm_struct *mm, unsigned long ea)
{
pmd_t *pmd;
@@ -310,6 +309,39 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
}
/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ *
+ * This must always be called with the pte lock held.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep)
+{
+ if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return;
+ /*
+ * We don't need to worry about _PAGE_PRESENT here because we are
+ * called with either mm->page_table_lock held or ptl lock held
+ */
+
+ /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+ if (!pte_young(*ptep) || address >= TASK_SIZE)
+ return;
+
+ /* We have to test for regs NULL since init will get here first thing at boot */
+ if (!current->thread.regs)
+ return;
+
+ /* We also avoid filling the hash if not coming from a fault */
+ if (TRAP(current->thread.regs) != 0x300 && TRAP(current->thread.regs) != 0x400)
+ return;
+
+ hash_preload(vma->vm_mm, address);
+}
+
+/*
* Initialize the hash table and patch the instructions in hashtable.S.
*/
void __init MMU_init_hw(void)
@@ -358,6 +390,15 @@ void __init MMU_init_hw(void)
hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
if (lg_n_hpteg > 16)
hash_mb2 = 16 - LG_HPTEG_SIZE;
+
+ /*
+ * When KASAN is selected, there is already an early temporary hash
+ * table and the switch to the final hash table is done later.
+ */
+ if (IS_ENABLED(CONFIG_KASAN))
+ return;
+
+ MMU_init_hw_patch();
}
void __init MMU_init_hw_patch(void)
@@ -400,7 +441,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
BUG_ON(first_memblock_base != 0);
/* 601 can only access 16MB at the moment */
- if (PVR_VER(mfspr(SPRN_PVR)) == 1)
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000));
else /* Anything else has 256M mapped */
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000));
@@ -418,9 +459,6 @@ void __init setup_kuep(bool disabled)
{
pr_info("Activating Kernel Userspace Execution Prevention\n");
- if (cpu_has_feature(CPU_FTR_601))
- pr_warn("KUEP is not working on powerpc 601 (No NX bit in Seg Regs)\n");
-
if (disabled)
pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n");
}
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index 974b4fc19f4f..fd393b8be14f 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -10,7 +10,6 @@ obj-$(CONFIG_PPC_NATIVE) += hash_native.o
obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o
-obj-$(CONFIG_PPC_SPLPAR) += vphn.o
obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o
ifdef CONFIG_HUGETLB_PAGE
obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index 30d62ffe3310..90ab4f31e2b3 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -41,7 +41,7 @@
#define HPTE_LOCK_BIT (56+3)
#endif
-DEFINE_RAW_SPINLOCK(native_tlbie_lock);
+static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
{
@@ -56,7 +56,7 @@ static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
* tlbiel instruction for hash, set invalidation
* i.e., r=1 and is=01 or is=10 or is=11
*/
-static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
+static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
unsigned int pid,
unsigned int ric, unsigned int prs)
{
@@ -112,7 +112,7 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
asm volatile("ptesync": : :"memory");
- asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}
void hash__tlbiel_all(unsigned int action)
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 28ced26f2a00..6c123760164e 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -34,6 +34,7 @@
#include <linux/libfdt.h>
#include <linux/pkeys.h>
#include <linux/hugetlb.h>
+#include <linux/cpu.h>
#include <asm/debugfs.h>
#include <asm/processor.h>
@@ -61,6 +62,7 @@
#include <asm/ps3.h>
#include <asm/pte-walk.h>
#include <asm/asm-prototypes.h>
+#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
@@ -271,10 +273,6 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
if (overlaps_kernel_text(vaddr, vaddr + step))
tprot &= ~HPTE_R_N;
- /* Make kvm guest trampolines executable */
- if (overlaps_kvm_tmp(vaddr, vaddr + step))
- tprot &= ~HPTE_R_N;
-
/*
* If relocatable, check if it overlaps interrupt vectors that
* are copied down to real 0. For relocatable kernel
@@ -684,10 +682,8 @@ static void __init htab_init_page_sizes(void)
if (mmu_psize_defs[MMU_PAGE_16M].shift &&
memblock_phys_mem_size() >= 0x40000000)
mmu_vmemmap_psize = MMU_PAGE_16M;
- else if (mmu_psize_defs[MMU_PAGE_64K].shift)
- mmu_vmemmap_psize = MMU_PAGE_64K;
else
- mmu_vmemmap_psize = MMU_PAGE_4K;
+ mmu_vmemmap_psize = mmu_virtual_psize;
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
printk(KERN_DEBUG "Page orders: linear mapping = %d, "
@@ -825,7 +821,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
* For now, UPRT is 0 and we have no segment table.
*/
htab_size = __ilog2(htab_size) - 18;
- mmu_partition_table_set_entry(0, hash_table | htab_size, 0);
+ mmu_partition_table_set_entry(0, hash_table | htab_size, 0, false);
pr_info("Partition table %p\n", partition_tb);
}
@@ -859,12 +855,6 @@ static void __init htab_initialize(void)
/* Using a hypervisor which owns the htab */
htab_address = NULL;
_SDR1 = 0;
- /*
- * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall
- * to inform the hypervisor that we wish to use the HPT.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- register_process_table(0, 0, 0);
#ifdef CONFIG_FA_DUMP
/*
* If firmware assisted dump is active firmware preserves
@@ -981,7 +971,7 @@ void __init hash__early_init_devtree(void)
htab_scan_page_sizes();
}
-struct hash_mm_context init_hash_mm_context;
+static struct hash_mm_context init_hash_mm_context;
void __init hash__early_init_mmu(void)
{
#ifndef CONFIG_PPC_64K_PAGES
@@ -1077,8 +1067,8 @@ void hash__early_init_mmu_secondary(void)
if (!cpu_has_feature(CPU_FTR_ARCH_300))
mtspr(SPRN_SDR1, _SDR1);
else
- mtspr(SPRN_PTCR,
- __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+ set_ptcr_when_no_uv(__pa(partition_tb) |
+ (PATB_SIZE_SHIFT - 12));
}
/* Initialize SLB */
slb_initialize();
@@ -1462,8 +1452,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
}
EXPORT_SYMBOL_GPL(hash_page);
-int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
- unsigned long dsisr)
+int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr,
+ unsigned long msr)
{
unsigned long access = _PAGE_PRESENT | _PAGE_READ;
unsigned long flags = 0;
@@ -1520,8 +1510,8 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
}
#endif
-void hash_preload(struct mm_struct *mm, unsigned long ea,
- bool is_exec, unsigned long trap)
+static void hash_preload(struct mm_struct *mm, unsigned long ea,
+ bool is_exec, unsigned long trap)
{
int hugepage_shift;
unsigned long vsid;
@@ -1601,6 +1591,57 @@ out_exit:
local_irq_restore(flags);
}
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ *
+ * This must always be called with the pte lock held.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep)
+{
+ /*
+ * We don't need to worry about _PAGE_PRESENT here because we are
+ * called with either mm->page_table_lock held or ptl lock held
+ */
+ unsigned long trap;
+ bool is_exec;
+
+ if (radix_enabled()) {
+ prefetch((void *)address);
+ return;
+ }
+
+ /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+ if (!pte_young(*ptep) || address >= TASK_SIZE)
+ return;
+
+ /*
+ * We try to figure out if we are coming from an instruction
+ * access fault and pass that down to __hash_page so we avoid
+ * double-faulting on execution of fresh text. We have to test
+ * for regs NULL since init will get here first thing at boot.
+ *
+ * We also avoid filling the hash if not coming from a fault.
+ */
+
+ trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL;
+ switch (trap) {
+ case 0x300:
+ is_exec = false;
+ break;
+ case 0x400:
+ is_exec = true;
+ break;
+ default:
+ return;
+ }
+
+ hash_preload(vma->vm_mm, address, is_exec, trap);
+}
+
#ifdef CONFIG_PPC_MEM_KEYS
/*
* Return the protection key associated with the given address and the
@@ -1707,7 +1748,7 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
/*
* IF we try to do a HUGE PTE update after a withdraw is done.
* we will find the below NULL. This happens when we do
- * split_huge_page_pmd
+ * split_huge_pmd
*/
if (!hpte_slot_array)
return;
@@ -1901,11 +1942,20 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
*
* For guests on platforms before POWER9, we clamp the it limit to 1G
* to avoid some funky things such as RTAS bugs etc...
+ *
+ * On POWER9 we limit to 1TB in case the host erroneously told us that
+ * the RMA was >1TB. Effective address bits 0:23 are treated as zero
+ * (meaning the access is aliased to zero i.e. addr = addr % 1TB)
+ * for virtual real mode addressing and so it doesn't make sense to
+ * have an area larger than 1TB as it can't be addressed.
*/
if (!early_cpu_has_feature(CPU_FTR_HVMODE)) {
ppc64_rma_size = first_memblock_size;
if (!early_cpu_has_feature(CPU_FTR_ARCH_300))
ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000);
+ else
+ ppc64_rma_size = min_t(u64, ppc64_rma_size,
+ 1UL << SID_SHIFT_1T);
/* Finally limit subsequent allocations */
memblock_set_current_limit(ppc64_rma_size);
@@ -1924,10 +1974,16 @@ static int hpt_order_get(void *data, u64 *val)
static int hpt_order_set(void *data, u64 val)
{
+ int ret;
+
if (!mmu_hash_ops.resize_hpt)
return -ENODEV;
- return mmu_hash_ops.resize_hpt(val);
+ cpus_read_lock();
+ ret = mmu_hash_ops.resize_hpt(val);
+ cpus_read_unlock();
+
+ return ret;
}
DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
@@ -1950,7 +2006,4 @@ void __init print_system_hash_info(void)
if (htab_hash_mask)
pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
- pr_info("kernel vmalloc start = 0x%lx\n", KERN_VIRT_START);
- pr_info("kernel IO start = 0x%lx\n", KERN_IO_START);
- pr_info("kernel vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
}
diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c
index 90ee3a89722c..56cc84520577 100644
--- a/arch/powerpc/mm/book3s64/iommu_api.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
@@ -14,6 +14,7 @@
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/sizes.h>
+#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>
#include <linux/mm_inline.h>
@@ -46,40 +47,6 @@ struct mm_iommu_table_group_mem_t {
u64 dev_hpa; /* Device memory base address */
};
-static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
- unsigned long npages, bool incr)
-{
- long ret = 0, locked, lock_limit;
-
- if (!npages)
- return 0;
-
- down_write(&mm->mmap_sem);
-
- if (incr) {
- locked = mm->locked_vm + npages;
- lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- if (locked > lock_limit && !capable(CAP_IPC_LOCK))
- ret = -ENOMEM;
- else
- mm->locked_vm += npages;
- } else {
- if (WARN_ON_ONCE(npages > mm->locked_vm))
- npages = mm->locked_vm;
- mm->locked_vm -= npages;
- }
-
- pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
- current ? current->pid : 0,
- incr ? '+' : '-',
- npages << PAGE_SHIFT,
- mm->locked_vm << PAGE_SHIFT,
- rlimit(RLIMIT_MEMLOCK));
- up_write(&mm->mmap_sem);
-
- return ret;
-}
-
bool mm_iommu_preregistered(struct mm_struct *mm)
{
return !list_empty(&mm->context.iommu_group_mem_list);
@@ -96,7 +63,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
unsigned long entry, chunk;
if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
- ret = mm_iommu_adjust_locked_vm(mm, entries, true);
+ ret = account_locked_vm(mm, entries, true);
if (ret)
return ret;
@@ -162,11 +129,8 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
* Allow to use larger than 64k IOMMU pages. Only do that
* if we are backed by hugetlb.
*/
- if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
- struct page *head = compound_head(page);
-
- pageshift = compound_order(head) + PAGE_SHIFT;
- }
+ if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page))
+ pageshift = page_shift(compound_head(page));
mem->pageshift = min(mem->pageshift, pageshift);
/*
* We don't need struct page reference any more, switch
@@ -211,7 +175,7 @@ free_exit:
kfree(mem);
unlock_exit:
- mm_iommu_adjust_locked_vm(mm, locked_entries, false);
+ account_locked_vm(mm, locked_entries, false);
return ret;
}
@@ -311,7 +275,7 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
unlock_exit:
mutex_unlock(&mem_list_mutex);
- mm_iommu_adjust_locked_vm(mm, unlock_entries, false);
+ account_locked_vm(mm, unlock_entries, false);
return ret;
}
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index 794404d50a85..2d0cb5ba9a47 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -174,7 +174,6 @@ static int radix__init_new_context(struct mm_struct *mm)
*/
asm volatile("ptesync;isync" : : : "memory");
- mm->context.npu_context = NULL;
mm->context.hash_context = NULL;
return index;
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 01bc9663360d..75483b40fcb1 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -8,10 +8,13 @@
#include <linux/memblock.h>
#include <misc/cxl-base.h>
+#include <asm/debugfs.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/trace.h>
#include <asm/powernv.h>
+#include <asm/firmware.h>
+#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
#include <trace/events/thp.h>
@@ -21,9 +24,6 @@ EXPORT_SYMBOL(__pmd_frag_nr);
unsigned long __pmd_frag_size_shift;
EXPORT_SYMBOL(__pmd_frag_size_shift);
-int (*register_process_table)(unsigned long base, unsigned long page_size,
- unsigned long tbl_size);
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* This is called when relaxing access to a hugepage. It's also called in the page
@@ -72,7 +72,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
assert_spin_locked(pmd_lockptr(mm, pmdp));
- WARN_ON(!(pmd_large(pmd) || pmd_devmap(pmd)));
+ WARN_ON(!(pmd_large(pmd)));
#endif
trace_hugepage_set_pmd(addr, pmd_val(pmd));
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
@@ -205,37 +205,61 @@ void __init mmu_partition_table_init(void)
* 64 K size.
*/
ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
- mtspr(SPRN_PTCR, ptcr);
+ set_ptcr_when_no_uv(ptcr);
powernv_set_nmmu_ptcr(ptcr);
}
+static void flush_partition(unsigned int lpid, bool radix)
+{
+ if (radix) {
+ radix__flush_all_lpid(lpid);
+ radix__flush_all_lpid_guest(lpid);
+ } else {
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ /* do we need fixup here ?*/
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
+ }
+}
+
void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
- unsigned long dw1)
+ unsigned long dw1, bool flush)
{
unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
+ /*
+ * When ultravisor is enabled, the partition table is stored in secure
+ * memory and can only be accessed doing an ultravisor call. However, we
+ * maintain a copy of the partition table in normal memory to allow Nest
+ * MMU translations to occur (for normal VMs).
+ *
+ * Therefore, here we always update partition_tb, regardless of whether
+ * we are running under an ultravisor or not.
+ */
partition_tb[lpid].patb0 = cpu_to_be64(dw0);
partition_tb[lpid].patb1 = cpu_to_be64(dw1);
/*
- * Global flush of TLBs and partition table caches for this lpid.
- * The type of flush (hash or radix) depends on what the previous
- * use of this partition ID was, not the new use.
+ * If ultravisor is enabled, we do an ultravisor call to register the
+ * partition table entry (PATE), which also do a global flush of TLBs
+ * and partition table caches for the lpid. Otherwise, just do the
+ * flush. The type of flush (hash or radix) depends on what the previous
+ * use of the partition ID was, not the new use.
*/
- asm volatile("ptesync" : : : "memory");
- if (old & PATB_HR) {
- asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
- } else {
- asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
+ if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
+ uv_register_pate(lpid, dw0, dw1);
+ pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n",
+ dw0, dw1);
+ } else if (flush) {
+ /*
+ * Boot does not need to flush, because MMU is off and each
+ * CPU does a tlbiel_all() before switching them on, which
+ * flushes everything.
+ */
+ flush_partition(lpid, (old & PATB_HR));
}
- /* do we need fixup here ?*/
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
@@ -446,3 +470,49 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
return true;
}
+
+/*
+ * Does the CPU support tlbie?
+ */
+bool tlbie_capable __read_mostly = true;
+EXPORT_SYMBOL(tlbie_capable);
+
+/*
+ * Should tlbie be used for management of CPU TLBs, for kernel and process
+ * address spaces? tlbie may still be used for nMMU accelerators, and for KVM
+ * guest address spaces.
+ */
+bool tlbie_enabled __read_mostly = true;
+
+static int __init setup_disable_tlbie(char *str)
+{
+ if (!radix_enabled()) {
+ pr_err("disable_tlbie: Unable to disable TLBIE with Hash MMU.\n");
+ return 1;
+ }
+
+ tlbie_capable = false;
+ tlbie_enabled = false;
+
+ return 1;
+}
+__setup("disable_tlbie", setup_disable_tlbie);
+
+static int __init pgtable_debugfs_setup(void)
+{
+ if (!tlbie_capable)
+ return 0;
+
+ /*
+ * There is no locking vs tlb flushing when changing this value.
+ * The tlb flushers will see one value or another, and use either
+ * tlbie or tlbiel with IPIs. In both cases the TLBs will be
+ * invalidated as expected.
+ */
+ debugfs_create_bool("tlbie_enabled", 0600,
+ powerpc_debugfs_root,
+ &tlbie_enabled);
+
+ return 0;
+}
+arch_initcall(pgtable_debugfs_setup);
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 273ae66a9a45..3a1fbf9cb8f8 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -7,6 +7,7 @@
#define pr_fmt(fmt) "radix-mmu: " fmt
+#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/memblock.h>
@@ -26,25 +27,13 @@
#include <asm/sections.h>
#include <asm/trace.h>
#include <asm/uaccess.h>
+#include <asm/ultravisor.h>
#include <trace/events/thp.h>
unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;
-static int native_register_process_table(unsigned long base, unsigned long pg_sz,
- unsigned long table_size)
-{
- unsigned long patb0, patb1;
-
- patb0 = be64_to_cpu(partition_tb[0].patb0);
- patb1 = base | table_size | PATB_GR;
-
- mmu_partition_table_set_entry(0, patb0, patb1);
-
- return 0;
-}
-
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
unsigned long region_start, unsigned long region_end)
{
@@ -198,14 +187,14 @@ void radix__change_memory_range(unsigned long start, unsigned long end,
pudp = pud_alloc(&init_mm, pgdp, idx);
if (!pudp)
continue;
- if (pud_huge(*pudp)) {
+ if (pud_is_leaf(*pudp)) {
ptep = (pte_t *)pudp;
goto update_the_pte;
}
pmdp = pmd_alloc(&init_mm, pudp, idx);
if (!pmdp)
continue;
- if (pmd_huge(*pmdp)) {
+ if (pmd_is_leaf(*pmdp)) {
ptep = pmdp_ptep(pmdp);
goto update_the_pte;
}
@@ -319,7 +308,7 @@ static int __meminit create_physical_mapping(unsigned long start,
return 0;
}
-void __init radix_init_pgtable(void)
+static void __init radix_init_pgtable(void)
{
unsigned long rts_field;
struct memblock_region *reg;
@@ -379,18 +368,6 @@ void __init radix_init_pgtable(void)
*/
rts_field = radix__get_tree_size();
process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
- /*
- * Fill in the partition table. We are suppose to use effective address
- * of process table here. But our linear mapping also enable us to use
- * physical address here.
- */
- register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
- pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
- asm volatile("ptesync" : : : "memory");
- asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
- "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
- trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
/*
* The init_mm context is given the first available (non-zero) PID,
@@ -411,20 +388,15 @@ void __init radix_init_pgtable(void)
static void __init radix_init_partition_table(void)
{
- unsigned long rts_field, dw0;
+ unsigned long rts_field, dw0, dw1;
mmu_partition_table_init();
rts_field = radix__get_tree_size();
dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
- mmu_partition_table_set_entry(0, dw0, 0);
+ dw1 = __pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR;
+ mmu_partition_table_set_entry(0, dw0, dw1, false);
pr_info("Initializing Radix MMU\n");
- pr_info("Partition table %p\n", partition_tb);
-}
-
-void __init radix_init_native(void)
-{
- register_process_table = native_register_process_table;
}
static int __init get_idx_from_shift(unsigned int shift)
@@ -515,14 +487,6 @@ void __init radix__early_init_devtree(void)
mmu_psize_defs[MMU_PAGE_64K].shift = 16;
mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
found:
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- if (mmu_psize_defs[MMU_PAGE_2M].shift) {
- /*
- * map vmemmap using 2M if available
- */
- mmu_vmemmap_psize = MMU_PAGE_2M;
- }
-#endif /* CONFIG_SPARSEMEM_VMEMMAP */
return;
}
@@ -587,7 +551,13 @@ void __init radix__early_init_mmu(void)
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* vmemmap mapping */
- mmu_vmemmap_psize = mmu_virtual_psize;
+ if (mmu_psize_defs[MMU_PAGE_2M].shift) {
+ /*
+ * map vmemmap using 2M if available
+ */
+ mmu_vmemmap_psize = MMU_PAGE_2M;
+ } else
+ mmu_vmemmap_psize = mmu_virtual_psize;
#endif
/*
* initialize page table size
@@ -622,8 +592,9 @@ void __init radix__early_init_mmu(void)
__pmd_frag_nr = RADIX_PMD_FRAG_NR;
__pmd_frag_size_shift = RADIX_PMD_FRAG_SIZE_SHIFT;
+ radix_init_pgtable();
+
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
- radix_init_native();
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
@@ -634,11 +605,9 @@ void __init radix__early_init_mmu(void)
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
- radix_init_pgtable();
/* Switch to the guard PID before turning on MMU */
radix__switch_mmu_context(NULL, &init_mm);
- if (cpu_has_feature(CPU_FTR_HVMODE))
- tlbiel_all();
+ tlbiel_all();
}
void radix__early_init_mmu_secondary(void)
@@ -651,14 +620,14 @@ void radix__early_init_mmu_secondary(void)
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
- mtspr(SPRN_PTCR,
- __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+ set_ptcr_when_no_uv(__pa(partition_tb) |
+ (PATB_SIZE_SHIFT - 12));
+
radix_init_amor();
}
radix__switch_mmu_context(NULL, &init_mm);
- if (cpu_has_feature(CPU_FTR_HVMODE))
- tlbiel_all();
+ tlbiel_all();
}
void radix__mmu_cleanup_all(void)
@@ -668,7 +637,7 @@ void radix__mmu_cleanup_all(void)
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
- mtspr(SPRN_PTCR, 0);
+ set_ptcr_when_no_uv(0);
powernv_set_nmmu_ptcr(0);
radix__flush_tlb_all();
}
@@ -738,8 +707,8 @@ static int __meminit stop_machine_change_mapping(void *data)
spin_unlock(&init_mm.page_table_lock);
pte_clear(&init_mm, params->aligned_start, params->pte);
- create_physical_mapping(params->aligned_start, params->start, -1);
- create_physical_mapping(params->end, params->aligned_end, -1);
+ create_physical_mapping(__pa(params->aligned_start), __pa(params->start), -1);
+ create_physical_mapping(__pa(params->end), __pa(params->aligned_end), -1);
spin_lock(&init_mm.page_table_lock);
return 0;
}
@@ -832,7 +801,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
if (!pmd_present(*pmd))
continue;
- if (pmd_huge(*pmd)) {
+ if (pmd_is_leaf(*pmd)) {
split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
continue;
}
@@ -857,7 +826,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
if (!pud_present(*pud))
continue;
- if (pud_huge(*pud)) {
+ if (pud_is_leaf(*pud)) {
split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
continue;
}
@@ -883,7 +852,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
if (!pgd_present(*pgd))
continue;
- if (pgd_huge(*pgd)) {
+ if (pgd_is_leaf(*pgd)) {
split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
continue;
}
@@ -903,7 +872,7 @@ int __meminit radix__create_section_mapping(unsigned long start, unsigned long e
return -1;
}
- return create_physical_mapping(start, end, nid);
+ return create_physical_mapping(__pa(start), __pa(end), nid);
}
int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
@@ -1118,3 +1087,108 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
set_pte_at(mm, addr, ptep, pte);
}
+
+int __init arch_ioremap_pud_supported(void)
+{
+ /* HPT does not cope with large pages in the vmalloc area */
+ return radix_enabled();
+}
+
+int __init arch_ioremap_pmd_supported(void)
+{
+ return radix_enabled();
+}
+
+int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
+{
+ return 0;
+}
+
+int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
+{
+ pte_t *ptep = (pte_t *)pud;
+ pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot);
+
+ if (!radix_enabled())
+ return 0;
+
+ set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud);
+
+ return 1;
+}
+
+int pud_clear_huge(pud_t *pud)
+{
+ if (pud_huge(*pud)) {
+ pud_clear(pud);
+ return 1;
+ }
+
+ return 0;
+}
+
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
+{
+ pmd_t *pmd;
+ int i;
+
+ pmd = (pmd_t *)pud_page_vaddr(*pud);
+ pud_clear(pud);
+
+ flush_tlb_kernel_range(addr, addr + PUD_SIZE);
+
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ if (!pmd_none(pmd[i])) {
+ pte_t *pte;
+ pte = (pte_t *)pmd_page_vaddr(pmd[i]);
+
+ pte_free_kernel(&init_mm, pte);
+ }
+ }
+
+ pmd_free(&init_mm, pmd);
+
+ return 1;
+}
+
+int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
+{
+ pte_t *ptep = (pte_t *)pmd;
+ pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot);
+
+ if (!radix_enabled())
+ return 0;
+
+ set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd);
+
+ return 1;
+}
+
+int pmd_clear_huge(pmd_t *pmd)
+{
+ if (pmd_huge(*pmd)) {
+ pmd_clear(pmd);
+ return 1;
+ }
+
+ return 0;
+}
+
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
+{
+ pte_t *pte;
+
+ pte = (pte_t *)pmd_page_vaddr(*pmd);
+ pmd_clear(pmd);
+
+ flush_tlb_kernel_range(addr, addr + PMD_SIZE);
+
+ pte_free_kernel(&init_mm, pte);
+
+ return 1;
+}
+
+int __init arch_ioremap_p4d_supported(void)
+{
+ return 0;
+}
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index bb9835681315..631be42abd33 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -25,7 +25,7 @@
* tlbiel instruction for radix, set invalidation
* i.e., r=1 and is=01 or is=10 or is=11
*/
-static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
+static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
unsigned int pid,
unsigned int ric, unsigned int prs)
{
@@ -51,11 +51,15 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
* and partition table entries. Then flush the remaining sets of the
* TLB.
*/
- tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
- for (set = 1; set < num_sets; set++)
- tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
- /* Do the same for process scoped entries. */
+ if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
+ /* MSR[HV] should flush partition scope translations first. */
+ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
+ for (set = 1; set < num_sets; set++)
+ tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
+ }
+
+ /* Flush process scoped entries. */
tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
for (set = 1; set < num_sets; set++)
tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
@@ -83,7 +87,7 @@ void radix__tlbiel_all(unsigned int action)
else
WARN(1, "%s called on pre-POWER9 CPU\n", __func__);
- asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}
static __always_inline void __tlbiel_pid(unsigned long pid, int set,
@@ -116,22 +120,6 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
-static __always_inline void __tlbiel_lpid(unsigned long lpid, int set,
- unsigned long ric)
-{
- unsigned long rb,rs,prs,r;
-
- rb = PPC_BIT(52); /* IS = 2 */
- rb |= set << PPC_BITLSHIFT(51);
- rs = 0; /* LPID comes from LPIDR */
- prs = 0; /* partition scoped */
- r = 1; /* radix format */
-
- asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
- : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
-}
-
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -146,25 +134,22 @@ static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
-static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
- unsigned long ric)
+static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
rb = PPC_BIT(52); /* IS = 2 */
- rb |= set << PPC_BITLSHIFT(51);
- rs = 0; /* LPID comes from LPIDR */
+ rs = lpid;
prs = 1; /* process scoped */
r = 1; /* radix format */
- asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
- trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
+ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
-
-static inline void __tlbiel_va(unsigned long va, unsigned long pid,
- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long ap, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -179,8 +164,8 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
-static inline void __tlbie_va(unsigned long va, unsigned long pid,
- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long ap, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -195,8 +180,8 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
-static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
- unsigned long ap, unsigned long ric)
+static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
+ unsigned long ap, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -235,7 +220,7 @@ static inline void fixup_tlbie_lpid(unsigned long lpid)
/*
* We use 128 set in radix mode and 256 set in hpt mode.
*/
-static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
+static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
int set;
@@ -258,7 +243,7 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
asm volatile("ptesync": : :"memory");
- asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+ asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}
static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
@@ -285,32 +270,37 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
+struct tlbiel_pid {
+ unsigned long pid;
+ unsigned long ric;
+};
+
+static void do_tlbiel_pid(void *info)
{
- int set;
+ struct tlbiel_pid *t = info;
- VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
+ if (t->ric == RIC_FLUSH_TLB)
+ _tlbiel_pid(t->pid, RIC_FLUSH_TLB);
+ else if (t->ric == RIC_FLUSH_PWC)
+ _tlbiel_pid(t->pid, RIC_FLUSH_PWC);
+ else
+ _tlbiel_pid(t->pid, RIC_FLUSH_ALL);
+}
- asm volatile("ptesync": : :"memory");
+static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
+ unsigned long pid, unsigned long ric)
+{
+ struct cpumask *cpus = mm_cpumask(mm);
+ struct tlbiel_pid t = { .pid = pid, .ric = ric };
+ on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
/*
- * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
- * also flush the entire Page Walk Cache.
+ * Always want the CPU translations to be invalidated with tlbiel in
+ * these paths, so while coprocessors must use tlbie, we can not
+ * optimise away the tlbiel component.
*/
- __tlbiel_lpid(lpid, 0, ric);
-
- /* For PWC, only one flush is needed */
- if (ric == RIC_FLUSH_PWC) {
- asm volatile("ptesync": : :"memory");
- return;
- }
-
- /* For the remaining sets, just flush the TLB */
- for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
- __tlbiel_lpid(lpid, set, RIC_FLUSH_TLB);
-
- asm volatile("ptesync": : :"memory");
- asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+ if (atomic_read(&mm->context.copros) > 0)
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
}
static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
@@ -337,35 +327,28 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
+static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
- int set;
-
- VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
-
- asm volatile("ptesync": : :"memory");
-
/*
- * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
- * also flush the entire Page Walk Cache.
+ * Workaround the fact that the "ric" argument to __tlbie_pid
+ * must be a compile-time contraint to match the "i" constraint
+ * in the asm statement.
*/
- __tlbiel_lpid_guest(lpid, 0, ric);
-
- /* For PWC, only one flush is needed */
- if (ric == RIC_FLUSH_PWC) {
- asm volatile("ptesync": : :"memory");
- return;
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}
-
- /* For the remaining sets, just flush the TLB */
- for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
- __tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);
-
- asm volatile("ptesync": : :"memory");
- asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
+ fixup_tlbie_lpid(lpid);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-
static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
unsigned long pid, unsigned long page_size,
unsigned long psize)
@@ -377,8 +360,8 @@ static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}
-static inline void _tlbiel_va(unsigned long va, unsigned long pid,
- unsigned long psize, unsigned long ric)
+static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
{
unsigned long ap = mmu_get_ap(psize);
@@ -409,8 +392,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
}
-static inline void _tlbie_va(unsigned long va, unsigned long pid,
- unsigned long psize, unsigned long ric)
+static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
{
unsigned long ap = mmu_get_ap(psize);
@@ -420,7 +403,54 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
+struct tlbiel_va {
+ unsigned long pid;
+ unsigned long va;
+ unsigned long psize;
+ unsigned long ric;
+};
+
+static void do_tlbiel_va(void *info)
+{
+ struct tlbiel_va *t = info;
+
+ if (t->ric == RIC_FLUSH_TLB)
+ _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
+ else if (t->ric == RIC_FLUSH_PWC)
+ _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
+ else
+ _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
+}
+
+static inline void _tlbiel_va_multicast(struct mm_struct *mm,
+ unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+{
+ struct cpumask *cpus = mm_cpumask(mm);
+ struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
+ on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
+ if (atomic_read(&mm->context.copros) > 0)
+ _tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
+}
+
+struct tlbiel_va_range {
+ unsigned long pid;
+ unsigned long start;
+ unsigned long end;
+ unsigned long page_size;
+ unsigned long psize;
+ bool also_pwc;
+};
+
+static void do_tlbiel_va_range(void *info)
+{
+ struct tlbiel_va_range *t = info;
+
+ _tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
+ t->psize, t->also_pwc);
+}
+
+static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long psize, unsigned long ric)
{
unsigned long ap = mmu_get_ap(psize);
@@ -443,6 +473,21 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ struct cpumask *cpus = mm_cpumask(mm);
+ struct tlbiel_va_range t = { .start = start, .end = end,
+ .pid = pid, .page_size = page_size,
+ .psize = psize, .also_pwc = also_pwc };
+
+ on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
+ if (atomic_read(&mm->context.copros) > 0)
+ _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
+}
+
/*
* Base TLB flushing operations:
*
@@ -580,10 +625,14 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
goto local;
}
- if (mm_needs_flush_escalation(mm))
- _tlbie_pid(pid, RIC_FLUSH_ALL);
- else
- _tlbie_pid(pid, RIC_FLUSH_TLB);
+ if (cputlb_use_tlbie()) {
+ if (mm_needs_flush_escalation(mm))
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ } else {
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
+ }
} else {
local:
_tlbiel_pid(pid, RIC_FLUSH_TLB);
@@ -609,7 +658,10 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
goto local;
}
}
- _tlbie_pid(pid, RIC_FLUSH_ALL);
+ if (cputlb_use_tlbie())
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ else
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
} else {
local:
_tlbiel_pid(pid, RIC_FLUSH_ALL);
@@ -644,7 +696,10 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
exit_flush_lazy_tlbs(mm);
goto local;
}
- _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+ if (cputlb_use_tlbie())
+ _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+ else
+ _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
} else {
local:
_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
@@ -666,9 +721,35 @@ EXPORT_SYMBOL(radix__flush_tlb_page);
#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */
+static void do_tlbiel_kernel(void *info)
+{
+ _tlbiel_pid(0, RIC_FLUSH_ALL);
+}
+
+static inline void _tlbiel_kernel_broadcast(void)
+{
+ on_each_cpu(do_tlbiel_kernel, NULL, 1);
+ if (tlbie_capable) {
+ /*
+ * Coherent accelerators don't refcount kernel memory mappings,
+ * so have to always issue a tlbie for them. This is quite a
+ * slow path anyway.
+ */
+ _tlbie_pid(0, RIC_FLUSH_ALL);
+ }
+}
+
+/*
+ * If kernel TLBIs ever become local rather than global, then
+ * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
+ * assumes kernel TLBIs are global.
+ */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- _tlbie_pid(0, RIC_FLUSH_ALL);
+ if (cputlb_use_tlbie())
+ _tlbie_pid(0, RIC_FLUSH_ALL);
+ else
+ _tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
@@ -724,10 +805,14 @@ is_local:
if (local) {
_tlbiel_pid(pid, RIC_FLUSH_TLB);
} else {
- if (mm_needs_flush_escalation(mm))
- _tlbie_pid(pid, RIC_FLUSH_ALL);
- else
- _tlbie_pid(pid, RIC_FLUSH_TLB);
+ if (cputlb_use_tlbie()) {
+ if (mm_needs_flush_escalation(mm))
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ } else {
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
+ }
}
} else {
bool hflush = flush_all_sizes;
@@ -752,8 +837,8 @@ is_local:
gflush = false;
}
- asm volatile("ptesync": : :"memory");
if (local) {
+ asm volatile("ptesync": : :"memory");
__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbiel_va_range(hstart, hend, pid,
@@ -762,7 +847,8 @@ is_local:
__tlbiel_va_range(gstart, gend, pid,
PUD_SIZE, MMU_PAGE_1G);
asm volatile("ptesync": : :"memory");
- } else {
+ } else if (cputlb_use_tlbie()) {
+ asm volatile("ptesync": : :"memory");
__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbie_va_range(hstart, hend, pid,
@@ -772,6 +858,15 @@ is_local:
PUD_SIZE, MMU_PAGE_1G);
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ } else {
+ _tlbiel_va_range_multicast(mm,
+ start, end, pid, page_size, mmu_virtual_psize, false);
+ if (hflush)
+ _tlbiel_va_range_multicast(mm,
+ hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
+ if (gflush)
+ _tlbiel_va_range_multicast(mm,
+ gstart, gend, pid, PUD_SIZE, MMU_PAGE_1G, false);
}
}
preempt_enable();
@@ -830,32 +925,19 @@ EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
/*
* Flush partition scoped translations from LPID (=LPIDR)
*/
-void radix__flush_tlb_lpid(unsigned int lpid)
+void radix__flush_all_lpid(unsigned int lpid)
{
_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
-EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid);
-
-/*
- * Flush partition scoped translations from LPID (=LPIDR)
- */
-void radix__local_flush_tlb_lpid(unsigned int lpid)
-{
- _tlbiel_lpid(lpid, RIC_FLUSH_ALL);
-}
-EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid);
+EXPORT_SYMBOL_GPL(radix__flush_all_lpid);
/*
- * Flush process scoped translations from LPID (=LPIDR).
- * Important difference, the guest normally manages its own translations,
- * but some cases e.g., vCPU CPU migration require KVM to flush.
+ * Flush process scoped translations from LPID (=LPIDR)
*/
-void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
+void radix__flush_all_lpid_guest(unsigned int lpid)
{
- _tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL);
+ _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}
-EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest);
-
static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long end, int psize);
@@ -961,16 +1043,26 @@ is_local:
if (local) {
_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
} else {
- if (mm_needs_flush_escalation(mm))
- also_pwc = true;
+ if (cputlb_use_tlbie()) {
+ if (mm_needs_flush_escalation(mm))
+ also_pwc = true;
+
+ _tlbie_pid(pid,
+ also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+ } else {
+ _tlbiel_pid_multicast(mm, pid,
+ also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+ }
- _tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
}
} else {
if (local)
_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
- else
+ else if (cputlb_use_tlbie())
_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
+ else
+ _tlbiel_va_range_multicast(mm,
+ start, end, pid, page_size, psize, also_pwc);
}
preempt_enable();
}
@@ -1012,7 +1104,11 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
exit_flush_lazy_tlbs(mm);
goto local;
}
- _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ if (cputlb_use_tlbie())
+ _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ else
+ _tlbiel_va_range_multicast(mm,
+ addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
} else {
local:
_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 9ba07e55c489..2ef24a53f4c9 100644
--- a/arch/powerpc/mm/book3s64/subpage_prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -7,7 +7,7 @@
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/types.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
#include <linux/hugetlb.h>
#include <linux/syscalls.h>
@@ -139,14 +139,14 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
return 0;
}
+static const struct mm_walk_ops subpage_walk_ops = {
+ .pmd_entry = subpage_walk_pmd_entry,
+};
+
static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
unsigned long len)
{
struct vm_area_struct *vma;
- struct mm_walk subpage_proto_walk = {
- .mm = mm,
- .pmd_entry = subpage_walk_pmd_entry,
- };
/*
* We don't try too hard, we just mark all the vma in that range
@@ -163,7 +163,7 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
if (vma->vm_start >= (addr + len))
break;
vma->vm_flags |= VM_NOHUGEPAGE;
- walk_page_vma(vma, &subpage_proto_walk);
+ walk_page_vma(vma, &subpage_walk_ops, NULL);
vma = vma->vm_next;
}
}
diff --git a/arch/powerpc/mm/book3s64/vphn.h b/arch/powerpc/mm/book3s64/vphn.h
deleted file mode 100644
index f0b93c2dd578..000000000000
--- a/arch/powerpc/mm/book3s64/vphn.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ARCH_POWERPC_MM_VPHN_H_
-#define _ARCH_POWERPC_MM_VPHN_H_
-
-/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */
-#define VPHN_REGISTER_COUNT 6
-
-/*
- * 6 64-bit registers unpacked into up to 24 be32 associativity values. To
- * form the complete property we have to add the length in the first cell.
- */
-#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1)
-
-extern int vphn_unpack_associativity(const long *packed, __be32 *unpacked);
-
-#endif
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index c617282d5b2a..2a82984356f8 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -4,310 +4,18 @@
* Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
*
* Copyright (C) 2000 Russell King
- *
- * Consistent memory allocators. Used for DMA devices that want to
- * share uncached memory with the processor core. The function return
- * is the virtual address and 'dma_handle' is the physical address.
- * Mostly stolen from the ARM port, with some changes for PowerPC.
- * -- Dan
- *
- * Reorganized to get rid of the arch-specific consistent_* functions
- * and provide non-coherent implementations for the DMA API. -Matt
- *
- * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
- * implementation. This is pulled straight from ARM and barely
- * modified. -Matt
*/
-#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/errno.h>
-#include <linux/string.h>
#include <linux/types.h>
#include <linux/highmem.h>
#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
-#include <linux/export.h>
#include <asm/tlbflush.h>
#include <asm/dma.h>
-#include <mm/mmu_decl.h>
-
-/*
- * This address range defaults to a value that is safe for all
- * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It
- * can be further configured for specific applications under
- * the "Advanced Setup" menu. -Matt
- */
-#define CONSISTENT_BASE (IOREMAP_TOP)
-#define CONSISTENT_END (CONSISTENT_BASE + CONFIG_CONSISTENT_SIZE)
-#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
-
-/*
- * This is the page table (2MB) covering uncached, DMA consistent allocations
- */
-static DEFINE_SPINLOCK(consistent_lock);
-
-/*
- * VM region handling support.
- *
- * This should become something generic, handling VM region allocations for
- * vmalloc and similar (ioremap, module space, etc).
- *
- * I envisage vmalloc()'s supporting vm_struct becoming:
- *
- * struct vm_struct {
- * struct vm_region region;
- * unsigned long flags;
- * struct page **pages;
- * unsigned int nr_pages;
- * unsigned long phys_addr;
- * };
- *
- * get_vm_area() would then call vm_region_alloc with an appropriate
- * struct vm_region head (eg):
- *
- * struct vm_region vmalloc_head = {
- * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list),
- * .vm_start = VMALLOC_START,
- * .vm_end = VMALLOC_END,
- * };
- *
- * However, vmalloc_head.vm_start is variable (typically, it is dependent on
- * the amount of RAM found at boot time.) I would imagine that get_vm_area()
- * would have to initialise this each time prior to calling vm_region_alloc().
- */
-struct ppc_vm_region {
- struct list_head vm_list;
- unsigned long vm_start;
- unsigned long vm_end;
-};
-
-static struct ppc_vm_region consistent_head = {
- .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
- .vm_start = CONSISTENT_BASE,
- .vm_end = CONSISTENT_END,
-};
-
-static struct ppc_vm_region *
-ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp)
-{
- unsigned long addr = head->vm_start, end = head->vm_end - size;
- unsigned long flags;
- struct ppc_vm_region *c, *new;
-
- new = kmalloc(sizeof(struct ppc_vm_region), gfp);
- if (!new)
- goto out;
-
- spin_lock_irqsave(&consistent_lock, flags);
-
- list_for_each_entry(c, &head->vm_list, vm_list) {
- if ((addr + size) < addr)
- goto nospc;
- if ((addr + size) <= c->vm_start)
- goto found;
- addr = c->vm_end;
- if (addr > end)
- goto nospc;
- }
-
- found:
- /*
- * Insert this entry _before_ the one we found.
- */
- list_add_tail(&new->vm_list, &c->vm_list);
- new->vm_start = addr;
- new->vm_end = addr + size;
-
- spin_unlock_irqrestore(&consistent_lock, flags);
- return new;
-
- nospc:
- spin_unlock_irqrestore(&consistent_lock, flags);
- kfree(new);
- out:
- return NULL;
-}
-
-static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr)
-{
- struct ppc_vm_region *c;
-
- list_for_each_entry(c, &head->vm_list, vm_list) {
- if (c->vm_start == addr)
- goto out;
- }
- c = NULL;
- out:
- return c;
-}
-
-/*
- * Allocate DMA-coherent memory space and return both the kernel remapped
- * virtual and bus address for that space.
- */
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, unsigned long attrs)
-{
- struct page *page;
- struct ppc_vm_region *c;
- unsigned long order;
- u64 mask = ISA_DMA_THRESHOLD, limit;
-
- if (dev) {
- mask = dev->coherent_dma_mask;
-
- /*
- * Sanity check the DMA mask - it must be non-zero, and
- * must be able to be satisfied by a DMA allocation.
- */
- if (mask == 0) {
- dev_warn(dev, "coherent DMA mask is unset\n");
- goto no_page;
- }
-
- if ((~mask) & ISA_DMA_THRESHOLD) {
- dev_warn(dev, "coherent DMA mask %#llx is smaller "
- "than system GFP_DMA mask %#llx\n",
- mask, (unsigned long long)ISA_DMA_THRESHOLD);
- goto no_page;
- }
- }
-
-
- size = PAGE_ALIGN(size);
- limit = (mask + 1) & ~mask;
- if ((limit && size >= limit) ||
- size >= (CONSISTENT_END - CONSISTENT_BASE)) {
- printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n",
- size, mask);
- return NULL;
- }
-
- order = get_order(size);
-
- /* Might be useful if we ever have a real legacy DMA zone... */
- if (mask != 0xffffffff)
- gfp |= GFP_DMA;
-
- page = alloc_pages(gfp, order);
- if (!page)
- goto no_page;
-
- /*
- * Invalidate any data that might be lurking in the
- * kernel direct-mapped region for device DMA.
- */
- {
- unsigned long kaddr = (unsigned long)page_address(page);
- memset(page_address(page), 0, size);
- flush_dcache_range(kaddr, kaddr + size);
- }
-
- /*
- * Allocate a virtual address in the consistent mapping region.
- */
- c = ppc_vm_region_alloc(&consistent_head, size,
- gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
- if (c) {
- unsigned long vaddr = c->vm_start;
- struct page *end = page + (1 << order);
-
- split_page(page, order);
-
- /*
- * Set the "dma handle"
- */
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
-
- do {
- SetPageReserved(page);
- map_kernel_page(vaddr, page_to_phys(page),
- pgprot_noncached(PAGE_KERNEL));
- page++;
- vaddr += PAGE_SIZE;
- } while (size -= PAGE_SIZE);
-
- /*
- * Free the otherwise unused pages.
- */
- while (page < end) {
- __free_page(page);
- page++;
- }
-
- return (void *)c->vm_start;
- }
-
- if (page)
- __free_pages(page, order);
- no_page:
- return NULL;
-}
-
-/*
- * free a page as defined by the above mapping.
- */
-void arch_dma_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- struct ppc_vm_region *c;
- unsigned long flags, addr;
-
- size = PAGE_ALIGN(size);
-
- spin_lock_irqsave(&consistent_lock, flags);
-
- c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr);
- if (!c)
- goto no_area;
-
- if ((c->vm_end - c->vm_start) != size) {
- printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
- __func__, c->vm_end - c->vm_start, size);
- dump_stack();
- size = c->vm_end - c->vm_start;
- }
-
- addr = c->vm_start;
- do {
- pte_t *ptep;
- unsigned long pfn;
-
- ptep = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(addr),
- addr),
- addr),
- addr);
- if (!pte_none(*ptep) && pte_present(*ptep)) {
- pfn = pte_pfn(*ptep);
- pte_clear(&init_mm, addr, ptep);
- if (pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
- __free_reserved_page(page);
- }
- }
- addr += PAGE_SIZE;
- } while (size -= PAGE_SIZE);
-
- flush_tlb_kernel_range(c->vm_start, c->vm_end);
-
- list_del(&c->vm_list);
-
- spin_unlock_irqrestore(&consistent_lock, flags);
-
- kfree(c);
- return;
-
- no_area:
- spin_unlock_irqrestore(&consistent_lock, flags);
- printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
- __func__, vaddr);
- dump_stack();
-}
-
/*
* make an area consistent.
*/
@@ -408,23 +116,9 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
__dma_sync_page(paddr, size, dir);
}
-/*
- * Return the PFN for a given cpu virtual address returned by arch_dma_alloc.
- */
-long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr,
- dma_addr_t dma_addr)
+void arch_dma_prep_coherent(struct page *page, size_t size)
{
- /* This should always be populated, so we don't test every
- * level. If that fails, we'll have a nice crash which
- * will be as good as a BUG_ON()
- */
- unsigned long cpu_addr = (unsigned long)vaddr;
- pgd_t *pgd = pgd_offset_k(cpu_addr);
- pud_t *pud = pud_offset(pgd, cpu_addr);
- pmd_t *pmd = pmd_offset(pud, cpu_addr);
- pte_t *ptep = pte_offset_kernel(pmd, cpu_addr);
+ unsigned long kaddr = (unsigned long)page_address(page);
- if (pte_none(*ptep) || !pte_present(*ptep))
- return 0;
- return pte_pfn(*ptep);
+ flush_dcache_range(kaddr, kaddr + size);
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ec6b7ad70659..8432c281de92 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -42,26 +42,6 @@
#include <asm/debug.h>
#include <asm/kup.h>
-static inline bool notify_page_fault(struct pt_regs *regs)
-{
- bool ret = false;
-
-#ifdef CONFIG_KPROBES
- /* kprobe_running() needs smp_processor_id() */
- if (!user_mode(regs)) {
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, 11))
- ret = true;
- preempt_enable();
- }
-#endif /* CONFIG_KPROBES */
-
- if (unlikely(debugger_fault_handler(regs)))
- ret = true;
-
- return ret;
-}
-
/*
* Check whether the instruction inst is a store using
* an update addressing form which will update r1.
@@ -178,13 +158,12 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address,
if (fault & VM_FAULT_HWPOISON)
lsb = PAGE_SHIFT;
- force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb,
- current);
+ force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
return 0;
}
#endif
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
return 0;
}
@@ -462,8 +441,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
int is_write = page_fault_is_write(error_code);
vm_fault_t fault, major = 0;
bool must_retry = false;
+ bool kprobe_fault = kprobe_page_fault(regs, 11);
- if (notify_page_fault(regs))
+ if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
return 0;
if (unlikely(page_fault_is_bad(error_code))) {
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index b5d92dc32844..73d4873fc7f8 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -61,12 +61,17 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
num_hugepd = 1;
}
+ if (!cachep) {
+ WARN_ONCE(1, "No page table cache created for hugetlb tables");
+ return -ENOMEM;
+ }
+
new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
BUG_ON(pshift > HUGEPD_SHIFT_MASK);
BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
- if (! new)
+ if (!new)
return -ENOMEM;
/*
@@ -130,6 +135,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
} else {
pdshift = PUD_SHIFT;
pu = pud_alloc(mm, pg, addr);
+ if (!pu)
+ return NULL;
if (pshift == PUD_SHIFT)
return (pte_t *)pu;
else if (pshift > PMD_SHIFT) {
@@ -138,6 +145,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
} else {
pdshift = PMD_SHIFT;
pm = pmd_alloc(mm, pu, addr);
+ if (!pm)
+ return NULL;
if (pshift == PMD_SHIFT)
/* 16MB hugepage */
return (pte_t *)pm;
@@ -154,12 +163,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
} else {
pdshift = PUD_SHIFT;
pu = pud_alloc(mm, pg, addr);
+ if (!pu)
+ return NULL;
if (pshift >= PUD_SHIFT) {
ptl = pud_lockptr(mm, pu);
hpdp = (hugepd_t *)pu;
} else {
pdshift = PMD_SHIFT;
pm = pmd_alloc(mm, pu, addr);
+ if (!pm)
+ return NULL;
ptl = pmd_lockptr(mm, pm);
hpdp = (hugepd_t *)pm;
}
@@ -511,13 +524,6 @@ retry:
return page;
}
-static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
- unsigned long sz)
-{
- unsigned long __boundary = (addr + sz) & ~(sz-1);
- return (__boundary - 1 < end - 1) ? __boundary : end;
-}
-
#ifdef CONFIG_PPC_MM_SLICES
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
@@ -588,6 +594,7 @@ __setup("hugepagesz=", hugepage_setup_sz);
static int __init hugetlbpage_init(void)
{
+ bool configured = false;
int psize;
if (hugetlb_disabled) {
@@ -638,10 +645,15 @@ static int __init hugetlbpage_init(void)
pgtable_cache_add(pdshift - shift);
else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx))
pgtable_cache_add(PTE_T_ORDER);
+
+ configured = true;
}
- if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
- hugetlbpage_init_default();
+ if (configured) {
+ if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
+ hugetlbpage_init_default();
+ } else
+ pr_info("Failed to initialize. Disabling HugeTLB");
return 0;
}
@@ -655,7 +667,7 @@ void flush_dcache_icache_hugepage(struct page *page)
BUG_ON(!PageCompound(page));
- for (i = 0; i < (1UL << compound_order(page)); i++) {
+ for (i = 0; i < compound_nr(page); i++) {
if (!PageHighMem(page)) {
__flush_dcache_icache(page_address(page+i));
} else {
@@ -665,68 +677,3 @@ void flush_dcache_icache_hugepage(struct page *page)
}
}
}
-
-static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long pte_end;
- struct page *head, *page;
- pte_t pte;
- int refs;
-
- pte_end = (addr + sz) & ~(sz-1);
- if (pte_end < end)
- end = pte_end;
-
- pte = READ_ONCE(*ptep);
-
- if (!pte_access_permitted(pte, write))
- return 0;
-
- /* hugepages are never "special" */
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-
- refs = 0;
- head = pte_page(pte);
-
- page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
- do {
- VM_BUG_ON(compound_head(page) != head);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
-
- if (!page_cache_add_speculative(head, refs)) {
- *nr -= refs;
- return 0;
- }
-
- if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- /* Could be optimized better */
- *nr -= refs;
- while (refs--)
- put_page(head);
- return 0;
- }
-
- return 1;
-}
-
-int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- pte_t *ptep;
- unsigned long sz = 1UL << hugepd_shift(hugepd);
- unsigned long next;
-
- ptep = hugepte_offset(hugepd, addr, pdshift);
- do {
- next = hugepte_addr_end(addr, end, sz);
- if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
- return 0;
- } while (ptep++, addr = next, addr != end);
-
- return 1;
-}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a4e17a979e45..a44f6281ca3a 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -194,8 +194,11 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
* fail due to alignment issues when using 16MB hugepages, so
* fall back to system memory if the altmap allocation fail.
*/
- if (altmap)
+ if (altmap) {
p = altmap_alloc_block_buf(page_size, altmap);
+ if (!p)
+ pr_debug("altmap block allocation failed, falling back to system memory");
+ }
if (!p)
p = vmemmap_alloc_block_buf(page_size, node);
if (!p)
diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c
new file mode 100644
index 000000000000..fc669643ce6a
--- /dev/null
+++ b/arch/powerpc/mm/ioremap.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/io-workarounds.h>
+
+unsigned long ioremap_bot;
+EXPORT_SYMBOL(ioremap_bot);
+
+void __iomem *ioremap(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
+ void *caller = __builtin_return_address(0);
+
+ if (iowa_is_active())
+ return iowa_ioremap(addr, size, prot, caller);
+ return __ioremap_caller(addr, size, prot, caller);
+}
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *ioremap_wc(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
+ void *caller = __builtin_return_address(0);
+
+ if (iowa_is_active())
+ return iowa_ioremap(addr, size, prot, caller);
+ return __ioremap_caller(addr, size, prot, caller);
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_cached(PAGE_KERNEL);
+ void *caller = __builtin_return_address(0);
+
+ if (iowa_is_active())
+ return iowa_ioremap(addr, size, prot, caller);
+ return __ioremap_caller(addr, size, prot, caller);
+}
+
+void __iomem *ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
+{
+ pte_t pte = __pte(flags);
+ void *caller = __builtin_return_address(0);
+
+ /* writeable implies dirty for kernel addresses */
+ if (pte_write(pte))
+ pte = pte_mkdirty(pte);
+
+ /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
+ pte = pte_exprotect(pte);
+ pte = pte_mkprivileged(pte);
+
+ if (iowa_is_active())
+ return iowa_ioremap(addr, size, pte_pgprot(pte), caller);
+ return __ioremap_caller(addr, size, pte_pgprot(pte), caller);
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+int early_ioremap_range(unsigned long ea, phys_addr_t pa,
+ unsigned long size, pgprot_t prot)
+{
+ unsigned long i;
+
+ for (i = 0; i < size; i += PAGE_SIZE) {
+ int err = map_kernel_page(ea + i, pa + i, prot);
+
+ if (WARN_ON_ONCE(err)) /* Should clean up */
+ return err;
+ }
+
+ return 0;
+}
+
+void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
+ pgprot_t prot, void *caller)
+{
+ struct vm_struct *area;
+ int ret;
+ unsigned long va;
+
+ area = __get_vm_area_caller(size, VM_IOREMAP, IOREMAP_START, IOREMAP_END, caller);
+ if (area == NULL)
+ return NULL;
+
+ area->phys_addr = pa;
+ va = (unsigned long)area->addr;
+
+ ret = ioremap_page_range(va, va + size, pa, prot);
+ if (!ret)
+ return (void __iomem *)area->addr + offset;
+
+ unmap_kernel_range(va, size);
+ free_vm_area(area);
+
+ return NULL;
+}
diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c
new file mode 100644
index 000000000000..f36121f25243
--- /dev/null
+++ b/arch/powerpc/mm/ioremap_32.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <mm/mmu_decl.h>
+
+void __iomem *ioremap_wt(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_cached_wthru(PAGE_KERNEL);
+
+ return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wt);
+
+void __iomem *
+__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller)
+{
+ unsigned long v;
+ phys_addr_t p, offset;
+ int err;
+
+ /*
+ * Choose an address to map it to.
+ * Once the vmalloc system is running, we use it.
+ * Before then, we use space going down from IOREMAP_TOP
+ * (ioremap_bot records where we're up to).
+ */
+ p = addr & PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ size = PAGE_ALIGN(addr + size) - p;
+
+ /*
+ * If the address lies within the first 16 MB, assume it's in ISA
+ * memory space
+ */
+ if (p < 16 * 1024 * 1024)
+ p += _ISA_MEM_BASE;
+
+#ifndef CONFIG_CRASH_DUMP
+ /*
+ * Don't allow anybody to remap normal RAM that we're using.
+ * mem_init() sets high_memory so only do the check after that.
+ */
+ if (slab_is_available() && p <= virt_to_phys(high_memory - 1) &&
+ page_is_ram(__phys_to_pfn(p))) {
+ pr_warn("%s(): phys addr 0x%llx is RAM lr %ps\n", __func__,
+ (unsigned long long)p, __builtin_return_address(0));
+ return NULL;
+ }
+#endif
+
+ if (size == 0)
+ return NULL;
+
+ /*
+ * Is it already mapped? Perhaps overlapped by a previous
+ * mapping.
+ */
+ v = p_block_mapped(p);
+ if (v)
+ return (void __iomem *)v + offset;
+
+ if (slab_is_available())
+ return do_ioremap(p, offset, size, prot, caller);
+
+ /*
+ * Should check if it is a candidate for a BAT mapping
+ */
+
+ err = early_ioremap_range(ioremap_bot - size, p, size, prot);
+ if (err)
+ return NULL;
+ ioremap_bot -= size;
+
+ return (void __iomem *)ioremap_bot + offset;
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+ /*
+ * If mapped by BATs then there is nothing to do.
+ * Calling vfree() generates a benign warning.
+ */
+ if (v_block_mapped((unsigned long)addr))
+ return;
+
+ if (addr > high_memory && (unsigned long)addr < ioremap_bot)
+ vunmap((void *)(PAGE_MASK & (unsigned long)addr));
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c
new file mode 100644
index 000000000000..fd29e51700cd
--- /dev/null
+++ b/arch/powerpc/mm/ioremap_64.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+/**
+ * Low level function to establish the page tables for an IO mapping
+ */
+void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
+{
+ int ret;
+ unsigned long va = (unsigned long)ea;
+
+ /* We don't support the 4K PFN hack with ioremap */
+ if (pgprot_val(prot) & H_PAGE_4K_PFN)
+ return NULL;
+
+ if ((ea + size) >= (void *)IOREMAP_END) {
+ pr_warn("Outside the supported range\n");
+ return NULL;
+ }
+
+ WARN_ON(pa & ~PAGE_MASK);
+ WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
+ WARN_ON(size & ~PAGE_MASK);
+
+ if (slab_is_available()) {
+ ret = ioremap_page_range(va, va + size, pa, prot);
+ if (ret)
+ unmap_kernel_range(va, size);
+ } else {
+ ret = early_ioremap_range(va, pa, size, prot);
+ }
+
+ if (ret)
+ return NULL;
+
+ return (void __iomem *)ea;
+}
+EXPORT_SYMBOL(__ioremap_at);
+
+/**
+ * Low level function to tear down the page tables for an IO mapping. This is
+ * used for mappings that are manipulated manually, like partial unmapping of
+ * PCI IOs or ISA space.
+ */
+void __iounmap_at(void *ea, unsigned long size)
+{
+ WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
+ WARN_ON(size & ~PAGE_MASK);
+
+ unmap_kernel_range((unsigned long)ea, size);
+}
+EXPORT_SYMBOL(__iounmap_at);
+
+void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
+ pgprot_t prot, void *caller)
+{
+ phys_addr_t paligned, offset;
+ void __iomem *ret;
+ int err;
+
+ /* We don't support the 4K PFN hack with ioremap */
+ if (pgprot_val(prot) & H_PAGE_4K_PFN)
+ return NULL;
+
+ /*
+ * Choose an address to map it to. Once the vmalloc system is running,
+ * we use it. Before that, we map using addresses going up from
+ * ioremap_bot. vmalloc will use the addresses from IOREMAP_BASE
+ * through ioremap_bot.
+ */
+ paligned = addr & PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ size = PAGE_ALIGN(addr + size) - paligned;
+
+ if (size == 0 || paligned == 0)
+ return NULL;
+
+ if (slab_is_available())
+ return do_ioremap(paligned, offset, size, prot, caller);
+
+ err = early_ioremap_range(ioremap_bot, paligned, size, prot);
+ if (err)
+ return NULL;
+
+ ret = (void __iomem *)ioremap_bot + offset;
+ ioremap_bot += size;
+
+ return ret;
+}
+
+/*
+ * Unmap an IO region and remove it from vmalloc'd list.
+ * Access to IO memory should be serialized by driver.
+ */
+void iounmap(volatile void __iomem *token)
+{
+ void *addr;
+
+ if (!slab_is_available())
+ return;
+
+ addr = (void *)((unsigned long __force)PCI_FIX_ADDR(token) & PAGE_MASK);
+
+ if ((unsigned long)addr < ioremap_bot) {
+ pr_warn("Attempt to iounmap early bolted mapping at 0x%p\n", addr);
+ return;
+ }
+ vunmap(addr);
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 0d62be3cba47..802387b231ad 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -5,6 +5,7 @@
#include <linux/kasan.h>
#include <linux/printk.h>
#include <linux/memblock.h>
+#include <linux/moduleloader.h>
#include <linux/sched/task.h>
#include <linux/vmalloc.h>
#include <asm/pgalloc.h>
@@ -21,7 +22,7 @@ static void kasan_populate_pte(pte_t *ptep, pgprot_t prot)
__set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
}
-static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
+static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
{
pmd_t *pmd;
unsigned long k_cur, k_next;
@@ -35,7 +36,10 @@ static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_
if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
continue;
- new = pte_alloc_one_kernel(&init_mm);
+ if (slab_is_available())
+ new = pte_alloc_one_kernel(&init_mm);
+ else
+ new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
if (!new)
return -ENOMEM;
@@ -43,7 +47,19 @@ static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_
kasan_populate_pte(new, PAGE_READONLY);
else
kasan_populate_pte(new, PAGE_KERNEL_RO);
- pmd_populate_kernel(&init_mm, pmd, new);
+
+ smp_wmb(); /* See comment in __pte_alloc */
+
+ spin_lock(&init_mm.page_table_lock);
+ /* Has another populated it ? */
+ if (likely((void *)pmd_page_vaddr(*pmd) == kasan_early_shadow_pte)) {
+ pmd_populate_kernel(&init_mm, pmd, new);
+ new = NULL;
+ }
+ spin_unlock(&init_mm.page_table_lock);
+
+ if (new && slab_is_available())
+ pte_free_kernel(&init_mm, new);
}
return 0;
}
@@ -71,7 +87,7 @@ static int __ref kasan_init_region(void *start, size_t size)
if (!slab_is_available())
block = memblock_alloc(k_end - k_start, PAGE_SIZE);
- for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE) {
+ for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
void *va = block ? block + k_cur - k_start : kasan_get_one_page();
pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
@@ -134,7 +150,11 @@ void __init kasan_init(void)
#ifdef CONFIG_MODULES
void *module_alloc(unsigned long size)
{
- void *base = vmalloc_exec(size);
+ void *base;
+
+ base = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START, VMALLOC_END,
+ GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
+ NUMA_NO_NODE, __builtin_return_address(0));
if (!base)
return NULL;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2540d3b2588c..be941d382c8d 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -120,33 +120,24 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
start, start + size, rc);
return -EFAULT;
}
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
return __add_pages(nid, start_pfn, nr_pages, restrictions);
}
-#ifdef CONFIG_MEMORY_HOTREMOVE
void __ref arch_remove_memory(int nid, u64 start, u64 size,
struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct page *page;
+ struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
int ret;
- /*
- * If we have an altmap then we need to skip over any reserved PFNs
- * when querying the zone.
- */
- page = pfn_to_page(start_pfn);
- if (altmap)
- page += vmem_altmap_offset(altmap);
-
__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
/* Remove htab bolted mappings for this section of memory */
start = (unsigned long)__va(start);
- flush_inval_dcache_range(start, start + size);
+ flush_dcache_range(start, start + size);
ret = remove_section_mapping(start, start + size);
WARN_ON_ONCE(ret);
@@ -159,7 +150,6 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
pr_warn("Hash collision while resizing HPT\n");
}
#endif
-#endif /* CONFIG_MEMORY_HOTPLUG */
#ifndef CONFIG_NEED_MULTIPLE_NODES
void __init mem_topology_setup(void)
@@ -249,7 +239,7 @@ void __init paging_init(void)
#ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
- ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT);
+ 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
@@ -312,12 +302,9 @@ void __init mem_init(void)
pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n",
PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP));
#endif /* CONFIG_HIGHMEM */
-#ifdef CONFIG_NOT_COHERENT_CACHE
- pr_info(" * 0x%08lx..0x%08lx : consistent mem\n",
- IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE);
-#endif /* CONFIG_NOT_COHERENT_CACHE */
- pr_info(" * 0x%08lx..0x%08lx : early ioremap\n",
- ioremap_bot, IOREMAP_TOP);
+ if (ioremap_bot != IOREMAP_TOP)
+ pr_info(" * 0x%08lx..0x%08lx : early ioremap\n",
+ ioremap_bot, IOREMAP_TOP);
pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n",
VMALLOC_START, VMALLOC_END);
#endif /* CONFIG_PPC32 */
@@ -417,63 +404,6 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
EXPORT_SYMBOL(flush_icache_user_range);
/*
- * This is called at the end of handling a user page fault, when the
- * fault has been handled by updating a PTE in the linux page tables.
- * We use it to preload an HPTE into the hash table corresponding to
- * the updated linux PTE.
- *
- * This must always be called with the pte lock held.
- */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
- pte_t *ptep)
-{
-#ifdef CONFIG_PPC_BOOK3S
- /*
- * We don't need to worry about _PAGE_PRESENT here because we are
- * called with either mm->page_table_lock held or ptl lock held
- */
- unsigned long trap;
- bool is_exec;
-
- if (radix_enabled()) {
- prefetch((void *)address);
- return;
- }
-
- /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
- if (!pte_young(*ptep) || address >= TASK_SIZE)
- return;
-
- /* We try to figure out if we are coming from an instruction
- * access fault and pass that down to __hash_page so we avoid
- * double-faulting on execution of fresh text. We have to test
- * for regs NULL since init will get here first thing at boot
- *
- * We also avoid filling the hash if not coming from a fault
- */
-
- trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL;
- switch (trap) {
- case 0x300:
- is_exec = false;
- break;
- case 0x400:
- is_exec = true;
- break;
- default:
- return;
- }
-
- hash_preload(vma->vm_mm, address, is_exec, trap);
-#endif /* CONFIG_PPC_BOOK3S */
-#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
- && defined(CONFIG_HUGETLB_PAGE)
- if (is_vm_hugetlb_page(vma))
- book3e_hugetlb_preload(vma, address, *ptep);
-#endif
-}
-
-/*
* System memory should not be in /proc/iomem but various tools expect it
* (eg kdump).
*/
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 32c1a191c28a..c750ac9ec713 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -82,10 +82,6 @@ static inline void print_system_hash_info(void) {}
#else /* CONFIG_PPC_MMU_NOHASH */
-extern void hash_preload(struct mm_struct *mm, unsigned long ea,
- bool is_exec, unsigned long trap);
-
-
extern void _tlbie(unsigned long address);
extern void _tlbia(void);
@@ -95,6 +91,8 @@ void print_system_hash_info(void);
#ifdef CONFIG_PPC32
+void hash_preload(struct mm_struct *mm, unsigned long ea);
+
extern void mapin_ram(void);
extern void setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, pgprot_t prot);
@@ -108,7 +106,6 @@ extern u8 early_hash[];
#endif /* CONFIG_PPC32 */
-extern unsigned long ioremap_bot;
extern unsigned long __max_low_memory;
extern phys_addr_t __initial_memory_limit_addr;
extern phys_addr_t total_memory;
diff --git a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
index 61915f4d3c7f..8b88be91b622 100644
--- a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
+++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
@@ -122,8 +122,8 @@ static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
return found;
}
-void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
- pte_t pte)
+static void
+book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte)
{
unsigned long mas1, mas2;
u64 mas7_3;
@@ -183,6 +183,18 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
local_irq_restore(flags);
}
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ *
+ * This must always be called with the pte lock held.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+{
+ if (is_vm_hugetlb_page(vma))
+ book3e_hugetlb_preload(vma, address, *ptep);
+}
+
void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
struct hstate *hstate = hstate_file(vma->vm_file);
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index d4acf6fa0596..696f568253a0 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -630,7 +630,6 @@ static void early_init_this_mmu(void)
#ifdef CONFIG_PPC_FSL_BOOK3E
if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
unsigned int num_cams;
- int __maybe_unused cpu = smp_processor_id();
bool map = true;
/* use a quarter of the TLBCAM for bolted linear map */
@@ -704,6 +703,8 @@ static void __init early_init_mmu_global(void)
* for use by the TLB miss code
*/
linear_map_top = memblock_end_of_DRAM();
+
+ ioremap_bot = IOREMAP_BASE;
}
static void __init early_mmu_set_memory_limit(void)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 917904d2fe97..50d68d21ddcc 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -163,6 +163,22 @@ static void unmap_cpu_from_node(unsigned long cpu)
}
#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
+int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+{
+ int dist = 0;
+
+ int i, index;
+
+ for (i = 0; i < distance_ref_points_depth; i++) {
+ index = be32_to_cpu(distance_ref_points[i]);
+ if (cpu1_assoc[index] == cpu2_assoc[index])
+ break;
+ dist++;
+ }
+
+ return dist;
+}
+
/* must hold reference to node during call */
static const __be32 *of_get_associativity(struct device_node *dev)
{
@@ -212,7 +228,7 @@ static int associativity_to_nid(const __be32 *associativity)
{
int nid = NUMA_NO_NODE;
- if (min_common_depth == -1)
+ if (!numa_enabled)
goto out;
if (of_read_number(associativity, 1) >= min_common_depth)
@@ -416,17 +432,19 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
{
struct assoc_arrays aa = { .arrays = NULL };
- int default_nid = 0;
+ int default_nid = NUMA_NO_NODE;
int nid = default_nid;
int rc, index;
+ if ((min_common_depth < 0) || !numa_enabled)
+ return default_nid;
+
rc = of_get_assoc_arrays(&aa);
if (rc)
return default_nid;
- if (min_common_depth > 0 && min_common_depth <= aa.array_sz &&
- !(lmb->flags & DRCONF_MEM_AI_INVALID) &&
- lmb->aa_index < aa.n_arrays) {
+ if (min_common_depth <= aa.array_sz &&
+ !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
nid = of_read_number(&aa.arrays[index], 1);
@@ -626,8 +644,14 @@ static int __init parse_numa_properties(void)
min_common_depth = find_min_common_depth();
- if (min_common_depth < 0)
+ if (min_common_depth < 0) {
+ /*
+ * if we fail to parse min_common_depth from device tree
+ * mark the numa disabled, boot with numa disabled.
+ */
+ numa_enabled = false;
return min_common_depth;
+ }
dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
@@ -743,7 +767,7 @@ void __init dump_numa_cpu_topology(void)
unsigned int node;
unsigned int cpu, count;
- if (min_common_depth == -1 || !numa_enabled)
+ if (!numa_enabled)
return;
for_each_online_node(node) {
@@ -808,7 +832,7 @@ static void __init find_possible_nodes(void)
struct device_node *rtas;
u32 numnodes, i;
- if (min_common_depth <= 0)
+ if (!numa_enabled)
return;
rtas = of_find_node_by_path("/rtas");
@@ -1010,7 +1034,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
struct device_node *memory = NULL;
int nid;
- if (!numa_enabled || (min_common_depth < 0))
+ if (!numa_enabled)
return first_online_node;
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
@@ -1063,9 +1087,6 @@ u64 memory_hotplug_max(void)
/* Virtual Processor Home Node (VPHN) support */
#ifdef CONFIG_PPC_SPLPAR
-
-#include "book3s64/vphn.h"
-
struct topology_update_data {
struct topology_update_data *next;
unsigned int cpu;
@@ -1161,25 +1182,13 @@ static int update_cpu_associativity_changes_mask(void)
* Retrieve the new associativity information for a virtual processor's
* home node.
*/
-static long hcall_vphn(unsigned long cpu, __be32 *associativity)
-{
- long rc;
- long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
- u64 flags = 1;
- int hwcpu = get_hard_smp_processor_id(cpu);
-
- rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
- vphn_unpack_associativity(retbuf, associativity);
-
- return rc;
-}
-
static long vphn_get_associativity(unsigned long cpu,
__be32 *associativity)
{
long rc;
- rc = hcall_vphn(cpu, associativity);
+ rc = hcall_vphn(get_hard_smp_processor_id(cpu),
+ VPHN_FLAG_VCPU, associativity);
switch (rc) {
case H_FUNCTION:
diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
index a7b05214760c..ee4bd6d38602 100644
--- a/arch/powerpc/mm/pgtable-frag.c
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -25,7 +25,7 @@ void pte_frag_destroy(void *pte_frag)
count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
/* We allow PTE_FRAG_NR fragments from a PTE page */
if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
- pgtable_page_dtor(page);
+ pgtable_pte_page_dtor(page);
__free_page(page);
}
}
@@ -61,7 +61,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
if (!page)
return NULL;
- if (!pgtable_page_ctor(page)) {
+ if (!pgtable_pte_page_ctor(page)) {
__free_page(page);
return NULL;
}
@@ -113,7 +113,7 @@ void pte_fragment_free(unsigned long *table, int kernel)
BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
if (atomic_dec_and_test(&page->pt_frag_refcount)) {
if (!kernel)
- pgtable_page_dtor(page);
+ pgtable_pte_page_dtor(page);
__free_page(page);
}
}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index fc10c0c24f51..e3759b69f81b 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -336,10 +336,11 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
if (pgd_none(pgd))
return NULL;
- if (pgd_huge(pgd)) {
+ if (pgd_is_leaf(pgd)) {
ret_pte = (pte_t *)pgdp;
goto out;
}
+
if (is_hugepd(__hugepd(pgd_val(pgd)))) {
hpdp = (hugepd_t *)&pgd;
goto out_huge;
@@ -357,14 +358,16 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
if (pud_none(pud))
return NULL;
- if (pud_huge(pud)) {
+ if (pud_is_leaf(pud)) {
ret_pte = (pte_t *)pudp;
goto out;
}
+
if (is_hugepd(__hugepd(pud_val(pud)))) {
hpdp = (hugepd_t *)&pud;
goto out_huge;
}
+
pdshift = PMD_SHIFT;
pmdp = pmd_offset(&pud, ea);
pmd = READ_ONCE(*pmdp);
@@ -393,15 +396,12 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
ret_pte = (pte_t *)pmdp;
goto out;
}
- /*
- * pmd_large check below will handle the swap pmd pte
- * we need to do both the check because they are config
- * dependent.
- */
- if (pmd_huge(pmd) || pmd_large(pmd)) {
+
+ if (pmd_is_leaf(pmd)) {
ret_pte = (pte_t *)pmdp;
goto out;
}
+
if (is_hugepd(__hugepd(pmd_val(pmd)))) {
hpdp = (hugepd_t *)&pmd;
goto out_huge;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index d53188dee18f..8ec5dfb65b2e 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -27,166 +27,13 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
-#include <asm/io.h>
#include <asm/setup.h>
#include <asm/sections.h>
#include <mm/mmu_decl.h>
-unsigned long ioremap_bot;
-EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
-
extern char etext[], _stext[], _sinittext[], _einittext[];
-void __iomem *
-ioremap(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
-
- return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap);
-
-void __iomem *
-ioremap_wc(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
-
- return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap_wc);
-
-void __iomem *
-ioremap_wt(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_cached_wthru(PAGE_KERNEL);
-
- return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap_wt);
-
-void __iomem *
-ioremap_coherent(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_cached(PAGE_KERNEL);
-
- return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap_coherent);
-
-void __iomem *
-ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
-{
- pte_t pte = __pte(flags);
-
- /* writeable implies dirty for kernel addresses */
- if (pte_write(pte))
- pte = pte_mkdirty(pte);
-
- /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
- pte = pte_exprotect(pte);
- pte = pte_mkprivileged(pte);
-
- return __ioremap_caller(addr, size, pte_pgprot(pte), __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap_prot);
-
-void __iomem *
-__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
-{
- return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0));
-}
-
-void __iomem *
-__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller)
-{
- unsigned long v, i;
- phys_addr_t p;
- int err;
-
- /*
- * Choose an address to map it to.
- * Once the vmalloc system is running, we use it.
- * Before then, we use space going down from IOREMAP_TOP
- * (ioremap_bot records where we're up to).
- */
- p = addr & PAGE_MASK;
- size = PAGE_ALIGN(addr + size) - p;
-
- /*
- * If the address lies within the first 16 MB, assume it's in ISA
- * memory space
- */
- if (p < 16*1024*1024)
- p += _ISA_MEM_BASE;
-
-#ifndef CONFIG_CRASH_DUMP
- /*
- * Don't allow anybody to remap normal RAM that we're using.
- * mem_init() sets high_memory so only do the check after that.
- */
- if (slab_is_available() && p <= virt_to_phys(high_memory - 1) &&
- page_is_ram(__phys_to_pfn(p))) {
- printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n",
- (unsigned long long)p, __builtin_return_address(0));
- return NULL;
- }
-#endif
-
- if (size == 0)
- return NULL;
-
- /*
- * Is it already mapped? Perhaps overlapped by a previous
- * mapping.
- */
- v = p_block_mapped(p);
- if (v)
- goto out;
-
- if (slab_is_available()) {
- struct vm_struct *area;
- area = get_vm_area_caller(size, VM_IOREMAP, caller);
- if (area == 0)
- return NULL;
- area->phys_addr = p;
- v = (unsigned long) area->addr;
- } else {
- v = (ioremap_bot -= size);
- }
-
- /*
- * Should check if it is a candidate for a BAT mapping
- */
-
- err = 0;
- for (i = 0; i < size && err == 0; i += PAGE_SIZE)
- err = map_kernel_page(v + i, p + i, prot);
- if (err) {
- if (slab_is_available())
- vunmap((void *)v);
- return NULL;
- }
-
-out:
- return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK));
-}
-EXPORT_SYMBOL(__ioremap);
-
-void iounmap(volatile void __iomem *addr)
-{
- /*
- * If mapped by BATs then there is nothing to do.
- * Calling vfree() generates a benign warning.
- */
- if (v_block_mapped((unsigned long)addr))
- return;
-
- if (addr > high_memory && (unsigned long) addr < ioremap_bot)
- vunmap((void *) (PAGE_MASK & (unsigned long)addr));
-}
-EXPORT_SYMBOL(iounmap);
-
static void __init *early_alloc_pgtable(unsigned long size)
{
void *ptr = memblock_alloc(size, size);
@@ -252,7 +99,7 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL);
#ifdef CONFIG_PPC_BOOK3S_32
if (ktext)
- hash_preload(&init_mm, v, false, 0x300);
+ hash_preload(&init_mm, v);
#endif
v += PAGE_SIZE;
p += PAGE_SIZE;
@@ -360,7 +207,7 @@ void mark_initmem_nx(void)
unsigned long numpages = PFN_UP((unsigned long)_einittext) -
PFN_DOWN((unsigned long)_sinittext);
- if (v_block_mapped((unsigned long)_stext) + 1)
+ if (v_block_mapped((unsigned long)_stext + 1))
mmu_mark_initmem_nx();
else
change_page_attr(page, numpages, PAGE_KERNEL);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 12d5e083942d..e78832dce7bb 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * This file contains ioremap and related functions for 64-bit machines.
+ * This file contains pgtable related functions for 64-bit machines.
*
* Derived from arch/ppc64/mm/init.c
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -34,7 +34,6 @@
#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/prom.h>
-#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
@@ -98,209 +97,26 @@ unsigned long __pte_frag_nr;
EXPORT_SYMBOL(__pte_frag_nr);
unsigned long __pte_frag_size_shift;
EXPORT_SYMBOL(__pte_frag_size_shift);
-unsigned long ioremap_bot;
-#else /* !CONFIG_PPC_BOOK3S_64 */
-unsigned long ioremap_bot = IOREMAP_BASE;
#endif
-/**
- * __ioremap_at - Low level function to establish the page tables
- * for an IO mapping
- */
-void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
-{
- unsigned long i;
-
- /* We don't support the 4K PFN hack with ioremap */
- if (pgprot_val(prot) & H_PAGE_4K_PFN)
- return NULL;
-
- if ((ea + size) >= (void *)IOREMAP_END) {
- pr_warn("Outside the supported range\n");
- return NULL;
- }
-
- WARN_ON(pa & ~PAGE_MASK);
- WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
- WARN_ON(size & ~PAGE_MASK);
-
- for (i = 0; i < size; i += PAGE_SIZE)
- if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
- return NULL;
-
- return (void __iomem *)ea;
-}
-
-/**
- * __iounmap_from - Low level function to tear down the page tables
- * for an IO mapping. This is used for mappings that
- * are manipulated manually, like partial unmapping of
- * PCI IOs or ISA space.
- */
-void __iounmap_at(void *ea, unsigned long size)
-{
- WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
- WARN_ON(size & ~PAGE_MASK);
-
- unmap_kernel_range((unsigned long)ea, size);
-}
-
-void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
- pgprot_t prot, void *caller)
-{
- phys_addr_t paligned;
- void __iomem *ret;
-
- /*
- * Choose an address to map it to.
- * Once the imalloc system is running, we use it.
- * Before that, we map using addresses going
- * up from ioremap_bot. imalloc will use
- * the addresses from ioremap_bot through
- * IMALLOC_END
- *
- */
- paligned = addr & PAGE_MASK;
- size = PAGE_ALIGN(addr + size) - paligned;
-
- if ((size == 0) || (paligned == 0))
- return NULL;
-
- if (slab_is_available()) {
- struct vm_struct *area;
-
- area = __get_vm_area_caller(size, VM_IOREMAP,
- ioremap_bot, IOREMAP_END,
- caller);
- if (area == NULL)
- return NULL;
-
- area->phys_addr = paligned;
- ret = __ioremap_at(paligned, area->addr, size, prot);
- if (!ret)
- vunmap(area->addr);
- } else {
- ret = __ioremap_at(paligned, (void *)ioremap_bot, size, prot);
- if (ret)
- ioremap_bot += size;
- }
-
- if (ret)
- ret += addr & ~PAGE_MASK;
- return ret;
-}
-
-void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
- unsigned long flags)
-{
- return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0));
-}
-
-void __iomem * ioremap(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
- void *caller = __builtin_return_address(0);
-
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, prot, caller);
- return __ioremap_caller(addr, size, prot, caller);
-}
-
-void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
- void *caller = __builtin_return_address(0);
-
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, prot, caller);
- return __ioremap_caller(addr, size, prot, caller);
-}
-
-void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size)
-{
- pgprot_t prot = pgprot_cached(PAGE_KERNEL);
- void *caller = __builtin_return_address(0);
-
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, prot, caller);
- return __ioremap_caller(addr, size, prot, caller);
-}
-
-void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
- unsigned long flags)
-{
- pte_t pte = __pte(flags);
- void *caller = __builtin_return_address(0);
-
- /* writeable implies dirty for kernel addresses */
- if (pte_write(pte))
- pte = pte_mkdirty(pte);
-
- /* we don't want to let _PAGE_EXEC leak out */
- pte = pte_exprotect(pte);
- /*
- * Force kernel mapping.
- */
- pte = pte_mkprivileged(pte);
-
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, pte_pgprot(pte), caller);
- return __ioremap_caller(addr, size, pte_pgprot(pte), caller);
-}
-
-
-/*
- * Unmap an IO region and remove it from imalloc'd list.
- * Access to IO memory should be serialized by driver.
- */
-void __iounmap(volatile void __iomem *token)
-{
- void *addr;
-
- if (!slab_is_available())
- return;
-
- addr = (void *) ((unsigned long __force)
- PCI_FIX_ADDR(token) & PAGE_MASK);
- if ((unsigned long)addr < ioremap_bot) {
- printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
- " at 0x%p\n", addr);
- return;
- }
- vunmap(addr);
-}
-
-void iounmap(volatile void __iomem *token)
-{
- if (ppc_md.iounmap)
- ppc_md.iounmap(token);
- else
- __iounmap(token);
-}
-
-EXPORT_SYMBOL(ioremap);
-EXPORT_SYMBOL(ioremap_wc);
-EXPORT_SYMBOL(ioremap_prot);
-EXPORT_SYMBOL(__ioremap);
-EXPORT_SYMBOL(__ioremap_at);
-EXPORT_SYMBOL(iounmap);
-EXPORT_SYMBOL(__iounmap);
-EXPORT_SYMBOL(__iounmap_at);
-
#ifndef __PAGETABLE_PUD_FOLDED
/* 4 level page table */
struct page *pgd_page(pgd_t pgd)
{
- if (pgd_huge(pgd))
+ if (pgd_is_leaf(pgd)) {
+ VM_WARN_ON(!pgd_huge(pgd));
return pte_page(pgd_pte(pgd));
+ }
return virt_to_page(pgd_page_vaddr(pgd));
}
#endif
struct page *pud_page(pud_t pud)
{
- if (pud_huge(pud))
+ if (pud_is_leaf(pud)) {
+ VM_WARN_ON(!pud_huge(pud));
return pte_page(pud_pte(pud));
+ }
return virt_to_page(pud_page_vaddr(pud));
}
@@ -310,8 +126,10 @@ struct page *pud_page(pud_t pud)
*/
struct page *pmd_page(pmd_t pmd)
{
- if (pmd_large(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
+ if (pmd_is_leaf(pmd)) {
+ VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd)));
return pte_page(pmd_pte(pmd));
+ }
return virt_to_page(pmd_page_vaddr(pmd));
}
diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c
index a0d23e96e841..4154feac1da3 100644
--- a/arch/powerpc/mm/ptdump/bats.c
+++ b/arch/powerpc/mm/ptdump/bats.c
@@ -149,7 +149,7 @@ static int bats_show_603(struct seq_file *m, void *v)
static int bats_open(struct inode *inode, struct file *file)
{
- if (cpu_has_feature(CPU_FTR_601))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
return single_open(file, bats_show_601, NULL);
return single_open(file, bats_show_603, NULL);
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index 72f0e4a3d839..a07278027c6f 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -237,7 +237,6 @@ static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64
return -1;
}
-#ifdef CONFIG_PPC_PSERIES
static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r)
{
struct hash_pte ptes[4];
@@ -274,7 +273,6 @@ static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *
}
return -1;
}
-#endif
static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps,
unsigned long *lp_bits)
@@ -316,10 +314,9 @@ static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps,
static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v,
u64 *r)
{
-#ifdef CONFIG_PPC_PSERIES
- if (firmware_has_feature(FW_FEATURE_LPAR))
+ if (IS_ENABLED(CONFIG_PPC_PSERIES) && firmware_has_feature(FW_FEATURE_LPAR))
return pseries_find(ea, psize, primary, v, r);
-#endif
+
return native_find(ea, psize, primary, v, r);
}
@@ -386,12 +383,13 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
psize = mmu_vmalloc_psize;
else
psize = mmu_io_psize;
-#ifdef CONFIG_PPC_64K_PAGES
+
/* check for secret 4K mappings */
- if (((pteval & H_PAGE_COMBO) == H_PAGE_COMBO) ||
- ((pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN))
+ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) &&
+ ((pteval & H_PAGE_COMBO) == H_PAGE_COMBO ||
+ (pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN))
psize = mmu_io_psize;
-#endif
+
/* check for hashpte */
status = hpte_find(st, addr, psize);
@@ -469,9 +467,10 @@ static void walk_linearmapping(struct pg_state *st)
static void walk_vmemmap(struct pg_state *st)
{
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
struct vmemmap_backing *ptr = vmemmap_list;
+ if (!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
+ return;
/*
* Traverse the vmemmaped memory and dump pages that are in the hash
* pagetable.
@@ -481,7 +480,6 @@ static void walk_vmemmap(struct pg_state *st)
ptr = ptr->list;
}
seq_puts(st->seq, "---[ vmemmap end ]---\n");
-#endif
}
static void populate_markers(void)
@@ -495,11 +493,7 @@ static void populate_markers(void)
address_markers[6].start_address = PHB_IO_END;
address_markers[7].start_address = IOREMAP_BASE;
address_markers[8].start_address = IOREMAP_END;
-#ifdef CONFIG_PPC_BOOK3S_64
address_markers[9].start_address = H_VMEMMAP_START;
-#else
- address_markers[9].start_address = VMEMMAP_BASE;
-#endif
}
static int ptdump_show(struct seq_file *m, void *v)
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 39bf1e2cba13..2f9ddc29c535 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -26,10 +26,6 @@
#include "ptdump.h"
-#ifdef CONFIG_PPC32
-#define KERN_VIRT_START PAGE_OFFSET
-#endif
-
/*
* To visualise what is happening,
*
@@ -88,10 +84,6 @@ static struct addr_marker address_markers[] = {
#else
{ 0, "Early I/O remap start" },
{ 0, "Early I/O remap end" },
-#ifdef CONFIG_NOT_COHERENT_CACHE
- { 0, "Consistent mem start" },
- { 0, "Consistent mem end" },
-#endif
#ifdef CONFIG_HIGHMEM
{ 0, "Highmem PTEs start" },
{ 0, "Highmem PTEs end" },
@@ -181,7 +173,7 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
static void note_prot_wx(struct pg_state *st, unsigned long addr)
{
- if (!st->check_wx)
+ if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx)
return;
if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X)))
@@ -273,7 +265,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
addr = start + i * PMD_SIZE;
- if (!pmd_none(*pmd) && !pmd_huge(*pmd))
+ if (!pmd_none(*pmd) && !pmd_is_leaf(*pmd))
/* pmd exists */
walk_pte(st, pmd, addr);
else
@@ -289,7 +281,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
addr = start + i * PUD_SIZE;
- if (!pud_none(*pud) && !pud_huge(*pud))
+ if (!pud_none(*pud) && !pud_is_leaf(*pud))
/* pud exists */
walk_pmd(st, pud, addr);
else
@@ -299,18 +291,16 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
static void walk_pagetables(struct pg_state *st)
{
- pgd_t *pgd = pgd_offset_k(0UL);
unsigned int i;
- unsigned long addr;
-
- addr = st->start_address;
+ unsigned long addr = st->start_address & PGDIR_MASK;
+ pgd_t *pgd = pgd_offset_k(addr);
/*
* Traverse the linux pagetable structure and dump pages that are in
* the hash pagetable.
*/
- for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
- if (!pgd_none(*pgd) && !pgd_huge(*pgd))
+ for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
+ if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd))
/* pgd exists */
walk_pud(st, pgd, addr);
else
@@ -341,11 +331,6 @@ static void populate_markers(void)
#else /* !CONFIG_PPC64 */
address_markers[i++].start_address = ioremap_bot;
address_markers[i++].start_address = IOREMAP_TOP;
-#ifdef CONFIG_NOT_COHERENT_CACHE
- address_markers[i++].start_address = IOREMAP_TOP;
- address_markers[i++].start_address = IOREMAP_TOP +
- CONFIG_CONSISTENT_SIZE;
-#endif
#ifdef CONFIG_HIGHMEM
address_markers[i++].start_address = PKMAP_BASE;
address_markers[i++].start_address = PKMAP_ADDR(LAST_PKMAP);
@@ -364,12 +349,13 @@ static int ptdump_show(struct seq_file *m, void *v)
struct pg_state st = {
.seq = m,
.marker = address_markers,
+ .start_address = PAGE_OFFSET,
};
- if (radix_enabled())
- st.start_address = PAGE_OFFSET;
- else
+#ifdef CONFIG_PPC64
+ if (!radix_enabled())
st.start_address = KERN_VIRT_START;
+#endif
/* Traverse kernel page tables */
walk_pagetables(&st);
@@ -407,12 +393,13 @@ void ptdump_check_wx(void)
.seq = NULL,
.marker = address_markers,
.check_wx = true,
+ .start_address = PAGE_OFFSET,
};
- if (radix_enabled())
- st.start_address = PAGE_OFFSET;
- else
+#ifdef CONFIG_PPC64
+ if (!radix_enabled())
st.start_address = KERN_VIRT_START;
+#endif
walk_pagetables(&st);
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index c2ee6041f02c..02a59946a78a 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -500,6 +500,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
/* slw clears top 32 bits */
PPC_SLW(dst_reg, dst_reg, src_reg);
+ /* skip zero extension move, but set address map. */
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
PPC_SLD(dst_reg, dst_reg, src_reg);
@@ -507,6 +510,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */
/* with imm 0, we still need to clear top 32 bits */
PPC_SLWI(dst_reg, dst_reg, imm);
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */
if (imm != 0)
@@ -514,12 +519,16 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
break;
case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
PPC_SRW(dst_reg, dst_reg, src_reg);
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
PPC_SRD(dst_reg, dst_reg, src_reg);
break;
case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
PPC_SRWI(dst_reg, dst_reg, imm);
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
if (imm != 0)
@@ -544,6 +553,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
*/
case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+ if (imm == 1) {
+ /* special mov32 for zext */
+ PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
+ break;
+ }
PPC_MR(dst_reg, src_reg);
goto bpf_alu32_trunc;
case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
@@ -551,11 +565,13 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
PPC_LI32(dst_reg, imm);
if (imm < 0)
goto bpf_alu32_trunc;
+ else if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
bpf_alu32_trunc:
/* Truncate to 32-bits */
- if (BPF_CLASS(code) == BPF_ALU)
+ if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
break;
@@ -614,10 +630,13 @@ emit_clear:
case 16:
/* zero-extend 16 bits into 64 bits */
PPC_RLDICL(dst_reg, dst_reg, 0, 48);
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
case 32:
- /* zero-extend 32 bits into 64 bits */
- PPC_RLDICL(dst_reg, dst_reg, 0, 32);
+ if (!fp->aux->verifier_zext)
+ /* zero-extend 32 bits into 64 bits */
+ PPC_RLDICL(dst_reg, dst_reg, 0, 32);
break;
case 64:
/* nop */
@@ -694,14 +713,20 @@ emit_clear:
/* dst = *(u8 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_B:
PPC_LBZ(dst_reg, src_reg, off);
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
/* dst = *(u16 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_H:
PPC_LHZ(dst_reg, src_reg, off);
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
/* dst = *(u32 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_W:
PPC_LWZ(dst_reg, src_reg, off);
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
break;
/* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_DW:
@@ -1042,6 +1067,11 @@ struct powerpc64_jit_data {
struct codegen_context ctx;
};
+bool bpf_jit_needs_zext(void)
+{
+ return true;
+}
+
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
u32 proglen;
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index faad5b315f49..573e0b309c0c 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -567,7 +567,7 @@ static int event_uniq_add(struct rb_root *root, const char *name, int nl,
struct event_uniq *it;
int result;
- it = container_of(*new, struct event_uniq, node);
+ it = rb_entry(*new, struct event_uniq, node);
result = ev_uniq_ord(name, nl, domain, it->name, it->nl,
it->domain);
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 3bdfc1e32096..cb50a9e1fd2d 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -362,7 +362,14 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
*/
nid = cpu_to_node(cpu);
l_cpumask = cpumask_of_node(nid);
- target = cpumask_any_but(l_cpumask, cpu);
+ target = cpumask_last(l_cpumask);
+
+ /*
+ * If this(target) is the last cpu in the cpumask for this chip,
+ * check for any possible online cpu in the chip.
+ */
+ if (unlikely(target == cpu))
+ target = cpumask_any_but(l_cpumask, cpu);
/*
* Update the cpumask with the target cpu and
@@ -570,6 +577,7 @@ static int core_imc_mem_init(int cpu, int size)
{
int nid, rc = 0, core_id = (cpu / threads_per_core);
struct imc_mem_info *mem_info;
+ struct page *page;
/*
* alloc_pages_node() will allocate memory for core in the
@@ -580,11 +588,12 @@ static int core_imc_mem_init(int cpu, int size)
mem_info->id = core_id;
/* We need only vbase for core counters */
- mem_info->vbase = page_address(alloc_pages_node(nid,
- GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
- __GFP_NOWARN, get_order(size)));
- if (!mem_info->vbase)
+ page = alloc_pages_node(nid,
+ GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+ __GFP_NOWARN, get_order(size));
+ if (!page)
return -ENOMEM;
+ mem_info->vbase = page_address(page);
/* Init the mutex */
core_imc_refc[core_id].id = core_id;
@@ -667,7 +676,10 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
return 0;
/* Find any online cpu in that core except the current "cpu" */
- ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
+ ncpu = cpumask_last(cpu_sibling_mask(cpu));
+
+ if (unlikely(ncpu == cpu))
+ ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
if (ncpu >= 0 && ncpu < nr_cpu_ids) {
cpumask_set_cpu(ncpu, &core_imc_cpumask);
@@ -839,15 +851,17 @@ static int thread_imc_mem_alloc(int cpu_id, int size)
int nid = cpu_to_node(cpu_id);
if (!local_mem) {
+ struct page *page;
/*
* This case could happen only once at start, since we dont
* free the memory in cpu offline path.
*/
- local_mem = page_address(alloc_pages_node(nid,
+ page = alloc_pages_node(nid,
GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
- __GFP_NOWARN, get_order(size)));
- if (!local_mem)
+ __GFP_NOWARN, get_order(size));
+ if (!page)
return -ENOMEM;
+ local_mem = page_address(page);
per_cpu(thread_imc_mem, cpu_id) = local_mem;
}
@@ -1085,11 +1099,14 @@ static int trace_imc_mem_alloc(int cpu_id, int size)
int core_id = (cpu_id / threads_per_core);
if (!local_mem) {
- local_mem = page_address(alloc_pages_node(phys_id,
- GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
- __GFP_NOWARN, get_order(size)));
- if (!local_mem)
+ struct page *page;
+
+ page = alloc_pages_node(phys_id,
+ GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+ __GFP_NOWARN, get_order(size));
+ if (!page)
return -ENOMEM;
+ local_mem = page_address(page);
per_cpu(trace_imc_mem, cpu_id) = local_mem;
/* Initialise the counters for trace mode */
diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
index ad2bb1408b4c..6da813b65b42 100644
--- a/arch/powerpc/platforms/40x/Kconfig
+++ b/arch/powerpc/platforms/40x/Kconfig
@@ -16,12 +16,12 @@ config EP405
This option enables support for the EP405/EP405PC boards.
config HOTFOOT
- bool "Hotfoot"
+ bool "Hotfoot"
depends on 40x
select PPC40x_SIMPLE
select FORCE_PCI
- help
- This option enables support for the ESTEEM 195E Hotfoot board.
+ help
+ This option enables support for the ESTEEM 195E Hotfoot board.
config KILAUEA
bool "Kilauea"
@@ -80,7 +80,6 @@ config OBS600
help
This option enables support for PlatHome OpenBlockS 600 server
-
config PPC40x_SIMPLE
bool "Simple PowerPC 40x board support"
depends on 40x
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 35be81fd2dc2..25ebe634a661 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -40,12 +40,12 @@ config EBONY
This option enables support for the IBM PPC440GP evaluation board.
config SAM440EP
- bool "Sam440ep"
+ bool "Sam440ep"
depends on 44x
- select 440EP
- select FORCE_PCI
- help
- This option enables support for the ACube Sam440ep board.
+ select 440EP
+ select FORCE_PCI
+ help
+ This option enables support for the ACube Sam440ep board.
config SEQUOIA
bool "Sequoia"
@@ -272,14 +272,6 @@ config PPC4xx_GPIO
help
Enable gpiolib support for ppc440 based boards
-config PPC4xx_OCM
- bool "PPC4xx On Chip Memory (OCM) support"
- depends on 4xx
- select PPC_LIB_RHEAP
- help
- Enable OCM support for PowerPC 4xx platforms with on chip memory,
- OCM provides the fast place for memory access to improve performance.
-
# 44x specific CPU modules, selected based on the board above.
config 440EP
bool
diff --git a/arch/powerpc/platforms/4xx/Makefile b/arch/powerpc/platforms/4xx/Makefile
index f5ae27ca131b..d009d2e0b9e8 100644
--- a/arch/powerpc/platforms/4xx/Makefile
+++ b/arch/powerpc/platforms/4xx/Makefile
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y += uic.o machine_check.o
-obj-$(CONFIG_PPC4xx_OCM) += ocm.o
obj-$(CONFIG_4xx_SOC) += soc.o
obj-$(CONFIG_PCI) += pci.o
obj-$(CONFIG_PPC4xx_HSTA_MSI) += hsta_msi.o
diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c
deleted file mode 100644
index ba3257406ced..000000000000
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ /dev/null
@@ -1,390 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * PowerPC 4xx OCM memory allocation support
- *
- * (C) Copyright 2009, Applied Micro Circuits Corporation
- * Victor Gallardo (vgallardo@amcc.com)
- *
- * See file CREDITS for list of people who contributed to this
- * project.
- */
-
-#include <linux/kernel.h>
-#include <linux/dma-mapping.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <asm/rheap.h>
-#include <asm/ppc4xx_ocm.h>
-#include <linux/slab.h>
-#include <linux/debugfs.h>
-
-#define OCM_DISABLED 0
-#define OCM_ENABLED 1
-
-struct ocm_block {
- struct list_head list;
- void __iomem *addr;
- int size;
- const char *owner;
-};
-
-/* non-cached or cached region */
-struct ocm_region {
- phys_addr_t phys;
- void __iomem *virt;
-
- int memtotal;
- int memfree;
-
- rh_info_t *rh;
- struct list_head list;
-};
-
-struct ocm_info {
- int index;
- int status;
- int ready;
-
- phys_addr_t phys;
-
- int alignment;
- int memtotal;
- int cache_size;
-
- struct ocm_region nc; /* non-cached region */
- struct ocm_region c; /* cached region */
-};
-
-static struct ocm_info *ocm_nodes;
-static int ocm_count;
-
-static struct ocm_info *ocm_get_node(unsigned int index)
-{
- if (index >= ocm_count) {
- printk(KERN_ERR "PPC4XX OCM: invalid index");
- return NULL;
- }
-
- return &ocm_nodes[index];
-}
-
-static int ocm_free_region(struct ocm_region *ocm_reg, const void *addr)
-{
- struct ocm_block *blk, *tmp;
- unsigned long offset;
-
- if (!ocm_reg->virt)
- return 0;
-
- list_for_each_entry_safe(blk, tmp, &ocm_reg->list, list) {
- if (blk->addr == addr) {
- offset = addr - ocm_reg->virt;
- ocm_reg->memfree += blk->size;
- rh_free(ocm_reg->rh, offset);
- list_del(&blk->list);
- kfree(blk);
- return 1;
- }
- }
-
- return 0;
-}
-
-static void __init ocm_init_node(int count, struct device_node *node)
-{
- struct ocm_info *ocm;
-
- const unsigned int *cell_index;
- const unsigned int *cache_size;
- int len;
-
- struct resource rsrc;
-
- ocm = ocm_get_node(count);
-
- cell_index = of_get_property(node, "cell-index", &len);
- if (!cell_index) {
- printk(KERN_ERR "PPC4XX OCM: missing cell-index property");
- return;
- }
- ocm->index = *cell_index;
-
- if (of_device_is_available(node))
- ocm->status = OCM_ENABLED;
-
- cache_size = of_get_property(node, "cached-region-size", &len);
- if (cache_size)
- ocm->cache_size = *cache_size;
-
- if (of_address_to_resource(node, 0, &rsrc)) {
- printk(KERN_ERR "PPC4XX OCM%d: could not get resource address\n",
- ocm->index);
- return;
- }
-
- ocm->phys = rsrc.start;
- ocm->memtotal = (rsrc.end - rsrc.start + 1);
-
- printk(KERN_INFO "PPC4XX OCM%d: %d Bytes (%s)\n",
- ocm->index, ocm->memtotal,
- (ocm->status == OCM_DISABLED) ? "disabled" : "enabled");
-
- if (ocm->status == OCM_DISABLED)
- return;
-
- /* request region */
-
- if (!request_mem_region(ocm->phys, ocm->memtotal, "ppc4xx_ocm")) {
- printk(KERN_ERR "PPC4XX OCM%d: could not request region\n",
- ocm->index);
- return;
- }
-
- /* Configure non-cached and cached regions */
-
- ocm->nc.phys = ocm->phys;
- ocm->nc.memtotal = ocm->memtotal - ocm->cache_size;
- ocm->nc.memfree = ocm->nc.memtotal;
-
- ocm->c.phys = ocm->phys + ocm->nc.memtotal;
- ocm->c.memtotal = ocm->cache_size;
- ocm->c.memfree = ocm->c.memtotal;
-
- if (ocm->nc.memtotal == 0)
- ocm->nc.phys = 0;
-
- if (ocm->c.memtotal == 0)
- ocm->c.phys = 0;
-
- printk(KERN_INFO "PPC4XX OCM%d: %d Bytes (non-cached)\n",
- ocm->index, ocm->nc.memtotal);
-
- printk(KERN_INFO "PPC4XX OCM%d: %d Bytes (cached)\n",
- ocm->index, ocm->c.memtotal);
-
- /* ioremap the non-cached region */
- if (ocm->nc.memtotal) {
- ocm->nc.virt = __ioremap(ocm->nc.phys, ocm->nc.memtotal,
- _PAGE_EXEC | pgprot_val(PAGE_KERNEL_NCG));
-
- if (!ocm->nc.virt) {
- printk(KERN_ERR
- "PPC4XX OCM%d: failed to ioremap non-cached memory\n",
- ocm->index);
- ocm->nc.memfree = 0;
- return;
- }
- }
-
- /* ioremap the cached region */
-
- if (ocm->c.memtotal) {
- ocm->c.virt = __ioremap(ocm->c.phys, ocm->c.memtotal,
- _PAGE_EXEC | pgprot_val(PAGE_KERNEL));
-
- if (!ocm->c.virt) {
- printk(KERN_ERR
- "PPC4XX OCM%d: failed to ioremap cached memory\n",
- ocm->index);
- ocm->c.memfree = 0;
- return;
- }
- }
-
- /* Create Remote Heaps */
-
- ocm->alignment = 4; /* default 4 byte alignment */
-
- if (ocm->nc.virt) {
- ocm->nc.rh = rh_create(ocm->alignment);
- rh_attach_region(ocm->nc.rh, 0, ocm->nc.memtotal);
- }
-
- if (ocm->c.virt) {
- ocm->c.rh = rh_create(ocm->alignment);
- rh_attach_region(ocm->c.rh, 0, ocm->c.memtotal);
- }
-
- INIT_LIST_HEAD(&ocm->nc.list);
- INIT_LIST_HEAD(&ocm->c.list);
-
- ocm->ready = 1;
-}
-
-static int ocm_debugfs_show(struct seq_file *m, void *v)
-{
- struct ocm_block *blk, *tmp;
- unsigned int i;
-
- for (i = 0; i < ocm_count; i++) {
- struct ocm_info *ocm = ocm_get_node(i);
-
- if (!ocm || !ocm->ready)
- continue;
-
- seq_printf(m, "PPC4XX OCM : %d\n", ocm->index);
- seq_printf(m, "PhysAddr : %pa\n", &(ocm->phys));
- seq_printf(m, "MemTotal : %d Bytes\n", ocm->memtotal);
- seq_printf(m, "MemTotal(NC) : %d Bytes\n", ocm->nc.memtotal);
- seq_printf(m, "MemTotal(C) : %d Bytes\n\n", ocm->c.memtotal);
-
- seq_printf(m, "NC.PhysAddr : %pa\n", &(ocm->nc.phys));
- seq_printf(m, "NC.VirtAddr : 0x%p\n", ocm->nc.virt);
- seq_printf(m, "NC.MemTotal : %d Bytes\n", ocm->nc.memtotal);
- seq_printf(m, "NC.MemFree : %d Bytes\n", ocm->nc.memfree);
-
- list_for_each_entry_safe(blk, tmp, &ocm->nc.list, list) {
- seq_printf(m, "NC.MemUsed : %d Bytes (%s)\n",
- blk->size, blk->owner);
- }
-
- seq_printf(m, "\nC.PhysAddr : %pa\n", &(ocm->c.phys));
- seq_printf(m, "C.VirtAddr : 0x%p\n", ocm->c.virt);
- seq_printf(m, "C.MemTotal : %d Bytes\n", ocm->c.memtotal);
- seq_printf(m, "C.MemFree : %d Bytes\n", ocm->c.memfree);
-
- list_for_each_entry_safe(blk, tmp, &ocm->c.list, list) {
- seq_printf(m, "C.MemUsed : %d Bytes (%s)\n",
- blk->size, blk->owner);
- }
-
- seq_putc(m, '\n');
- }
-
- return 0;
-}
-
-static int ocm_debugfs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ocm_debugfs_show, NULL);
-}
-
-static const struct file_operations ocm_debugfs_fops = {
- .open = ocm_debugfs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int ocm_debugfs_init(void)
-{
- struct dentry *junk;
-
- junk = debugfs_create_dir("ppc4xx_ocm", 0);
- if (!junk) {
- printk(KERN_ALERT "debugfs ppc4xx ocm: failed to create dir\n");
- return -1;
- }
-
- if (debugfs_create_file("info", 0644, junk, NULL, &ocm_debugfs_fops)) {
- printk(KERN_ALERT "debugfs ppc4xx ocm: failed to create file\n");
- return -1;
- }
-
- return 0;
-}
-
-void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
- int flags, const char *owner)
-{
- void __iomem *addr = NULL;
- unsigned long offset;
- struct ocm_info *ocm;
- struct ocm_region *ocm_reg;
- struct ocm_block *ocm_blk;
- int i;
-
- for (i = 0; i < ocm_count; i++) {
- ocm = ocm_get_node(i);
-
- if (!ocm || !ocm->ready)
- continue;
-
- if (flags == PPC4XX_OCM_NON_CACHED)
- ocm_reg = &ocm->nc;
- else
- ocm_reg = &ocm->c;
-
- if (!ocm_reg->virt)
- continue;
-
- if (align < ocm->alignment)
- align = ocm->alignment;
-
- offset = rh_alloc_align(ocm_reg->rh, size, align, NULL);
-
- if (IS_ERR_VALUE(offset))
- continue;
-
- ocm_blk = kzalloc(sizeof(*ocm_blk), GFP_KERNEL);
- if (!ocm_blk) {
- rh_free(ocm_reg->rh, offset);
- break;
- }
-
- *phys = ocm_reg->phys + offset;
- addr = ocm_reg->virt + offset;
- size = ALIGN(size, align);
-
- ocm_blk->addr = addr;
- ocm_blk->size = size;
- ocm_blk->owner = owner;
- list_add_tail(&ocm_blk->list, &ocm_reg->list);
-
- ocm_reg->memfree -= size;
-
- break;
- }
-
- return addr;
-}
-
-void ppc4xx_ocm_free(const void *addr)
-{
- int i;
-
- if (!addr)
- return;
-
- for (i = 0; i < ocm_count; i++) {
- struct ocm_info *ocm = ocm_get_node(i);
-
- if (!ocm || !ocm->ready)
- continue;
-
- if (ocm_free_region(&ocm->nc, addr) ||
- ocm_free_region(&ocm->c, addr))
- return;
- }
-}
-
-static int __init ppc4xx_ocm_init(void)
-{
- struct device_node *np;
- int count;
-
- count = 0;
- for_each_compatible_node(np, NULL, "ibm,ocm")
- count++;
-
- if (!count)
- return 0;
-
- ocm_nodes = kzalloc((count * sizeof(struct ocm_info)), GFP_KERNEL);
- if (!ocm_nodes)
- return -ENOMEM;
-
- ocm_count = count;
- count = 0;
-
- for_each_compatible_node(np, NULL, "ibm,ocm") {
- ocm_init_node(count, np);
- count++;
- }
-
- ocm_debugfs_init();
-
- return 0;
-}
-
-arch_initcall(ppc4xx_ocm_init);
diff --git a/arch/powerpc/platforms/4xx/uic.c b/arch/powerpc/platforms/4xx/uic.c
index 31f12ad37a98..36fb66ce54cf 100644
--- a/arch/powerpc/platforms/4xx/uic.c
+++ b/arch/powerpc/platforms/4xx/uic.c
@@ -154,6 +154,7 @@ static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type)
mtdcr(uic->dcrbase + UIC_PR, pr);
mtdcr(uic->dcrbase + UIC_TR, tr);
+ mtdcr(uic->dcrbase + UIC_SR, ~mask);
raw_spin_unlock_irqrestore(&uic->lock, flags);
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index d1af0ee2f8c8..fa3d29dcb57e 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -147,10 +147,10 @@ config SOCRATES
This option enables support for the Socrates board.
config KSI8560
- bool "Emerson KSI8560"
- select DEFAULT_UIMAGE
- help
- This option enables support for the Emerson KSI8560 board
+ bool "Emerson KSI8560"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the Emerson KSI8560 board
config XES_MPC85xx
bool "X-ES single-board computer"
diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig
index 0a610114bc38..07a9d60c618a 100644
--- a/arch/powerpc/platforms/86xx/Kconfig
+++ b/arch/powerpc/platforms/86xx/Kconfig
@@ -62,9 +62,9 @@ config GEF_SBC610
This option enables support for the GE SBC610.
config MVME7100
- bool "Artesyn MVME7100"
- help
- This option enables support for the Emerson/Artesyn MVME7100 board.
+ bool "Artesyn MVME7100"
+ help
+ This option enables support for the Emerson/Artesyn MVME7100 board.
endif
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index d408162d5af4..e0fe670f06f6 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -157,6 +157,13 @@ config I2C_SPI_SMC1_UCODE_PATCH
help
Help not implemented yet, coming soon.
+config SMC_UCODE_PATCH
+ bool "SMC relocation patch"
+ help
+ This microcode relocates SMC1 and SMC2 parameter RAMs at
+ offset 0x1ec0 and 0x1fc0 to allow extended parameter RAM
+ for SCC3 and SCC4.
+
endchoice
config UCODE_PATCH
diff --git a/arch/powerpc/platforms/8xx/Makefile b/arch/powerpc/platforms/8xx/Makefile
index 708ab099e886..27a7c6f828e0 100644
--- a/arch/powerpc/platforms/8xx/Makefile
+++ b/arch/powerpc/platforms/8xx/Makefile
@@ -3,6 +3,8 @@
# Makefile for the PowerPC 8xx linux kernel.
#
obj-y += m8xx_setup.o machine_check.o pic.o
+obj-$(CONFIG_CPM1) += cpm1.o
+obj-$(CONFIG_UCODE_PATCH) += micropatch.o
obj-$(CONFIG_MPC885ADS) += mpc885ads_setup.o
obj-$(CONFIG_MPC86XADS) += mpc86xads_setup.o
obj-$(CONFIG_PPC_EP88XC) += ep88xc.o
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c
index 4f8dcf124828..0f65c51271db 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/platforms/8xx/cpm1.c
@@ -88,7 +88,8 @@ int cpm_get_irq(void)
{
int cpm_vec;
- /* Get the vector by setting the ACK bit and then reading
+ /*
+ * Get the vector by setting the ACK bit and then reading
* the register.
*/
out_be16(&cpic_reg->cpic_civr, 1);
@@ -108,7 +109,8 @@ static int cpm_pic_host_map(struct irq_domain *h, unsigned int virq,
return 0;
}
-/* The CPM can generate the error interrupt when there is a race condition
+/*
+ * The CPM can generate the error interrupt when there is a race condition
* between generating and masking interrupts. All we have to do is ACK it
* and return. This is a no-op function so we don't need any special
* tests in the interrupt handler.
@@ -208,12 +210,10 @@ void __init cpm_reset(void)
cpmp = &mpc8xx_immr->im_cpm;
#ifndef CONFIG_PPC_EARLY_DEBUG_CPM
- /* Perform a reset.
- */
+ /* Perform a reset. */
out_be16(&cpmp->cp_cpcr, CPM_CR_RST | CPM_CR_FLG);
- /* Wait for it.
- */
+ /* Wait for it. */
while (in_be16(&cpmp->cp_cpcr) & CPM_CR_FLG);
#endif
@@ -221,7 +221,8 @@ void __init cpm_reset(void)
cpm_load_patch(cpmp);
#endif
- /* Set SDMA Bus Request priority 5.
+ /*
+ * Set SDMA Bus Request priority 5.
* On 860T, this also enables FEC priority 6. I am not sure
* this is what we really want for some applications, but the
* manual recommends it.
@@ -263,7 +264,8 @@ out:
}
EXPORT_SYMBOL(cpm_command);
-/* Set a baud rate generator. This needs lots of work. There are
+/*
+ * Set a baud rate generator. This needs lots of work. There are
* four BRGs, any of which can be wired to any channel.
* The internal baud rate clock is the system clock divided by 16.
* This assumes the baudrate is 16x oversampled by the uart.
@@ -277,11 +279,11 @@ cpm_setbrg(uint brg, uint rate)
{
u32 __iomem *bp;
- /* This is good enough to get SMCs running.....
- */
+ /* This is good enough to get SMCs running..... */
bp = &cpmp->cp_brgc1;
bp += brg;
- /* The BRG has a 12-bit counter. For really slow baud rates (or
+ /*
+ * The BRG has a 12-bit counter. For really slow baud rates (or
* really fast processors), we may have to further divide by 16.
*/
if (((BRG_UART_CLK / rate) - 1) < 4096)
diff --git a/arch/powerpc/platforms/8xx/micropatch.c b/arch/powerpc/platforms/8xx/micropatch.c
new file mode 100644
index 000000000000..c80bd7afd6c5
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/micropatch.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Microcode patches for the CPM as supplied by Motorola.
+ * This is the one for IIC/SPI. There is a newer one that
+ * also relocates SMC2, but this would require additional changes
+ * to uart.c, so I am holding off on that for a moment.
+ */
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/8xx_immap.h>
+#include <asm/cpm.h>
+#include <asm/cpm1.h>
+
+struct patch_params {
+ ushort rccr;
+ ushort cpmcr1;
+ ushort cpmcr2;
+ ushort cpmcr3;
+ ushort cpmcr4;
+};
+
+/*
+ * I2C/SPI relocation patch arrays.
+ */
+
+#ifdef CONFIG_I2C_SPI_UCODE_PATCH
+
+static char patch_name[] __initdata = "I2C/SPI";
+
+static struct patch_params patch_params __initdata = {
+ 1, 0x802a, 0x8028, 0x802e, 0x802c,
+};
+
+static uint patch_2000[] __initdata = {
+ 0x7FFFEFD9, 0x3FFD0000, 0x7FFB49F7, 0x7FF90000,
+ 0x5FEFADF7, 0x5F89ADF7, 0x5FEFAFF7, 0x5F89AFF7,
+ 0x3A9CFBC8, 0xE7C0EDF0, 0x77C1E1BB, 0xF4DC7F1D,
+ 0xABAD932F, 0x4E08FDCF, 0x6E0FAFF8, 0x7CCF76CF,
+ 0xFD1FF9CF, 0xABF88DC6, 0xAB5679F7, 0xB0937383,
+ 0xDFCE79F7, 0xB091E6BB, 0xE5BBE74F, 0xB3FA6F0F,
+ 0x6FFB76CE, 0xEE0DF9CF, 0x2BFBEFEF, 0xCFEEF9CF,
+ 0x76CEAD24, 0x90B2DF9A, 0x7FDDD0BF, 0x4BF847FD,
+ 0x7CCF76CE, 0xCFEF7E1F, 0x7F1D7DFD, 0xF0B6EF71,
+ 0x7FC177C1, 0xFBC86079, 0xE722FBC8, 0x5FFFDFFF,
+ 0x5FB2FFFB, 0xFBC8F3C8, 0x94A67F01, 0x7F1D5F39,
+ 0xAFE85F5E, 0xFFDFDF96, 0xCB9FAF7D, 0x5FC1AFED,
+ 0x8C1C5FC1, 0xAFDD5FC3, 0xDF9A7EFD, 0xB0B25FB2,
+ 0xFFFEABAD, 0x5FB2FFFE, 0x5FCE600B, 0xE6BB600B,
+ 0x5FCEDFC6, 0x27FBEFDF, 0x5FC8CFDE, 0x3A9CE7C0,
+ 0xEDF0F3C8, 0x7F0154CD, 0x7F1D2D3D, 0x363A7570,
+ 0x7E0AF1CE, 0x37EF2E68, 0x7FEE10EC, 0xADF8EFDE,
+ 0xCFEAE52F, 0x7D0FE12B, 0xF1CE5F65, 0x7E0A4DF8,
+ 0xCFEA5F72, 0x7D0BEFEE, 0xCFEA5F74, 0xE522EFDE,
+ 0x5F74CFDA, 0x0B627385, 0xDF627E0A, 0x30D8145B,
+ 0xBFFFF3C8, 0x5FFFDFFF, 0xA7F85F5E, 0xBFFE7F7D,
+ 0x10D31450, 0x5F36BFFF, 0xAF785F5E, 0xBFFDA7F8,
+ 0x5F36BFFE, 0x77FD30C0, 0x4E08FDCF, 0xE5FF6E0F,
+ 0xAFF87E1F, 0x7E0FFD1F, 0xF1CF5F1B, 0xABF80D5E,
+ 0x5F5EFFEF, 0x79F730A2, 0xAFDD5F34, 0x47F85F34,
+ 0xAFED7FDD, 0x50B24978, 0x47FD7F1D, 0x7DFD70AD,
+ 0xEF717EC1, 0x6BA47F01, 0x2D267EFD, 0x30DE5F5E,
+ 0xFFFD5F5E, 0xFFEF5F5E, 0xFFDF0CA0, 0xAFED0A9E,
+ 0xAFDD0C3A, 0x5F3AAFBD, 0x7FBDB082, 0x5F8247F8
+};
+
+static uint patch_2f00[] __initdata = {
+ 0x3E303430, 0x34343737, 0xABF7BF9B, 0x994B4FBD,
+ 0xBD599493, 0x349FFF37, 0xFB9B177D, 0xD9936956,
+ 0xBBFDD697, 0xBDD2FD11, 0x31DB9BB3, 0x63139637,
+ 0x93733693, 0x193137F7, 0x331737AF, 0x7BB9B999,
+ 0xBB197957, 0x7FDFD3D5, 0x73B773F7, 0x37933B99,
+ 0x1D115316, 0x99315315, 0x31694BF4, 0xFBDBD359,
+ 0x31497353, 0x76956D69, 0x7B9D9693, 0x13131979,
+ 0x79376935
+};
+
+static uint patch_2e00[] __initdata = {};
+#endif
+
+/*
+ * I2C/SPI/SMC1 relocation patch arrays.
+ */
+
+#ifdef CONFIG_I2C_SPI_SMC1_UCODE_PATCH
+
+static char patch_name[] __initdata = "I2C/SPI/SMC1";
+
+static struct patch_params patch_params __initdata = {
+ 3, 0x8080, 0x808a, 0x8028, 0x802a,
+};
+
+static uint patch_2000[] __initdata = {
+ 0x3fff0000, 0x3ffd0000, 0x3ffb0000, 0x3ff90000,
+ 0x5f13eff8, 0x5eb5eff8, 0x5f88adf7, 0x5fefadf7,
+ 0x3a9cfbc8, 0x77cae1bb, 0xf4de7fad, 0xabae9330,
+ 0x4e08fdcf, 0x6e0faff8, 0x7ccf76cf, 0xfdaff9cf,
+ 0xabf88dc8, 0xab5879f7, 0xb0925d8d, 0xdfd079f7,
+ 0xb090e6bb, 0xe5bbe74f, 0x9e046f0f, 0x6ffb76ce,
+ 0xee0cf9cf, 0x2bfbefef, 0xcfeef9cf, 0x76cead23,
+ 0x90b3df99, 0x7fddd0c1, 0x4bf847fd, 0x7ccf76ce,
+ 0xcfef77ca, 0x7eaf7fad, 0x7dfdf0b7, 0xef7a7fca,
+ 0x77cafbc8, 0x6079e722, 0xfbc85fff, 0xdfff5fb3,
+ 0xfffbfbc8, 0xf3c894a5, 0xe7c9edf9, 0x7f9a7fad,
+ 0x5f36afe8, 0x5f5bffdf, 0xdf95cb9e, 0xaf7d5fc3,
+ 0xafed8c1b, 0x5fc3afdd, 0x5fc5df99, 0x7efdb0b3,
+ 0x5fb3fffe, 0xabae5fb3, 0xfffe5fd0, 0x600be6bb,
+ 0x600b5fd0, 0xdfc827fb, 0xefdf5fca, 0xcfde3a9c,
+ 0xe7c9edf9, 0xf3c87f9e, 0x54ca7fed, 0x2d3a3637,
+ 0x756f7e9a, 0xf1ce37ef, 0x2e677fee, 0x10ebadf8,
+ 0xefdecfea, 0xe52f7d9f, 0xe12bf1ce, 0x5f647e9a,
+ 0x4df8cfea, 0x5f717d9b, 0xefeecfea, 0x5f73e522,
+ 0xefde5f73, 0xcfda0b61, 0x5d8fdf61, 0xe7c9edf9,
+ 0x7e9a30d5, 0x1458bfff, 0xf3c85fff, 0xdfffa7f8,
+ 0x5f5bbffe, 0x7f7d10d0, 0x144d5f33, 0xbfffaf78,
+ 0x5f5bbffd, 0xa7f85f33, 0xbffe77fd, 0x30bd4e08,
+ 0xfdcfe5ff, 0x6e0faff8, 0x7eef7e9f, 0xfdeff1cf,
+ 0x5f17abf8, 0x0d5b5f5b, 0xffef79f7, 0x309eafdd,
+ 0x5f3147f8, 0x5f31afed, 0x7fdd50af, 0x497847fd,
+ 0x7f9e7fed, 0x7dfd70a9, 0xef7e7ece, 0x6ba07f9e,
+ 0x2d227efd, 0x30db5f5b, 0xfffd5f5b, 0xffef5f5b,
+ 0xffdf0c9c, 0xafed0a9a, 0xafdd0c37, 0x5f37afbd,
+ 0x7fbdb081, 0x5f8147f8, 0x3a11e710, 0xedf0ccdd,
+ 0xf3186d0a, 0x7f0e5f06, 0x7fedbb38, 0x3afe7468,
+ 0x7fedf4fc, 0x8ffbb951, 0xb85f77fd, 0xb0df5ddd,
+ 0xdefe7fed, 0x90e1e74d, 0x6f0dcbf7, 0xe7decfed,
+ 0xcb74cfed, 0xcfeddf6d, 0x91714f74, 0x5dd2deef,
+ 0x9e04e7df, 0xefbb6ffb, 0xe7ef7f0e, 0x9e097fed,
+ 0xebdbeffa, 0xeb54affb, 0x7fea90d7, 0x7e0cf0c3,
+ 0xbffff318, 0x5fffdfff, 0xac59efea, 0x7fce1ee5,
+ 0xe2ff5ee1, 0xaffbe2ff, 0x5ee3affb, 0xf9cc7d0f,
+ 0xaef8770f, 0x7d0fb0c6, 0xeffbbfff, 0xcfef5ede,
+ 0x7d0fbfff, 0x5ede4cf8, 0x7fddd0bf, 0x49f847fd,
+ 0x7efdf0bb, 0x7fedfffd, 0x7dfdf0b7, 0xef7e7e1e,
+ 0x5ede7f0e, 0x3a11e710, 0xedf0ccab, 0xfb18ad2e,
+ 0x1ea9bbb8, 0x74283b7e, 0x73c2e4bb, 0x2ada4fb8,
+ 0xdc21e4bb, 0xb2a1ffbf, 0x5e2c43f8, 0xfc87e1bb,
+ 0xe74ffd91, 0x6f0f4fe8, 0xc7ba32e2, 0xf396efeb,
+ 0x600b4f78, 0xe5bb760b, 0x53acaef8, 0x4ef88b0e,
+ 0xcfef9e09, 0xabf8751f, 0xefef5bac, 0x741f4fe8,
+ 0x751e760d, 0x7fdbf081, 0x741cafce, 0xefcc7fce,
+ 0x751e70ac, 0x741ce7bb, 0x3372cfed, 0xafdbefeb,
+ 0xe5bb760b, 0x53f2aef8, 0xafe8e7eb, 0x4bf8771e,
+ 0x7e247fed, 0x4fcbe2cc, 0x7fbc30a9, 0x7b0f7a0f,
+ 0x34d577fd, 0x308b5db7, 0xde553e5f, 0xaf78741f,
+ 0x741f30f0, 0xcfef5e2c, 0x741f3eac, 0xafb8771e,
+ 0x5e677fed, 0x0bd3e2cc, 0x741ccfec, 0xe5ca53cd,
+ 0x6fcb4f74, 0x5dadde4b, 0x2ab63d38, 0x4bb3de30,
+ 0x751f741c, 0x6c42effa, 0xefea7fce, 0x6ffc30be,
+ 0xefec3fca, 0x30b3de2e, 0xadf85d9e, 0xaf7daefd,
+ 0x5d9ede2e, 0x5d9eafdd, 0x761f10ac, 0x1da07efd,
+ 0x30adfffe, 0x4908fb18, 0x5fffdfff, 0xafbb709b,
+ 0x4ef85e67, 0xadf814ad, 0x7a0f70ad, 0xcfef50ad,
+ 0x7a0fde30, 0x5da0afed, 0x3c12780f, 0xefef780f,
+ 0xefef790f, 0xa7f85e0f, 0xffef790f, 0xefef790f,
+ 0x14adde2e, 0x5d9eadfd, 0x5e2dfffb, 0xe79addfd,
+ 0xeff96079, 0x607ae79a, 0xddfceff9, 0x60795dff,
+ 0x607acfef, 0xefefefdf, 0xefbfef7f, 0xeeffedff,
+ 0xebffe7ff, 0xafefafdf, 0xafbfaf7f, 0xaeffadff,
+ 0xabffa7ff, 0x6fef6fdf, 0x6fbf6f7f, 0x6eff6dff,
+ 0x6bff67ff, 0x2fef2fdf, 0x2fbf2f7f, 0x2eff2dff,
+ 0x2bff27ff, 0x4e08fd1f, 0xe5ff6e0f, 0xaff87eef,
+ 0x7e0ffdef, 0xf11f6079, 0xabf8f542, 0x7e0af11c,
+ 0x37cfae3a, 0x7fec90be, 0xadf8efdc, 0xcfeae52f,
+ 0x7d0fe12b, 0xf11c6079, 0x7e0a4df8, 0xcfea5dc4,
+ 0x7d0befec, 0xcfea5dc6, 0xe522efdc, 0x5dc6cfda,
+ 0x4e08fd1f, 0x6e0faff8, 0x7c1f761f, 0xfdeff91f,
+ 0x6079abf8, 0x761cee24, 0xf91f2bfb, 0xefefcfec,
+ 0xf91f6079, 0x761c27fb, 0xefdf5da7, 0xcfdc7fdd,
+ 0xd09c4bf8, 0x47fd7c1f, 0x761ccfcf, 0x7eef7fed,
+ 0x7dfdf093, 0xef7e7f1e, 0x771efb18, 0x6079e722,
+ 0xe6bbe5bb, 0xae0ae5bb, 0x600bae85, 0xe2bbe2bb,
+ 0xe2bbe2bb, 0xaf02e2bb, 0xe2bb2ff9, 0x6079e2bb
+};
+
+static uint patch_2f00[] __initdata = {
+ 0x30303030, 0x3e3e3434, 0xabbf9b99, 0x4b4fbdbd,
+ 0x59949334, 0x9fff37fb, 0x9b177dd9, 0x936956bb,
+ 0xfbdd697b, 0xdd2fd113, 0x1db9f7bb, 0x36313963,
+ 0x79373369, 0x3193137f, 0x7331737a, 0xf7bb9b99,
+ 0x9bb19795, 0x77fdfd3d, 0x573b773f, 0x737933f7,
+ 0xb991d115, 0x31699315, 0x31531694, 0xbf4fbdbd,
+ 0x35931497, 0x35376956, 0xbd697b9d, 0x96931313,
+ 0x19797937, 0x6935af78, 0xb9b3baa3, 0xb8788683,
+ 0x368f78f7, 0x87778733, 0x3ffffb3b, 0x8e8f78b8,
+ 0x1d118e13, 0xf3ff3f8b, 0x6bd8e173, 0xd1366856,
+ 0x68d1687b, 0x3daf78b8, 0x3a3a3f87, 0x8f81378f,
+ 0xf876f887, 0x77fd8778, 0x737de8d6, 0xbbf8bfff,
+ 0xd8df87f7, 0xfd876f7b, 0x8bfff8bd, 0x8683387d,
+ 0xb873d87b, 0x3b8fd7f8, 0xf7338883, 0xbb8ee1f8,
+ 0xef837377, 0x3337b836, 0x817d11f8, 0x7378b878,
+ 0xd3368b7d, 0xed731b7d, 0x833731f3, 0xf22f3f23
+};
+
+static uint patch_2e00[] __initdata = {
+ 0x27eeeeee, 0xeeeeeeee, 0xeeeeeeee, 0xeeeeeeee,
+ 0xee4bf4fb, 0xdbd259bb, 0x1979577f, 0xdfd2d573,
+ 0xb773f737, 0x4b4fbdbd, 0x25b9b177, 0xd2d17376,
+ 0x956bbfdd, 0x697bdd2f, 0xff9f79ff, 0xff9ff22f
+};
+#endif
+
+/*
+ * USB SOF patch arrays.
+ */
+
+#ifdef CONFIG_USB_SOF_UCODE_PATCH
+
+static char patch_name[] __initdata = "USB SOF";
+
+static struct patch_params patch_params __initdata = {
+ 9,
+};
+
+static uint patch_2000[] __initdata = {
+ 0x7fff0000, 0x7ffd0000, 0x7ffb0000, 0x49f7ba5b,
+ 0xba383ffb, 0xf9b8b46d, 0xe5ab4e07, 0xaf77bffe,
+ 0x3f7bbf79, 0xba5bba38, 0xe7676076, 0x60750000
+};
+
+static uint patch_2f00[] __initdata = {
+ 0x3030304c, 0xcab9e441, 0xa1aaf220
+};
+
+static uint patch_2e00[] __initdata = {};
+#endif
+
+/*
+ * SMC relocation patch arrays.
+ */
+
+#ifdef CONFIG_SMC_UCODE_PATCH
+
+static char patch_name[] __initdata = "SMC";
+
+static struct patch_params patch_params __initdata = {
+ 2, 0x8080, 0x8088,
+};
+
+static uint patch_2000[] __initdata = {
+ 0x3fff0000, 0x3ffd0000, 0x3ffb0000, 0x3ff90000,
+ 0x5fefeff8, 0x5f91eff8, 0x3ff30000, 0x3ff10000,
+ 0x3a11e710, 0xedf0ccb9, 0xf318ed66, 0x7f0e5fe2,
+ 0x7fedbb38, 0x3afe7468, 0x7fedf4d8, 0x8ffbb92d,
+ 0xb83b77fd, 0xb0bb5eb9, 0xdfda7fed, 0x90bde74d,
+ 0x6f0dcbd3, 0xe7decfed, 0xcb50cfed, 0xcfeddf6d,
+ 0x914d4f74, 0x5eaedfcb, 0x9ee0e7df, 0xefbb6ffb,
+ 0xe7ef7f0e, 0x9ee57fed, 0xebb7effa, 0xeb30affb,
+ 0x7fea90b3, 0x7e0cf09f, 0xbffff318, 0x5fffdfff,
+ 0xac35efea, 0x7fce1fc1, 0xe2ff5fbd, 0xaffbe2ff,
+ 0x5fbfaffb, 0xf9a87d0f, 0xaef8770f, 0x7d0fb0a2,
+ 0xeffbbfff, 0xcfef5fba, 0x7d0fbfff, 0x5fba4cf8,
+ 0x7fddd09b, 0x49f847fd, 0x7efdf097, 0x7fedfffd,
+ 0x7dfdf093, 0xef7e7e1e, 0x5fba7f0e, 0x3a11e710,
+ 0xedf0cc87, 0xfb18ad0a, 0x1f85bbb8, 0x74283b7e,
+ 0x7375e4bb, 0x2ab64fb8, 0x5c7de4bb, 0x32fdffbf,
+ 0x5f0843f8, 0x7ce3e1bb, 0xe74f7ded, 0x6f0f4fe8,
+ 0xc7ba32be, 0x73f2efeb, 0x600b4f78, 0xe5bb760b,
+ 0x5388aef8, 0x4ef80b6a, 0xcfef9ee5, 0xabf8751f,
+ 0xefef5b88, 0x741f4fe8, 0x751e760d, 0x7fdb70dd,
+ 0x741cafce, 0xefcc7fce, 0x751e7088, 0x741ce7bb,
+ 0x334ecfed, 0xafdbefeb, 0xe5bb760b, 0x53ceaef8,
+ 0xafe8e7eb, 0x4bf8771e, 0x7e007fed, 0x4fcbe2cc,
+ 0x7fbc3085, 0x7b0f7a0f, 0x34b177fd, 0xb0e75e93,
+ 0xdf313e3b, 0xaf78741f, 0x741f30cc, 0xcfef5f08,
+ 0x741f3e88, 0xafb8771e, 0x5f437fed, 0x0bafe2cc,
+ 0x741ccfec, 0xe5ca53a9, 0x6fcb4f74, 0x5e89df27,
+ 0x2a923d14, 0x4b8fdf0c, 0x751f741c, 0x6c1eeffa,
+ 0xefea7fce, 0x6ffc309a, 0xefec3fca, 0x308fdf0a,
+ 0xadf85e7a, 0xaf7daefd, 0x5e7adf0a, 0x5e7aafdd,
+ 0x761f1088, 0x1e7c7efd, 0x3089fffe, 0x4908fb18,
+ 0x5fffdfff, 0xafbbf0f7, 0x4ef85f43, 0xadf81489,
+ 0x7a0f7089, 0xcfef5089, 0x7a0fdf0c, 0x5e7cafed,
+ 0xbc6e780f, 0xefef780f, 0xefef790f, 0xa7f85eeb,
+ 0xffef790f, 0xefef790f, 0x1489df0a, 0x5e7aadfd,
+ 0x5f09fffb, 0xe79aded9, 0xeff96079, 0x607ae79a,
+ 0xded8eff9, 0x60795edb, 0x607acfef, 0xefefefdf,
+ 0xefbfef7f, 0xeeffedff, 0xebffe7ff, 0xafefafdf,
+ 0xafbfaf7f, 0xaeffadff, 0xabffa7ff, 0x6fef6fdf,
+ 0x6fbf6f7f, 0x6eff6dff, 0x6bff67ff, 0x2fef2fdf,
+ 0x2fbf2f7f, 0x2eff2dff, 0x2bff27ff, 0x4e08fd1f,
+ 0xe5ff6e0f, 0xaff87eef, 0x7e0ffdef, 0xf11f6079,
+ 0xabf8f51e, 0x7e0af11c, 0x37cfae16, 0x7fec909a,
+ 0xadf8efdc, 0xcfeae52f, 0x7d0fe12b, 0xf11c6079,
+ 0x7e0a4df8, 0xcfea5ea0, 0x7d0befec, 0xcfea5ea2,
+ 0xe522efdc, 0x5ea2cfda, 0x4e08fd1f, 0x6e0faff8,
+ 0x7c1f761f, 0xfdeff91f, 0x6079abf8, 0x761cee00,
+ 0xf91f2bfb, 0xefefcfec, 0xf91f6079, 0x761c27fb,
+ 0xefdf5e83, 0xcfdc7fdd, 0x50f84bf8, 0x47fd7c1f,
+ 0x761ccfcf, 0x7eef7fed, 0x7dfd70ef, 0xef7e7f1e,
+ 0x771efb18, 0x6079e722, 0xe6bbe5bb, 0x2e66e5bb,
+ 0x600b2ee1, 0xe2bbe2bb, 0xe2bbe2bb, 0x2f5ee2bb,
+ 0xe2bb2ff9, 0x6079e2bb,
+};
+
+static uint patch_2f00[] __initdata = {
+ 0x30303030, 0x3e3e3030, 0xaf79b9b3, 0xbaa3b979,
+ 0x9693369f, 0x79f79777, 0x97333fff, 0xfb3b9e9f,
+ 0x79b91d11, 0x9e13f3ff, 0x3f9b6bd9, 0xe173d136,
+ 0x695669d1, 0x697b3daf, 0x79b93a3a, 0x3f979f91,
+ 0x379ff976, 0xf99777fd, 0x9779737d, 0xe9d6bbf9,
+ 0xbfffd9df, 0x97f7fd97, 0x6f7b9bff, 0xf9bd9683,
+ 0x397db973, 0xd97b3b9f, 0xd7f9f733, 0x9993bb9e,
+ 0xe1f9ef93, 0x73773337, 0xb936917d, 0x11f87379,
+ 0xb979d336, 0x8b7ded73, 0x1b7d9337, 0x31f3f22f,
+ 0x3f2327ee, 0xeeeeeeee, 0xeeeeeeee, 0xeeeeeeee,
+ 0xeeeeee4b, 0xf4fbdbd2, 0x58bb1878, 0x577fdfd2,
+ 0xd573b773, 0xf7374b4f, 0xbdbd25b8, 0xb177d2d1,
+ 0x7376856b, 0xbfdd687b, 0xdd2fff8f, 0x78ffff8f,
+ 0xf22f0000,
+};
+
+static uint patch_2e00[] __initdata = {};
+#endif
+
+static void __init cpm_write_patch(cpm8xx_t *cp, int offset, uint *patch, int len)
+{
+ if (!len)
+ return;
+ memcpy_toio(cp->cp_dpmem + offset, patch, len);
+}
+
+void __init cpm_load_patch(cpm8xx_t *cp)
+{
+ out_be16(&cp->cp_rccr, 0);
+
+ cpm_write_patch(cp, 0, patch_2000, sizeof(patch_2000));
+ cpm_write_patch(cp, 0xf00, patch_2f00, sizeof(patch_2f00));
+ cpm_write_patch(cp, 0xe00, patch_2e00, sizeof(patch_2e00));
+
+ if (IS_ENABLED(CONFIG_I2C_SPI_UCODE_PATCH) ||
+ IS_ENABLED(CONFIG_I2C_SPI_SMC1_UCODE_PATCH)) {
+ u16 rpbase = 0x500;
+ iic_t *iip;
+ struct spi_pram *spp;
+
+ iip = (iic_t *)&cp->cp_dparam[PROFF_IIC];
+ out_be16(&iip->iic_rpbase, rpbase);
+
+ /* Put SPI above the IIC, also 32-byte aligned. */
+ spp = (struct spi_pram *)&cp->cp_dparam[PROFF_SPI];
+ out_be16(&spp->rpbase, (rpbase + sizeof(iic_t) + 31) & ~31);
+
+ if (IS_ENABLED(CONFIG_I2C_SPI_SMC1_UCODE_PATCH)) {
+ smc_uart_t *smp;
+
+ smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1];
+ out_be16(&smp->smc_rpbase, 0x1FC0);
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_SMC_UCODE_PATCH)) {
+ smc_uart_t *smp;
+
+ smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1];
+ out_be16(&smp->smc_rpbase, 0x1ec0);
+ smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC2];
+ out_be16(&smp->smc_rpbase, 0x1fc0);
+ }
+
+ out_be16(&cp->cp_cpmcr1, patch_params.cpmcr1);
+ out_be16(&cp->cp_cpmcr2, patch_params.cpmcr2);
+ out_be16(&cp->cp_cpmcr3, patch_params.cpmcr3);
+ out_be16(&cp->cp_cpmcr4, patch_params.cpmcr4);
+
+ out_be16(&cp->cp_rccr, patch_params.rccr);
+
+ pr_info("%s microcode patch installed\n", patch_name);
+}
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index f3fb79fccc72..d82e3664ffdf 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -197,7 +197,8 @@ endmenu
config PPC601_SYNC_FIX
bool "Workarounds for PPC601 bugs"
- depends on PPC_BOOK3S_32 && PPC_PMAC
+ depends on PPC_BOOK3S_601 && PPC_PMAC
+ default y
help
Some versions of the PPC601 (the first PowerPC chip) have bugs which
mean that extra synchronization instructions are required near
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 2794235e9d3e..12543e53fa96 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -6,6 +6,9 @@ config PPC64
This option selects whether a 32-bit or a 64-bit kernel
will be built.
+config PPC_BOOK3S_32
+ bool
+
menu "Processor support"
choice
prompt "Processor Type"
@@ -21,13 +24,20 @@ choice
If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx.
-config PPC_BOOK3S_32
- bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
+config PPC_BOOK3S_6xx
+ bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx except 601"
+ select PPC_BOOK3S_32
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
select PPC_HAVE_KUEP
select PPC_HAVE_KUAP
+config PPC_BOOK3S_601
+ bool "PowerPC 601"
+ select PPC_BOOK3S_32
+ select PPC_FPU
+ select PPC_HAVE_KUAP
+
config PPC_85xx
bool "Freescale 85xx"
select E500
@@ -330,7 +340,7 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
config PPC_RADIX_MMU
bool "Radix MMU Support"
- depends on PPC_BOOK3S_64 && HUGETLB_PAGE
+ depends on PPC_BOOK3S_64
select ARCH_HAS_GIGANTIC_PAGE
select PPC_HAVE_KUEP
select PPC_HAVE_KUAP
@@ -450,8 +460,10 @@ config NOT_COHERENT_CACHE
depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \
GAMECUBE_COMMON || AMIGAONE
select ARCH_HAS_DMA_COHERENT_TO_PFN
+ select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SYNC_DMA_FOR_CPU
+ select DMA_DIRECT_REMAP
default n if PPC_47x
default y
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 16dfee29aa41..ca9ffc1c8685 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -486,7 +486,7 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
window->table.it_size = size >> window->table.it_page_shift;
window->table.it_ops = &cell_iommu_ops;
- iommu_init_table(&window->table, iommu->nid);
+ iommu_init_table(&window->table, iommu->nid, 0, 0);
pr_debug("\tioid %d\n", window->ioid);
pr_debug("\tblocksize %ld\n", window->table.it_blocksize);
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index 6dfd2cb1bce7..24adbe3c605c 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -31,22 +31,21 @@ static void spufs_handle_event(struct spu_context *ctx,
switch (type) {
case SPE_EVENT_INVALID_DMA:
- force_sig_fault(SIGBUS, BUS_OBJERR, NULL, current);
+ force_sig_fault(SIGBUS, BUS_OBJERR, NULL);
break;
case SPE_EVENT_SPE_DATA_STORAGE:
ctx->ops->restart_dma(ctx);
- force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea,
- current);
+ force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea);
break;
case SPE_EVENT_DMA_ALIGNMENT:
/* DAR isn't set for an alignment fault :( */
- force_sig_fault(SIGBUS, BUS_ADRALN, NULL, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
break;
case SPE_EVENT_SPE_ERROR:
force_sig_fault(
SIGILL, ILL_ILLOPC,
(void __user *)(unsigned long)
- ctx->ops->npc_read(ctx) - 4, current);
+ ctx->ops->npc_read(ctx) - 4);
break;
}
}
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index d40253a18b1c..c0f950a3f4e1 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -446,7 +446,7 @@ static const struct file_operations spufs_cntl_fops = {
.release = spufs_cntl_release,
.read = simple_attr_read,
.write = simple_attr_write,
- .llseek = generic_file_llseek,
+ .llseek = no_llseek,
.mmap = spufs_cntl_mmap,
};
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 065ff14b76e1..1d93e55a2de1 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -10,6 +10,8 @@
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/fsnotify.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
@@ -20,7 +22,6 @@
#include <linux/pagemap.h>
#include <linux/poll.h>
#include <linux/slab.h>
-#include <linux/parser.h>
#include <asm/prom.h>
#include <asm/spu.h>
@@ -30,7 +31,7 @@
#include "spufs.h"
struct spufs_sb_info {
- int debug;
+ bool debug;
};
static struct kmem_cache *spufs_inode_cache;
@@ -574,16 +575,27 @@ long spufs_create(struct path *path, struct dentry *dentry,
}
/* File system initialization */
+struct spufs_fs_context {
+ kuid_t uid;
+ kgid_t gid;
+ umode_t mode;
+};
+
enum {
- Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err,
+ Opt_uid, Opt_gid, Opt_mode, Opt_debug,
+};
+
+static const struct fs_parameter_spec spufs_param_specs[] = {
+ fsparam_u32 ("gid", Opt_gid),
+ fsparam_u32oct ("mode", Opt_mode),
+ fsparam_u32 ("uid", Opt_uid),
+ fsparam_flag ("debug", Opt_debug),
+ {}
};
-static const match_table_t spufs_tokens = {
- { Opt_uid, "uid=%d" },
- { Opt_gid, "gid=%d" },
- { Opt_mode, "mode=%o" },
- { Opt_debug, "debug" },
- { Opt_err, NULL },
+static const struct fs_parameter_description spufs_fs_parameters = {
+ .name = "spufs",
+ .specs = spufs_param_specs,
};
static int spufs_show_options(struct seq_file *m, struct dentry *root)
@@ -604,47 +616,41 @@ static int spufs_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
-static int
-spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
-{
- char *p;
- substring_t args[MAX_OPT_ARGS];
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token, option;
-
- if (!*p)
- continue;
-
- token = match_token(p, spufs_tokens, args);
- switch (token) {
- case Opt_uid:
- if (match_int(&args[0], &option))
- return 0;
- root->i_uid = make_kuid(current_user_ns(), option);
- if (!uid_valid(root->i_uid))
- return 0;
- break;
- case Opt_gid:
- if (match_int(&args[0], &option))
- return 0;
- root->i_gid = make_kgid(current_user_ns(), option);
- if (!gid_valid(root->i_gid))
- return 0;
- break;
- case Opt_mode:
- if (match_octal(&args[0], &option))
- return 0;
- root->i_mode = option | S_IFDIR;
- break;
- case Opt_debug:
- spufs_get_sb_info(sb)->debug = 1;
- break;
- default:
- return 0;
- }
+static int spufs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct spufs_fs_context *ctx = fc->fs_private;
+ struct spufs_sb_info *sbi = fc->s_fs_info;
+ struct fs_parse_result result;
+ kuid_t uid;
+ kgid_t gid;
+ int opt;
+
+ opt = fs_parse(fc, &spufs_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_uid:
+ uid = make_kuid(current_user_ns(), result.uint_32);
+ if (!uid_valid(uid))
+ return invalf(fc, "Unknown uid");
+ ctx->uid = uid;
+ break;
+ case Opt_gid:
+ gid = make_kgid(current_user_ns(), result.uint_32);
+ if (!gid_valid(gid))
+ return invalf(fc, "Unknown gid");
+ ctx->gid = gid;
+ break;
+ case Opt_mode:
+ ctx->mode = result.uint_32 & S_IALLUGO;
+ break;
+ case Opt_debug:
+ sbi->debug = true;
+ break;
}
- return 1;
+
+ return 0;
}
static void spufs_exit_isolated_loader(void)
@@ -678,79 +684,98 @@ spufs_init_isolated_loader(void)
printk(KERN_INFO "spufs: SPU isolation mode enabled\n");
}
-static int
-spufs_create_root(struct super_block *sb, void *data)
+static int spufs_create_root(struct super_block *sb, struct fs_context *fc)
{
+ struct spufs_fs_context *ctx = fc->fs_private;
struct inode *inode;
- int ret;
- ret = -ENODEV;
if (!spu_management_ops)
- goto out;
+ return -ENODEV;
- ret = -ENOMEM;
- inode = spufs_new_inode(sb, S_IFDIR | 0775);
+ inode = spufs_new_inode(sb, S_IFDIR | ctx->mode);
if (!inode)
- goto out;
+ return -ENOMEM;
+ inode->i_uid = ctx->uid;
+ inode->i_gid = ctx->gid;
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
SPUFS_I(inode)->i_ctx = NULL;
inc_nlink(inode);
- ret = -EINVAL;
- if (!spufs_parse_options(sb, data, inode))
- goto out_iput;
-
- ret = -ENOMEM;
sb->s_root = d_make_root(inode);
if (!sb->s_root)
- goto out;
-
+ return -ENOMEM;
return 0;
-out_iput:
- iput(inode);
-out:
- return ret;
}
-static int
-spufs_fill_super(struct super_block *sb, void *data, int silent)
-{
- struct spufs_sb_info *info;
- static const struct super_operations s_ops = {
- .alloc_inode = spufs_alloc_inode,
- .free_inode = spufs_free_inode,
- .statfs = simple_statfs,
- .evict_inode = spufs_evict_inode,
- .show_options = spufs_show_options,
- };
-
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- return -ENOMEM;
+static const struct super_operations spufs_ops = {
+ .alloc_inode = spufs_alloc_inode,
+ .free_inode = spufs_free_inode,
+ .statfs = simple_statfs,
+ .evict_inode = spufs_evict_inode,
+ .show_options = spufs_show_options,
+};
+static int spufs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = SPUFS_MAGIC;
- sb->s_op = &s_ops;
- sb->s_fs_info = info;
+ sb->s_op = &spufs_ops;
- return spufs_create_root(sb, data);
+ return spufs_create_root(sb, fc);
+}
+
+static int spufs_get_tree(struct fs_context *fc)
+{
+ return get_tree_single(fc, spufs_fill_super);
}
-static struct dentry *
-spufs_mount(struct file_system_type *fstype, int flags,
- const char *name, void *data)
+static void spufs_free_fc(struct fs_context *fc)
{
- return mount_single(fstype, flags, data, spufs_fill_super);
+ kfree(fc->s_fs_info);
+}
+
+static const struct fs_context_operations spufs_context_ops = {
+ .free = spufs_free_fc,
+ .parse_param = spufs_parse_param,
+ .get_tree = spufs_get_tree,
+};
+
+static int spufs_init_fs_context(struct fs_context *fc)
+{
+ struct spufs_fs_context *ctx;
+ struct spufs_sb_info *sbi;
+
+ ctx = kzalloc(sizeof(struct spufs_fs_context), GFP_KERNEL);
+ if (!ctx)
+ goto nomem;
+
+ sbi = kzalloc(sizeof(struct spufs_sb_info), GFP_KERNEL);
+ if (!sbi)
+ goto nomem_ctx;
+
+ ctx->uid = current_uid();
+ ctx->gid = current_gid();
+ ctx->mode = 0755;
+
+ fc->s_fs_info = sbi;
+ fc->ops = &spufs_context_ops;
+ return 0;
+
+nomem_ctx:
+ kfree(ctx);
+nomem:
+ return -ENOMEM;
}
static struct file_system_type spufs_type = {
.owner = THIS_MODULE,
.name = "spufs",
- .mount = spufs_mount,
+ .init_fs_context = spufs_init_fs_context,
+ .parameters = &spufs_fs_parameters,
.kill_sb = kill_litter_super,
};
MODULE_ALIAS_FS("spufs");
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
index 07f82d7395ff..3f2380f40f99 100644
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -443,7 +443,7 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP)
&& (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) {
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
ret = -ERESTARTSYS;
}
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index e56b553de27b..f18d5067cd0f 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -128,7 +128,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
* runqueue. The context will be rescheduled on the proper node
* if it is timesliced or preempted.
*/
- cpumask_copy(&ctx->cpus_allowed, &current->cpus_allowed);
+ cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr);
/* Save the current cpu id for spu interrupt routing. */
ctx->last_ran = raw_smp_processor_id();
diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig
index 08d530a2a8b1..86ae210bee9a 100644
--- a/arch/powerpc/platforms/maple/Kconfig
+++ b/arch/powerpc/platforms/maple/Kconfig
@@ -14,5 +14,5 @@ config PPC_MAPLE
select MMIO_NVRAM
select ATA_NONSTANDARD if ATA
help
- This option enables support for the Maple 970FX Evaluation Board.
+ This option enables support for the Maple 970FX Evaluation Board.
For more information, refer to <http://www.970eval.com>
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 77fee09104f8..b500a6e47e6b 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -146,7 +146,7 @@ static void iommu_table_iobmap_setup(void)
*/
iommu_table_iobmap.it_blocksize = 4;
iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops;
- iommu_init_table(&iommu_table_iobmap, 0);
+ iommu_init_table(&iommu_table_iobmap, 0, 0, 0);
pr_debug(" <- %s\n", __func__);
}
diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S
index 6bbcbec97712..bd6085b470b7 100644
--- a/arch/powerpc/platforms/powermac/sleep.S
+++ b/arch/powerpc/platforms/powermac/sleep.S
@@ -33,10 +33,18 @@
#define SL_IBAT2 0x48
#define SL_DBAT3 0x50
#define SL_IBAT3 0x58
-#define SL_TB 0x60
-#define SL_R2 0x68
-#define SL_CR 0x6c
-#define SL_R12 0x70 /* r12 to r31 */
+#define SL_DBAT4 0x60
+#define SL_IBAT4 0x68
+#define SL_DBAT5 0x70
+#define SL_IBAT5 0x78
+#define SL_DBAT6 0x80
+#define SL_IBAT6 0x88
+#define SL_DBAT7 0x90
+#define SL_IBAT7 0x98
+#define SL_TB 0xa0
+#define SL_R2 0xa8
+#define SL_CR 0xac
+#define SL_R12 0xb0 /* r12 to r31 */
#define SL_SIZE (SL_R12 + 80)
.section .text
@@ -121,6 +129,41 @@ _GLOBAL(low_sleep_handler)
mfibatl r4,3
stw r4,SL_IBAT3+4(r1)
+BEGIN_MMU_FTR_SECTION
+ mfspr r4,SPRN_DBAT4U
+ stw r4,SL_DBAT4(r1)
+ mfspr r4,SPRN_DBAT4L
+ stw r4,SL_DBAT4+4(r1)
+ mfspr r4,SPRN_DBAT5U
+ stw r4,SL_DBAT5(r1)
+ mfspr r4,SPRN_DBAT5L
+ stw r4,SL_DBAT5+4(r1)
+ mfspr r4,SPRN_DBAT6U
+ stw r4,SL_DBAT6(r1)
+ mfspr r4,SPRN_DBAT6L
+ stw r4,SL_DBAT6+4(r1)
+ mfspr r4,SPRN_DBAT7U
+ stw r4,SL_DBAT7(r1)
+ mfspr r4,SPRN_DBAT7L
+ stw r4,SL_DBAT7+4(r1)
+ mfspr r4,SPRN_IBAT4U
+ stw r4,SL_IBAT4(r1)
+ mfspr r4,SPRN_IBAT4L
+ stw r4,SL_IBAT4+4(r1)
+ mfspr r4,SPRN_IBAT5U
+ stw r4,SL_IBAT5(r1)
+ mfspr r4,SPRN_IBAT5L
+ stw r4,SL_IBAT5+4(r1)
+ mfspr r4,SPRN_IBAT6U
+ stw r4,SL_IBAT6(r1)
+ mfspr r4,SPRN_IBAT6L
+ stw r4,SL_IBAT6+4(r1)
+ mfspr r4,SPRN_IBAT7U
+ stw r4,SL_IBAT7(r1)
+ mfspr r4,SPRN_IBAT7L
+ stw r4,SL_IBAT7+4(r1)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+
/* Backup various CPU config stuffs */
bl __save_cpu_setup
@@ -321,22 +364,37 @@ grackle_wake_up:
mtibatl 3,r4
BEGIN_MMU_FTR_SECTION
- li r4,0
+ lwz r4,SL_DBAT4(r1)
mtspr SPRN_DBAT4U,r4
+ lwz r4,SL_DBAT4+4(r1)
mtspr SPRN_DBAT4L,r4
+ lwz r4,SL_DBAT5(r1)
mtspr SPRN_DBAT5U,r4
+ lwz r4,SL_DBAT5+4(r1)
mtspr SPRN_DBAT5L,r4
+ lwz r4,SL_DBAT6(r1)
mtspr SPRN_DBAT6U,r4
+ lwz r4,SL_DBAT6+4(r1)
mtspr SPRN_DBAT6L,r4
+ lwz r4,SL_DBAT7(r1)
mtspr SPRN_DBAT7U,r4
+ lwz r4,SL_DBAT7+4(r1)
mtspr SPRN_DBAT7L,r4
+ lwz r4,SL_IBAT4(r1)
mtspr SPRN_IBAT4U,r4
+ lwz r4,SL_IBAT4+4(r1)
mtspr SPRN_IBAT4L,r4
+ lwz r4,SL_IBAT5(r1)
mtspr SPRN_IBAT5U,r4
+ lwz r4,SL_IBAT5+4(r1)
mtspr SPRN_IBAT5L,r4
+ lwz r4,SL_IBAT6(r1)
mtspr SPRN_IBAT6U,r4
+ lwz r4,SL_IBAT6+4(r1)
mtspr SPRN_IBAT6L,r4
+ lwz r4,SL_IBAT7(r1)
mtspr SPRN_IBAT7U,r4
+ lwz r4,SL_IBAT7+4(r1)
mtspr SPRN_IBAT7L,r4
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 850eee860cf2..938803eab0ad 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -12,7 +12,6 @@ config PPC_POWERNV
select EPAPR_BOOT
select PPC_INDIRECT_PIO
select PPC_UDBG_16550
- select PPC_SCOM
select ARCH_RANDOM
select CPU_FREQ
select PPC_DOORBELL
@@ -47,3 +46,7 @@ config PPC_VAS
VAS adapters are found in POWER9 based systems.
If unsure, say N.
+
+config SCOM_DEBUGFS
+ bool "Expose SCOM controllers via debugfs"
+ depends on DEBUG_FS
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index da2e99efbd04..a3ac9646119d 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -4,15 +4,19 @@ obj-y += idle.o opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
+obj-y += ultravisor.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
+obj-$(CONFIG_FA_DUMP) += opal-fadump.o
+obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o
+obj-$(CONFIG_OPAL_CORE) += opal-core.o
obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
obj-$(CONFIG_CXL_BASE) += pci-cxl.o
obj-$(CONFIG_EEH) += eeh-powernv.o
-obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
obj-$(CONFIG_OPAL_PRD) += opal-prd.o
obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o
obj-$(CONFIG_OCXL_BASE) += ocxl.o
+obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 9ade4489f415..6bc24a47e9ef 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1,8 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * The file intends to implement the platform dependent EEH operations on
- * powernv platform. Actually, the powernv was created in order to fully
- * hypervisor support.
+ * PowerNV Platform dependent EEH operations
*
* Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
*/
@@ -36,6 +34,7 @@
#include "powernv.h"
#include "pci.h"
+#include "../../../../drivers/pci/pci.h"
static int eeh_event_irq = -EINVAL;
@@ -43,13 +42,10 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
{
struct pci_dn *pdn = pci_get_pdn(pdev);
- if (!pdev->is_virtfn)
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
return;
- /*
- * The following operations will fail if VF's sysfs files
- * aren't created or its resources aren't finalized.
- */
+ dev_dbg(&pdev->dev, "EEH: Setting up device\n");
eeh_add_device_early(pdn);
eeh_add_device_late(pdev);
eeh_sysfs_add_device(pdev);
@@ -201,6 +197,25 @@ PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
#endif /* CONFIG_DEBUG_FS */
+void pnv_eeh_enable_phbs(void)
+{
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb = hose->private_data;
+ /*
+ * If EEH is enabled, we're going to rely on that.
+ * Otherwise, we restore to conventional mechanism
+ * to clear frozen PE during PCI config access.
+ */
+ if (eeh_enabled())
+ phb->flags |= PNV_PHB_FLAG_EEH;
+ else
+ phb->flags &= ~PNV_PHB_FLAG_EEH;
+ }
+}
+
/**
* pnv_eeh_post_init - EEH platform dependent post initialization
*
@@ -215,9 +230,7 @@ int pnv_eeh_post_init(void)
struct pnv_phb *phb;
int ret = 0;
- /* Probe devices & build address cache */
- eeh_probe_devices();
- eeh_addr_cache_build();
+ eeh_show_enabled();
/* Register OPAL event notifier */
eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
@@ -239,19 +252,11 @@ int pnv_eeh_post_init(void)
if (!eeh_enabled())
disable_irq(eeh_event_irq);
+ pnv_eeh_enable_phbs();
+
list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;
- /*
- * If EEH is enabled, we're going to rely on that.
- * Otherwise, we restore to conventional mechanism
- * to clear frozen PE during PCI config access.
- */
- if (eeh_enabled())
- phb->flags |= PNV_PHB_FLAG_EEH;
- else
- phb->flags &= ~PNV_PHB_FLAG_EEH;
-
/* Create debugfs entries */
#ifdef CONFIG_DEBUG_FS
if (phb->has_dbgfs || !phb->dbgfs)
@@ -379,6 +384,8 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
return NULL;
+ eeh_edev_dbg(edev, "Probing device\n");
+
/* Initialize eeh device */
edev->class_code = pdn->class_code;
edev->mode &= 0xFFFFFF00;
@@ -404,9 +411,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
/* Create PE */
ret = eeh_add_to_parent_pe(edev);
if (ret) {
- pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%x)\n",
- __func__, hose->global_number, pdn->busno,
- PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret);
+ eeh_edev_warn(edev, "Failed to add device to PE (code %d)\n", ret);
return NULL;
}
@@ -455,11 +460,17 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
* Enable EEH explicitly so that we will do EEH check
* while accessing I/O stuff
*/
- eeh_add_flag(EEH_ENABLED);
+ if (!eeh_has_flag(EEH_ENABLED)) {
+ enable_irq(eeh_event_irq);
+ pnv_eeh_enable_phbs();
+ eeh_add_flag(EEH_ENABLED);
+ }
/* Save memory bars */
eeh_save_bars(edev);
+ eeh_edev_dbg(edev, "EEH enabled on device\n");
+
return NULL;
}
@@ -839,7 +850,7 @@ static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
int aer = edev ? edev->aer_cap : 0;
u32 ctrl;
- pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
+ pr_debug("%s: Secondary Reset PCI bus %04x:%02x with option %d\n",
__func__, pci_domain_nr(dev->bus),
dev->bus->number, option);
@@ -897,6 +908,10 @@ static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option)
if (!dn || !of_get_property(dn, "ibm,reset-by-firmware", NULL))
return __pnv_eeh_bridge_reset(pdev, option);
+ pr_debug("%s: FW reset PCI bus %04x:%02x with option %d\n",
+ __func__, pci_domain_nr(pdev->bus),
+ pdev->bus->number, option);
+
switch (option) {
case EEH_RESET_FUNDAMENTAL:
scope = OPAL_RESET_PCI_FUNDAMENTAL;
@@ -1115,17 +1130,37 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
return -EIO;
}
+ if (pci_is_root_bus(bus))
+ return pnv_eeh_root_reset(hose, option);
+
/*
- * If dealing with the root bus (or the bus underneath the
- * root port), we reset the bus underneath the root port.
+ * For hot resets try use the generic PCI error recovery reset
+ * functions. These correctly handles the case where the secondary
+ * bus is behind a hotplug slot and it will use the slot provided
+ * reset methods to prevent spurious hotplug events during the reset.
*
- * The cxl driver depends on this behaviour for bi-modal card
- * switching.
+ * Fundemental resets need to be handled internally to EEH since the
+ * PCI core doesn't really have a concept of a fundemental reset,
+ * mainly because there's no standard way to generate one. Only a
+ * few devices require an FRESET so it should be fine.
*/
- if (pci_is_root_bus(bus) ||
- pci_is_root_bus(bus->parent))
- return pnv_eeh_root_reset(hose, option);
+ if (option != EEH_RESET_FUNDAMENTAL) {
+ /*
+ * NB: Skiboot and pnv_eeh_bridge_reset() also no-op the
+ * de-assert step. It's like the OPAL reset API was
+ * poorly designed or something...
+ */
+ if (option == EEH_RESET_DEACTIVATE)
+ return 0;
+ rc = pci_bus_error_reset(bus->self);
+ if (!rc)
+ return 0;
+ }
+
+ /* otherwise, use the generic bridge reset. this might call into FW */
+ if (pci_is_root_bus(bus->parent))
+ return pnv_eeh_root_reset(hose, option);
return pnv_eeh_bridge_reset(bus->self, option);
}
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 2f4479b94ac3..78599bca66c2 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -675,7 +675,8 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
sprs.ptcr = mfspr(SPRN_PTCR);
sprs.rpr = mfspr(SPRN_RPR);
sprs.tscr = mfspr(SPRN_TSCR);
- sprs.ldbar = mfspr(SPRN_LDBAR);
+ if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+ sprs.ldbar = mfspr(SPRN_LDBAR);
sprs_saved = true;
@@ -716,7 +717,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
* to reload MMCR0 (see mmcr0 comment above).
*/
if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
- asm volatile(PPC_INVALIDATE_ERAT);
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT);
mtspr(SPRN_MMCR0, mmcr0);
}
@@ -758,7 +759,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
mtspr(SPRN_PTCR, sprs.ptcr);
mtspr(SPRN_RPR, sprs.rpr);
mtspr(SPRN_TSCR, sprs.tscr);
- mtspr(SPRN_LDBAR, sprs.ldbar);
if (pls >= pnv_first_tb_loss_level) {
/* TB loss */
@@ -790,6 +790,8 @@ core_woken:
mtspr(SPRN_MMCR0, sprs.mmcr0);
mtspr(SPRN_MMCR1, sprs.mmcr1);
mtspr(SPRN_MMCR2, sprs.mmcr2);
+ if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+ mtspr(SPRN_LDBAR, sprs.ldbar);
mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
@@ -1155,10 +1157,10 @@ static void __init pnv_power9_idle_init(void)
pnv_deepest_stop_psscr_mask);
}
- pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%lld\n",
+ pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n",
pnv_first_spr_loss_level);
- pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%lld\n",
+ pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n",
pnv_first_tb_loss_level);
}
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 5e53c1392d3b..eb2e75dac369 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -70,23 +70,23 @@ static int change_memblock_state(struct memory_block *mem, void *arg)
/* called with device_hotplug_lock held */
static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
{
- u64 end_pfn = start_pfn + nr_pages - 1;
+ const unsigned long start = PFN_PHYS(start_pfn);
+ const unsigned long size = PFN_PHYS(nr_pages);
- if (walk_memory_range(start_pfn, end_pfn, NULL,
- check_memblock_online))
+ if (walk_memory_blocks(start, size, NULL, check_memblock_online))
return false;
- walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE,
- change_memblock_state);
+ walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE,
+ change_memblock_state);
if (offline_pages(start_pfn, nr_pages)) {
- walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE,
- change_memblock_state);
+ walk_memory_blocks(start, size, (void *)MEM_ONLINE,
+ change_memblock_state);
return false;
}
- walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
- change_memblock_state);
+ walk_memory_blocks(start, size, (void *)MEM_OFFLINE,
+ change_memblock_state);
return true;
@@ -242,9 +242,8 @@ static int memtrace_online(void)
*/
if (!memhp_auto_online) {
lock_device_hotplug();
- walk_memory_range(PFN_DOWN(ent->start),
- PFN_UP(ent->start + ent->size - 1),
- NULL, online_mem_block);
+ walk_memory_blocks(ent->start, ent->size, NULL,
+ online_mem_block);
unlock_device_hotplug();
}
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index c321fdbc2200..b95b9e3c4c98 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -19,18 +19,25 @@
#include "pci.h"
-/*
- * spinlock to protect initialisation of an npu_context for a particular
- * mm_struct.
- */
-static DEFINE_SPINLOCK(npu_context_lock);
-
static struct pci_dev *get_pci_dev(struct device_node *dn)
{
struct pci_dn *pdn = PCI_DN(dn);
+ struct pci_dev *pdev;
- return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus),
+ pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus),
pdn->busno, pdn->devfn);
+
+ /*
+ * pci_get_domain_bus_and_slot() increased the reference count of
+ * the PCI device, but callers don't need that actually as the PE
+ * already holds a reference to the device. Since callers aren't
+ * aware of the reference count change, call pci_dev_put() now to
+ * avoid leaks.
+ */
+ if (pdev)
+ pci_dev_put(pdev);
+
+ return pdev;
}
/* Given a NPU device get the associated PCI device. */
@@ -82,6 +89,7 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
}
EXPORT_SYMBOL(pnv_pci_get_npu_dev);
+#ifdef CONFIG_IOMMU_API
/*
* Returns the PE assoicated with the PCI device of the given
* NPU. Returns the linked pci device if pci_dev != NULL.
@@ -185,106 +193,6 @@ static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num)
return 0;
}
-/*
- * Enables 32 bit DMA on NPU.
- */
-static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
-{
- struct pci_dev *gpdev;
- struct pnv_ioda_pe *gpe;
- int64_t rc;
-
- /*
- * Find the assoicated PCI devices and get the dma window
- * information from there.
- */
- if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
- return;
-
- gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
- if (!gpe)
- return;
-
- rc = pnv_npu_set_window(&npe->table_group, 0,
- gpe->table_group.tables[0]);
-
- /*
- * NVLink devices use the same TCE table configuration as
- * their parent device so drivers shouldn't be doing DMA
- * operations directly on these devices.
- */
- set_dma_ops(&npe->pdev->dev, &dma_dummy_ops);
-}
-
-/*
- * Enables bypass mode on the NPU. The NPU only supports one
- * window per link, so bypass needs to be explicitly enabled or
- * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
- * active at the same time.
- */
-static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
-{
- struct pnv_phb *phb = npe->phb;
- int64_t rc = 0;
- phys_addr_t top = memblock_end_of_DRAM();
-
- if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev)
- return -EINVAL;
-
- rc = pnv_npu_unset_window(&npe->table_group, 0);
- if (rc != OPAL_SUCCESS)
- return rc;
-
- /* Enable the bypass window */
-
- top = roundup_pow_of_two(top);
- dev_info(&npe->pdev->dev, "Enabling bypass for PE %x\n",
- npe->pe_number);
- rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
- npe->pe_number, npe->pe_number,
- 0 /* bypass base */, top);
-
- if (rc == OPAL_SUCCESS)
- pnv_pci_ioda2_tce_invalidate_entire(phb, false);
-
- return rc;
-}
-
-void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass)
-{
- int i;
- struct pnv_phb *phb;
- struct pci_dn *pdn;
- struct pnv_ioda_pe *npe;
- struct pci_dev *npdev;
-
- for (i = 0; ; ++i) {
- npdev = pnv_pci_get_npu_dev(gpdev, i);
-
- if (!npdev)
- break;
-
- pdn = pci_get_pdn(npdev);
- if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return;
-
- phb = pci_bus_to_host(npdev->bus)->private_data;
-
- /* We only do bypass if it's enabled on the linked device */
- npe = &phb->ioda.pe_array[pdn->pe_number];
-
- if (bypass) {
- dev_info(&npdev->dev,
- "Using 64-bit DMA iommu bypass\n");
- pnv_npu_dma_set_bypass(npe);
- } else {
- dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
- pnv_npu_dma_set_32(npe);
- }
- }
-}
-
-#ifdef CONFIG_IOMMU_API
/* Switch ownership from platform code to external user (e.g. VFIO) */
static void pnv_npu_take_ownership(struct iommu_table_group *table_group)
{
@@ -359,15 +267,6 @@ struct npu_comp {
/* An NPU descriptor, valid for POWER9 only */
struct npu {
int index;
- __be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS];
- unsigned int mmio_atsd_count;
-
- /* Bitmask for MMIO register usage */
- unsigned long mmio_atsd_usage;
-
- /* Do we need to explicitly flush the nest mmu? */
- bool nmmu_flush;
-
struct npu_comp npucomp;
};
@@ -624,534 +523,8 @@ struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
}
#endif /* CONFIG_IOMMU_API */
-/* Maximum number of nvlinks per npu */
-#define NV_MAX_LINKS 6
-
-/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */
-static int max_npu2_index;
-
-struct npu_context {
- struct mm_struct *mm;
- struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS];
- struct mmu_notifier mn;
- struct kref kref;
- bool nmmu_flush;
-
- /* Callback to stop translation requests on a given GPU */
- void (*release_cb)(struct npu_context *context, void *priv);
-
- /*
- * Private pointer passed to the above callback for usage by
- * device drivers.
- */
- void *priv;
-};
-
-struct mmio_atsd_reg {
- struct npu *npu;
- int reg;
-};
-
-/*
- * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC
- * if none are available.
- */
-static int get_mmio_atsd_reg(struct npu *npu)
-{
- int i;
-
- for (i = 0; i < npu->mmio_atsd_count; i++) {
- if (!test_bit(i, &npu->mmio_atsd_usage))
- if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
- return i;
- }
-
- return -ENOSPC;
-}
-
-static void put_mmio_atsd_reg(struct npu *npu, int reg)
-{
- clear_bit_unlock(reg, &npu->mmio_atsd_usage);
-}
-
-/* MMIO ATSD register offsets */
-#define XTS_ATSD_LAUNCH 0
-#define XTS_ATSD_AVA 1
-#define XTS_ATSD_STAT 2
-
-static unsigned long get_atsd_launch_val(unsigned long pid, unsigned long psize)
-{
- unsigned long launch = 0;
-
- if (psize == MMU_PAGE_COUNT) {
- /* IS set to invalidate entire matching PID */
- launch |= PPC_BIT(12);
- } else {
- /* AP set to invalidate region of psize */
- launch |= (u64)mmu_get_ap(psize) << PPC_BITLSHIFT(17);
- }
-
- /* PRS set to process-scoped */
- launch |= PPC_BIT(13);
-
- /* PID */
- launch |= pid << PPC_BITLSHIFT(38);
-
- /* Leave "No flush" (bit 39) 0 so every ATSD performs a flush */
-
- return launch;
-}
-
-static void mmio_atsd_regs_write(struct mmio_atsd_reg
- mmio_atsd_reg[NV_MAX_NPUS], unsigned long offset,
- unsigned long val)
-{
- struct npu *npu;
- int i, reg;
-
- for (i = 0; i <= max_npu2_index; i++) {
- reg = mmio_atsd_reg[i].reg;
- if (reg < 0)
- continue;
-
- npu = mmio_atsd_reg[i].npu;
- __raw_writeq_be(val, npu->mmio_atsd_regs[reg] + offset);
- }
-}
-
-static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
- unsigned long pid)
-{
- unsigned long launch = get_atsd_launch_val(pid, MMU_PAGE_COUNT);
-
- /* Invalidating the entire process doesn't use a va */
- mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
-}
-
-static void mmio_invalidate_range(struct mmio_atsd_reg
- mmio_atsd_reg[NV_MAX_NPUS], unsigned long pid,
- unsigned long start, unsigned long psize)
-{
- unsigned long launch = get_atsd_launch_val(pid, psize);
-
- /* Write all VAs first */
- mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_AVA, start);
-
- /* Issue one barrier for all address writes */
- eieio();
-
- /* Launch */
- mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
-}
-
-#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
-
-static void mmio_invalidate_wait(
- struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
-{
- struct npu *npu;
- int i, reg;
-
- /* Wait for all invalidations to complete */
- for (i = 0; i <= max_npu2_index; i++) {
- if (mmio_atsd_reg[i].reg < 0)
- continue;
-
- /* Wait for completion */
- npu = mmio_atsd_reg[i].npu;
- reg = mmio_atsd_reg[i].reg;
- while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
- cpu_relax();
- }
-}
-
-/*
- * Acquires all the address translation shootdown (ATSD) registers required to
- * launch an ATSD on all links this npu_context is active on.
- */
-static void acquire_atsd_reg(struct npu_context *npu_context,
- struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
-{
- int i, j;
- struct npu *npu;
- struct pci_dev *npdev;
-
- for (i = 0; i <= max_npu2_index; i++) {
- mmio_atsd_reg[i].reg = -1;
- for (j = 0; j < NV_MAX_LINKS; j++) {
- /*
- * There are no ordering requirements with respect to
- * the setup of struct npu_context, but to ensure
- * consistent behaviour we need to ensure npdev[][] is
- * only read once.
- */
- npdev = READ_ONCE(npu_context->npdev[i][j]);
- if (!npdev)
- continue;
-
- npu = pci_bus_to_host(npdev->bus)->npu;
- if (!npu)
- continue;
-
- mmio_atsd_reg[i].npu = npu;
- mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
- while (mmio_atsd_reg[i].reg < 0) {
- mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
- cpu_relax();
- }
- break;
- }
- }
-}
-
-/*
- * Release previously acquired ATSD registers. To avoid deadlocks the registers
- * must be released in the same order they were acquired above in
- * acquire_atsd_reg.
- */
-static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
-{
- int i;
-
- for (i = 0; i <= max_npu2_index; i++) {
- /*
- * We can't rely on npu_context->npdev[][] being the same here
- * as when acquire_atsd_reg() was called, hence we use the
- * values stored in mmio_atsd_reg during the acquire phase
- * rather than re-reading npdev[][].
- */
- if (mmio_atsd_reg[i].reg < 0)
- continue;
-
- put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg);
- }
-}
-
-/*
- * Invalidate a virtual address range
- */
-static void mmio_invalidate(struct npu_context *npu_context,
- unsigned long start, unsigned long size)
-{
- struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
- unsigned long pid = npu_context->mm->context.id;
- unsigned long atsd_start = 0;
- unsigned long end = start + size - 1;
- int atsd_psize = MMU_PAGE_COUNT;
-
- /*
- * Convert the input range into one of the supported sizes. If the range
- * doesn't fit, use the next larger supported size. Invalidation latency
- * is high, so over-invalidation is preferred to issuing multiple
- * invalidates.
- *
- * A 4K page size isn't supported by NPU/GPU ATS, so that case is
- * ignored.
- */
- if (size == SZ_64K) {
- atsd_start = start;
- atsd_psize = MMU_PAGE_64K;
- } else if (ALIGN_DOWN(start, SZ_2M) == ALIGN_DOWN(end, SZ_2M)) {
- atsd_start = ALIGN_DOWN(start, SZ_2M);
- atsd_psize = MMU_PAGE_2M;
- } else if (ALIGN_DOWN(start, SZ_1G) == ALIGN_DOWN(end, SZ_1G)) {
- atsd_start = ALIGN_DOWN(start, SZ_1G);
- atsd_psize = MMU_PAGE_1G;
- }
-
- if (npu_context->nmmu_flush)
- /*
- * Unfortunately the nest mmu does not support flushing specific
- * addresses so we have to flush the whole mm once before
- * shooting down the GPU translation.
- */
- flush_all_mm(npu_context->mm);
-
- /*
- * Loop over all the NPUs this process is active on and launch
- * an invalidate.
- */
- acquire_atsd_reg(npu_context, mmio_atsd_reg);
-
- if (atsd_psize == MMU_PAGE_COUNT)
- mmio_invalidate_pid(mmio_atsd_reg, pid);
- else
- mmio_invalidate_range(mmio_atsd_reg, pid, atsd_start,
- atsd_psize);
-
- mmio_invalidate_wait(mmio_atsd_reg);
-
- /*
- * The GPU requires two flush ATSDs to ensure all entries have been
- * flushed. We use PID 0 as it will never be used for a process on the
- * GPU.
- */
- mmio_invalidate_pid(mmio_atsd_reg, 0);
- mmio_invalidate_wait(mmio_atsd_reg);
- mmio_invalidate_pid(mmio_atsd_reg, 0);
- mmio_invalidate_wait(mmio_atsd_reg);
-
- release_atsd_reg(mmio_atsd_reg);
-}
-
-static void pnv_npu2_mn_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
-{
- struct npu_context *npu_context = mn_to_npu_context(mn);
-
- /* Call into device driver to stop requests to the NMMU */
- if (npu_context->release_cb)
- npu_context->release_cb(npu_context, npu_context->priv);
-
- /*
- * There should be no more translation requests for this PID, but we
- * need to ensure any entries for it are removed from the TLB.
- */
- mmio_invalidate(npu_context, 0, ~0UL);
-}
-
-static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- struct npu_context *npu_context = mn_to_npu_context(mn);
- mmio_invalidate(npu_context, start, end - start);
-}
-
-static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
- .release = pnv_npu2_mn_release,
- .invalidate_range = pnv_npu2_mn_invalidate_range,
-};
-
-/*
- * Call into OPAL to setup the nmmu context for the current task in
- * the NPU. This must be called to setup the context tables before the
- * GPU issues ATRs. pdev should be a pointed to PCIe GPU device.
- *
- * A release callback should be registered to allow a device driver to
- * be notified that it should not launch any new translation requests
- * as the final TLB invalidate is about to occur.
- *
- * Returns an error if there no contexts are currently available or a
- * npu_context which should be passed to pnv_npu2_handle_fault().
- *
- * mmap_sem must be held in write mode and must not be called from interrupt
- * context.
- */
-struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
- unsigned long flags,
- void (*cb)(struct npu_context *, void *),
- void *priv)
-{
- int rc;
- u32 nvlink_index;
- struct device_node *nvlink_dn;
- struct mm_struct *mm = current->mm;
- struct npu *npu;
- struct npu_context *npu_context;
- struct pci_controller *hose;
-
- /*
- * At present we don't support GPUs connected to multiple NPUs and I'm
- * not sure the hardware does either.
- */
- struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
-
- if (!npdev)
- /* No nvlink associated with this GPU device */
- return ERR_PTR(-ENODEV);
-
- /* We only support DR/PR/HV in pnv_npu2_map_lpar_dev() */
- if (flags & ~(MSR_DR | MSR_PR | MSR_HV))
- return ERR_PTR(-EINVAL);
-
- nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
- if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
- &nvlink_index)))
- return ERR_PTR(-ENODEV);
-
- if (!mm || mm->context.id == 0) {
- /*
- * Kernel thread contexts are not supported and context id 0 is
- * reserved on the GPU.
- */
- return ERR_PTR(-EINVAL);
- }
-
- hose = pci_bus_to_host(npdev->bus);
- npu = hose->npu;
- if (!npu)
- return ERR_PTR(-ENODEV);
-
- /*
- * We store the npu pci device so we can more easily get at the
- * associated npus.
- */
- spin_lock(&npu_context_lock);
- npu_context = mm->context.npu_context;
- if (npu_context) {
- if (npu_context->release_cb != cb ||
- npu_context->priv != priv) {
- spin_unlock(&npu_context_lock);
- return ERR_PTR(-EINVAL);
- }
-
- WARN_ON(!kref_get_unless_zero(&npu_context->kref));
- }
- spin_unlock(&npu_context_lock);
-
- if (!npu_context) {
- /*
- * We can set up these fields without holding the
- * npu_context_lock as the npu_context hasn't been returned to
- * the caller meaning it can't be destroyed. Parallel allocation
- * is protected against by mmap_sem.
- */
- rc = -ENOMEM;
- npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
- if (npu_context) {
- kref_init(&npu_context->kref);
- npu_context->mm = mm;
- npu_context->mn.ops = &nv_nmmu_notifier_ops;
- rc = __mmu_notifier_register(&npu_context->mn, mm);
- }
-
- if (rc) {
- kfree(npu_context);
- return ERR_PTR(rc);
- }
-
- mm->context.npu_context = npu_context;
- }
-
- npu_context->release_cb = cb;
- npu_context->priv = priv;
-
- /*
- * npdev is a pci_dev pointer setup by the PCI code. We assign it to
- * npdev[][] to indicate to the mmu notifiers that an invalidation
- * should also be sent over this nvlink. The notifiers don't use any
- * other fields in npu_context, so we just need to ensure that when they
- * deference npu_context->npdev[][] it is either a valid pointer or
- * NULL.
- */
- WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);
-
- if (!npu->nmmu_flush) {
- /*
- * If we're not explicitly flushing ourselves we need to mark
- * the thread for global flushes
- */
- npu_context->nmmu_flush = false;
- mm_context_add_copro(mm);
- } else
- npu_context->nmmu_flush = true;
-
- return npu_context;
-}
-EXPORT_SYMBOL(pnv_npu2_init_context);
-
-static void pnv_npu2_release_context(struct kref *kref)
-{
- struct npu_context *npu_context =
- container_of(kref, struct npu_context, kref);
-
- if (!npu_context->nmmu_flush)
- mm_context_remove_copro(npu_context->mm);
-
- npu_context->mm->context.npu_context = NULL;
-}
-
-/*
- * Destroy a context on the given GPU. May free the npu_context if it is no
- * longer active on any GPUs. Must not be called from interrupt context.
- */
-void pnv_npu2_destroy_context(struct npu_context *npu_context,
- struct pci_dev *gpdev)
-{
- int removed;
- struct npu *npu;
- struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
- struct device_node *nvlink_dn;
- u32 nvlink_index;
- struct pci_controller *hose;
-
- if (WARN_ON(!npdev))
- return;
-
- hose = pci_bus_to_host(npdev->bus);
- npu = hose->npu;
- if (!npu)
- return;
- nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
- if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
- &nvlink_index)))
- return;
- WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
- spin_lock(&npu_context_lock);
- removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
- spin_unlock(&npu_context_lock);
-
- /*
- * We need to do this outside of pnv_npu2_release_context so that it is
- * outside the spinlock as mmu_notifier_destroy uses SRCU.
- */
- if (removed) {
- mmu_notifier_unregister(&npu_context->mn,
- npu_context->mm);
-
- kfree(npu_context);
- }
-
-}
-EXPORT_SYMBOL(pnv_npu2_destroy_context);
-
-/*
- * Assumes mmap_sem is held for the contexts associated mm.
- */
-int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
- unsigned long *flags, unsigned long *status, int count)
-{
- u64 rc = 0, result = 0;
- int i, is_write;
- struct page *page[1];
- const char __user *u;
- char c;
-
- /* mmap_sem should be held so the struct_mm must be present */
- struct mm_struct *mm = context->mm;
-
- WARN_ON(!rwsem_is_locked(&mm->mmap_sem));
-
- for (i = 0; i < count; i++) {
- is_write = flags[i] & NPU2_WRITE;
- rc = get_user_pages_remote(NULL, mm, ea[i], 1,
- is_write ? FOLL_WRITE : 0,
- page, NULL, NULL);
-
- if (rc != 1) {
- status[i] = rc;
- result = -EFAULT;
- continue;
- }
-
- /* Make sure partition scoped tree gets a pte */
- u = page_address(page[0]);
- if (__get_user(c, u))
- result = -EFAULT;
-
- status[i] = 0;
- put_page(page[0]);
- }
-
- return result;
-}
-EXPORT_SYMBOL(pnv_npu2_handle_fault);
-
int pnv_npu2_init(struct pci_controller *hose)
{
- unsigned int i;
- u64 mmio_atsd;
static int npu_index;
struct npu *npu;
int ret;
@@ -1160,33 +533,18 @@ int pnv_npu2_init(struct pci_controller *hose)
if (!npu)
return -ENOMEM;
- npu->nmmu_flush = of_property_read_bool(hose->dn, "ibm,nmmu-flush");
-
- for (i = 0; i < ARRAY_SIZE(npu->mmio_atsd_regs) &&
- !of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
- i, &mmio_atsd); i++)
- npu->mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
-
- pr_info("NPU%d: Found %d MMIO ATSD registers", hose->global_number, i);
- npu->mmio_atsd_count = i;
- npu->mmio_atsd_usage = 0;
npu_index++;
if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
ret = -ENOSPC;
goto fail_exit;
}
- max_npu2_index = npu_index;
npu->index = npu_index;
hose->npu = npu;
return 0;
fail_exit:
- for (i = 0; i < npu->mmio_atsd_count; ++i)
- iounmap(npu->mmio_atsd_regs[i]);
-
kfree(npu);
-
return ret;
}
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index 36c8fa3647a2..a2aa5e433ac8 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -257,7 +257,7 @@ OPAL_CALL(opal_xive_set_queue_info, OPAL_XIVE_SET_QUEUE_INFO);
OPAL_CALL(opal_xive_donate_page, OPAL_XIVE_DONATE_PAGE);
OPAL_CALL(opal_xive_alloc_vp_block, OPAL_XIVE_ALLOCATE_VP_BLOCK);
OPAL_CALL(opal_xive_free_vp_block, OPAL_XIVE_FREE_VP_BLOCK);
-OPAL_CALL(opal_xive_allocate_irq, OPAL_XIVE_ALLOCATE_IRQ);
+OPAL_CALL(opal_xive_allocate_irq_raw, OPAL_XIVE_ALLOCATE_IRQ);
OPAL_CALL(opal_xive_free_irq, OPAL_XIVE_FREE_IRQ);
OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
@@ -273,7 +273,6 @@ OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT);
OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START);
OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP);
-OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P);
OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP);
OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP);
OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO);
@@ -288,3 +287,6 @@ OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
OPAL_CALL(opal_sensor_read_u64, OPAL_SENSOR_READ_U64);
OPAL_CALL(opal_sensor_group_enable, OPAL_SENSOR_GROUP_ENABLE);
OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT);
+OPAL_CALL(opal_mpipl_update, OPAL_MPIPL_UPDATE);
+OPAL_CALL(opal_mpipl_register_tag, OPAL_MPIPL_REGISTER_TAG);
+OPAL_CALL(opal_mpipl_query_tag, OPAL_MPIPL_QUERY_TAG);
diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c
new file mode 100644
index 000000000000..ed895d82c048
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-core.c
@@ -0,0 +1,636 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Interface for exporting the OPAL ELF core.
+ * Heavily inspired from fs/proc/vmcore.c
+ *
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "opal core: " fmt
+
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/slab.h>
+#include <linux/crash_core.h>
+#include <linux/of.h>
+
+#include <asm/page.h>
+#include <asm/opal.h>
+#include <asm/fadump-internal.h>
+
+#include "opal-fadump.h"
+
+#define MAX_PT_LOAD_CNT 8
+
+/* NT_AUXV note related info */
+#define AUXV_CNT 1
+#define AUXV_DESC_SZ (((2 * AUXV_CNT) + 1) * sizeof(Elf64_Off))
+
+struct opalcore_config {
+ u32 num_cpus;
+ /* PIR value of crashing CPU */
+ u32 crashing_cpu;
+
+ /* CPU state data info from F/W */
+ u64 cpu_state_destination_vaddr;
+ u64 cpu_state_data_size;
+ u64 cpu_state_entry_size;
+
+ /* OPAL memory to be exported as PT_LOAD segments */
+ u64 ptload_addr[MAX_PT_LOAD_CNT];
+ u64 ptload_size[MAX_PT_LOAD_CNT];
+ u64 ptload_cnt;
+
+ /* Pointer to the first PT_LOAD in the ELF core file */
+ Elf64_Phdr *ptload_phdr;
+
+ /* Total size of opalcore file. */
+ size_t opalcore_size;
+
+ /* Buffer for all the ELF core headers and the PT_NOTE */
+ size_t opalcorebuf_sz;
+ char *opalcorebuf;
+
+ /* NT_AUXV buffer */
+ char auxv_buf[AUXV_DESC_SZ];
+};
+
+struct opalcore {
+ struct list_head list;
+ u64 paddr;
+ size_t size;
+ loff_t offset;
+};
+
+static LIST_HEAD(opalcore_list);
+static struct opalcore_config *oc_conf;
+static const struct opal_mpipl_fadump *opalc_metadata;
+static const struct opal_mpipl_fadump *opalc_cpu_metadata;
+
+/*
+ * Set crashing CPU's signal to SIGUSR1. if the kernel is triggered
+ * by kernel, SIGTERM otherwise.
+ */
+bool kernel_initiated;
+
+static struct opalcore * __init get_new_element(void)
+{
+ return kzalloc(sizeof(struct opalcore), GFP_KERNEL);
+}
+
+static inline int is_opalcore_usable(void)
+{
+ return (oc_conf && oc_conf->opalcorebuf != NULL) ? 1 : 0;
+}
+
+static Elf64_Word *append_elf64_note(Elf64_Word *buf, char *name,
+ u32 type, void *data,
+ size_t data_len)
+{
+ Elf64_Nhdr *note = (Elf64_Nhdr *)buf;
+ Elf64_Word namesz = strlen(name) + 1;
+
+ note->n_namesz = cpu_to_be32(namesz);
+ note->n_descsz = cpu_to_be32(data_len);
+ note->n_type = cpu_to_be32(type);
+ buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf64_Word));
+ memcpy(buf, name, namesz);
+ buf += DIV_ROUND_UP(namesz, sizeof(Elf64_Word));
+ memcpy(buf, data, data_len);
+ buf += DIV_ROUND_UP(data_len, sizeof(Elf64_Word));
+
+ return buf;
+}
+
+static void fill_prstatus(struct elf_prstatus *prstatus, int pir,
+ struct pt_regs *regs)
+{
+ memset(prstatus, 0, sizeof(struct elf_prstatus));
+ elf_core_copy_kernel_regs(&(prstatus->pr_reg), regs);
+
+ /*
+ * Overload PID with PIR value.
+ * As a PIR value could also be '0', add an offset of '100'
+ * to every PIR to avoid misinterpretations in GDB.
+ */
+ prstatus->pr_pid = cpu_to_be32(100 + pir);
+ prstatus->pr_ppid = cpu_to_be32(1);
+
+ /*
+ * Indicate SIGUSR1 for crash initiated from kernel.
+ * SIGTERM otherwise.
+ */
+ if (pir == oc_conf->crashing_cpu) {
+ short sig;
+
+ sig = kernel_initiated ? SIGUSR1 : SIGTERM;
+ prstatus->pr_cursig = cpu_to_be16(sig);
+ }
+}
+
+static Elf64_Word *auxv_to_elf64_notes(Elf64_Word *buf,
+ u64 opal_boot_entry)
+{
+ Elf64_Off *bufp = (Elf64_Off *)oc_conf->auxv_buf;
+ int idx = 0;
+
+ memset(bufp, 0, AUXV_DESC_SZ);
+
+ /* Entry point of OPAL */
+ bufp[idx++] = cpu_to_be64(AT_ENTRY);
+ bufp[idx++] = cpu_to_be64(opal_boot_entry);
+
+ /* end of vector */
+ bufp[idx++] = cpu_to_be64(AT_NULL);
+
+ buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME, NT_AUXV,
+ oc_conf->auxv_buf, AUXV_DESC_SZ);
+ return buf;
+}
+
+/*
+ * Read from the ELF header and then the crash dump.
+ * Returns number of bytes read on success, -errno on failure.
+ */
+static ssize_t read_opalcore(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *to,
+ loff_t pos, size_t count)
+{
+ struct opalcore *m;
+ ssize_t tsz, avail;
+ loff_t tpos = pos;
+
+ if (pos >= oc_conf->opalcore_size)
+ return 0;
+
+ /* Adjust count if it goes beyond opalcore size */
+ avail = oc_conf->opalcore_size - pos;
+ if (count > avail)
+ count = avail;
+
+ if (count == 0)
+ return 0;
+
+ /* Read ELF core header and/or PT_NOTE segment */
+ if (tpos < oc_conf->opalcorebuf_sz) {
+ tsz = min_t(size_t, oc_conf->opalcorebuf_sz - tpos, count);
+ memcpy(to, oc_conf->opalcorebuf + tpos, tsz);
+ to += tsz;
+ tpos += tsz;
+ count -= tsz;
+ }
+
+ list_for_each_entry(m, &opalcore_list, list) {
+ /* nothing more to read here */
+ if (count == 0)
+ break;
+
+ if (tpos < m->offset + m->size) {
+ void *addr;
+
+ tsz = min_t(size_t, m->offset + m->size - tpos, count);
+ addr = (void *)(m->paddr + tpos - m->offset);
+ memcpy(to, __va(addr), tsz);
+ to += tsz;
+ tpos += tsz;
+ count -= tsz;
+ }
+ }
+
+ return (tpos - pos);
+}
+
+static struct bin_attribute opal_core_attr = {
+ .attr = {.name = "core", .mode = 0400},
+ .read = read_opalcore
+};
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ *
+ * Each register entry is of 16 bytes, A numerical identifier along with
+ * a GPR/SPR flag in the first 8 bytes and the register value in the next
+ * 8 bytes. For more details refer to F/W documentation.
+ */
+static Elf64_Word * __init opalcore_append_cpu_notes(Elf64_Word *buf)
+{
+ u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize;
+ struct hdat_fadump_thread_hdr *thdr;
+ struct elf_prstatus prstatus;
+ Elf64_Word *first_cpu_note;
+ struct pt_regs regs;
+ char *bufp;
+ int i;
+
+ size_per_thread = oc_conf->cpu_state_entry_size;
+ bufp = __va(oc_conf->cpu_state_destination_vaddr);
+
+ /*
+ * Offset for register entries, entry size and registers count is
+ * duplicated in every thread header in keeping with HDAT format.
+ * Use these values from the first thread header.
+ */
+ thdr = (struct hdat_fadump_thread_hdr *)bufp;
+ regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) +
+ be32_to_cpu(thdr->offset));
+ reg_esize = be32_to_cpu(thdr->esize);
+ regs_cnt = be32_to_cpu(thdr->ecnt);
+
+ pr_debug("--------CPU State Data------------\n");
+ pr_debug("NumCpus : %u\n", oc_conf->num_cpus);
+ pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n",
+ regs_offset, reg_esize, regs_cnt);
+
+ /*
+ * Skip past the first CPU note. Fill this note with the
+ * crashing CPU's prstatus.
+ */
+ first_cpu_note = buf;
+ buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
+ &prstatus, sizeof(prstatus));
+
+ for (i = 0; i < oc_conf->num_cpus; i++, bufp += size_per_thread) {
+ thdr = (struct hdat_fadump_thread_hdr *)bufp;
+ thread_pir = be32_to_cpu(thdr->pir);
+
+ pr_debug("[%04d] PIR: 0x%x, core state: 0x%02x\n",
+ i, thread_pir, thdr->core_state);
+
+ /*
+ * Register state data of MAX cores is provided by firmware,
+ * but some of this cores may not be active. So, while
+ * processing register state data, check core state and
+ * skip threads that belong to inactive cores.
+ */
+ if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE)
+ continue;
+
+ opal_fadump_read_regs((bufp + regs_offset), regs_cnt,
+ reg_esize, false, &regs);
+
+ pr_debug("PIR 0x%x - R1 : 0x%llx, NIP : 0x%llx\n", thread_pir,
+ be64_to_cpu(regs.gpr[1]), be64_to_cpu(regs.nip));
+ fill_prstatus(&prstatus, thread_pir, &regs);
+
+ if (thread_pir != oc_conf->crashing_cpu) {
+ buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME,
+ NT_PRSTATUS, &prstatus,
+ sizeof(prstatus));
+ } else {
+ /*
+ * Add crashing CPU as the first NT_PRSTATUS note for
+ * GDB to process the core file appropriately.
+ */
+ append_elf64_note(first_cpu_note, CRASH_CORE_NOTE_NAME,
+ NT_PRSTATUS, &prstatus,
+ sizeof(prstatus));
+ }
+ }
+
+ return buf;
+}
+
+static int __init create_opalcore(void)
+{
+ u64 opal_boot_entry, opal_base_addr, paddr;
+ u32 hdr_size, cpu_notes_size, count;
+ struct device_node *dn;
+ struct opalcore *new;
+ loff_t opalcore_off;
+ struct page *page;
+ Elf64_Phdr *phdr;
+ Elf64_Ehdr *elf;
+ int i, ret;
+ char *bufp;
+
+ /* Get size of header & CPU notes for OPAL core */
+ hdr_size = (sizeof(Elf64_Ehdr) +
+ ((oc_conf->ptload_cnt + 1) * sizeof(Elf64_Phdr)));
+ cpu_notes_size = ((oc_conf->num_cpus * (CRASH_CORE_NOTE_HEAD_BYTES +
+ CRASH_CORE_NOTE_NAME_BYTES +
+ CRASH_CORE_NOTE_DESC_BYTES)) +
+ (CRASH_CORE_NOTE_HEAD_BYTES +
+ CRASH_CORE_NOTE_NAME_BYTES + AUXV_DESC_SZ));
+
+ /* Allocate buffer to setup OPAL core */
+ oc_conf->opalcorebuf_sz = PAGE_ALIGN(hdr_size + cpu_notes_size);
+ oc_conf->opalcorebuf = alloc_pages_exact(oc_conf->opalcorebuf_sz,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!oc_conf->opalcorebuf) {
+ pr_err("Not enough memory to setup OPAL core (size: %lu)\n",
+ oc_conf->opalcorebuf_sz);
+ oc_conf->opalcorebuf_sz = 0;
+ return -ENOMEM;
+ }
+ count = oc_conf->opalcorebuf_sz / PAGE_SIZE;
+ page = virt_to_page(oc_conf->opalcorebuf);
+ for (i = 0; i < count; i++)
+ mark_page_reserved(page + i);
+
+ pr_debug("opalcorebuf = 0x%llx\n", (u64)oc_conf->opalcorebuf);
+
+ /* Read OPAL related device-tree entries */
+ dn = of_find_node_by_name(NULL, "ibm,opal");
+ if (dn) {
+ ret = of_property_read_u64(dn, "opal-base-address",
+ &opal_base_addr);
+ pr_debug("opal-base-address: %llx\n", opal_base_addr);
+ ret |= of_property_read_u64(dn, "opal-boot-address",
+ &opal_boot_entry);
+ pr_debug("opal-boot-address: %llx\n", opal_boot_entry);
+ }
+ if (!dn || ret)
+ pr_warn("WARNING: Failed to read OPAL base & entry values\n");
+
+ /* Use count to keep track of the program headers */
+ count = 0;
+
+ bufp = oc_conf->opalcorebuf;
+ elf = (Elf64_Ehdr *)bufp;
+ bufp += sizeof(Elf64_Ehdr);
+ memcpy(elf->e_ident, ELFMAG, SELFMAG);
+ elf->e_ident[EI_CLASS] = ELF_CLASS;
+ elf->e_ident[EI_DATA] = ELFDATA2MSB;
+ elf->e_ident[EI_VERSION] = EV_CURRENT;
+ elf->e_ident[EI_OSABI] = ELF_OSABI;
+ memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+ elf->e_type = cpu_to_be16(ET_CORE);
+ elf->e_machine = cpu_to_be16(ELF_ARCH);
+ elf->e_version = cpu_to_be32(EV_CURRENT);
+ elf->e_entry = 0;
+ elf->e_phoff = cpu_to_be64(sizeof(Elf64_Ehdr));
+ elf->e_shoff = 0;
+ elf->e_flags = 0;
+
+ elf->e_ehsize = cpu_to_be16(sizeof(Elf64_Ehdr));
+ elf->e_phentsize = cpu_to_be16(sizeof(Elf64_Phdr));
+ elf->e_phnum = 0;
+ elf->e_shentsize = 0;
+ elf->e_shnum = 0;
+ elf->e_shstrndx = 0;
+
+ phdr = (Elf64_Phdr *)bufp;
+ bufp += sizeof(Elf64_Phdr);
+ phdr->p_type = cpu_to_be32(PT_NOTE);
+ phdr->p_flags = 0;
+ phdr->p_align = 0;
+ phdr->p_paddr = phdr->p_vaddr = 0;
+ phdr->p_offset = cpu_to_be64(hdr_size);
+ phdr->p_filesz = phdr->p_memsz = cpu_to_be64(cpu_notes_size);
+ count++;
+
+ opalcore_off = oc_conf->opalcorebuf_sz;
+ oc_conf->ptload_phdr = (Elf64_Phdr *)bufp;
+ paddr = 0;
+ for (i = 0; i < oc_conf->ptload_cnt; i++) {
+ phdr = (Elf64_Phdr *)bufp;
+ bufp += sizeof(Elf64_Phdr);
+ phdr->p_type = cpu_to_be32(PT_LOAD);
+ phdr->p_flags = cpu_to_be32(PF_R|PF_W|PF_X);
+ phdr->p_align = 0;
+
+ new = get_new_element();
+ if (!new)
+ return -ENOMEM;
+ new->paddr = oc_conf->ptload_addr[i];
+ new->size = oc_conf->ptload_size[i];
+ new->offset = opalcore_off;
+ list_add_tail(&new->list, &opalcore_list);
+
+ phdr->p_paddr = cpu_to_be64(paddr);
+ phdr->p_vaddr = cpu_to_be64(opal_base_addr + paddr);
+ phdr->p_filesz = phdr->p_memsz =
+ cpu_to_be64(oc_conf->ptload_size[i]);
+ phdr->p_offset = cpu_to_be64(opalcore_off);
+
+ count++;
+ opalcore_off += oc_conf->ptload_size[i];
+ paddr += oc_conf->ptload_size[i];
+ }
+
+ elf->e_phnum = cpu_to_be16(count);
+
+ bufp = (char *)opalcore_append_cpu_notes((Elf64_Word *)bufp);
+ bufp = (char *)auxv_to_elf64_notes((Elf64_Word *)bufp, opal_boot_entry);
+
+ oc_conf->opalcore_size = opalcore_off;
+ return 0;
+}
+
+static void opalcore_cleanup(void)
+{
+ if (oc_conf == NULL)
+ return;
+
+ /* Remove OPAL core sysfs file */
+ sysfs_remove_bin_file(opal_kobj, &opal_core_attr);
+ oc_conf->ptload_phdr = NULL;
+ oc_conf->ptload_cnt = 0;
+
+ /* free the buffer used for setting up OPAL core */
+ if (oc_conf->opalcorebuf) {
+ void *end = (void *)((u64)oc_conf->opalcorebuf +
+ oc_conf->opalcorebuf_sz);
+
+ free_reserved_area(oc_conf->opalcorebuf, end, -1, NULL);
+ oc_conf->opalcorebuf = NULL;
+ oc_conf->opalcorebuf_sz = 0;
+ }
+
+ kfree(oc_conf);
+ oc_conf = NULL;
+}
+__exitcall(opalcore_cleanup);
+
+static void __init opalcore_config_init(void)
+{
+ u32 idx, cpu_data_version;
+ struct device_node *np;
+ const __be32 *prop;
+ u64 addr = 0;
+ int i, ret;
+
+ np = of_find_node_by_path("/ibm,opal/dump");
+ if (np == NULL)
+ return;
+
+ if (!of_device_is_compatible(np, "ibm,opal-dump")) {
+ pr_warn("Support missing for this f/w version!\n");
+ return;
+ }
+
+ /* Check if dump has been initiated on last reboot */
+ prop = of_get_property(np, "mpipl-boot", NULL);
+ if (!prop) {
+ of_node_put(np);
+ return;
+ }
+
+ /* Get OPAL metadata */
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_OPAL, &addr);
+ if ((ret != OPAL_SUCCESS) || !addr) {
+ pr_err("Failed to get OPAL metadata (%d)\n", ret);
+ goto error_out;
+ }
+
+ addr = be64_to_cpu(addr);
+ pr_debug("OPAL metadata addr: %llx\n", addr);
+ opalc_metadata = __va(addr);
+
+ /* Get OPAL CPU metadata */
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr);
+ if ((ret != OPAL_SUCCESS) || !addr) {
+ pr_err("Failed to get OPAL CPU metadata (%d)\n", ret);
+ goto error_out;
+ }
+
+ addr = be64_to_cpu(addr);
+ pr_debug("CPU metadata addr: %llx\n", addr);
+ opalc_cpu_metadata = __va(addr);
+
+ /* Allocate memory for config buffer */
+ oc_conf = kzalloc(sizeof(struct opalcore_config), GFP_KERNEL);
+ if (oc_conf == NULL)
+ goto error_out;
+
+ /* Parse OPAL metadata */
+ if (opalc_metadata->version != OPAL_MPIPL_VERSION) {
+ pr_warn("Supported OPAL metadata version: %u, found: %u!\n",
+ OPAL_MPIPL_VERSION, opalc_metadata->version);
+ pr_warn("WARNING: F/W using newer OPAL metadata format!!\n");
+ }
+
+ oc_conf->ptload_cnt = 0;
+ idx = be32_to_cpu(opalc_metadata->region_cnt);
+ if (idx > MAX_PT_LOAD_CNT) {
+ pr_warn("WARNING: OPAL regions count (%d) adjusted to limit (%d)",
+ MAX_PT_LOAD_CNT, idx);
+ idx = MAX_PT_LOAD_CNT;
+ }
+ for (i = 0; i < idx; i++) {
+ oc_conf->ptload_addr[oc_conf->ptload_cnt] =
+ be64_to_cpu(opalc_metadata->region[i].dest);
+ oc_conf->ptload_size[oc_conf->ptload_cnt++] =
+ be64_to_cpu(opalc_metadata->region[i].size);
+ }
+ oc_conf->ptload_cnt = i;
+ oc_conf->crashing_cpu = be32_to_cpu(opalc_metadata->crashing_pir);
+
+ if (!oc_conf->ptload_cnt) {
+ pr_err("OPAL memory regions not found\n");
+ goto error_out;
+ }
+
+ /* Parse OPAL CPU metadata */
+ cpu_data_version = be32_to_cpu(opalc_cpu_metadata->cpu_data_version);
+ if (cpu_data_version != HDAT_FADUMP_CPU_DATA_VER) {
+ pr_warn("Supported CPU data version: %u, found: %u!\n",
+ HDAT_FADUMP_CPU_DATA_VER, cpu_data_version);
+ pr_warn("WARNING: F/W using newer CPU state data format!!\n");
+ }
+
+ addr = be64_to_cpu(opalc_cpu_metadata->region[0].dest);
+ if (!addr) {
+ pr_err("CPU state data not found!\n");
+ goto error_out;
+ }
+ oc_conf->cpu_state_destination_vaddr = (u64)__va(addr);
+
+ oc_conf->cpu_state_data_size =
+ be64_to_cpu(opalc_cpu_metadata->region[0].size);
+ oc_conf->cpu_state_entry_size =
+ be32_to_cpu(opalc_cpu_metadata->cpu_data_size);
+
+ if ((oc_conf->cpu_state_entry_size == 0) ||
+ (oc_conf->cpu_state_entry_size > oc_conf->cpu_state_data_size)) {
+ pr_err("CPU state data is invalid.\n");
+ goto error_out;
+ }
+ oc_conf->num_cpus = (oc_conf->cpu_state_data_size /
+ oc_conf->cpu_state_entry_size);
+
+ of_node_put(np);
+ return;
+
+error_out:
+ pr_err("Could not export /sys/firmware/opal/core\n");
+ opalcore_cleanup();
+ of_node_put(np);
+}
+
+static ssize_t fadump_release_opalcore_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int input = -1;
+
+ if (kstrtoint(buf, 0, &input))
+ return -EINVAL;
+
+ if (input == 1) {
+ if (oc_conf == NULL) {
+ pr_err("'/sys/firmware/opal/core' file not accessible!\n");
+ return -EPERM;
+ }
+
+ /*
+ * Take away '/sys/firmware/opal/core' and release all memory
+ * used for exporting this file.
+ */
+ opalcore_cleanup();
+ } else
+ return -EINVAL;
+
+ return count;
+}
+
+static struct kobj_attribute opalcore_rel_attr = __ATTR(fadump_release_opalcore,
+ 0200, NULL,
+ fadump_release_opalcore_store);
+
+static int __init opalcore_init(void)
+{
+ int rc = -1;
+
+ opalcore_config_init();
+
+ if (oc_conf == NULL)
+ return rc;
+
+ create_opalcore();
+
+ /*
+ * If oc_conf->opalcorebuf= is set in the 2nd kernel,
+ * then capture the dump.
+ */
+ if (!(is_opalcore_usable())) {
+ pr_err("Failed to export /sys/firmware/opal/core\n");
+ opalcore_cleanup();
+ return rc;
+ }
+
+ /* Set OPAL core file size */
+ opal_core_attr.size = oc_conf->opalcore_size;
+
+ /* Export OPAL core sysfs file */
+ rc = sysfs_create_bin_file(opal_kobj, &opal_core_attr);
+ if (rc != 0) {
+ pr_err("Failed to export /sys/firmware/opal/core\n");
+ opalcore_cleanup();
+ return rc;
+ }
+
+ rc = sysfs_create_file(kernel_kobj, &opalcore_rel_attr.attr);
+ if (rc) {
+ pr_warn("unable to create sysfs file fadump_release_opalcore (%d)\n",
+ rc);
+ }
+
+ return 0;
+}
+fs_initcall(opalcore_init);
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c
new file mode 100644
index 000000000000..d361d37d975f
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-fadump.c
@@ -0,0 +1,716 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Firmware-Assisted Dump support on POWER platform (OPAL).
+ *
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "opal fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <linux/mm.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/opal.h>
+#include <asm/fadump-internal.h>
+
+#include "opal-fadump.h"
+
+
+#ifdef CONFIG_PRESERVE_FA_DUMP
+/*
+ * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
+ * ensure crash data is preserved in hope that the subsequent memory
+ * preserving kernel boot is going to process this crash data.
+ */
+void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+{
+ const struct opal_fadump_mem_struct *opal_fdm_active;
+ const __be32 *prop;
+ unsigned long dn;
+ u64 addr = 0;
+ s64 ret;
+
+ dn = of_get_flat_dt_subnode_by_name(node, "dump");
+ if (dn == -FDT_ERR_NOTFOUND)
+ return;
+
+ /*
+ * Check if dump has been initiated on last reboot.
+ */
+ prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
+ if (!prop)
+ return;
+
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
+ if ((ret != OPAL_SUCCESS) || !addr) {
+ pr_debug("Could not get Kernel metadata (%lld)\n", ret);
+ return;
+ }
+
+ /*
+ * Preserve memory only if kernel memory regions are registered
+ * with f/w for MPIPL.
+ */
+ addr = be64_to_cpu(addr);
+ pr_debug("Kernel metadata addr: %llx\n", addr);
+ opal_fdm_active = (void *)addr;
+ if (opal_fdm_active->registered_regions == 0)
+ return;
+
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr);
+ if ((ret != OPAL_SUCCESS) || !addr) {
+ pr_err("Failed to get boot memory tag (%lld)\n", ret);
+ return;
+ }
+
+ /*
+ * Memory below this address can be used for booting a
+ * capture kernel or petitboot kernel. Preserve everything
+ * above this address for processing crashdump.
+ */
+ fadump_conf->boot_mem_top = be64_to_cpu(addr);
+ pr_debug("Preserve everything above %llx\n", fadump_conf->boot_mem_top);
+
+ pr_info("Firmware-assisted dump is active.\n");
+ fadump_conf->dump_active = 1;
+}
+
+#else /* CONFIG_PRESERVE_FA_DUMP */
+static const struct opal_fadump_mem_struct *opal_fdm_active;
+static const struct opal_mpipl_fadump *opal_cpu_metadata;
+static struct opal_fadump_mem_struct *opal_fdm;
+
+#ifdef CONFIG_OPAL_CORE
+extern bool kernel_initiated;
+#endif
+
+static int opal_fadump_unregister(struct fw_dump *fadump_conf);
+
+static void opal_fadump_update_config(struct fw_dump *fadump_conf,
+ const struct opal_fadump_mem_struct *fdm)
+{
+ pr_debug("Boot memory regions count: %d\n", fdm->region_cnt);
+
+ /*
+ * The destination address of the first boot memory region is the
+ * destination address of boot memory regions.
+ */
+ fadump_conf->boot_mem_dest_addr = fdm->rgn[0].dest;
+ pr_debug("Destination address of boot memory regions: %#016llx\n",
+ fadump_conf->boot_mem_dest_addr);
+
+ fadump_conf->fadumphdr_addr = fdm->fadumphdr_addr;
+}
+
+/*
+ * This function is called in the capture kernel to get configuration details
+ * from metadata setup by the first kernel.
+ */
+static void opal_fadump_get_config(struct fw_dump *fadump_conf,
+ const struct opal_fadump_mem_struct *fdm)
+{
+ unsigned long base, size, last_end, hole_size;
+ int i;
+
+ if (!fadump_conf->dump_active)
+ return;
+
+ last_end = 0;
+ hole_size = 0;
+ fadump_conf->boot_memory_size = 0;
+
+ pr_debug("Boot memory regions:\n");
+ for (i = 0; i < fdm->region_cnt; i++) {
+ base = fdm->rgn[i].src;
+ size = fdm->rgn[i].size;
+ pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size);
+
+ fadump_conf->boot_mem_addr[i] = base;
+ fadump_conf->boot_mem_sz[i] = size;
+ fadump_conf->boot_memory_size += size;
+ hole_size += (base - last_end);
+
+ last_end = base + size;
+ }
+
+ /*
+ * Start address of reserve dump area (permanent reservation) for
+ * re-registering FADump after dump capture.
+ */
+ fadump_conf->reserve_dump_area_start = fdm->rgn[0].dest;
+
+ /*
+ * Rarely, but it can so happen that system crashes before all
+ * boot memory regions are registered for MPIPL. In such
+ * cases, warn that the vmcore may not be accurate and proceed
+ * anyway as that is the best bet considering free pages, cache
+ * pages, user pages, etc are usually filtered out.
+ *
+ * Hope the memory that could not be preserved only has pages
+ * that are usually filtered out while saving the vmcore.
+ */
+ if (fdm->region_cnt > fdm->registered_regions) {
+ pr_warn("Not all memory regions were saved!!!\n");
+ pr_warn(" Unsaved memory regions:\n");
+ i = fdm->registered_regions;
+ while (i < fdm->region_cnt) {
+ pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n",
+ i, fdm->rgn[i].src, fdm->rgn[i].size);
+ i++;
+ }
+
+ pr_warn("If the unsaved regions only contain pages that are filtered out (eg. free/user pages), the vmcore should still be usable.\n");
+ pr_warn("WARNING: If the unsaved regions contain kernel pages, the vmcore will be corrupted.\n");
+ }
+
+ fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size);
+ fadump_conf->boot_mem_regs_cnt = fdm->region_cnt;
+ opal_fadump_update_config(fadump_conf, fdm);
+}
+
+/* Initialize kernel metadata */
+static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm)
+{
+ fdm->version = OPAL_FADUMP_VERSION;
+ fdm->region_cnt = 0;
+ fdm->registered_regions = 0;
+ fdm->fadumphdr_addr = 0;
+}
+
+static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf)
+{
+ u64 addr = fadump_conf->reserve_dump_area_start;
+ int i;
+
+ opal_fdm = __va(fadump_conf->kernel_metadata);
+ opal_fadump_init_metadata(opal_fdm);
+
+ /* Boot memory regions */
+ for (i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) {
+ opal_fdm->rgn[i].src = fadump_conf->boot_mem_addr[i];
+ opal_fdm->rgn[i].dest = addr;
+ opal_fdm->rgn[i].size = fadump_conf->boot_mem_sz[i];
+
+ opal_fdm->region_cnt++;
+ addr += fadump_conf->boot_mem_sz[i];
+ }
+
+ /*
+ * Kernel metadata is passed to f/w and retrieved in capture kerenl.
+ * So, use it to save fadump header address instead of calculating it.
+ */
+ opal_fdm->fadumphdr_addr = (opal_fdm->rgn[0].dest +
+ fadump_conf->boot_memory_size);
+
+ opal_fadump_update_config(fadump_conf, opal_fdm);
+
+ return addr;
+}
+
+static u64 opal_fadump_get_metadata_size(void)
+{
+ return PAGE_ALIGN(sizeof(struct opal_fadump_mem_struct));
+}
+
+static int opal_fadump_setup_metadata(struct fw_dump *fadump_conf)
+{
+ int err = 0;
+ s64 ret;
+
+ /*
+ * Use the last page(s) in FADump memory reservation for
+ * kernel metadata.
+ */
+ fadump_conf->kernel_metadata = (fadump_conf->reserve_dump_area_start +
+ fadump_conf->reserve_dump_area_size -
+ opal_fadump_get_metadata_size());
+ pr_info("Kernel metadata addr: %llx\n", fadump_conf->kernel_metadata);
+
+ /* Initialize kernel metadata before registering the address with f/w */
+ opal_fdm = __va(fadump_conf->kernel_metadata);
+ opal_fadump_init_metadata(opal_fdm);
+
+ /*
+ * Register metadata address with f/w. Can be retrieved in
+ * the capture kernel.
+ */
+ ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL,
+ fadump_conf->kernel_metadata);
+ if (ret != OPAL_SUCCESS) {
+ pr_err("Failed to set kernel metadata tag!\n");
+ err = -EPERM;
+ }
+
+ /*
+ * Register boot memory top address with f/w. Should be retrieved
+ * by a kernel that intends to preserve crash'ed kernel's memory.
+ */
+ ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_BOOT_MEM,
+ fadump_conf->boot_mem_top);
+ if (ret != OPAL_SUCCESS) {
+ pr_err("Failed to set boot memory tag!\n");
+ err = -EPERM;
+ }
+
+ return err;
+}
+
+static u64 opal_fadump_get_bootmem_min(void)
+{
+ return OPAL_FADUMP_MIN_BOOT_MEM;
+}
+
+static int opal_fadump_register(struct fw_dump *fadump_conf)
+{
+ s64 rc = OPAL_PARAMETER;
+ int i, err = -EIO;
+
+ for (i = 0; i < opal_fdm->region_cnt; i++) {
+ rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE,
+ opal_fdm->rgn[i].src,
+ opal_fdm->rgn[i].dest,
+ opal_fdm->rgn[i].size);
+ if (rc != OPAL_SUCCESS)
+ break;
+
+ opal_fdm->registered_regions++;
+ }
+
+ switch (rc) {
+ case OPAL_SUCCESS:
+ pr_info("Registration is successful!\n");
+ fadump_conf->dump_registered = 1;
+ err = 0;
+ break;
+ case OPAL_RESOURCE:
+ /* If MAX regions limit in f/w is hit, warn and proceed. */
+ pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n",
+ (opal_fdm->region_cnt - opal_fdm->registered_regions));
+ fadump_conf->dump_registered = 1;
+ err = 0;
+ break;
+ case OPAL_PARAMETER:
+ pr_err("Failed to register. Parameter Error(%lld).\n", rc);
+ break;
+ case OPAL_HARDWARE:
+ pr_err("Support not available.\n");
+ fadump_conf->fadump_supported = 0;
+ fadump_conf->fadump_enabled = 0;
+ break;
+ default:
+ pr_err("Failed to register. Unknown Error(%lld).\n", rc);
+ break;
+ }
+
+ /*
+ * If some regions were registered before OPAL_MPIPL_ADD_RANGE
+ * OPAL call failed, unregister all regions.
+ */
+ if ((err < 0) && (opal_fdm->registered_regions > 0))
+ opal_fadump_unregister(fadump_conf);
+
+ return err;
+}
+
+static int opal_fadump_unregister(struct fw_dump *fadump_conf)
+{
+ s64 rc;
+
+ rc = opal_mpipl_update(OPAL_MPIPL_REMOVE_ALL, 0, 0, 0);
+ if (rc) {
+ pr_err("Failed to un-register - unexpected Error(%lld).\n", rc);
+ return -EIO;
+ }
+
+ opal_fdm->registered_regions = 0;
+ fadump_conf->dump_registered = 0;
+ return 0;
+}
+
+static int opal_fadump_invalidate(struct fw_dump *fadump_conf)
+{
+ s64 rc;
+
+ rc = opal_mpipl_update(OPAL_MPIPL_FREE_PRESERVED_MEMORY, 0, 0, 0);
+ if (rc) {
+ pr_err("Failed to invalidate - unexpected Error(%lld).\n", rc);
+ return -EIO;
+ }
+
+ fadump_conf->dump_active = 0;
+ opal_fdm_active = NULL;
+ return 0;
+}
+
+static void opal_fadump_cleanup(struct fw_dump *fadump_conf)
+{
+ s64 ret;
+
+ ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL, 0);
+ if (ret != OPAL_SUCCESS)
+ pr_warn("Could not reset (%llu) kernel metadata tag!\n", ret);
+}
+
+/*
+ * Verify if CPU state data is available. If available, do a bit of sanity
+ * checking before processing this data.
+ */
+static bool __init is_opal_fadump_cpu_data_valid(struct fw_dump *fadump_conf)
+{
+ if (!opal_cpu_metadata)
+ return false;
+
+ fadump_conf->cpu_state_data_version =
+ be32_to_cpu(opal_cpu_metadata->cpu_data_version);
+ fadump_conf->cpu_state_entry_size =
+ be32_to_cpu(opal_cpu_metadata->cpu_data_size);
+ fadump_conf->cpu_state_dest_vaddr =
+ (u64)__va(be64_to_cpu(opal_cpu_metadata->region[0].dest));
+ fadump_conf->cpu_state_data_size =
+ be64_to_cpu(opal_cpu_metadata->region[0].size);
+
+ if (fadump_conf->cpu_state_data_version != HDAT_FADUMP_CPU_DATA_VER) {
+ pr_warn("Supported CPU state data version: %u, found: %d!\n",
+ HDAT_FADUMP_CPU_DATA_VER,
+ fadump_conf->cpu_state_data_version);
+ pr_warn("WARNING: F/W using newer CPU state data format!!\n");
+ }
+
+ if ((fadump_conf->cpu_state_dest_vaddr == 0) ||
+ (fadump_conf->cpu_state_entry_size == 0) ||
+ (fadump_conf->cpu_state_entry_size >
+ fadump_conf->cpu_state_data_size)) {
+ pr_err("CPU state data is invalid. Ignoring!\n");
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Convert CPU state data saved at the time of crash into ELF notes.
+ *
+ * While the crashing CPU's register data is saved by the kernel, CPU state
+ * data for all CPUs is saved by f/w. In CPU state data provided by f/w,
+ * each register entry is of 16 bytes, a numerical identifier along with
+ * a GPR/SPR flag in the first 8 bytes and the register value in the next
+ * 8 bytes. For more details refer to F/W documentation. If this data is
+ * missing or in unsupported format, append crashing CPU's register data
+ * saved by the kernel in the PT_NOTE, to have something to work with in
+ * the vmcore file.
+ */
+static int __init
+opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf,
+ struct fadump_crash_info_header *fdh)
+{
+ u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize;
+ struct hdat_fadump_thread_hdr *thdr;
+ bool is_cpu_data_valid = false;
+ u32 num_cpus = 1, *note_buf;
+ struct pt_regs regs;
+ char *bufp;
+ int rc, i;
+
+ if (is_opal_fadump_cpu_data_valid(fadump_conf)) {
+ size_per_thread = fadump_conf->cpu_state_entry_size;
+ num_cpus = (fadump_conf->cpu_state_data_size / size_per_thread);
+ bufp = __va(fadump_conf->cpu_state_dest_vaddr);
+ is_cpu_data_valid = true;
+ }
+
+ rc = fadump_setup_cpu_notes_buf(num_cpus);
+ if (rc != 0)
+ return rc;
+
+ note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr;
+ if (!is_cpu_data_valid)
+ goto out;
+
+ /*
+ * Offset for register entries, entry size and registers count is
+ * duplicated in every thread header in keeping with HDAT format.
+ * Use these values from the first thread header.
+ */
+ thdr = (struct hdat_fadump_thread_hdr *)bufp;
+ regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) +
+ be32_to_cpu(thdr->offset));
+ reg_esize = be32_to_cpu(thdr->esize);
+ regs_cnt = be32_to_cpu(thdr->ecnt);
+
+ pr_debug("--------CPU State Data------------\n");
+ pr_debug("NumCpus : %u\n", num_cpus);
+ pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n",
+ regs_offset, reg_esize, regs_cnt);
+
+ for (i = 0; i < num_cpus; i++, bufp += size_per_thread) {
+ thdr = (struct hdat_fadump_thread_hdr *)bufp;
+
+ thread_pir = be32_to_cpu(thdr->pir);
+ pr_debug("[%04d] PIR: 0x%x, core state: 0x%02x\n",
+ i, thread_pir, thdr->core_state);
+
+ /*
+ * If this is kernel initiated crash, crashing_cpu would be set
+ * appropriately and register data of the crashing CPU saved by
+ * crashing kernel. Add this saved register data of crashing CPU
+ * to elf notes and populate the pt_regs for the remaining CPUs
+ * from register state data provided by firmware.
+ */
+ if (fdh->crashing_cpu == thread_pir) {
+ note_buf = fadump_regs_to_elf_notes(note_buf,
+ &fdh->regs);
+ pr_debug("Crashing CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+ fdh->crashing_cpu, fdh->regs.gpr[1],
+ fdh->regs.nip);
+ continue;
+ }
+
+ /*
+ * Register state data of MAX cores is provided by firmware,
+ * but some of this cores may not be active. So, while
+ * processing register state data, check core state and
+ * skip threads that belong to inactive cores.
+ */
+ if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE)
+ continue;
+
+ opal_fadump_read_regs((bufp + regs_offset), regs_cnt,
+ reg_esize, true, &regs);
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ pr_debug("CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+ thread_pir, regs.gpr[1], regs.nip);
+ }
+
+out:
+ /*
+ * CPU state data is invalid/unsupported. Try appending crashing CPU's
+ * register data, if it is saved by the kernel.
+ */
+ if (fadump_conf->cpu_notes_buf_vaddr == (u64)note_buf) {
+ if (fdh->crashing_cpu == FADUMP_CPU_UNKNOWN) {
+ fadump_free_cpu_notes_buf();
+ return -ENODEV;
+ }
+
+ pr_warn("WARNING: appending only crashing CPU's register data\n");
+ note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
+ }
+
+ final_note(note_buf);
+
+ pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+ fdh->elfcorehdr_addr);
+ fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr));
+ return 0;
+}
+
+static int __init opal_fadump_process(struct fw_dump *fadump_conf)
+{
+ struct fadump_crash_info_header *fdh;
+ int rc = -EINVAL;
+
+ if (!opal_fdm_active || !fadump_conf->fadumphdr_addr)
+ return rc;
+
+ /* Validate the fadump crash info header */
+ fdh = __va(fadump_conf->fadumphdr_addr);
+ if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+ pr_err("Crash info header is not valid.\n");
+ return rc;
+ }
+
+#ifdef CONFIG_OPAL_CORE
+ /*
+ * If this is a kernel initiated crash, crashing_cpu would be set
+ * appropriately and register data of the crashing CPU saved by
+ * crashing kernel. Add this saved register data of crashing CPU
+ * to elf notes and populate the pt_regs for the remaining CPUs
+ * from register state data provided by firmware.
+ */
+ if (fdh->crashing_cpu != FADUMP_CPU_UNKNOWN)
+ kernel_initiated = true;
+#endif
+
+ rc = opal_fadump_build_cpu_notes(fadump_conf, fdh);
+ if (rc)
+ return rc;
+
+ /*
+ * We are done validating dump info and elfcore header is now ready
+ * to be exported. set elfcorehdr_addr so that vmcore module will
+ * export the elfcore header through '/proc/vmcore'.
+ */
+ elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+ return rc;
+}
+
+static void opal_fadump_region_show(struct fw_dump *fadump_conf,
+ struct seq_file *m)
+{
+ const struct opal_fadump_mem_struct *fdm_ptr;
+ u64 dumped_bytes = 0;
+ int i;
+
+ if (fadump_conf->dump_active)
+ fdm_ptr = opal_fdm_active;
+ else
+ fdm_ptr = opal_fdm;
+
+ for (i = 0; i < fdm_ptr->region_cnt; i++) {
+ /*
+ * Only regions that are registered for MPIPL
+ * would have dump data.
+ */
+ if ((fadump_conf->dump_active) &&
+ (i < fdm_ptr->registered_regions))
+ dumped_bytes = fdm_ptr->rgn[i].size;
+
+ seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
+ fdm_ptr->rgn[i].src, fdm_ptr->rgn[i].dest);
+ seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
+ fdm_ptr->rgn[i].size, dumped_bytes);
+ }
+
+ /* Dump is active. Show reserved area start address. */
+ if (fadump_conf->dump_active) {
+ seq_printf(m, "\nMemory above %#016lx is reserved for saving crash dump\n",
+ fadump_conf->reserve_dump_area_start);
+ }
+}
+
+static void opal_fadump_trigger(struct fadump_crash_info_header *fdh,
+ const char *msg)
+{
+ int rc;
+
+ /*
+ * Unlike on pSeries platform, logical CPU number is not provided
+ * with architected register state data. So, store the crashing
+ * CPU's PIR instead to plug the appropriate register data for
+ * crashing CPU in the vmcore file.
+ */
+ fdh->crashing_cpu = (u32)mfspr(SPRN_PIR);
+
+ rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg);
+ if (rc == OPAL_UNSUPPORTED) {
+ pr_emerg("Reboot type %d not supported.\n",
+ OPAL_REBOOT_MPIPL);
+ } else if (rc == OPAL_HARDWARE)
+ pr_emerg("No backend support for MPIPL!\n");
+}
+
+static struct fadump_ops opal_fadump_ops = {
+ .fadump_init_mem_struct = opal_fadump_init_mem_struct,
+ .fadump_get_metadata_size = opal_fadump_get_metadata_size,
+ .fadump_setup_metadata = opal_fadump_setup_metadata,
+ .fadump_get_bootmem_min = opal_fadump_get_bootmem_min,
+ .fadump_register = opal_fadump_register,
+ .fadump_unregister = opal_fadump_unregister,
+ .fadump_invalidate = opal_fadump_invalidate,
+ .fadump_cleanup = opal_fadump_cleanup,
+ .fadump_process = opal_fadump_process,
+ .fadump_region_show = opal_fadump_region_show,
+ .fadump_trigger = opal_fadump_trigger,
+};
+
+void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+{
+ const __be32 *prop;
+ unsigned long dn;
+ u64 addr = 0;
+ int i, len;
+ s64 ret;
+
+ /*
+ * Check if Firmware-Assisted Dump is supported. if yes, check
+ * if dump has been initiated on last reboot.
+ */
+ dn = of_get_flat_dt_subnode_by_name(node, "dump");
+ if (dn == -FDT_ERR_NOTFOUND) {
+ pr_debug("FADump support is missing!\n");
+ return;
+ }
+
+ if (!of_flat_dt_is_compatible(dn, "ibm,opal-dump")) {
+ pr_err("Support missing for this f/w version!\n");
+ return;
+ }
+
+ prop = of_get_flat_dt_prop(dn, "fw-load-area", &len);
+ if (prop) {
+ /*
+ * Each f/w load area is an (address,size) pair,
+ * 2 cells each, totalling 4 cells per range.
+ */
+ for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+ u64 base, end;
+
+ base = of_read_number(prop + (i * 4) + 0, 2);
+ end = base;
+ end += of_read_number(prop + (i * 4) + 2, 2);
+ if (end > OPAL_FADUMP_MIN_BOOT_MEM) {
+ pr_err("F/W load area: 0x%llx-0x%llx\n",
+ base, end);
+ pr_err("F/W version not supported!\n");
+ return;
+ }
+ }
+ }
+
+ fadump_conf->ops = &opal_fadump_ops;
+ fadump_conf->fadump_supported = 1;
+
+ /*
+ * Firmware supports 32-bit field for size. Align it to PAGE_SIZE
+ * and request firmware to copy multiple kernel boot memory regions.
+ */
+ fadump_conf->max_copy_size = _ALIGN_DOWN(U32_MAX, PAGE_SIZE);
+
+ /*
+ * Check if dump has been initiated on last reboot.
+ */
+ prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
+ if (!prop)
+ return;
+
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
+ if ((ret != OPAL_SUCCESS) || !addr) {
+ pr_err("Failed to get Kernel metadata (%lld)\n", ret);
+ return;
+ }
+
+ addr = be64_to_cpu(addr);
+ pr_debug("Kernel metadata addr: %llx\n", addr);
+
+ opal_fdm_active = __va(addr);
+ if (opal_fdm_active->version != OPAL_FADUMP_VERSION) {
+ pr_warn("Supported kernel metadata version: %u, found: %d!\n",
+ OPAL_FADUMP_VERSION, opal_fdm_active->version);
+ pr_warn("WARNING: Kernel metadata format mismatch identified! Core file maybe corrupted..\n");
+ }
+
+ /* Kernel regions not registered with f/w for MPIPL */
+ if (opal_fdm_active->registered_regions == 0) {
+ opal_fdm_active = NULL;
+ return;
+ }
+
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr);
+ if (addr) {
+ addr = be64_to_cpu(addr);
+ pr_debug("CPU metadata addr: %llx\n", addr);
+ opal_cpu_metadata = __va(addr);
+ }
+
+ pr_info("Firmware-assisted dump is active.\n");
+ fadump_conf->dump_active = 1;
+ opal_fadump_get_config(fadump_conf, opal_fdm_active);
+}
+#endif /* !CONFIG_PRESERVE_FA_DUMP */
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h
new file mode 100644
index 000000000000..f1e9ecf548c5
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-fadump.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Firmware-Assisted Dump support on POWER platform (OPAL).
+ *
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#ifndef _POWERNV_OPAL_FADUMP_H
+#define _POWERNV_OPAL_FADUMP_H
+
+#include <asm/reg.h>
+
+/*
+ * With kernel & initrd loaded at 512MB (with 256MB size), enforce a minimum
+ * boot memory size of 768MB to ensure f/w loading kernel and initrd doesn't
+ * mess with crash'ed kernel's memory during MPIPL.
+ */
+#define OPAL_FADUMP_MIN_BOOT_MEM (0x30000000UL)
+
+/*
+ * OPAL FADump metadata structure format version
+ *
+ * OPAL FADump kernel metadata structure stores kernel metadata needed to
+ * register-for/process crash dump. Format version is used to keep a tab on
+ * the changes in the structure format. The changes, if any, to the format
+ * are expected to be minimal and backward compatible.
+ */
+#define OPAL_FADUMP_VERSION 0x1
+
+/*
+ * OPAL FADump kernel metadata
+ *
+ * The address of this structure will be registered with f/w for retrieving
+ * and processing during crash dump.
+ */
+struct opal_fadump_mem_struct {
+ u8 version;
+ u8 reserved[3];
+ u16 region_cnt; /* number of regions */
+ u16 registered_regions; /* Regions registered for MPIPL */
+ u64 fadumphdr_addr;
+ struct opal_mpipl_region rgn[FADUMP_MAX_MEM_REGS];
+} __packed;
+
+/*
+ * CPU state data
+ *
+ * CPU state data information is provided by f/w. The format for this data
+ * is defined in the HDAT spec. Version is used to keep a tab on the changes
+ * in this CPU state data format. Changes to this format are unlikely, but
+ * if there are any changes, please refer to latest HDAT specification.
+ */
+#define HDAT_FADUMP_CPU_DATA_VER 1
+
+#define HDAT_FADUMP_CORE_INACTIVE (0x0F)
+
+/* HDAT thread header for register entries */
+struct hdat_fadump_thread_hdr {
+ __be32 pir;
+ /* 0x00 - 0x0F - The corresponding stop state of the core */
+ u8 core_state;
+ u8 reserved[3];
+
+ __be32 offset; /* Offset to Register Entries array */
+ __be32 ecnt; /* Number of entries */
+ __be32 esize; /* Alloc size of each array entry in bytes */
+ __be32 eactsz; /* Actual size of each array entry in bytes */
+} __packed;
+
+/* Register types populated by f/w */
+#define HDAT_FADUMP_REG_TYPE_GPR 0x01
+#define HDAT_FADUMP_REG_TYPE_SPR 0x02
+
+/* ID numbers used by f/w while populating certain registers */
+#define HDAT_FADUMP_REG_ID_NIP 0x7D0
+#define HDAT_FADUMP_REG_ID_MSR 0x7D1
+#define HDAT_FADUMP_REG_ID_CCR 0x7D2
+
+/* HDAT register entry. */
+struct hdat_fadump_reg_entry {
+ __be32 reg_type;
+ __be32 reg_num;
+ __be64 reg_val;
+} __packed;
+
+static inline void opal_fadump_set_regval_regnum(struct pt_regs *regs,
+ u32 reg_type, u32 reg_num,
+ u64 reg_val)
+{
+ if (reg_type == HDAT_FADUMP_REG_TYPE_GPR) {
+ if (reg_num < 32)
+ regs->gpr[reg_num] = reg_val;
+ return;
+ }
+
+ switch (reg_num) {
+ case SPRN_CTR:
+ regs->ctr = reg_val;
+ break;
+ case SPRN_LR:
+ regs->link = reg_val;
+ break;
+ case SPRN_XER:
+ regs->xer = reg_val;
+ break;
+ case SPRN_DAR:
+ regs->dar = reg_val;
+ break;
+ case SPRN_DSISR:
+ regs->dsisr = reg_val;
+ break;
+ case HDAT_FADUMP_REG_ID_NIP:
+ regs->nip = reg_val;
+ break;
+ case HDAT_FADUMP_REG_ID_MSR:
+ regs->msr = reg_val;
+ break;
+ case HDAT_FADUMP_REG_ID_CCR:
+ regs->ccr = reg_val;
+ break;
+ }
+}
+
+static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt,
+ unsigned int reg_entry_size,
+ bool cpu_endian,
+ struct pt_regs *regs)
+{
+ struct hdat_fadump_reg_entry *reg_entry;
+ u64 val;
+ int i;
+
+ memset(regs, 0, sizeof(struct pt_regs));
+
+ for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) {
+ reg_entry = (struct hdat_fadump_reg_entry *)bufp;
+ val = (cpu_endian ? be64_to_cpu(reg_entry->reg_val) :
+ reg_entry->reg_val);
+ opal_fadump_set_regval_regnum(regs,
+ be32_to_cpu(reg_entry->reg_type),
+ be32_to_cpu(reg_entry->reg_num),
+ val);
+ }
+}
+
+#endif /* _POWERNV_OPAL_FADUMP_H */
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index 5cae375525d0..3e1f064a18db 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -137,6 +137,43 @@ static void print_nx_checkstop_reason(const char *level,
xstop_reason[i].description);
}
+static void print_npu_checkstop_reason(const char *level,
+ struct OpalHMIEvent *hmi_evt)
+{
+ uint8_t reason, reason_count, i;
+
+ /*
+ * We may not have a checkstop reason on some combination of
+ * hardware and/or skiboot version
+ */
+ if (!hmi_evt->u.xstop_error.xstop_reason) {
+ printk("%s NPU checkstop on chip %x\n", level,
+ be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
+ return;
+ }
+
+ /*
+ * NPU2 has 3 FIRs. Reason encoded on a byte as:
+ * 2 bits for the FIR number
+ * 6 bits for the bit number
+ * It may be possible to find several reasons.
+ *
+ * We don't display a specific message per FIR bit as there
+ * are too many and most are meaningless without the workbook
+ * and/or hw team help anyway.
+ */
+ reason_count = sizeof(hmi_evt->u.xstop_error.xstop_reason) /
+ sizeof(reason);
+ for (i = 0; i < reason_count; i++) {
+ reason = (hmi_evt->u.xstop_error.xstop_reason >> (8 * i)) & 0xFF;
+ if (reason)
+ printk("%s NPU checkstop on chip %x: FIR%d bit %d is set\n",
+ level,
+ be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id),
+ reason >> 6, reason & 0x3F);
+ }
+}
+
static void print_checkstop_reason(const char *level,
struct OpalHMIEvent *hmi_evt)
{
@@ -148,6 +185,9 @@ static void print_checkstop_reason(const char *level,
case CHECKSTOP_TYPE_NX:
print_nx_checkstop_reason(level, hmi_evt);
break;
+ case CHECKSTOP_TYPE_NPU:
+ print_npu_checkstop_reason(level, hmi_evt);
+ break;
default:
printk("%s Unknown Malfunction Alert of type %d\n",
level, type);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 186109bdd41b..e04b20625cb9 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -53,9 +53,9 @@ static void export_imc_mode_and_cmd(struct device_node *node,
struct imc_pmu *pmu_ptr)
{
static u64 loc, *imc_mode_addr, *imc_cmd_addr;
- int chip = 0, nid;
char mode[16], cmd[16];
u32 cb_offset;
+ struct imc_mem_info *ptr = pmu_ptr->mem_info;
imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root);
@@ -69,20 +69,20 @@ static void export_imc_mode_and_cmd(struct device_node *node,
if (of_property_read_u32(node, "cb_offset", &cb_offset))
cb_offset = IMC_CNTL_BLK_OFFSET;
- for_each_node(nid) {
- loc = (u64)(pmu_ptr->mem_info[chip].vbase) + cb_offset;
+ while (ptr->vbase != NULL) {
+ loc = (u64)(ptr->vbase) + cb_offset;
imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET);
- sprintf(mode, "imc_mode_%d", nid);
+ sprintf(mode, "imc_mode_%d", (u32)(ptr->id));
if (!imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent,
imc_mode_addr))
goto err;
imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET);
- sprintf(cmd, "imc_cmd_%d", nid);
+ sprintf(cmd, "imc_cmd_%d", (u32)(ptr->id));
if (!imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent,
imc_cmd_addr))
goto err;
- chip++;
+ ptr++;
}
return;
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
index dc51d03c6370..d26da19a611f 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -29,23 +29,23 @@ struct memcons {
static struct memcons *opal_memcons = NULL;
-ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
+ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count)
{
const char *conbuf;
ssize_t ret;
size_t first_read = 0;
uint32_t out_pos, avail;
- if (!opal_memcons)
+ if (!mc)
return -ENODEV;
- out_pos = be32_to_cpu(READ_ONCE(opal_memcons->out_pos));
+ out_pos = be32_to_cpu(READ_ONCE(mc->out_pos));
/* Now we've read out_pos, put a barrier in before reading the new
* data it points to in conbuf. */
smp_rmb();
- conbuf = phys_to_virt(be64_to_cpu(opal_memcons->obuf_phys));
+ conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys));
/* When the buffer has wrapped, read from the out_pos marker to the end
* of the buffer, and then read the remaining data as in the un-wrapped
@@ -53,7 +53,7 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
if (out_pos & MEMCONS_OUT_POS_WRAP) {
out_pos &= MEMCONS_OUT_POS_MASK;
- avail = be32_to_cpu(opal_memcons->obuf_size) - out_pos;
+ avail = be32_to_cpu(mc->obuf_size) - out_pos;
ret = memory_read_from_buffer(to, count, &pos,
conbuf + out_pos, avail);
@@ -71,7 +71,7 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
}
/* Sanity check. The firmware should not do this to us. */
- if (out_pos > be32_to_cpu(opal_memcons->obuf_size)) {
+ if (out_pos > be32_to_cpu(mc->obuf_size)) {
pr_err("OPAL: memory console corruption. Aborting read.\n");
return -EINVAL;
}
@@ -86,6 +86,11 @@ out:
return ret;
}
+ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
+{
+ return memcons_copy(opal_memcons, to, pos, count);
+}
+
static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
struct bin_attribute *bin_attr, char *to,
loff_t pos, size_t count)
@@ -98,32 +103,48 @@ static struct bin_attribute opal_msglog_attr = {
.read = opal_msglog_read
};
-void __init opal_msglog_init(void)
+struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name)
{
u64 mcaddr;
struct memcons *mc;
- if (of_property_read_u64(opal_node, "ibm,opal-memcons", &mcaddr)) {
- pr_warn("OPAL: Property ibm,opal-memcons not found, no message log\n");
- return;
+ if (of_property_read_u64(node, mc_prop_name, &mcaddr)) {
+ pr_warn("%s property not found, no message log\n",
+ mc_prop_name);
+ goto out_err;
}
mc = phys_to_virt(mcaddr);
if (!mc) {
- pr_warn("OPAL: memory console address is invalid\n");
- return;
+ pr_warn("memory console address is invalid\n");
+ goto out_err;
}
if (be64_to_cpu(mc->magic) != MEMCONS_MAGIC) {
- pr_warn("OPAL: memory console version is invalid\n");
- return;
+ pr_warn("memory console version is invalid\n");
+ goto out_err;
}
- /* Report maximum size */
- opal_msglog_attr.size = be32_to_cpu(mc->ibuf_size) +
- be32_to_cpu(mc->obuf_size);
+ return mc;
+
+out_err:
+ return NULL;
+}
+
+u32 memcons_get_size(struct memcons *mc)
+{
+ return be32_to_cpu(mc->ibuf_size) + be32_to_cpu(mc->obuf_size);
+}
+
+void __init opal_msglog_init(void)
+{
+ opal_memcons = memcons_init(opal_node, "ibm,opal-memcons");
+ if (!opal_memcons) {
+ pr_warn("OPAL: memcons failed to load from ibm,opal-memcons\n");
+ return;
+ }
- opal_memcons = mc;
+ opal_msglog_attr.size = memcons_get_size(opal_memcons);
}
void __init opal_msglog_sysfs_init(void)
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index e072bf157d62..45f4223a790f 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -342,7 +342,7 @@ static int opal_prd_msg_notifier(struct notifier_block *nb,
int msg_size, item_size;
unsigned long flags;
- if (msg_type != OPAL_MSG_PRD)
+ if (msg_type != OPAL_MSG_PRD && msg_type != OPAL_MSG_PRD2)
return 0;
/* Calculate total size of the message and item we need to store. The
@@ -393,6 +393,12 @@ static int opal_prd_probe(struct platform_device *pdev)
return rc;
}
+ rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb);
+ if (rc) {
+ pr_err("Couldn't register PRD2 event notifier\n");
+ return rc;
+ }
+
rc = misc_register(&opal_prd_dev);
if (rc) {
pr_err("failed to register miscdev\n");
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 66430eebe869..fd510d961b8c 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -1,7 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * PowerNV LPC bus handling.
+ * PowerNV SCOM bus debugfs interface
*
+ * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
+ * <benh@kernel.crashing.org>
+ * and David Gibson, IBM Corporation.
* Copyright 2013 IBM Corp.
*/
@@ -10,62 +13,13 @@
#include <linux/bug.h>
#include <linux/gfp.h>
#include <linux/slab.h>
+#include <linux/uaccess.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/opal.h>
-#include <asm/scom.h>
-
-/*
- * We could probably fit that inside the scom_map_t
- * which is a void* after all but it's really too ugly
- * so let's kmalloc it for now
- */
-struct opal_scom_map {
- uint32_t chip;
- uint64_t addr;
-};
-
-static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count)
-{
- struct opal_scom_map *m;
- const __be32 *gcid;
-
- if (!of_get_property(dev, "scom-controller", NULL)) {
- pr_err("%s: device %pOF is not a SCOM controller\n",
- __func__, dev);
- return SCOM_MAP_INVALID;
- }
- gcid = of_get_property(dev, "ibm,chip-id", NULL);
- if (!gcid) {
- pr_err("%s: device %pOF has no ibm,chip-id\n",
- __func__, dev);
- return SCOM_MAP_INVALID;
- }
- m = kmalloc(sizeof(*m), GFP_KERNEL);
- if (!m)
- return NULL;
- m->chip = be32_to_cpup(gcid);
- m->addr = reg;
-
- return (scom_map_t)m;
-}
-
-static void opal_scom_unmap(scom_map_t map)
-{
- kfree(map);
-}
-
-static int opal_xscom_err_xlate(int64_t rc)
-{
- switch(rc) {
- case 0:
- return 0;
- /* Add more translations if necessary */
- default:
- return -EIO;
- }
-}
+#include <asm/debugfs.h>
+#include <asm/prom.h>
static u64 opal_scom_unmangle(u64 addr)
{
@@ -98,39 +52,154 @@ static u64 opal_scom_unmangle(u64 addr)
return addr;
}
-static int opal_scom_read(scom_map_t map, u64 reg, u64 *value)
+static int opal_scom_read(uint32_t chip, uint64_t addr, u64 reg, u64 *value)
{
- struct opal_scom_map *m = map;
int64_t rc;
__be64 v;
- reg = opal_scom_unmangle(m->addr + reg);
- rc = opal_xscom_read(m->chip, reg, (__be64 *)__pa(&v));
+ reg = opal_scom_unmangle(addr + reg);
+ rc = opal_xscom_read(chip, reg, (__be64 *)__pa(&v));
+ if (rc) {
+ *value = 0xfffffffffffffffful;
+ return -EIO;
+ }
*value = be64_to_cpu(v);
- return opal_xscom_err_xlate(rc);
+ return 0;
}
-static int opal_scom_write(scom_map_t map, u64 reg, u64 value)
+static int opal_scom_write(uint32_t chip, uint64_t addr, u64 reg, u64 value)
{
- struct opal_scom_map *m = map;
int64_t rc;
- reg = opal_scom_unmangle(m->addr + reg);
- rc = opal_xscom_write(m->chip, reg, value);
- return opal_xscom_err_xlate(rc);
+ reg = opal_scom_unmangle(addr + reg);
+ rc = opal_xscom_write(chip, reg, value);
+ if (rc)
+ return -EIO;
+ return 0;
+}
+
+struct scom_debug_entry {
+ u32 chip;
+ struct debugfs_blob_wrapper path;
+ char name[16];
+};
+
+static ssize_t scom_debug_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct scom_debug_entry *ent = filp->private_data;
+ u64 __user *ubuf64 = (u64 __user *)ubuf;
+ loff_t off = *ppos;
+ ssize_t done = 0;
+ u64 reg, reg_base, reg_cnt, val;
+ int rc;
+
+ if (off < 0 || (off & 7) || (count & 7))
+ return -EINVAL;
+ reg_base = off >> 3;
+ reg_cnt = count >> 3;
+
+ for (reg = 0; reg < reg_cnt; reg++) {
+ rc = opal_scom_read(ent->chip, reg_base, reg, &val);
+ if (!rc)
+ rc = put_user(val, ubuf64);
+ if (rc) {
+ if (!done)
+ done = rc;
+ break;
+ }
+ ubuf64++;
+ *ppos += 8;
+ done += 8;
+ }
+ return done;
+}
+
+static ssize_t scom_debug_write(struct file *filp, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct scom_debug_entry *ent = filp->private_data;
+ u64 __user *ubuf64 = (u64 __user *)ubuf;
+ loff_t off = *ppos;
+ ssize_t done = 0;
+ u64 reg, reg_base, reg_cnt, val;
+ int rc;
+
+ if (off < 0 || (off & 7) || (count & 7))
+ return -EINVAL;
+ reg_base = off >> 3;
+ reg_cnt = count >> 3;
+
+ for (reg = 0; reg < reg_cnt; reg++) {
+ rc = get_user(val, ubuf64);
+ if (!rc)
+ rc = opal_scom_write(ent->chip, reg_base, reg, val);
+ if (rc) {
+ if (!done)
+ done = rc;
+ break;
+ }
+ ubuf64++;
+ done += 8;
+ }
+ return done;
}
-static const struct scom_controller opal_scom_controller = {
- .map = opal_scom_map,
- .unmap = opal_scom_unmap,
- .read = opal_scom_read,
- .write = opal_scom_write
+static const struct file_operations scom_debug_fops = {
+ .read = scom_debug_read,
+ .write = scom_debug_write,
+ .open = simple_open,
+ .llseek = default_llseek,
};
-static int opal_xscom_init(void)
+static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
+ int chip)
{
- if (firmware_has_feature(FW_FEATURE_OPAL))
- scom_init(&opal_scom_controller);
+ struct scom_debug_entry *ent;
+ struct dentry *dir;
+
+ ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+ if (!ent)
+ return -ENOMEM;
+
+ ent->chip = chip;
+ snprintf(ent->name, 16, "%08x", chip);
+ ent->path.data = (void *)kasprintf(GFP_KERNEL, "%pOF", dn);
+ ent->path.size = strlen((char *)ent->path.data);
+
+ dir = debugfs_create_dir(ent->name, root);
+ if (!dir) {
+ kfree(ent->path.data);
+ kfree(ent);
+ return -1;
+ }
+
+ debugfs_create_blob("devspec", 0400, dir, &ent->path);
+ debugfs_create_file("access", 0600, dir, ent, &scom_debug_fops);
+
return 0;
}
-machine_arch_initcall(powernv, opal_xscom_init);
+
+static int scom_debug_init(void)
+{
+ struct device_node *dn;
+ struct dentry *root;
+ int chip, rc;
+
+ if (!firmware_has_feature(FW_FEATURE_OPAL))
+ return 0;
+
+ root = debugfs_create_dir("scom", powerpc_debugfs_root);
+ if (!root)
+ return -1;
+
+ rc = 0;
+ for_each_node_with_property(dn, "scom-controller") {
+ chip = of_get_ibm_chip_id(dn);
+ WARN_ON(chip == -1);
+ rc |= scom_debug_init_one(root, dn, chip);
+ }
+
+ return rc;
+}
+device_initcall(scom_debug_init);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 98c5d94b17fb..38e90270280b 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -58,6 +58,8 @@ static DEFINE_SPINLOCK(opal_write_lock);
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
static uint32_t opal_heartbeat;
static struct task_struct *kopald_tsk;
+static struct opal_msg *opal_msg;
+static u32 opal_msg_size __ro_after_init;
void opal_configure_cores(void)
{
@@ -202,16 +204,18 @@ static int __init opal_register_exception_handlers(void)
glue = 0x7000;
/*
- * Check if we are running on newer firmware that exports
- * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
- * the HMI interrupt and we catch it directly in Linux.
+ * Only ancient OPAL firmware requires this.
+ * Specifically, firmware from FW810.00 (released June 2014)
+ * through FW810.20 (Released October 2014).
*
- * For older firmware (i.e currently released POWER8 System Firmware
- * as of today <= SV810_087), we fallback to old behavior and let OPAL
- * patch the HMI vector and handle it inside OPAL firmware.
+ * Check if we are running on newer (post Oct 2014) firmware that
+ * exports the OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to
+ * patch the HMI interrupt and we catch it directly in Linux.
*
- * For newer firmware (in development/yet to be released) we will
- * start catching/handling HMI directly in Linux.
+ * For older firmware (i.e < FW810.20), we fallback to old behavior and
+ * let OPAL patch the HMI vector and handle it inside OPAL firmware.
+ *
+ * For newer firmware we catch/handle the HMI directly in Linux.
*/
if (!opal_check_token(OPAL_HANDLE_HMI)) {
pr_info("Old firmware detected, OPAL handles HMIs.\n");
@@ -221,6 +225,11 @@ static int __init opal_register_exception_handlers(void)
glue += 128;
}
+ /*
+ * Only applicable to ancient firmware, all modern
+ * (post March 2015/skiboot 5.0) firmware will just return
+ * OPAL_UNSUPPORTED.
+ */
opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif
@@ -264,14 +273,9 @@ static void opal_message_do_notify(uint32_t msg_type, void *msg)
static void opal_handle_message(void)
{
s64 ret;
- /*
- * TODO: pre-allocate a message buffer depending on opal-msg-size
- * value in /proc/device-tree.
- */
- static struct opal_msg msg;
u32 type;
- ret = opal_get_msg(__pa(&msg), sizeof(msg));
+ ret = opal_get_msg(__pa(opal_msg), opal_msg_size);
/* No opal message pending. */
if (ret == OPAL_RESOURCE)
return;
@@ -283,14 +287,14 @@ static void opal_handle_message(void)
return;
}
- type = be32_to_cpu(msg.msg_type);
+ type = be32_to_cpu(opal_msg->msg_type);
/* Sanity check */
if (type >= OPAL_MSG_TYPE_MAX) {
pr_warn_once("%s: Unknown message type: %u\n", __func__, type);
return;
}
- opal_message_do_notify(type, (void *)&msg);
+ opal_message_do_notify(type, (void *)opal_msg);
}
static irqreturn_t opal_message_notify(int irq, void *data)
@@ -299,10 +303,24 @@ static irqreturn_t opal_message_notify(int irq, void *data)
return IRQ_HANDLED;
}
-static int __init opal_message_init(void)
+static int __init opal_message_init(struct device_node *opal_node)
{
int ret, i, irq;
+ ret = of_property_read_u32(opal_node, "opal-msg-size", &opal_msg_size);
+ if (ret) {
+ pr_notice("Failed to read opal-msg-size property\n");
+ opal_msg_size = sizeof(struct opal_msg);
+ }
+
+ opal_msg = kmalloc(opal_msg_size, GFP_KERNEL);
+ if (!opal_msg) {
+ opal_msg_size = sizeof(struct opal_msg);
+ /* Try to allocate fixed message size */
+ opal_msg = kmalloc(opal_msg_size, GFP_KERNEL);
+ BUG_ON(opal_msg == NULL);
+ }
+
for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
@@ -698,7 +716,10 @@ static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
bin_attr->size);
}
-static BIN_ATTR_RO(symbol_map, 0);
+static struct bin_attribute symbol_map_attr = {
+ .attr = {.name = "symbol_map", .mode = 0400},
+ .read = symbol_map_read
+};
static void opal_export_symmap(void)
{
@@ -715,10 +736,10 @@ static void opal_export_symmap(void)
return;
/* Setup attributes */
- bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
- bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
+ symbol_map_attr.private = __va(be64_to_cpu(syms[0]));
+ symbol_map_attr.size = be64_to_cpu(syms[1]);
- rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
+ rc = sysfs_create_bin_file(opal_kobj, &symbol_map_attr);
if (rc)
pr_warn("Error %d creating OPAL symbols file\n", rc);
}
@@ -903,7 +924,7 @@ static int __init opal_init(void)
}
/* Initialise OPAL messaging system */
- opal_message_init();
+ opal_message_init(opal_node);
/* Initialise OPAL asynchronous completion interface */
opal_async_comp_init();
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index e28f03e1eb5e..a0b9c0c23ed2 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -36,7 +36,8 @@ static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
struct page *tce_mem = NULL;
__be64 *addr;
- tce_mem = alloc_pages_node(nid, GFP_KERNEL, shift - PAGE_SHIFT);
+ tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
+ shift - PAGE_SHIFT);
if (!tce_mem) {
pr_err("Failed to allocate a TCE memory, level shift=%d\n",
shift);
@@ -48,6 +49,9 @@ static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
return addr;
}
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+ unsigned long size, unsigned int levels);
+
static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
{
__be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
@@ -57,9 +61,9 @@ static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
while (level) {
int n = (idx & mask) >> (level * shift);
- unsigned long tce;
+ unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));
- if (tmp[n] == 0) {
+ if (!tce) {
__be64 *tmp2;
if (!alloc)
@@ -70,10 +74,15 @@ static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
if (!tmp2)
return NULL;
- tmp[n] = cpu_to_be64(__pa(tmp2) |
- TCE_PCI_READ | TCE_PCI_WRITE);
+ tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
+ oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
+ cpu_to_be64(tce)));
+ if (oldtce) {
+ pnv_pci_ioda2_table_do_free_pages(tmp2,
+ ilog2(tbl->it_level_size) + 3, 1);
+ tce = oldtce;
+ }
}
- tce = be64_to_cpu(tmp[n]);
tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
idx &= ~mask;
@@ -161,6 +170,9 @@ void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
if (ptce)
*ptce = cpu_to_be64(0);
+ else
+ /* Skip the rest of the level */
+ i |= tbl->it_level_size - 1;
}
}
@@ -260,7 +272,6 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
PAGE_SHIFT);
const unsigned long tce_table_size = 1UL << table_shift;
- unsigned int tmplevels = levels;
if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
return -EINVAL;
@@ -268,9 +279,6 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
if (!is_power_of_2(window_size))
return -EINVAL;
- if (alloc_userspace_copy && (window_size > (1ULL << 32)))
- tmplevels = 1;
-
/* Adjust direct table size from window_size and levels */
entries_shift = (entries_shift + levels - 1) / levels;
level_shift = entries_shift + 3;
@@ -281,7 +289,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
/* Allocate TCE table */
addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
- tmplevels, tce_table_size, &offset, &total_allocated);
+ 1, tce_table_size, &offset, &total_allocated);
/* addr==NULL means that the first level allocation failed */
if (!addr)
@@ -292,18 +300,18 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
* we did not allocate as much as we wanted,
* release partially allocated table.
*/
- if (tmplevels == levels && offset < tce_table_size)
+ if (levels == 1 && offset < tce_table_size)
goto free_tces_exit;
/* Allocate userspace view of the TCE table */
if (alloc_userspace_copy) {
offset = 0;
uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
- tmplevels, tce_table_size, &offset,
+ 1, tce_table_size, &offset,
&total_allocated_uas);
if (!uas)
goto free_tces_exit;
- if (tmplevels == levels && (offset < tce_table_size ||
+ if (levels == 1 && (offset < tce_table_size ||
total_allocated_uas != total_allocated))
goto free_uas_exit;
}
@@ -318,7 +326,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
window_size, tce_table_size, bus_offset, tbl->it_base,
- tbl->it_userspace, tmplevels, levels);
+ tbl->it_userspace, 1, levels);
return 0;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 10cc42b9e541..c28d0d9b7ee0 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -50,6 +50,8 @@
static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
"NPU_OCAPI" };
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+
void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...)
{
@@ -1937,26 +1939,12 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
}
#ifdef CONFIG_IOMMU_API
-static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
- unsigned long *hpa, enum dma_data_direction *direction)
+/* Common for IODA1 and IODA2 */
+static int pnv_ioda_tce_xchg_no_kill(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction,
+ bool realmode)
{
- long ret = pnv_tce_xchg(tbl, index, hpa, direction, true);
-
- if (!ret)
- pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, false);
-
- return ret;
-}
-
-static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index,
- unsigned long *hpa, enum dma_data_direction *direction)
-{
- long ret = pnv_tce_xchg(tbl, index, hpa, direction, false);
-
- if (!ret)
- pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true);
-
- return ret;
+ return pnv_tce_xchg(tbl, index, hpa, direction, !realmode);
}
#endif
@@ -1971,8 +1959,8 @@ static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
static struct iommu_table_ops pnv_ioda1_iommu_ops = {
.set = pnv_ioda1_tce_build,
#ifdef CONFIG_IOMMU_API
- .exchange = pnv_ioda1_tce_xchg,
- .exchange_rm = pnv_ioda1_tce_xchg_rm,
+ .xchg_no_kill = pnv_ioda_tce_xchg_no_kill,
+ .tce_kill = pnv_pci_p7ioc_tce_invalidate,
.useraddrptr = pnv_tce_useraddrptr,
#endif
.clear = pnv_ioda1_tce_free,
@@ -2101,30 +2089,6 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
return ret;
}
-#ifdef CONFIG_IOMMU_API
-static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
- unsigned long *hpa, enum dma_data_direction *direction)
-{
- long ret = pnv_tce_xchg(tbl, index, hpa, direction, true);
-
- if (!ret)
- pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false);
-
- return ret;
-}
-
-static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index,
- unsigned long *hpa, enum dma_data_direction *direction)
-{
- long ret = pnv_tce_xchg(tbl, index, hpa, direction, false);
-
- if (!ret)
- pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true);
-
- return ret;
-}
-#endif
-
static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
long npages)
{
@@ -2136,8 +2100,8 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
static struct iommu_table_ops pnv_ioda2_iommu_ops = {
.set = pnv_ioda2_tce_build,
#ifdef CONFIG_IOMMU_API
- .exchange = pnv_ioda2_tce_xchg,
- .exchange_rm = pnv_ioda2_tce_xchg_rm,
+ .xchg_no_kill = pnv_ioda_tce_xchg_no_kill,
+ .tce_kill = pnv_pci_ioda2_tce_invalidate,
.useraddrptr = pnv_tce_useraddrptr,
#endif
.clear = pnv_ioda2_tce_free,
@@ -2301,7 +2265,7 @@ found:
tbl->it_ops = &pnv_ioda1_iommu_ops;
pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift;
pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
- iommu_init_table(tbl, phb->hose->node);
+ iommu_init_table(tbl, phb->hose->node, 0, 0);
if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
pnv_ioda_setup_bus_dma(pe, pe->pbus);
@@ -2356,7 +2320,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
return 0;
}
-void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
{
uint16_t window_id = (pe->pe_number << 1 ) + 1;
int64_t rc;
@@ -2418,6 +2382,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
{
struct iommu_table *tbl = NULL;
long rc;
+ unsigned long res_start, res_end;
/*
* crashkernel= specifies the kdump kernel's maximum memory at
@@ -2431,19 +2396,46 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
* DMA window can be larger than available memory, which will
* cause errors later.
*/
- const u64 window_size = min((u64)pe->table_group.tce32_size, max_memory);
+ const u64 maxblock = 1UL << (PAGE_SHIFT + MAX_ORDER - 1);
- rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
- IOMMU_PAGE_SHIFT_4K,
- window_size,
- POWERNV_IOMMU_DEFAULT_LEVELS, false, &tbl);
+ /*
+ * We create the default window as big as we can. The constraint is
+ * the max order of allocation possible. The TCE table is likely to
+ * end up being multilevel and with on-demand allocation in place,
+ * the initial use is not going to be huge as the default window aims
+ * to support crippled devices (i.e. not fully 64bit DMAble) only.
+ */
+ /* iommu_table::it_map uses 1 bit per IOMMU page, hence 8 */
+ const u64 window_size = min((maxblock * 8) << PAGE_SHIFT, max_memory);
+ /* Each TCE level cannot exceed maxblock so go multilevel if needed */
+ unsigned long tces_order = ilog2(window_size >> PAGE_SHIFT);
+ unsigned long tcelevel_order = ilog2(maxblock >> 3);
+ unsigned int levels = tces_order / tcelevel_order;
+
+ if (tces_order % tcelevel_order)
+ levels += 1;
+ /*
+ * We try to stick to default levels (which is >1 at the moment) in
+ * order to save memory by relying on on-demain TCE level allocation.
+ */
+ levels = max_t(unsigned int, levels, POWERNV_IOMMU_DEFAULT_LEVELS);
+
+ rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, PAGE_SHIFT,
+ window_size, levels, false, &tbl);
if (rc) {
pe_err(pe, "Failed to create 32-bit TCE table, err %ld",
rc);
return rc;
}
- iommu_init_table(tbl, pe->phb->hose->node);
+ /* We use top part of 32bit space for MMIO so exclude it from DMA */
+ res_start = 0;
+ res_end = 0;
+ if (window_size > pe->phb->ioda.m32_pci_base) {
+ res_start = pe->phb->ioda.m32_pci_base >> tbl->it_page_shift;
+ res_end = min(window_size, SZ_4G) >> tbl->it_page_shift;
+ }
+ iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end);
rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
if (rc) {
@@ -2456,6 +2448,14 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
if (!pnv_iommu_bypass_disabled)
pnv_pci_ioda2_set_bypass(pe, true);
+ /*
+ * Set table base for the case of IOMMU DMA use. Usually this is done
+ * from dma_dev_setup() which is not called when a device is returned
+ * from VFIO so do it here.
+ */
+ if (pe->pdev)
+ set_iommu_table_base(&pe->pdev->dev, tbl);
+
return 0;
}
@@ -2543,6 +2543,8 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
pnv_pci_ioda2_unset_window(&pe->table_group, 0);
if (pe->pbus)
pnv_ioda_setup_bus_dma(pe, pe->pbus);
+ else if (pe->pdev)
+ set_iommu_table_base(&pe->pdev->dev, NULL);
iommu_tce_table_put(tbl);
}
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index ff1a33fee8e6..2825d004dece 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -34,7 +34,6 @@
#include "powernv.h"
#include "pci.h"
-static DEFINE_MUTEX(p2p_mutex);
static DEFINE_MUTEX(tunnel_mutex);
int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
@@ -55,7 +54,8 @@ int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
break;
}
- if (!of_device_is_compatible(parent, "ibm,ioda2-phb")) {
+ if (!of_device_is_compatible(parent, "ibm,ioda2-phb") &&
+ !of_device_is_compatible(parent, "ibm,ioda3-phb")) {
of_node_put(parent);
continue;
}
@@ -857,79 +857,6 @@ void pnv_pci_dma_bus_setup(struct pci_bus *bus)
}
}
-int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, u64 desc)
-{
- struct pci_controller *hose;
- struct pnv_phb *phb_init, *phb_target;
- struct pnv_ioda_pe *pe_init;
- int rc;
-
- if (!opal_check_token(OPAL_PCI_SET_P2P))
- return -ENXIO;
-
- hose = pci_bus_to_host(initiator->bus);
- phb_init = hose->private_data;
-
- hose = pci_bus_to_host(target->bus);
- phb_target = hose->private_data;
-
- pe_init = pnv_ioda_get_pe(initiator);
- if (!pe_init)
- return -ENODEV;
-
- /*
- * Configuring the initiator's PHB requires to adjust its
- * TVE#1 setting. Since the same device can be an initiator
- * several times for different target devices, we need to keep
- * a reference count to know when we can restore the default
- * bypass setting on its TVE#1 when disabling. Opal is not
- * tracking PE states, so we add a reference count on the PE
- * in linux.
- *
- * For the target, the configuration is per PHB, so we keep a
- * target reference count on the PHB.
- */
- mutex_lock(&p2p_mutex);
-
- if (desc & OPAL_PCI_P2P_ENABLE) {
- /* always go to opal to validate the configuration */
- rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id,
- desc, pe_init->pe_number);
-
- if (rc != OPAL_SUCCESS) {
- rc = -EIO;
- goto out;
- }
-
- pe_init->p2p_initiator_count++;
- phb_target->p2p_target_count++;
- } else {
- if (!pe_init->p2p_initiator_count ||
- !phb_target->p2p_target_count) {
- rc = -EINVAL;
- goto out;
- }
-
- if (--pe_init->p2p_initiator_count == 0)
- pnv_pci_ioda2_set_bypass(pe_init, true);
-
- if (--phb_target->p2p_target_count == 0) {
- rc = opal_pci_set_p2p(phb_init->opal_id,
- phb_target->opal_id, desc,
- pe_init->pe_number);
- if (rc != OPAL_SUCCESS) {
- rc = -EIO;
- goto out;
- }
- }
- }
- rc = 0;
-out:
- mutex_unlock(&p2p_mutex);
- return rc;
-}
-EXPORT_SYMBOL_GPL(pnv_pci_set_p2p);
-
struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
@@ -938,54 +865,6 @@ struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
}
EXPORT_SYMBOL(pnv_pci_get_phb_node);
-int pnv_pci_enable_tunnel(struct pci_dev *dev, u64 *asnind)
-{
- struct device_node *np;
- const __be32 *prop;
- struct pnv_ioda_pe *pe;
- uint16_t window_id;
- int rc;
-
- if (!radix_enabled())
- return -ENXIO;
-
- if (!(np = pnv_pci_get_phb_node(dev)))
- return -ENXIO;
-
- prop = of_get_property(np, "ibm,phb-indications", NULL);
- of_node_put(np);
-
- if (!prop || !prop[1])
- return -ENXIO;
-
- *asnind = (u64)be32_to_cpu(prop[1]);
- pe = pnv_ioda_get_pe(dev);
- if (!pe)
- return -ENODEV;
-
- /* Increase real window size to accept as_notify messages. */
- window_id = (pe->pe_number << 1 ) + 1;
- rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, pe->pe_number,
- window_id, pe->tce_bypass_base,
- (uint64_t)1 << 48);
- return opal_error_code(rc);
-}
-EXPORT_SYMBOL_GPL(pnv_pci_enable_tunnel);
-
-int pnv_pci_disable_tunnel(struct pci_dev *dev)
-{
- struct pnv_ioda_pe *pe;
-
- pe = pnv_ioda_get_pe(dev);
- if (!pe)
- return -ENODEV;
-
- /* Restore default real window size. */
- pnv_pci_ioda2_set_bypass(pe, true);
- return 0;
-}
-EXPORT_SYMBOL_GPL(pnv_pci_disable_tunnel);
-
int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable)
{
__be64 val;
@@ -1040,29 +919,6 @@ out:
}
EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar);
-#ifdef CONFIG_PPC64 /* for thread.tidr */
-int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid, u32 *pid,
- u32 *tid)
-{
- struct mm_struct *mm = NULL;
-
- if (task == NULL)
- return -EINVAL;
-
- mm = get_task_mm(task);
- if (mm == NULL)
- return -EINVAL;
-
- *pid = mm->context.id;
- mmput(mm);
-
- *tid = task->thread.tidr;
- *lpid = mfspr(SPRN_LPID);
- return 0;
-}
-EXPORT_SYMBOL_GPL(pnv_pci_get_as_notify_info);
-#endif
-
void pnv_pci_shutdown(void)
{
struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index be26ab3d99e0..f914f0b14e4e 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -79,9 +79,6 @@ struct pnv_ioda_pe {
struct pnv_ioda_pe *master;
struct list_head slaves;
- /* PCI peer-to-peer*/
- int p2p_initiator_count;
-
/* Link in list of PE#s */
struct list_head list;
};
@@ -172,8 +169,6 @@ struct pnv_phb {
/* PHB and hub diagnostics */
unsigned int diag_data_size;
u8 *diag_data;
-
- int p2p_target_count;
};
extern struct pci_ops pnv_pci_ops;
@@ -200,7 +195,6 @@ extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
-extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
__u64 window_size, __u32 levels);
extern int pnv_eeh_post_init(void);
@@ -225,7 +219,7 @@ extern struct iommu_table_group *pnv_npu_compound_attach(
struct pnv_ioda_pe *pe);
/* pci-ioda-tce.c */
-#define POWERNV_IOMMU_DEFAULT_LEVELS 1
+#define POWERNV_IOMMU_DEFAULT_LEVELS 2
#define POWERNV_IOMMU_MAX_LEVELS 5
extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index fd4a1c5a6369..1aa51c4fa904 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -30,4 +30,9 @@ extern void opal_event_shutdown(void);
bool cpu_core_split_required(void);
+struct memcons;
+ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count);
+u32 memcons_get_size(struct memcons *mc);
+struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name);
+
#endif /* _POWERNV_H */
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index a5e52f9eed3c..83498604d322 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -24,6 +24,7 @@
#include <linux/bug.h>
#include <linux/pci.h>
#include <linux/cpufreq.h>
+#include <linux/memblock.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
@@ -166,6 +167,14 @@ static void __init pnv_init(void)
else
#endif
add_preferred_console("hvc", 0, NULL);
+
+ if (!radix_enabled()) {
+ int i;
+
+ /* Allocate per cpu area to save old slb contents during MCE */
+ for_each_possible_cpu(i)
+ paca_ptrs[i]->mce_faulty_slbs = memblock_alloc_node(mmu_slb_size, __alignof__(*paca_ptrs[i]->mce_faulty_slbs), cpu_to_node(i));
+ }
}
static void __init pnv_init_IRQ(void)
diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c
new file mode 100644
index 000000000000..e4a00ad06f9d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/ultravisor.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ultravisor high level interfaces
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#include <linux/init.h>
+#include <linux/printk.h>
+#include <linux/of_fdt.h>
+#include <linux/of.h>
+
+#include <asm/ultravisor.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+
+#include "powernv.h"
+
+static struct kobject *ultravisor_kobj;
+
+int __init early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ if (!of_flat_dt_is_compatible(node, "ibm,ultravisor"))
+ return 0;
+
+ powerpc_firmware_features |= FW_FEATURE_ULTRAVISOR;
+ pr_debug("Ultravisor detected!\n");
+ return 1;
+}
+
+static struct memcons *uv_memcons;
+
+static ssize_t uv_msglog_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *to,
+ loff_t pos, size_t count)
+{
+ return memcons_copy(uv_memcons, to, pos, count);
+}
+
+static struct bin_attribute uv_msglog_attr = {
+ .attr = {.name = "msglog", .mode = 0400},
+ .read = uv_msglog_read
+};
+
+static int __init uv_init(void)
+{
+ struct device_node *node;
+
+ if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+ return 0;
+
+ node = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
+ if (!node)
+ return -ENODEV;
+
+ uv_memcons = memcons_init(node, "memcons");
+ if (!uv_memcons)
+ return -ENOENT;
+
+ uv_msglog_attr.size = memcons_get_size(uv_memcons);
+
+ ultravisor_kobj = kobject_create_and_add("ultravisor", firmware_kobj);
+ if (!ultravisor_kobj)
+ return -ENOMEM;
+
+ return sysfs_create_bin_file(ultravisor_kobj, &uv_msglog_attr);
+}
+machine_subsys_initcall(powernv, uv_init);
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index ea5ca0201da8..0c0d27d17976 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -40,16 +40,6 @@ static void compute_paste_address(struct vas_window *window, u64 *addr, int *len
pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
}
-u64 vas_win_paste_addr(struct vas_window *win)
-{
- u64 addr;
-
- compute_paste_address(win, &addr, NULL);
-
- return addr;
-}
-EXPORT_SYMBOL(vas_win_paste_addr);
-
static inline void get_hvwc_mmio_bar(struct vas_window *window,
u64 *start, int *len)
{
@@ -1264,12 +1254,3 @@ int vas_win_close(struct vas_window *window)
return 0;
}
EXPORT_SYMBOL_GPL(vas_win_close);
-
-/*
- * Return a system-wide unique window id for the window @win.
- */
-u32 vas_win_id(struct vas_window *win)
-{
- return encode_pswid(win->vinst->vas_id, win->winid);
-}
-EXPORT_SYMBOL_GPL(vas_win_id);
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
index 9cc5251816db..5574aec9ee88 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -444,26 +444,6 @@ static inline u64 read_hvwc_reg(struct vas_window *win,
return in_be64(win->hvwc_map+reg);
}
-/*
- * Encode/decode the Partition Send Window ID (PSWID) for a window in
- * a way that we can uniquely identify any window in the system. i.e.
- * we should be able to locate the 'struct vas_window' given the PSWID.
- *
- * Bits Usage
- * 0:7 VAS id (8 bits)
- * 8:15 Unused, 0 (3 bits)
- * 16:31 Window id (16 bits)
- */
-static inline u32 encode_pswid(int vasid, int winid)
-{
- u32 pswid = 0;
-
- pswid |= vasid << (31 - 7);
- pswid |= winid;
-
- return pswid;
-}
-
static inline void decode_pswid(u32 pswid, int *vasid, int *winid)
{
if (vasid)
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index bdaeaecdc06b..1193c294b8d0 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -184,10 +184,7 @@ static void spu_unmap(struct spu *spu)
* setup_areas - Map the spu regions into the address space.
*
* The current HV requires the spu shadow regs to be mapped with the
- * PTE page protection bits set as read-only (PP=3). This implementation
- * uses the low level __ioremap() to bypass the page protection settings
- * inforced by ioremap_prot() to get the needed PTE bits set for the
- * shadow regs.
+ * PTE page protection bits set as read-only.
*/
static int __init setup_areas(struct spu *spu)
@@ -195,9 +192,8 @@ static int __init setup_areas(struct spu *spu)
struct table {char* name; unsigned long addr; unsigned long size;};
unsigned long shadow_flags = pgprot_val(pgprot_noncached_wc(PAGE_KERNEL_RO));
- spu_pdata(spu)->shadow = __ioremap(spu_pdata(spu)->shadow_addr,
- sizeof(struct spe_shadow),
- shadow_flags);
+ spu_pdata(spu)->shadow = ioremap_prot(spu_pdata(spu)->shadow_addr,
+ sizeof(struct spe_shadow), shadow_flags);
if (!spu_pdata(spu)->shadow) {
pr_debug("%s:%d: ioremap shadow failed\n", __func__, __LINE__);
goto fail_ioremap;
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index 98410119c47b..3542b7bd6a46 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -686,20 +686,16 @@ static int ps3_dma_supported(struct device *_dev, u64 mask)
return mask >= DMA_BIT_MASK(32);
}
-static u64 ps3_dma_get_required_mask(struct device *_dev)
-{
- return DMA_BIT_MASK(32);
-}
-
static const struct dma_map_ops ps3_sb_dma_ops = {
.alloc = ps3_alloc_coherent,
.free = ps3_free_coherent,
.map_sg = ps3_sb_map_sg,
.unmap_sg = ps3_sb_unmap_sg,
.dma_supported = ps3_dma_supported,
- .get_required_mask = ps3_dma_get_required_mask,
.map_page = ps3_sb_map_page,
.unmap_page = ps3_unmap_page,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
};
static const struct dma_map_ops ps3_ioc0_dma_ops = {
@@ -708,9 +704,10 @@ static const struct dma_map_ops ps3_ioc0_dma_ops = {
.map_sg = ps3_ioc0_map_sg,
.unmap_sg = ps3_ioc0_unmap_sg,
.dma_supported = ps3_dma_supported,
- .get_required_mask = ps3_dma_get_required_mask,
.map_page = ps3_ioc0_map_page,
.unmap_page = ps3_unmap_page,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
};
/**
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 9c6b3d860518..9e35cddddf73 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -23,6 +23,7 @@ config PPC_PSERIES
select ARCH_RANDOM
select PPC_DOORBELL
select FORCE_SMP
+ select SWIOTLB
default y
config PPC_SPLPAR
@@ -80,19 +81,19 @@ config LPARCFG
bool "LPAR Configuration Data"
depends on PPC_PSERIES
help
- Provide system capacity information via human readable
- <key word>=<value> pairs through a /proc/ppc64/lparcfg interface.
+ Provide system capacity information via human readable
+ <key word>=<value> pairs through a /proc/ppc64/lparcfg interface.
config PPC_PSERIES_DEBUG
depends on PPC_PSERIES && PPC_EARLY_DEBUG
bool "Enable extra debug logging in platforms/pseries"
- help
+ default y
+ help
Say Y here if you want the pseries core to produce a bunch of
debug messages to the system log. Select this if you are having a
problem with the pseries core and want to see more of what is
going on. This does not enable debugging in lpar.c, which must
be manually done due to its verbosity.
- default y
config PPC_SMLPAR
bool "Support for shared-memory logical partitions"
@@ -117,16 +118,16 @@ config CMM
balance memory across many LPARs.
config HV_PERF_CTRS
- bool "Hypervisor supplied PMU events (24x7 & GPCI)"
- default y
- depends on PERF_EVENTS && PPC_PSERIES
- help
+ bool "Hypervisor supplied PMU events (24x7 & GPCI)"
+ default y
+ depends on PERF_EVENTS && PPC_PSERIES
+ help
Enable access to hypervisor supplied counters in perf. Currently,
this enables code that uses the hcall GetPerfCounterInfo and 24x7
interfaces to retrieve counters. GPCI exists on Power 6 and later
systems. 24x7 is available on Power 8 and later systems.
- If unsure, select Y.
+ If unsure, select Y.
config IBMVIO
depends on PPC_PSERIES
@@ -144,3 +145,17 @@ config PAPR_SCM
tristate "Support for the PAPR Storage Class Memory interface"
help
Enable access to hypervisor provided storage class memory.
+
+config PPC_SVM
+ bool "Secure virtual machine (SVM) support for POWER"
+ depends on PPC_PSERIES
+ select SWIOTLB
+ select ARCH_HAS_MEM_ENCRYPT
+ select ARCH_HAS_FORCE_DMA_UNENCRYPTED
+ help
+ There are certain POWER platforms which support secure guests using
+ the Protected Execution Facility, with the help of an Ultravisor
+ executing below the hypervisor layer. This enables support for
+ those guests.
+
+ If unsure, say "N".
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index a43ec843c8e2..a3c74a5cf20d 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -25,6 +25,9 @@ obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_IBMEBUS) += ibmebus.o
obj-$(CONFIG_PAPR_SCM) += papr_scm.o
+obj-$(CONFIG_PPC_SPLPAR) += vphn.o
+obj-$(CONFIG_PPC_SVM) += svm.o
+obj-$(CONFIG_FA_DUMP) += rtas-fadump.o
ifdef CONFIG_PPC_PSERIES
obj-$(CONFIG_SUSPEND) += suspend.o
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 437a74173db2..16e86ba8aa20 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -58,6 +58,10 @@ static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa)
name = (char *)ccwa + be32_to_cpu(ccwa->name_offset);
prop->name = kstrdup(name, GFP_KERNEL);
+ if (!prop->name) {
+ dlpar_free_cc_property(prop);
+ return NULL;
+ }
prop->length = be32_to_cpu(ccwa->prop_length);
value = (char *)ccwa + be32_to_cpu(ccwa->prop_offset);
@@ -383,11 +387,11 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog)
struct pseries_hp_work *work;
struct pseries_hp_errorlog *hp_errlog_copy;
- hp_errlog_copy = kmalloc(sizeof(struct pseries_hp_errorlog),
- GFP_KERNEL);
- memcpy(hp_errlog_copy, hp_errlog, sizeof(struct pseries_hp_errorlog));
+ hp_errlog_copy = kmemdup(hp_errlog, sizeof(*hp_errlog), GFP_ATOMIC);
+ if (!hp_errlog_copy)
+ return;
- work = kmalloc(sizeof(struct pseries_hp_work), GFP_KERNEL);
+ work = kmalloc(sizeof(struct pseries_hp_work), GFP_ATOMIC);
if (work) {
INIT_WORK((struct work_struct *)work, pseries_hp_work_fn);
work->errlog = hp_errlog_copy;
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index ab5de985a787..2b87480f2837 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -27,13 +27,7 @@ struct dtl {
};
static DEFINE_PER_CPU(struct dtl, cpu_dtl);
-/*
- * Dispatch trace log event mask:
- * 0x7: 0x1: voluntary virtual processor waits
- * 0x2: time-slice preempts
- * 0x4: virtual partition memory page faults
- */
-static u8 dtl_event_mask = 0x7;
+static u8 dtl_event_mask = DTL_LOG_ALL;
/*
@@ -48,7 +42,6 @@ struct dtl_ring {
struct dtl_entry *write_ptr;
struct dtl_entry *buf;
struct dtl_entry *buf_end;
- u8 saved_dtl_mask;
};
static DEFINE_PER_CPU(struct dtl_ring, dtl_rings);
@@ -98,7 +91,6 @@ static int dtl_start(struct dtl *dtl)
dtlr->write_ptr = dtl->buf;
/* enable event logging */
- dtlr->saved_dtl_mask = lppaca_of(dtl->cpu).dtl_enable_mask;
lppaca_of(dtl->cpu).dtl_enable_mask |= dtl_event_mask;
dtl_consumer = consume_dtle;
@@ -116,7 +108,7 @@ static void dtl_stop(struct dtl *dtl)
dtlr->buf = NULL;
/* restore dtl_enable_mask */
- lppaca_of(dtl->cpu).dtl_enable_mask = dtlr->saved_dtl_mask;
+ lppaca_of(dtl->cpu).dtl_enable_mask = DTL_LOG_PREEMPT;
if (atomic_dec_and_test(&dtl_count))
dtl_consumer = NULL;
@@ -188,11 +180,16 @@ static int dtl_enable(struct dtl *dtl)
if (dtl->buf)
return -EBUSY;
+ /* ensure there are no other conflicting dtl users */
+ if (!read_trylock(&dtl_access_lock))
+ return -EBUSY;
+
n_entries = dtl_buf_entries;
buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu));
if (!buf) {
printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n",
__func__, dtl->cpu);
+ read_unlock(&dtl_access_lock);
return -ENOMEM;
}
@@ -209,8 +206,11 @@ static int dtl_enable(struct dtl *dtl)
}
spin_unlock(&dtl->lock);
- if (rc)
+ if (rc) {
+ read_unlock(&dtl_access_lock);
kmem_cache_free(dtl_cache, buf);
+ }
+
return rc;
}
@@ -222,6 +222,7 @@ static void dtl_disable(struct dtl *dtl)
dtl->buf = NULL;
dtl->buf_entries = 0;
spin_unlock(&dtl->lock);
+ read_unlock(&dtl_access_lock);
}
/* file interface */
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 9edae1863e2f..893ba3f562c4 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -42,42 +42,44 @@ static int ibm_get_config_addr_info;
static int ibm_get_config_addr_info2;
static int ibm_configure_pe;
-#ifdef CONFIG_PCI_IOV
void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
{
struct pci_dn *pdn = pci_get_pdn(pdev);
- struct pci_dn *physfn_pdn;
- struct eeh_dev *edev;
- if (!pdev->is_virtfn)
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
return;
- pdn->device_id = pdev->device;
- pdn->vendor_id = pdev->vendor;
- pdn->class_code = pdev->class;
- /*
- * Last allow unfreeze return code used for retrieval
- * by user space in eeh-sysfs to show the last command
- * completion from platform.
- */
- pdn->last_allow_rc = 0;
- physfn_pdn = pci_get_pdn(pdev->physfn);
- pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index];
- edev = pdn_to_eeh_dev(pdn);
+ dev_dbg(&pdev->dev, "EEH: Setting up device\n");
+#ifdef CONFIG_PCI_IOV
+ if (pdev->is_virtfn) {
+ struct pci_dn *physfn_pdn;
- /*
- * The following operations will fail if VF's sysfs files
- * aren't created or its resources aren't finalized.
- */
+ pdn->device_id = pdev->device;
+ pdn->vendor_id = pdev->vendor;
+ pdn->class_code = pdev->class;
+ /*
+ * Last allow unfreeze return code used for retrieval
+ * by user space in eeh-sysfs to show the last command
+ * completion from platform.
+ */
+ pdn->last_allow_rc = 0;
+ physfn_pdn = pci_get_pdn(pdev->physfn);
+ pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index];
+ }
+#endif
eeh_add_device_early(pdn);
eeh_add_device_late(pdev);
- edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
- eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */
- eeh_add_to_parent_pe(edev); /* Add as VF PE type */
- eeh_sysfs_add_device(pdev);
+#ifdef CONFIG_PCI_IOV
+ if (pdev->is_virtfn) {
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
-}
+ edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+ eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */
+ eeh_add_to_parent_pe(edev); /* Add as VF PE type */
+ }
#endif
+ eeh_sysfs_add_device(pdev);
+}
/*
* Buffer for reporting slot-error-detail rtas calls. Its here
@@ -144,10 +146,8 @@ static int pseries_eeh_init(void)
/* Set EEH probe mode */
eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
-#ifdef CONFIG_PCI_IOV
/* Set EEH machine dependent code */
ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
-#endif
return 0;
}
@@ -251,6 +251,8 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
return NULL;
+ eeh_edev_dbg(edev, "Probing device\n");
+
/*
* Update class code and mode of eeh device. We need
* correctly reflects that current device is root port
@@ -280,8 +282,11 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
/* Enable EEH on the device */
+ eeh_edev_dbg(edev, "Enabling EEH on device\n");
ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
- if (!ret) {
+ if (ret) {
+ eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret);
+ } else {
/* Retrieve PE address */
edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
pe.addr = edev->pe_config_addr;
@@ -297,11 +302,6 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
if (enable) {
eeh_add_flag(EEH_ENABLED);
eeh_add_to_parent_pe(edev);
-
- pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%x-PE#%x\n",
- __func__, pdn->busno, PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn), pe.phb->global_number,
- pe.addr);
} else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) &&
(pdn_to_eeh_dev(pdn->parent))->pe) {
/* This device doesn't support EEH, but it may have an
@@ -310,6 +310,8 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr;
eeh_add_to_parent_pe(edev);
}
+ eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n",
+ (enable ? "enabled" : "unsupported"), ret);
}
/* Save memory bars */
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 2ec43b4639a0..8e700390f3d6 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -880,34 +880,44 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
switch (hp_elog->action) {
case PSERIES_HP_ELOG_ACTION_ADD:
- if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) {
+ switch (hp_elog->id_type) {
+ case PSERIES_HP_ELOG_ID_DRC_COUNT:
count = hp_elog->_drc_u.drc_count;
rc = dlpar_memory_add_by_count(count);
- } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_INDEX:
drc_index = hp_elog->_drc_u.drc_index;
rc = dlpar_memory_add_by_index(drc_index);
- } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) {
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_IC:
count = hp_elog->_drc_u.ic.count;
drc_index = hp_elog->_drc_u.ic.index;
rc = dlpar_memory_add_by_ic(count, drc_index);
- } else {
+ break;
+ default:
rc = -EINVAL;
+ break;
}
break;
case PSERIES_HP_ELOG_ACTION_REMOVE:
- if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) {
+ switch (hp_elog->id_type) {
+ case PSERIES_HP_ELOG_ID_DRC_COUNT:
count = hp_elog->_drc_u.drc_count;
rc = dlpar_memory_remove_by_count(count);
- } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_INDEX:
drc_index = hp_elog->_drc_u.drc_index;
rc = dlpar_memory_remove_by_index(drc_index);
- } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) {
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_IC:
count = hp_elog->_drc_u.ic.count;
drc_index = hp_elog->_drc_u.ic.index;
rc = dlpar_memory_remove_by_ic(count, drc_index);
- } else {
+ break;
+ default:
rc = -EINVAL;
+ break;
}
break;
@@ -976,6 +986,9 @@ static int pseries_update_drconf_memory(struct of_reconfig_data *pr)
if (!memblock_size)
return -EINVAL;
+ if (!pr->old_prop)
+ return 0;
+
p = (__be32 *) pr->old_prop->value;
if (!p)
return -EINVAL;
diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
index 1498c6b989e6..1ac52963e08b 100644
--- a/arch/powerpc/platforms/pseries/hvconsole.c
+++ b/arch/powerpc/platforms/pseries/hvconsole.c
@@ -49,7 +49,7 @@ EXPORT_SYMBOL(hvc_get_chars);
* @vtermno: The vtermno or unit_address of the adapter from which the data
* originated.
* @buf: The character buffer that contains the character data to send to
- * firmware.
+ * firmware. Must be at least 16 bytes, even if count is less than 16.
* @count: Send this number of characters.
*/
int hvc_put_chars(uint32_t vtermno, const char *buf, int count)
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index 84e8ec4011ba..b91eb0929ed1 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -147,13 +147,13 @@ static const struct dma_map_ops ibmebus_dma_ops = {
.unmap_page = ibmebus_unmap_page,
};
-static int ibmebus_match_path(struct device *dev, void *data)
+static int ibmebus_match_path(struct device *dev, const void *data)
{
struct device_node *dn = to_platform_device(dev)->dev.of_node;
return (of_find_node_by_path(data) == dn);
}
-static int ibmebus_match_node(struct device *dev, void *data)
+static int ibmebus_match_node(struct device *dev, const void *data)
{
return to_platform_device(dev)->dev.of_node == data;
}
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 889dc2e44b89..6ba081dd61c9 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -36,6 +36,7 @@
#include <asm/udbg.h>
#include <asm/mmzone.h>
#include <asm/plpar_wrappers.h>
+#include <asm/svm.h>
#include "pseries.h"
@@ -609,7 +610,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
iommu_table_setparms(pci->phb, dn, tbl);
tbl->it_ops = &iommu_table_pseries_ops;
- iommu_init_table(tbl, pci->phb->node);
+ iommu_init_table(tbl, pci->phb->node, 0, 0);
/* Divide the rest (1.75GB) among the children */
pci->phb->dma_window_size = 0x80000000ul;
@@ -621,7 +622,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
#ifdef CONFIG_IOMMU_API
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
- long *tce, enum dma_data_direction *direction)
+ long *tce, enum dma_data_direction *direction,
+ bool realmode)
{
long rc;
unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
@@ -649,7 +651,7 @@ static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
struct iommu_table_ops iommu_table_lpar_multi_ops = {
.set = tce_buildmulti_pSeriesLP,
#ifdef CONFIG_IOMMU_API
- .exchange = tce_exchange_pseries,
+ .xchg_no_kill = tce_exchange_pseries,
#endif
.clear = tce_freemulti_pSeriesLP,
.get = tce_get_pSeriesLP
@@ -690,7 +692,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
ppci->table_group, dma_window);
tbl->it_ops = &iommu_table_lpar_multi_ops;
- iommu_init_table(tbl, ppci->phb->node);
+ iommu_init_table(tbl, ppci->phb->node, 0, 0);
iommu_register_group(ppci->table_group,
pci_domain_nr(bus), 0);
pr_debug(" created table: %p\n", ppci->table_group);
@@ -719,7 +721,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
tbl = PCI_DN(dn)->table_group->tables[0];
iommu_table_setparms(phb, dn, tbl);
tbl->it_ops = &iommu_table_pseries_ops;
- iommu_init_table(tbl, phb->node);
+ iommu_init_table(tbl, phb->node, 0, 0);
set_iommu_table_base(&dev->dev, tbl);
return;
}
@@ -1169,7 +1171,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
iommu_table_setparms_lpar(pci->phb, pdn, tbl,
pci->table_group, dma_window);
tbl->it_ops = &iommu_table_lpar_multi_ops;
- iommu_init_table(tbl, pci->phb->node);
+ iommu_init_table(tbl, pci->phb->node, 0, 0);
iommu_register_group(pci->table_group,
pci_domain_nr(pci->phb->bus), 0);
pr_debug(" created table: %p\n", pci->table_group);
@@ -1318,7 +1320,15 @@ void iommu_init_early_pSeries(void)
of_reconfig_notifier_register(&iommu_reconfig_nb);
register_memory_notifier(&iommu_mem_nb);
- set_pci_dma_ops(&dma_iommu_ops);
+ /*
+ * Secure guest memory is inacessible to devices so regular DMA isn't
+ * possible.
+ *
+ * In that case keep devices' dma_map_ops as NULL so that the generic
+ * DMA code path will use SWIOTLB to bounce buffers for DMA.
+ */
+ if (!is_secure_guest())
+ set_pci_dma_ops(&dma_iommu_ops);
}
static int __init disable_multitce(char *str)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 73620dfb63a1..36b846f6e74e 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -17,6 +17,10 @@
#include <linux/jump_label.h>
#include <linux/delay.h>
#include <linux/stop_machine.h>
+#include <linux/spinlock.h>
+#include <linux/cpuhotplug.h>
+#include <linux/workqueue.h>
+#include <linux/proc_fs.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
@@ -52,13 +56,591 @@ EXPORT_SYMBOL(plpar_hcall);
EXPORT_SYMBOL(plpar_hcall9);
EXPORT_SYMBOL(plpar_hcall_norets);
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+static u8 dtl_mask = DTL_LOG_PREEMPT;
+#else
+static u8 dtl_mask;
+#endif
+
+void alloc_dtl_buffers(unsigned long *time_limit)
+{
+ int cpu;
+ struct paca_struct *pp;
+ struct dtl_entry *dtl;
+
+ for_each_possible_cpu(cpu) {
+ pp = paca_ptrs[cpu];
+ if (pp->dispatch_log)
+ continue;
+ dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
+ if (!dtl) {
+ pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
+ cpu);
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ pr_warn("Stolen time statistics will be unreliable\n");
+#endif
+ break;
+ }
+
+ pp->dtl_ridx = 0;
+ pp->dispatch_log = dtl;
+ pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
+ pp->dtl_curr = dtl;
+
+ if (time_limit && time_after(jiffies, *time_limit)) {
+ cond_resched();
+ *time_limit = jiffies + HZ;
+ }
+ }
+}
+
+void register_dtl_buffer(int cpu)
+{
+ long ret;
+ struct paca_struct *pp;
+ struct dtl_entry *dtl;
+ int hwcpu = get_hard_smp_processor_id(cpu);
+
+ pp = paca_ptrs[cpu];
+ dtl = pp->dispatch_log;
+ if (dtl && dtl_mask) {
+ pp->dtl_ridx = 0;
+ pp->dtl_curr = dtl;
+ lppaca_of(cpu).dtl_idx = 0;
+
+ /* hypervisor reads buffer length from this field */
+ dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
+ ret = register_dtl(hwcpu, __pa(dtl));
+ if (ret)
+ pr_err("WARNING: DTL registration of cpu %d (hw %d) failed with %ld\n",
+ cpu, hwcpu, ret);
+
+ lppaca_of(cpu).dtl_enable_mask = dtl_mask;
+ }
+}
+
+#ifdef CONFIG_PPC_SPLPAR
+struct dtl_worker {
+ struct delayed_work work;
+ int cpu;
+};
+
+struct vcpu_dispatch_data {
+ int last_disp_cpu;
+
+ int total_disp;
+
+ int same_cpu_disp;
+ int same_chip_disp;
+ int diff_chip_disp;
+ int far_chip_disp;
+
+ int numa_home_disp;
+ int numa_remote_disp;
+ int numa_far_disp;
+};
+
+/*
+ * This represents the number of cpus in the hypervisor. Since there is no
+ * architected way to discover the number of processors in the host, we
+ * provision for dealing with NR_CPUS. This is currently 2048 by default, and
+ * is sufficient for our purposes. This will need to be tweaked if
+ * CONFIG_NR_CPUS is changed.
+ */
+#define NR_CPUS_H NR_CPUS
+
+DEFINE_RWLOCK(dtl_access_lock);
+static DEFINE_PER_CPU(struct vcpu_dispatch_data, vcpu_disp_data);
+static DEFINE_PER_CPU(u64, dtl_entry_ridx);
+static DEFINE_PER_CPU(struct dtl_worker, dtl_workers);
+static enum cpuhp_state dtl_worker_state;
+static DEFINE_MUTEX(dtl_enable_mutex);
+static int vcpudispatch_stats_on __read_mostly;
+static int vcpudispatch_stats_freq = 50;
+static __be32 *vcpu_associativity, *pcpu_associativity;
+
+
+static void free_dtl_buffers(unsigned long *time_limit)
+{
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ int cpu;
+ struct paca_struct *pp;
+
+ for_each_possible_cpu(cpu) {
+ pp = paca_ptrs[cpu];
+ if (!pp->dispatch_log)
+ continue;
+ kmem_cache_free(dtl_cache, pp->dispatch_log);
+ pp->dtl_ridx = 0;
+ pp->dispatch_log = 0;
+ pp->dispatch_log_end = 0;
+ pp->dtl_curr = 0;
+
+ if (time_limit && time_after(jiffies, *time_limit)) {
+ cond_resched();
+ *time_limit = jiffies + HZ;
+ }
+ }
+#endif
+}
+
+static int init_cpu_associativity(void)
+{
+ vcpu_associativity = kcalloc(num_possible_cpus() / threads_per_core,
+ VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL);
+ pcpu_associativity = kcalloc(NR_CPUS_H / threads_per_core,
+ VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL);
+
+ if (!vcpu_associativity || !pcpu_associativity) {
+ pr_err("error allocating memory for associativity information\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void destroy_cpu_associativity(void)
+{
+ kfree(vcpu_associativity);
+ kfree(pcpu_associativity);
+ vcpu_associativity = pcpu_associativity = 0;
+}
+
+static __be32 *__get_cpu_associativity(int cpu, __be32 *cpu_assoc, int flag)
+{
+ __be32 *assoc;
+ int rc = 0;
+
+ assoc = &cpu_assoc[(int)(cpu / threads_per_core) * VPHN_ASSOC_BUFSIZE];
+ if (!assoc[0]) {
+ rc = hcall_vphn(cpu, flag, &assoc[0]);
+ if (rc)
+ return NULL;
+ }
+
+ return assoc;
+}
+
+static __be32 *get_pcpu_associativity(int cpu)
+{
+ return __get_cpu_associativity(cpu, pcpu_associativity, VPHN_FLAG_PCPU);
+}
+
+static __be32 *get_vcpu_associativity(int cpu)
+{
+ return __get_cpu_associativity(cpu, vcpu_associativity, VPHN_FLAG_VCPU);
+}
+
+static int cpu_relative_dispatch_distance(int last_disp_cpu, int cur_disp_cpu)
+{
+ __be32 *last_disp_cpu_assoc, *cur_disp_cpu_assoc;
+
+ if (last_disp_cpu >= NR_CPUS_H || cur_disp_cpu >= NR_CPUS_H)
+ return -EINVAL;
+
+ last_disp_cpu_assoc = get_pcpu_associativity(last_disp_cpu);
+ cur_disp_cpu_assoc = get_pcpu_associativity(cur_disp_cpu);
+
+ if (!last_disp_cpu_assoc || !cur_disp_cpu_assoc)
+ return -EIO;
+
+ return cpu_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
+}
+
+static int cpu_home_node_dispatch_distance(int disp_cpu)
+{
+ __be32 *disp_cpu_assoc, *vcpu_assoc;
+ int vcpu_id = smp_processor_id();
+
+ if (disp_cpu >= NR_CPUS_H) {
+ pr_debug_ratelimited("vcpu dispatch cpu %d > %d\n",
+ disp_cpu, NR_CPUS_H);
+ return -EINVAL;
+ }
+
+ disp_cpu_assoc = get_pcpu_associativity(disp_cpu);
+ vcpu_assoc = get_vcpu_associativity(vcpu_id);
+
+ if (!disp_cpu_assoc || !vcpu_assoc)
+ return -EIO;
+
+ return cpu_distance(disp_cpu_assoc, vcpu_assoc);
+}
+
+static void update_vcpu_disp_stat(int disp_cpu)
+{
+ struct vcpu_dispatch_data *disp;
+ int distance;
+
+ disp = this_cpu_ptr(&vcpu_disp_data);
+ if (disp->last_disp_cpu == -1) {
+ disp->last_disp_cpu = disp_cpu;
+ return;
+ }
+
+ disp->total_disp++;
+
+ if (disp->last_disp_cpu == disp_cpu ||
+ (cpu_first_thread_sibling(disp->last_disp_cpu) ==
+ cpu_first_thread_sibling(disp_cpu)))
+ disp->same_cpu_disp++;
+ else {
+ distance = cpu_relative_dispatch_distance(disp->last_disp_cpu,
+ disp_cpu);
+ if (distance < 0)
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n",
+ smp_processor_id());
+ else {
+ switch (distance) {
+ case 0:
+ disp->same_chip_disp++;
+ break;
+ case 1:
+ disp->diff_chip_disp++;
+ break;
+ case 2:
+ disp->far_chip_disp++;
+ break;
+ default:
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d (%d -> %d): unexpected relative dispatch distance %d\n",
+ smp_processor_id(),
+ disp->last_disp_cpu,
+ disp_cpu,
+ distance);
+ }
+ }
+ }
+
+ distance = cpu_home_node_dispatch_distance(disp_cpu);
+ if (distance < 0)
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n",
+ smp_processor_id());
+ else {
+ switch (distance) {
+ case 0:
+ disp->numa_home_disp++;
+ break;
+ case 1:
+ disp->numa_remote_disp++;
+ break;
+ case 2:
+ disp->numa_far_disp++;
+ break;
+ default:
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d on %d: unexpected numa dispatch distance %d\n",
+ smp_processor_id(),
+ disp_cpu,
+ distance);
+ }
+ }
+
+ disp->last_disp_cpu = disp_cpu;
+}
+
+static void process_dtl_buffer(struct work_struct *work)
+{
+ struct dtl_entry dtle;
+ u64 i = __this_cpu_read(dtl_entry_ridx);
+ struct dtl_entry *dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+ struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
+ struct lppaca *vpa = local_paca->lppaca_ptr;
+ struct dtl_worker *d = container_of(work, struct dtl_worker, work.work);
+
+ if (!local_paca->dispatch_log)
+ return;
+
+ /* if we have been migrated away, we cancel ourself */
+ if (d->cpu != smp_processor_id()) {
+ pr_debug("vcpudispatch_stats: cpu %d worker migrated -- canceling worker\n",
+ smp_processor_id());
+ return;
+ }
+
+ if (i == be64_to_cpu(vpa->dtl_idx))
+ goto out;
+
+ while (i < be64_to_cpu(vpa->dtl_idx)) {
+ dtle = *dtl;
+ barrier();
+ if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
+ /* buffer has overflowed */
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d lost %lld DTL samples\n",
+ d->cpu,
+ be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG - i);
+ i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
+ dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+ continue;
+ }
+ update_vcpu_disp_stat(be16_to_cpu(dtle.processor_id));
+ ++i;
+ ++dtl;
+ if (dtl == dtl_end)
+ dtl = local_paca->dispatch_log;
+ }
+
+ __this_cpu_write(dtl_entry_ridx, i);
+
+out:
+ schedule_delayed_work_on(d->cpu, to_delayed_work(work),
+ HZ / vcpudispatch_stats_freq);
+}
+
+static int dtl_worker_online(unsigned int cpu)
+{
+ struct dtl_worker *d = &per_cpu(dtl_workers, cpu);
+
+ memset(d, 0, sizeof(*d));
+ INIT_DELAYED_WORK(&d->work, process_dtl_buffer);
+ d->cpu = cpu;
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ per_cpu(dtl_entry_ridx, cpu) = 0;
+ register_dtl_buffer(cpu);
+#else
+ per_cpu(dtl_entry_ridx, cpu) = be64_to_cpu(lppaca_of(cpu).dtl_idx);
+#endif
+
+ schedule_delayed_work_on(cpu, &d->work, HZ / vcpudispatch_stats_freq);
+ return 0;
+}
+
+static int dtl_worker_offline(unsigned int cpu)
+{
+ struct dtl_worker *d = &per_cpu(dtl_workers, cpu);
+
+ cancel_delayed_work_sync(&d->work);
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ unregister_dtl(get_hard_smp_processor_id(cpu));
+#endif
+
+ return 0;
+}
+
+static void set_global_dtl_mask(u8 mask)
+{
+ int cpu;
+
+ dtl_mask = mask;
+ for_each_present_cpu(cpu)
+ lppaca_of(cpu).dtl_enable_mask = dtl_mask;
+}
+
+static void reset_global_dtl_mask(void)
+{
+ int cpu;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ dtl_mask = DTL_LOG_PREEMPT;
+#else
+ dtl_mask = 0;
+#endif
+ for_each_present_cpu(cpu)
+ lppaca_of(cpu).dtl_enable_mask = dtl_mask;
+}
+
+static int dtl_worker_enable(unsigned long *time_limit)
+{
+ int rc = 0, state;
+
+ if (!write_trylock(&dtl_access_lock)) {
+ rc = -EBUSY;
+ goto out;
+ }
+
+ set_global_dtl_mask(DTL_LOG_ALL);
+
+ /* Setup dtl buffers and register those */
+ alloc_dtl_buffers(time_limit);
+
+ state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/dtl:online",
+ dtl_worker_online, dtl_worker_offline);
+ if (state < 0) {
+ pr_err("vcpudispatch_stats: unable to setup workqueue for DTL processing\n");
+ free_dtl_buffers(time_limit);
+ reset_global_dtl_mask();
+ write_unlock(&dtl_access_lock);
+ rc = -EINVAL;
+ goto out;
+ }
+ dtl_worker_state = state;
+
+out:
+ return rc;
+}
+
+static void dtl_worker_disable(unsigned long *time_limit)
+{
+ cpuhp_remove_state(dtl_worker_state);
+ free_dtl_buffers(time_limit);
+ reset_global_dtl_mask();
+ write_unlock(&dtl_access_lock);
+}
+
+static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p,
+ size_t count, loff_t *ppos)
+{
+ unsigned long time_limit = jiffies + HZ;
+ struct vcpu_dispatch_data *disp;
+ int rc, cmd, cpu;
+ char buf[16];
+
+ if (count > 15)
+ return -EINVAL;
+
+ if (copy_from_user(buf, p, count))
+ return -EFAULT;
+
+ buf[count] = 0;
+ rc = kstrtoint(buf, 0, &cmd);
+ if (rc || cmd < 0 || cmd > 1) {
+ pr_err("vcpudispatch_stats: please use 0 to disable or 1 to enable dispatch statistics\n");
+ return rc ? rc : -EINVAL;
+ }
+
+ mutex_lock(&dtl_enable_mutex);
+
+ if ((cmd == 0 && !vcpudispatch_stats_on) ||
+ (cmd == 1 && vcpudispatch_stats_on))
+ goto out;
+
+ if (cmd) {
+ rc = init_cpu_associativity();
+ if (rc)
+ goto out;
+
+ for_each_possible_cpu(cpu) {
+ disp = per_cpu_ptr(&vcpu_disp_data, cpu);
+ memset(disp, 0, sizeof(*disp));
+ disp->last_disp_cpu = -1;
+ }
+
+ rc = dtl_worker_enable(&time_limit);
+ if (rc) {
+ destroy_cpu_associativity();
+ goto out;
+ }
+ } else {
+ dtl_worker_disable(&time_limit);
+ destroy_cpu_associativity();
+ }
+
+ vcpudispatch_stats_on = cmd;
+
+out:
+ mutex_unlock(&dtl_enable_mutex);
+ if (rc)
+ return rc;
+ return count;
+}
+
+static int vcpudispatch_stats_display(struct seq_file *p, void *v)
+{
+ int cpu;
+ struct vcpu_dispatch_data *disp;
+
+ if (!vcpudispatch_stats_on) {
+ seq_puts(p, "off\n");
+ return 0;
+ }
+
+ for_each_online_cpu(cpu) {
+ disp = per_cpu_ptr(&vcpu_disp_data, cpu);
+ seq_printf(p, "cpu%d", cpu);
+ seq_put_decimal_ull(p, " ", disp->total_disp);
+ seq_put_decimal_ull(p, " ", disp->same_cpu_disp);
+ seq_put_decimal_ull(p, " ", disp->same_chip_disp);
+ seq_put_decimal_ull(p, " ", disp->diff_chip_disp);
+ seq_put_decimal_ull(p, " ", disp->far_chip_disp);
+ seq_put_decimal_ull(p, " ", disp->numa_home_disp);
+ seq_put_decimal_ull(p, " ", disp->numa_remote_disp);
+ seq_put_decimal_ull(p, " ", disp->numa_far_disp);
+ seq_puts(p, "\n");
+ }
+
+ return 0;
+}
+
+static int vcpudispatch_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, vcpudispatch_stats_display, NULL);
+}
+
+static const struct file_operations vcpudispatch_stats_proc_ops = {
+ .open = vcpudispatch_stats_open,
+ .read = seq_read,
+ .write = vcpudispatch_stats_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static ssize_t vcpudispatch_stats_freq_write(struct file *file,
+ const char __user *p, size_t count, loff_t *ppos)
+{
+ int rc, freq;
+ char buf[16];
+
+ if (count > 15)
+ return -EINVAL;
+
+ if (copy_from_user(buf, p, count))
+ return -EFAULT;
+
+ buf[count] = 0;
+ rc = kstrtoint(buf, 0, &freq);
+ if (rc || freq < 1 || freq > HZ) {
+ pr_err("vcpudispatch_stats_freq: please specify a frequency between 1 and %d\n",
+ HZ);
+ return rc ? rc : -EINVAL;
+ }
+
+ vcpudispatch_stats_freq = freq;
+
+ return count;
+}
+
+static int vcpudispatch_stats_freq_display(struct seq_file *p, void *v)
+{
+ seq_printf(p, "%d\n", vcpudispatch_stats_freq);
+ return 0;
+}
+
+static int vcpudispatch_stats_freq_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, vcpudispatch_stats_freq_display, NULL);
+}
+
+static const struct file_operations vcpudispatch_stats_freq_proc_ops = {
+ .open = vcpudispatch_stats_freq_open,
+ .read = seq_read,
+ .write = vcpudispatch_stats_freq_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init vcpudispatch_stats_procfs_init(void)
+{
+ if (!lppaca_shared_proc(get_lppaca()))
+ return 0;
+
+ if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL,
+ &vcpudispatch_stats_proc_ops))
+ pr_err("vcpudispatch_stats: error creating procfs file\n");
+ else if (!proc_create("powerpc/vcpudispatch_stats_freq", 0600, NULL,
+ &vcpudispatch_stats_freq_proc_ops))
+ pr_err("vcpudispatch_stats_freq: error creating procfs file\n");
+
+ return 0;
+}
+
+machine_device_initcall(pseries, vcpudispatch_stats_procfs_init);
+#endif /* CONFIG_PPC_SPLPAR */
+
void vpa_init(int cpu)
{
int hwcpu = get_hard_smp_processor_id(cpu);
unsigned long addr;
long ret;
- struct paca_struct *pp;
- struct dtl_entry *dtl;
/*
* The spec says it "may be problematic" if CPU x registers the VPA of
@@ -99,22 +681,7 @@ void vpa_init(int cpu)
/*
* Register dispatch trace log, if one has been allocated.
*/
- pp = paca_ptrs[cpu];
- dtl = pp->dispatch_log;
- if (dtl) {
- pp->dtl_ridx = 0;
- pp->dtl_curr = dtl;
- lppaca_of(cpu).dtl_idx = 0;
-
- /* hypervisor reads buffer length from this field */
- dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
- ret = register_dtl(hwcpu, __pa(dtl));
- if (ret)
- pr_err("WARNING: DTL registration of cpu %d (hw %d) "
- "failed with %ld\n", smp_processor_id(),
- hwcpu, ret);
- lppaca_of(cpu).dtl_enable_mask = 2;
- }
+ register_dtl_buffer(cpu);
}
#ifdef CONFIG_PPC_BOOK3S_64
@@ -846,7 +1413,10 @@ static int pseries_lpar_resize_hpt_commit(void *data)
return 0;
}
-/* Must be called in user context */
+/*
+ * Must be called in process context. The caller must hold the
+ * cpus_lock.
+ */
static int pseries_lpar_resize_hpt(unsigned long shift)
{
struct hpt_resize_state state = {
@@ -900,7 +1470,8 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
t1 = ktime_get();
- rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
+ rc = stop_machine_cpuslocked(pseries_lpar_resize_hpt_commit,
+ &state, NULL);
t2 = ktime_get();
@@ -960,16 +1531,24 @@ void __init hpte_init_pseries(void)
mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range;
mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all;
mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
- register_process_table = pseries_lpar_register_process_table;
if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
+
+ /*
+ * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall
+ * to inform the hypervisor that we wish to use the HPT.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ pseries_lpar_register_process_table(0, 0, 0);
}
void radix_init_pseries(void)
{
pr_info("Using radix MMU under hypervisor\n");
- register_process_table = pseries_lpar_register_process_table;
+
+ pseries_lpar_register_process_table(__pa(process_tb),
+ 0, PRTB_SIZE_SHIFT - 12);
}
#ifdef CONFIG_PPC_SMLPAR
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 0c48c8964783..b571285f6c14 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -6,8 +6,10 @@
* Copyright (C) 2010 IBM Corporation
*/
+#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/kobject.h>
+#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/stat.h>
#include <linux/completion.h>
@@ -19,6 +21,7 @@
#include <asm/machdep.h>
#include <asm/rtas.h>
#include "pseries.h"
+#include "../../kernel/cacheinfo.h"
static struct kobject *mobility_kobj;
@@ -205,7 +208,11 @@ static int update_dt_node(__be32 phandle, s32 scope)
prop_data += vd;
}
+
+ cond_resched();
}
+
+ cond_resched();
} while (rtas_rc == 1);
of_node_put(dn);
@@ -308,8 +315,12 @@ int pseries_devicetree_update(s32 scope)
add_dt_node(phandle, drc_index);
break;
}
+
+ cond_resched();
}
}
+
+ cond_resched();
} while (rc == 1);
kfree(rtas_buf);
@@ -335,11 +346,28 @@ void post_mobility_fixup(void)
if (rc)
printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc);
+ /*
+ * We don't want CPUs to go online/offline while the device
+ * tree is being updated.
+ */
+ cpus_read_lock();
+
+ /*
+ * It's common for the destination firmware to replace cache
+ * nodes. Release all of the cacheinfo hierarchy's references
+ * before updating the device tree.
+ */
+ cacheinfo_teardown();
+
rc = pseries_devicetree_update(MIGRATION_SCOPE);
if (rc)
printk(KERN_ERR "Post-mobility device tree update "
"failed: %d\n", rc);
+ cacheinfo_rebuild();
+
+ cpus_read_unlock();
+
/* Possibly switch to a new RFI flush type */
pseries_setup_rfi_flush();
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 96c53b23e58f..a5ac371a3f06 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <linux/libnvdimm.h>
#include <linux/platform_device.h>
+#include <linux/delay.h>
#include <asm/plpar_wrappers.h>
@@ -28,6 +29,7 @@ struct papr_scm_priv {
uint64_t blocks;
uint64_t block_size;
int metadata_size;
+ bool is_volatile;
uint64_t bound_addr;
@@ -42,8 +44,9 @@ struct papr_scm_priv {
static int drc_pmem_bind(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
- uint64_t rc, token;
uint64_t saved = 0;
+ uint64_t token;
+ int64_t rc;
/*
* When the hypervisor cannot map all the requested memory in a single
@@ -63,6 +66,10 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
} while (rc == H_BUSY);
if (rc) {
+ /* H_OVERLAP needs a separate error path */
+ if (rc == H_OVERLAP)
+ return -EBUSY;
+
dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
return -ENXIO;
}
@@ -77,61 +84,135 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
static int drc_pmem_unbind(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
- uint64_t rc, token;
+ uint64_t token = 0;
+ int64_t rc;
- token = 0;
+ dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index);
- /* NB: unbind has the same retry requirements mentioned above */
+ /* NB: unbind has the same retry requirements as drc_pmem_bind() */
do {
- rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index,
- p->bound_addr, p->blocks, token);
+
+ /* Unbind of all SCM resources associated with drcIndex */
+ rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC,
+ p->drc_index, token);
token = ret[0];
- cond_resched();
+
+ /* Check if we are stalled for some time */
+ if (H_IS_LONG_BUSY(rc)) {
+ msleep(get_longbusy_msecs(rc));
+ rc = H_BUSY;
+ } else if (rc == H_BUSY) {
+ cond_resched();
+ }
+
} while (rc == H_BUSY);
if (rc)
dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
+ else
+ dev_dbg(&p->pdev->dev, "unbind drc %x complete\n",
+ p->drc_index);
- return !!rc;
+ return rc == H_SUCCESS ? 0 : -ENXIO;
}
static int papr_scm_meta_get(struct papr_scm_priv *p,
- struct nd_cmd_get_config_data_hdr *hdr)
+ struct nd_cmd_get_config_data_hdr *hdr)
{
unsigned long data[PLPAR_HCALL_BUFSIZE];
+ unsigned long offset, data_offset;
+ int len, read;
int64_t ret;
- if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
+ if ((hdr->in_offset + hdr->in_length) >= p->metadata_size)
return -EINVAL;
- ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index,
- hdr->in_offset, 1);
-
- if (ret == H_PARAMETER) /* bad DRC index */
- return -ENODEV;
- if (ret)
- return -EINVAL; /* other invalid parameter */
-
- hdr->out_buf[0] = data[0] & 0xff;
-
+ for (len = hdr->in_length; len; len -= read) {
+
+ data_offset = hdr->in_length - len;
+ offset = hdr->in_offset + data_offset;
+
+ if (len >= 8)
+ read = 8;
+ else if (len >= 4)
+ read = 4;
+ else if (len >= 2)
+ read = 2;
+ else
+ read = 1;
+
+ ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index,
+ offset, read);
+
+ if (ret == H_PARAMETER) /* bad DRC index */
+ return -ENODEV;
+ if (ret)
+ return -EINVAL; /* other invalid parameter */
+
+ switch (read) {
+ case 8:
+ *(uint64_t *)(hdr->out_buf + data_offset) = be64_to_cpu(data[0]);
+ break;
+ case 4:
+ *(uint32_t *)(hdr->out_buf + data_offset) = be32_to_cpu(data[0] & 0xffffffff);
+ break;
+
+ case 2:
+ *(uint16_t *)(hdr->out_buf + data_offset) = be16_to_cpu(data[0] & 0xffff);
+ break;
+
+ case 1:
+ *(uint8_t *)(hdr->out_buf + data_offset) = (data[0] & 0xff);
+ break;
+ }
+ }
return 0;
}
static int papr_scm_meta_set(struct papr_scm_priv *p,
- struct nd_cmd_set_config_hdr *hdr)
+ struct nd_cmd_set_config_hdr *hdr)
{
+ unsigned long offset, data_offset;
+ int len, wrote;
+ unsigned long data;
+ __be64 data_be;
int64_t ret;
- if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
+ if ((hdr->in_offset + hdr->in_length) >= p->metadata_size)
return -EINVAL;
- ret = plpar_hcall_norets(H_SCM_WRITE_METADATA,
- p->drc_index, hdr->in_offset, hdr->in_buf[0], 1);
-
- if (ret == H_PARAMETER) /* bad DRC index */
- return -ENODEV;
- if (ret)
- return -EINVAL; /* other invalid parameter */
+ for (len = hdr->in_length; len; len -= wrote) {
+
+ data_offset = hdr->in_length - len;
+ offset = hdr->in_offset + data_offset;
+
+ if (len >= 8) {
+ data = *(uint64_t *)(hdr->in_buf + data_offset);
+ data_be = cpu_to_be64(data);
+ wrote = 8;
+ } else if (len >= 4) {
+ data = *(uint32_t *)(hdr->in_buf + data_offset);
+ data &= 0xffffffff;
+ data_be = cpu_to_be32(data);
+ wrote = 4;
+ } else if (len >= 2) {
+ data = *(uint16_t *)(hdr->in_buf + data_offset);
+ data &= 0xffff;
+ data_be = cpu_to_be16(data);
+ wrote = 2;
+ } else {
+ data_be = *(uint8_t *)(hdr->in_buf + data_offset);
+ data_be &= 0xff;
+ wrote = 1;
+ }
+
+ ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, p->drc_index,
+ offset, data_be, wrote);
+ if (ret == H_PARAMETER) /* bad DRC index */
+ return -ENODEV;
+ if (ret)
+ return -EINVAL; /* other invalid parameter */
+ }
return 0;
}
@@ -153,7 +234,7 @@ int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
get_size_hdr = buf;
get_size_hdr->status = 0;
- get_size_hdr->max_xfer = 1;
+ get_size_hdr->max_xfer = 8;
get_size_hdr->config_size = p->metadata_size;
*cmd_rc = 0;
break;
@@ -194,12 +275,32 @@ static const struct attribute_group *papr_scm_dimm_groups[] = {
NULL,
};
+static inline int papr_scm_node(int node)
+{
+ int min_dist = INT_MAX, dist;
+ int nid, min_node;
+
+ if ((node == NUMA_NO_NODE) || node_online(node))
+ return node;
+
+ min_node = first_online_node;
+ for_each_online_node(nid) {
+ dist = node_distance(node, nid);
+ if (dist < min_dist) {
+ min_dist = dist;
+ min_node = nid;
+ }
+ }
+ return min_node;
+}
+
static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
{
struct device *dev = &p->pdev->dev;
struct nd_mapping_desc mapping;
struct nd_region_desc ndr_desc;
unsigned long dimm_flags;
+ int target_nid, online_nid;
p->bus_desc.ndctl = papr_scm_ndctl;
p->bus_desc.module = THIS_MODULE;
@@ -238,8 +339,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
memset(&ndr_desc, 0, sizeof(ndr_desc));
ndr_desc.attr_groups = region_attr_groups;
- ndr_desc.numa_node = dev_to_node(&p->pdev->dev);
- ndr_desc.target_node = ndr_desc.numa_node;
+ target_nid = dev_to_node(&p->pdev->dev);
+ online_nid = papr_scm_node(target_nid);
+ ndr_desc.numa_node = online_nid;
+ ndr_desc.target_node = target_nid;
ndr_desc.res = &p->res;
ndr_desc.of_node = p->dn;
ndr_desc.provider_data = p;
@@ -248,12 +351,18 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
ndr_desc.nd_set = &p->nd_set;
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
- p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
+ if (p->is_volatile)
+ p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
+ else
+ p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
if (!p->region) {
dev_err(dev, "Error registering region %pR from %pOF\n",
ndr_desc.res, p->dn);
goto err;
}
+ if (target_nid != online_nid)
+ dev_info(dev, "Region registered with target node %d and online node %d",
+ target_nid, online_nid);
return 0;
@@ -293,6 +402,7 @@ static int papr_scm_probe(struct platform_device *pdev)
return -ENODEV;
}
+
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;
@@ -304,11 +414,19 @@ static int papr_scm_probe(struct platform_device *pdev)
p->drc_index = drc_index;
p->block_size = block_size;
p->blocks = blocks;
+ p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
/* We just need to ensure that set cookies are unique across */
uuid_parse(uuid_str, (uuid_t *) uuid);
- p->nd_set.cookie1 = uuid[0];
- p->nd_set.cookie2 = uuid[1];
+ /*
+ * cookie1 and cookie2 are not really little endian
+ * we store a little endian representation of the
+ * uuid str so that we can compare this with the label
+ * area cookie irrespective of the endian config with which
+ * the kernel is built.
+ */
+ p->nd_set.cookie1 = cpu_to_le64(uuid[0]);
+ p->nd_set.cookie2 = cpu_to_le64(uuid[1]);
/* might be zero */
p->metadata_size = metadata_size;
@@ -316,6 +434,14 @@ static int papr_scm_probe(struct platform_device *pdev)
/* request the hypervisor to bind this region to somewhere in memory */
rc = drc_pmem_bind(p);
+
+ /* If phyp says drc memory still bound then force unbound and retry */
+ if (rc == -EBUSY) {
+ dev_warn(&pdev->dev, "Retrying bind after unbinding\n");
+ drc_pmem_unbind(p);
+ rc = drc_pmem_bind(p);
+ }
+
if (rc)
goto err;
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 1eae1d09980c..722830978639 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -229,8 +229,7 @@ void __init pSeries_final_fixup(void)
pSeries_request_regions();
- eeh_probe_devices();
- eeh_addr_cache_build();
+ eeh_show_enabled();
#ifdef CONFIG_PCI_IOV
ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index f16fdd0f71f7..3acdcc3bb908 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -76,6 +76,7 @@ struct pseries_mc_errorlog {
#define MC_ERROR_TYPE_UE 0x00
#define MC_ERROR_TYPE_SLB 0x01
#define MC_ERROR_TYPE_ERAT 0x02
+#define MC_ERROR_TYPE_UNKNOWN 0x03
#define MC_ERROR_TYPE_TLB 0x04
#define MC_ERROR_TYPE_D_CACHE 0x05
#define MC_ERROR_TYPE_I_CACHE 0x07
@@ -87,6 +88,9 @@ struct pseries_mc_errorlog {
#define MC_ERROR_UE_LOAD_STORE 3
#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
+#define UE_LOGICAL_ADDR_PROVIDED 0x20
+
#define MC_ERROR_SLB_PARITY 0
#define MC_ERROR_SLB_MULTIHIT 1
#define MC_ERROR_SLB_INDETERMINATE 2
@@ -113,27 +117,6 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
}
}
-static
-inline u64 rtas_mc_get_effective_addr(const struct pseries_mc_errorlog *mlog)
-{
- __be64 addr = 0;
-
- switch (mlog->error_type) {
- case MC_ERROR_TYPE_UE:
- if (mlog->sub_err_type & 0x40)
- addr = mlog->effective_address;
- break;
- case MC_ERROR_TYPE_SLB:
- case MC_ERROR_TYPE_ERAT:
- case MC_ERROR_TYPE_TLB:
- if (mlog->sub_err_type & 0x80)
- addr = mlog->effective_address;
- default:
- break;
- }
- return be64_to_cpu(addr);
-}
-
/*
* Enable the hotplug interrupt late because processing them may touch other
* devices or systems (e.g. hugepages) that have not been initialized at the
@@ -511,160 +494,165 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
return 0; /* need to perform reset */
}
-#define VAL_TO_STRING(ar, val) \
- (((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown")
-static void pseries_print_mce_info(struct pt_regs *regs,
- struct rtas_error_log *errp)
+static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
{
- const char *level, *sevstr;
+ struct mce_error_info mce_err = { 0 };
+ unsigned long eaddr = 0, paddr = 0;
struct pseries_errorlog *pseries_log;
struct pseries_mc_errorlog *mce_log;
- u8 error_type, err_sub_type;
- u64 addr;
- u8 initiator = rtas_error_initiator(errp);
int disposition = rtas_error_disposition(errp);
+ int initiator = rtas_error_initiator(errp);
+ int severity = rtas_error_severity(errp);
+ u8 error_type, err_sub_type;
- static const char * const initiators[] = {
- [0] = "Unknown",
- [1] = "CPU",
- [2] = "PCI",
- [3] = "ISA",
- [4] = "Memory",
- [5] = "Power Mgmt",
- };
- static const char * const mc_err_types[] = {
- [0] = "UE",
- [1] = "SLB",
- [2] = "ERAT",
- [3] = "Unknown",
- [4] = "TLB",
- [5] = "D-Cache",
- [6] = "Unknown",
- [7] = "I-Cache",
- };
- static const char * const mc_ue_types[] = {
- [0] = "Indeterminate",
- [1] = "Instruction fetch",
- [2] = "Page table walk ifetch",
- [3] = "Load/Store",
- [4] = "Page table walk Load/Store",
- };
-
- /* SLB sub errors valid values are 0x0, 0x1, 0x2 */
- static const char * const mc_slb_types[] = {
- [0] = "Parity",
- [1] = "Multihit",
- [2] = "Indeterminate",
- };
-
- /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
- static const char * const mc_soft_types[] = {
- [0] = "Unknown",
- [1] = "Parity",
- [2] = "Multihit",
- [3] = "Indeterminate",
- };
-
- if (!rtas_error_extended(errp)) {
- pr_err("Machine check interrupt: Missing extended error log\n");
- return;
- }
+ if (initiator == RTAS_INITIATOR_UNKNOWN)
+ mce_err.initiator = MCE_INITIATOR_UNKNOWN;
+ else if (initiator == RTAS_INITIATOR_CPU)
+ mce_err.initiator = MCE_INITIATOR_CPU;
+ else if (initiator == RTAS_INITIATOR_PCI)
+ mce_err.initiator = MCE_INITIATOR_PCI;
+ else if (initiator == RTAS_INITIATOR_ISA)
+ mce_err.initiator = MCE_INITIATOR_ISA;
+ else if (initiator == RTAS_INITIATOR_MEMORY)
+ mce_err.initiator = MCE_INITIATOR_MEMORY;
+ else if (initiator == RTAS_INITIATOR_POWERMGM)
+ mce_err.initiator = MCE_INITIATOR_POWERMGM;
+ else
+ mce_err.initiator = MCE_INITIATOR_UNKNOWN;
+
+ if (severity == RTAS_SEVERITY_NO_ERROR)
+ mce_err.severity = MCE_SEV_NO_ERROR;
+ else if (severity == RTAS_SEVERITY_EVENT)
+ mce_err.severity = MCE_SEV_WARNING;
+ else if (severity == RTAS_SEVERITY_WARNING)
+ mce_err.severity = MCE_SEV_WARNING;
+ else if (severity == RTAS_SEVERITY_ERROR_SYNC)
+ mce_err.severity = MCE_SEV_SEVERE;
+ else if (severity == RTAS_SEVERITY_ERROR)
+ mce_err.severity = MCE_SEV_SEVERE;
+ else if (severity == RTAS_SEVERITY_FATAL)
+ mce_err.severity = MCE_SEV_FATAL;
+ else
+ mce_err.severity = MCE_SEV_FATAL;
+
+ if (severity <= RTAS_SEVERITY_ERROR_SYNC)
+ mce_err.sync_error = true;
+ else
+ mce_err.sync_error = false;
+
+ mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
+ mce_err.error_class = MCE_ECLASS_UNKNOWN;
+
+ if (!rtas_error_extended(errp))
+ goto out;
pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
if (pseries_log == NULL)
- return;
+ goto out;
mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
-
error_type = mce_log->error_type;
err_sub_type = rtas_mc_error_sub_type(mce_log);
- switch (rtas_error_severity(errp)) {
- case RTAS_SEVERITY_NO_ERROR:
- level = KERN_INFO;
- sevstr = "Harmless";
- break;
- case RTAS_SEVERITY_WARNING:
- level = KERN_WARNING;
- sevstr = "";
- break;
- case RTAS_SEVERITY_ERROR:
- case RTAS_SEVERITY_ERROR_SYNC:
- level = KERN_ERR;
- sevstr = "Severe";
- break;
- case RTAS_SEVERITY_FATAL:
- default:
- level = KERN_ERR;
- sevstr = "Fatal";
- break;
- }
+ switch (mce_log->error_type) {
+ case MC_ERROR_TYPE_UE:
+ mce_err.error_type = MCE_ERROR_TYPE_UE;
+ switch (err_sub_type) {
+ case MC_ERROR_UE_IFETCH:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
+ break;
+ case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+ break;
+ case MC_ERROR_UE_LOAD_STORE:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+ break;
+ case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+ break;
+ case MC_ERROR_UE_INDETERMINATE:
+ default:
+ mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
+ break;
+ }
+ if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
+ eaddr = be64_to_cpu(mce_log->effective_address);
-#ifdef CONFIG_PPC_BOOK3S_64
- /* Display faulty slb contents for SLB errors. */
- if (error_type == MC_ERROR_TYPE_SLB)
- slb_dump_contents(local_paca->mce_faulty_slbs);
-#endif
+ if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
+ paddr = be64_to_cpu(mce_log->logical_address);
+ } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
+ unsigned long pfn;
- printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
- disposition == RTAS_DISP_FULLY_RECOVERED ?
- "Recovered" : "Not recovered");
- if (user_mode(regs)) {
- printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level,
- regs->nip, current->pid, current->comm);
- } else {
- printk("%s NIP [%016lx]: %pS\n", level, regs->nip,
- (void *)regs->nip);
- }
- printk("%s Initiator: %s\n", level,
- VAL_TO_STRING(initiators, initiator));
+ pfn = addr_to_pfn(regs, eaddr);
+ if (pfn != ULONG_MAX)
+ paddr = pfn << PAGE_SHIFT;
+ }
- switch (error_type) {
- case MC_ERROR_TYPE_UE:
- printk("%s Error type: %s [%s]\n", level,
- VAL_TO_STRING(mc_err_types, error_type),
- VAL_TO_STRING(mc_ue_types, err_sub_type));
break;
case MC_ERROR_TYPE_SLB:
- printk("%s Error type: %s [%s]\n", level,
- VAL_TO_STRING(mc_err_types, error_type),
- VAL_TO_STRING(mc_slb_types, err_sub_type));
+ mce_err.error_type = MCE_ERROR_TYPE_SLB;
+ switch (err_sub_type) {
+ case MC_ERROR_SLB_PARITY:
+ mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ break;
+ case MC_ERROR_SLB_MULTIHIT:
+ mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ break;
+ case MC_ERROR_SLB_INDETERMINATE:
+ default:
+ mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+ break;
+ }
+ if (mce_log->sub_err_type & 0x80)
+ eaddr = be64_to_cpu(mce_log->effective_address);
break;
case MC_ERROR_TYPE_ERAT:
+ mce_err.error_type = MCE_ERROR_TYPE_ERAT;
+ switch (err_sub_type) {
+ case MC_ERROR_ERAT_PARITY:
+ mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
+ break;
+ case MC_ERROR_ERAT_MULTIHIT:
+ mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ break;
+ case MC_ERROR_ERAT_INDETERMINATE:
+ default:
+ mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
+ break;
+ }
+ if (mce_log->sub_err_type & 0x80)
+ eaddr = be64_to_cpu(mce_log->effective_address);
+ break;
case MC_ERROR_TYPE_TLB:
- printk("%s Error type: %s [%s]\n", level,
- VAL_TO_STRING(mc_err_types, error_type),
- VAL_TO_STRING(mc_soft_types, err_sub_type));
+ mce_err.error_type = MCE_ERROR_TYPE_TLB;
+ switch (err_sub_type) {
+ case MC_ERROR_TLB_PARITY:
+ mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
+ break;
+ case MC_ERROR_TLB_MULTIHIT:
+ mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ break;
+ case MC_ERROR_TLB_INDETERMINATE:
+ default:
+ mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
+ break;
+ }
+ if (mce_log->sub_err_type & 0x80)
+ eaddr = be64_to_cpu(mce_log->effective_address);
+ break;
+ case MC_ERROR_TYPE_D_CACHE:
+ mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
break;
+ case MC_ERROR_TYPE_I_CACHE:
+ mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
+ break;
+ case MC_ERROR_TYPE_UNKNOWN:
default:
- printk("%s Error type: %s\n", level,
- VAL_TO_STRING(mc_err_types, error_type));
+ mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
break;
}
- addr = rtas_mc_get_effective_addr(mce_log);
- if (addr)
- printk("%s Effective address: %016llx\n", level, addr);
-}
-
-static int mce_handle_error(struct rtas_error_log *errp)
-{
- struct pseries_errorlog *pseries_log;
- struct pseries_mc_errorlog *mce_log;
- int disposition = rtas_error_disposition(errp);
- u8 error_type;
-
- if (!rtas_error_extended(errp))
- goto out;
-
- pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
- if (pseries_log == NULL)
- goto out;
-
- mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
- error_type = mce_log->error_type;
-
#ifdef CONFIG_PPC_BOOK3S_64
if (disposition == RTAS_DISP_NOT_RECOVERED) {
switch (error_type) {
@@ -682,98 +670,24 @@ static int mce_handle_error(struct rtas_error_log *errp)
slb_save_contents(local_paca->mce_faulty_slbs);
flush_and_reload_slb();
disposition = RTAS_DISP_FULLY_RECOVERED;
- rtas_set_disposition_recovered(errp);
break;
default:
break;
}
+ } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
+ /* Platform corrected itself but could be degraded */
+ printk(KERN_ERR "MCE: limited recovery, system may "
+ "be degraded\n");
+ disposition = RTAS_DISP_FULLY_RECOVERED;
}
#endif
out:
- return disposition;
-}
-
-#ifdef CONFIG_MEMORY_FAILURE
-
-static DEFINE_PER_CPU(int, rtas_ue_count);
-static DEFINE_PER_CPU(unsigned long, rtas_ue_paddr[MAX_MC_EVT]);
+ save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
+ &mce_err, regs->nip, eaddr, paddr);
-#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
-#define UE_LOGICAL_ADDR_PROVIDED 0x20
-
-
-static void pseries_hwpoison_work_fn(struct work_struct *work)
-{
- unsigned long paddr;
- int index;
-
- while (__this_cpu_read(rtas_ue_count) > 0) {
- index = __this_cpu_read(rtas_ue_count) - 1;
- paddr = __this_cpu_read(rtas_ue_paddr[index]);
- memory_failure(paddr >> PAGE_SHIFT, 0);
- __this_cpu_dec(rtas_ue_count);
- }
-}
-
-static DECLARE_WORK(hwpoison_work, pseries_hwpoison_work_fn);
-
-static void queue_ue_paddr(unsigned long paddr)
-{
- int index;
-
- index = __this_cpu_inc_return(rtas_ue_count) - 1;
- if (index >= MAX_MC_EVT) {
- __this_cpu_dec(rtas_ue_count);
- return;
- }
- this_cpu_write(rtas_ue_paddr[index], paddr);
- schedule_work(&hwpoison_work);
-}
-
-static void pseries_do_memory_failure(struct pt_regs *regs,
- struct pseries_mc_errorlog *mce_log)
-{
- unsigned long paddr;
-
- if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
- paddr = be64_to_cpu(mce_log->logical_address);
- } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
- unsigned long pfn;
-
- pfn = addr_to_pfn(regs,
- be64_to_cpu(mce_log->effective_address));
- if (pfn == ULONG_MAX)
- return;
- paddr = pfn << PAGE_SHIFT;
- } else {
- return;
- }
- queue_ue_paddr(paddr);
-}
-
-static void pseries_process_ue(struct pt_regs *regs,
- struct rtas_error_log *errp)
-{
- struct pseries_errorlog *pseries_log;
- struct pseries_mc_errorlog *mce_log;
-
- if (!rtas_error_extended(errp))
- return;
-
- pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
- if (!pseries_log)
- return;
-
- mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
-
- if (mce_log->error_type == MC_ERROR_TYPE_UE)
- pseries_do_memory_failure(regs, mce_log);
+ return disposition;
}
-#else
-static inline void pseries_process_ue(struct pt_regs *regs,
- struct rtas_error_log *errp) { }
-#endif /*CONFIG_MEMORY_FAILURE */
/*
* Process MCE rtas errlog event.
@@ -795,49 +709,51 @@ static void mce_process_errlog_event(struct irq_work *work)
* Return 1 if corrected (or delivered a signal).
* Return 0 if there is nothing we can do.
*/
-static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
+static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
{
int recovered = 0;
- int disposition = rtas_error_disposition(err);
-
- pseries_print_mce_info(regs, err);
if (!(regs->msr & MSR_RI)) {
/* If MSR_RI isn't set, we cannot recover */
pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
recovered = 0;
-
- } else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
+ } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
/* Platform corrected itself */
recovered = 1;
+ } else if (evt->severity == MCE_SEV_FATAL) {
+ /* Fatal machine check */
+ pr_err("Machine check interrupt is fatal\n");
+ recovered = 0;
+ }
- } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
- /* Platform corrected itself but could be degraded */
- printk(KERN_ERR "MCE: limited recovery, system may "
- "be degraded\n");
- recovered = 1;
-
- } else if (user_mode(regs) && !is_global_init(current) &&
- rtas_error_severity(err) == RTAS_SEVERITY_ERROR_SYNC) {
-
+ if (!recovered && evt->sync_error) {
/*
- * If we received a synchronous error when in userspace
- * kill the task. Firmware may report details of the fail
- * asynchronously, so we can't rely on the target and type
- * fields being valid here.
+ * Try to kill processes if we get a synchronous machine check
+ * (e.g., one caused by execution of this instruction). This
+ * will devolve into a panic if we try to kill init or are in
+ * an interrupt etc.
+ *
+ * TODO: Queue up this address for hwpoisioning later.
+ * TODO: This is not quite right for d-side machine
+ * checks ->nip is not necessarily the important
+ * address.
*/
- printk(KERN_ERR "MCE: uncorrectable error, killing task "
- "%s:%d\n", current->comm, current->pid);
-
- _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
- recovered = 1;
+ if ((user_mode(regs))) {
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
+ } else if (die_will_crash()) {
+ /*
+ * die() would kill the kernel, so better to go via
+ * the platform reboot code that will log the
+ * machine check.
+ */
+ recovered = 0;
+ } else {
+ die("Machine check", regs, SIGBUS);
+ recovered = 1;
+ }
}
- pseries_process_ue(regs, err);
-
- /* Queue irq work to log this rtas event later. */
- irq_work_queue(&mce_errlog_process_work);
-
return recovered;
}
@@ -853,14 +769,21 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
*/
int pSeries_machine_check_exception(struct pt_regs *regs)
{
- struct rtas_error_log *errp;
+ struct machine_check_event evt;
- if (fwnmi_active) {
- fwnmi_release_errinfo();
- errp = fwnmi_get_errlog();
- if (errp && recover_mce(regs, errp))
- return 1;
+ if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+ return 0;
+
+ /* Print things out */
+ if (evt.version != MCE_V1) {
+ pr_err("Machine Check Exception, Unknown event version %d !\n",
+ evt.version);
+ return 0;
}
+ machine_check_print_event_info(&evt, user_mode(regs), false);
+
+ if (recover_mce(regs, &evt))
+ return 1;
return 0;
}
@@ -877,7 +800,12 @@ long pseries_machine_check_realmode(struct pt_regs *regs)
* to panic. Hence we will call it as soon as we go into
* virtual mode.
*/
- disposition = mce_handle_error(errp);
+ disposition = mce_handle_error(regs, errp);
+ fwnmi_release_errinfo();
+
+ /* Queue irq work to log this rtas event later. */
+ irq_work_queue(&mce_errlog_process_work);
+
if (disposition == RTAS_DISP_FULLY_RECOVERED)
return 1;
}
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
new file mode 100644
index 000000000000..70c3013fdd07
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Firmware-Assisted Dump support on POWERVM platform.
+ *
+ * Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "rtas fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/fadump.h>
+#include <asm/fadump-internal.h>
+
+#include "rtas-fadump.h"
+
+static struct rtas_fadump_mem_struct fdm;
+static const struct rtas_fadump_mem_struct *fdm_active;
+
+static void rtas_fadump_update_config(struct fw_dump *fadump_conf,
+ const struct rtas_fadump_mem_struct *fdm)
+{
+ fadump_conf->boot_mem_dest_addr =
+ be64_to_cpu(fdm->rmr_region.destination_address);
+
+ fadump_conf->fadumphdr_addr = (fadump_conf->boot_mem_dest_addr +
+ fadump_conf->boot_memory_size);
+}
+
+/*
+ * This function is called in the capture kernel to get configuration details
+ * setup in the first kernel and passed to the f/w.
+ */
+static void rtas_fadump_get_config(struct fw_dump *fadump_conf,
+ const struct rtas_fadump_mem_struct *fdm)
+{
+ fadump_conf->boot_mem_addr[0] =
+ be64_to_cpu(fdm->rmr_region.source_address);
+ fadump_conf->boot_mem_sz[0] = be64_to_cpu(fdm->rmr_region.source_len);
+ fadump_conf->boot_memory_size = fadump_conf->boot_mem_sz[0];
+
+ fadump_conf->boot_mem_top = fadump_conf->boot_memory_size;
+ fadump_conf->boot_mem_regs_cnt = 1;
+
+ /*
+ * Start address of reserve dump area (permanent reservation) for
+ * re-registering FADump after dump capture.
+ */
+ fadump_conf->reserve_dump_area_start =
+ be64_to_cpu(fdm->cpu_state_data.destination_address);
+
+ rtas_fadump_update_config(fadump_conf, fdm);
+}
+
+static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf)
+{
+ u64 addr = fadump_conf->reserve_dump_area_start;
+
+ memset(&fdm, 0, sizeof(struct rtas_fadump_mem_struct));
+ addr = addr & PAGE_MASK;
+
+ fdm.header.dump_format_version = cpu_to_be32(0x00000001);
+ fdm.header.dump_num_sections = cpu_to_be16(3);
+ fdm.header.dump_status_flag = 0;
+ fdm.header.offset_first_dump_section =
+ cpu_to_be32((u32)offsetof(struct rtas_fadump_mem_struct,
+ cpu_state_data));
+
+ /*
+ * Fields for disk dump option.
+ * We are not using disk dump option, hence set these fields to 0.
+ */
+ fdm.header.dd_block_size = 0;
+ fdm.header.dd_block_offset = 0;
+ fdm.header.dd_num_blocks = 0;
+ fdm.header.dd_offset_disk_path = 0;
+
+ /* set 0 to disable an automatic dump-reboot. */
+ fdm.header.max_time_auto = 0;
+
+ /* Kernel dump sections */
+ /* cpu state data section. */
+ fdm.cpu_state_data.request_flag =
+ cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.cpu_state_data.source_data_type =
+ cpu_to_be16(RTAS_FADUMP_CPU_STATE_DATA);
+ fdm.cpu_state_data.source_address = 0;
+ fdm.cpu_state_data.source_len =
+ cpu_to_be64(fadump_conf->cpu_state_data_size);
+ fdm.cpu_state_data.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->cpu_state_data_size;
+
+ /* hpte region section */
+ fdm.hpte_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.hpte_region.source_data_type =
+ cpu_to_be16(RTAS_FADUMP_HPTE_REGION);
+ fdm.hpte_region.source_address = 0;
+ fdm.hpte_region.source_len =
+ cpu_to_be64(fadump_conf->hpte_region_size);
+ fdm.hpte_region.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->hpte_region_size;
+
+ /* RMA region section */
+ fdm.rmr_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.rmr_region.source_data_type =
+ cpu_to_be16(RTAS_FADUMP_REAL_MODE_REGION);
+ fdm.rmr_region.source_address = cpu_to_be64(0);
+ fdm.rmr_region.source_len = cpu_to_be64(fadump_conf->boot_memory_size);
+ fdm.rmr_region.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->boot_memory_size;
+
+ rtas_fadump_update_config(fadump_conf, &fdm);
+
+ return addr;
+}
+
+static u64 rtas_fadump_get_bootmem_min(void)
+{
+ return RTAS_FADUMP_MIN_BOOT_MEM;
+}
+
+static int rtas_fadump_register(struct fw_dump *fadump_conf)
+{
+ unsigned int wait_time;
+ int rc, err = -EIO;
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+ NULL, FADUMP_REGISTER, &fdm,
+ sizeof(struct rtas_fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+
+ } while (wait_time);
+
+ switch (rc) {
+ case 0:
+ pr_info("Registration is successful!\n");
+ fadump_conf->dump_registered = 1;
+ err = 0;
+ break;
+ case -1:
+ pr_err("Failed to register. Hardware Error(%d).\n", rc);
+ break;
+ case -3:
+ if (!is_fadump_boot_mem_contiguous())
+ pr_err("Can't have holes in boot memory area.\n");
+ else if (!is_fadump_reserved_mem_contiguous())
+ pr_err("Can't have holes in reserved memory area.\n");
+
+ pr_err("Failed to register. Parameter Error(%d).\n", rc);
+ err = -EINVAL;
+ break;
+ case -9:
+ pr_err("Already registered!\n");
+ fadump_conf->dump_registered = 1;
+ err = -EEXIST;
+ break;
+ default:
+ pr_err("Failed to register. Unknown Error(%d).\n", rc);
+ break;
+ }
+
+ return err;
+}
+
+static int rtas_fadump_unregister(struct fw_dump *fadump_conf)
+{
+ unsigned int wait_time;
+ int rc;
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+ NULL, FADUMP_UNREGISTER, &fdm,
+ sizeof(struct rtas_fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+ } while (wait_time);
+
+ if (rc) {
+ pr_err("Failed to un-register - unexpected error(%d).\n", rc);
+ return -EIO;
+ }
+
+ fadump_conf->dump_registered = 0;
+ return 0;
+}
+
+static int rtas_fadump_invalidate(struct fw_dump *fadump_conf)
+{
+ unsigned int wait_time;
+ int rc;
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+ NULL, FADUMP_INVALIDATE, fdm_active,
+ sizeof(struct rtas_fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+ } while (wait_time);
+
+ if (rc) {
+ pr_err("Failed to invalidate - unexpected error (%d).\n", rc);
+ return -EIO;
+ }
+
+ fadump_conf->dump_active = 0;
+ fdm_active = NULL;
+ return 0;
+}
+
+#define RTAS_FADUMP_GPR_MASK 0xffffff0000000000
+static inline int rtas_fadump_gpr_index(u64 id)
+{
+ char str[3];
+ int i = -1;
+
+ if ((id & RTAS_FADUMP_GPR_MASK) == fadump_str_to_u64("GPR")) {
+ /* get the digits at the end */
+ id &= ~RTAS_FADUMP_GPR_MASK;
+ id >>= 24;
+ str[2] = '\0';
+ str[1] = id & 0xff;
+ str[0] = (id >> 8) & 0xff;
+ if (kstrtoint(str, 10, &i))
+ i = -EINVAL;
+ if (i > 31)
+ i = -1;
+ }
+ return i;
+}
+
+void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
+{
+ int i;
+
+ i = rtas_fadump_gpr_index(reg_id);
+ if (i >= 0)
+ regs->gpr[i] = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("NIA"))
+ regs->nip = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("MSR"))
+ regs->msr = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("CTR"))
+ regs->ctr = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("LR"))
+ regs->link = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("XER"))
+ regs->xer = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("CR"))
+ regs->ccr = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("DAR"))
+ regs->dar = (unsigned long)reg_val;
+ else if (reg_id == fadump_str_to_u64("DSISR"))
+ regs->dsisr = (unsigned long)reg_val;
+}
+
+static struct rtas_fadump_reg_entry*
+rtas_fadump_read_regs(struct rtas_fadump_reg_entry *reg_entry,
+ struct pt_regs *regs)
+{
+ memset(regs, 0, sizeof(struct pt_regs));
+
+ while (be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUEND")) {
+ rtas_fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
+ be64_to_cpu(reg_entry->reg_value));
+ reg_entry++;
+ }
+ reg_entry++;
+ return reg_entry;
+}
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
+ * used to access the data to allow for additional fields to be added without
+ * affecting compatibility. Each list of registers for a CPU starts with
+ * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
+ * 8 Byte ASCII identifier and 8 Byte register value. The register entry
+ * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
+ * of register value. For more details refer to PAPR document.
+ *
+ * Only for the crashing cpu we ignore the CPU dump data and get exact
+ * state from fadump crash info structure populated by first kernel at the
+ * time of crash.
+ */
+static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf)
+{
+ struct rtas_fadump_reg_save_area_header *reg_header;
+ struct fadump_crash_info_header *fdh = NULL;
+ struct rtas_fadump_reg_entry *reg_entry;
+ u32 num_cpus, *note_buf;
+ int i, rc = 0, cpu = 0;
+ struct pt_regs regs;
+ unsigned long addr;
+ void *vaddr;
+
+ addr = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
+ vaddr = __va(addr);
+
+ reg_header = vaddr;
+ if (be64_to_cpu(reg_header->magic_number) !=
+ fadump_str_to_u64("REGSAVE")) {
+ pr_err("Unable to read register save area.\n");
+ return -ENOENT;
+ }
+
+ pr_debug("--------CPU State Data------------\n");
+ pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
+ pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
+
+ vaddr += be32_to_cpu(reg_header->num_cpu_offset);
+ num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
+ pr_debug("NumCpus : %u\n", num_cpus);
+ vaddr += sizeof(u32);
+ reg_entry = (struct rtas_fadump_reg_entry *)vaddr;
+
+ rc = fadump_setup_cpu_notes_buf(num_cpus);
+ if (rc != 0)
+ return rc;
+
+ note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr;
+
+ if (fadump_conf->fadumphdr_addr)
+ fdh = __va(fadump_conf->fadumphdr_addr);
+
+ for (i = 0; i < num_cpus; i++) {
+ if (be64_to_cpu(reg_entry->reg_id) !=
+ fadump_str_to_u64("CPUSTRT")) {
+ pr_err("Unable to read CPU state data\n");
+ rc = -ENOENT;
+ goto error_out;
+ }
+ /* Lower 4 bytes of reg_value contains logical cpu id */
+ cpu = (be64_to_cpu(reg_entry->reg_value) &
+ RTAS_FADUMP_CPU_ID_MASK);
+ if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
+ RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
+ continue;
+ }
+ pr_debug("Reading register data for cpu %d...\n", cpu);
+ if (fdh && fdh->crashing_cpu == cpu) {
+ regs = fdh->regs;
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
+ } else {
+ reg_entry++;
+ reg_entry = rtas_fadump_read_regs(reg_entry, &regs);
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ }
+ }
+ final_note(note_buf);
+
+ if (fdh) {
+ pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+ fdh->elfcorehdr_addr);
+ fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr));
+ }
+ return 0;
+
+error_out:
+ fadump_free_cpu_notes_buf();
+ return rc;
+
+}
+
+/*
+ * Validate and process the dump data stored by firmware before exporting
+ * it through '/proc/vmcore'.
+ */
+static int __init rtas_fadump_process(struct fw_dump *fadump_conf)
+{
+ struct fadump_crash_info_header *fdh;
+ int rc = 0;
+
+ if (!fdm_active || !fadump_conf->fadumphdr_addr)
+ return -EINVAL;
+
+ /* Check if the dump data is valid. */
+ if ((be16_to_cpu(fdm_active->header.dump_status_flag) ==
+ RTAS_FADUMP_ERROR_FLAG) ||
+ (fdm_active->cpu_state_data.error_flags != 0) ||
+ (fdm_active->rmr_region.error_flags != 0)) {
+ pr_err("Dump taken by platform is not valid\n");
+ return -EINVAL;
+ }
+ if ((fdm_active->rmr_region.bytes_dumped !=
+ fdm_active->rmr_region.source_len) ||
+ !fdm_active->cpu_state_data.bytes_dumped) {
+ pr_err("Dump taken by platform is incomplete\n");
+ return -EINVAL;
+ }
+
+ /* Validate the fadump crash info header */
+ fdh = __va(fadump_conf->fadumphdr_addr);
+ if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+ pr_err("Crash info header is not valid.\n");
+ return -EINVAL;
+ }
+
+ rc = rtas_fadump_build_cpu_notes(fadump_conf);
+ if (rc)
+ return rc;
+
+ /*
+ * We are done validating dump info and elfcore header is now ready
+ * to be exported. set elfcorehdr_addr so that vmcore module will
+ * export the elfcore header through '/proc/vmcore'.
+ */
+ elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+ return 0;
+}
+
+static void rtas_fadump_region_show(struct fw_dump *fadump_conf,
+ struct seq_file *m)
+{
+ const struct rtas_fadump_section *cpu_data_section;
+ const struct rtas_fadump_mem_struct *fdm_ptr;
+
+ if (fdm_active)
+ fdm_ptr = fdm_active;
+ else
+ fdm_ptr = &fdm;
+
+ cpu_data_section = &(fdm_ptr->cpu_state_data);
+ seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+ be64_to_cpu(cpu_data_section->destination_address),
+ be64_to_cpu(cpu_data_section->destination_address) +
+ be64_to_cpu(cpu_data_section->source_len) - 1,
+ be64_to_cpu(cpu_data_section->source_len),
+ be64_to_cpu(cpu_data_section->bytes_dumped));
+
+ seq_printf(m, "HPTE:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+ be64_to_cpu(fdm_ptr->hpte_region.destination_address),
+ be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
+ be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
+ be64_to_cpu(fdm_ptr->hpte_region.source_len),
+ be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
+
+ seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
+ be64_to_cpu(fdm_ptr->rmr_region.source_address),
+ be64_to_cpu(fdm_ptr->rmr_region.destination_address));
+ seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
+ be64_to_cpu(fdm_ptr->rmr_region.source_len),
+ be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
+
+ /* Dump is active. Show reserved area start address. */
+ if (fdm_active) {
+ seq_printf(m, "\nMemory above %#016lx is reserved for saving crash dump\n",
+ fadump_conf->reserve_dump_area_start);
+ }
+}
+
+static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh,
+ const char *msg)
+{
+ /* Call ibm,os-term rtas call to trigger firmware assisted dump */
+ rtas_os_term((char *)msg);
+}
+
+static struct fadump_ops rtas_fadump_ops = {
+ .fadump_init_mem_struct = rtas_fadump_init_mem_struct,
+ .fadump_get_bootmem_min = rtas_fadump_get_bootmem_min,
+ .fadump_register = rtas_fadump_register,
+ .fadump_unregister = rtas_fadump_unregister,
+ .fadump_invalidate = rtas_fadump_invalidate,
+ .fadump_process = rtas_fadump_process,
+ .fadump_region_show = rtas_fadump_region_show,
+ .fadump_trigger = rtas_fadump_trigger,
+};
+
+void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+{
+ int i, size, num_sections;
+ const __be32 *sections;
+ const __be32 *token;
+
+ /*
+ * Check if Firmware Assisted dump is supported. if yes, check
+ * if dump has been initiated on last reboot.
+ */
+ token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
+ if (!token)
+ return;
+
+ fadump_conf->ibm_configure_kernel_dump = be32_to_cpu(*token);
+ fadump_conf->ops = &rtas_fadump_ops;
+ fadump_conf->fadump_supported = 1;
+
+ /* Firmware supports 64-bit value for size, align it to pagesize. */
+ fadump_conf->max_copy_size = _ALIGN_DOWN(U64_MAX, PAGE_SIZE);
+
+ /*
+ * The 'ibm,kernel-dump' rtas node is present only if there is
+ * dump data waiting for us.
+ */
+ fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
+ if (fdm_active) {
+ pr_info("Firmware-assisted dump is active.\n");
+ fadump_conf->dump_active = 1;
+ rtas_fadump_get_config(fadump_conf, (void *)__pa(fdm_active));
+ }
+
+ /* Get the sizes required to store dump data for the firmware provided
+ * dump sections.
+ * For each dump section type supported, a 32bit cell which defines
+ * the ID of a supported section followed by two 32 bit cells which
+ * gives the size of the section in bytes.
+ */
+ sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
+ &size);
+
+ if (!sections)
+ return;
+
+ num_sections = size / (3 * sizeof(u32));
+
+ for (i = 0; i < num_sections; i++, sections += 3) {
+ u32 type = (u32)of_read_number(sections, 1);
+
+ switch (type) {
+ case RTAS_FADUMP_CPU_STATE_DATA:
+ fadump_conf->cpu_state_data_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ case RTAS_FADUMP_HPTE_REGION:
+ fadump_conf->hpte_region_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ }
+ }
+}
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.h b/arch/powerpc/platforms/pseries/rtas-fadump.h
new file mode 100644
index 000000000000..fd59bd7ca9c3
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Firmware-Assisted Dump support on POWERVM platform.
+ *
+ * Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#ifndef _PSERIES_RTAS_FADUMP_H
+#define _PSERIES_RTAS_FADUMP_H
+
+/*
+ * On some Power systems where RMO is 128MB, it still requires minimum of
+ * 256MB for kernel to boot successfully. When kdump infrastructure is
+ * configured to save vmcore over network, we run into OOM issue while
+ * loading modules related to network setup. Hence we need additional 64M
+ * of memory to avoid OOM issue.
+ */
+#define RTAS_FADUMP_MIN_BOOT_MEM ((0x1UL << 28) + (0x1UL << 26))
+
+/* Firmware provided dump sections */
+#define RTAS_FADUMP_CPU_STATE_DATA 0x0001
+#define RTAS_FADUMP_HPTE_REGION 0x0002
+#define RTAS_FADUMP_REAL_MODE_REGION 0x0011
+
+/* Dump request flag */
+#define RTAS_FADUMP_REQUEST_FLAG 0x00000001
+
+/* Dump status flag */
+#define RTAS_FADUMP_ERROR_FLAG 0x2000
+
+/* Kernel Dump section info */
+struct rtas_fadump_section {
+ __be32 request_flag;
+ __be16 source_data_type;
+ __be16 error_flags;
+ __be64 source_address;
+ __be64 source_len;
+ __be64 bytes_dumped;
+ __be64 destination_address;
+};
+
+/* ibm,configure-kernel-dump header. */
+struct rtas_fadump_section_header {
+ __be32 dump_format_version;
+ __be16 dump_num_sections;
+ __be16 dump_status_flag;
+ __be32 offset_first_dump_section;
+
+ /* Fields for disk dump option. */
+ __be32 dd_block_size;
+ __be64 dd_block_offset;
+ __be64 dd_num_blocks;
+ __be32 dd_offset_disk_path;
+
+ /* Maximum time allowed to prevent an automatic dump-reboot. */
+ __be32 max_time_auto;
+};
+
+/*
+ * Firmware Assisted dump memory structure. This structure is required for
+ * registering future kernel dump with power firmware through rtas call.
+ *
+ * No disk dump option. Hence disk dump path string section is not included.
+ */
+struct rtas_fadump_mem_struct {
+ struct rtas_fadump_section_header header;
+
+ /* Kernel dump sections */
+ struct rtas_fadump_section cpu_state_data;
+ struct rtas_fadump_section hpte_region;
+
+ /*
+ * TODO: Extend multiple boot memory regions support in the kernel
+ * for this platform.
+ */
+ struct rtas_fadump_section rmr_region;
+};
+
+/*
+ * The firmware-assisted dump format.
+ *
+ * The register save area is an area in the partition's memory used to preserve
+ * the register contents (CPU state data) for the active CPUs during a firmware
+ * assisted dump. The dump format contains register save area header followed
+ * by register entries. Each list of registers for a CPU starts with "CPUSTRT"
+ * and ends with "CPUEND".
+ */
+
+/* Register save area header. */
+struct rtas_fadump_reg_save_area_header {
+ __be64 magic_number;
+ __be32 version;
+ __be32 num_cpu_offset;
+};
+
+/* Register entry. */
+struct rtas_fadump_reg_entry {
+ __be64 reg_id;
+ __be64 reg_value;
+};
+
+/* Utility macros */
+#define RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry) \
+({ \
+ while (be64_to_cpu(reg_entry->reg_id) != \
+ fadump_str_to_u64("CPUEND")) \
+ reg_entry++; \
+ reg_entry++; \
+})
+
+#define RTAS_FADUMP_CPU_ID_MASK ((1UL << 32) - 1)
+
+#endif /* _PSERIES_RTAS_FADUMP_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 8fa012a65a71..f8adcd0e4589 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -38,6 +38,7 @@
#include <linux/of.h>
#include <linux/of_pci.h>
#include <linux/memblock.h>
+#include <linux/swiotlb.h>
#include <asm/mmu.h>
#include <asm/processor.h>
@@ -67,6 +68,8 @@
#include <asm/isa-bridge.h>
#include <asm/security_features.h>
#include <asm/asm-const.h>
+#include <asm/swiotlb.h>
+#include <asm/svm.h>
#include "pseries.h"
#include "../../../../drivers/pci/pci.h"
@@ -139,17 +142,19 @@ static void __init fwnmi_init(void)
}
#ifdef CONFIG_PPC_BOOK3S_64
- /* Allocate per cpu slb area to save old slb contents during MCE */
- size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
- slb_ptr = memblock_alloc_try_nid_raw(size, sizeof(struct slb_entry),
- MEMBLOCK_LOW_LIMIT, ppc64_rma_size,
- NUMA_NO_NODE);
- if (!slb_ptr)
- panic("Failed to allocate %zu bytes below %pa for slb area\n",
- size, &ppc64_rma_size);
-
- for_each_possible_cpu(i)
- paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
+ if (!radix_enabled()) {
+ /* Allocate per cpu area to save old slb contents during MCE */
+ size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
+ slb_ptr = memblock_alloc_try_nid_raw(size,
+ sizeof(struct slb_entry), MEMBLOCK_LOW_LIMIT,
+ ppc64_rma_size, NUMA_NO_NODE);
+ if (!slb_ptr)
+ panic("Failed to allocate %zu bytes below %pa for slb area\n",
+ size, &ppc64_rma_size);
+
+ for_each_possible_cpu(i)
+ paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
+ }
#endif
}
@@ -273,46 +278,16 @@ struct kmem_cache *dtl_cache;
*/
static int alloc_dispatch_logs(void)
{
- int cpu, ret;
- struct paca_struct *pp;
- struct dtl_entry *dtl;
-
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return 0;
if (!dtl_cache)
return 0;
- for_each_possible_cpu(cpu) {
- pp = paca_ptrs[cpu];
- dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
- if (!dtl) {
- pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
- cpu);
- pr_warn("Stolen time statistics will be unreliable\n");
- break;
- }
-
- pp->dtl_ridx = 0;
- pp->dispatch_log = dtl;
- pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
- pp->dtl_curr = dtl;
- }
+ alloc_dtl_buffers(0);
/* Register the DTL for the current (boot) cpu */
- dtl = get_paca()->dispatch_log;
- get_paca()->dtl_ridx = 0;
- get_paca()->dtl_curr = dtl;
- get_paca()->lppaca_ptr->dtl_idx = 0;
-
- /* hypervisor reads buffer length from this field */
- dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
- ret = register_dtl(hard_smp_processor_id(), __pa(dtl));
- if (ret)
- pr_err("WARNING: DTL registration of cpu %d (hw %d) failed "
- "with %d\n", smp_processor_id(),
- hard_smp_processor_id(), ret);
- get_paca()->lppaca_ptr->dtl_enable_mask = 2;
+ register_dtl_buffer(smp_processor_id());
return 0;
}
@@ -325,8 +300,10 @@ static inline int alloc_dispatch_logs(void)
static int alloc_dispatch_log_kmem_cache(void)
{
+ void (*ctor)(void *) = get_dtl_cache_ctor();
+
dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
- DISPATCH_LOG_BYTES, 0, NULL);
+ DISPATCH_LOG_BYTES, 0, ctor);
if (!dtl_cache) {
pr_warn("Failed to create dispatch trace log buffer cache\n");
pr_warn("Stolen time statistics will be unreliable\n");
@@ -344,6 +321,9 @@ static void pseries_lpar_idle(void)
* low power mode by ceding processor to hypervisor
*/
+ if (!prep_irq_for_idle())
+ return;
+
/* Indicate to hypervisor that we are idle. */
get_lppaca()->idle = 1;
@@ -793,6 +773,9 @@ static void __init pSeries_setup_arch(void)
}
ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
+
+ if (swiotlb_force == SWIOTLB_FORCE)
+ ppc_swiotlb_enable = 1;
}
static void pseries_panic(char *str)
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 4b3ef8d9c63f..ad61e90032da 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -41,6 +41,7 @@
#include <asm/dbell.h>
#include <asm/plpar_wrappers.h>
#include <asm/code-patching.h>
+#include <asm/svm.h>
#include "pseries.h"
#include "offline_states.h"
@@ -221,7 +222,7 @@ static __init void pSeries_smp_probe_xics(void)
{
xics_smp_probe();
- if (cpu_has_feature(CPU_FTR_DBELL))
+ if (cpu_has_feature(CPU_FTR_DBELL) && !is_secure_guest())
smp_ops->cause_ipi = smp_pseries_cause_ipi;
else
smp_ops->cause_ipi = icp_ops->cause_ipi;
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
new file mode 100644
index 000000000000..40c0637203d5
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Secure VM platform
+ *
+ * Copyright 2018 IBM Corporation
+ * Author: Anshuman Khandual <khandual@linux.vnet.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <asm/machdep.h>
+#include <asm/svm.h>
+#include <asm/swiotlb.h>
+#include <asm/ultravisor.h>
+
+static int __init init_svm(void)
+{
+ if (!is_secure_guest())
+ return 0;
+
+ /* Don't release the SWIOTLB buffer. */
+ ppc_swiotlb_enable = 1;
+
+ /*
+ * Since the guest memory is inaccessible to the host, devices always
+ * need to use the SWIOTLB buffer for DMA even if dma_capable() says
+ * otherwise.
+ */
+ swiotlb_force = SWIOTLB_FORCE;
+
+ /* Share the SWIOTLB buffer with the host. */
+ swiotlb_update_mem_attributes();
+
+ return 0;
+}
+machine_early_initcall(pseries, init_svm);
+
+int set_memory_encrypted(unsigned long addr, int numpages)
+{
+ if (!PAGE_ALIGNED(addr))
+ return -EINVAL;
+
+ uv_unshare_page(PHYS_PFN(__pa(addr)), numpages);
+
+ return 0;
+}
+
+int set_memory_decrypted(unsigned long addr, int numpages)
+{
+ if (!PAGE_ALIGNED(addr))
+ return -EINVAL;
+
+ uv_share_page(PHYS_PFN(__pa(addr)), numpages);
+
+ return 0;
+}
+
+/* There's one dispatch log per CPU. */
+#define NR_DTL_PAGE (DISPATCH_LOG_BYTES * CONFIG_NR_CPUS / PAGE_SIZE)
+
+static struct page *dtl_page_store[NR_DTL_PAGE];
+static long dtl_nr_pages;
+
+static bool is_dtl_page_shared(struct page *page)
+{
+ long i;
+
+ for (i = 0; i < dtl_nr_pages; i++)
+ if (dtl_page_store[i] == page)
+ return true;
+
+ return false;
+}
+
+void dtl_cache_ctor(void *addr)
+{
+ unsigned long pfn = PHYS_PFN(__pa(addr));
+ struct page *page = pfn_to_page(pfn);
+
+ if (!is_dtl_page_shared(page)) {
+ dtl_page_store[dtl_nr_pages] = page;
+ dtl_nr_pages++;
+ WARN_ON(dtl_nr_pages >= NR_DTL_PAGE);
+ uv_share_page(pfn, 1);
+ }
+}
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index ba758f4be328..79e2287991db 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -520,7 +520,7 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))))
goto out_fail;
- ret = iommu_map_page(dev, tbl, page, offset, size, device_to_mask(dev),
+ ret = iommu_map_page(dev, tbl, page, offset, size, dma_get_mask(dev),
direction, attrs);
if (unlikely(ret == DMA_MAPPING_ERROR))
goto out_deallocate;
@@ -560,7 +560,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
if (vio_cmo_alloc(viodev, alloc_size))
goto out_fail;
- ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, device_to_mask(dev),
+ ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, dma_get_mask(dev),
direction, attrs);
if (unlikely(!ret))
goto out_deallocate;
@@ -605,6 +605,8 @@ static const struct dma_map_ops vio_dma_mapping_ops = {
.unmap_page = vio_dma_iommu_unmap_page,
.dma_supported = dma_iommu_dma_supported,
.get_required_mask = dma_iommu_get_required_mask,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
};
/**
@@ -1191,7 +1193,7 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
else
tbl->it_ops = &iommu_table_pseries_ops;
- return iommu_init_table(tbl, -1);
+ return iommu_init_table(tbl, -1, 0, 0);
}
/**
diff --git a/arch/powerpc/mm/book3s64/vphn.c b/arch/powerpc/platforms/pseries/vphn.c
index 0ee7734afb50..3f07bf6c670e 100644
--- a/arch/powerpc/mm/book3s64/vphn.c
+++ b/arch/powerpc/platforms/pseries/vphn.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <asm/byteorder.h>
-#include "vphn.h"
+#include <asm/lppaca.h>
/*
* The associativity domain numbers are returned from the hypervisor as a
@@ -22,7 +22,7 @@
*
* Convert to the sequence they would appear in the ibm,associativity property.
*/
-int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
+static int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
{
__be64 be_packed[VPHN_REGISTER_COUNT];
int i, nr_assoc_doms = 0;
@@ -71,3 +71,19 @@ int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
return nr_assoc_doms;
}
+
+/* NOTE: This file is included by a selftest and built in userspace. */
+#ifdef __KERNEL__
+#include <asm/hvcall.h>
+
+long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity)
+{
+ long rc;
+ long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+
+ rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, cpu);
+ vphn_unpack_associativity(retbuf, associativity);
+
+ return rc;
+}
+#endif
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index e0dbec780fe9..9ebcc1337560 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
+# see Documentation/kbuild/kconfig-language.rst.
#
config PPC4xx_PCI_EXPRESS
@@ -28,13 +28,6 @@ config PPC_MSI_BITMAP
source "arch/powerpc/sysdev/xics/Kconfig"
source "arch/powerpc/sysdev/xive/Kconfig"
-config PPC_SCOM
- bool
-
-config SCOM_DEBUGFS
- bool "Expose SCOM controllers via debugfs"
- depends on PPC_SCOM && DEBUG_FS
-
config GE_FPGA
bool
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index aaf23283ba0c..603b3c656d19 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -37,12 +37,10 @@ obj-$(CONFIG_XILINX_PCI) += xilinx_pci.o
obj-$(CONFIG_OF_RTC) += of_rtc.o
obj-$(CONFIG_CPM) += cpm_common.o
-obj-$(CONFIG_CPM1) += cpm1.o
obj-$(CONFIG_CPM2) += cpm2.o cpm2_pic.o cpm_gpio.o
obj-$(CONFIG_8xx_GPIO) += cpm_gpio.o
obj-$(CONFIG_QUICC_ENGINE) += cpm_common.o
obj-$(CONFIG_PPC_DCR) += dcr.o
-obj-$(CONFIG_UCODE_PATCH) += micropatch.o
obj-$(CONFIG_PPC_MPC512x) += mpc5xxx_clocks.o
obj-$(CONFIG_PPC_MPC52xx) += mpc5xxx_clocks.o
@@ -51,8 +49,6 @@ ifdef CONFIG_SUSPEND
obj-$(CONFIG_PPC_BOOK3S_32) += 6xx-suspend.o
endif
-obj-$(CONFIG_PPC_SCOM) += scom.o
-
obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS) += udbg_memcons.o
obj-$(CONFIG_PPC_XICS) += xics/
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index e5519875cf17..6b4a34b36d98 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -144,7 +144,7 @@ static void dart_cache_sync(unsigned int *base, unsigned int count)
unsigned int tmp;
/* Perform a standard cache flush */
- flush_inval_dcache_range(start, end);
+ flush_dcache_range(start, end);
/*
* Perform the sequence described in the CPC925 manual to
@@ -344,7 +344,7 @@ static void iommu_table_dart_setup(void)
iommu_table_dart.it_index = 0;
iommu_table_dart.it_blocksize = 1;
iommu_table_dart.it_ops = &iommu_dart_ops;
- iommu_init_table(&iommu_table_dart, -1);
+ iommu_init_table(&iommu_table_dart, -1, 0, 0);
/* Reserve the last page of the DART to avoid possible prefetch
* past the DART mapped area
diff --git a/arch/powerpc/sysdev/micropatch.c b/arch/powerpc/sysdev/micropatch.c
deleted file mode 100644
index 33a9042fca80..000000000000
--- a/arch/powerpc/sysdev/micropatch.c
+++ /dev/null
@@ -1,749 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/* Microcode patches for the CPM as supplied by Motorola.
- * This is the one for IIC/SPI. There is a newer one that
- * also relocates SMC2, but this would require additional changes
- * to uart.c, so I am holding off on that for a moment.
- */
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <asm/irq.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/8xx_immap.h>
-#include <asm/cpm.h>
-#include <asm/cpm1.h>
-
-/*
- * I2C/SPI relocation patch arrays.
- */
-
-#ifdef CONFIG_I2C_SPI_UCODE_PATCH
-
-static uint patch_2000[] __initdata = {
- 0x7FFFEFD9,
- 0x3FFD0000,
- 0x7FFB49F7,
- 0x7FF90000,
- 0x5FEFADF7,
- 0x5F89ADF7,
- 0x5FEFAFF7,
- 0x5F89AFF7,
- 0x3A9CFBC8,
- 0xE7C0EDF0,
- 0x77C1E1BB,
- 0xF4DC7F1D,
- 0xABAD932F,
- 0x4E08FDCF,
- 0x6E0FAFF8,
- 0x7CCF76CF,
- 0xFD1FF9CF,
- 0xABF88DC6,
- 0xAB5679F7,
- 0xB0937383,
- 0xDFCE79F7,
- 0xB091E6BB,
- 0xE5BBE74F,
- 0xB3FA6F0F,
- 0x6FFB76CE,
- 0xEE0DF9CF,
- 0x2BFBEFEF,
- 0xCFEEF9CF,
- 0x76CEAD24,
- 0x90B2DF9A,
- 0x7FDDD0BF,
- 0x4BF847FD,
- 0x7CCF76CE,
- 0xCFEF7E1F,
- 0x7F1D7DFD,
- 0xF0B6EF71,
- 0x7FC177C1,
- 0xFBC86079,
- 0xE722FBC8,
- 0x5FFFDFFF,
- 0x5FB2FFFB,
- 0xFBC8F3C8,
- 0x94A67F01,
- 0x7F1D5F39,
- 0xAFE85F5E,
- 0xFFDFDF96,
- 0xCB9FAF7D,
- 0x5FC1AFED,
- 0x8C1C5FC1,
- 0xAFDD5FC3,
- 0xDF9A7EFD,
- 0xB0B25FB2,
- 0xFFFEABAD,
- 0x5FB2FFFE,
- 0x5FCE600B,
- 0xE6BB600B,
- 0x5FCEDFC6,
- 0x27FBEFDF,
- 0x5FC8CFDE,
- 0x3A9CE7C0,
- 0xEDF0F3C8,
- 0x7F0154CD,
- 0x7F1D2D3D,
- 0x363A7570,
- 0x7E0AF1CE,
- 0x37EF2E68,
- 0x7FEE10EC,
- 0xADF8EFDE,
- 0xCFEAE52F,
- 0x7D0FE12B,
- 0xF1CE5F65,
- 0x7E0A4DF8,
- 0xCFEA5F72,
- 0x7D0BEFEE,
- 0xCFEA5F74,
- 0xE522EFDE,
- 0x5F74CFDA,
- 0x0B627385,
- 0xDF627E0A,
- 0x30D8145B,
- 0xBFFFF3C8,
- 0x5FFFDFFF,
- 0xA7F85F5E,
- 0xBFFE7F7D,
- 0x10D31450,
- 0x5F36BFFF,
- 0xAF785F5E,
- 0xBFFDA7F8,
- 0x5F36BFFE,
- 0x77FD30C0,
- 0x4E08FDCF,
- 0xE5FF6E0F,
- 0xAFF87E1F,
- 0x7E0FFD1F,
- 0xF1CF5F1B,
- 0xABF80D5E,
- 0x5F5EFFEF,
- 0x79F730A2,
- 0xAFDD5F34,
- 0x47F85F34,
- 0xAFED7FDD,
- 0x50B24978,
- 0x47FD7F1D,
- 0x7DFD70AD,
- 0xEF717EC1,
- 0x6BA47F01,
- 0x2D267EFD,
- 0x30DE5F5E,
- 0xFFFD5F5E,
- 0xFFEF5F5E,
- 0xFFDF0CA0,
- 0xAFED0A9E,
- 0xAFDD0C3A,
- 0x5F3AAFBD,
- 0x7FBDB082,
- 0x5F8247F8
-};
-
-static uint patch_2f00[] __initdata = {
- 0x3E303430,
- 0x34343737,
- 0xABF7BF9B,
- 0x994B4FBD,
- 0xBD599493,
- 0x349FFF37,
- 0xFB9B177D,
- 0xD9936956,
- 0xBBFDD697,
- 0xBDD2FD11,
- 0x31DB9BB3,
- 0x63139637,
- 0x93733693,
- 0x193137F7,
- 0x331737AF,
- 0x7BB9B999,
- 0xBB197957,
- 0x7FDFD3D5,
- 0x73B773F7,
- 0x37933B99,
- 0x1D115316,
- 0x99315315,
- 0x31694BF4,
- 0xFBDBD359,
- 0x31497353,
- 0x76956D69,
- 0x7B9D9693,
- 0x13131979,
- 0x79376935
-};
-#endif
-
-/*
- * I2C/SPI/SMC1 relocation patch arrays.
- */
-
-#ifdef CONFIG_I2C_SPI_SMC1_UCODE_PATCH
-
-static uint patch_2000[] __initdata = {
- 0x3fff0000,
- 0x3ffd0000,
- 0x3ffb0000,
- 0x3ff90000,
- 0x5f13eff8,
- 0x5eb5eff8,
- 0x5f88adf7,
- 0x5fefadf7,
- 0x3a9cfbc8,
- 0x77cae1bb,
- 0xf4de7fad,
- 0xabae9330,
- 0x4e08fdcf,
- 0x6e0faff8,
- 0x7ccf76cf,
- 0xfdaff9cf,
- 0xabf88dc8,
- 0xab5879f7,
- 0xb0925d8d,
- 0xdfd079f7,
- 0xb090e6bb,
- 0xe5bbe74f,
- 0x9e046f0f,
- 0x6ffb76ce,
- 0xee0cf9cf,
- 0x2bfbefef,
- 0xcfeef9cf,
- 0x76cead23,
- 0x90b3df99,
- 0x7fddd0c1,
- 0x4bf847fd,
- 0x7ccf76ce,
- 0xcfef77ca,
- 0x7eaf7fad,
- 0x7dfdf0b7,
- 0xef7a7fca,
- 0x77cafbc8,
- 0x6079e722,
- 0xfbc85fff,
- 0xdfff5fb3,
- 0xfffbfbc8,
- 0xf3c894a5,
- 0xe7c9edf9,
- 0x7f9a7fad,
- 0x5f36afe8,
- 0x5f5bffdf,
- 0xdf95cb9e,
- 0xaf7d5fc3,
- 0xafed8c1b,
- 0x5fc3afdd,
- 0x5fc5df99,
- 0x7efdb0b3,
- 0x5fb3fffe,
- 0xabae5fb3,
- 0xfffe5fd0,
- 0x600be6bb,
- 0x600b5fd0,
- 0xdfc827fb,
- 0xefdf5fca,
- 0xcfde3a9c,
- 0xe7c9edf9,
- 0xf3c87f9e,
- 0x54ca7fed,
- 0x2d3a3637,
- 0x756f7e9a,
- 0xf1ce37ef,
- 0x2e677fee,
- 0x10ebadf8,
- 0xefdecfea,
- 0xe52f7d9f,
- 0xe12bf1ce,
- 0x5f647e9a,
- 0x4df8cfea,
- 0x5f717d9b,
- 0xefeecfea,
- 0x5f73e522,
- 0xefde5f73,
- 0xcfda0b61,
- 0x5d8fdf61,
- 0xe7c9edf9,
- 0x7e9a30d5,
- 0x1458bfff,
- 0xf3c85fff,
- 0xdfffa7f8,
- 0x5f5bbffe,
- 0x7f7d10d0,
- 0x144d5f33,
- 0xbfffaf78,
- 0x5f5bbffd,
- 0xa7f85f33,
- 0xbffe77fd,
- 0x30bd4e08,
- 0xfdcfe5ff,
- 0x6e0faff8,
- 0x7eef7e9f,
- 0xfdeff1cf,
- 0x5f17abf8,
- 0x0d5b5f5b,
- 0xffef79f7,
- 0x309eafdd,
- 0x5f3147f8,
- 0x5f31afed,
- 0x7fdd50af,
- 0x497847fd,
- 0x7f9e7fed,
- 0x7dfd70a9,
- 0xef7e7ece,
- 0x6ba07f9e,
- 0x2d227efd,
- 0x30db5f5b,
- 0xfffd5f5b,
- 0xffef5f5b,
- 0xffdf0c9c,
- 0xafed0a9a,
- 0xafdd0c37,
- 0x5f37afbd,
- 0x7fbdb081,
- 0x5f8147f8,
- 0x3a11e710,
- 0xedf0ccdd,
- 0xf3186d0a,
- 0x7f0e5f06,
- 0x7fedbb38,
- 0x3afe7468,
- 0x7fedf4fc,
- 0x8ffbb951,
- 0xb85f77fd,
- 0xb0df5ddd,
- 0xdefe7fed,
- 0x90e1e74d,
- 0x6f0dcbf7,
- 0xe7decfed,
- 0xcb74cfed,
- 0xcfeddf6d,
- 0x91714f74,
- 0x5dd2deef,
- 0x9e04e7df,
- 0xefbb6ffb,
- 0xe7ef7f0e,
- 0x9e097fed,
- 0xebdbeffa,
- 0xeb54affb,
- 0x7fea90d7,
- 0x7e0cf0c3,
- 0xbffff318,
- 0x5fffdfff,
- 0xac59efea,
- 0x7fce1ee5,
- 0xe2ff5ee1,
- 0xaffbe2ff,
- 0x5ee3affb,
- 0xf9cc7d0f,
- 0xaef8770f,
- 0x7d0fb0c6,
- 0xeffbbfff,
- 0xcfef5ede,
- 0x7d0fbfff,
- 0x5ede4cf8,
- 0x7fddd0bf,
- 0x49f847fd,
- 0x7efdf0bb,
- 0x7fedfffd,
- 0x7dfdf0b7,
- 0xef7e7e1e,
- 0x5ede7f0e,
- 0x3a11e710,
- 0xedf0ccab,
- 0xfb18ad2e,
- 0x1ea9bbb8,
- 0x74283b7e,
- 0x73c2e4bb,
- 0x2ada4fb8,
- 0xdc21e4bb,
- 0xb2a1ffbf,
- 0x5e2c43f8,
- 0xfc87e1bb,
- 0xe74ffd91,
- 0x6f0f4fe8,
- 0xc7ba32e2,
- 0xf396efeb,
- 0x600b4f78,
- 0xe5bb760b,
- 0x53acaef8,
- 0x4ef88b0e,
- 0xcfef9e09,
- 0xabf8751f,
- 0xefef5bac,
- 0x741f4fe8,
- 0x751e760d,
- 0x7fdbf081,
- 0x741cafce,
- 0xefcc7fce,
- 0x751e70ac,
- 0x741ce7bb,
- 0x3372cfed,
- 0xafdbefeb,
- 0xe5bb760b,
- 0x53f2aef8,
- 0xafe8e7eb,
- 0x4bf8771e,
- 0x7e247fed,
- 0x4fcbe2cc,
- 0x7fbc30a9,
- 0x7b0f7a0f,
- 0x34d577fd,
- 0x308b5db7,
- 0xde553e5f,
- 0xaf78741f,
- 0x741f30f0,
- 0xcfef5e2c,
- 0x741f3eac,
- 0xafb8771e,
- 0x5e677fed,
- 0x0bd3e2cc,
- 0x741ccfec,
- 0xe5ca53cd,
- 0x6fcb4f74,
- 0x5dadde4b,
- 0x2ab63d38,
- 0x4bb3de30,
- 0x751f741c,
- 0x6c42effa,
- 0xefea7fce,
- 0x6ffc30be,
- 0xefec3fca,
- 0x30b3de2e,
- 0xadf85d9e,
- 0xaf7daefd,
- 0x5d9ede2e,
- 0x5d9eafdd,
- 0x761f10ac,
- 0x1da07efd,
- 0x30adfffe,
- 0x4908fb18,
- 0x5fffdfff,
- 0xafbb709b,
- 0x4ef85e67,
- 0xadf814ad,
- 0x7a0f70ad,
- 0xcfef50ad,
- 0x7a0fde30,
- 0x5da0afed,
- 0x3c12780f,
- 0xefef780f,
- 0xefef790f,
- 0xa7f85e0f,
- 0xffef790f,
- 0xefef790f,
- 0x14adde2e,
- 0x5d9eadfd,
- 0x5e2dfffb,
- 0xe79addfd,
- 0xeff96079,
- 0x607ae79a,
- 0xddfceff9,
- 0x60795dff,
- 0x607acfef,
- 0xefefefdf,
- 0xefbfef7f,
- 0xeeffedff,
- 0xebffe7ff,
- 0xafefafdf,
- 0xafbfaf7f,
- 0xaeffadff,
- 0xabffa7ff,
- 0x6fef6fdf,
- 0x6fbf6f7f,
- 0x6eff6dff,
- 0x6bff67ff,
- 0x2fef2fdf,
- 0x2fbf2f7f,
- 0x2eff2dff,
- 0x2bff27ff,
- 0x4e08fd1f,
- 0xe5ff6e0f,
- 0xaff87eef,
- 0x7e0ffdef,
- 0xf11f6079,
- 0xabf8f542,
- 0x7e0af11c,
- 0x37cfae3a,
- 0x7fec90be,
- 0xadf8efdc,
- 0xcfeae52f,
- 0x7d0fe12b,
- 0xf11c6079,
- 0x7e0a4df8,
- 0xcfea5dc4,
- 0x7d0befec,
- 0xcfea5dc6,
- 0xe522efdc,
- 0x5dc6cfda,
- 0x4e08fd1f,
- 0x6e0faff8,
- 0x7c1f761f,
- 0xfdeff91f,
- 0x6079abf8,
- 0x761cee24,
- 0xf91f2bfb,
- 0xefefcfec,
- 0xf91f6079,
- 0x761c27fb,
- 0xefdf5da7,
- 0xcfdc7fdd,
- 0xd09c4bf8,
- 0x47fd7c1f,
- 0x761ccfcf,
- 0x7eef7fed,
- 0x7dfdf093,
- 0xef7e7f1e,
- 0x771efb18,
- 0x6079e722,
- 0xe6bbe5bb,
- 0xae0ae5bb,
- 0x600bae85,
- 0xe2bbe2bb,
- 0xe2bbe2bb,
- 0xaf02e2bb,
- 0xe2bb2ff9,
- 0x6079e2bb
-};
-
-static uint patch_2f00[] __initdata = {
- 0x30303030,
- 0x3e3e3434,
- 0xabbf9b99,
- 0x4b4fbdbd,
- 0x59949334,
- 0x9fff37fb,
- 0x9b177dd9,
- 0x936956bb,
- 0xfbdd697b,
- 0xdd2fd113,
- 0x1db9f7bb,
- 0x36313963,
- 0x79373369,
- 0x3193137f,
- 0x7331737a,
- 0xf7bb9b99,
- 0x9bb19795,
- 0x77fdfd3d,
- 0x573b773f,
- 0x737933f7,
- 0xb991d115,
- 0x31699315,
- 0x31531694,
- 0xbf4fbdbd,
- 0x35931497,
- 0x35376956,
- 0xbd697b9d,
- 0x96931313,
- 0x19797937,
- 0x6935af78,
- 0xb9b3baa3,
- 0xb8788683,
- 0x368f78f7,
- 0x87778733,
- 0x3ffffb3b,
- 0x8e8f78b8,
- 0x1d118e13,
- 0xf3ff3f8b,
- 0x6bd8e173,
- 0xd1366856,
- 0x68d1687b,
- 0x3daf78b8,
- 0x3a3a3f87,
- 0x8f81378f,
- 0xf876f887,
- 0x77fd8778,
- 0x737de8d6,
- 0xbbf8bfff,
- 0xd8df87f7,
- 0xfd876f7b,
- 0x8bfff8bd,
- 0x8683387d,
- 0xb873d87b,
- 0x3b8fd7f8,
- 0xf7338883,
- 0xbb8ee1f8,
- 0xef837377,
- 0x3337b836,
- 0x817d11f8,
- 0x7378b878,
- 0xd3368b7d,
- 0xed731b7d,
- 0x833731f3,
- 0xf22f3f23
-};
-
-static uint patch_2e00[] __initdata = {
- 0x27eeeeee,
- 0xeeeeeeee,
- 0xeeeeeeee,
- 0xeeeeeeee,
- 0xee4bf4fb,
- 0xdbd259bb,
- 0x1979577f,
- 0xdfd2d573,
- 0xb773f737,
- 0x4b4fbdbd,
- 0x25b9b177,
- 0xd2d17376,
- 0x956bbfdd,
- 0x697bdd2f,
- 0xff9f79ff,
- 0xff9ff22f
-};
-#endif
-
-/*
- * USB SOF patch arrays.
- */
-
-#ifdef CONFIG_USB_SOF_UCODE_PATCH
-
-static uint patch_2000[] __initdata = {
- 0x7fff0000,
- 0x7ffd0000,
- 0x7ffb0000,
- 0x49f7ba5b,
- 0xba383ffb,
- 0xf9b8b46d,
- 0xe5ab4e07,
- 0xaf77bffe,
- 0x3f7bbf79,
- 0xba5bba38,
- 0xe7676076,
- 0x60750000
-};
-
-static uint patch_2f00[] __initdata = {
- 0x3030304c,
- 0xcab9e441,
- 0xa1aaf220
-};
-#endif
-
-void __init cpm_load_patch(cpm8xx_t *cp)
-{
- volatile uint *dp; /* Dual-ported RAM. */
- volatile cpm8xx_t *commproc;
-#if defined(CONFIG_I2C_SPI_UCODE_PATCH) || \
- defined(CONFIG_I2C_SPI_SMC1_UCODE_PATCH)
- volatile iic_t *iip;
- volatile struct spi_pram *spp;
-#ifdef CONFIG_I2C_SPI_SMC1_UCODE_PATCH
- volatile smc_uart_t *smp;
-#endif
-#endif
- int i;
-
- commproc = cp;
-
-#ifdef CONFIG_USB_SOF_UCODE_PATCH
- commproc->cp_rccr = 0;
-
- dp = (uint *)(commproc->cp_dpmem);
- for (i=0; i<(sizeof(patch_2000)/4); i++)
- *dp++ = patch_2000[i];
-
- dp = (uint *)&(commproc->cp_dpmem[0x0f00]);
- for (i=0; i<(sizeof(patch_2f00)/4); i++)
- *dp++ = patch_2f00[i];
-
- commproc->cp_rccr = 0x0009;
-
- printk("USB SOF microcode patch installed\n");
-#endif /* CONFIG_USB_SOF_UCODE_PATCH */
-
-#if defined(CONFIG_I2C_SPI_UCODE_PATCH) || \
- defined(CONFIG_I2C_SPI_SMC1_UCODE_PATCH)
-
- commproc->cp_rccr = 0;
-
- dp = (uint *)(commproc->cp_dpmem);
- for (i=0; i<(sizeof(patch_2000)/4); i++)
- *dp++ = patch_2000[i];
-
- dp = (uint *)&(commproc->cp_dpmem[0x0f00]);
- for (i=0; i<(sizeof(patch_2f00)/4); i++)
- *dp++ = patch_2f00[i];
-
- iip = (iic_t *)&commproc->cp_dparam[PROFF_IIC];
-# define RPBASE 0x0500
- iip->iic_rpbase = RPBASE;
-
- /* Put SPI above the IIC, also 32-byte aligned.
- */
- i = (RPBASE + sizeof(iic_t) + 31) & ~31;
- spp = (struct spi_pram *)&commproc->cp_dparam[PROFF_SPI];
- spp->rpbase = i;
-
-# if defined(CONFIG_I2C_SPI_UCODE_PATCH)
- commproc->cp_cpmcr1 = 0x802a;
- commproc->cp_cpmcr2 = 0x8028;
- commproc->cp_cpmcr3 = 0x802e;
- commproc->cp_cpmcr4 = 0x802c;
- commproc->cp_rccr = 1;
-
- printk("I2C/SPI microcode patch installed.\n");
-# endif /* CONFIG_I2C_SPI_UCODE_PATCH */
-
-# if defined(CONFIG_I2C_SPI_SMC1_UCODE_PATCH)
-
- dp = (uint *)&(commproc->cp_dpmem[0x0e00]);
- for (i=0; i<(sizeof(patch_2e00)/4); i++)
- *dp++ = patch_2e00[i];
-
- commproc->cp_cpmcr1 = 0x8080;
- commproc->cp_cpmcr2 = 0x808a;
- commproc->cp_cpmcr3 = 0x8028;
- commproc->cp_cpmcr4 = 0x802a;
- commproc->cp_rccr = 3;
-
- smp = (smc_uart_t *)&commproc->cp_dparam[PROFF_SMC1];
- smp->smc_rpbase = 0x1FC0;
-
- printk("I2C/SPI/SMC1 microcode patch installed.\n");
-# endif /* CONFIG_I2C_SPI_SMC1_UCODE_PATCH) */
-
-#endif /* some variation of the I2C/SPI patch was selected */
-}
-
-/*
- * Take this entire routine out, since no one calls it and its
- * logic is suspect.
- */
-
-#if 0
-void
-verify_patch(volatile immap_t *immr)
-{
- volatile uint *dp;
- volatile cpm8xx_t *commproc;
- int i;
-
- commproc = (cpm8xx_t *)&immr->im_cpm;
-
- printk("cp_rccr %x\n", commproc->cp_rccr);
- commproc->cp_rccr = 0;
-
- dp = (uint *)(commproc->cp_dpmem);
- for (i=0; i<(sizeof(patch_2000)/4); i++)
- if (*dp++ != patch_2000[i]) {
- printk("patch_2000 bad at %d\n", i);
- dp--;
- printk("found 0x%X, wanted 0x%X\n", *dp, patch_2000[i]);
- break;
- }
-
- dp = (uint *)&(commproc->cp_dpmem[0x0f00]);
- for (i=0; i<(sizeof(patch_2f00)/4); i++)
- if (*dp++ != patch_2f00[i]) {
- printk("patch_2f00 bad at %d\n", i);
- dp--;
- printk("found 0x%X, wanted 0x%X\n", *dp, patch_2f00[i]);
- break;
- }
-
- commproc->cp_rccr = 0x0009;
-}
-#endif
diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
deleted file mode 100644
index 94e885bf3aee..000000000000
--- a/arch/powerpc/sysdev/scom.c
+++ /dev/null
@@ -1,223 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
- * <benh@kernel.crashing.org>
- * and David Gibson, IBM Corporation.
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <asm/debugfs.h>
-#include <asm/prom.h>
-#include <asm/scom.h>
-#include <linux/uaccess.h>
-
-const struct scom_controller *scom_controller;
-EXPORT_SYMBOL_GPL(scom_controller);
-
-struct device_node *scom_find_parent(struct device_node *node)
-{
- struct device_node *par, *tmp;
- const u32 *p;
-
- for (par = of_node_get(node); par;) {
- if (of_get_property(par, "scom-controller", NULL))
- break;
- p = of_get_property(par, "scom-parent", NULL);
- tmp = par;
- if (p == NULL)
- par = of_get_parent(par);
- else
- par = of_find_node_by_phandle(*p);
- of_node_put(tmp);
- }
- return par;
-}
-EXPORT_SYMBOL_GPL(scom_find_parent);
-
-scom_map_t scom_map_device(struct device_node *dev, int index)
-{
- struct device_node *parent;
- unsigned int cells, size;
- const __be32 *prop, *sprop;
- u64 reg, cnt;
- scom_map_t ret;
-
- parent = scom_find_parent(dev);
-
- if (parent == NULL)
- return NULL;
-
- /*
- * We support "scom-reg" properties for adding scom registers
- * to a random device-tree node with an explicit scom-parent
- *
- * We also support the simple "reg" property if the device is
- * a direct child of a scom controller.
- *
- * In case both exist, "scom-reg" takes precedence.
- */
- prop = of_get_property(dev, "scom-reg", &size);
- sprop = of_get_property(parent, "#scom-cells", NULL);
- if (!prop && parent == dev->parent) {
- prop = of_get_property(dev, "reg", &size);
- sprop = of_get_property(parent, "#address-cells", NULL);
- }
- if (!prop)
- return NULL;
- cells = sprop ? be32_to_cpup(sprop) : 1;
- size >>= 2;
-
- if (index >= (size / (2*cells)))
- return NULL;
-
- reg = of_read_number(&prop[index * cells * 2], cells);
- cnt = of_read_number(&prop[index * cells * 2 + cells], cells);
-
- ret = scom_map(parent, reg, cnt);
- of_node_put(parent);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(scom_map_device);
-
-#ifdef CONFIG_SCOM_DEBUGFS
-struct scom_debug_entry {
- struct device_node *dn;
- struct debugfs_blob_wrapper path;
- char name[16];
-};
-
-static ssize_t scom_debug_read(struct file *filp, char __user *ubuf,
- size_t count, loff_t *ppos)
-{
- struct scom_debug_entry *ent = filp->private_data;
- u64 __user *ubuf64 = (u64 __user *)ubuf;
- loff_t off = *ppos;
- ssize_t done = 0;
- u64 reg, reg_cnt, val;
- scom_map_t map;
- int rc;
-
- if (off < 0 || (off & 7) || (count & 7))
- return -EINVAL;
- reg = off >> 3;
- reg_cnt = count >> 3;
-
- map = scom_map(ent->dn, reg, reg_cnt);
- if (!scom_map_ok(map))
- return -ENXIO;
-
- for (reg = 0; reg < reg_cnt; reg++) {
- rc = scom_read(map, reg, &val);
- if (!rc)
- rc = put_user(val, ubuf64);
- if (rc) {
- if (!done)
- done = rc;
- break;
- }
- ubuf64++;
- *ppos += 8;
- done += 8;
- }
- scom_unmap(map);
- return done;
-}
-
-static ssize_t scom_debug_write(struct file* filp, const char __user *ubuf,
- size_t count, loff_t *ppos)
-{
- struct scom_debug_entry *ent = filp->private_data;
- u64 __user *ubuf64 = (u64 __user *)ubuf;
- loff_t off = *ppos;
- ssize_t done = 0;
- u64 reg, reg_cnt, val;
- scom_map_t map;
- int rc;
-
- if (off < 0 || (off & 7) || (count & 7))
- return -EINVAL;
- reg = off >> 3;
- reg_cnt = count >> 3;
-
- map = scom_map(ent->dn, reg, reg_cnt);
- if (!scom_map_ok(map))
- return -ENXIO;
-
- for (reg = 0; reg < reg_cnt; reg++) {
- rc = get_user(val, ubuf64);
- if (!rc)
- rc = scom_write(map, reg, val);
- if (rc) {
- if (!done)
- done = rc;
- break;
- }
- ubuf64++;
- done += 8;
- }
- scom_unmap(map);
- return done;
-}
-
-static const struct file_operations scom_debug_fops = {
- .read = scom_debug_read,
- .write = scom_debug_write,
- .open = simple_open,
- .llseek = default_llseek,
-};
-
-static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
- int i)
-{
- struct scom_debug_entry *ent;
- struct dentry *dir;
-
- ent = kzalloc(sizeof(*ent), GFP_KERNEL);
- if (!ent)
- return -ENOMEM;
-
- ent->dn = of_node_get(dn);
- snprintf(ent->name, 16, "%08x", i);
- ent->path.data = (void*)kasprintf(GFP_KERNEL, "%pOF", dn);
- ent->path.size = strlen((char *)ent->path.data);
-
- dir = debugfs_create_dir(ent->name, root);
- if (!dir) {
- of_node_put(dn);
- kfree(ent->path.data);
- kfree(ent);
- return -1;
- }
-
- debugfs_create_blob("devspec", 0400, dir, &ent->path);
- debugfs_create_file("access", 0600, dir, ent, &scom_debug_fops);
-
- return 0;
-}
-
-static int scom_debug_init(void)
-{
- struct device_node *dn;
- struct dentry *root;
- int i, rc;
-
- root = debugfs_create_dir("scom", powerpc_debugfs_root);
- if (!root)
- return -1;
-
- i = rc = 0;
- for_each_node_with_property(dn, "scom-controller") {
- int id = of_get_ibm_chip_id(dn);
- if (id == -1)
- id = i;
- rc |= scom_debug_init_one(root, dn, id);
- i++;
- }
-
- return rc;
-}
-device_initcall(scom_debug_init);
-#endif /* CONFIG_SCOM_DEBUGFS */
diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig
index 86fee428f5f1..304614c920aa 100644
--- a/arch/powerpc/sysdev/xics/Kconfig
+++ b/arch/powerpc/sysdev/xics/Kconfig
@@ -1,15 +1,14 @@
# SPDX-License-Identifier: GPL-2.0
config PPC_XICS
- def_bool n
- select PPC_SMP_MUXED_IPI
- select HARDIRQS_SW_RESEND
+ def_bool n
+ select PPC_SMP_MUXED_IPI
+ select HARDIRQS_SW_RESEND
config PPC_ICP_NATIVE
- def_bool n
+ def_bool n
config PPC_ICP_HV
- def_bool n
+ def_bool n
config PPC_ICS_RTAS
- def_bool n
-
+ def_bool n
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 082c7e1c20f0..df832b09e3e9 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -135,7 +135,7 @@ static u32 xive_read_eq(struct xive_q *q, bool just_peek)
static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
{
u32 irq = 0;
- u8 prio;
+ u8 prio = 0;
/* Find highest pending priority */
while (xc->pending_prio != 0) {
@@ -148,8 +148,19 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
irq = xive_read_eq(&xc->queue[prio], just_peek);
/* Found something ? That's it */
- if (irq)
- break;
+ if (irq) {
+ if (just_peek || irq_to_desc(irq))
+ break;
+ /*
+ * We should never get here; if we do then we must
+ * have failed to synchronize the interrupt properly
+ * when shutting it down.
+ */
+ pr_crit("xive: got interrupt %d without descriptor, dropping\n",
+ irq);
+ WARN_ON(1);
+ continue;
+ }
/* Clear pending bits */
xc->pending_prio &= ~(1 << prio);
@@ -185,7 +196,7 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
/*
* This is used to perform the magic loads from an ESB
- * described in xive.h
+ * described in xive-regs.h
*/
static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset)
{
@@ -226,26 +237,61 @@ static notrace void xive_dump_eq(const char *name, struct xive_q *q)
i0 = be32_to_cpup(q->qpage + idx);
idx = (idx + 1) & q->msk;
i1 = be32_to_cpup(q->qpage + idx);
- xmon_printf(" %s Q T=%d %08x %08x ...\n", name,
- q->toggle, i0, i1);
+ xmon_printf("%s idx=%d T=%d %08x %08x ...", name,
+ q->idx, q->toggle, i0, i1);
}
notrace void xmon_xive_do_dump(int cpu)
{
struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
- xmon_printf("XIVE state for CPU %d:\n", cpu);
- xmon_printf(" pp=%02x cppr=%02x\n", xc->pending_prio, xc->cppr);
- xive_dump_eq("IRQ", &xc->queue[xive_irq_priority]);
+ xmon_printf("CPU %d:", cpu);
+ if (xc) {
+ xmon_printf("pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);
+
#ifdef CONFIG_SMP
- {
- u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
- xmon_printf(" IPI state: %x:%c%c\n", xc->hw_ipi,
- val & XIVE_ESB_VAL_P ? 'P' : 'p',
- val & XIVE_ESB_VAL_Q ? 'Q' : 'q');
- }
+ {
+ u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
+
+ xmon_printf("IPI=0x%08x PQ=%c%c ", xc->hw_ipi,
+ val & XIVE_ESB_VAL_P ? 'P' : '-',
+ val & XIVE_ESB_VAL_Q ? 'Q' : '-');
+ }
#endif
+ xive_dump_eq("EQ", &xc->queue[xive_irq_priority]);
+ }
+ xmon_printf("\n");
}
+
+int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
+{
+ int rc;
+ u32 target;
+ u8 prio;
+ u32 lirq;
+
+ rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
+ if (rc) {
+ xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
+ return rc;
+ }
+
+ xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
+ hw_irq, target, prio, lirq);
+
+ if (d) {
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+ u64 val = xive_esb_read(xd, XIVE_ESB_GET);
+
+ xmon_printf("PQ=%c%c",
+ val & XIVE_ESB_VAL_P ? 'P' : '-',
+ val & XIVE_ESB_VAL_Q ? 'Q' : '-');
+ }
+
+ xmon_printf("\n");
+ return 0;
+}
+
#endif /* CONFIG_XMON */
static unsigned int xive_get_irq(void)
@@ -307,6 +353,7 @@ static void xive_do_queue_eoi(struct xive_cpu *xc)
*/
static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
{
+ xd->stale_p = false;
/* If the XIVE supports the new "store EOI facility, use it */
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
@@ -350,7 +397,7 @@ static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
}
}
-/* irq_chip eoi callback */
+/* irq_chip eoi callback, called with irq descriptor lock held */
static void xive_irq_eoi(struct irq_data *d)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@@ -366,6 +413,8 @@ static void xive_irq_eoi(struct irq_data *d)
if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
!(xd->flags & XIVE_IRQ_NO_EOI))
xive_do_source_eoi(irqd_to_hwirq(d), xd);
+ else
+ xd->stale_p = true;
/*
* Clear saved_p to indicate that it's no longer occupying
@@ -397,11 +446,16 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd,
*/
if (mask) {
val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
- xd->saved_p = !!(val & XIVE_ESB_VAL_P);
- } else if (xd->saved_p)
+ if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P))
+ xd->saved_p = true;
+ xd->stale_p = false;
+ } else if (xd->saved_p) {
xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
- else
+ xd->saved_p = false;
+ } else {
xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
+ xd->stale_p = false;
+ }
}
/*
@@ -479,7 +533,7 @@ static int xive_find_target_in_mask(const struct cpumask *mask,
* Now go through the entire mask until we find a valid
* target.
*/
- for (;;) {
+ do {
/*
* We re-check online as the fallback case passes us
* an untested affinity mask
@@ -487,12 +541,11 @@ static int xive_find_target_in_mask(const struct cpumask *mask,
if (cpu_online(cpu) && xive_try_pick_target(cpu))
return cpu;
cpu = cpumask_next(cpu, mask);
- if (cpu == first)
- break;
/* Wrap around */
if (cpu >= nr_cpu_ids)
cpu = cpumask_first(mask);
- }
+ } while (cpu != first);
+
return -1;
}
@@ -542,6 +595,8 @@ static unsigned int xive_irq_startup(struct irq_data *d)
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
int target, rc;
+ xd->saved_p = false;
+ xd->stale_p = false;
pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
d->irq, hw_irq, d);
@@ -588,6 +643,7 @@ static unsigned int xive_irq_startup(struct irq_data *d)
return 0;
}
+/* called with irq descriptor lock held */
static void xive_irq_shutdown(struct irq_data *d)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@@ -603,16 +659,6 @@ static void xive_irq_shutdown(struct irq_data *d)
xive_do_source_set_mask(xd, true);
/*
- * The above may have set saved_p. We clear it otherwise it
- * will prevent re-enabling later on. It is ok to forget the
- * fact that the interrupt might be in a queue because we are
- * accounting that already in xive_dec_target_count() and will
- * be re-routing it to a new queue with proper accounting when
- * it's started up again
- */
- xd->saved_p = false;
-
- /*
* Mask the interrupt in HW in the IVT/EAS and set the number
* to be the "bad" IRQ number
*/
@@ -798,6 +844,10 @@ static int xive_irq_retrigger(struct irq_data *d)
return 1;
}
+/*
+ * Caller holds the irq descriptor lock, so this won't be called
+ * concurrently with xive_get_irqchip_state on the same interrupt.
+ */
static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@@ -821,6 +871,10 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
/* Set it to PQ=10 state to prevent further sends */
pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
+ if (!xd->stale_p) {
+ xd->saved_p = !!(pq & XIVE_ESB_VAL_P);
+ xd->stale_p = !xd->saved_p;
+ }
/* No target ? nothing to do */
if (xd->target == XIVE_INVALID_TARGET) {
@@ -828,7 +882,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
* An untargetted interrupt should have been
* also masked at the source
*/
- WARN_ON(pq & 2);
+ WARN_ON(xd->saved_p);
return 0;
}
@@ -848,9 +902,8 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
* This saved_p is cleared by the host EOI, when we know
* for sure the queue slot is no longer in use.
*/
- if (pq & 2) {
- pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
- xd->saved_p = true;
+ if (xd->saved_p) {
+ xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
/*
* Sync the XIVE source HW to ensure the interrupt
@@ -863,8 +916,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
*/
if (xive_ops->sync_source)
xive_ops->sync_source(hw_irq);
- } else
- xd->saved_p = false;
+ }
} else {
irqd_clr_forwarded_to_vcpu(d);
@@ -915,6 +967,23 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
return 0;
}
+/* Called with irq descriptor lock held. */
+static int xive_get_irqchip_state(struct irq_data *data,
+ enum irqchip_irq_state which, bool *state)
+{
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
+
+ switch (which) {
+ case IRQCHIP_STATE_ACTIVE:
+ *state = !xd->stale_p &&
+ (xd->saved_p ||
+ !!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P));
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static struct irq_chip xive_irq_chip = {
.name = "XIVE-IRQ",
.irq_startup = xive_irq_startup,
@@ -926,6 +995,7 @@ static struct irq_chip xive_irq_chip = {
.irq_set_type = xive_irq_set_type,
.irq_retrigger = xive_irq_retrigger,
.irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
+ .irq_get_irqchip_state = xive_get_irqchip_state,
};
bool is_xive_irq(struct irq_chip *chip)
@@ -1339,6 +1409,11 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
xd = irq_desc_get_handler_data(desc);
/*
+ * Clear saved_p to indicate that it's no longer pending
+ */
+ xd->saved_p = false;
+
+ /*
* For LSIs, we EOI, this will cause a resend if it's
* still asserted. Otherwise do an MSI retrigger.
*/
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 2f26b74f6cfa..0ff6b739052c 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -111,6 +111,20 @@ int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
}
EXPORT_SYMBOL_GPL(xive_native_configure_irq);
+static int xive_native_get_irq_config(u32 hw_irq, u32 *target, u8 *prio,
+ u32 *sw_irq)
+{
+ s64 rc;
+ __be64 vp;
+ __be32 lirq;
+
+ rc = opal_xive_get_irq_config(hw_irq, &vp, prio, &lirq);
+
+ *target = be64_to_cpu(vp);
+ *sw_irq = be32_to_cpu(lirq);
+
+ return rc == 0 ? 0 : -ENXIO;
+}
/* This can be called multiple time to change a queue configuration */
int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
@@ -231,6 +245,17 @@ static bool xive_native_match(struct device_node *node)
return of_device_is_compatible(node, "ibm,opal-xive-vc");
}
+static s64 opal_xive_allocate_irq(u32 chip_id)
+{
+ s64 irq = opal_xive_allocate_irq_raw(chip_id);
+
+ /*
+ * Old versions of skiboot can incorrectly return 0xffffffff to
+ * indicate no space, fix it up here.
+ */
+ return irq == 0xffffffff ? OPAL_RESOURCE : irq;
+}
+
#ifdef CONFIG_SMP
static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
{
@@ -442,6 +467,7 @@ EXPORT_SYMBOL_GPL(xive_native_sync_queue);
static const struct xive_ops xive_native_ops = {
.populate_irq_data = xive_native_populate_irq_data,
.configure_irq = xive_native_configure_irq,
+ .get_irq_config = xive_native_get_irq_config,
.setup_queue = xive_native_setup_queue,
.cleanup_queue = xive_native_cleanup_queue,
.match = xive_native_match,
@@ -800,6 +826,13 @@ int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex)
}
EXPORT_SYMBOL_GPL(xive_native_set_queue_state);
+bool xive_native_has_queue_state_support(void)
+{
+ return opal_check_token(OPAL_XIVE_GET_QUEUE_STATE) &&
+ opal_check_token(OPAL_XIVE_SET_QUEUE_STATE);
+}
+EXPORT_SYMBOL_GPL(xive_native_has_queue_state_support);
+
int xive_native_get_vp_state(u32 vp_id, u64 *out_state)
{
__be64 state;
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index cafb5c4df26b..33c10749edec 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -16,6 +16,7 @@
#include <linux/cpumask.h>
#include <linux/mm.h>
#include <linux/delay.h>
+#include <linux/libfdt.h>
#include <asm/prom.h>
#include <asm/io.h>
@@ -44,7 +45,7 @@ static int xive_irq_bitmap_add(int base, int count)
{
struct xive_irq_bitmap *xibm;
- xibm = kzalloc(sizeof(*xibm), GFP_ATOMIC);
+ xibm = kzalloc(sizeof(*xibm), GFP_KERNEL);
if (!xibm)
return -ENOMEM;
@@ -52,6 +53,10 @@ static int xive_irq_bitmap_add(int base, int count)
xibm->base = base;
xibm->count = count;
xibm->bitmap = kzalloc(xibm->count, GFP_KERNEL);
+ if (!xibm->bitmap) {
+ kfree(xibm);
+ return -ENOMEM;
+ }
list_add(&xibm->list, &xive_irq_bitmaps);
pr_info("Using IRQ range [%x-%x]", xibm->base,
@@ -210,6 +215,38 @@ static long plpar_int_set_source_config(unsigned long flags,
return 0;
}
+static long plpar_int_get_source_config(unsigned long flags,
+ unsigned long lisn,
+ unsigned long *target,
+ unsigned long *prio,
+ unsigned long *sw_irq)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ long rc;
+
+ pr_devel("H_INT_GET_SOURCE_CONFIG flags=%lx lisn=%lx\n", flags, lisn);
+
+ do {
+ rc = plpar_hcall(H_INT_GET_SOURCE_CONFIG, retbuf, flags, lisn,
+ target, prio, sw_irq);
+ } while (plpar_busy_delay(rc));
+
+ if (rc) {
+ pr_err("H_INT_GET_SOURCE_CONFIG lisn=%ld failed %ld\n",
+ lisn, rc);
+ return rc;
+ }
+
+ *target = retbuf[0];
+ *prio = retbuf[1];
+ *sw_irq = retbuf[2];
+
+ pr_devel("H_INT_GET_SOURCE_CONFIG target=%lx prio=%lx sw_irq=%lx\n",
+ retbuf[0], retbuf[1], retbuf[2]);
+
+ return 0;
+}
+
static long plpar_int_get_queue_info(unsigned long flags,
unsigned long target,
unsigned long priority,
@@ -393,6 +430,24 @@ static int xive_spapr_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
return rc == 0 ? 0 : -ENXIO;
}
+static int xive_spapr_get_irq_config(u32 hw_irq, u32 *target, u8 *prio,
+ u32 *sw_irq)
+{
+ long rc;
+ unsigned long h_target;
+ unsigned long h_prio;
+ unsigned long h_sw_irq;
+
+ rc = plpar_int_get_source_config(0, hw_irq, &h_target, &h_prio,
+ &h_sw_irq);
+
+ *target = h_target;
+ *prio = h_prio;
+ *sw_irq = h_sw_irq;
+
+ return rc == 0 ? 0 : -ENXIO;
+}
+
/* This can be called multiple time to change a queue configuration */
static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
__be32 *qpage, u32 order)
@@ -585,6 +640,7 @@ static void xive_spapr_sync_source(u32 hw_irq)
static const struct xive_ops xive_spapr_ops = {
.populate_irq_data = xive_spapr_populate_irq_data,
.configure_irq = xive_spapr_configure_irq,
+ .get_irq_config = xive_spapr_get_irq_config,
.setup_queue = xive_spapr_setup_queue,
.cleanup_queue = xive_spapr_cleanup_queue,
.match = xive_spapr_match,
@@ -659,6 +715,55 @@ static bool xive_get_max_prio(u8 *max_prio)
return true;
}
+static const u8 *get_vec5_feature(unsigned int index)
+{
+ unsigned long root, chosen;
+ int size;
+ const u8 *vec5;
+
+ root = of_get_flat_dt_root();
+ chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+ if (chosen == -FDT_ERR_NOTFOUND)
+ return NULL;
+
+ vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
+ if (!vec5)
+ return NULL;
+
+ if (size <= index)
+ return NULL;
+
+ return vec5 + index;
+}
+
+static bool xive_spapr_disabled(void)
+{
+ const u8 *vec5_xive;
+
+ vec5_xive = get_vec5_feature(OV5_INDX(OV5_XIVE_SUPPORT));
+ if (vec5_xive) {
+ u8 val;
+
+ val = *vec5_xive & OV5_FEAT(OV5_XIVE_SUPPORT);
+ switch (val) {
+ case OV5_FEAT(OV5_XIVE_EITHER):
+ case OV5_FEAT(OV5_XIVE_LEGACY):
+ break;
+ case OV5_FEAT(OV5_XIVE_EXPLOIT):
+ /* Hypervisor only supports XIVE */
+ if (xive_cmdline_disabled)
+ pr_warn("WARNING: Ignoring cmdline option xive=off\n");
+ return false;
+ default:
+ pr_warn("%s: Unknown xive support option: 0x%x\n",
+ __func__, val);
+ break;
+ }
+ }
+
+ return xive_cmdline_disabled;
+}
+
bool __init xive_spapr_init(void)
{
struct device_node *np;
@@ -671,7 +776,7 @@ bool __init xive_spapr_init(void)
const __be32 *reg;
int i;
- if (xive_cmdline_disabled)
+ if (xive_spapr_disabled())
return false;
pr_devel("%s()\n", __func__);
diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h
index 211725dbf364..59cd366e7933 100644
--- a/arch/powerpc/sysdev/xive/xive-internal.h
+++ b/arch/powerpc/sysdev/xive/xive-internal.h
@@ -33,6 +33,8 @@ struct xive_cpu {
struct xive_ops {
int (*populate_irq_data)(u32 hw_irq, struct xive_irq_data *data);
int (*configure_irq)(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
+ int (*get_irq_config)(u32 hw_irq, u32 *target, u8 *prio,
+ u32 *sw_irq);
int (*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
void (*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio);
void (*setup_cpu)(unsigned int cpu, struct xive_cpu *xc);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index d0620d762a5a..d83364ebc5c5 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -465,8 +465,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
local_irq_save(flags);
hard_irq_disable();
- tracing_enabled = tracing_is_on();
- tracing_off();
+ if (!fromipi) {
+ tracing_enabled = tracing_is_on();
+ tracing_off();
+ }
bp = in_breakpoint_table(regs->nip, &offset);
if (bp != NULL) {
@@ -2448,7 +2450,9 @@ static void dump_one_paca(int cpu)
DUMP(p, canary, "%#-*lx");
#endif
DUMP(p, saved_r1, "%#-*llx");
+#ifdef CONFIG_PPC_BOOK3E
DUMP(p, trap_save, "%#-*x");
+#endif
DUMP(p, irq_soft_mask, "%#-*x");
DUMP(p, irq_happened, "%#-*x");
#ifdef CONFIG_MMIOWB
@@ -2530,13 +2534,16 @@ static void dump_pacas(void)
static void dump_one_xive(int cpu)
{
unsigned int hwid = get_hard_smp_processor_id(cpu);
-
- opal_xive_dump(XIVE_DUMP_TM_HYP, hwid);
- opal_xive_dump(XIVE_DUMP_TM_POOL, hwid);
- opal_xive_dump(XIVE_DUMP_TM_OS, hwid);
- opal_xive_dump(XIVE_DUMP_TM_USER, hwid);
- opal_xive_dump(XIVE_DUMP_VP, hwid);
- opal_xive_dump(XIVE_DUMP_EMU_STATE, hwid);
+ bool hv = cpu_has_feature(CPU_FTR_HVMODE);
+
+ if (hv) {
+ opal_xive_dump(XIVE_DUMP_TM_HYP, hwid);
+ opal_xive_dump(XIVE_DUMP_TM_POOL, hwid);
+ opal_xive_dump(XIVE_DUMP_TM_OS, hwid);
+ opal_xive_dump(XIVE_DUMP_TM_USER, hwid);
+ opal_xive_dump(XIVE_DUMP_VP, hwid);
+ opal_xive_dump(XIVE_DUMP_EMU_STATE, hwid);
+ }
if (setjmp(bus_error_jmp) != 0) {
catch_memory_errors = 0;
@@ -2565,16 +2572,28 @@ static void dump_all_xives(void)
dump_one_xive(cpu);
}
-static void dump_one_xive_irq(u32 num)
+static void dump_one_xive_irq(u32 num, struct irq_data *d)
+{
+ xmon_xive_get_irq_config(num, d);
+}
+
+static void dump_all_xive_irq(void)
{
- s64 rc;
- __be64 vp;
- u8 prio;
- __be32 lirq;
-
- rc = opal_xive_get_irq_config(num, &vp, &prio, &lirq);
- xmon_printf("IRQ 0x%x config: vp=0x%llx prio=%d lirq=0x%x (rc=%lld)\n",
- num, be64_to_cpu(vp), prio, be32_to_cpu(lirq), rc);
+ unsigned int i;
+ struct irq_desc *desc;
+
+ for_each_irq_desc(i, desc) {
+ struct irq_data *d = irq_desc_get_irq_data(desc);
+ unsigned int hwirq;
+
+ if (!d)
+ continue;
+
+ hwirq = (unsigned int)irqd_to_hwirq(d);
+ /* IPIs are special (HW number 0) */
+ if (hwirq)
+ dump_one_xive_irq(hwirq, d);
+ }
}
static void dump_xives(void)
@@ -2593,7 +2612,9 @@ static void dump_xives(void)
return;
} else if (c == 'i') {
if (scanhex(&num))
- dump_one_xive_irq(num);
+ dump_one_xive_irq(num, NULL);
+ else
+ dump_all_xive_irq();
return;
}
@@ -3090,7 +3111,7 @@ static void show_pte(unsigned long addr)
printf("pgd @ 0x%px\n", pgdir);
- if (pgd_huge(*pgdp)) {
+ if (pgd_is_leaf(*pgdp)) {
format_pte(pgdp, pgd_val(*pgdp));
return;
}
@@ -3103,7 +3124,7 @@ static void show_pte(unsigned long addr)
return;
}
- if (pud_huge(*pudp)) {
+ if (pud_is_leaf(*pudp)) {
format_pte(pudp, pud_val(*pudp));
return;
}
@@ -3117,7 +3138,7 @@ static void show_pte(unsigned long addr)
return;
}
- if (pmd_huge(*pmdp)) {
+ if (pmd_is_leaf(*pmdp)) {
format_pte(pmdp, pmd_val(*pmdp));
return;
}