-rw-r--r--  Documentation/ABI/testing/sysfs-class-cxl | 10
-rw-r--r--  Documentation/powerpc/cxl.txt | 55
-rw-r--r--  MAINTAINERS | 16
-rw-r--r--  arch/powerpc/Kconfig | 6
-rw-r--r--  arch/powerpc/boot/rs6000.h | 2
-rw-r--r--  arch/powerpc/boot/treeboot-akebono.c | 2
-rw-r--r--  arch/powerpc/boot/treeboot-currituck.c | 2
-rw-r--r--  arch/powerpc/boot/treeboot-iss4xx.c | 2
-rw-r--r--  arch/powerpc/configs/powernv_defconfig | 313
-rw-r--r--  arch/powerpc/crypto/aes-spe-core.S | 4
-rw-r--r--  arch/powerpc/crypto/aes-spe-glue.c | 2
-rw-r--r--  arch/powerpc/include/asm/atomic.h | 159
-rw-r--r--  arch/powerpc/include/asm/book3s/32/mmu-hash.h (renamed from arch/powerpc/include/asm/mmu-hash32.h) | 0
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash-4k.h | 36
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash-64k.h | 57
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash.h | 68
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu-hash.h (renamed from arch/powerpc/include/asm/mmu-hash64.h) | 4
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 40
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush-hash.h | 94
-rw-r--r--  arch/powerpc/include/asm/cmpxchg.h | 237
-rw-r--r--  arch/powerpc/include/asm/cputable.h | 20
-rw-r--r--  arch/powerpc/include/asm/eeh.h | 6
-rw-r--r--  arch/powerpc/include/asm/hugetlb.h | 2
-rw-r--r--  arch/powerpc/include/asm/hvcall.h | 1
-rw-r--r--  arch/powerpc/include/asm/hydra.h | 2
-rw-r--r--  arch/powerpc/include/asm/io.h | 2
-rw-r--r--  arch/powerpc/include/asm/machdep.h | 6
-rw-r--r--  arch/powerpc/include/asm/mmu.h | 5
-rw-r--r--  arch/powerpc/include/asm/module.h | 2
-rw-r--r--  arch/powerpc/include/asm/nohash/64/pgtable.h | 3
-rw-r--r--  arch/powerpc/include/asm/opal.h | 3
-rw-r--r--  arch/powerpc/include/asm/page.h | 111
-rw-r--r--  arch/powerpc/include/asm/pci-bridge.h | 9
-rw-r--r--  arch/powerpc/include/asm/perf_event_server.h | 10
-rw-r--r--  arch/powerpc/include/asm/pgalloc-64.h | 42
-rw-r--r--  arch/powerpc/include/asm/pgtable-types.h | 103
-rw-r--r--  arch/powerpc/include/asm/pmac_feature.h | 2
-rw-r--r--  arch/powerpc/include/asm/processor.h | 2
-rw-r--r--  arch/powerpc/include/asm/reg.h | 16
-rw-r--r--  arch/powerpc/include/asm/reg_booke.h | 2
-rw-r--r--  arch/powerpc/include/asm/smu.h | 2
-rw-r--r--  arch/powerpc/include/asm/switch_to.h | 13
-rw-r--r--  arch/powerpc/include/asm/tlbflush.h | 92
-rw-r--r--  arch/powerpc/include/asm/trace.h | 8
-rw-r--r--  arch/powerpc/include/asm/uninorth.h | 2
-rw-r--r--  arch/powerpc/include/asm/xics.h | 2
-rw-r--r--  arch/powerpc/include/uapi/asm/epapr_hcalls.h | 4
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 2
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.S | 49
-rw-r--r--  arch/powerpc/kernel/cputable.c | 27
-rw-r--r--  arch/powerpc/kernel/eeh.c | 39
-rw-r--r--  arch/powerpc/kernel/eeh_cache.c | 11
-rw-r--r--  arch/powerpc/kernel/eeh_dev.c | 1
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c | 152
-rw-r--r--  arch/powerpc/kernel/eeh_pe.c | 38
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 21
-rw-r--r--  arch/powerpc/kernel/fpu.S | 25
-rw-r--r--  arch/powerpc/kernel/head_44x.S | 2
-rw-r--r--  arch/powerpc/kernel/idle_power7.S | 2
-rw-r--r--  arch/powerpc/kernel/kgdb.c | 4
-rw-r--r--  arch/powerpc/kernel/mce_power.c | 17
-rw-r--r--  arch/powerpc/kernel/module_64.c | 2
-rw-r--r--  arch/powerpc/kernel/pci-hotplug.c | 2
-rw-r--r--  arch/powerpc/kernel/pci_dn.c | 19
-rw-r--r--  arch/powerpc/kernel/ppc_ksyms.c | 4
-rw-r--r--  arch/powerpc/kernel/process.c | 168
-rw-r--r--  arch/powerpc/kernel/signal.c | 4
-rw-r--r--  arch/powerpc/kernel/signal.h | 2
-rw-r--r--  arch/powerpc/kernel/traps.c | 12
-rw-r--r--  arch/powerpc/kernel/vector.S | 45
-rw-r--r--  arch/powerpc/kvm/book3s_32_mmu_host.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_host.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio_hv.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2
-rw-r--r--  arch/powerpc/kvm/book3s_xics.c | 2
-rw-r--r--  arch/powerpc/kvm/booke.c | 2
-rw-r--r--  arch/powerpc/kvm/e500mc.c | 2
-rw-r--r--  arch/powerpc/mm/hash64_4k.c | 4
-rw-r--r--  arch/powerpc/mm/hash64_64k.c | 15
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c | 87
-rw-r--r--  arch/powerpc/mm/hugepage-hash64.c | 12
-rw-r--r--  arch/powerpc/mm/hugetlbpage-hash64.c | 5
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 3
-rw-r--r--  arch/powerpc/mm/init_64.c | 68
-rw-r--r--  arch/powerpc/mm/mem.c | 10
-rw-r--r--  arch/powerpc/mm/mmu_decl.h | 3
-rw-r--r--  arch/powerpc/mm/pgtable_64.c | 43
-rw-r--r--  arch/powerpc/mm/tlb_low_64e.S | 2
-rw-r--r--  arch/powerpc/mm/tlb_nohash_low.S | 4
-rw-r--r--  arch/powerpc/oprofile/op_model_cell.c | 4
-rw-r--r--  arch/powerpc/perf/core-book3s.c | 2
-rw-r--r--  arch/powerpc/perf/hv-24x7.c | 225
-rw-r--r--  arch/powerpc/perf/hv-24x7.h | 3
-rw-r--r--  arch/powerpc/perf/hv-gpci.c | 43
-rw-r--r--  arch/powerpc/perf/power7-pmu.c | 18
-rw-r--r--  arch/powerpc/perf/power8-events-list.h | 51
-rw-r--r--  arch/powerpc/perf/power8-pmu.c | 112
-rw-r--r--  arch/powerpc/platforms/52xx/mpc52xx_pci.c | 2
-rw-r--r--  arch/powerpc/platforms/85xx/mpc85xx_cds.c | 2
-rw-r--r--  arch/powerpc/platforms/powermac/cache.S | 2
-rw-r--r--  arch/powerpc/platforms/powermac/feature.c | 6
-rw-r--r--  arch/powerpc/platforms/powernv/Makefile | 2
-rw-r--r--  arch/powerpc/platforms/powernv/eeh-powernv.c | 303
-rw-r--r--  arch/powerpc/platforms/powernv/idle.c | 6
-rw-r--r--  arch/powerpc/platforms/powernv/npu-dma.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-msglog.c | 34
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c | 7
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c | 348
-rw-r--r--  arch/powerpc/platforms/powernv/pci-p5ioc2.c | 271
-rw-r--r--  arch/powerpc/platforms/powernv/pci.c | 43
-rw-r--r--  arch/powerpc/platforms/powernv/pci.h | 153
-rw-r--r--  arch/powerpc/platforms/powernv/subcore.c | 2
-rw-r--r--  arch/powerpc/platforms/ps3/gelic_udbg.c | 72
-rw-r--r--  arch/powerpc/platforms/ps3/interrupt.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/hvconsole.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 9
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c | 2
-rw-r--r--  arch/powerpc/sysdev/fsl_pci.c | 2
-rw-r--r--  arch/powerpc/sysdev/fsl_rmu.c | 2
-rw-r--r--  arch/powerpc/sysdev/i8259.c | 2
-rw-r--r--  arch/powerpc/sysdev/mpic.c | 4
-rw-r--r--  arch/powerpc/xmon/xmon.c | 120
-rw-r--r--  drivers/misc/cxl/Makefile | 1
-rw-r--r--  drivers/misc/cxl/api.c | 83
-rw-r--r--  drivers/misc/cxl/base.c | 32
-rw-r--r--  drivers/misc/cxl/context.c | 11
-rw-r--r--  drivers/misc/cxl/cxl.h | 288
-rw-r--r--  drivers/misc/cxl/debugfs.c | 4
-rw-r--r--  drivers/misc/cxl/fault.c | 25
-rw-r--r--  drivers/misc/cxl/file.c | 28
-rw-r--r--  drivers/misc/cxl/flash.c | 538
-rw-r--r--  drivers/misc/cxl/guest.c | 1177
-rw-r--r--  drivers/misc/cxl/hcalls.c | 647
-rw-r--r--  drivers/misc/cxl/hcalls.h | 204
-rw-r--r--  drivers/misc/cxl/irq.c | 309
-rw-r--r--  drivers/misc/cxl/main.c | 122
-rw-r--r--  drivers/misc/cxl/native.c | 469
-rw-r--r--  drivers/misc/cxl/of.c | 513
-rw-r--r--  drivers/misc/cxl/pci.c | 267
-rw-r--r--  drivers/misc/cxl/sysfs.c | 123
-rw-r--r--  drivers/misc/cxl/trace.h | 193
-rw-r--r--  drivers/misc/cxl/vphb.c | 167
-rw-r--r--  drivers/pci/bus.c | 3
-rw-r--r--  drivers/pci/iov.c | 10
-rw-r--r--  drivers/scsi/cxlflash/common.h | 1
-rw-r--r--  drivers/scsi/cxlflash/main.c | 18
-rw-r--r--  include/asm-generic/pgtable.h | 8
-rw-r--r--  include/linux/atomic.h | 10
-rw-r--r--  include/linux/bug.h | 9
-rw-r--r--  include/linux/huge_mm.h | 3
-rw-r--r--  include/linux/pci.h | 9
-rw-r--r--  include/misc/cxl.h | 8
-rw-r--r--  include/uapi/misc/cxl.h | 24
-rw-r--r--  mm/huge_memory.c | 18
-rw-r--r--  tools/testing/selftests/powerpc/Makefile | 5
-rw-r--r--  tools/testing/selftests/powerpc/basic_asm.h | 70
-rw-r--r--  tools/testing/selftests/powerpc/math/.gitignore | 6
-rw-r--r--  tools/testing/selftests/powerpc/math/Makefile | 19
-rw-r--r--  tools/testing/selftests/powerpc/math/fpu_asm.S | 198
-rw-r--r--  tools/testing/selftests/powerpc/math/fpu_preempt.c | 113
-rw-r--r--  tools/testing/selftests/powerpc/math/fpu_signal.c | 135
-rw-r--r--  tools/testing/selftests/powerpc/math/fpu_syscall.c | 90
-rw-r--r--  tools/testing/selftests/powerpc/math/vmx_asm.S | 235
-rw-r--r--  tools/testing/selftests/powerpc/math/vmx_preempt.c | 112
-rw-r--r--  tools/testing/selftests/powerpc/math/vmx_signal.c | 156
-rw-r--r--  tools/testing/selftests/powerpc/math/vmx_syscall.c | 91
-rw-r--r--  tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c | 2
171 files changed, 8619 insertions, 2300 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl
index b07e86d4597f..7fd737eed38a 100644
--- a/Documentation/ABI/testing/sysfs-class-cxl
+++ b/Documentation/ABI/testing/sysfs-class-cxl
@@ -159,7 +159,7 @@ Description: read only
Decimal value of the Per Process MMIO space length.
Users: https://github.com/ibm-capi/libcxl
-What: /sys/class/cxl/<afu>m/pp_mmio_off
+What: /sys/class/cxl/<afu>m/pp_mmio_off (not in a guest)
Date: September 2014
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
@@ -183,7 +183,7 @@ Description: read only
Identifies the revision level of the PSL.
Users: https://github.com/ibm-capi/libcxl
-What: /sys/class/cxl/<card>/base_image
+What: /sys/class/cxl/<card>/base_image (not in a guest)
Date: September 2014
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
@@ -193,7 +193,7 @@ Description: read only
during the initial program load.
Users: https://github.com/ibm-capi/libcxl
-What: /sys/class/cxl/<card>/image_loaded
+What: /sys/class/cxl/<card>/image_loaded (not in a guest)
Date: September 2014
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
@@ -201,7 +201,7 @@ Description: read only
onto the card.
Users: https://github.com/ibm-capi/libcxl
-What: /sys/class/cxl/<card>/load_image_on_perst
+What: /sys/class/cxl/<card>/load_image_on_perst (not in a guest)
Date: December 2014
Contact: linuxppc-dev@lists.ozlabs.org
Description: read/write
@@ -224,7 +224,7 @@ Description: write only
to reload the FPGA depending on load_image_on_perst.
Users: https://github.com/ibm-capi/libcxl
-What: /sys/class/cxl/<card>/perst_reloads_same_image
+What: /sys/class/cxl/<card>/perst_reloads_same_image (not in a guest)
Date: July 2015
Contact: linuxppc-dev@lists.ozlabs.org
Description: read/write
diff --git a/Documentation/powerpc/cxl.txt b/Documentation/powerpc/cxl.txt
index 205c1b81625c..d5506ba0fef7 100644
--- a/Documentation/powerpc/cxl.txt
+++ b/Documentation/powerpc/cxl.txt
@@ -116,6 +116,8 @@ Work Element Descriptor (WED)
User API
========
+1. AFU character devices
+
For AFUs operating in AFU directed mode, two character device
files will be created. /dev/cxl/afu0.0m will correspond to a
master context and /dev/cxl/afu0.0s will correspond to a slave
@@ -362,6 +364,59 @@ read
reserved fields:
For future extensions and padding
+
+2. Card character device (PowerVM guest only)
+
+ In a PowerVM guest, an extra character device is created for the
+ card. The device is only used to write (flash) a new image on the
+ FPGA accelerator. Once the image is written and verified, the
+ device tree is updated and the card is reset to reload the updated
+ image.
+
+open
+----
+
+ Opens the device and allocates a file descriptor to be used with
+ the rest of the API. The device can only be opened once.
+
+ioctl
+-----
+
+CXL_IOCTL_DOWNLOAD_IMAGE:
+CXL_IOCTL_VALIDATE_IMAGE:
+ Starts and controls flashing a new FPGA image. Partial
+ reconfiguration is not supported (yet), so the image must contain
+ a copy of the PSL and AFU(s). Since an image can be quite large,
+ the caller may have to iterate, splitting the image into smaller
+ chunks.
+
+ Takes a pointer to a struct cxl_adapter_image:
+ struct cxl_adapter_image {
+ __u64 flags;
+ __u64 data;
+ __u64 len_data;
+ __u64 len_image;
+ __u64 reserved1;
+ __u64 reserved2;
+ __u64 reserved3;
+ __u64 reserved4;
+ };
+
+ flags:
+ These flags indicate which optional fields are present in
+ this struct. Currently all fields are mandatory.
+
+ data:
+ Pointer to a buffer with part of the image to write to the
+ card.
+
+ len_data:
+ Size of the buffer pointed to by data.
+
+ len_image:
+ Full size of the image.
+
+
Sysfs Class
===========
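
A minimal userspace sketch of the download loop described just above may help; only struct cxl_adapter_image and the CXL_IOCTL_* names come from this series, while the device path, chunk size and error handling are illustrative assumptions:

    /* Hedged sketch: stream an FPGA image to the card device in chunks.
     * "/dev/cxl/card0" and the 1MB chunk size are assumptions. */
    #include <fcntl.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <misc/cxl.h>       /* struct cxl_adapter_image, CXL_IOCTL_* */

    static int flash_image(const char *buf, size_t len)
    {
            const size_t chunk = 1024 * 1024;   /* split a large image */
            struct cxl_adapter_image ai;
            size_t off = 0;
            int fd = open("/dev/cxl/card0", O_RDWR); /* single-open device */

            if (fd < 0)
                    return -1;
            while (off < len) {
                    memset(&ai, 0, sizeof(ai)); /* flags/reserved stay 0 */
                    ai.data = (__u64)(uintptr_t)(buf + off);
                    ai.len_data = len - off < chunk ? len - off : chunk;
                    ai.len_image = len;         /* always the full size */
                    if (ioctl(fd, CXL_IOCTL_DOWNLOAD_IMAGE, &ai) < 0) {
                            close(fd);
                            return -1;
                    }
                    off += ai.len_data;
            }
            close(fd);  /* card resets to reload the image once validated */
            return 0;
    }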
diff --git a/MAINTAINERS b/MAINTAINERS
index 7f1fa4ff300a..74bbff3dda52 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4230,13 +4230,6 @@ M: Maxim Levitsky <maximlevitsky@gmail.com>
S: Maintained
F: drivers/media/rc/ene_ir.*
-ENHANCED ERROR HANDLING (EEH)
-M: Gavin Shan <shangw@linux.vnet.ibm.com>
-L: linuxppc-dev@lists.ozlabs.org
-S: Supported
-F: Documentation/powerpc/eeh-pci-error-recovery.txt
-F: arch/powerpc/kernel/eeh*.c
-
EPSON S1D13XXX FRAMEBUFFER DRIVER
M: Kristoffer Ericson <kristoffer.ericson@gmail.com>
S: Maintained
@@ -8252,6 +8245,15 @@ L: linux-pci@vger.kernel.org
S: Supported
F: Documentation/PCI/pci-error-recovery.txt
+PCI ENHANCED ERROR HANDLING (EEH) FOR POWERPC
+M: Russell Currey <ruscur@russell.cc>
+L: linuxppc-dev@lists.ozlabs.org
+S: Supported
+F: Documentation/powerpc/eeh-pci-error-recovery.txt
+F: arch/powerpc/kernel/eeh*.c
+F: arch/powerpc/platforms/*/eeh*.c
+F: arch/powerpc/include/*/eeh*.h
+
PCI SUBSYSTEM
M: Bjorn Helgaas <bhelgaas@google.com>
L: linux-pci@vger.kernel.org
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 91da283cd658..5130fa166a93 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -304,7 +304,7 @@ config ZONE_DMA32
config PGTABLE_LEVELS
int
default 2 if !PPC64
- default 3 if PPC_64K_PAGES
+ default 3 if PPC_64K_PAGES && !PPC_BOOK3S_64
default 4
source "init/Kconfig"
@@ -576,7 +576,7 @@ choice
config PPC_4K_PAGES
bool "4k page size"
- select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S
+ select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
config PPC_16K_PAGES
bool "16k page size"
@@ -585,7 +585,7 @@ config PPC_16K_PAGES
config PPC_64K_PAGES
bool "64k page size"
depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64)
- select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S
+ select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
config PPC_256K_PAGES
bool "256k page size"
diff --git a/arch/powerpc/boot/rs6000.h b/arch/powerpc/boot/rs6000.h
index 433f45084e41..d70517ccc0f7 100644
--- a/arch/powerpc/boot/rs6000.h
+++ b/arch/powerpc/boot/rs6000.h
@@ -239,5 +239,5 @@ struct external_reloc {
#define DEFAULT_DATA_SECTION_ALIGNMENT 4
#define DEFAULT_BSS_SECTION_ALIGNMENT 4
#define DEFAULT_TEXT_SECTION_ALIGNMENT 4
-/* For new sections we havn't heard of before */
+/* For new sections we haven't heard of before */
#define DEFAULT_SECTION_ALIGNMENT 4
diff --git a/arch/powerpc/boot/treeboot-akebono.c b/arch/powerpc/boot/treeboot-akebono.c
index b73174c34fe4..bcc5902f8462 100644
--- a/arch/powerpc/boot/treeboot-akebono.c
+++ b/arch/powerpc/boot/treeboot-akebono.c
@@ -38,7 +38,7 @@
BSS_STACK(4096);
-#define SPRN_PIR 0x11E /* Processor Indentification Register */
+#define SPRN_PIR 0x11E /* Processor Identification Register */
#define USERDATA_LEN 256 /* Length of userdata passed in by PIBS */
#define MAX_RANKS 0x4
#define DDR3_MR0CF 0x80010011U
diff --git a/arch/powerpc/boot/treeboot-currituck.c b/arch/powerpc/boot/treeboot-currituck.c
index 925ae43b7467..303d2074ee56 100644
--- a/arch/powerpc/boot/treeboot-currituck.c
+++ b/arch/powerpc/boot/treeboot-currituck.c
@@ -80,7 +80,7 @@ static void ibm_currituck_fixups(void)
}
}
-#define SPRN_PIR 0x11E /* Processor Indentification Register */
+#define SPRN_PIR 0x11E /* Processor Identification Register */
void platform_init(void)
{
unsigned long end_of_ram, avail_ram;
diff --git a/arch/powerpc/boot/treeboot-iss4xx.c b/arch/powerpc/boot/treeboot-iss4xx.c
index 329e710feda2..733f8bf25184 100644
--- a/arch/powerpc/boot/treeboot-iss4xx.c
+++ b/arch/powerpc/boot/treeboot-iss4xx.c
@@ -59,7 +59,7 @@ static void *iss_4xx_vmlinux_alloc(unsigned long size)
return (void *)ibm4xx_memstart;
}
-#define SPRN_PIR 0x11E /* Processor Indentification Register */
+#define SPRN_PIR 0x11E /* Processor Identification Register */
void platform_init(void)
{
unsigned long end_of_ram = 0x08000000;
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
new file mode 100644
index 000000000000..045031048f8d
--- /dev/null
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -0,0 +1,313 @@
+CONFIG_PPC64=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2048
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_IRQ_DOMAIN_DEBUG=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
+CONFIG_CGROUPS=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_USER_NS=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_OPAL_PRD=y
+# CONFIG_PPC_PSERIES is not set
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_IDLE=y
+CONFIG_HZ_100=y
+CONFIG_BINFMT_MISC=m
+CONFIG_PPC_TRANSACTIONAL_MEM=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_KEXEC=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_NUMA=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_PPC_64K_PAGES=y
+CONFIG_PPC_SUBPAGE_PROT=y
+CONFIG_SCHED_SMT=y
+CONFIG_PM=y
+CONFIG_PCI_MSI=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_ADVANCED is not set
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_MTD=y
+CONFIG_MTD_POWERNV_FLASH=y
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+CONFIG_BLK_DEV_FD=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_VIRTIO_BLK=m
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_AMD74XX=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SRP_ATTRS=y
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
+CONFIG_SCSI_MPT2SAS=m
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_SCSI_IPR=y
+CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_UEVENT=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_NETCONSOLE=y
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_VHOST_NET=m
+CONFIG_VORTEX=y
+CONFIG_ACENIC=m
+CONFIG_ACENIC_OMIT_TIGON_I=y
+CONFIG_PCNET32=y
+CONFIG_TIGON3=y
+CONFIG_BNX2X=m
+CONFIG_CHELSIO_T1=m
+CONFIG_BE2NET=m
+CONFIG_S2IO=m
+CONFIG_E100=y
+CONFIG_E1000=y
+CONFIG_E1000E=y
+CONFIG_IXGB=m
+CONFIG_IXGBE=m
+CONFIG_MLX4_EN=m
+CONFIG_MYRI10GE=m
+CONFIG_QLGE=m
+CONFIG_NETXEN_NIC=m
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+CONFIG_INPUT_EVDEV=m
+CONFIG_INPUT_MISC=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_JSM=m
+CONFIG_VIRTIO_CONSOLE=m
+CONFIG_IPMI_HANDLER=y
+CONFIG_IPMI_DEVICE_INTERFACE=y
+CONFIG_IPMI_POWERNV=y
+CONFIG_RAW_DRIVER=y
+CONFIG_MAX_RAW_DEVS=1024
+CONFIG_DRM=y
+CONFIG_DRM_AST=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_OF=y
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G=y
+CONFIG_FB_RADEON=y
+CONFIG_FB_IBM_GXT4500=y
+CONFIG_LCD_PLATFORM=m
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_LOGO=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=y
+CONFIG_USB_MON=m
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=m
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_POWERNV=m
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+CONFIG_INFINIBAND_CXGB3=m
+CONFIG_INFINIBAND_CXGB4=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_INFINIBAND_IPOIB=m
+CONFIG_INFINIBAND_IPOIB_CM=y
+CONFIG_INFINIBAND_SRP=m
+CONFIG_INFINIBAND_ISER=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GENERIC=y
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_OVERLAY_FS=m
+CONFIG_ISO9660_FS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_PSTORE=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_LATENCYTOP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_CODE_PATCHING_SELFTEST=y
+CONFIG_FTR_FIXUP_SELFTEST=y
+CONFIG_MSI_BITMAP_SELFTEST=y
+CONFIG_XMON=y
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_DEV_NX=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM_BOOK3S_64=m
+CONFIG_KVM_BOOK3S_64_HV=m
diff --git a/arch/powerpc/crypto/aes-spe-core.S b/arch/powerpc/crypto/aes-spe-core.S
index 5dc6bce90a77..bc6ff43a9889 100644
--- a/arch/powerpc/crypto/aes-spe-core.S
+++ b/arch/powerpc/crypto/aes-spe-core.S
@@ -61,7 +61,7 @@
* via bl/blr. It expects that caller has pre-xored input data with first
* 4 words of encryption key into rD0-rD3. Pointer/counter registers must
* have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
- * and rW0-rW3 and caller must execute a final xor on the ouput registers.
+ * and rW0-rW3 and caller must execute a final xor on the output registers.
* All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
*
*/
@@ -209,7 +209,7 @@ ppc_encrypt_block_loop:
* via bl/blr. It expects that caller has pre-xored input data with first
* 4 words of encryption key into rD0-rD3. Pointer/counter registers must
* have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
- * and rW0-rW3 and caller must execute a final xor on the ouput registers.
+ * and rW0-rW3 and caller must execute a final xor on the output registers.
* All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
*
*/
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
index 93ee046d12cd..6d99ebf2ea15 100644
--- a/arch/powerpc/crypto/aes-spe-glue.c
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -32,7 +32,7 @@
* 16 byte block block or 25 cycles per byte. Thus 768 bytes of input data
* will need an estimated maximum of 20,000 cycles. Headroom for cache misses
* included. Even with the low end model clocked at 667 MHz this equals to a
- * critical time window of less than 30us. The value has been choosen to
+ * critical time window of less than 30us. The value has been chosen to
* process a 512 byte disk block in one or a large 1400 bytes IPsec network
* packet in two runs.
*
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 55f106ed12bf..ae0751ef8788 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -12,6 +12,24 @@
#define ATOMIC_INIT(i) { (i) }
+/*
+ * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
+ * a "bne-" instruction at the end, an isync is enough as an acquire
+ * barrier on platforms without lwsync.
+ */
+#define __atomic_op_acquire(op, args...) \
+({ \
+ typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \
+ __asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory"); \
+ __ret; \
+})
+
+#define __atomic_op_release(op, args...) \
+({ \
+ __asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory"); \
+ op##_relaxed(args); \
+})
+
static __inline__ int atomic_read(const atomic_t *v)
{
int t;
@@ -42,27 +60,27 @@ static __inline__ void atomic_##op(int a, atomic_t *v) \
: "cc"); \
} \
-#define ATOMIC_OP_RETURN(op, asm_op) \
-static __inline__ int atomic_##op##_return(int a, atomic_t *v) \
+#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
+static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \
{ \
int t; \
\
__asm__ __volatile__( \
- PPC_ATOMIC_ENTRY_BARRIER \
-"1: lwarx %0,0,%2 # atomic_" #op "_return\n" \
- #asm_op " %0,%1,%0\n" \
- PPC405_ERR77(0,%2) \
-" stwcx. %0,0,%2 \n" \
+"1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \
+ #asm_op " %0,%2,%0\n" \
+ PPC405_ERR77(0, %3) \
+" stwcx. %0,0,%3\n" \
" bne- 1b\n" \
- PPC_ATOMIC_EXIT_BARRIER \
- : "=&r" (t) \
+ : "=&r" (t), "+m" (v->counter) \
: "r" (a), "r" (&v->counter) \
- : "cc", "memory"); \
+ : "cc"); \
\
return t; \
}
-#define ATOMIC_OPS(op, asm_op) ATOMIC_OP(op, asm_op) ATOMIC_OP_RETURN(op, asm_op)
+#define ATOMIC_OPS(op, asm_op) \
+ ATOMIC_OP(op, asm_op) \
+ ATOMIC_OP_RETURN_RELAXED(op, asm_op)
ATOMIC_OPS(add, add)
ATOMIC_OPS(sub, subf)
@@ -71,8 +89,11 @@ ATOMIC_OP(and, and)
ATOMIC_OP(or, or)
ATOMIC_OP(xor, xor)
+#define atomic_add_return_relaxed atomic_add_return_relaxed
+#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+
#undef ATOMIC_OPS
-#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP_RETURN_RELAXED
#undef ATOMIC_OP
#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
@@ -92,21 +113,19 @@ static __inline__ void atomic_inc(atomic_t *v)
: "cc", "xer");
}
-static __inline__ int atomic_inc_return(atomic_t *v)
+static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
{
int t;
__asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: lwarx %0,0,%1 # atomic_inc_return\n\
- addic %0,%0,1\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1 \n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
+"1: lwarx %0,0,%2 # atomic_inc_return_relaxed\n"
+" addic %0,%0,1\n"
+ PPC405_ERR77(0, %2)
+" stwcx. %0,0,%2\n"
+" bne- 1b"
+ : "=&r" (t), "+m" (v->counter)
: "r" (&v->counter)
- : "cc", "xer", "memory");
+ : "cc", "xer");
return t;
}
@@ -136,27 +155,34 @@ static __inline__ void atomic_dec(atomic_t *v)
: "cc", "xer");
}
-static __inline__ int atomic_dec_return(atomic_t *v)
+static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
{
int t;
__asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: lwarx %0,0,%1 # atomic_dec_return\n\
- addic %0,%0,-1\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1\n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
+"1: lwarx %0,0,%2 # atomic_dec_return_relaxed\n"
+" addic %0,%0,-1\n"
+ PPC405_ERR77(0, %2)
+" stwcx. %0,0,%2\n"
+" bne- 1b"
+ : "=&r" (t), "+m" (v->counter)
: "r" (&v->counter)
- : "cc", "xer", "memory");
+ : "cc", "xer");
return t;
}
+#define atomic_inc_return_relaxed atomic_inc_return_relaxed
+#define atomic_dec_return_relaxed atomic_dec_return_relaxed
+
#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_cmpxchg_relaxed(v, o, n) \
+ cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define atomic_cmpxchg_acquire(v, o, n) \
+ cmpxchg_acquire(&((v)->counter), (o), (n))
+
#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
/**
* __atomic_add_unless - add unless the number is a given value
@@ -285,26 +311,27 @@ static __inline__ void atomic64_##op(long a, atomic64_t *v) \
: "cc"); \
}
-#define ATOMIC64_OP_RETURN(op, asm_op) \
-static __inline__ long atomic64_##op##_return(long a, atomic64_t *v) \
+#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
+static inline long \
+atomic64_##op##_return_relaxed(long a, atomic64_t *v) \
{ \
long t; \
\
__asm__ __volatile__( \
- PPC_ATOMIC_ENTRY_BARRIER \
-"1: ldarx %0,0,%2 # atomic64_" #op "_return\n" \
- #asm_op " %0,%1,%0\n" \
-" stdcx. %0,0,%2 \n" \
+"1: ldarx %0,0,%3 # atomic64_" #op "_return_relaxed\n" \
+ #asm_op " %0,%2,%0\n" \
+" stdcx. %0,0,%3\n" \
" bne- 1b\n" \
- PPC_ATOMIC_EXIT_BARRIER \
- : "=&r" (t) \
+ : "=&r" (t), "+m" (v->counter) \
: "r" (a), "r" (&v->counter) \
- : "cc", "memory"); \
+ : "cc"); \
\
return t; \
}
-#define ATOMIC64_OPS(op, asm_op) ATOMIC64_OP(op, asm_op) ATOMIC64_OP_RETURN(op, asm_op)
+#define ATOMIC64_OPS(op, asm_op) \
+ ATOMIC64_OP(op, asm_op) \
+ ATOMIC64_OP_RETURN_RELAXED(op, asm_op)
ATOMIC64_OPS(add, add)
ATOMIC64_OPS(sub, subf)
@@ -312,8 +339,11 @@ ATOMIC64_OP(and, and)
ATOMIC64_OP(or, or)
ATOMIC64_OP(xor, xor)
-#undef ATOMIC64_OPS
-#undef ATOMIC64_OP_RETURN
+#define atomic64_add_return_relaxed atomic64_add_return_relaxed
+#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_OP_RETURN_RELAXED
#undef ATOMIC64_OP
#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
@@ -332,20 +362,18 @@ static __inline__ void atomic64_inc(atomic64_t *v)
: "cc", "xer");
}
-static __inline__ long atomic64_inc_return(atomic64_t *v)
+static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
{
long t;
__asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: ldarx %0,0,%1 # atomic64_inc_return\n\
- addic %0,%0,1\n\
- stdcx. %0,0,%1 \n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
+"1: ldarx %0,0,%2 # atomic64_inc_return_relaxed\n"
+" addic %0,%0,1\n"
+" stdcx. %0,0,%2\n"
+" bne- 1b"
+ : "=&r" (t), "+m" (v->counter)
: "r" (&v->counter)
- : "cc", "xer", "memory");
+ : "cc", "xer");
return t;
}
@@ -374,24 +402,25 @@ static __inline__ void atomic64_dec(atomic64_t *v)
: "cc", "xer");
}
-static __inline__ long atomic64_dec_return(atomic64_t *v)
+static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
{
long t;
__asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: ldarx %0,0,%1 # atomic64_dec_return\n\
- addic %0,%0,-1\n\
- stdcx. %0,0,%1\n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
+"1: ldarx %0,0,%2 # atomic64_dec_return_relaxed\n"
+" addic %0,%0,-1\n"
+" stdcx. %0,0,%2\n"
+" bne- 1b"
+ : "=&r" (t), "+m" (v->counter)
: "r" (&v->counter)
- : "cc", "xer", "memory");
+ : "cc", "xer");
return t;
}
+#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
+#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
+
#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
@@ -420,7 +449,13 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
}
#define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+#define atomic64_cmpxchg_relaxed(v, o, n) \
+ cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define atomic64_cmpxchg_acquire(v, o, n) \
+ cmpxchg_acquire(&((v)->counter), (o), (n))
+
#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+#define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
/**
* atomic64_add_unless - add unless the number is a given value
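
These relaxed primitives pair with the __atomic_op_acquire()/__atomic_op_release() helpers added at the top of this file; include/linux/atomic.h then composes the ordered variants from them. A hedged sketch (not code from this patch) of what an acquire variant effectively becomes, with "isync" standing in for PPC_ACQUIRE_BARRIER:

    /* Hedged sketch only: atomic_inc_return_acquire() after
     * linux/atomic.h wraps the relaxed op with __atomic_op_acquire(). */
    static inline int sketch_atomic_inc_return_acquire(atomic_t *v)
    {
            int t = atomic_inc_return_relaxed(v); /* bare ll/sc loop */

            __asm__ __volatile__("isync" : : : "memory"); /* acquire fence */
            return t;
    }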
diff --git a/arch/powerpc/include/asm/mmu-hash32.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index 16f513e5cbd7..16f513e5cbd7 100644
--- a/arch/powerpc/include/asm/mmu-hash32.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index ea0414d6659e..5f08a0832238 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -52,44 +52,14 @@
_PAGE_F_SECOND | _PAGE_F_GIX)
/* shift to put page number into pte */
-#define PTE_RPN_SHIFT (18)
+#define PTE_RPN_SHIFT (12)
+#define PTE_RPN_SIZE (45) /* gives 57-bit real addresses */
#define _PAGE_4K_PFN 0
#ifndef __ASSEMBLY__
/*
- * 4-level page tables related bits
+ * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
*/
-
-#define pgd_none(pgd) (!pgd_val(pgd))
-#define pgd_bad(pgd) (pgd_val(pgd) == 0)
-#define pgd_present(pgd) (pgd_val(pgd) != 0)
-#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS)
-
-static inline void pgd_clear(pgd_t *pgdp)
-{
- *pgdp = __pgd(0);
-}
-
-static inline pte_t pgd_pte(pgd_t pgd)
-{
- return __pte(pgd_val(pgd));
-}
-
-static inline pgd_t pte_pgd(pte_t pte)
-{
- return __pgd(pte_val(pte));
-}
-extern struct page *pgd_page(pgd_t pgd);
-
-#define pud_offset(pgdp, addr) \
- (((pud_t *) pgd_page_vaddr(*(pgdp))) + \
- (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
-
-#define pud_ERROR(e) \
- pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
-
-/*
- * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */
#define remap_4k_pfn(vma, addr, pfn, prot) \
remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 849bbec80f7b..0a7956a80a08 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -1,15 +1,14 @@
#ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
#define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
-#include <asm-generic/pgtable-nopud.h>
-
#define PTE_INDEX_SIZE 8
-#define PMD_INDEX_SIZE 10
-#define PUD_INDEX_SIZE 0
+#define PMD_INDEX_SIZE 5
+#define PUD_INDEX_SIZE 5
#define PGD_INDEX_SIZE 12
#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE)
#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
/* With 4k base page size, hugepage PTEs go at the PMD level */
@@ -20,13 +19,18 @@
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#define _PAGE_COMBO 0x00040000 /* this is a combo 4k page */
-#define _PAGE_4K_PFN 0x00080000 /* PFN is for a single 4k page */
+#define _PAGE_COMBO 0x00001000 /* this is a combo 4k page */
+#define _PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */
/*
* Used to track subpage group valid if _PAGE_COMBO is set
* This overloads _PAGE_F_GIX and _PAGE_F_SECOND
@@ -39,10 +43,12 @@
/* Shift to put page number into pte.
*
- * That gives us a max RPN of 34 bits, which means a max of 50 bits
- * of addressable physical space, or 46 bits for the special 4k PFNs.
+ * That gives us a max RPN of 41 bits, which means a max of 57 bits
+ * of addressable physical space, or 53 bits for the special 4k PFNs.
*/
-#define PTE_RPN_SHIFT (30)
+#define PTE_RPN_SHIFT (16)
+#define PTE_RPN_SIZE (41)
+
/*
* we support 16 fragments per PTE page of 64K size.
*/
@@ -54,13 +60,12 @@
#define PTE_FRAG_SIZE_SHIFT 12
#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
-/*
- * Bits to mask out from a PMD to get to the PTE page
- * PMDs point to PTE table fragments which are PTE_FRAG_SIZE aligned.
- */
-#define PMD_MASKED_BITS (PTE_FRAG_SIZE - 1)
-/* Bits to mask out from a PGD/PUD to get to the PMD page */
-#define PUD_MASKED_BITS 0x1ff
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS 0xc0000000000000ffUL
+/* Bits to mask out from a PUD to get to the PMD page */
+#define PUD_MASKED_BITS 0xc0000000000000ffUL
+/* Bits to mask out from a PGD to get to the PUD page */
+#define PGD_MASKED_BITS 0xc0000000000000ffUL
#ifndef __ASSEMBLY__
@@ -120,7 +125,7 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
(((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
#define remap_4k_pfn(vma, addr, pfn, prot) \
- (WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT)))) ? -EINVAL : \
+ (WARN_ON(((pfn) >= (1UL << PTE_RPN_SIZE))) ? -EINVAL : \
remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, \
__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
@@ -130,11 +135,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
#else
#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
#endif
+#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
-#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd })))
-#define pte_pgd(pte) ((pgd_t)pte_pud(pte))
-
#ifdef CONFIG_HUGETLB_PAGE
/*
 * We have PGD_INDEX_SIZE = 12 and PTE_INDEX_SIZE = 8, so that we can have
@@ -208,30 +211,30 @@ static inline char *get_hpte_slot_array(pmd_t *pmdp)
/*
* The linux hugepage PMD now include the pmd entries followed by the address
* to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
- * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
+ * [ 000 | 1 bit secondary | 3 bit hidx | 1 bit valid]. We use one byte per
* each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
* with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
*
- * The last three bits are intentionally left to zero. This memory location
+ * The top three bits are intentionally left as zero. These memory locations
* are also used as normal page PTE pointers. So if we have any pointers
* left around while we collapse a hugepage, we need to make sure
* _PAGE_PRESENT bit of that is zero when we look at them
*/
static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
{
- return (hpte_slot_array[index] >> 3) & 0x1;
+ return hpte_slot_array[index] & 0x1;
}
static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
int index)
{
- return hpte_slot_array[index] >> 4;
+ return hpte_slot_array[index] >> 1;
}
static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
unsigned int index, unsigned int hidx)
{
- hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
+ hpte_slot_array[index] = (hidx << 1) | 0x1;
}
/*
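
The new encoding keeps the valid flag in bit 0 and the hash index in the bits above it, leaving the top three bits clear. A hedged round-trip through the accessors above, with illustrative values:

    /* Hedged sketch: slot byte = (hidx << 1) | valid, per the helpers above. */
    unsigned char slot_array[1] = { 0 };

    mark_hpte_slot_valid(slot_array, 0, 5);      /* stores (5 << 1) | 1 = 0x0b */
    BUG_ON(hpte_valid(slot_array, 0) != 1);      /* bit 0 set */
    BUG_ON(hpte_hash_index(slot_array, 0) != 5); /* 0x0b >> 1 */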
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 8d1c8162f0c1..d0ee6fcef823 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -4,8 +4,7 @@
/*
* Common bits between 4K and 64K pages in a linux-style PTE.
- * These match the bits in the (hardware-defined) PowerPC PTE as closely
- * as possible. Additional bits may be defined in pgtable-hash64-*.h
+ * Additional bits may be defined in pgtable-hash64-*.h
*
* Note: We only support user read/write permissions. Supervisor always
* have full read/write to pages above PAGE_OFFSET (pages below that
@@ -14,32 +13,35 @@
* We could create separate kernel read-only if we used the 3 PP bits
* combinations that newer processors provide but we currently don't.
*/
-#define _PAGE_PTE 0x00001
-#define _PAGE_PRESENT 0x00002 /* software: pte contains a translation */
-#define _PAGE_BIT_SWAP_TYPE 2
-#define _PAGE_USER 0x00004 /* matches one of the PP bits */
-#define _PAGE_EXEC 0x00008 /* No execute on POWER4 and newer (we invert) */
-#define _PAGE_GUARDED 0x00010
-/* We can derive Memory coherence from _PAGE_NO_CACHE */
+#define _PAGE_BIT_SWAP_TYPE 0
+
+#define _PAGE_EXEC 0x00001 /* execute permission */
+#define _PAGE_RW 0x00002 /* read & write access allowed */
+#define _PAGE_READ 0x00004 /* read access allowed */
+#define _PAGE_USER 0x00008 /* page may be accessed by userspace */
+#define _PAGE_GUARDED 0x00010 /* G: guarded (side-effect) page */
+/* M (memory coherence) is always set in the HPTE, so we don't need it here */
#define _PAGE_COHERENT 0x0
#define _PAGE_NO_CACHE 0x00020 /* I: cache inhibit */
#define _PAGE_WRITETHRU 0x00040 /* W: cache write-through */
#define _PAGE_DIRTY 0x00080 /* C: page changed */
#define _PAGE_ACCESSED 0x00100 /* R: page referenced */
-#define _PAGE_RW 0x00200 /* software: user write access allowed */
-#define _PAGE_HASHPTE 0x00400 /* software: pte has an associated HPTE */
+#define _PAGE_SPECIAL 0x00400 /* software: special page */
#define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */
-#define _PAGE_F_GIX 0x07000 /* full page: hidx bits */
-#define _PAGE_F_GIX_SHIFT 12
-#define _PAGE_F_SECOND 0x08000 /* Whether to use secondary hash or not */
-#define _PAGE_SPECIAL 0x10000 /* software: special page */
#ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SOFT_DIRTY 0x20000 /* software: software dirty tracking */
+#define _PAGE_SOFT_DIRTY 0x200 /* software: software dirty tracking */
#else
-#define _PAGE_SOFT_DIRTY 0x00000
+#define _PAGE_SOFT_DIRTY 0x000
#endif
+#define _PAGE_F_GIX_SHIFT 57
+#define _PAGE_F_GIX (7ul << 57) /* HPTE index within HPTEG */
+#define _PAGE_F_SECOND (1ul << 60) /* HPTE is in 2ndary HPTEG */
+#define _PAGE_HASHPTE (1ul << 61) /* PTE has associated HPTE */
+#define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */
+#define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */
+
/*
* We need to differentiate between explicit huge page and THP huge
* page, since THP huge page also need to track real subpage details
@@ -132,7 +134,7 @@
 * The mask covered by the RPN must be a ULL on 32-bit platforms with
* 64-bit PTEs
*/
-#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
+#define PTE_RPN_MASK (((1UL << PTE_RPN_SIZE) - 1) << PTE_RPN_SHIFT)
/*
* _PAGE_CHG_MASK masks of bits that are to be preserved across
* pgprot changes
@@ -223,15 +225,17 @@
#define PUD_BAD_BITS (PMD_TABLE_SIZE-1)
#ifndef __ASSEMBLY__
-#define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \
- || (pmd_val(pmd) & PMD_BAD_BITS))
-#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
+#define pmd_bad(pmd) (pmd_val(pmd) & PMD_BAD_BITS)
+#define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS)
+
+#define pud_bad(pud) (pud_val(pud) & PUD_BAD_BITS)
+#define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS)
-#define pud_bad(pud) (!is_kernel_addr(pud_val(pud)) \
- || (pud_val(pud) & PUD_BAD_BITS))
-#define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS)
+/* Pointers in the page table tree are physical addresses */
+#define __pgtable_ptr_val(ptr) __pa(ptr)
#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
+#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1))
#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1))
#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1))
@@ -360,8 +364,18 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
:"cc");
}
+static inline int pgd_bad(pgd_t pgd)
+{
+ return (pgd_val(pgd) == 0);
+}
+
#define __HAVE_ARCH_PTE_SAME
#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
+static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+{
+ return (unsigned long)__va(pgd_val(pgd) & ~PGD_MASKED_BITS);
+}
+
/* Generic accessors to PTE bits */
static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);}
@@ -402,7 +416,7 @@ static inline int pte_protnone(pte_t pte)
static inline int pte_present(pte_t pte)
{
- return pte_val(pte) & _PAGE_PRESENT;
+ return !!(pte_val(pte) & _PAGE_PRESENT);
}
/* Conversion functions: convert a page and protection to a page entry,
@@ -413,13 +427,13 @@ static inline int pte_present(pte_t pte)
*/
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
{
- return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
+ return __pte((((pte_basic_t)(pfn) << PTE_RPN_SHIFT) & PTE_RPN_MASK) |
pgprot_val(pgprot));
}
static inline unsigned long pte_pfn(pte_t pte)
{
- return pte_val(pte) >> PTE_RPN_SHIFT;
+ return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT;
}
/* Generic modifiers for PTE bits */
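
With PTE_RPN_MASK now bounded on both ends, the layout is easy to sanity-check. A hedged arithmetic check against the 64K-page values in this series (PTE_RPN_SHIFT = 16, PTE_RPN_SIZE = 41), not kernel code:

    /* RPN occupies bits 16..56, giving the 57-bit real addresses
     * mentioned in hash-64k.h: 2^57 - 2^16 = 0x01ffffffffff0000. */
    #define SKETCH_RPN_MASK (((1UL << 41) - 1) << 16)
    _Static_assert(SKETCH_RPN_MASK == 0x01ffffffffff0000UL,
                   "RPN mask covers bits 16..56");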
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 7352d3f212df..0cea4807e26f 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -114,6 +114,7 @@
#define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */
#define POWER8_TLB_SETS 512 /* # sets in POWER8 TLB */
+#define POWER9_TLB_SETS_HASH 256 /* # sets in POWER9 TLB Hash mode */
#ifndef __ASSEMBLY__
@@ -607,6 +608,9 @@ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1;
return get_vsid(context, ea, ssize);
}
+
+unsigned htab_shift_for_mem_size(unsigned long mem_size);
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_MMU_HASH64_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 8d1c41d28318..77d3ce05798e 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -43,13 +43,8 @@
*/
#ifndef __real_pte
-#ifdef CONFIG_STRICT_MM_TYPECHECKS
#define __real_pte(e,p) ((real_pte_t){(e)})
#define __rpte_to_pte(r) ((r).pte)
-#else
-#define __real_pte(e,p) (e)
-#define __rpte_to_pte(r) (__pte(r))
-#endif
#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >>_PAGE_F_GIX_SHIFT)
#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
@@ -111,6 +106,26 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
*pgdp = __pgd(val);
}
+static inline void pgd_clear(pgd_t *pgdp)
+{
+ *pgdp = __pgd(0);
+}
+
+#define pgd_none(pgd) (!pgd_val(pgd))
+#define pgd_present(pgd) (!pgd_none(pgd))
+
+static inline pte_t pgd_pte(pgd_t pgd)
+{
+ return __pte(pgd_val(pgd));
+}
+
+static inline pgd_t pte_pgd(pte_t pte)
+{
+ return __pgd(pte_val(pte));
+}
+
+extern struct page *pgd_page(pgd_t pgd);
+
/*
* Find an entry in a page-table-directory. We combine the address region
* (the high order N bits) and the pgd portion of the address.
@@ -118,9 +133,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+#define pud_offset(pgdp, addr) \
+ (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr))
#define pmd_offset(pudp,addr) \
(((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr))
-
#define pte_offset_kernel(dir,addr) \
(((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
@@ -135,6 +151,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+ pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
@@ -154,10 +172,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
#define SWP_TYPE_BITS 5
#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \
& ((1UL << SWP_TYPE_BITS) - 1))
-#define __swp_offset(x) ((x).val >> PTE_RPN_SHIFT)
+#define __swp_offset(x) (((x).val & PTE_RPN_MASK) >> PTE_RPN_SHIFT)
#define __swp_entry(type, offset) ((swp_entry_t) { \
- ((type) << _PAGE_BIT_SWAP_TYPE) \
- | ((offset) << PTE_RPN_SHIFT) })
+ ((type) << _PAGE_BIT_SWAP_TYPE) \
+ | (((offset) << PTE_RPN_SHIFT) & PTE_RPN_MASK)})
/*
* swp_entry_t must be independent of pte bits. We build a swp_entry_t from
* swap type and offset we get from swap and convert that to pte to find a
@@ -281,6 +299,10 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp);
+#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
+extern void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+
#define pmd_move_must_withdraw pmd_move_must_withdraw
struct spinlock;
static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
new file mode 100644
index 000000000000..1b753f96b374
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -0,0 +1,94 @@
+#ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H
+#define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H
+
+#define MMU_NO_CONTEXT 0
+
+/*
+ * TLB flushing for 64-bit hash-MMU CPUs
+ */
+
+#include <linux/percpu.h>
+#include <asm/page.h>
+
+#define PPC64_TLB_BATCH_NR 192
+
+struct ppc64_tlb_batch {
+ int active;
+ unsigned long index;
+ struct mm_struct *mm;
+ real_pte_t pte[PPC64_TLB_BATCH_NR];
+ unsigned long vpn[PPC64_TLB_BATCH_NR];
+ unsigned int psize;
+ int ssize;
+};
+DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
+
+extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
+
+#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+
+static inline void arch_enter_lazy_mmu_mode(void)
+{
+ struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
+
+ batch->active = 1;
+}
+
+static inline void arch_leave_lazy_mmu_mode(void)
+{
+ struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
+
+ if (batch->index)
+ __flush_tlb_pending(batch);
+ batch->active = 0;
+}
+
+#define arch_flush_lazy_mmu_mode() do {} while (0)
+
+
+extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
+ int ssize, unsigned long flags);
+extern void flush_hash_range(unsigned long number, int local);
+extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
+ pmd_t *pmdp, unsigned int psize, int ssize,
+ unsigned long flags);
+
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+}
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+}
+
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+}
+
+static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+ unsigned long end)
+{
+}
+
+/* Private function for use by PCI IO mapping code */
+extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end);
+extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr);
+#endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H */
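
The lazy-MMU hooks above exist so that generic mm code can batch hash-table invalidations rather than flushing per PTE. A hedged sketch of the calling pattern; the function and variable names in the body are placeholders, not from this patch:

    /* PTE updates between enter/leave accumulate in the per-cpu
     * ppc64_tlb_batch and are flushed once via __flush_tlb_pending(). */
    static void sketch_clear_range(struct mm_struct *mm, pte_t *ptep,
                                   unsigned long start, unsigned long end)
    {
            unsigned long addr;

            arch_enter_lazy_mmu_mode();             /* batch->active = 1 */
            for (addr = start; addr < end; addr += PAGE_SIZE)
                    ptep_get_and_clear(mm, addr, ptep++); /* queued */
            arch_leave_lazy_mmu_mode();     /* flush if batch->index != 0 */
    }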
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index d1a8d93cccfd..44efe739b6b9 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -5,25 +5,25 @@
#include <linux/compiler.h>
#include <asm/synch.h>
#include <asm/asm-compat.h>
+#include <linux/bug.h>
/*
* Atomic exchange
*
- * Changes the memory location '*ptr' to be val and returns
+ * Changes the memory location '*p' to be val and returns
* the previous value stored there.
*/
+
static __always_inline unsigned long
-__xchg_u32(volatile void *p, unsigned long val)
+__xchg_u32_local(volatile void *p, unsigned long val)
{
unsigned long prev;
__asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
"1: lwarx %0,0,%2 \n"
PPC405_ERR77(0,%2)
" stwcx. %3,0,%2 \n\
bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
: "=&r" (prev), "+m" (*(volatile unsigned int *)p)
: "r" (p), "r" (val)
: "cc", "memory");
@@ -31,42 +31,34 @@ __xchg_u32(volatile void *p, unsigned long val)
return prev;
}
-/*
- * Atomic exchange
- *
- * Changes the memory location '*ptr' to be val and returns
- * the previous value stored there.
- */
static __always_inline unsigned long
-__xchg_u32_local(volatile void *p, unsigned long val)
+__xchg_u32_relaxed(u32 *p, unsigned long val)
{
unsigned long prev;
__asm__ __volatile__(
-"1: lwarx %0,0,%2 \n"
- PPC405_ERR77(0,%2)
-" stwcx. %3,0,%2 \n\
- bne- 1b"
- : "=&r" (prev), "+m" (*(volatile unsigned int *)p)
+"1: lwarx %0,0,%2\n"
+ PPC405_ERR77(0, %2)
+" stwcx. %3,0,%2\n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*p)
: "r" (p), "r" (val)
- : "cc", "memory");
+ : "cc");
return prev;
}
#ifdef CONFIG_PPC64
static __always_inline unsigned long
-__xchg_u64(volatile void *p, unsigned long val)
+__xchg_u64_local(volatile void *p, unsigned long val)
{
unsigned long prev;
__asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
"1: ldarx %0,0,%2 \n"
PPC405_ERR77(0,%2)
" stdcx. %3,0,%2 \n\
bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
: "=&r" (prev), "+m" (*(volatile unsigned long *)p)
: "r" (p), "r" (val)
: "cc", "memory");
@@ -75,64 +67,52 @@ __xchg_u64(volatile void *p, unsigned long val)
}
static __always_inline unsigned long
-__xchg_u64_local(volatile void *p, unsigned long val)
+__xchg_u64_relaxed(u64 *p, unsigned long val)
{
unsigned long prev;
__asm__ __volatile__(
-"1: ldarx %0,0,%2 \n"
- PPC405_ERR77(0,%2)
-" stdcx. %3,0,%2 \n\
- bne- 1b"
- : "=&r" (prev), "+m" (*(volatile unsigned long *)p)
+"1: ldarx %0,0,%2\n"
+ PPC405_ERR77(0, %2)
+" stdcx. %3,0,%2\n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*p)
: "r" (p), "r" (val)
- : "cc", "memory");
+ : "cc");
return prev;
}
#endif
-/*
- * This function doesn't exist, so you'll get a linker error
- * if something tries to do an invalid xchg().
- */
-extern void __xchg_called_with_bad_pointer(void);
-
static __always_inline unsigned long
-__xchg(volatile void *ptr, unsigned long x, unsigned int size)
+__xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
{
switch (size) {
case 4:
- return __xchg_u32(ptr, x);
+ return __xchg_u32_local(ptr, x);
#ifdef CONFIG_PPC64
case 8:
- return __xchg_u64(ptr, x);
+ return __xchg_u64_local(ptr, x);
#endif
}
- __xchg_called_with_bad_pointer();
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg_local");
return x;
}
static __always_inline unsigned long
-__xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
+__xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
{
switch (size) {
case 4:
- return __xchg_u32_local(ptr, x);
+ return __xchg_u32_relaxed(ptr, x);
#ifdef CONFIG_PPC64
case 8:
- return __xchg_u64_local(ptr, x);
+ return __xchg_u64_relaxed(ptr, x);
#endif
}
- __xchg_called_with_bad_pointer();
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg_relaxed");
return x;
}
-#define xchg(ptr,x) \
- ({ \
- __typeof__(*(ptr)) _x_ = (x); \
- (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \
- })
-
#define xchg_local(ptr,x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
@@ -140,6 +120,12 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
(unsigned long)_x_, sizeof(*(ptr))); \
})
+#define xchg_relaxed(ptr, x) \
+({ \
+ __typeof__(*(ptr)) _x_ = (x); \
+ (__typeof__(*(ptr))) __xchg_relaxed((ptr), \
+ (unsigned long)_x_, sizeof(*(ptr))); \
+})
/*
* Compare and exchange - if *p == old, set it to new,
* and return the old value of *p.
@@ -190,6 +176,56 @@ __cmpxchg_u32_local(volatile unsigned int *p, unsigned long old,
return prev;
}
+static __always_inline unsigned long
+__cmpxchg_u32_relaxed(u32 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lwarx %0,0,%2 # __cmpxchg_u32_relaxed\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+ PPC405_ERR77(0, %2)
+" stwcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc");
+
+ return prev;
+}
+
+/*
+ * The cmpxchg family has no ordering guarantee if the compare part fails,
+ * so we can avoid superfluous barriers by implementing cmpxchg() and
+ * cmpxchg_acquire() in assembly. We don't do the same for cmpxchg_release(),
+ * because that would put a barrier in the middle of a ll/sc loop, which
+ * is probably a bad idea. For example, it might make the conditional
+ * store more likely to fail.
+ */
+static __always_inline unsigned long
+__cmpxchg_u32_acquire(u32 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lwarx %0,0,%2 # __cmpxchg_u32_acquire\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+ PPC405_ERR77(0, %2)
+" stwcx. %4,0,%2\n"
+" bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+ "\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
#ifdef CONFIG_PPC64
static __always_inline unsigned long
__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
@@ -233,11 +269,47 @@ __cmpxchg_u64_local(volatile unsigned long *p, unsigned long old,
return prev;
}
-#endif
-/* This function doesn't exist, so you'll get a linker error
- if something tries to do an invalid cmpxchg(). */
-extern void __cmpxchg_called_with_bad_pointer(void);
+static __always_inline unsigned long
+__cmpxchg_u64_relaxed(u64 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: ldarx %0,0,%2 # __cmpxchg_u64_relaxed\n"
+" cmpd 0,%0,%3\n"
+" bne- 2f\n"
+" stdcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u64_acquire(u64 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: ldarx %0,0,%2 # __cmpxchg_u64_acquire\n"
+" cmpd 0,%0,%3\n"
+" bne- 2f\n"
+" stdcx. %4,0,%2\n"
+" bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+ "\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+#endif
static __always_inline unsigned long
__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
@@ -251,7 +323,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
return __cmpxchg_u64(ptr, old, new);
#endif
}
- __cmpxchg_called_with_bad_pointer();
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg");
return old;
}
@@ -267,10 +339,41 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
return __cmpxchg_u64_local(ptr, old, new);
#endif
}
- __cmpxchg_called_with_bad_pointer();
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_local");
+ return old;
+}
+
+static __always_inline unsigned long
+__cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
+ unsigned int size)
+{
+ switch (size) {
+ case 4:
+ return __cmpxchg_u32_relaxed(ptr, old, new);
+#ifdef CONFIG_PPC64
+ case 8:
+ return __cmpxchg_u64_relaxed(ptr, old, new);
+#endif
+ }
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_relaxed");
return old;
}
+static __always_inline unsigned long
+__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
+ unsigned int size)
+{
+ switch (size) {
+ case 4:
+ return __cmpxchg_u32_acquire(ptr, old, new);
+#ifdef CONFIG_PPC64
+ case 8:
+ return __cmpxchg_u64_acquire(ptr, old, new);
+#endif
+ }
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
+ return old;
+}
#define cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
@@ -288,6 +391,23 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
(unsigned long)_n_, sizeof(*(ptr))); \
})
+#define cmpxchg_relaxed(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) _o_ = (o); \
+ __typeof__(*(ptr)) _n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
+ (unsigned long)_o_, (unsigned long)_n_, \
+ sizeof(*(ptr))); \
+})
+
+#define cmpxchg_acquire(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) _o_ = (o); \
+ __typeof__(*(ptr)) _n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \
+ (unsigned long)_o_, (unsigned long)_n_, \
+ sizeof(*(ptr))); \
+})
#ifdef CONFIG_PPC64
#define cmpxchg64(ptr, o, n) \
({ \
@@ -299,7 +419,16 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
cmpxchg_local((ptr), (o), (n)); \
})
-#define cmpxchg64_relaxed cmpxchg64_local
+#define cmpxchg64_relaxed(ptr, o, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ cmpxchg_relaxed((ptr), (o), (n)); \
+})
+#define cmpxchg64_acquire(ptr, o, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ cmpxchg_acquire((ptr), (o), (n)); \
+})
#else
#include <asm-generic/cmpxchg-local.h>
#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
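As an illustration of the acquire/relaxed split introduced above, here is a
minimal userspace C11 analogue (not kernel code; the lock is hypothetical):
only a successful compare-and-swap needs acquire ordering, while the failure
case can stay relaxed, which is exactly the latitude the comment on failed
cmpxchg exploits.

	#include <stdatomic.h>
	#include <stdbool.h>

	/* Try-lock modeled on cmpxchg_acquire(): acquire ordering on
	 * success, relaxed on failure, mirroring the rule that a failed
	 * compare part carries no ordering guarantee. */
	static bool try_lock(atomic_int *lock)
	{
		int expected = 0;

		return atomic_compare_exchange_strong_explicit(lock,
				&expected, 1,
				memory_order_acquire, memory_order_relaxed);
	}

	static void unlock(atomic_int *lock)
	{
		/* The release here pairs with the acquire in try_lock(). */
		atomic_store_explicit(lock, 0, memory_order_release);
	}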
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index b118072670fb..94ace9b4c4e1 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -171,7 +171,7 @@ enum {
#define CPU_FTR_ARCH_201 LONG_ASM_CONST(0x0000000200000000)
#define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000400000000)
#define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000800000000)
-/* Free LONG_ASM_CONST(0x0000001000000000) */
+#define CPU_FTR_ARCH_300 LONG_ASM_CONST(0x0000001000000000)
#define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000002000000000)
#define CPU_FTR_CTRL LONG_ASM_CONST(0x0000004000000000)
#define CPU_FTR_SMT LONG_ASM_CONST(0x0000008000000000)
@@ -196,6 +196,7 @@ enum {
#define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000)
#define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000)
#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000)
+#define CPU_FTR_SUBCORE LONG_ASM_CONST(0x2000000000000000)
#ifndef __ASSEMBLY__
@@ -443,9 +444,19 @@ enum {
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
- CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
+ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_SUBCORE)
#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
+#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
+ CPU_FTR_MMCRA | CPU_FTR_SMT | \
+ CPU_FTR_COHERENT_ICACHE | \
+ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
+ CPU_FTR_DSCR | CPU_FTR_SAO | \
+ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+ CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
+ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300)
#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -464,7 +475,7 @@ enum {
(CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
- CPU_FTRS_PA6T | CPU_FTR_VSX)
+ CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9)
#endif
#else
enum {
@@ -515,7 +526,8 @@ enum {
(CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
- CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE)
+ CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \
+ CPU_FTRS_POWER9)
#endif
#else
enum {
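To see why CPU_FTRS_POWER9 is added to both the POSSIBLE and ALWAYS
combinations above, consider this self-contained sketch with made-up feature
bits: POSSIBLE is the OR across all supported CPUs (a bit no CPU can have
folds to a constant 0 test), while ALWAYS is the AND (a bit every CPU has
folds to a constant 1).

	/* Hypothetical feature bits, for illustration only. */
	#define FTR_A		(1UL << 0)
	#define FTR_B		(1UL << 1)

	#define FTRS_CPU1	(FTR_A)
	#define FTRS_CPU2	(FTR_A | FTR_B)

	#define FTRS_POSSIBLE	(FTRS_CPU1 | FTRS_CPU2)	/* any CPU may have */
	#define FTRS_ALWAYS	(FTRS_CPU1 & FTRS_CPU2)	/* every CPU has */

	static inline int has_feature(unsigned long cur, unsigned long ftr)
	{
		if (!(FTRS_POSSIBLE & ftr))	/* folds to constant 0 */
			return 0;
		if ((FTRS_ALWAYS & ftr) == ftr)	/* folds to constant 1 */
			return 1;
		return (cur & ftr) != 0;	/* genuine runtime test */
	}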
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index c5eb86f3d452..fb9f376ae27b 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -72,6 +72,7 @@ struct pci_dn;
#define EEH_PE_PHB (1 << 1) /* PHB PE */
#define EEH_PE_DEVICE (1 << 2) /* Device PE */
#define EEH_PE_BUS (1 << 3) /* Bus PE */
+#define EEH_PE_VF (1 << 4) /* VF PE */
#define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */
#define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */
@@ -81,6 +82,7 @@ struct pci_dn;
#define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */
#define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */
#define EEH_PE_REMOVED (1 << 10) /* Removed permanently */
+#define EEH_PE_PRI_BUS (1 << 11) /* Cached primary bus */
struct eeh_pe {
int type; /* PE type: PHB/Bus/Device */
@@ -135,11 +137,15 @@ struct eeh_dev {
int pcix_cap; /* Saved PCIx capability */
int pcie_cap; /* Saved PCIe capability */
int aer_cap; /* Saved AER capability */
+ int af_cap; /* Saved AF capability */
struct eeh_pe *pe; /* Associated PE */
struct list_head list; /* Form link list in the PE */
+ struct list_head rmv_list; /* Record the removed edevs */
struct pci_controller *phb; /* Associated PHB */
struct pci_dn *pdn; /* Associated PCI device node */
struct pci_dev *pdev; /* Associated PCI device */
+ bool in_error; /* Error flag for edev */
+ struct pci_dev *physfn; /* Associated SRIOV PF */
struct pci_bus *bus; /* PCI bus for partial hotplug */
};
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 7eac89b9f02e..42814f0567cc 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -19,7 +19,7 @@ static inline pte_t *hugepd_page(hugepd_t hpd)
* We have only four bits to encode, MMU page size
*/
BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf);
- return (pte_t *)(hpd.pd & ~HUGEPD_SHIFT_MASK);
+ return __va(hpd.pd & HUGEPD_ADDR_MASK);
}
static inline unsigned int hugepd_mmu_psize(hugepd_t hpd)
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index e3b54dd4f730..0bc9c284aa10 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -94,6 +94,7 @@
#define H_SG_LIST -72
#define H_OP_MODE -73
#define H_COP_HW -74
+#define H_STATE -75
#define H_UNSUPPORTED_FLAG_START -256
#define H_UNSUPPORTED_FLAG_END -511
#define H_MULTI_THREADS_ACTIVE -9005
diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h
index 1cb39c96d155..b3b0f2d020f0 100644
--- a/arch/powerpc/include/asm/hydra.h
+++ b/arch/powerpc/include/asm/hydra.h
@@ -89,7 +89,7 @@ extern volatile struct Hydra __iomem *Hydra;
#define HYDRA_INT_EXT2 13 /* PCI IRQX */
#define HYDRA_INT_EXT3 14 /* PCI IRQY */
#define HYDRA_INT_EXT4 15 /* PCI IRQZ */
-#define HYDRA_INT_EXT5 16 /* IDE Primay/Secondary */
+#define HYDRA_INT_EXT5 16 /* IDE Primary/Secondary */
#define HYDRA_INT_EXT6 17 /* IDE Secondary */
#define HYDRA_INT_EXT7 18 /* Power Off Request */
#define HYDRA_INT_SPARE 19
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 6c1297ec374c..2fd1690b79d2 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -300,7 +300,7 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src,
* When CONFIG_PPC_INDIRECT_MMIO is set, the platform can provide hooks
* on all MMIOs. (Note that this is all 64 bits only for now)
*
- * To help platforms who may need to differenciate MMIO addresses in
+ * To help platforms that may need to differentiate MMIO addresses in
* their hooks, a bitfield is reserved for use by the platform near the
* top of MMIO addresses (not PIO, those have to cope the hard way).
*
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 3f191f573d4f..fd22442d30a9 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -54,7 +54,7 @@ struct machdep_calls {
int psize, int apsize,
int ssize);
long (*hpte_remove)(unsigned long hpte_group);
- void (*hpte_removebolted)(unsigned long ea,
+ int (*hpte_removebolted)(unsigned long ea,
int psize, int ssize);
void (*flush_hash_range)(unsigned long number, int local);
void (*hugepage_invalidate)(unsigned long vsid,
@@ -174,11 +174,11 @@ struct machdep_calls {
platform, called once per cpu. */
void (*enable_pmcs)(void);
- /* Set DABR for this platform, leave empty for default implemenation */
+ /* Set DABR for this platform, leave empty for default implementation */
int (*set_dabr)(unsigned long dabr,
unsigned long dabrx);
- /* Set DAWR for this platform, leave empty for default implemenation */
+ /* Set DAWR for this platform, leave empty for default implementation */
int (*set_dawr)(unsigned long dawr,
unsigned long dawrx);
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 3d5abfe6ba67..8ca1c983bf6c 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -97,6 +97,7 @@
#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
#define MMU_FTRS_POWER8 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER9 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
#define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
MMU_FTR_CI_LARGE_PAGE
#define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
@@ -182,10 +183,10 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
#if defined(CONFIG_PPC_STD_MMU_64)
/* 64-bit classic hash table MMU */
-# include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#elif defined(CONFIG_PPC_STD_MMU_32)
/* 32-bit classic hash table MMU */
-# include <asm/mmu-hash32.h>
+#include <asm/book3s/32/mmu-hash.h>
#elif defined(CONFIG_40x)
/* 40x-style software loaded TLB */
# include <asm/mmu-40x.h>
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 5b6b5a427b54..cd4ffd86765f 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -19,7 +19,7 @@
* Thanks to Paul M for explaining this.
*
* PPC can only do rel jumps += 32MB, and often the kernel and other
- * modules are furthur away than this. So, we jump to a table of
+ * modules are further away than this. So, we jump to a table of
* trampolines attached to the module (the Procedure Linkage Table)
* whenever that happens.
*/
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index b9f734dd5b81..10debb93c4a4 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -108,6 +108,9 @@
#ifndef __ASSEMBLY__
/* pte_clear moved to later in this file */
+/* Pointers in the page table tree are virtual addresses */
+#define __pgtable_ptr_val(ptr) ((unsigned long)(ptr))
+
#define PMD_BAD_BITS (PTE_TABLE_SIZE-1)
#define PUD_BAD_BITS (PMD_TABLE_SIZE-1)
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 07a99e638449..9d86c6651716 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -248,6 +248,7 @@ extern int opal_elog_init(void);
extern void opal_platform_dump_init(void);
extern void opal_sys_param_init(void);
extern void opal_msglog_init(void);
+extern void opal_msglog_sysfs_init(void);
extern int opal_async_comp_init(void);
extern int opal_sensor_init(void);
extern int opal_hmi_handler_init(void);
@@ -273,6 +274,8 @@ void opal_free_sg_list(struct opal_sg_list *sg);
extern int opal_error_code(int rc);
+ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count);
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_OPAL_H */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index e34124f6fbf2..ab3d8977bacd 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -271,6 +271,13 @@ extern long long virt_phys_offset;
#else
#define PD_HUGE 0x80000000
#endif
+
+#else /* CONFIG_PPC_BOOK3S_64 */
+/*
+ * Book3S 64 stores real addresses in the hugepd entries to
+ * avoid overlaps with _PAGE_PRESENT and _PAGE_PTE.
+ */
+#define HUGEPD_ADDR_MASK (0x0ffffffffffffffful & ~HUGEPD_SHIFT_MASK)
#endif /* CONFIG_PPC_BOOK3S_64 */
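A worked example of the mask above (the entry value is invented and
HUGEPD_SHIFT_MASK is assumed to be 0x3f): ANDing with 0x0fff... strips the
top flag bits and ~HUGEPD_SHIFT_MASK strips the encoded shift, leaving the
real address that hugepd_page() hands to __va().

	static unsigned long hugepd_addr_demo(void)
	{
		unsigned long pd = 0x8000000012345025UL; /* flag|addr|shift */
		unsigned long mask = 0x0fffffffffffffffUL & ~0x3fUL;

		return pd & mask;	/* == 0x0000000012345000 */
	}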
/*
@@ -281,109 +288,7 @@ extern long long virt_phys_offset;
#ifndef __ASSEMBLY__
-#ifdef CONFIG_STRICT_MM_TYPECHECKS
-/* These are used to make use of C type-checking. */
-
-/* PTE level */
-typedef struct { pte_basic_t pte; } pte_t;
-#define __pte(x) ((pte_t) { (x) })
-static inline pte_basic_t pte_val(pte_t x)
-{
- return x.pte;
-}
-
-/* 64k pages additionally define a bigger "real PTE" type that gathers
- * the "second half" part of the PTE for pseudo 64k pages
- */
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
-typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
-#else
-typedef struct { pte_t pte; } real_pte_t;
-#endif
-
-/* PMD level */
-#ifdef CONFIG_PPC64
-typedef struct { unsigned long pmd; } pmd_t;
-#define __pmd(x) ((pmd_t) { (x) })
-static inline unsigned long pmd_val(pmd_t x)
-{
- return x.pmd;
-}
-
-/* PUD level exusts only on 4k pages */
-#ifndef CONFIG_PPC_64K_PAGES
-typedef struct { unsigned long pud; } pud_t;
-#define __pud(x) ((pud_t) { (x) })
-static inline unsigned long pud_val(pud_t x)
-{
- return x.pud;
-}
-#endif /* !CONFIG_PPC_64K_PAGES */
-#endif /* CONFIG_PPC64 */
-
-/* PGD level */
-typedef struct { unsigned long pgd; } pgd_t;
-#define __pgd(x) ((pgd_t) { (x) })
-static inline unsigned long pgd_val(pgd_t x)
-{
- return x.pgd;
-}
-
-/* Page protection bits */
-typedef struct { unsigned long pgprot; } pgprot_t;
-#define pgprot_val(x) ((x).pgprot)
-#define __pgprot(x) ((pgprot_t) { (x) })
-
-#else
-
-/*
- * .. while these make it easier on the compiler
- */
-
-typedef pte_basic_t pte_t;
-#define __pte(x) (x)
-static inline pte_basic_t pte_val(pte_t pte)
-{
- return pte;
-}
-
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
-typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
-#else
-typedef pte_t real_pte_t;
-#endif
-
-
-#ifdef CONFIG_PPC64
-typedef unsigned long pmd_t;
-#define __pmd(x) (x)
-static inline unsigned long pmd_val(pmd_t pmd)
-{
- return pmd;
-}
-
-#ifndef CONFIG_PPC_64K_PAGES
-typedef unsigned long pud_t;
-#define __pud(x) (x)
-static inline unsigned long pud_val(pud_t pud)
-{
- return pud;
-}
-#endif /* !CONFIG_PPC_64K_PAGES */
-#endif /* CONFIG_PPC64 */
-
-typedef unsigned long pgd_t;
-#define __pgd(x) (x)
-static inline unsigned long pgd_val(pgd_t pgd)
-{
- return pgd;
-}
-
-typedef unsigned long pgprot_t;
-#define pgprot_val(x) (x)
-#define __pgprot(x) (x)
-
-#endif
+#include <asm/pgtable-types.h>
typedef struct { signed long pd; } hugepd_t;
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 54843ca5fa2b..9f165e8a77bf 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -212,15 +212,16 @@ struct pci_dn {
#define IODA_INVALID_PE (-1)
#ifdef CONFIG_PPC_POWERNV
int pe_number;
+ int vf_index; /* VF index in the PF */
#ifdef CONFIG_PCI_IOV
u16 vfs_expanded; /* number of VFs IOV BAR expanded */
u16 num_vfs; /* number of VFs enabled */
- int offset; /* PE# for the first VF PE */
-#define M64_PER_IOV 4
- int m64_per_iov;
+ int *pe_num_map; /* PE# for the first VF PE or array */
+ bool m64_single_mode; /* Use M64 BAR in Single Mode */
#define IODA_INVALID_M64 (-1)
- int m64_wins[PCI_SRIOV_NUM_BARS][M64_PER_IOV];
+ int (*m64_map)[PCI_SRIOV_NUM_BARS];
#endif /* CONFIG_PCI_IOV */
+ int mps; /* Maximum Payload Size */
#endif
struct list_head child_list;
struct list_head list;
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 814622146d5a..e157489ee7a1 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -136,16 +136,24 @@ extern ssize_t power_events_sysfs_show(struct device *dev,
* event 'cpu-cycles' can have two entries in sysfs: 'cpu-cycles' and
* 'PM_CYC' where the latter is the name by which the event is known in
* POWER CPU specification.
+ *
+ * Similarly, some hardware and cache events use the same event code. E.g.
+ * on POWER8, both "cache-references" and "L1-dcache-loads" events refer
+ * to the same event, PM_LD_REF_L1. The suffix allows us to have two
+ * sysfs objects for the same event and thus two entries/aliases in sysfs.
*/
#define EVENT_VAR(_id, _suffix) event_attr_##_id##_suffix
#define EVENT_PTR(_id, _suffix) &EVENT_VAR(_id, _suffix).attr.attr
#define EVENT_ATTR(_name, _id, _suffix) \
- PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_##_id, \
+ PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), _id, \
power_events_sysfs_show)
#define GENERIC_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _g)
#define GENERIC_EVENT_PTR(_id) EVENT_PTR(_id, _g)
+#define CACHE_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _c)
+#define CACHE_EVENT_PTR(_id) EVENT_PTR(_id, _c)
+
#define POWER_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _p)
#define POWER_EVENT_PTR(_id) EVENT_PTR(_id, _p)
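Hypothetical usage of the new _c suffix, modeled on the comment above
(assumes PM_LD_REF_L1 is a defined event constant; the array name is
illustrative): the _g and _c suffixes expand to distinct variable names, so
one event code can back two sysfs aliases.

	GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
	CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1);

	/* Both pointers carry the same event code but name distinct attr
	 * objects (event_attr_PM_LD_REF_L1_g vs ..._c). */
	static struct attribute *events_attr[] = {
		GENERIC_EVENT_PTR(PM_LD_REF_L1),
		CACHE_EVENT_PTR(PM_LD_REF_L1),
		NULL,
	};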
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index 69ef28a81733..8d5fc3ac43da 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -53,7 +53,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
#ifndef CONFIG_PPC_64K_PAGES
-#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD)
+#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, __pgtable_ptr_val(PUD))
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
@@ -68,19 +68,19 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
- pud_set(pud, (unsigned long)pmd);
+ pud_set(pud, __pgtable_ptr_val(pmd));
}
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
{
- pmd_set(pmd, (unsigned long)pte);
+ pmd_set(pmd, __pgtable_ptr_val(pte));
}
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte_page)
{
- pmd_set(pmd, (unsigned long)page_address(pte_page));
+ pmd_set(pmd, __pgtable_ptr_val(page_address(pte_page)));
}
#define pmd_pgtable(pmd) pmd_page(pmd)
@@ -171,23 +171,45 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
extern void __tlb_remove_table(void *_table);
#endif
-#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd)
+#ifndef __PAGETABLE_PUD_FOLDED
+/* book3s 64 uses a 4-level page table */
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+ pgd_set(pgd, __pgtable_ptr_val(pud));
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+ GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+}
+#endif
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ pud_set(pud, __pgtable_ptr_val(pmd));
+}
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
{
- pmd_set(pmd, (unsigned long)pte);
+ pmd_set(pmd, __pgtable_ptr_val(pte));
}
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte_page)
{
- pmd_set(pmd, (unsigned long)pte_page);
+ pmd_set(pmd, __pgtable_ptr_val(pte_page));
}
static inline pgtable_t pmd_pgtable(pmd_t pmd)
{
- return (pgtable_t)(pmd_val(pmd) & ~PMD_MASKED_BITS);
+ return (pgtable_t)pmd_page_vaddr(pmd);
}
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
@@ -233,11 +255,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
#define __pmd_free_tlb(tlb, pmd, addr) \
pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
-#ifndef CONFIG_PPC_64K_PAGES
+#ifndef __PAGETABLE_PUD_FOLDED
#define __pud_free_tlb(tlb, pud, addr) \
pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
-#endif /* CONFIG_PPC_64K_PAGES */
+#endif /* __PAGETABLE_PUD_FOLDED */
#define check_pgt_cache() do { } while (0)
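A sketch of what routing everything through __pgtable_ptr_val() buys (the
__pa() variant below is an assumption for illustration, not what this patch
defines): the representation of upper-level entries is decided in a single
macro, and every populate helper inherits it unchanged.

	/* A platform storing physical addresses in the tree would define: */
	#define __pgtable_ptr_val(ptr)	__pa(ptr)

	/* ...and helpers like this one need no per-platform changes: */
	static inline void sketch_pud_populate(pud_t *pud, pmd_t *pmd)
	{
		pud_set(pud, __pgtable_ptr_val(pmd));
	}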
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
new file mode 100644
index 000000000000..43140f8b0592
--- /dev/null
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -0,0 +1,103 @@
+#ifndef _ASM_POWERPC_PGTABLE_TYPES_H
+#define _ASM_POWERPC_PGTABLE_TYPES_H
+
+#ifdef CONFIG_STRICT_MM_TYPECHECKS
+/* These are used to make use of C type-checking. */
+
+/* PTE level */
+typedef struct { pte_basic_t pte; } pte_t;
+#define __pte(x) ((pte_t) { (x) })
+static inline pte_basic_t pte_val(pte_t x)
+{
+ return x.pte;
+}
+
+/* PMD level */
+#ifdef CONFIG_PPC64
+typedef struct { unsigned long pmd; } pmd_t;
+#define __pmd(x) ((pmd_t) { (x) })
+static inline unsigned long pmd_val(pmd_t x)
+{
+ return x.pmd;
+}
+
+/*
+ * 64-bit hash always uses a 4-level table. Everybody else uses 4 levels
+ * only for the 4K page size.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+typedef struct { unsigned long pud; } pud_t;
+#define __pud(x) ((pud_t) { (x) })
+static inline unsigned long pud_val(pud_t x)
+{
+ return x.pud;
+}
+#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC64 */
+
+/* PGD level */
+typedef struct { unsigned long pgd; } pgd_t;
+#define __pgd(x) ((pgd_t) { (x) })
+static inline unsigned long pgd_val(pgd_t x)
+{
+ return x.pgd;
+}
+
+/* Page protection bits */
+typedef struct { unsigned long pgprot; } pgprot_t;
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) })
+
+#else
+
+/*
+ * .. while these make it easier on the compiler
+ */
+
+typedef pte_basic_t pte_t;
+#define __pte(x) (x)
+static inline pte_basic_t pte_val(pte_t pte)
+{
+ return pte;
+}
+
+#ifdef CONFIG_PPC64
+typedef unsigned long pmd_t;
+#define __pmd(x) (x)
+static inline unsigned long pmd_val(pmd_t pmd)
+{
+ return pmd;
+}
+
+#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+typedef unsigned long pud_t;
+#define __pud(x) (x)
+static inline unsigned long pud_val(pud_t pud)
+{
+ return pud;
+}
+#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC64 */
+
+typedef unsigned long pgd_t;
+#define __pgd(x) (x)
+static inline unsigned long pgd_val(pgd_t pgd)
+{
+ return pgd;
+}
+
+typedef unsigned long pgprot_t;
+#define pgprot_val(x) (x)
+#define __pgprot(x) (x)
+
+#endif /* CONFIG_STRICT_MM_TYPECHECKS */
+/*
+ * With the hash config, 64k pages additionally define a bigger "real PTE"
+ * type that gathers the "second half" of the PTE for pseudo 64k pages.
+ */
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef struct { pte_t pte; } real_pte_t;
+#endif
+#endif /* _ASM_POWERPC_PGTABLE_TYPES_H */
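The point of the struct wrappers above is compile-time type safety; a
standalone demonstration (not kernel code):

	typedef struct { unsigned long pte; } pte_t;
	typedef struct { unsigned long pmd; } pmd_t;

	static void set_pte(pte_t *ptep, pte_t val) { ptep->pte = val.pte; }

	static void demo(pte_t *ptep, pmd_t pmdval)
	{
		/* set_pte(ptep, pmdval); -- rejected by the compiler with
		 * the struct typedefs; with the plain unsigned long
		 * typedefs it would compile silently. */
		set_pte(ptep, (pte_t){ .pte = pmdval.pmd }); /* explicit */
	}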
diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h
index 10902c9375d0..925697968946 100644
--- a/arch/powerpc/include/asm/pmac_feature.h
+++ b/arch/powerpc/include/asm/pmac_feature.h
@@ -46,7 +46,7 @@
/* PowerSurge are the first generation of PCI Pmacs. This include
* all of the Grand-Central based machines. We currently don't
- * differenciate most of them.
+ * differentiate most of them.
*/
#define PMAC_TYPE_PSURGE 0x10 /* PowerSurge */
#define PMAC_TYPE_ANS 0x11 /* Apple Network Server */
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index ac2330820b9a..8ab8a1a9610a 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -236,7 +236,9 @@ struct thread_struct {
#endif
struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
unsigned long trap_nr; /* last trap # on this thread */
+ u8 load_fp;
#ifdef CONFIG_ALTIVEC
+ u8 load_vec;
struct thread_vr_state vr_state;
struct thread_vr_state *vr_save_area;
unsigned long vrsave;
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c4cb2ffc624e..52ed654d01ba 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -75,6 +75,14 @@
#define MSR_HV 0
#endif
+/*
+ * For use in code shared between Book E and Book S: defining MSR_SPE to 0
+ * here means shared code need not worry about which book it is built for.
+ */
+#ifndef MSR_SPE
+#define MSR_SPE 0
+#endif
+
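The effect, sketched with a consumer modeled on this series' giveup_all()
(names illustrative): shared code can test MSR_SPE unconditionally, and on
Book S the branch is statically dead.

	static inline void giveup_spe_if_used(struct task_struct *tsk,
					      unsigned long usermsr)
	{
		/* On Book S, MSR_SPE == 0 makes this branch dead code the
		 * compiler removes; on Book E it hands back the SPE state.
		 * Either way, no #ifdef is needed here. */
		if (usermsr & MSR_SPE)
			__giveup_spe(tsk);
	}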
#define MSR_VEC __MASK(MSR_VEC_LG) /* Enable AltiVec */
#define MSR_VSX __MASK(MSR_VSX_LG) /* Enable VSX */
#define MSR_POW __MASK(MSR_POW_LG) /* Enable Power Management */
@@ -376,7 +384,7 @@
#define SPRN_TSCR 0x399 /* Thread Switch Control Register */
#define SPRN_DEC 0x016 /* Decrement Register */
-#define SPRN_DER 0x095 /* Debug Enable Regsiter */
+#define SPRN_DER 0x095 /* Debug Enable Register */
#define DER_RSTE 0x40000000 /* Reset Interrupt */
#define DER_CHSTPE 0x20000000 /* Check Stop */
#define DER_MCIE 0x10000000 /* Machine Check Interrupt */
@@ -401,7 +409,7 @@
#define SPRN_DPDES 0x0B0 /* Directed Priv. Doorbell Exc. State */
#define SPRN_EAR 0x11A /* External Address Register */
#define SPRN_HASH1 0x3D2 /* Primary Hash Address Register */
-#define SPRN_HASH2 0x3D3 /* Secondary Hash Address Resgister */
+#define SPRN_HASH2 0x3D3 /* Secondary Hash Address Register */
#define SPRN_HID0 0x3F0 /* Hardware Implementation Register 0 */
#define HID0_HDICE_SH (63 - 23) /* 970 HDEC interrupt enable */
#define HID0_EMCP (1<<31) /* Enable Machine Check pin */
@@ -514,7 +522,7 @@
#define ICTRL_EICP 0x00000100 /* enable icache par. check */
#define SPRN_IMISS 0x3D4 /* Instruction TLB Miss Register */
#define SPRN_IMMR 0x27E /* Internal Memory Map Register */
-#define SPRN_L2CR 0x3F9 /* Level 2 Cache Control Regsiter */
+#define SPRN_L2CR 0x3F9 /* Level 2 Cache Control Register */
#define SPRN_L2CR2 0x3f8
#define L2CR_L2E 0x80000000 /* L2 enable */
#define L2CR_L2PE 0x40000000 /* L2 parity enable */
@@ -549,7 +557,7 @@
#define L2CR_L2DO_745x 0x00010000 /* L2 data only (745x) */
#define L2CR_L2REP_745x 0x00001000 /* L2 repl. algorithm (745x) */
#define L2CR_L2HWF_745x 0x00000800 /* L2 hardware flush (745x) */
-#define SPRN_L3CR 0x3FA /* Level 3 Cache Control Regsiter */
+#define SPRN_L3CR 0x3FA /* Level 3 Cache Control Register */
#define L3CR_L3E 0x80000000 /* L3 enable */
#define L3CR_L3PE 0x40000000 /* L3 data parity enable */
#define L3CR_L3APE 0x20000000 /* L3 addr parity enable */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 2fef74b474f0..737e012ef56e 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -681,7 +681,7 @@
#define SPRN_CDBCR 0x3D7 /* Cache Debug Control Register */
#define SPRN_TBHI 0x3DC /* Time Base High */
#define SPRN_TBLO 0x3DD /* Time Base Low */
-#define SPRN_DBCR 0x3F2 /* Debug Control Regsiter */
+#define SPRN_DBCR 0x3F2 /* Debug Control Register */
#define SPRN_PBL1 0x3FC /* Protection Bound Lower 1 */
#define SPRN_PBL2 0x3FE /* Protection Bound Lower 2 */
#define SPRN_PBU1 0x3FD /* Protection Bound Upper 1 */
diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h
index 37d2da6feabf..f280dd11243f 100644
--- a/arch/powerpc/include/asm/smu.h
+++ b/arch/powerpc/include/asm/smu.h
@@ -154,7 +154,7 @@
*
* The Darwin I2C driver is less subtle though. On any non-success status
* from the response command, it waits 5ms and tries again up to 20 times,
- * it doesn't differenciate between fatal errors or "busy" status.
+ * it doesn't differentiate between fatal errors or "busy" status.
*
* This driver provides an asynchronous paramblock based i2c command
* interface to be used either directly by low level code or by a higher
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 5b268b6be74c..17c8380673a6 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -28,12 +28,14 @@ extern void giveup_all(struct task_struct *);
extern void enable_kernel_fp(void);
extern void flush_fp_to_thread(struct task_struct *);
extern void giveup_fpu(struct task_struct *);
-extern void __giveup_fpu(struct task_struct *);
+extern void save_fpu(struct task_struct *);
static inline void disable_kernel_fp(void)
{
msr_check_and_clear(MSR_FP);
}
#else
+static inline void __giveup_fpu(struct task_struct *t) { }
+static inline void save_fpu(struct task_struct *t) { }
static inline void flush_fp_to_thread(struct task_struct *t) { }
#endif
@@ -41,18 +43,19 @@ static inline void flush_fp_to_thread(struct task_struct *t) { }
extern void enable_kernel_altivec(void);
extern void flush_altivec_to_thread(struct task_struct *);
extern void giveup_altivec(struct task_struct *);
-extern void __giveup_altivec(struct task_struct *);
+extern void save_altivec(struct task_struct *);
static inline void disable_kernel_altivec(void)
{
msr_check_and_clear(MSR_VEC);
}
+#else
+static inline void save_altivec(struct task_struct *t) { }
+static inline void __giveup_altivec(struct task_struct *t) { }
#endif
#ifdef CONFIG_VSX
extern void enable_kernel_vsx(void);
extern void flush_vsx_to_thread(struct task_struct *);
-extern void giveup_vsx(struct task_struct *);
-extern void __giveup_vsx(struct task_struct *);
static inline void disable_kernel_vsx(void)
{
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
@@ -68,6 +71,8 @@ static inline void disable_kernel_spe(void)
{
msr_check_and_clear(MSR_SPE);
}
+#else
+static inline void __giveup_spe(struct task_struct *t) { }
#endif
static inline void clear_task_ebb(struct task_struct *t)
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 23d351ca0303..9f77f85e3e99 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -78,97 +78,7 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
}
#elif defined(CONFIG_PPC_STD_MMU_64)
-
-#define MMU_NO_CONTEXT 0
-
-/*
- * TLB flushing for 64-bit hash-MMU CPUs
- */
-
-#include <linux/percpu.h>
-#include <asm/page.h>
-
-#define PPC64_TLB_BATCH_NR 192
-
-struct ppc64_tlb_batch {
- int active;
- unsigned long index;
- struct mm_struct *mm;
- real_pte_t pte[PPC64_TLB_BATCH_NR];
- unsigned long vpn[PPC64_TLB_BATCH_NR];
- unsigned int psize;
- int ssize;
-};
-DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
-
-extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
-
-#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
-
-static inline void arch_enter_lazy_mmu_mode(void)
-{
- struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
-
- batch->active = 1;
-}
-
-static inline void arch_leave_lazy_mmu_mode(void)
-{
- struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
-
- if (batch->index)
- __flush_tlb_pending(batch);
- batch->active = 0;
-}
-
-#define arch_flush_lazy_mmu_mode() do {} while (0)
-
-
-extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
- int ssize, unsigned long flags);
-extern void flush_hash_range(unsigned long number, int local);
-extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
- pmd_t *pmdp, unsigned int psize, int ssize,
- unsigned long flags);
-
-static inline void local_flush_tlb_mm(struct mm_struct *mm)
-{
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-}
-
-static inline void local_flush_tlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
-}
-
-static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
-}
-
-static inline void flush_tlb_kernel_range(unsigned long start,
- unsigned long end)
-{
-}
-
-/* Private function for use by PCI IO mapping code */
-extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
- unsigned long end);
-extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd,
- unsigned long addr);
+#include <asm/book3s/64/tlbflush-hash.h>
#else
#error Unsupported MMU type
#endif
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index 8e86b48d0369..32e36b16773f 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -57,12 +57,14 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit,
extern void hcall_tracepoint_regfunc(void);
extern void hcall_tracepoint_unregfunc(void);
-TRACE_EVENT_FN(hcall_entry,
+TRACE_EVENT_FN_COND(hcall_entry,
TP_PROTO(unsigned long opcode, unsigned long *args),
TP_ARGS(opcode, args),
+ TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
TP_STRUCT__entry(
__field(unsigned long, opcode)
),
@@ -76,13 +78,15 @@ TRACE_EVENT_FN(hcall_entry,
hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
);
-TRACE_EVENT_FN(hcall_exit,
+TRACE_EVENT_FN_COND(hcall_exit,
TP_PROTO(unsigned long opcode, unsigned long retval,
unsigned long *retbuf),
TP_ARGS(opcode, retval, retbuf),
+ TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
TP_STRUCT__entry(
__field(unsigned long, opcode)
__field(unsigned long, retval)
diff --git a/arch/powerpc/include/asm/uninorth.h b/arch/powerpc/include/asm/uninorth.h
index d12b11d7641e..a1d112979fd2 100644
--- a/arch/powerpc/include/asm/uninorth.h
+++ b/arch/powerpc/include/asm/uninorth.h
@@ -132,7 +132,7 @@
/* This one _might_ return the CPU number of the CPU reading it;
* the bootROM decides whether to boot or to sleep/spinloop depending
- * on this register beeing 0 or not
+ * on this register being 0 or not
*/
#define UNI_N_CPU_NUMBER 0x0050
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 0e25bdb190bb..5d61bbced6a1 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -1,5 +1,5 @@
/*
- * Common definitions accross all variants of ICP and ICS interrupt
+ * Common definitions across all variants of ICP and ICS interrupt
* controllers.
*/
diff --git a/arch/powerpc/include/uapi/asm/epapr_hcalls.h b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
index 7f9c74b46704..b4504f394427 100644
--- a/arch/powerpc/include/uapi/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
@@ -78,7 +78,7 @@
#define EV_SUCCESS 0
#define EV_EPERM 1 /* Operation not permitted */
#define EV_ENOENT 2 /* Entry Not Found */
-#define EV_EIO 3 /* I/O error occured */
+#define EV_EIO 3 /* I/O error occurred */
#define EV_EAGAIN 4 /* The operation had insufficient
* resources to complete and should be
* retried
@@ -89,7 +89,7 @@
#define EV_ENODEV 7 /* No such device */
#define EV_EINVAL 8 /* An argument supplied to the hcall
was out of range or invalid */
-#define EV_INTERNAL 9 /* An internal error occured */
+#define EV_INTERNAL 9 /* An internal error occurred */
#define EV_CONFIG 10 /* A configuration error was detected */
#define EV_INVALID_STATE 11 /* The object is in an invalid state */
#define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 07cebc3514f3..10d5eab19458 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -95,12 +95,14 @@ int main(void)
DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state));
DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area));
DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr));
+ DEFINE(THREAD_LOAD_FP, offsetof(struct thread_struct, load_fp));
#ifdef CONFIG_ALTIVEC
DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state));
DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area));
DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr));
+ DEFINE(THREAD_LOAD_VEC, offsetof(struct thread_struct, load_vec));
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 9c9b7411b28b..584e119fa8b0 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -15,6 +15,7 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>
+#include <asm/book3s/64/mmu-hash.h>
/* Entry: r3 = crap, r4 = ptr to cputable entry
*
@@ -83,6 +84,39 @@ _GLOBAL(__restore_cpu_power8)
mtlr r11
blr
+_GLOBAL(__setup_cpu_power9)
+ mflr r11
+ bl __init_FSCR
+ bl __init_hvmode_206
+ mtlr r11
+ beqlr
+ li r0,0
+ mtspr SPRN_LPID,r0
+ mfspr r3,SPRN_LPCR
+ ori r3, r3, LPCR_PECEDH
+ bl __init_LPCR
+ bl __init_HFSCR
+ bl __init_tlb_power9
+ mtlr r11
+ blr
+
+_GLOBAL(__restore_cpu_power9)
+ mflr r11
+ bl __init_FSCR
+ mfmsr r3
+ rldicl. r0,r3,4,63
+ mtlr r11
+ beqlr
+ li r0,0
+ mtspr SPRN_LPID,r0
+ mfspr r3,SPRN_LPCR
+ ori r3, r3, LPCR_PECEDH
+ bl __init_LPCR
+ bl __init_HFSCR
+ bl __init_tlb_power9
+ mtlr r11
+ blr
+
__init_hvmode_206:
/* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */
mfmsr r3
@@ -139,7 +173,7 @@ __init_HFSCR:
* (invalidate by congruence class). P7 has 128 CCs, P8 has 512.
*/
__init_tlb_power7:
- li r6,128
+ li r6,POWER7_TLB_SETS
mtctr r6
li r7,0xc00 /* IS field = 0b11 */
ptesync
@@ -150,7 +184,18 @@ __init_tlb_power7:
1: blr
__init_tlb_power8:
- li r6,512
+ li r6,POWER8_TLB_SETS
+ mtctr r6
+ li r7,0xc00 /* IS field = 0b11 */
+ ptesync
+2: tlbiel r7
+ addi r7,r7,0x1000
+ bdnz 2b
+ ptesync
+1: blr
+
+__init_tlb_power9:
+ li r6,POWER9_TLB_SETS_HASH
mtctr r6
li r7,0xc00 /* IS field = 0b11 */
ptesync
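In rough C terms, each __init_tlb_powerN routine above runs the same loop,
parameterized only by the set count (POWER7_TLB_SETS and friends); the
tlbiel()/ptesync() helpers below stand in for the instructions and are not
real functions.

	static void init_tlb_sketch(unsigned int sets)
	{
		unsigned long rb = 0xc00;	/* IS field = 0b11 */
		unsigned int i;

		ptesync();
		for (i = 0; i < sets; i++) {
			tlbiel(rb);	/* invalidate one congruence class */
			rb += 0x1000;	/* advance the set index */
		}
		ptesync();
	}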
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 7d80bfdfb15e..be4d73053bed 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -70,9 +70,12 @@ extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power7(void);
extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power8(void);
+extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_power9(void);
extern void __restore_cpu_a2(void);
extern void __flush_tlb_power7(unsigned int action);
extern void __flush_tlb_power8(unsigned int action);
+extern void __flush_tlb_power9(unsigned int action);
extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
#endif /* CONFIG_PPC64 */
@@ -116,6 +119,11 @@ extern void __restore_cpu_e6500(void);
#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
PPC_FEATURE_TRUE_LE | \
PPC_FEATURE_HAS_ALTIVEC_COMP)
+#define COMMON_USER_POWER9 COMMON_USER_POWER8
+#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \
+ PPC_FEATURE2_ARCH_3_00 | \
+ PPC_FEATURE2_HAS_IEEE128)
+
#ifdef CONFIG_PPC_BOOK3E_64
#define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE)
#else
@@ -499,6 +507,25 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
+ { /* Power9 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004e0000,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power9",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .flush_tlb = __flush_tlb_power9,
+ .platform = "power9",
+ },
{ /* Cell Broadband Engine */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00700000,
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 40e4d4a27663..6544017eb90b 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -268,13 +268,6 @@ static void *eeh_dump_pe_log(void *data, void *flag)
struct eeh_dev *edev, *tmp;
size_t *plen = flag;
- /* If the PE's config space is blocked, 0xFF's will be
- * returned. It's pointless to collect the log in this
- * case.
- */
- if (pe->state & EEH_PE_CFG_BLOCKED)
- return NULL;
-
eeh_pe_for_each_dev(pe, edev, tmp)
*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
EEH_PCI_REGS_LOG_LEN - *plen);
@@ -677,7 +670,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
/* Check if the request is finished successfully */
if (active_flag) {
rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
- if (rc <= 0)
+ if (rc < 0)
return rc;
if (rc & active_flag)
@@ -739,7 +732,7 @@ static void *eeh_restore_dev_state(void *data, void *userdata)
}
/**
- * pcibios_set_pcie_slot_reset - Set PCI-E reset state
+ * pcibios_set_pcie_reset_state - Set PCI-E reset state
* @dev: pci device struct
* @state: reset state to enter
*
@@ -761,7 +754,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
case pcie_deassert_reset:
eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
eeh_unfreeze_pe(pe, false);
- eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
+ if (!(pe->type & EEH_PE_VF))
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
break;
@@ -769,14 +763,16 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
- eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
+ if (!(pe->type & EEH_PE_VF))
+ eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
eeh_ops->reset(pe, EEH_RESET_HOT);
break;
case pcie_warm_reset:
eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
- eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
+ if (!(pe->type & EEH_PE_VF))
+ eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
break;
default:
@@ -1243,6 +1239,14 @@ void eeh_remove_device(struct pci_dev *dev)
* from the parent PE during the BAR restore.
*/
edev->pdev = NULL;
+
+ /*
+ * The flag "in_error" is used to trace EEH devices for VFs
+ * in error state or not. It's set in eeh_report_error(). If
+ * it's not set, eeh_report_{reset,resume}() won't be called
+ * for the VF EEH device.
+ */
+ edev->in_error = false;
dev->dev.archdata.edev = NULL;
if (!(edev->pe->state & EEH_PE_KEEP))
eeh_rmv_from_parent_pe(edev);
@@ -1537,6 +1541,17 @@ int eeh_pe_get_state(struct eeh_pe *pe)
if (!eeh_ops || !eeh_ops->get_state)
return -ENOENT;
+ /*
+ * If the parent PE is owned by the host kernel and is undergoing
+ * error recovery, we should return the PE state as temporarily
+ * unavailable so that the error recovery on the guest is suspended
+ * until the recovery completes on the host.
+ */
+ if (pe->parent &&
+ !(pe->state & EEH_PE_REMOVED) &&
+ (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
+ return EEH_PE_STATE_UNAVAIL;
+
result = eeh_ops->get_state(pe, NULL);
rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
dma_en = !!(result & EEH_STATE_DMA_ENABLED);
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index a1e86e172e3c..ddbcfab7efdf 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -195,8 +195,11 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
return;
}
- /* Walk resources on this device, poke them into the tree */
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ /*
+ * Walk resources on this device, poke the first 7 (6 normal BARs and 1
+ * ROM BAR) into the tree.
+ */
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
resource_size_t start = pci_resource_start(dev,i);
resource_size_t end = pci_resource_end(dev,i);
unsigned long flags = pci_resource_flags(dev,i);
@@ -222,10 +225,6 @@ void eeh_addr_cache_insert_dev(struct pci_dev *dev)
{
unsigned long flags;
- /* Ignore PCI bridges */
- if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
- return;
-
spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
__eeh_addr_cache_insert_dev(dev);
spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index aabba94ff9cb..7815095fe3d8 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -67,6 +67,7 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data)
edev->pdn = pdn;
edev->phb = phb;
INIT_LIST_HEAD(&edev->list);
+ INIT_LIST_HEAD(&edev->rmv_list);
return NULL;
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 938742135ee0..fb6207d2c604 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -34,6 +34,11 @@
#include <asm/prom.h>
#include <asm/rtas.h>
+struct eeh_rmv_data {
+ struct list_head edev_list;
+ int removed;
+};
+
/**
* eeh_pcid_name - Retrieve name of PCI device driver
* @pdev: PCI device
@@ -190,7 +195,7 @@ static void *eeh_report_error(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev))
+ if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
dev->error_state = pci_channel_io_frozen;
@@ -211,6 +216,7 @@ static void *eeh_report_error(void *data, void *userdata)
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+ edev->in_error = true;
eeh_pcid_put(dev);
return NULL;
}
@@ -231,7 +237,7 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev))
+ if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
driver = eeh_pcid_get(dev);
@@ -271,7 +277,7 @@ static void *eeh_report_reset(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev))
+ if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
dev->error_state = pci_channel_io_normal;
@@ -282,7 +288,8 @@ static void *eeh_report_reset(void *data, void *userdata)
if (!driver->err_handler ||
!driver->err_handler->slot_reset ||
- (edev->mode & EEH_DEV_NO_HANDLER)) {
+ (edev->mode & EEH_DEV_NO_HANDLER) ||
+ (!edev->in_error)) {
eeh_pcid_put(dev);
return NULL;
}
@@ -326,20 +333,23 @@ static void *eeh_report_resume(void *data, void *userdata)
{
struct eeh_dev *edev = (struct eeh_dev *)data;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ bool was_in_error;
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev))
+ if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev);
if (!driver) return NULL;
+ was_in_error = edev->in_error;
+ edev->in_error = false;
eeh_enable_irq(dev);
if (!driver->err_handler ||
!driver->err_handler->resume ||
- (edev->mode & EEH_DEV_NO_HANDLER)) {
+ (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
edev->mode &= ~EEH_DEV_NO_HANDLER;
eeh_pcid_put(dev);
return NULL;
@@ -365,7 +375,7 @@ static void *eeh_report_failure(void *data, void *userdata)
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev))
+ if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
dev->error_state = pci_channel_io_perm_failure;
@@ -386,12 +396,40 @@ static void *eeh_report_failure(void *data, void *userdata)
return NULL;
}
+static void *eeh_add_virt_device(void *data, void *userdata)
+{
+ struct pci_driver *driver;
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+ if (!(edev->physfn)) {
+ pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
+ __func__, edev->phb->global_number, pdn->busno,
+ PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+ return NULL;
+ }
+
+ driver = eeh_pcid_get(dev);
+ if (driver) {
+ eeh_pcid_put(dev);
+ if (driver->err_handler)
+ return NULL;
+ }
+
+#ifdef CONFIG_PPC_POWERNV
+ pci_iov_add_virtfn(edev->physfn, pdn->vf_index, 0);
+#endif
+ return NULL;
+}
+
static void *eeh_rmv_device(void *data, void *userdata)
{
struct pci_driver *driver;
struct eeh_dev *edev = (struct eeh_dev *)data;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
- int *removed = (int *)userdata;
+ struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
+ int *removed = rmv_data ? &rmv_data->removed : NULL;
/*
* Actually, we should remove the PCI bridges as well.
@@ -416,10 +454,13 @@ static void *eeh_rmv_device(void *data, void *userdata)
driver = eeh_pcid_get(dev);
if (driver) {
eeh_pcid_put(dev);
- if (driver->err_handler &&
+ if (removed &&
+ eeh_pe_passed(edev->pe))
+ return NULL;
+ if (removed &&
+ driver->err_handler &&
driver->err_handler->error_detected &&
- driver->err_handler->slot_reset &&
- driver->err_handler->resume)
+ driver->err_handler->slot_reset)
return NULL;
}
@@ -428,11 +469,29 @@ static void *eeh_rmv_device(void *data, void *userdata)
pci_name(dev));
edev->bus = dev->bus;
edev->mode |= EEH_DEV_DISCONNECTED;
- (*removed)++;
+ if (removed)
+ (*removed)++;
- pci_lock_rescan_remove();
- pci_stop_and_remove_bus_device(dev);
- pci_unlock_rescan_remove();
+ if (edev->physfn) {
+#ifdef CONFIG_PPC_POWERNV
+ struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+ pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0);
+ edev->pdev = NULL;
+
+ /*
+ * We have to set the VF's PE number to an invalid one, which is
+ * required to plug the VF back in successfully.
+ */
+ pdn->pe_number = IODA_INVALID_PE;
+#endif
+ if (rmv_data)
+ list_add(&edev->rmv_list, &rmv_data->edev_list);
+ } else {
+ pci_lock_rescan_remove();
+ pci_stop_and_remove_bus_device(dev);
+ pci_unlock_rescan_remove();
+ }
return NULL;
}
@@ -546,11 +605,13 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
* During the reset, udev might be invoked because those affected
* PCI devices will be removed and then added.
*/
-static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
+static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
+ struct eeh_rmv_data *rmv_data)
{
struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
struct timeval tstamp;
- int cnt, rc, removed = 0;
+ int cnt, rc;
+ struct eeh_dev *edev;
/* pcibios will clear the counter; save the value */
cnt = pe->freeze_count;
@@ -564,11 +625,16 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
*/
eeh_pe_state_mark(pe, EEH_PE_KEEP);
if (bus) {
- pci_lock_rescan_remove();
- pcibios_remove_pci_devices(bus);
- pci_unlock_rescan_remove();
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ pci_lock_rescan_remove();
+ pcibios_remove_pci_devices(bus);
+ pci_unlock_rescan_remove();
+ }
} else if (frozen_bus) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
}
/*
@@ -610,14 +676,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
* PE. We should disconnect it so the binding can be
* rebuilt when adding PCI devices.
*/
+ edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
- pcibios_add_pci_devices(bus);
- } else if (frozen_bus && removed) {
+ if (pe->type & EEH_PE_VF)
+ eeh_add_virt_device(edev, NULL);
+ else
+ pcibios_add_pci_devices(bus);
+ } else if (frozen_bus && rmv_data->removed) {
pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
ssleep(5);
+ edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
- pcibios_add_pci_devices(frozen_bus);
+ if (pe->type & EEH_PE_VF)
+ eeh_add_virt_device(edev, NULL);
+ else
+ pcibios_add_pci_devices(frozen_bus);
}
eeh_pe_state_clear(pe, EEH_PE_KEEP);
@@ -636,8 +710,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
static void eeh_handle_normal_event(struct eeh_pe *pe)
{
struct pci_bus *frozen_bus;
+ struct eeh_dev *edev, *tmp;
int rc = 0;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
+ struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
frozen_bus = eeh_pe_bus_get(pe);
if (!frozen_bus) {
@@ -692,7 +768,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
*/
if (result == PCI_ERS_RESULT_NONE) {
pr_info("EEH: Reset with hotplug activity\n");
- rc = eeh_reset_device(pe, frozen_bus);
+ rc = eeh_reset_device(pe, frozen_bus, NULL);
if (rc) {
pr_warn("%s: Unable to reset, err=%d\n",
__func__, rc);
@@ -744,7 +820,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
/* If any device called out for a reset, then reset the slot */
if (result == PCI_ERS_RESULT_NEED_RESET) {
pr_info("EEH: Reset without hotplug activity\n");
- rc = eeh_reset_device(pe, NULL);
+ rc = eeh_reset_device(pe, NULL, &rmv_data);
if (rc) {
pr_warn("%s: Cannot reset, err=%d\n",
__func__, rc);
@@ -764,6 +840,15 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
goto hard_fail;
}
+ /*
+ * For those hot-removed VFs, we should add them back after the PF is
+ * recovered properly.
+ */
+ list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) {
+ eeh_add_virt_device(edev, NULL);
+ list_del(&edev->rmv_list);
+ }
+
/* Tell all device drivers that they can resume operations */
pr_info("EEH: Notify device driver to resume\n");
eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
@@ -803,11 +888,17 @@ perm_error:
* their PCI config any more.
*/
if (frozen_bus) {
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- pci_lock_rescan_remove();
- pcibios_remove_pci_devices(frozen_bus);
- pci_unlock_rescan_remove();
+ pci_lock_rescan_remove();
+ pcibios_remove_pci_devices(frozen_bus);
+ pci_unlock_rescan_remove();
+ }
}
}
@@ -886,6 +977,7 @@ static void eeh_handle_special_event(void)
continue;
/* Notify all devices to be down */
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
bus = eeh_pe_bus_get(phb_pe);
eeh_pe_dev_traverse(pe,
eeh_report_failure, NULL);
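The VF recovery flow above threads one context structure through the PE traversal so the reset path records exactly which devices it tore down, then re-adds only those once the PF is healthy. A minimal user-space sketch of that collect-then-replay pattern, with simplified stand-in types rather than the real eeh_dev/eeh_rmv_data definitions:

    /* Collect-then-replay: remember removed entries on a list,
     * replay them after recovery. Types are illustrative. */
    #include <stdio.h>

    struct dev {
            struct dev *rmv_next;   /* plays the role of edev->rmv_list */
            int id;
    };

    struct rmv_data {
            struct dev *edev_list;  /* removed devices, re-added later */
            int removed;            /* count, checked before the 5s sleep */
    };

    static void rmv_device(struct dev *d, struct rmv_data *rd)
    {
            d->rmv_next = rd->edev_list;
            rd->edev_list = d;
            rd->removed++;
    }

    int main(void)
    {
            struct dev a = { .id = 1 }, b = { .id = 2 };
            struct rmv_data rd = { 0 };

            rmv_device(&a, &rd);    /* during eeh_reset_device() */
            rmv_device(&b, &rd);

            /* once the PF has recovered, replay the saved list */
            for (struct dev *d = rd.edev_list; d; d = d->rmv_next)
                    printf("re-adding dev %d\n", d->id);
            return 0;
    }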
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index ca9e5371930e..eea48d8baf49 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -299,7 +299,10 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
* EEH device already having associated PE, but
* the direct parent EEH device doesn't have yet.
*/
- pdn = pdn ? pdn->parent : NULL;
+ if (edev->physfn)
+ pdn = pci_get_pdn(edev->physfn);
+ else
+ pdn = pdn ? pdn->parent : NULL;
while (pdn) {
/* We're poking out of PCI territory */
parent = pdn_to_eeh_dev(pdn);
@@ -382,7 +385,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
}
/* Create a new EEH PE */
- pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
+ if (edev->physfn)
+ pe = eeh_pe_alloc(edev->phb, EEH_PE_VF);
+ else
+ pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
if (!pe) {
pr_err("%s: out of memory!\n", __func__);
return -ENOMEM;
@@ -920,25 +926,21 @@ const char *eeh_pe_loc_get(struct eeh_pe *pe)
*/
struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
{
- struct pci_bus *bus = NULL;
struct eeh_dev *edev;
struct pci_dev *pdev;
- if (pe->type & EEH_PE_PHB) {
- bus = pe->phb->bus;
- } else if (pe->type & EEH_PE_BUS ||
- pe->type & EEH_PE_DEVICE) {
- if (pe->bus) {
- bus = pe->bus;
- goto out;
- }
+ if (pe->type & EEH_PE_PHB)
+ return pe->phb->bus;
- edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
- pdev = eeh_dev_to_pci_dev(edev);
- if (pdev)
- bus = pdev->bus;
- }
+ /* The primary bus might be cached during probe time */
+ if (pe->state & EEH_PE_PRI_BUS)
+ return pe->bus;
-out:
- return bus;
+ /* Retrieve the parent PCI bus of first (top) PCI device */
+ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list);
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (pdev)
+ return pdev->bus;
+
+ return NULL;
}
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index ec7f8aada697..b9b125327f27 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -210,7 +210,20 @@ system_call: /* label this so stack traces look sane */
li r11,-MAX_ERRNO
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
bne- syscall_exit_work
- cmpld r3,r11
+
+ andi. r0,r8,MSR_FP
+ beq 2f
+#ifdef CONFIG_ALTIVEC
+ andis. r0,r8,MSR_VEC@h
+ bne 3f
+#endif
+2: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl restore_math
+ ld r8,_MSR(r1)
+ ld r3,RESULT(r1)
+ li r11,-MAX_ERRNO
+
+3: cmpld r3,r11
ld r5,_CCR(r1)
bge- syscall_error
.Lsyscall_error_cont:
@@ -602,8 +615,8 @@ _GLOBAL(ret_from_except_lite)
/* Check current_thread_info()->flags */
andi. r0,r4,_TIF_USER_WORK_MASK
-#ifdef CONFIG_PPC_BOOK3E
bne 1f
+#ifdef CONFIG_PPC_BOOK3E
/*
* Check to see if the dbcr0 register is set up to debug.
* Use the internal debug mode bit to do this.
@@ -618,7 +631,9 @@ _GLOBAL(ret_from_except_lite)
mtspr SPRN_DBSR,r10
b restore
#else
- beq restore
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl restore_math
+ b restore
#endif
1: andi. r0,r4,_TIF_NEED_RESCHED
beq 2f
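In C terms, the new syscall-exit hunk skips the restore_math() call only when every tracked facility is already live in the saved MSR; otherwise it calls in and lets restore_math() decide what to reload. A user-space paraphrase of the branch structure, using the standard MSR bit values as stand-ins:

    /* Paraphrase of the syscall-exit branches above; prints the
     * decision instead of calling the real restore_math(). */
    #include <stdio.h>

    #define MSR_FP  0x2000UL        /* stand-in bit values */
    #define MSR_VEC 0x02000000UL
    #define HAVE_ALTIVEC 1

    static void maybe_restore_math(unsigned long msr)
    {
            int all_live = (msr & MSR_FP) != 0;
    #if HAVE_ALTIVEC
            all_live = all_live && (msr & MSR_VEC) != 0;
    #else
            all_live = 0;   /* without AltiVec the asm always calls in */
    #endif
            if (!all_live)
                    printf("bl restore_math\n");
            else
                    printf("skip: FP and VEC already live\n");
    }

    int main(void)
    {
            maybe_restore_math(MSR_FP);             /* VEC cold: restore */
            maybe_restore_math(MSR_FP | MSR_VEC);   /* both hot: skip */
            return 0;
    }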
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 2117eaca3d28..15da2b5df85e 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -130,6 +130,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
or r12,r12,r4
std r12,_MSR(r1)
#endif
+ /* Don't care if r4 overflows, this is desired behaviour */
+ lbz r4,THREAD_LOAD_FP(r5)
+ addi r4,r4,1
+ stb r4,THREAD_LOAD_FP(r5)
addi r10,r5,THREAD_FPSTATE
lfd fr0,FPSTATE_FPSCR(r10)
MTFSF_L(fr0)
@@ -139,33 +143,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
blr
/*
- * __giveup_fpu(tsk)
- * Disable FP for the task given as the argument,
- * and save the floating-point registers in its thread_struct.
+ * save_fpu(tsk)
+ * Save the floating-point registers in its thread_struct.
* Enables the FPU for use in the kernel on return.
*/
-_GLOBAL(__giveup_fpu)
+_GLOBAL(save_fpu)
addi r3,r3,THREAD /* want THREAD of task */
PPC_LL r6,THREAD_FPSAVEAREA(r3)
PPC_LL r5,PT_REGS(r3)
PPC_LCMPI 0,r6,0
bne 2f
addi r6,r3,THREAD_FPSTATE
-2: PPC_LCMPI 0,r5,0
- SAVE_32FPVSRS(0, R4, R6)
+2: SAVE_32FPVSRS(0, R4, R6)
mffs fr0
stfd fr0,FPSTATE_FPSCR(r6)
- beq 1f
- PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
- li r3,MSR_FP|MSR_FE0|MSR_FE1
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
- oris r3,r3,MSR_VSX@h
-END_FTR_SECTION_IFSET(CPU_FTR_VSX)
-#endif
- andc r4,r4,r3 /* disable FP for previous task */
- PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
blr
/*
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index b5061abbd2e0..9cdf5c71e426 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -806,7 +806,7 @@ _GLOBAL(set_context)
_GLOBAL(init_cpu_state)
mflr r22
#ifdef CONFIG_PPC_47x
- /* We use the PVR to differenciate 44x cores from 476 */
+ /* We use the PVR to differentiate 44x cores from 476 */
mfspr r3,SPRN_PVR
srwi r3,r3,16
cmplwi cr0,r3,PVR_476FPE@h
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index cf4fb5429cf1..470ceebd2d23 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -19,7 +19,7 @@
#include <asm/kvm_book3s_asm.h>
#include <asm/opal.h>
#include <asm/cpuidle.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#undef DEBUG
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index e77c3ccf8dcf..dbf098121ce6 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -445,7 +445,11 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
* Global data
*/
struct kgdb_arch arch_kgdb_ops = {
+#ifdef __LITTLE_ENDIAN__
+ .gdb_bpt_instr = {0x08, 0x10, 0x82, 0x7d},
+#else
.gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
+#endif
};
static int kgdb_not_implemented(struct pt_regs *regs)
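Both byte arrays encode the same 32-bit trap instruction, 0x7d821008 (twge r2,r2); only the in-memory byte order differs. A small host-side check, assuming nothing beyond the opcode word being stored natively:

    /* Shows why arch_kgdb_ops needs per-endian byte arrays. */
    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
            uint32_t insn = 0x7d821008u;    /* twge r2,r2 */
            unsigned char b[4];

            memcpy(b, &insn, sizeof(b));    /* native byte order */
            printf("%02x %02x %02x %02x\n", b[0], b[1], b[2], b[3]);
            /* little-endian host: 08 10 82 7d  (the LE array above)
             * big-endian host:    7d 82 10 08  (the BE array)      */
            return 0;
    }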
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 2c647b1e62e4..ee62b197502d 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -54,8 +54,8 @@ static void flush_tlb_206(unsigned int num_sets, unsigned int action)
}
/*
- * Generic routine to flush TLB on power7. This routine is used as
- * flush_tlb hook in cpu_spec for Power7 processor.
+ * Generic routines to flush TLB on POWER processors. These routines
+ * are used as the flush_tlb hook in the cpu_spec.
*
* action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs.
* TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
@@ -65,18 +65,17 @@ void __flush_tlb_power7(unsigned int action)
flush_tlb_206(POWER7_TLB_SETS, action);
}
-/*
- * Generic routine to flush TLB on power8. This routine is used as
- * flush_tlb hook in cpu_spec for power8 processor.
- *
- * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs.
- * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
- */
void __flush_tlb_power8(unsigned int action)
{
flush_tlb_206(POWER8_TLB_SETS, action);
}
+void __flush_tlb_power9(unsigned int action)
+{
+ flush_tlb_206(POWER9_TLB_SETS_HASH, action);
+}
+
+
/* flush SLBs and reload */
static void flush_and_reload_slb(void)
{
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 848b47499a27..9ce9a25f58b5 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -311,7 +311,7 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
if (name[0] == '.') {
if (strcmp(name+1, "TOC.") == 0)
syms[i].st_shndx = SHN_ABS;
- memmove(name, name+1, strlen(name));
+ syms[i].st_name++;
}
}
}
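The one-liner works because st_name is an offset into the module's string table: bumping it past the leading '.' leaves the strtab bytes untouched, where the old memmove() rewrote them in place. A stand-alone illustration of the offset trick, with a made-up strtab:

    #include <stdio.h>

    int main(void)
    {
            /* illustrative string table, not real ELF data */
            const char strtab[] = "\0.TOC.\0.my_func";
            unsigned int st_name = 7;       /* points at ".my_func" */

            printf("before: %s\n", strtab + st_name);  /* .my_func */
            st_name++;                      /* dedotify: skip the dot */
            printf("after:  %s\n", strtab + st_name);  /* my_func */
            return 0;
    }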
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 7f9ed0c1f6b9..59c436189f46 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -55,7 +55,7 @@ void pcibios_remove_pci_devices(struct pci_bus *bus)
pr_debug("PCI: Removing devices on bus %04x:%02x\n",
pci_domain_nr(bus), bus->number);
- list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
+ list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) {
pr_debug(" Removing %s...\n", pci_name(dev));
pci_stop_and_remove_bus_device(dev);
}
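Walking the list tail-to-head removes later additions first; VFs, for instance, sit after their PF on bus->devices, and removing the PF first would tear its VFs down while a forward iterator still pointed at them. A hedged sketch of the safe reverse-iteration idiom with an illustrative list:

    /* Safe reverse iteration with per-entry deletion; 'struct item'
     * is illustrative, not a real PCI type. */
    #include <linux/list.h>
    #include <linux/slab.h>

    struct item {
            struct list_head node;
            int id;
    };

    static void remove_all_reverse(struct list_head *head)
    {
            struct item *it, *tmp;

            /* the _safe variant caches the previous entry, so
             * freeing 'it' mid-walk is fine */
            list_for_each_entry_safe_reverse(it, tmp, head, node) {
                    list_del(&it->node);
                    kfree(it);
            }
    }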
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index b3b4df91b792..38102cb9baa9 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -139,6 +139,7 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
#ifdef CONFIG_PCI_IOV
static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
struct pci_dev *pdev,
+ int vf_index,
int busno, int devfn)
{
struct pci_dn *pdn;
@@ -158,6 +159,7 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
pdn->busno = busno;
pdn->devfn = devfn;
#ifdef CONFIG_PPC_POWERNV
+ pdn->vf_index = vf_index;
pdn->pe_number = IODA_INVALID_PE;
#endif
INIT_LIST_HEAD(&pdn->child_list);
@@ -179,6 +181,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
{
#ifdef CONFIG_PCI_IOV
struct pci_dn *parent, *pdn;
+ struct eeh_dev *edev;
int i;
/* Only support IOV for now */
@@ -196,7 +199,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
return NULL;
for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
- pdn = add_one_dev_pci_data(parent, NULL,
+ pdn = add_one_dev_pci_data(parent, NULL, i,
pci_iov_virtfn_bus(pdev, i),
pci_iov_virtfn_devfn(pdev, i));
if (!pdn) {
@@ -204,6 +207,12 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
__func__, i);
return NULL;
}
+
+ /* Create the EEH device for the VF */
+ eeh_dev_init(pdn, pci_bus_to_host(pdev->bus));
+ edev = pdn_to_eeh_dev(pdn);
+ BUG_ON(!edev);
+ edev->physfn = pdev;
}
#endif /* CONFIG_PCI_IOV */
@@ -215,6 +224,7 @@ void remove_dev_pci_data(struct pci_dev *pdev)
#ifdef CONFIG_PCI_IOV
struct pci_dn *parent;
struct pci_dn *pdn, *tmp;
+ struct eeh_dev *edev;
int i;
/*
@@ -256,6 +266,13 @@ void remove_dev_pci_data(struct pci_dev *pdev)
pdn->devfn != pci_iov_virtfn_devfn(pdev, i))
continue;
+ /* Release EEH device for the VF */
+ edev = pdn_to_eeh_dev(pdn);
+ if (edev) {
+ pdn->edev = NULL;
+ kfree(edev);
+ }
+
if (!list_empty(&pdn->list))
list_del(&pdn->list);
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 41e1607e800c..ef7024dacff7 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -28,10 +28,6 @@ EXPORT_SYMBOL(load_vr_state);
EXPORT_SYMBOL(store_vr_state);
#endif
-#ifdef CONFIG_VSX
-EXPORT_SYMBOL_GPL(__giveup_vsx);
-#endif
-
#ifdef CONFIG_EPAPR_PARAVIRT
EXPORT_SYMBOL(epapr_hypercall_start);
#endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dccc87e8fee5..d7a9df51b974 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -133,6 +133,16 @@ void __msr_check_and_clear(unsigned long bits)
EXPORT_SYMBOL(__msr_check_and_clear);
#ifdef CONFIG_PPC_FPU
+void __giveup_fpu(struct task_struct *tsk)
+{
+ save_fpu(tsk);
+ tsk->thread.regs->msr &= ~MSR_FP;
+#ifdef CONFIG_VSX
+ if (cpu_has_feature(CPU_FTR_VSX))
+ tsk->thread.regs->msr &= ~MSR_VSX;
+#endif
+}
+
void giveup_fpu(struct task_struct *tsk)
{
check_if_tm_restore_required(tsk);
@@ -187,9 +197,32 @@ void enable_kernel_fp(void)
}
}
EXPORT_SYMBOL(enable_kernel_fp);
+
+static int restore_fp(struct task_struct *tsk) {
+ if (tsk->thread.load_fp) {
+ load_fp_state(&current->thread.fp_state);
+ current->thread.load_fp++;
+ return 1;
+ }
+ return 0;
+}
+#else
+static int restore_fp(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_PPC_FPU */
#ifdef CONFIG_ALTIVEC
+#define loadvec(thr) ((thr).load_vec)
+
+static void __giveup_altivec(struct task_struct *tsk)
+{
+ save_altivec(tsk);
+ tsk->thread.regs->msr &= ~MSR_VEC;
+#ifdef CONFIG_VSX
+ if (cpu_has_feature(CPU_FTR_VSX))
+ tsk->thread.regs->msr &= ~MSR_VSX;
+#endif
+}
+
void giveup_altivec(struct task_struct *tsk)
{
check_if_tm_restore_required(tsk);
@@ -229,22 +262,49 @@ void flush_altivec_to_thread(struct task_struct *tsk)
}
}
EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
+
+static int restore_altivec(struct task_struct *tsk)
+{
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) && tsk->thread.load_vec) {
+ load_vr_state(&tsk->thread.vr_state);
+ tsk->thread.used_vr = 1;
+ tsk->thread.load_vec++;
+
+ return 1;
+ }
+ return 0;
+}
+#else
+#define loadvec(thr) 0
+static inline int restore_altivec(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
-void giveup_vsx(struct task_struct *tsk)
+static void __giveup_vsx(struct task_struct *tsk)
{
- check_if_tm_restore_required(tsk);
-
- msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
if (tsk->thread.regs->msr & MSR_FP)
__giveup_fpu(tsk);
if (tsk->thread.regs->msr & MSR_VEC)
__giveup_altivec(tsk);
+ tsk->thread.regs->msr &= ~MSR_VSX;
+}
+
+static void giveup_vsx(struct task_struct *tsk)
+{
+ check_if_tm_restore_required(tsk);
+
+ msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
__giveup_vsx(tsk);
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
}
-EXPORT_SYMBOL(giveup_vsx);
+
+static void save_vsx(struct task_struct *tsk)
+{
+ if (tsk->thread.regs->msr & MSR_FP)
+ save_fpu(tsk);
+ if (tsk->thread.regs->msr & MSR_VEC)
+ save_altivec(tsk);
+}
void enable_kernel_vsx(void)
{
@@ -275,6 +335,19 @@ void flush_vsx_to_thread(struct task_struct *tsk)
}
}
EXPORT_SYMBOL_GPL(flush_vsx_to_thread);
+
+static int restore_vsx(struct task_struct *tsk)
+{
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ tsk->thread.used_vsr = 1;
+ return 1;
+ }
+
+ return 0;
+}
+#else
+static inline int restore_vsx(struct task_struct *tsk) { return 0; }
+static inline void save_vsx(struct task_struct *tsk) { }
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
@@ -374,12 +447,76 @@ void giveup_all(struct task_struct *tsk)
}
EXPORT_SYMBOL(giveup_all);
+void restore_math(struct pt_regs *regs)
+{
+ unsigned long msr;
+
+ if (!current->thread.load_fp && !loadvec(current->thread))
+ return;
+
+ msr = regs->msr;
+ msr_check_and_set(msr_all_available);
+
+ /*
+ * Only reload if the bit is not set in the user MSR; the bit being
+ * set indicates that the registers are hot.
+ */
+ if ((!(msr & MSR_FP)) && restore_fp(current))
+ msr |= MSR_FP | current->thread.fpexc_mode;
+
+ if ((!(msr & MSR_VEC)) && restore_altivec(current))
+ msr |= MSR_VEC;
+
+ if ((msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC) &&
+ restore_vsx(current)) {
+ msr |= MSR_VSX;
+ }
+
+ msr_check_and_clear(msr_all_available);
+
+ regs->msr = msr;
+}
+
+void save_all(struct task_struct *tsk)
+{
+ unsigned long usermsr;
+
+ if (!tsk->thread.regs)
+ return;
+
+ usermsr = tsk->thread.regs->msr;
+
+ if ((usermsr & msr_all_available) == 0)
+ return;
+
+ msr_check_and_set(msr_all_available);
+
+ /*
+ * Given the way the register space is laid out in hardware, save_vsx
+ * boils down to a save_fpu() plus a save_altivec().
+ */
+ if (usermsr & MSR_VSX) {
+ save_vsx(tsk);
+ } else {
+ if (usermsr & MSR_FP)
+ save_fpu(tsk);
+
+ if (usermsr & MSR_VEC)
+ save_altivec(tsk);
+ }
+
+ if (usermsr & MSR_SPE)
+ __giveup_spe(tsk);
+
+ msr_check_and_clear(msr_all_available);
+}
+
void flush_all_to_thread(struct task_struct *tsk)
{
if (tsk->thread.regs) {
preempt_disable();
BUG_ON(tsk != current);
- giveup_all(tsk);
+ save_all(tsk);
#ifdef CONFIG_SPE
if (tsk->thread.regs->msr & MSR_SPE)
@@ -832,17 +969,9 @@ void restore_tm_state(struct pt_regs *regs)
msr_diff = current->thread.ckpt_regs.msr & ~regs->msr;
msr_diff &= MSR_FP | MSR_VEC | MSR_VSX;
- if (msr_diff & MSR_FP) {
- msr_check_and_set(MSR_FP);
- load_fp_state(&current->thread.fp_state);
- msr_check_and_clear(MSR_FP);
- regs->msr |= current->thread.fpexc_mode;
- }
- if (msr_diff & MSR_VEC) {
- msr_check_and_set(MSR_VEC);
- load_vr_state(&current->thread.vr_state);
- msr_check_and_clear(MSR_VEC);
- }
+
+ restore_math(regs);
+
regs->msr |= msr_diff;
}
@@ -1006,6 +1135,10 @@ struct task_struct *__switch_to(struct task_struct *prev,
batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
+
+ if (current_thread_info()->task->thread.regs)
+ restore_math(current_thread_info()->task->thread.regs);
+
#endif /* CONFIG_PPC_BOOK3S_64 */
return last;
@@ -1307,6 +1440,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
f = ret_from_fork;
}
+ childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
sp -= STACK_FRAME_OVERHEAD;
/*
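Taken together, the THREAD_LOAD_FP/THREAD_LOAD_VEC counters bumped in the asm fast paths and the restore_math() checks give a cheap "has this task been using the facility?" signal: zero means nothing to reload. A condensed user-space model of that bookkeeping, with a stand-in for thread_struct:

    #include <stdio.h>

    struct math_thread {
            unsigned char load_fp;  /* bumped whenever FP state is loaded */
    };

    static int model_restore_fp(struct math_thread *t)
    {
            if (!t->load_fp)
                    return 0;       /* task never used FP: skip reload */
            /* load_fp_state(&t->fp_state) would run here */
            t->load_fp++;           /* overflow is fine; only zero matters */
            return 1;
    }

    int main(void)
    {
            struct math_thread t = { .load_fp = 0 };

            printf("reload? %d\n", model_restore_fp(&t));  /* 0: skip */
            t.load_fp = 1;          /* as load_up_fpu would do */
            printf("reload? %d\n", model_restore_fp(&t));  /* 1: restore */
            return 0;
    }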
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index cf8c7e4e0b21..cb64d6feb45a 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -1,7 +1,7 @@
/*
* Common signal handling code for both 32 and 64 bits
*
- * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration
+ * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
* Extracted from signal_32.c and signal_64.c
*
* This file is subject to the terms and conditions of the GNU General
@@ -178,7 +178,7 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs)
* need to use the stack pointer from the checkpointed state, rather
* than the speculated state. This ensures that the signal context
* (written tm suspended) will be written below the stack required for
- * the rollback. The transaction is aborted becuase of the treclaim,
+ * the rollback. The transaction is aborted because of the treclaim,
* so any memory written between the tbegin and the signal will be
* rolled back anyway.
*
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 51b274199dd9..be305c858e51 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration
+ * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
* Extracted from signal_32.c and signal_64.c
*
* This file is subject to the terms and conditions of the GNU General
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index b6becc795bb5..88414dde7e35 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1148,6 +1148,7 @@ void __kprobes program_check_exception(struct pt_regs *regs)
goto bail;
}
if (reason & REASON_TRAP) {
+ unsigned long bugaddr;
/* Debugger is first in line to stop recursive faults in
* rcu_lock, notify_die, or atomic_notifier_call_chain */
if (debugger_bpt(regs))
@@ -1158,8 +1159,15 @@ void __kprobes program_check_exception(struct pt_regs *regs)
== NOTIFY_STOP)
goto bail;
+ bugaddr = regs->nip;
+ /*
+ * Fixup bugaddr for BUG_ON() in real mode
+ */
+ if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR))
+ bugaddr += PAGE_OFFSET;
+
if (!(regs->msr & MSR_PR) && /* not user-mode */
- report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
+ report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
regs->nip += 4;
goto bail;
}
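With MSR_IR clear the CPU executes with instruction relocation off, so regs->nip holds a real (physical) address, while the bug table records linked virtual addresses; adding PAGE_OFFSET recovers something report_bug() can match. A tiny model of the fixup, using the usual ppc64 PAGE_OFFSET for illustration:

    #include <stdio.h>

    #define PAGE_OFFSET 0xc000000000000000UL  /* illustrative value */

    int main(void)
    {
            unsigned long nip = 0x12344UL;    /* real-mode NIP */

            if (nip < PAGE_OFFSET)            /* not a kernel vaddr yet */
                    nip += PAGE_OFFSET;
            printf("bugaddr = 0x%lx\n", nip); /* 0xc000000000012344 */
            return 0;
    }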
@@ -1394,7 +1402,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
* is a read DSCR attempt through a mfspr instruction, we
* just emulate the instruction instead. This code path will
* always emulate all the mfspr instructions till the user
- * has attempted atleast one mtspr instruction. This way it
+ * has attempted at least one mtspr instruction. This way it
* preserves the same behaviour when the user is accessing
* the DSCR through privilege level only SPR number (0x11)
* which is emulated through illegal instruction exception.
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 162d0f714941..1c2e7a343bf5 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -91,6 +91,10 @@ _GLOBAL(load_up_altivec)
oris r12,r12,MSR_VEC@h
std r12,_MSR(r1)
#endif
+ /* Don't care if r4 overflows, this is desired behaviour */
+ lbz r4,THREAD_LOAD_VEC(r5)
+ addi r4,r4,1
+ stb r4,THREAD_LOAD_VEC(r5)
addi r6,r5,THREAD_VRSTATE
li r4,1
li r10,VRSTATE_VSCR
@@ -102,36 +106,20 @@ _GLOBAL(load_up_altivec)
blr
/*
- * __giveup_altivec(tsk)
- * Disable VMX for the task given as the argument,
- * and save the vector registers in its thread_struct.
+ * save_altivec(tsk)
+ * Save the vector registers to its thread_struct
*/
-_GLOBAL(__giveup_altivec)
+_GLOBAL(save_altivec)
addi r3,r3,THREAD /* want THREAD of task */
PPC_LL r7,THREAD_VRSAVEAREA(r3)
PPC_LL r5,PT_REGS(r3)
PPC_LCMPI 0,r7,0
bne 2f
addi r7,r3,THREAD_VRSTATE
-2: PPC_LCMPI 0,r5,0
- SAVE_32VRS(0,r4,r7)
+2: SAVE_32VRS(0,r4,r7)
mfvscr v0
li r4,VRSTATE_VSCR
stvx v0,r4,r7
- beq 1f
- PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
- lis r3,(MSR_VEC|MSR_VSX)@h
-FTR_SECTION_ELSE
- lis r3,MSR_VEC@h
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
-#else
- lis r3,MSR_VEC@h
-#endif
- andc r4,r4,r3 /* disable FP for previous task */
- PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
blr
#ifdef CONFIG_VSX
@@ -163,23 +151,6 @@ _GLOBAL(load_up_vsx)
std r12,_MSR(r1)
b fast_exception_return
-/*
- * __giveup_vsx(tsk)
- * Disable VSX for the task given as the argument.
- * Does NOT save vsx registers.
- */
-_GLOBAL(__giveup_vsx)
- addi r3,r3,THREAD /* want THREAD of task */
- ld r5,PT_REGS(r3)
- cmpdi 0,r5,0
- beq 1f
- ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
- lis r3,MSR_VSX@h
- andc r4,r4,r3 /* disable VSX for previous task */
- std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
- blr
-
#endif /* CONFIG_VSX */
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 55c4d51ea3e2..999106991a76 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -22,7 +22,7 @@
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash32.h>
+#include <asm/book3s/32/mmu-hash.h>
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 9bf7031a67ff..b9131aa1aedf 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -26,7 +26,7 @@
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
/* #define DEBUG_MMU */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 913cd2198fa6..114edace6cdd 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -23,7 +23,7 @@
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index fb37290a57b4..c7b78d8336b2 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -32,7 +32,7 @@
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 54cf9bc94dad..9c3b76bb69d9 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -30,7 +30,7 @@
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 89e96b3e0039..039028d3ccb5 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -29,7 +29,7 @@
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 91700518bbf3..4cb8db05f3e5 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -17,7 +17,7 @@
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 6ee26de9a1de..c613fee0b9f7 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -27,7 +27,7 @@
#include <asm/asm-offsets.h>
#include <asm/exception-64s.h>
#include <asm/kvm_book3s_asm.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/tm.h>
#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 905e94a1370f..46871d554057 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -432,7 +432,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* the whole masked_pending business which is about not
* losing interrupts that occur while masked.
*
- * I don't differenciate normal deliveries and resends, this
+ * I don't differentiate normal deliveries and resends, this
* implementation will differ from PAPR and not lose such
* interrupts.
*/
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 778ef86e187e..4d66f44a1657 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -992,7 +992,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_restart_interrupt(vcpu, exit_nr);
/*
- * get last instruction before beeing preempted
+ * get last instruction before being preempted
* TODO: for e6500 check also BOOKE_INTERRUPT_LRAT_ERROR & ESR_DATA
*/
switch (exit_nr) {
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index cda695de8aa7..f48a0c22e8f9 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -182,7 +182,7 @@ int kvmppc_core_check_processor_compat(void)
r = 0;
#ifdef CONFIG_ALTIVEC
/*
- * Since guests have the priviledge to enable AltiVec, we need AltiVec
+ * Since guests have the privilege to enable AltiVec, we need AltiVec
* support in the host to save/restore their context.
* Don't use CPU_FTR_ALTIVEC to identify cores with AltiVec unit
* because it's cleared in the absence of CONFIG_ALTIVEC!
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index e7c04542ba62..47d1b26effc6 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -44,7 +44,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
* a write access. Since this is 4K insert of 64K page size
* also add _PAGE_COMBO
*/
- new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE;
+ new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
if (access & _PAGE_RW)
new_pte |= _PAGE_DIRTY;
} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
@@ -106,7 +106,7 @@ repeat:
}
}
/*
- * Hypervisor failure. Restore old pmd and return -1
+ * Hypervisor failure. Restore old pte and return -1
* similar to __hash_page_*
*/
if (unlikely(slot == -2)) {
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 0762c1e08c88..b2d659cf51c6 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -111,7 +111,13 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
*/
if (!(old_pte & _PAGE_COMBO)) {
flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
- old_pte &= ~_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND;
+ /*
+ * Clear the slot details from both the old and new pte.
+ * On hash insert failure we use the old pte value and we don't
+ * want stale slot information in it.
+ */
+ old_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
+ new_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
goto htab_insert_hpte;
}
/*
@@ -182,7 +188,7 @@ repeat:
}
}
/*
- * Hypervisor failure. Restore old pmd and return -1
+ * Hypervisor failure. Restore old pte and return -1
* similar to __hash_page_*
*/
if (unlikely(slot == -2)) {
@@ -243,8 +249,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
return 0;
/*
* Try to lock the PTE, add ACCESSED and DIRTY if it was
- * a write access. Since this is 4K insert of 64K page size
- * also add _PAGE_COMBO
+ * a write access.
*/
new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
if (access & _PAGE_RW)
@@ -305,7 +310,7 @@ repeat:
}
}
/*
- * Hypervisor failure. Restore old pmd and return -1
+ * Hypervisor failure. Restore old pte and return -1
* similar to __hash_page_*
*/
if (unlikely(slot == -2)) {
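The replaced line also fixed a quiet operator-precedence bug: in `old_pte &= ~_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND`, the `~` binds to the first flag only, and ORing the other two into an already-nearly-all-ones mask changes nothing, so _PAGE_F_GIX and _PAGE_F_SECOND were never cleared. A self-contained demonstration with illustrative bit values:

    #include <stdio.h>

    #define A 0x1u  /* stands in for _PAGE_HASHPTE  */
    #define B 0x2u  /* stands in for _PAGE_F_GIX    */
    #define C 0x4u  /* stands in for _PAGE_F_SECOND */

    int main(void)
    {
            unsigned int pte = A | B | C;

            unsigned int buggy = pte & (~A | B | C);  /* == pte & ~A */
            unsigned int fixed = pte & ~(A | B | C);

            printf("buggy: 0x%x\n", buggy);  /* 0x6: B and C survive */
            printf("fixed: 0x%x\n", fixed);  /* 0x0: all three cleared */
            return 0;
    }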
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index ba59d5977f34..90dd9280894f 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -168,11 +168,11 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
rflags |= HPTE_R_N;
/*
* PP bits:
- * Linux use slb key 0 for kernel and 1 for user.
- * kernel areas are mapped by PP bits 00
- * and and there is no kernel RO (_PAGE_KERNEL_RO).
- * User area mapped by 0x2 and read only use by
- * 0x3.
+ * Linux uses slb key 0 for kernel and 1 for user.
+ * kernel areas are mapped with PP=00
+ * and there is no kernel RO (_PAGE_KERNEL_RO).
+ * User area is mapped with PP=0x2 for read/write
+ * or PP=0x3 for read-only (including writeable but clean pages).
*/
if (pteflags & _PAGE_USER) {
rflags |= 0x2;
@@ -263,28 +263,32 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
return ret < 0 ? ret : 0;
}
-#ifdef CONFIG_MEMORY_HOTPLUG
int htab_remove_mapping(unsigned long vstart, unsigned long vend,
int psize, int ssize)
{
unsigned long vaddr;
unsigned int step, shift;
+ int rc;
+ int ret = 0;
shift = mmu_psize_defs[psize].shift;
step = 1 << shift;
- if (!ppc_md.hpte_removebolted) {
- printk(KERN_WARNING "Platform doesn't implement "
- "hpte_removebolted\n");
- return -EINVAL;
- }
+ if (!ppc_md.hpte_removebolted)
+ return -ENODEV;
- for (vaddr = vstart; vaddr < vend; vaddr += step)
- ppc_md.hpte_removebolted(vaddr, psize, ssize);
+ for (vaddr = vstart; vaddr < vend; vaddr += step) {
+ rc = ppc_md.hpte_removebolted(vaddr, psize, ssize);
+ if (rc == -ENOENT) {
+ ret = -ENOENT;
+ continue;
+ }
+ if (rc < 0)
+ return rc;
+ }
- return 0;
+ return ret;
}
-#endif /* CONFIG_MEMORY_HOTPLUG */
static int __init htab_dt_scan_seg_sizes(unsigned long node,
const char *uname, int depth,
@@ -605,10 +609,28 @@ static int __init htab_dt_scan_pftsize(unsigned long node,
return 0;
}
-static unsigned long __init htab_get_table_size(void)
+unsigned htab_shift_for_mem_size(unsigned long mem_size)
{
- unsigned long mem_size, rnd_mem_size, pteg_count, psize;
+ unsigned memshift = __ilog2(mem_size);
+ unsigned pshift = mmu_psize_defs[mmu_virtual_psize].shift;
+ unsigned pteg_shift;
+
+ /* round mem_size up to next power of 2 */
+ if ((1UL << memshift) < mem_size)
+ memshift += 1;
+
+ /* aim for 2 pages / pteg */
+ pteg_shift = memshift - (pshift + 1);
+ /*
+ * 2^11 PTEGs of 128 bytes each, i.e. 2^18 bytes is the minimum htab
+ * size permitted by the architecture.
+ */
+ return max(pteg_shift + 7, 18U);
+}
+
+static unsigned long __init htab_get_table_size(void)
+{
/* If hash size isn't already provided by the platform, we try to
* retrieve it from the device-tree. If it's not there neither, we
* calculate it now based on the total RAM size
@@ -618,31 +640,30 @@ static unsigned long __init htab_get_table_size(void)
if (ppc64_pft_size)
return 1UL << ppc64_pft_size;
- /* round mem_size up to next power of 2 */
- mem_size = memblock_phys_mem_size();
- rnd_mem_size = 1UL << __ilog2(mem_size);
- if (rnd_mem_size < mem_size)
- rnd_mem_size <<= 1;
-
- /* # pages / 2 */
- psize = mmu_psize_defs[mmu_virtual_psize].shift;
- pteg_count = max(rnd_mem_size >> (psize + 1), 1UL << 11);
-
- return pteg_count << 7;
+ return 1UL << htab_shift_for_mem_size(memblock_phys_mem_size());
}
#ifdef CONFIG_MEMORY_HOTPLUG
int create_section_mapping(unsigned long start, unsigned long end)
{
- return htab_bolt_mapping(start, end, __pa(start),
- pgprot_val(PAGE_KERNEL), mmu_linear_psize,
- mmu_kernel_ssize);
+ int rc = htab_bolt_mapping(start, end, __pa(start),
+ pgprot_val(PAGE_KERNEL), mmu_linear_psize,
+ mmu_kernel_ssize);
+
+ if (rc < 0) {
+ int rc2 = htab_remove_mapping(start, end, mmu_linear_psize,
+ mmu_kernel_ssize);
+ BUG_ON(rc2 && (rc2 != -ENOENT));
+ }
+ return rc;
}
int remove_section_mapping(unsigned long start, unsigned long end)
{
- return htab_remove_mapping(start, end, mmu_linear_psize,
- mmu_kernel_ssize);
+ int rc = htab_remove_mapping(start, end, mmu_linear_psize,
+ mmu_kernel_ssize);
+ WARN_ON(rc < 0);
+ return rc;
}
#endif /* CONFIG_MEMORY_HOTPLUG */
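As a worked example of htab_shift_for_mem_size(): with 8 GiB of RAM (memshift = 33) and 4K base pages (pshift = 12), pteg_shift = 33 - 13 = 20, so the function returns max(27, 18) = 27: a 128 MiB hash table of 2^20 128-byte PTEGs, i.e. two pages per PTEG as the comment promises. A user-space copy for experimenting, with the page shift hard-wired instead of read from mmu_psize_defs[]:

    #include <stdio.h>

    static unsigned ilog2ul(unsigned long v)
    {
            unsigned r = 0;

            while (v >>= 1)
                    r++;
            return r;
    }

    /* assumes a 64-bit host, like the kernel code it mirrors */
    static unsigned htab_shift_for_mem_size(unsigned long mem_size)
    {
            unsigned memshift = ilog2ul(mem_size);
            unsigned pshift = 12;               /* assume 4K base pages */
            unsigned pteg_shift;

            if ((1UL << memshift) < mem_size)   /* round up to 2^n */
                    memshift += 1;

            pteg_shift = memshift - (pshift + 1); /* 2 pages per PTEG */
            return pteg_shift + 7 > 18 ? pteg_shift + 7 : 18;
    }

    int main(void)
    {
            printf("8 GiB -> htab shift %u\n",
                   htab_shift_for_mem_size(8UL << 30));  /* 27 */
            return 0;
    }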
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 49b152b0f926..eb2accdd76fd 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -78,9 +78,19 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
* base page size. This is because demote_segment won't flush
* hash page table entries.
*/
- if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
+ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) {
flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K,
ssize, flags);
+ /*
+ * With THP, we also clear the slot information with
+ * respect to all the 64K hash pte mapping the 16MB
+ * page. They are all invalid now. This makes sure we
+ * don't find the slot valid when we fault with 4k
+ * base page size.
+ *
+ */
+ memset(hpte_slot_array, 0, PTE_FRAG_SIZE);
+ }
}
valid = hpte_valid(hpte_slot_array, index);
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index e2138c7ae70f..8555fce902fe 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -76,7 +76,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
if (old_pte & _PAGE_F_SECOND)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (old_pte & _PAGE_F_GIX) >> 12;
+ slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT;
if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
mmu_psize, ssize, flags) == -1)
@@ -105,7 +105,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
return -1;
}
- new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
+ new_pte |= (slot << _PAGE_F_GIX_SHIFT) &
+ (_PAGE_F_SECOND | _PAGE_F_GIX);
}
/*
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 744e24bcb85c..6dd272b6196f 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -107,8 +107,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
kmem_cache_free(cachep, new);
else {
#ifdef CONFIG_PPC_BOOK3S_64
- hpdp->pd = (unsigned long)new |
- (shift_to_mmu_psize(pshift) << 2);
+ hpdp->pd = __pa(new) | (shift_to_mmu_psize(pshift) << 2);
#else
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#endif
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 379a6a90644b..ba655666186d 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -85,6 +85,11 @@ static void pgd_ctor(void *addr)
memset(addr, 0, PGD_TABLE_SIZE);
}
+static void pud_ctor(void *addr)
+{
+ memset(addr, 0, PUD_TABLE_SIZE);
+}
+
static void pmd_ctor(void *addr)
{
memset(addr, 0, PMD_TABLE_SIZE);
@@ -138,14 +143,18 @@ void pgtable_cache_init(void)
{
pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
+ /*
+ * In all current configs, when the PUD index exists it's the
+ * same size as either the pgd or pmd index except with THP enabled
+ * on book3s 64
+ */
+ if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
+ pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+
if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX))
panic("Couldn't allocate pgtable caches");
- /* In all current configs, when the PUD index exists it's the
- * same size as either the pgd or pmd index. Verify that the
- * initialization above has also created a PUD cache. This
- * will need re-examiniation if we add new possibilities for
- * the pagetable layout. */
- BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
+ if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
+ panic("Couldn't allocate pud pgtable caches");
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
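pgtable_cache_add() wraps a constructor-backed slab cache; the ctor runs when the allocator populates a slab with fresh objects, not on every allocation, so page tables come back zeroed as long as they are empty again when freed. A hedged sketch of the underlying kmem_cache pattern with illustrative names:

    #include <linux/errno.h>
    #include <linux/init.h>
    #include <linux/slab.h>
    #include <linux/string.h>

    #define TBL_SIZE 4096   /* illustrative table size */

    static struct kmem_cache *tbl_cache;

    static void tbl_ctor(void *addr)
    {
            memset(addr, 0, TBL_SIZE);  /* runs as slabs are populated */
    }

    static int __init tbl_cache_init(void)
    {
            tbl_cache = kmem_cache_create("tbl_cache", TBL_SIZE,
                                          TBL_SIZE, 0, tbl_ctor);
            return tbl_cache ? 0 : -ENOMEM;
    }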
@@ -188,9 +197,9 @@ static int __meminit vmemmap_populated(unsigned long start, int page_size)
*/
#ifdef CONFIG_PPC_BOOK3E
-static void __meminit vmemmap_create_mapping(unsigned long start,
- unsigned long page_size,
- unsigned long phys)
+static int __meminit vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys)
{
/* Create a PTE encoding without page size */
unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
@@ -208,6 +217,8 @@ static void __meminit vmemmap_create_mapping(unsigned long start,
*/
for (i = 0; i < page_size; i += PAGE_SIZE)
BUG_ON(map_kernel_page(start + i, phys, flags));
+
+ return 0;
}
#ifdef CONFIG_MEMORY_HOTPLUG
@@ -217,25 +228,31 @@ static void vmemmap_remove_mapping(unsigned long start,
}
#endif
#else /* CONFIG_PPC_BOOK3E */
-static void __meminit vmemmap_create_mapping(unsigned long start,
- unsigned long page_size,
- unsigned long phys)
+static int __meminit vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys)
{
- int mapped = htab_bolt_mapping(start, start + page_size, phys,
- pgprot_val(PAGE_KERNEL),
- mmu_vmemmap_psize,
- mmu_kernel_ssize);
- BUG_ON(mapped < 0);
+ int rc = htab_bolt_mapping(start, start + page_size, phys,
+ pgprot_val(PAGE_KERNEL),
+ mmu_vmemmap_psize, mmu_kernel_ssize);
+ if (rc < 0) {
+ int rc2 = htab_remove_mapping(start, start + page_size,
+ mmu_vmemmap_psize,
+ mmu_kernel_ssize);
+ BUG_ON(rc2 && (rc2 != -ENOENT));
+ }
+ return rc;
}
#ifdef CONFIG_MEMORY_HOTPLUG
static void vmemmap_remove_mapping(unsigned long start,
unsigned long page_size)
{
- int mapped = htab_remove_mapping(start, start + page_size,
- mmu_vmemmap_psize,
- mmu_kernel_ssize);
- BUG_ON(mapped < 0);
+ int rc = htab_remove_mapping(start, start + page_size,
+ mmu_vmemmap_psize,
+ mmu_kernel_ssize);
+ BUG_ON((rc < 0) && (rc != -ENOENT));
+ WARN_ON(rc == -ENOENT);
}
#endif
@@ -303,6 +320,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
for (; start < end; start += page_size) {
void *p;
+ int rc;
if (vmemmap_populated(start, page_size))
continue;
@@ -316,7 +334,13 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
pr_debug(" * %016lx..%016lx allocated at %p\n",
start, start + page_size, p);
- vmemmap_create_mapping(start, page_size, __pa(p));
+ rc = vmemmap_create_mapping(start, page_size, __pa(p));
+ if (rc < 0) {
+ pr_warning(
+ "vmemmap_populate: Unable to create vmemmap mapping: %d\n",
+ rc);
+ return -EFAULT;
+ }
}
return 0;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index d0f0a514b04e..f980da6d7569 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -119,12 +119,18 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
struct zone *zone;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ int rc;
pgdata = NODE_DATA(nid);
start = (unsigned long)__va(start);
- if (create_section_mapping(start, start + size))
- return -EINVAL;
+ rc = create_section_mapping(start, start + size);
+ if (rc) {
+ pr_warning(
+ "Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
+ start, start + size, rc);
+ return -EFAULT;
+ }
/* this should work for most non-highmem platforms */
zone = pgdata->node_zones +
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 9f58ff44a075..898d63365cdd 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -110,7 +110,8 @@ extern unsigned long Hash_size, Hash_mask;
#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PPC64
-extern int map_kernel_page(unsigned long ea, unsigned long pa, int flags);
+extern int map_kernel_page(unsigned long ea, unsigned long pa,
+ unsigned long flags);
#endif /* CONFIG_PPC64 */
extern unsigned long ioremap_bot;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 3124a20d0fab..0eb53128ca2a 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -88,7 +88,7 @@ static __ref void *early_alloc_pgtable(unsigned long size)
* map_kernel_page adds an entry to the ioremap page table
* and adds an entry to the HPT, possibly bolting it
*/
-int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
+int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
{
pgd_t *pgdp;
pud_t *pudp;
@@ -646,6 +646,28 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
return pgtable;
}
+void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
+
+ /*
+ * We can't mark the pmd none here, because that would cause a race
+ * against exit_mmap. We need to keep the pmd marked TRANS HUGE while
+ * we split, but at the same time we want the rest of the ppc64 code
+ * not to insert a hash pte on this, because we will be modifying
+ * the deposited pgtable in the caller of this function. Hence
+ * clear _PAGE_USER so that we move the fault handling to a
+ * higher level function, which will serialize against the ptl.
+ * We need to flush the existing hash pte entries here even though
+ * the translation is still valid, because we will withdraw the
+ * pgtable_t after this.
+ */
+ pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0);
+}
+
+
/*
* set a new huge pmd. We should not be called for updating
* an existing pmd entry. That should go via pmd_hugepage_update.
@@ -663,10 +685,20 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}
+/*
+ * We use this to invalidate a pmdp entry before switching from a
+ * hugepte to regular pmd entry.
+ */
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
+
+ /*
+ * This ensures that generic code relying on IRQ disabling
+ * to prevent a parallel THP split works as expected.
+ */
+ kick_all_cpus_sync();
}
/*
@@ -717,7 +749,7 @@ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
{
unsigned long pmdv;
- pmdv = pfn << PTE_RPN_SHIFT;
+ pmdv = (pfn << PTE_RPN_SHIFT) & PTE_RPN_MASK;
return pmd_set_protbits(__pmd(pmdv), pgprot);
}
@@ -785,6 +817,13 @@ pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
int has_transparent_hugepage(void)
{
+
+ BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) >= MAX_ORDER,
+ "hugepages can't be allocated by the buddy allocator");
+
+ BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) < 2,
+ "We need more than 2 pages to do deferred thp split");
+
if (!mmu_has_feature(MMU_FTR_16M_PAGE))
return 0;
/*
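The two BUILD_BUG_ON_MSG() checks turn page-geometry invariants into compile failures: a PMD-sized block must stay within MAX_ORDER for the buddy allocator, and must cover more than two base pages for deferred THP split. A sketch of the same construct with stand-in shift values (the real ones come from the page-size Kconfig):

    #include <linux/bug.h>

    #define DEMO_PMD_SHIFT  24      /* e.g. 16M PMD pages */
    #define DEMO_PAGE_SHIFT 16      /* e.g. 64K base pages */
    #define DEMO_MAX_ORDER  11

    static inline void demo_thp_sanity(void)
    {
            /* fails the build, with the message, if the condition holds */
            BUILD_BUG_ON_MSG((DEMO_PMD_SHIFT - DEMO_PAGE_SHIFT) >= DEMO_MAX_ORDER,
                             "hugepages can't be allocated by the buddy allocator");
            BUILD_BUG_ON_MSG((DEMO_PMD_SHIFT - DEMO_PAGE_SHIFT) < 2,
                             "we need more than 2 pages to do deferred thp split");
    }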
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index 29d6987c37ba..eb82d787d99a 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -895,7 +895,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
BEGIN_MMU_FTR_SECTION
virt_page_table_tlb_miss_done:
- /* We have overriden MAS2:EPN but currently our primary TLB miss
+ /* We have overridden MAS2:EPN but currently our primary TLB miss
* handler will always restore it so that should not be an issue,
* if we ever optimize the primary handler to not write MAS2 on
* some cases, we'll have to restore MAS2:EPN here based on the
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
index 68c477592e43..eabecfcaef7c 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -108,7 +108,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
blr
2:
#ifdef CONFIG_PPC_47x
- oris r7,r6,0x8000 /* specify way explicitely */
+ oris r7,r6,0x8000 /* specify way explicitly */
clrrwi r4,r3,12 /* get an EPN for the hashing with V = 0 */
ori r4,r4,PPC47x_TLBE_SIZE
tlbwe r4,r7,0 /* write it */
@@ -149,7 +149,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
li r3,-1 /* Current set */
lis r10,tlb_47x_boltmap@h
ori r10,r10,tlb_47x_boltmap@l
- lis r7,0x8000 /* Specify way explicitely */
+ lis r7,0x8000 /* Specify way explicitly */
b 9f /* For each set */
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index 863d89386f60..c82497a31c54 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -208,7 +208,7 @@ static void pm_rtas_reset_signals(u32 node)
/*
* The debug bus is being set to the passthru disable state.
- * However, the FW still expects atleast one legal signal routing
+ * However, the FW still expects at least one legal signal routing
* entry or it will return an error on the arguments. If we don't
* supply a valid entry, we must ignore all return values. Ignoring
* all return values means we might miss an error we should be
@@ -1008,7 +1008,7 @@ static int initial_lfsr[] = {
*
* To avoid the time to compute the LFSR, a lookup table is used. The 24 bit
* LFSR sequence is broken into four ranges. The spacing of the precomputed
- * values is adjusted in each range so the error between the user specifed
+ * values is adjusted in each range so the error between the user specified
* number (N) of events between samples and the actual number of events based
* on the precomputed value will be less than about 6.2%. Note, if the user
* specifies N < 2^16, the LFSR value that is 2^16 from the end will be used.
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index d1e65ce545b3..97a1d40d8696 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -651,7 +651,7 @@ static void pmao_restore_workaround(bool ebb)
/*
* We are already soft-disabled in power_pmu_enable(). We need to hard
- * enable to actually prevent the PMU exception from firing.
+ * disable to actually prevent the PMU exception from firing.
*/
hard_irq_disable();
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 9f9dfda9ed2c..59012e750abe 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -27,20 +27,6 @@
#include "hv-24x7-catalog.h"
#include "hv-common.h"
-static const char *event_domain_suffix(unsigned domain)
-{
- switch (domain) {
-#define DOMAIN(n, v, x, c) \
- case HV_PERF_DOMAIN_##n: \
- return "__" #n;
-#include "hv-24x7-domains.h"
-#undef DOMAIN
- default:
- WARN(1, "unknown domain %d\n", domain);
- return "__UNKNOWN_DOMAIN_SUFFIX";
- }
-}
-
static bool domain_is_valid(unsigned domain)
{
switch (domain) {
@@ -68,6 +54,24 @@ static bool is_physical_domain(unsigned domain)
}
}
+static const char *domain_name(unsigned domain)
+{
+ if (!domain_is_valid(domain))
+ return NULL;
+
+ switch (domain) {
+ case HV_PERF_DOMAIN_PHYS_CHIP: return "Physical Chip";
+ case HV_PERF_DOMAIN_PHYS_CORE: return "Physical Core";
+ case HV_PERF_DOMAIN_VCPU_HOME_CORE: return "VCPU Home Core";
+ case HV_PERF_DOMAIN_VCPU_HOME_CHIP: return "VCPU Home Chip";
+ case HV_PERF_DOMAIN_VCPU_HOME_NODE: return "VCPU Home Node";
+ case HV_PERF_DOMAIN_VCPU_REMOTE_NODE: return "VCPU Remote Node";
+ }
+
+ WARN_ON_ONCE(domain);
+ return NULL;
+}
+
static bool catalog_entry_domain_is_valid(unsigned domain)
{
return is_physical_domain(domain);
@@ -101,6 +105,7 @@ static bool catalog_entry_domain_is_valid(unsigned domain)
EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3);
/* u16 */
EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31);
+EVENT_DEFINE_RANGE_FORMAT(chip, config, 16, 31);
EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31);
/* u32, see "data_offset" */
EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63);
@@ -115,6 +120,7 @@ static struct attribute *format_attrs[] = {
&format_attr_domain.attr,
&format_attr_offset.attr,
&format_attr_core.attr,
+ &format_attr_chip.attr,
&format_attr_vcpu.attr,
&format_attr_lpar.attr,
NULL,
@@ -274,32 +280,70 @@ static unsigned long h_get_24x7_catalog_page(char page[],
version, index);
}
-static unsigned core_domains[] = {
- HV_PERF_DOMAIN_PHYS_CORE,
- HV_PERF_DOMAIN_VCPU_HOME_CORE,
- HV_PERF_DOMAIN_VCPU_HOME_CHIP,
- HV_PERF_DOMAIN_VCPU_HOME_NODE,
- HV_PERF_DOMAIN_VCPU_REMOTE_NODE,
-};
-/* chip event data always yeilds a single event, core yeilds multiple */
-#define MAX_EVENTS_PER_EVENT_DATA ARRAY_SIZE(core_domains)
-
+/*
+ * Each event we find in the catalog, will have a sysfs entry. Format the
+ * data for this sysfs entry based on the event's domain.
+ *
+ * Events belonging to the Chip domain can only be monitored in that domain.
+ * i.e. the domain for these events is a fixed/known value.
+ *
+ * Events belonging to the Core domain can be monitored either in the physical
+ * core or in one of the virtual CPU domains. So the domain value for these
+ * events must be specified by the user (i.e. it is a required parameter). Format
+ * the Core events with 'domain=?' so the perf-tool can error check required
+ * parameters.
+ *
+ * NOTE: For the Core domain events, rather than making domain a required
+ * parameter we could default it to PHYS_CORE and allow users to
+ * override the domain to one of the VCPU domains.
+ *
+ * However, this can make the interface a little inconsistent.
+ *
+ * If we set domain=2 (PHYS_CORE) and allow the user to override this
+ * field, the user may be tempted to also modify the "offset=x" field,
+ * which can lead to confusing usage. Consider the HPM_PCYC (offset=0x18) and
+ * HPM_INST (offset=0x20) events. With:
+ *
+ * perf stat -e hv_24x7/HPM_PCYC,offset=0x20/
+ *
+ * we end up monitoring HPM_INST, while the command line has HPM_PCYC.
+ *
+ * By not assigning a default value to the domain for the Core events,
+ * we can have simple guidelines:
+ *
+ * - Specifying values for parameters with "=?" is required.
+ *
+ * - Specifying (i.e. overriding) values for other parameters
+ * is undefined.
+ */
static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain)
{
const char *sindex;
const char *lpar;
+ const char *domain_str;
+ char buf[8];
- if (is_physical_domain(domain)) {
+ switch (domain) {
+ case HV_PERF_DOMAIN_PHYS_CHIP:
+ snprintf(buf, sizeof(buf), "%d", domain);
+ domain_str = buf;
+ lpar = "0x0";
+ sindex = "chip";
+ break;
+ case HV_PERF_DOMAIN_PHYS_CORE:
+ domain_str = "?";
lpar = "0x0";
sindex = "core";
- } else {
+ break;
+ default:
+ domain_str = "?";
lpar = "?";
sindex = "vcpu";
}
return kasprintf(GFP_KERNEL,
- "domain=0x%x,offset=0x%x,%s=?,lpar=%s",
- domain,
+ "domain=%s,offset=0x%x,%s=?,lpar=%s",
+ domain_str,
be16_to_cpu(event->event_counter_offs) +
be16_to_cpu(event->event_group_record_offs),
sindex,
@@ -339,6 +383,15 @@ static struct attribute *device_str_attr_create_(char *name, char *str)
return &attr->attr.attr;
}
+/*
+ * Allocate and initialize strings representing event attributes.
+ *
+ * NOTE: The strings allocated here are never destroyed and continue to
+ * exist till shutdown. This is to allow us to create as many events
+ * from the catalog as possible, even if we encounter errors with some.
+ * In case of changes to error paths in future, these may need to be
+ * freed by the caller.
+ */
static struct attribute *device_str_attr_create(char *name, int name_max,
int name_nonce,
char *str, size_t str_max)
@@ -370,16 +423,6 @@ out_s:
return NULL;
}
-static void device_str_attr_destroy(struct attribute *attr)
-{
- struct dev_ext_attribute *d;
-
- d = container_of(attr, struct dev_ext_attribute, attr.attr);
- kfree(d->var);
- kfree(d->attr.attr.name);
- kfree(d);
-}
-
static struct attribute *event_to_attr(unsigned ix,
struct hv_24x7_event_data *event,
unsigned domain,
@@ -387,7 +430,6 @@ static struct attribute *event_to_attr(unsigned ix,
{
int event_name_len;
char *ev_name, *a_ev_name, *val;
- const char *ev_suffix;
struct attribute *attr;
if (!domain_is_valid(domain)) {
@@ -400,14 +442,13 @@ static struct attribute *event_to_attr(unsigned ix,
if (!val)
return NULL;
- ev_suffix = event_domain_suffix(domain);
ev_name = event_name(event, &event_name_len);
if (!nonce)
- a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s",
- (int)event_name_len, ev_name, ev_suffix);
+ a_ev_name = kasprintf(GFP_KERNEL, "%.*s",
+ (int)event_name_len, ev_name);
else
- a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d",
- (int)event_name_len, ev_name, ev_suffix, nonce);
+ a_ev_name = kasprintf(GFP_KERNEL, "%.*s__%d",
+ (int)event_name_len, ev_name, nonce);
if (!a_ev_name)
goto out_val;
@@ -452,45 +493,14 @@ event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
return device_str_attr_create(name, nl, nonce, desc, dl);
}
-static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs,
+static int event_data_to_attrs(unsigned ix, struct attribute **attrs,
struct hv_24x7_event_data *event, int nonce)
{
- unsigned i;
-
- switch (event->domain) {
- case HV_PERF_DOMAIN_PHYS_CHIP:
- *attrs = event_to_attr(ix, event, event->domain, nonce);
- return 1;
- case HV_PERF_DOMAIN_PHYS_CORE:
- for (i = 0; i < ARRAY_SIZE(core_domains); i++) {
- attrs[i] = event_to_attr(ix, event, core_domains[i],
- nonce);
- if (!attrs[i]) {
- pr_warn("catalog event %u: individual attr %u "
- "creation failure\n", ix, i);
- for (; i; i--)
- device_str_attr_destroy(attrs[i - 1]);
- return -1;
- }
- }
- return i;
- default:
- pr_warn("catalog event %u: domain %u is not allowed in the "
- "catalog\n", ix, event->domain);
+ *attrs = event_to_attr(ix, event, event->domain, nonce);
+ if (!*attrs)
return -1;
- }
-}
-static size_t event_to_attr_ct(struct hv_24x7_event_data *event)
-{
- switch (event->domain) {
- case HV_PERF_DOMAIN_PHYS_CHIP:
- return 1;
- case HV_PERF_DOMAIN_PHYS_CORE:
- return ARRAY_SIZE(core_domains);
- default:
- return 0;
- }
+ return 0;
}
static unsigned long vmalloc_to_phys(void *v)
@@ -726,9 +736,8 @@ static int create_events_from_catalog(struct attribute ***events_,
goto e_free;
}
- if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) {
- pr_err("event_entry_count %zu is invalid\n",
- event_entry_count);
+ if (SIZE_MAX - 1 < event_entry_count) {
+ pr_err("event_entry_count %zu is invalid\n", event_entry_count);
ret = -EIO;
goto e_free;
}
@@ -801,7 +810,7 @@ static int create_events_from_catalog(struct attribute ***events_,
continue;
}
- attr_max += event_to_attr_ct(event);
+ attr_max++;
}
event_idx_last = event_idx;
@@ -851,12 +860,12 @@ static int create_events_from_catalog(struct attribute ***events_,
nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
ct = event_data_to_attrs(event_idx, events + event_attr_ct,
event, nonce);
- if (ct <= 0) {
+ if (ct < 0) {
pr_warn("event %zu (%.*s) creation failure, skipping\n",
event_idx, nl, name);
junk_events++;
} else {
- event_attr_ct += ct;
+ event_attr_ct++;
event_descs[desc_ct] = event_to_desc_attr(event, nonce);
if (event_descs[desc_ct])
desc_ct++;
@@ -961,6 +970,27 @@ e_free:
return ret;
}
+static ssize_t domains_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ int d, n, count = 0;
+ const char *str;
+
+ for (d = 0; d < HV_PERF_DOMAIN_MAX; d++) {
+ str = domain_name(d);
+ if (!str)
+ continue;
+
+ n = sprintf(page, "%d: %s\n", d, str);
+ if (n < 0)
+ break;
+
+ count += n;
+ page += n;
+ }
+ return count;
+}
+
#define PAGE_0_ATTR(_name, _fmt, _expr) \
static ssize_t _name##_show(struct device *dev, \
struct device_attribute *dev_attr, \
@@ -989,6 +1019,7 @@ PAGE_0_ATTR(catalog_version, "%lld\n",
PAGE_0_ATTR(catalog_len, "%lld\n",
(unsigned long long)be32_to_cpu(page_0->length) * 4096);
static BIN_ATTR_RO(catalog, 0/* real length varies */);
+static DEVICE_ATTR_RO(domains);
static struct bin_attribute *if_bin_attrs[] = {
&bin_attr_catalog,
@@ -998,6 +1029,7 @@ static struct bin_attribute *if_bin_attrs[] = {
static struct attribute *if_attrs[] = {
&dev_attr_catalog_len.attr,
&dev_attr_catalog_version.attr,
+ &dev_attr_domains.attr,
NULL,
};
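DEVICE_ATTR_RO(domains) works by token pasting: it declares a read-only struct device_attribute named dev_attr_domains whose ->show is the domains_show() defined earlier, which is why the function must follow the <name>_show convention. A minimal sketch with a hypothetical attribute:

    #include <linux/device.h>
    #include <linux/kernel.h>

    /* hypothetical read-only sysfs attribute */
    static ssize_t flavour_show(struct device *dev,
                                struct device_attribute *attr, char *page)
    {
            return sprintf(page, "24x7\n");
    }
    static DEVICE_ATTR_RO(flavour);  /* yields dev_attr_flavour */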
@@ -1089,10 +1121,16 @@ static int add_event_to_24x7_request(struct perf_event *event,
return -EINVAL;
}
- if (is_physical_domain(event_get_domain(event)))
+ switch (event_get_domain(event)) {
+ case HV_PERF_DOMAIN_PHYS_CHIP:
+ idx = event_get_chip(event);
+ break;
+ case HV_PERF_DOMAIN_PHYS_CORE:
idx = event_get_core(event);
- else
+ break;
+ default:
idx = event_get_vcpu(event);
+ }
i = request_buffer->num_requests++;
req = &request_buffer->requests[i];
@@ -1208,11 +1246,12 @@ static int h_24x7_event_init(struct perf_event *event)
return -EACCES;
}
- /* see if the event complains */
+ /* Get the initial value of the counter for this event */
if (single_24x7_request(event, &ct)) {
pr_devel("test hcall failed\n");
return -EIO;
}
+ (void)local64_xchg(&event->hw.prev_count, ct);
return 0;
}
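[Priming event->hw.prev_count here is what turns later reads into deltas of the free-running 24x7 counter. A sketch of the matching update step (the helper name is hypothetical; the driver's actual read path issues another hcall to obtain "now"):

static void update_event_count(struct perf_event *event, u64 now)
{
	s64 prev;

	/* publish the new raw value and accumulate the change since then */
	prev = local64_xchg(&event->hw.prev_count, now);
	local64_add(now - prev, &event->count);
}
]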
@@ -1275,6 +1314,16 @@ static void h_24x7_event_read(struct perf_event *event)
h24x7hw = &get_cpu_var(hv_24x7_hw);
h24x7hw->events[i] = event;
put_cpu_var(h24x7hw);
+ /*
+ * Clear the event count so we can compute the _change_
+ * in the 24x7 raw counter value at the end of the txn.
+ *
+ * Note that we could alternatively read the 24x7 value
+ * now and save its value in event->hw.prev_count. But
+ * that would require issuing a hcall, which would then
+ * defeat the purpose of using the txn interface.
+ */
+ local64_set(&event->count, 0);
}
put_cpu_var(hv_24x7_reqb);
diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h
index 0f9fa21a29f2..791455e7f5cf 100644
--- a/arch/powerpc/perf/hv-24x7.h
+++ b/arch/powerpc/perf/hv-24x7.h
@@ -7,6 +7,7 @@ enum hv_perf_domains {
#define DOMAIN(n, v, x, c) HV_PERF_DOMAIN_##n = v,
#include "hv-24x7-domains.h"
#undef DOMAIN
+ HV_PERF_DOMAIN_MAX,
};
struct hv_24x7_request {
@@ -80,7 +81,7 @@ struct hv_24x7_result {
__u8 results_complete;
__be16 num_elements_returned;
- /* This is a copy of @data_size from the coresponding hv_24x7_request */
+ /* This is a copy of @data_size from the corresponding hv_24x7_request */
__be16 result_element_data_size;
__u8 reserved[0x2];
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 856fe6e03c2a..7aa37236bb70 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -127,8 +127,16 @@ static const struct attribute_group *attr_groups[] = {
NULL,
};
-#define GPCI_MAX_DATA_BYTES \
- (1024 - sizeof(struct hv_get_perf_counter_info_params))
+#define HGPCI_REQ_BUFFER_SIZE 4096
+#define HGPCI_MAX_DATA_BYTES \
+ (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params))
+
+DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
+
+struct hv_gpci_request_buffer {
+ struct hv_get_perf_counter_info_params params;
+ uint8_t bytes[HGPCI_MAX_DATA_BYTES];
+} __packed;
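[HGPCI_MAX_DATA_BYTES is derived by subtracting the params header from the 4KB buffer, so the request struct should tile hv_gpci_reqb exactly. A compile-time check along these lines (not part of the patch; BUILD_BUG_ON() needs a function context, e.g. the driver's init routine) would document that invariant:

/* the params header plus the data area must fill the request buffer */
BUILD_BUG_ON(sizeof(struct hv_gpci_request_buffer) != HGPCI_REQ_BUFFER_SIZE);
]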
static unsigned long single_gpci_request(u32 req, u32 starting_index,
u16 secondary_index, u8 version_in, u32 offset, u8 length,
@@ -137,24 +145,21 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index,
unsigned long ret;
size_t i;
u64 count;
+ struct hv_gpci_request_buffer *arg;
+
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
- struct {
- struct hv_get_perf_counter_info_params params;
- uint8_t bytes[GPCI_MAX_DATA_BYTES];
- } __packed __aligned(sizeof(uint64_t)) arg = {
- .params = {
- .counter_request = cpu_to_be32(req),
- .starting_index = cpu_to_be32(starting_index),
- .secondary_index = cpu_to_be16(secondary_index),
- .counter_info_version_in = version_in,
- }
- };
+ arg->params.counter_request = cpu_to_be32(req);
+ arg->params.starting_index = cpu_to_be32(starting_index);
+ arg->params.secondary_index = cpu_to_be16(secondary_index);
+ arg->params.counter_info_version_in = version_in;
ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
- virt_to_phys(&arg), sizeof(arg));
+ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
if (ret) {
pr_devel("hcall failed: 0x%lx\n", ret);
- return ret;
+ goto out;
}
/*
@@ -163,9 +168,11 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index,
*/
count = 0;
for (i = offset; i < offset + length; i++)
- count |= arg.bytes[i] << (i - offset);
+ count |= arg->bytes[i] << (i - offset);
*value = count;
+out:
+ put_cpu_var(hv_gpci_reqb);
return ret;
}
@@ -245,10 +252,10 @@ static int h_gpci_event_init(struct perf_event *event)
}
/* last byte within the buffer? */
- if ((event_get_offset(event) + length) > GPCI_MAX_DATA_BYTES) {
+ if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) {
pr_devel("request outside of buffer: %zu > %zu\n",
(size_t)event_get_offset(event) + length,
- GPCI_MAX_DATA_BYTES);
+ HGPCI_MAX_DATA_BYTES);
return -EINVAL;
}
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 5b62f2389290..a383c23a9070 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -54,7 +54,7 @@
* Power7 event codes.
*/
#define EVENT(_name, _code) \
- PME_##_name = _code,
+ _name = _code,
enum {
#include "power7-events-list.h"
@@ -318,14 +318,14 @@ static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[])
}
static int power7_generic_events[] = {
- [PERF_COUNT_HW_CPU_CYCLES] = PME_PM_CYC,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PME_PM_GCT_NOSLOT_CYC,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PME_PM_CMPLU_STALL,
- [PERF_COUNT_HW_INSTRUCTIONS] = PME_PM_INST_CMPL,
- [PERF_COUNT_HW_CACHE_REFERENCES] = PME_PM_LD_REF_L1,
- [PERF_COUNT_HW_CACHE_MISSES] = PME_PM_LD_MISS_L1,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PME_PM_BRU_FIN,
- [PERF_COUNT_HW_BRANCH_MISSES] = PME_PM_BR_MPRED,
+ [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_GCT_NOSLOT_CYC,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL,
+ [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
+ [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_FIN,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED,
};
#define C(x) PERF_COUNT_HW_CACHE_##x
diff --git a/arch/powerpc/perf/power8-events-list.h b/arch/powerpc/perf/power8-events-list.h
new file mode 100644
index 000000000000..741b77edd03e
--- /dev/null
+++ b/arch/powerpc/perf/power8-events-list.h
@@ -0,0 +1,51 @@
+/*
+ * Performance counter support for POWER8 processors.
+ *
+ * Copyright 2014 Sukadev Bhattiprolu, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Power8 event codes.
+ */
+EVENT(PM_CYC, 0x0001e)
+EVENT(PM_GCT_NOSLOT_CYC, 0x100f8)
+EVENT(PM_CMPLU_STALL, 0x4000a)
+EVENT(PM_INST_CMPL, 0x00002)
+EVENT(PM_BRU_FIN, 0x10068)
+EVENT(PM_BR_MPRED_CMPL, 0x400f6)
+
+/* All L1 D cache load references counted at finish, gated by reject */
+EVENT(PM_LD_REF_L1, 0x100ee)
+/* Load Missed L1 */
+EVENT(PM_LD_MISS_L1, 0x3e054)
+/* Store Missed L1 */
+EVENT(PM_ST_MISS_L1, 0x300f0)
+/* L1 cache data prefetches */
+EVENT(PM_L1_PREF, 0x0d8b8)
+/* Instruction fetches from L1 */
+EVENT(PM_INST_FROM_L1, 0x04080)
+/* Demand iCache Miss */
+EVENT(PM_L1_ICACHE_MISS, 0x200fd)
+/* Instruction Demand sectors written into IL1 */
+EVENT(PM_L1_DEMAND_WRITE, 0x0408c)
+/* Instruction prefetch written into IL1 */
+EVENT(PM_IC_PREF_WRITE, 0x0408e)
+/* The data cache was reloaded from local core's L3 due to a demand load */
+EVENT(PM_DATA_FROM_L3, 0x4c042)
+/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+EVENT(PM_DATA_FROM_L3MISS, 0x300fe)
+/* All successful D-side store dispatches for this thread */
+EVENT(PM_L2_ST, 0x17080)
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+EVENT(PM_L2_ST_MISS, 0x17082)
+/* Total HW L3 prefetches(Load+store) */
+EVENT(PM_L3_PREF_ALL, 0x4e052)
+/* Data PTEG reload */
+EVENT(PM_DTLB_MISS, 0x300fc)
+/* ITLB Reloaded */
+EVENT(PM_ITLB_MISS, 0x400fc)
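[This header is an X-macro list: it deliberately has no include guard, and each includer defines EVENT() to stamp the list into a different construct before including it. The first expansion below is the one power8-pmu.c uses later in this patch; the second is a hypothetical further use of the same list, shown only to illustrate the pattern:

/* expansion 1: an enum of event codes (as power8-pmu.c does) */
#define EVENT(_name, _code)	_name = _code,
enum {
#include "power8-events-list.h"
};
#undef EVENT

/* expansion 2 (hypothetical): a printable name/code table */
#define EVENT(_name, _code)	{ #_name, _code },
static const struct {
	const char *name;
	unsigned long code;
} power8_event_table[] = {
#include "power8-events-list.h"
};
#undef EVENT
]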
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 9958ba8bf0d2..690d9186a855 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -17,48 +17,16 @@
#include <asm/firmware.h>
#include <asm/cputable.h>
-
/*
* Some power8 event codes.
*/
-#define PM_CYC 0x0001e
-#define PM_GCT_NOSLOT_CYC 0x100f8
-#define PM_CMPLU_STALL 0x4000a
-#define PM_INST_CMPL 0x00002
-#define PM_BRU_FIN 0x10068
-#define PM_BR_MPRED_CMPL 0x400f6
-
-/* All L1 D cache load references counted at finish, gated by reject */
-#define PM_LD_REF_L1 0x100ee
-/* Load Missed L1 */
-#define PM_LD_MISS_L1 0x3e054
-/* Store Missed L1 */
-#define PM_ST_MISS_L1 0x300f0
-/* L1 cache data prefetches */
-#define PM_L1_PREF 0x0d8b8
-/* Instruction fetches from L1 */
-#define PM_INST_FROM_L1 0x04080
-/* Demand iCache Miss */
-#define PM_L1_ICACHE_MISS 0x200fd
-/* Instruction Demand sectors wriittent into IL1 */
-#define PM_L1_DEMAND_WRITE 0x0408c
-/* Instruction prefetch written into IL1 */
-#define PM_IC_PREF_WRITE 0x0408e
-/* The data cache was reloaded from local core's L3 due to a demand load */
-#define PM_DATA_FROM_L3 0x4c042
-/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
-#define PM_DATA_FROM_L3MISS 0x300fe
-/* All successful D-side store dispatches for this thread */
-#define PM_L2_ST 0x17080
-/* All successful D-side store dispatches for this thread that were L2 Miss */
-#define PM_L2_ST_MISS 0x17082
-/* Total HW L3 prefetches(Load+store) */
-#define PM_L3_PREF_ALL 0x4e052
-/* Data PTEG reload */
-#define PM_DTLB_MISS 0x300fc
-/* ITLB Reloaded */
-#define PM_ITLB_MISS 0x400fc
+#define EVENT(_name, _code) _name = _code,
+
+enum {
+#include "power8-events-list.h"
+};
+#undef EVENT
/*
* Raw event encoding for POWER8:
@@ -415,7 +383,7 @@ static int power8_compute_mmcr(u64 event[], int n_ev,
pmc_inuse |= 1 << pmc;
}
- /* In continous sampling mode, update SDAR on TLB miss */
+ /* In continuous sampling mode, update SDAR on TLB miss */
mmcra = MMCRA_SDAR_MODE_TLB;
mmcr1 = mmcr2 = 0;
@@ -604,6 +572,71 @@ static void power8_disable_pmc(unsigned int pmc, unsigned long mmcr[])
mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1));
}
+GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_GCT_NOSLOT_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL);
+GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
+GENERIC_EVENT_ATTR(branch-instructions, PM_BRU_FIN);
+GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
+GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1);
+
+CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1);
+CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1);
+
+CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_L1_PREF);
+CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1);
+CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_WRITE);
+
+CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3);
+CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL);
+CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS);
+CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST);
+
+CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(branch-loads, PM_BRU_FIN);
+CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
+
+static struct attribute *power8_events_attr[] = {
+ GENERIC_EVENT_PTR(PM_CYC),
+ GENERIC_EVENT_PTR(PM_GCT_NOSLOT_CYC),
+ GENERIC_EVENT_PTR(PM_CMPLU_STALL),
+ GENERIC_EVENT_PTR(PM_INST_CMPL),
+ GENERIC_EVENT_PTR(PM_BRU_FIN),
+ GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+ GENERIC_EVENT_PTR(PM_LD_REF_L1),
+ GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+
+ CACHE_EVENT_PTR(PM_LD_MISS_L1),
+ CACHE_EVENT_PTR(PM_LD_REF_L1),
+ CACHE_EVENT_PTR(PM_L1_PREF),
+ CACHE_EVENT_PTR(PM_ST_MISS_L1),
+ CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+ CACHE_EVENT_PTR(PM_INST_FROM_L1),
+ CACHE_EVENT_PTR(PM_IC_PREF_WRITE),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+ CACHE_EVENT_PTR(PM_L3_PREF_ALL),
+ CACHE_EVENT_PTR(PM_L2_ST_MISS),
+ CACHE_EVENT_PTR(PM_L2_ST),
+
+ CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+ CACHE_EVENT_PTR(PM_BRU_FIN),
+
+ CACHE_EVENT_PTR(PM_DTLB_MISS),
+ CACHE_EVENT_PTR(PM_ITLB_MISS),
+ NULL
+};
+
+static struct attribute_group power8_pmu_events_group = {
+ .name = "events",
+ .attrs = power8_events_attr,
+};
+
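[Each GENERIC_EVENT_ATTR()/CACHE_EVENT_ATTR() line becomes one file under /sys/bus/event_source/devices/<pmu>/events/ whose contents give the raw encoding. Assuming the macros wrap the common PMU_EVENT_ATTR() plumbing from perf_event_server.h (the variable and show-routine names below are an assumption), one entry boils down to roughly:

/* approximate expansion of GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC) */
PMU_EVENT_ATTR(cpu-cycles, event_attr_PM_CYC_g, PM_CYC,
	       power_events_sysfs_show);

Userspace can then use the symbolic name directly, e.g. "perf stat -e cpu-cycles".]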
PMU_FORMAT_ATTR(event, "config:0-49");
PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
PMU_FORMAT_ATTR(mark, "config:8");
@@ -640,6 +673,7 @@ struct attribute_group power8_pmu_format_group = {
static const struct attribute_group *power8_pmu_attr_groups[] = {
&power8_pmu_format_group,
+ &power8_pmu_events_group,
NULL,
};
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
index 6eb3b2abae90..00282c2b0cae 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pci.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
@@ -319,7 +319,7 @@ mpc52xx_pci_setup(struct pci_controller *hose,
tmp = in_be32(&pci_regs->gscr);
#if 0
- /* Reset the exteral bus ( internal PCI controller is NOT resetted ) */
+ /* Reset the external bus ( internal PCI controller is NOT reset ) */
/* Not necessary and can be a bad thing if for example the bootloader
is displaying a splash screen or ... Just left here for
documentation purpose if anyone need it */
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 5ac70de3e48a..d7e87ff912d7 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -99,7 +99,7 @@ static void mpc85xx_cds_restart(char *cmd)
pci_read_config_byte(dev, 0x47, &tmp);
/*
- * At this point, the harware reset should have triggered.
+ * At this point, the hardware reset should have triggered.
* However, if it doesn't work for some mysterious reason,
* just fall through to the default reset below.
*/
diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S
index 6be1a4af3359..cc5347eb1662 100644
--- a/arch/powerpc/platforms/powermac/cache.S
+++ b/arch/powerpc/platforms/powermac/cache.S
@@ -23,7 +23,7 @@
* when going to sleep, when doing a PMU based cpufreq transition,
* or when "offlining" a CPU on SMP machines. This code is over
* paranoid, but I've had enough issues with various CPU revs and
- * bugs that I decided it was worth beeing over cautious
+ * bugs that I decided it was worth being over cautious
*/
_GLOBAL(flush_disable_caches)
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 4882bfd90e27..1e02328c3f2d 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -198,7 +198,7 @@ static long ohare_htw_scc_enable(struct device_node *node, long param,
if (htw) {
/* Side effect: this will also power up the
* modem, but it's too messy to figure out on which
- * ports this controls the tranceiver and on which
+ * ports this controls the transceiver and on which
* it controls the modem
*/
if (trans)
@@ -463,7 +463,7 @@ static long heathrow_sound_enable(struct device_node *node, long param,
unsigned long flags;
/* B&W G3 and Yikes don't support that properly (the
- * sound appear to never come back after beeing shut down).
+ * sound appears to never come back after being shut down).
*/
if (pmac_mb.model_id == PMAC_TYPE_YOSEMITE ||
pmac_mb.model_id == PMAC_TYPE_YIKES)
@@ -2770,7 +2770,7 @@ set_initial_features(void)
* but I'm not too sure it was audited for side-effects on other
* ohare based machines...
* Since I still have difficulties figuring the right way to
- * differenciate them all and since that hack was there for a long
+ * differentiate them all and since that hack was there for a long
* time, I'll keep it around
*/
if (macio_chips[0].type == macio_ohare) {
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index f1516b5ecec9..cd9711e72df6 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -5,7 +5,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
obj-y += opal-kmsg.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
-obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o npu-dma.o
+obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o
obj-$(CONFIG_EEH) += eeh-powernv.o
obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 5f152b95ca0c..950b3e539057 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -167,42 +167,26 @@ static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val)
return 0;
}
-static int pnv_eeh_outb_dbgfs_set(void *data, u64 val)
-{
- return pnv_eeh_dbgfs_set(data, 0xD10, val);
-}
-
-static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val)
-{
- return pnv_eeh_dbgfs_get(data, 0xD10, val);
-}
-
-static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val)
-{
- return pnv_eeh_dbgfs_set(data, 0xD90, val);
-}
-
-static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val)
-{
- return pnv_eeh_dbgfs_get(data, 0xD90, val);
-}
-
-static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val)
-{
- return pnv_eeh_dbgfs_set(data, 0xE10, val);
-}
+#define PNV_EEH_DBGFS_ENTRY(name, reg) \
+static int pnv_eeh_dbgfs_set_##name(void *data, u64 val) \
+{ \
+ return pnv_eeh_dbgfs_set(data, reg, val); \
+} \
+ \
+static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val) \
+{ \
+ return pnv_eeh_dbgfs_get(data, reg, val); \
+} \
+ \
+DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name, \
+ pnv_eeh_dbgfs_get_##name, \
+ pnv_eeh_dbgfs_set_##name, \
+ "0x%llx\n")
+
+PNV_EEH_DBGFS_ENTRY(outb, 0xD10);
+PNV_EEH_DBGFS_ENTRY(inbA, 0xD90);
+PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
-static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val)
-{
- return pnv_eeh_dbgfs_get(data, 0xE10, val);
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get,
- pnv_eeh_outb_dbgfs_set, "0x%llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get,
- pnv_eeh_inbA_dbgfs_set, "0x%llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get,
- pnv_eeh_inbB_dbgfs_set, "0x%llx\n");
#endif /* CONFIG_DEBUG_FS */
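[For reference, the macro reproduces exactly what it replaces: PNV_EEH_DBGFS_ENTRY(outb, 0xD10) preprocesses to the same three definitions as the hand-written ones removed above, only with the ops renamed:

static int pnv_eeh_dbgfs_set_outb(void *data, u64 val)
{
	return pnv_eeh_dbgfs_set(data, 0xD10, val);
}

static int pnv_eeh_dbgfs_get_outb(void *data, u64 *val)
{
	return pnv_eeh_dbgfs_get(data, 0xD10, val);
}

DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_outb, pnv_eeh_dbgfs_get_outb,
			pnv_eeh_dbgfs_set_outb, "0x%llx\n");
]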
/**
@@ -268,13 +252,13 @@ static int pnv_eeh_post_init(void)
debugfs_create_file("err_injct_outbound", 0600,
phb->dbgfs, hose,
- &pnv_eeh_outb_dbgfs_ops);
+ &pnv_eeh_dbgfs_ops_outb);
debugfs_create_file("err_injct_inboundA", 0600,
phb->dbgfs, hose,
- &pnv_eeh_inbA_dbgfs_ops);
+ &pnv_eeh_dbgfs_ops_inbA);
debugfs_create_file("err_injct_inboundB", 0600,
phb->dbgfs, hose,
- &pnv_eeh_inbB_dbgfs_ops);
+ &pnv_eeh_dbgfs_ops_inbB);
#endif /* CONFIG_DEBUG_FS */
}
@@ -387,6 +371,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
edev->mode &= 0xFFFFFF00;
edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
+ edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF);
edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
edev->mode |= EEH_DEV_BRIDGE;
@@ -444,9 +429,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
* PCI devices of the PE are expected to be removed prior
* to PE reset.
*/
- if (!edev->pe->bus)
+ if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
edev->pe->bus = pci_find_bus(hose->global_number,
pdn->busno);
+ if (edev->pe->bus)
+ edev->pe->state |= EEH_PE_PRI_BUS;
+ }
/*
* Enable EEH explicitly so that we will do EEH check
@@ -892,6 +880,120 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
}
}
+static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
+ int pos, u16 mask)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ int i, status = 0;
+
+ /* Wait for Transaction Pending bit to be cleared */
+ for (i = 0; i < 4; i++) {
+ eeh_ops->read_config(pdn, pos, 2, &status);
+ if (!(status & mask))
+ return;
+
+ msleep((1 << i) * 100);
+ }
+
+ pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n",
+ __func__, type,
+ edev->phb->global_number, pdn->busno,
+ PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+}
+
+static int pnv_eeh_do_flr(struct pci_dn *pdn, int option)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ u32 reg = 0;
+
+ if (WARN_ON(!edev->pcie_cap))
+ return -ENOTTY;
+
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, &reg);
+ if (!(reg & PCI_EXP_DEVCAP_FLR))
+ return -ENOTTY;
+
+ switch (option) {
+ case EEH_RESET_HOT:
+ case EEH_RESET_FUNDAMENTAL:
+ pnv_eeh_wait_for_pending(pdn, "",
+ edev->pcie_cap + PCI_EXP_DEVSTA,
+ PCI_EXP_DEVSTA_TRPND);
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 4, &reg);
+ reg |= PCI_EXP_DEVCTL_BCR_FLR;
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 4, reg);
+ msleep(EEH_PE_RST_HOLD_TIME);
+ break;
+ case EEH_RESET_DEACTIVATE:
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 4, &reg);
+ reg &= ~PCI_EXP_DEVCTL_BCR_FLR;
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 4, reg);
+ msleep(EEH_PE_RST_SETTLE_TIME);
+ break;
+ }
+
+ return 0;
+}
+
+static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ u32 cap = 0;
+
+ if (WARN_ON(!edev->af_cap))
+ return -ENOTTY;
+
+ eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap);
+ if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR))
+ return -ENOTTY;
+
+ switch (option) {
+ case EEH_RESET_HOT:
+ case EEH_RESET_FUNDAMENTAL:
+ /*
+ * Wait for Transaction Pending bit to clear. A word-aligned
+ * test is used, so we use the control offset rather than status
+ * and shift the test bit to match.
+ */
+ pnv_eeh_wait_for_pending(pdn, "AF",
+ edev->af_cap + PCI_AF_CTRL,
+ PCI_AF_STATUS_TP << 8);
+ eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL,
+ 1, PCI_AF_CTRL_FLR);
+ msleep(EEH_PE_RST_HOLD_TIME);
+ break;
+ case EEH_RESET_DEACTIVATE:
+ eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0);
+ msleep(EEH_PE_RST_SETTLE_TIME);
+ break;
+ }
+
+ return 0;
+}
+
+static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option)
+{
+ struct eeh_dev *edev;
+ struct pci_dn *pdn;
+ int ret;
+
+ /* The VF PE should have only one child device */
+ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list);
+ pdn = eeh_dev_to_pdn(edev);
+ if (!pdn)
+ return -ENXIO;
+
+ ret = pnv_eeh_do_flr(pdn, option);
+ if (!ret)
+ return ret;
+
+ return pnv_eeh_do_af_flr(pdn, option);
+}
+
/**
* pnv_eeh_reset - Reset the specified PE
* @pe: EEH PE
@@ -953,7 +1055,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
}
bus = eeh_pe_bus_get(pe);
- if (pci_is_root_bus(bus) ||
+ if (pe->type & EEH_PE_VF)
+ ret = pnv_eeh_reset_vf_pe(pe, option);
+ else if (pci_is_root_bus(bus) ||
pci_is_root_bus(bus->parent))
ret = pnv_eeh_root_reset(hose, option);
else
@@ -1092,6 +1196,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn)
if (!edev || !edev->pe)
return false;
+ /*
+ * We will issue an FLR or AF FLR to all VFs, which are contained
+ * in the VF PE. That relies on the EEH PCI config accessors, so
+ * we can't block them during the reset window.
+ */
+ if (edev->physfn && (edev->pe->state & EEH_PE_RESET))
+ return false;
+
if (edev->pe->state & EEH_PE_CFG_BLOCKED)
return true;
@@ -1476,6 +1588,65 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
return ret;
}
+static int pnv_eeh_restore_vf_config(struct pci_dn *pdn)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ u32 devctl, cmd, cap2, aer_capctl;
+ int old_mps;
+
+ if (edev->pcie_cap) {
+ /* Restore MPS */
+ old_mps = (ffs(pdn->mps) - 8) << 5;
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, &devctl);
+ devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
+ devctl |= old_mps;
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, devctl);
+
+ /* Disable Completion Timeout */
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
+ 4, &cap2);
+ if (cap2 & 0x10) {
+ eeh_ops->read_config(pdn,
+ edev->pcie_cap + PCI_EXP_DEVCTL2,
+ 4, &cap2);
+ cap2 |= 0x10;
+ eeh_ops->write_config(pdn,
+ edev->pcie_cap + PCI_EXP_DEVCTL2,
+ 4, cap2);
+ }
+ }
+
+ /* Enable SERR and parity checking */
+ eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
+ cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
+ eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
+
+ /* Enable reporting of various errors */
+ if (edev->pcie_cap) {
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, &devctl);
+ devctl &= ~PCI_EXP_DEVCTL_CERE;
+ devctl |= (PCI_EXP_DEVCTL_NFERE |
+ PCI_EXP_DEVCTL_FERE |
+ PCI_EXP_DEVCTL_URRE);
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, devctl);
+ }
+
+ /* Enable ECRC generation and check */
+ if (edev->pcie_cap && edev->aer_cap) {
+ eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+ 4, &aer_capctl);
+ aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
+ eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+ 4, aer_capctl);
+ }
+
+ return 0;
+}
+
static int pnv_eeh_restore_config(struct pci_dn *pdn)
{
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -1485,9 +1656,21 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
if (!edev)
return -EEXIST;
- phb = edev->phb->private_data;
- ret = opal_pci_reinit(phb->opal_id,
- OPAL_REINIT_PCI_DEV, edev->config_addr);
+ /*
+ * We have to restore the PCI config space after reset since the
+ * firmware can't see SRIOV VFs.
+ *
+ * FIXME: The MPS, error routing rules and timeout setting are
+ * worth exporting from firmware in an extensible way.
+ */
+ if (edev->physfn) {
+ ret = pnv_eeh_restore_vf_config(pdn);
+ } else {
+ phb = edev->phb->private_data;
+ ret = opal_pci_reinit(phb->opal_id,
+ OPAL_REINIT_PCI_DEV, edev->config_addr);
+ }
+
if (ret) {
pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
__func__, edev->config_addr, ret);
@@ -1516,6 +1699,40 @@ static struct eeh_ops pnv_eeh_ops = {
.restore_config = pnv_eeh_restore_config
};
+void pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+
+ if (!pdev->is_virtfn)
+ return;
+
+ /*
+ * The following operations will fail if the VF's sysfs files
+ * aren't created or its resources aren't finalized.
+ */
+ eeh_add_device_early(pdn);
+ eeh_add_device_late(pdev);
+ eeh_sysfs_add_device(pdev);
+}
+
+#ifdef CONFIG_PCI_IOV
+static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev)
+{
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+ int parent_mps;
+
+ if (!pdev->is_virtfn)
+ return;
+
+ /* Synchronize MPS for VF and PF */
+ parent_mps = pcie_get_mps(pdev->physfn);
+ if ((128 << pdev->pcie_mpss) >= parent_mps)
+ pcie_set_mps(pdev, parent_mps);
+ pdn->mps = pcie_get_mps(pdev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps);
+#endif /* CONFIG_PCI_IOV */
+
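[PCIe encodes payload sizes as powers of two starting at 128 bytes, so the 3-bit Max Payload Size Supported field in pdev->pcie_mpss translates as 128 << field. A small sketch of the arithmetic behind the clamp above (the helper name is illustrative):

/* translate the Device Capabilities MPSS field to bytes */
static inline int mpss_to_bytes(u8 pcie_mpss)
{
	return 128 << pcie_mpss;	/* 0 -> 128B, 1 -> 256B, 2 -> 512B ... */
}

So a VF advertising pcie_mpss = 2 supports up to 512 bytes and can safely be clamped down to a PF MPS of 256 bytes, which is the value the fixup then records in pdn->mps.]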
/**
* eeh_powernv_init - Register platform dependent EEH operations
*
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 15bfbcd5debc..fcc8b6861b63 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -35,9 +35,9 @@ int pnv_save_sprs_for_winkle(void)
int rc;
/*
- * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross
+ * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
* all cpus at boot. Get these reg values of current cpu and use the
- * same accross all cpus.
+ * same across all cpus.
*/
uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
uint64_t hid0_val = mfspr(SPRN_HID0);
@@ -185,7 +185,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
* fastsleep workaround needs to be left in 'applied' state on all
* the cores. Do this by-
* 1. Patching out the call to 'undo' workaround in fastsleep exit path
- * 2. Sending ipi to all the cores which have atleast one online thread
+ * 2. Sending ipi to all the cores which have at least one online thread
* 3. Patching out the call to 'apply' workaround in fastsleep entry
* path
* There is no need to send ipi to cores which have all threads
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index e85aa900f5c0..7229acd9bb3a 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -278,7 +278,7 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
/*
* Enable/disable bypass mode on the NPU. The NPU only supports one
- * window per link, so bypass needs to be explicity enabled or
+ * window per link, so bypass needs to be explicitly enabled or
* disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
* active at the same time.
*/
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
index 44ed78af1a0d..39d6ff9e5630 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -31,26 +31,25 @@ struct memcons {
__be32 in_cons;
};
-static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *to,
- loff_t pos, size_t count)
+static struct memcons *opal_memcons = NULL;
+
+ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
{
- struct memcons *mc = bin_attr->private;
const char *conbuf;
ssize_t ret;
size_t first_read = 0;
uint32_t out_pos, avail;
- if (!mc)
+ if (!opal_memcons)
return -ENODEV;
- out_pos = be32_to_cpu(ACCESS_ONCE(mc->out_pos));
+ out_pos = be32_to_cpu(ACCESS_ONCE(opal_memcons->out_pos));
/* Now we've read out_pos, put a barrier in before reading the new
* data it points to in conbuf. */
smp_rmb();
- conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys));
+ conbuf = phys_to_virt(be64_to_cpu(opal_memcons->obuf_phys));
/* When the buffer has wrapped, read from the out_pos marker to the end
* of the buffer, and then read the remaining data as in the un-wrapped
@@ -58,7 +57,7 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
if (out_pos & MEMCONS_OUT_POS_WRAP) {
out_pos &= MEMCONS_OUT_POS_MASK;
- avail = be32_to_cpu(mc->obuf_size) - out_pos;
+ avail = be32_to_cpu(opal_memcons->obuf_size) - out_pos;
ret = memory_read_from_buffer(to, count, &pos,
conbuf + out_pos, avail);
@@ -76,7 +75,7 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
}
/* Sanity check. The firmware should not do this to us. */
- if (out_pos > be32_to_cpu(mc->obuf_size)) {
+ if (out_pos > be32_to_cpu(opal_memcons->obuf_size)) {
pr_err("OPAL: memory console corruption. Aborting read.\n");
return -EINVAL;
}
@@ -91,6 +90,13 @@ out:
return ret;
}
+static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *to,
+ loff_t pos, size_t count)
+{
+ return opal_msglog_copy(to, pos, count);
+}
+
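[Splitting the copy out of the sysfs read callback gives the rest of the kernel access to the OPAL console ring. A hypothetical in-kernel consumer of the new opal_msglog_copy() export:

static void dump_opal_msglog_head(void)
{
	char buf[128];
	ssize_t n;

	/* fetch the first bytes of the OPAL console */
	n = opal_msglog_copy(buf, 0, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		pr_info("OPAL msglog: %s\n", buf);
	}
}
]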
static struct bin_attribute opal_msglog_attr = {
.attr = {.name = "msglog", .mode = 0444},
.read = opal_msglog_read
@@ -117,7 +123,15 @@ void __init opal_msglog_init(void)
return;
}
- opal_msglog_attr.private = mc;
+ opal_memcons = mc;
+}
+
+void __init opal_msglog_sysfs_init(void)
+{
+ if (!opal_memcons) {
+ pr_warn("OPAL: message log initialisation failed, not creating sysfs entry\n");
+ return;
+ }
if (sysfs_create_bin_file(opal_kobj, &opal_msglog_attr) != 0)
pr_warn("OPAL: sysfs file creation failed\n");
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 4e0da5af94a1..0256d0729252 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -724,6 +724,9 @@ static int __init opal_init(void)
of_node_put(leds);
}
+ /* Initialise OPAL message log interface */
+ opal_msglog_init();
+
/* Create "opal" kobject under /sys/firmware */
rc = opal_sysfs_init();
if (rc == 0) {
@@ -739,8 +742,8 @@ static int __init opal_init(void)
opal_platform_dump_init();
/* Setup system parameters interface */
opal_sys_param_init();
- /* Setup message log interface. */
- opal_msglog_init();
+ /* Setup message log sysfs interface. */
+ opal_msglog_sysfs_init();
}
/* Initialize platform devices: IPMI backend, PRD & flash interface */
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 573ae1994097..c5baaf3cc4e5 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -872,9 +872,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
if (!res->flags || !res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags))
- continue;
-
/*
* The actual IOV BAR range is determined by the start address
* and the actual size for num_vfs VFs BAR. This check is to
@@ -903,9 +900,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
if (!res->flags || !res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags))
- continue;
-
size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
res2 = *res;
res->start += size * offset;
@@ -1196,29 +1190,36 @@ static void pnv_pci_ioda_setup_PEs(void)
}
#ifdef CONFIG_PCI_IOV
-static int pnv_pci_vf_release_m64(struct pci_dev *pdev)
+static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
{
struct pci_bus *bus;
struct pci_controller *hose;
struct pnv_phb *phb;
struct pci_dn *pdn;
int i, j;
+ int m64_bars;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
phb = hose->private_data;
pdn = pci_get_pdn(pdev);
+ if (pdn->m64_single_mode)
+ m64_bars = num_vfs;
+ else
+ m64_bars = 1;
+
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
- for (j = 0; j < M64_PER_IOV; j++) {
- if (pdn->m64_wins[i][j] == IODA_INVALID_M64)
+ for (j = 0; j < m64_bars; j++) {
+ if (pdn->m64_map[j][i] == IODA_INVALID_M64)
continue;
opal_pci_phb_mmio_enable(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 0);
- clear_bit(pdn->m64_wins[i][j], &phb->ioda.m64_bar_alloc);
- pdn->m64_wins[i][j] = IODA_INVALID_M64;
+ OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0);
+ clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc);
+ pdn->m64_map[j][i] = IODA_INVALID_M64;
}
+ kfree(pdn->m64_map);
return 0;
}
@@ -1235,8 +1236,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
int total_vfs;
resource_size_t size, start;
int pe_num;
- int vf_groups;
- int vf_per_group;
+ int m64_bars;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1244,29 +1244,26 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
pdn = pci_get_pdn(pdev);
total_vfs = pci_sriov_get_totalvfs(pdev);
- /* Initialize the m64_wins to IODA_INVALID_M64 */
- for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
- for (j = 0; j < M64_PER_IOV; j++)
- pdn->m64_wins[i][j] = IODA_INVALID_M64;
+ if (pdn->m64_single_mode)
+ m64_bars = num_vfs;
+ else
+ m64_bars = 1;
+
+ pdn->m64_map = kmalloc(sizeof(*pdn->m64_map) * m64_bars, GFP_KERNEL);
+ if (!pdn->m64_map)
+ return -ENOMEM;
+ /* Initialize the m64_map to IODA_INVALID_M64 */
+ for (i = 0; i < m64_bars; i++)
+ for (j = 0; j < PCI_SRIOV_NUM_BARS; j++)
+ pdn->m64_map[i][j] = IODA_INVALID_M64;
- if (pdn->m64_per_iov == M64_PER_IOV) {
- vf_groups = (num_vfs <= M64_PER_IOV) ? num_vfs: M64_PER_IOV;
- vf_per_group = (num_vfs <= M64_PER_IOV)? 1:
- roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
- } else {
- vf_groups = 1;
- vf_per_group = 1;
- }
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = &pdev->resource[i + PCI_IOV_RESOURCES];
if (!res->flags || !res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags))
- continue;
-
- for (j = 0; j < vf_groups; j++) {
+ for (j = 0; j < m64_bars; j++) {
do {
win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
phb->ioda.m64_bar_idx + 1, 0);
@@ -1275,12 +1272,11 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
goto m64_failed;
} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
- pdn->m64_wins[i][j] = win;
+ pdn->m64_map[j][i] = win;
- if (pdn->m64_per_iov == M64_PER_IOV) {
+ if (pdn->m64_single_mode) {
size = pci_iov_resource_size(pdev,
PCI_IOV_RESOURCES + i);
- size = size * vf_per_group;
start = res->start + size * j;
} else {
size = resource_size(res);
@@ -1288,16 +1284,16 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
}
/* Map the M64 here */
- if (pdn->m64_per_iov == M64_PER_IOV) {
- pe_num = pdn->offset + j;
+ if (pdn->m64_single_mode) {
+ pe_num = pdn->pe_num_map[j];
rc = opal_pci_map_pe_mmio_window(phb->opal_id,
pe_num, OPAL_M64_WINDOW_TYPE,
- pdn->m64_wins[i][j], 0);
+ pdn->m64_map[j][i], 0);
}
rc = opal_pci_set_phb_mem_window(phb->opal_id,
OPAL_M64_WINDOW_TYPE,
- pdn->m64_wins[i][j],
+ pdn->m64_map[j][i],
start,
0, /* unused */
size);
@@ -1309,12 +1305,12 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
goto m64_failed;
}
- if (pdn->m64_per_iov == M64_PER_IOV)
+ if (pdn->m64_single_mode)
rc = opal_pci_phb_mmio_enable(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 2);
+ OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2);
else
rc = opal_pci_phb_mmio_enable(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 1);
+ OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1);
if (rc != OPAL_SUCCESS) {
dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
@@ -1326,7 +1322,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
return 0;
m64_failed:
- pnv_pci_vf_release_m64(pdev);
+ pnv_pci_vf_release_m64(pdev, num_vfs);
return -EBUSY;
}
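[The kmalloc() above sizes the table by sizeof(*pdn->m64_map), which only works out to a two-dimensional layout because m64_map is presumably declared in pci-bridge.h as a pointer to whole rows, as the m64_map[j][i] indexing implies. The idiom in isolation:

/* assumed declaration: a pointer to rows of PCI_SRIOV_NUM_BARS entries */
int (*m64_map)[PCI_SRIOV_NUM_BARS];

/*
 * sizeof(*m64_map) is PCI_SRIOV_NUM_BARS * sizeof(int), so this single
 * allocation is an m64_bars x PCI_SRIOV_NUM_BARS table, indexed as
 * m64_map[window][bar].
 */
m64_map = kmalloc(sizeof(*m64_map) * m64_bars, GFP_KERNEL);
]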
@@ -1353,15 +1349,13 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
}
-static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
+static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
{
struct pci_bus *bus;
struct pci_controller *hose;
struct pnv_phb *phb;
struct pnv_ioda_pe *pe, *pe_n;
struct pci_dn *pdn;
- u16 vf_index;
- int64_t rc;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1371,35 +1365,6 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
if (!pdev->is_physfn)
return;
- if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) {
- int vf_group;
- int vf_per_group;
- int vf_index1;
-
- vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
-
- for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++)
- for (vf_index = vf_group * vf_per_group;
- vf_index < (vf_group + 1) * vf_per_group &&
- vf_index < num_vfs;
- vf_index++)
- for (vf_index1 = vf_group * vf_per_group;
- vf_index1 < (vf_group + 1) * vf_per_group &&
- vf_index1 < num_vfs;
- vf_index1++){
-
- rc = opal_pci_set_peltv(phb->opal_id,
- pdn->offset + vf_index,
- pdn->offset + vf_index1,
- OPAL_REMOVE_PE_FROM_DOMAIN);
-
- if (rc)
- dev_warn(&pdev->dev, "%s: Failed to unlink same group PE#%d(%lld)\n",
- __func__,
- pdn->offset + vf_index1, rc);
- }
- }
-
list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
if (pe->parent_dev != pdev)
continue;
@@ -1424,7 +1389,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
struct pnv_phb *phb;
struct pci_dn *pdn;
struct pci_sriov *iov;
- u16 num_vfs;
+ u16 num_vfs, i;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1434,18 +1399,25 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
num_vfs = pdn->num_vfs;
/* Release VF PEs */
- pnv_ioda_release_vf_PE(pdev, num_vfs);
+ pnv_ioda_release_vf_PE(pdev);
if (phb->type == PNV_PHB_IODA2) {
- if (pdn->m64_per_iov == 1)
- pnv_pci_vf_resource_shift(pdev, -pdn->offset);
+ if (!pdn->m64_single_mode)
+ pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map);
/* Release M64 windows */
- pnv_pci_vf_release_m64(pdev);
+ pnv_pci_vf_release_m64(pdev, num_vfs);
/* Release PE numbers */
- bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
- pdn->offset = 0;
+ if (pdn->m64_single_mode) {
+ for (i = 0; i < num_vfs; i++) {
+ if (pdn->pe_num_map[i] != IODA_INVALID_PE)
+ pnv_ioda_free_pe(phb, pdn->pe_num_map[i]);
+ }
+ } else
+ bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
+ /* Releasing pe_num_map */
+ kfree(pdn->pe_num_map);
}
}
@@ -1460,7 +1432,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
int pe_num;
u16 vf_index;
struct pci_dn *pdn;
- int64_t rc;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1472,7 +1443,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
/* Reserve PE for each VF */
for (vf_index = 0; vf_index < num_vfs; vf_index++) {
- pe_num = pdn->offset + vf_index;
+ if (pdn->m64_single_mode)
+ pe_num = pdn->pe_num_map[vf_index];
+ else
+ pe_num = *pdn->pe_num_map + vf_index;
pe = &phb->ioda.pe_array[pe_num];
pe->pe_number = pe_num;
@@ -1505,37 +1479,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
pnv_pci_ioda2_setup_dma_pe(phb, pe);
}
-
- if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) {
- int vf_group;
- int vf_per_group;
- int vf_index1;
-
- vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
-
- for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) {
- for (vf_index = vf_group * vf_per_group;
- vf_index < (vf_group + 1) * vf_per_group &&
- vf_index < num_vfs;
- vf_index++) {
- for (vf_index1 = vf_group * vf_per_group;
- vf_index1 < (vf_group + 1) * vf_per_group &&
- vf_index1 < num_vfs;
- vf_index1++) {
-
- rc = opal_pci_set_peltv(phb->opal_id,
- pdn->offset + vf_index,
- pdn->offset + vf_index1,
- OPAL_ADD_PE_TO_DOMAIN);
-
- if (rc)
- dev_warn(&pdev->dev, "%s: Failed to link same group PE#%d(%lld)\n",
- __func__,
- pdn->offset + vf_index1, rc);
- }
- }
- }
- }
}
int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
@@ -1545,6 +1488,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
struct pnv_phb *phb;
struct pci_dn *pdn;
int ret;
+ u16 i;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1552,20 +1496,59 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
pdn = pci_get_pdn(pdev);
if (phb->type == PNV_PHB_IODA2) {
+ if (!pdn->vfs_expanded) {
+ dev_info(&pdev->dev, "don't support this SRIOV device"
+ " with non 64bit-prefetchable IOV BAR\n");
+ return -ENOSPC;
+ }
+
+ /*
+ * When M64 BARs function in Single PE mode, the number of VFs
+ * that can be enabled must be less than the number of M64 BARs.
+ */
+ if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) {
+ dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n");
+ return -EBUSY;
+ }
+
+ /* Allocating pe_num_map */
+ if (pdn->m64_single_mode)
+ pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map) * num_vfs,
+ GFP_KERNEL);
+ else
+ pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL);
+
+ if (!pdn->pe_num_map)
+ return -ENOMEM;
+
+ if (pdn->m64_single_mode)
+ for (i = 0; i < num_vfs; i++)
+ pdn->pe_num_map[i] = IODA_INVALID_PE;
+
/* Calculate available PE for required VFs */
- mutex_lock(&phb->ioda.pe_alloc_mutex);
- pdn->offset = bitmap_find_next_zero_area(
- phb->ioda.pe_alloc, phb->ioda.total_pe,
- 0, num_vfs, 0);
- if (pdn->offset >= phb->ioda.total_pe) {
+ if (pdn->m64_single_mode) {
+ for (i = 0; i < num_vfs; i++) {
+ pdn->pe_num_map[i] = pnv_ioda_alloc_pe(phb);
+ if (pdn->pe_num_map[i] == IODA_INVALID_PE) {
+ ret = -EBUSY;
+ goto m64_failed;
+ }
+ }
+ } else {
+ mutex_lock(&phb->ioda.pe_alloc_mutex);
+ *pdn->pe_num_map = bitmap_find_next_zero_area(
+ phb->ioda.pe_alloc, phb->ioda.total_pe,
+ 0, num_vfs, 0);
+ if (*pdn->pe_num_map >= phb->ioda.total_pe) {
+ mutex_unlock(&phb->ioda.pe_alloc_mutex);
+ dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
+ kfree(pdn->pe_num_map);
+ return -EBUSY;
+ }
+ bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
mutex_unlock(&phb->ioda.pe_alloc_mutex);
- dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
- pdn->offset = 0;
- return -EBUSY;
}
- bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs);
pdn->num_vfs = num_vfs;
- mutex_unlock(&phb->ioda.pe_alloc_mutex);
/* Assign M64 window accordingly */
ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
@@ -1579,8 +1562,8 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
* the IOV BAR according to the PE# allocated to the VFs.
* Otherwise, the PE# for the VF will conflict with others.
*/
- if (pdn->m64_per_iov == 1) {
- ret = pnv_pci_vf_resource_shift(pdev, pdn->offset);
+ if (!pdn->m64_single_mode) {
+ ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map);
if (ret)
goto m64_failed;
}
@@ -1592,8 +1575,16 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
return 0;
m64_failed:
- bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
- pdn->offset = 0;
+ if (pdn->m64_single_mode) {
+ for (i = 0; i < num_vfs; i++) {
+ if (pdn->pe_num_map[i] != IODA_INVALID_PE)
+ pnv_ioda_free_pe(phb, pdn->pe_num_map[i]);
+ }
+ } else
+ bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
+
+ /* Releasing pe_num_map */
+ kfree(pdn->pe_num_map);
return ret;
}
@@ -1612,8 +1603,7 @@ int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
/* Allocate PCI data */
add_dev_pci_data(pdev);
- pnv_pci_sriov_enable(pdev, num_vfs);
- return 0;
+ return pnv_pci_sriov_enable(pdev, num_vfs);
}
#endif /* CONFIG_PCI_IOV */
@@ -2851,45 +2841,58 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
#ifdef CONFIG_PCI_IOV
static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
{
- struct pci_controller *hose;
- struct pnv_phb *phb;
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ const resource_size_t gate = phb->ioda.m64_segsize >> 2;
struct resource *res;
int i;
- resource_size_t size;
+ resource_size_t size, total_vf_bar_sz;
struct pci_dn *pdn;
int mul, total_vfs;
if (!pdev->is_physfn || pdev->is_added)
return;
- hose = pci_bus_to_host(pdev->bus);
- phb = hose->private_data;
-
pdn = pci_get_pdn(pdev);
pdn->vfs_expanded = 0;
+ pdn->m64_single_mode = false;
total_vfs = pci_sriov_get_totalvfs(pdev);
- pdn->m64_per_iov = 1;
mul = phb->ioda.total_pe;
+ total_vf_bar_sz = 0;
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = &pdev->resource[i + PCI_IOV_RESOURCES];
if (!res->flags || res->parent)
continue;
if (!pnv_pci_is_mem_pref_64(res->flags)) {
- dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
+ dev_warn(&pdev->dev, "Don't support SR-IOV with"
+ " non M64 VF BAR%d: %pR. \n",
i, res);
- continue;
+ goto truncate_iov;
}
- size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+ total_vf_bar_sz += pci_iov_resource_size(pdev,
+ i + PCI_IOV_RESOURCES);
- /* bigger than 64M */
- if (size > (1 << 26)) {
- dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n",
- i, res);
- pdn->m64_per_iov = M64_PER_IOV;
+ /*
+ * If bigger than a quarter of the M64 segment size, just round
+ * up to a power of two.
+ *
+ * Generally, one M64 BAR maps one IOV BAR. To avoid conflicts
+ * with other devices, the IOV BAR size is expanded to be
+ * (total_pe * VF_BAR_size). When VF_BAR_size is half of the M64
+ * segment size, the expanded size would equal half of the whole
+ * M64 space size, which would exhaust the M64 space and limit
+ * the system's flexibility. This is a design decision to set
+ * the boundary to a quarter of the M64 segment size.
+ */
+ if (total_vf_bar_sz > gate) {
mul = roundup_pow_of_two(total_vfs);
+ dev_info(&pdev->dev,
+ "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
+ total_vf_bar_sz, gate, mul);
+ pdn->m64_single_mode = true;
break;
}
}
@@ -2898,20 +2901,31 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
res = &pdev->resource[i + PCI_IOV_RESOURCES];
if (!res->flags || res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags)) {
- dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
- i, res);
- continue;
- }
- dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+ /*
+ * On PHB3, the minimum size alignment of M64 BAR in single
+ * mode is 32MB.
+ */
+ if (pdn->m64_single_mode && (size < SZ_32M))
+ goto truncate_iov;
+ dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
res->end = res->start + size * mul - 1;
dev_dbg(&pdev->dev, " %pR\n", res);
dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
i, res, mul);
}
pdn->vfs_expanded = mul;
+
+ return;
+
+truncate_iov:
+ /* To save MMIO space, the IOV BAR is truncated. */
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = &pdev->resource[i + PCI_IOV_RESOURCES];
+ res->flags = 0;
+ res->end = res->start - 1;
+ }
}
#endif /* CONFIG_PCI_IOV */
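[To make the gate concrete, here is the sizing rule as a fragment inside a function, with purely illustrative numbers (a hypothetical 256MB M64 segment, 256 PEs per PHB, 64 possible VFs):

	resource_size_t m64_segsize = SZ_256M;		/* hypothetical */
	resource_size_t gate = m64_segsize >> 2;	/* 64M threshold */
	resource_size_t total_vf_bar_sz = SZ_128M;	/* sum of this PF's VF BARs */
	int total_vfs = 64, total_pe = 256, mul;

	if (total_vf_bar_sz > gate)			/* 128M > 64M */
		mul = roundup_pow_of_two(total_vfs);	/* 64: single PE mode */
	else
		mul = total_pe;				/* 256: shared mode */
]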
@@ -3125,18 +3139,35 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
int resno)
{
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(pdev);
- resource_size_t align, iov_align;
-
- iov_align = resource_size(&pdev->resource[resno]);
- if (iov_align)
- return iov_align;
+ resource_size_t align;
+ /*
+ * On the PowerNV platform, an IOV BAR is mapped by an M64 BAR to
+ * enable SR-IOV, and from the hardware's perspective the range
+ * mapped by an M64 BAR must be size aligned.
+ *
+ * When an IOV BAR is mapped with an M64 BAR in Single PE mode,
+ * that extra powernv-specific hardware restriction is gone. But
+ * if we just used the VF BAR size as the alignment, the PF BAR /
+ * VF BAR may be allocated within one segment of M64 #15, which
+ * introduces a PE conflict between PF and VF. Based on this, the
+ * minimum alignment of an IOV BAR is m64_segsize.
+ *
+ * This function returns the total IOV BAR size if the M64 BAR is
+ * in Shared PE mode, or just the VF BAR size if not.
+ * If the M64 BAR is in Single PE mode, it returns the VF BAR size,
+ * or the M64 segment size if the IOV BAR size is less than that.
+ */
align = pci_iov_resource_size(pdev, resno);
- if (pdn->vfs_expanded)
- return pdn->vfs_expanded * align;
+ if (!pdn->vfs_expanded)
+ return align;
+ if (pdn->m64_single_mode)
+ return max(align, (resource_size_t)phb->ioda.m64_segsize);
- return align;
+ return pdn->vfs_expanded * align;
}
#endif /* CONFIG_PCI_IOV */
@@ -3180,6 +3211,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
+ .dma_bus_setup = pnv_pci_dma_bus_setup,
#ifdef CONFIG_PCI_MSI
.setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs,
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
deleted file mode 100644
index f2bdfea3b68d..000000000000
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Support PCI/PCIe on PowerNV platforms
- *
- * Currently supports only P5IOC2
- *
- * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/msi.h>
-
-#include <asm/sections.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/machdep.h>
-#include <asm/msi_bitmap.h>
-#include <asm/ppc-pci.h>
-#include <asm/opal.h>
-#include <asm/iommu.h>
-#include <asm/tce.h>
-
-#include "powernv.h"
-#include "pci.h"
-
-/* For now, use a fixed amount of TCE memory for each p5ioc2
- * hub, 16M will do
- */
-#define P5IOC2_TCE_MEMORY 0x01000000
-
-#ifdef CONFIG_PCI_MSI
-static int pnv_pci_p5ioc2_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
- unsigned int hwirq, unsigned int virq,
- unsigned int is_64, struct msi_msg *msg)
-{
- if (WARN_ON(!is_64))
- return -ENXIO;
- msg->data = hwirq - phb->msi_base;
- msg->address_hi = 0x10000000;
- msg->address_lo = 0;
-
- return 0;
-}
-
-static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb)
-{
- unsigned int count;
- const __be32 *prop = of_get_property(phb->hose->dn,
- "ibm,opal-msi-ranges", NULL);
- if (!prop)
- return;
-
- /* Don't do MSI's on p5ioc2 PCI-X are they are not properly
- * verified in HW
- */
- if (of_device_is_compatible(phb->hose->dn, "ibm,p5ioc2-pcix"))
- return;
- phb->msi_base = be32_to_cpup(prop);
- count = be32_to_cpup(prop + 1);
- if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
- pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
- phb->hose->global_number);
- return;
- }
- phb->msi_setup = pnv_pci_p5ioc2_msi_setup;
- phb->msi32_support = 0;
- pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
- count, phb->msi_base);
-}
-#else
-static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
-#endif /* CONFIG_PCI_MSI */
-
-static struct iommu_table_ops pnv_p5ioc2_iommu_ops = {
- .set = pnv_tce_build,
-#ifdef CONFIG_IOMMU_API
- .exchange = pnv_tce_xchg,
-#endif
- .clear = pnv_tce_free,
- .get = pnv_tce_get,
-};
-
-static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
- struct pci_dev *pdev)
-{
- struct iommu_table *tbl = phb->p5ioc2.table_group.tables[0];
-
- if (!tbl->it_map) {
- tbl->it_ops = &pnv_p5ioc2_iommu_ops;
- iommu_init_table(tbl, phb->hose->node);
- iommu_register_group(&phb->p5ioc2.table_group,
- pci_domain_nr(phb->hose->bus), phb->opal_id);
- INIT_LIST_HEAD_RCU(&tbl->it_group_list);
- pnv_pci_link_table_and_group(phb->hose->node, 0,
- tbl, &phb->p5ioc2.table_group);
- }
-
- set_iommu_table_base(&pdev->dev, tbl);
- iommu_add_device(&pdev->dev);
-}
-
-static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
-#ifdef CONFIG_PCI_MSI
- .setup_msi_irqs = pnv_setup_msi_irqs,
- .teardown_msi_irqs = pnv_teardown_msi_irqs,
-#endif
-};
-
-static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
- void *tce_mem, u64 tce_size)
-{
- struct pnv_phb *phb;
- const __be64 *prop64;
- u64 phb_id;
- int64_t rc;
- static int primary = 1;
- struct iommu_table_group *table_group;
- struct iommu_table *tbl;
-
- pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name);
-
- prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
- if (!prop64) {
- pr_err(" Missing \"ibm,opal-phbid\" property !\n");
- return;
- }
- phb_id = be64_to_cpup(prop64);
- pr_devel(" PHB-ID : 0x%016llx\n", phb_id);
- pr_devel(" TCE AT : 0x%016lx\n", __pa(tce_mem));
- pr_devel(" TCE SZ : 0x%016llx\n", tce_size);
-
- rc = opal_pci_set_phb_tce_memory(phb_id, __pa(tce_mem), tce_size);
- if (rc != OPAL_SUCCESS) {
- pr_err(" Failed to set TCE memory, OPAL error %lld\n", rc);
- return;
- }
-
- phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0);
- phb->hose = pcibios_alloc_controller(np);
- if (!phb->hose) {
- pr_err(" Failed to allocate PCI controller\n");
- return;
- }
-
- spin_lock_init(&phb->lock);
- phb->hose->first_busno = 0;
- phb->hose->last_busno = 0xff;
- phb->hose->private_data = phb;
- phb->hose->controller_ops = pnv_pci_p5ioc2_controller_ops;
- phb->hub_id = hub_id;
- phb->opal_id = phb_id;
- phb->type = PNV_PHB_P5IOC2;
- phb->model = PNV_PHB_MODEL_P5IOC2;
-
- phb->regs = of_iomap(np, 0);
-
- if (phb->regs == NULL)
- pr_err(" Failed to map registers !\n");
- else {
- pr_devel(" P_BUID = 0x%08x\n", in_be32(phb->regs + 0x100));
- pr_devel(" P_IOSZ = 0x%08x\n", in_be32(phb->regs + 0x1b0));
- pr_devel(" P_IO_ST = 0x%08x\n", in_be32(phb->regs + 0x1e0));
- pr_devel(" P_MEM1_H = 0x%08x\n", in_be32(phb->regs + 0x1a0));
- pr_devel(" P_MEM1_L = 0x%08x\n", in_be32(phb->regs + 0x190));
- pr_devel(" P_MSZ1_L = 0x%08x\n", in_be32(phb->regs + 0x1c0));
- pr_devel(" P_MEM_ST = 0x%08x\n", in_be32(phb->regs + 0x1d0));
- pr_devel(" P_MEM2_H = 0x%08x\n", in_be32(phb->regs + 0x2c0));
- pr_devel(" P_MEM2_L = 0x%08x\n", in_be32(phb->regs + 0x2b0));
- pr_devel(" P_MSZ2_H = 0x%08x\n", in_be32(phb->regs + 0x2d0));
- pr_devel(" P_MSZ2_L = 0x%08x\n", in_be32(phb->regs + 0x2e0));
- }
-
- /* Interpret the "ranges" property */
- /* This also maps the I/O region and sets isa_io/mem_base */
- pci_process_bridge_OF_ranges(phb->hose, np, primary);
- primary = 0;
-
- phb->hose->ops = &pnv_pci_ops;
-
- /* Setup MSI support */
- pnv_pci_init_p5ioc2_msis(phb);
-
- /* Setup TCEs */
- phb->dma_dev_setup = pnv_pci_p5ioc2_dma_dev_setup;
- pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table,
- tce_mem, tce_size, 0,
- IOMMU_PAGE_SHIFT_4K);
- /*
- * We do not allocate iommu_table as we do not support
- * hotplug or SRIOV on P5IOC2 and therefore iommu_free_table()
- * should not be called for phb->p5ioc2.table_group.tables[0] ever.
- */
- tbl = phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table;
- table_group = &phb->p5ioc2.table_group;
- table_group->tce32_start = tbl->it_offset << tbl->it_page_shift;
- table_group->tce32_size = tbl->it_size << tbl->it_page_shift;
-}
-
-void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
-{
- struct device_node *phbn;
- const __be64 *prop64;
- u64 hub_id;
- void *tce_mem;
- uint64_t tce_per_phb;
- int64_t rc;
- int phb_count = 0;
-
- pr_info("Probing p5ioc2 IO-Hub %s\n", np->full_name);
-
- prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
- if (!prop64) {
- pr_err(" Missing \"ibm,opal-hubid\" property !\n");
- return;
- }
- hub_id = be64_to_cpup(prop64);
- pr_info(" HUB-ID : 0x%016llx\n", hub_id);
-
- /* Count child PHBs and calculate TCE space per PHB */
- for_each_child_of_node(np, phbn) {
- if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
- of_device_is_compatible(phbn, "ibm,p5ioc2-pciex"))
- phb_count++;
- }
-
- if (phb_count <= 0) {
- pr_info(" No PHBs for Hub %s\n", np->full_name);
- return;
- }
-
- tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count);
- pr_info(" Allocating %lld MB of TCE memory per PHB\n",
- tce_per_phb >> 20);
-
- /* Currently allocate 16M of TCE memory for every Hub
- *
- * XXX TODO: Make it chip local if possible
- */
- tce_mem = memblock_virt_alloc(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY);
- pr_debug(" TCE : 0x%016lx..0x%016lx\n",
- __pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1);
- rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem),
- P5IOC2_TCE_MEMORY);
- if (rc != OPAL_SUCCESS) {
- pr_err(" Failed to allocate TCE memory, OPAL error %lld\n", rc);
- return;
- }
-
- /* Initialize PHBs */
- for_each_child_of_node(np, phbn) {
- if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
- of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) {
- pnv_pci_init_p5ioc2_phb(phbn, hub_id,
- tce_mem, tce_per_phb);
- tce_mem += tce_per_phb;
- }
- }
-}
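
For scale, a worked example of the per-PHB TCE split in the removed pnv_pci_init_p5ioc2_hub() (assuming P5IOC2_TCE_MEMORY is the 16M mentioned in the removed comment): with three child PHBs, 16 MB / 3 is roughly 5.33 MB, and __rounddown_pow_of_two() brings that to 4 MB, so each PHB would get a 4 MB TCE window and the driver would log "Allocating 4 MB of TCE memory per PHB".
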
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 2f55c86df703..73c8dc2a353f 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -380,10 +380,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
*/
pe_no = pdn->pe_number;
if (pe_no == IODA_INVALID_PE) {
- if (phb->type == PNV_PHB_P5IOC2)
- pe_no = 0;
- else
- pe_no = phb->ioda.reserved_pe;
+ pe_no = phb->ioda.reserved_pe;
}
/*
@@ -599,6 +596,9 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
long i;
+ if (proto_tce & TCE_PCI_WRITE)
+ proto_tce |= TCE_PCI_READ;
+
for (i = 0; i < npages; i++) {
unsigned long newtce = proto_tce |
((rpn + i) << tbl->it_page_shift);
@@ -620,6 +620,9 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index,
BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
+ if (newtce & TCE_PCI_WRITE)
+ newtce |= TCE_PCI_READ;
+
oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce));
*hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE);
*direction = iommu_tce_direction(oldtce);
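
Both TCE hunks above enforce the same invariant: a TCE entry that grants DMA write must also grant read. A minimal sketch of the resulting entry construction (the helper name pnv_make_tce is hypothetical; TCE_PCI_READ/TCE_PCI_WRITE are the permission bits the hunks operate on):

static inline u64 pnv_make_tce(u64 proto_tce, u64 rpn, unsigned int page_shift)
{
	/* a write-capable TCE must also be readable */
	if (proto_tce & TCE_PCI_WRITE)
		proto_tce |= TCE_PCI_READ;

	/* the real page number occupies the bits above the permission flags */
	return proto_tce | (rpn << page_shift);
}
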
@@ -760,6 +763,26 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
phb->dma_dev_setup(phb, pdev);
}
+void pnv_pci_dma_bus_setup(struct pci_bus *bus)
+{
+ struct pci_controller *hose = bus->sysdata;
+ struct pnv_phb *phb = hose->private_data;
+ struct pnv_ioda_pe *pe;
+
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)))
+ continue;
+
+ if (!pe->pbus)
+ continue;
+
+ if (bus->number == ((pe->rid >> 8) & 0xFF)) {
+ pe->pbus = bus;
+ break;
+ }
+ }
+}
+
void pnv_pci_shutdown(void)
{
struct pci_controller *hose;
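
pnv_pci_dma_bus_setup(), added above, identifies a PE by the bus byte of its RID, which packs a PCI address as (bus << 8) | devfn. An illustrative decode helper (hypothetical name, mirroring the (pe->rid >> 8) & 0xFF test in the loop):

static inline u8 pnv_pe_bus_number(u16 rid)
{
	return (rid >> 8) & 0xff;	/* upper byte of the RID is the bus number */
}
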
@@ -779,7 +802,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
void __init pnv_pci_init(void)
{
struct device_node *np;
- bool found_ioda = false;
pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN);
@@ -787,20 +809,11 @@ void __init pnv_pci_init(void)
if (!firmware_has_feature(FW_FEATURE_OPAL))
return;
- /* Look for IODA IO-Hubs. We don't support mixing IODA
- * and p5ioc2 due to the need to change some global
- * probing flags
- */
+ /* Look for IODA IO-Hubs. */
for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
pnv_pci_init_ioda_hub(np);
- found_ioda = true;
}
- /* Look for p5ioc2 IO-Hubs */
- if (!found_ioda)
- for_each_compatible_node(np, NULL, "ibm,p5ioc2")
- pnv_pci_init_p5ioc2_hub(np);
-
/* Look for ioda2 built-in PHB3's */
for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
pnv_pci_init_ioda2_phb(np);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 7f56313e8d72..3f814f382b2e 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -4,16 +4,14 @@
struct pci_dn;
enum pnv_phb_type {
- PNV_PHB_P5IOC2 = 0,
- PNV_PHB_IODA1 = 1,
- PNV_PHB_IODA2 = 2,
- PNV_PHB_NPU = 3,
+ PNV_PHB_IODA1 = 0,
+ PNV_PHB_IODA2 = 1,
+ PNV_PHB_NPU = 2,
};
/* Precise PHB model for error management */
enum pnv_phb_model {
PNV_PHB_MODEL_UNKNOWN,
- PNV_PHB_MODEL_P5IOC2,
PNV_PHB_MODEL_P7IOC,
PNV_PHB_MODEL_PHB3,
PNV_PHB_MODEL_NPU,
@@ -121,81 +119,74 @@ struct pnv_phb {
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
- union {
- struct {
- struct iommu_table iommu_table;
- struct iommu_table_group table_group;
- } p5ioc2;
-
- struct {
- /* Global bridge info */
- unsigned int total_pe;
- unsigned int reserved_pe;
-
- /* 32-bit MMIO window */
- unsigned int m32_size;
- unsigned int m32_segsize;
- unsigned int m32_pci_base;
-
- /* 64-bit MMIO window */
- unsigned int m64_bar_idx;
- unsigned long m64_size;
- unsigned long m64_segsize;
- unsigned long m64_base;
- unsigned long m64_bar_alloc;
-
- /* IO ports */
- unsigned int io_size;
- unsigned int io_segsize;
- unsigned int io_pci_base;
-
- /* PE allocation bitmap */
- unsigned long *pe_alloc;
- /* PE allocation mutex */
- struct mutex pe_alloc_mutex;
-
- /* M32 & IO segment maps */
- unsigned int *m32_segmap;
- unsigned int *io_segmap;
- struct pnv_ioda_pe *pe_array;
-
- /* IRQ chip */
- int irq_chip_init;
- struct irq_chip irq_chip;
-
- /* Sorted list of used PE's based
- * on the sequence of creation
- */
- struct list_head pe_list;
- struct mutex pe_list_mutex;
-
- /* Reverse map of PEs, will have to extend if
- * we are to support more than 256 PEs, indexed
- * bus { bus, devfn }
- */
- unsigned char pe_rmap[0x10000];
-
- /* 32-bit TCE tables allocation */
- unsigned long tce32_count;
-
- /* Total "weight" for the sake of DMA resources
- * allocation
- */
- unsigned int dma_weight;
- unsigned int dma_pe_count;
-
- /* Sorted list of used PE's, sorted at
- * boot for resource allocation purposes
- */
- struct list_head pe_dma_list;
-
- /* TCE cache invalidate registers (physical and
- * remapped)
- */
- phys_addr_t tce_inval_reg_phys;
- __be64 __iomem *tce_inval_reg;
- } ioda;
- };
+ struct {
+ /* Global bridge info */
+ unsigned int total_pe;
+ unsigned int reserved_pe;
+
+ /* 32-bit MMIO window */
+ unsigned int m32_size;
+ unsigned int m32_segsize;
+ unsigned int m32_pci_base;
+
+ /* 64-bit MMIO window */
+ unsigned int m64_bar_idx;
+ unsigned long m64_size;
+ unsigned long m64_segsize;
+ unsigned long m64_base;
+ unsigned long m64_bar_alloc;
+
+ /* IO ports */
+ unsigned int io_size;
+ unsigned int io_segsize;
+ unsigned int io_pci_base;
+
+ /* PE allocation bitmap */
+ unsigned long *pe_alloc;
+ /* PE allocation mutex */
+ struct mutex pe_alloc_mutex;
+
+ /* M32 & IO segment maps */
+ unsigned int *m32_segmap;
+ unsigned int *io_segmap;
+ struct pnv_ioda_pe *pe_array;
+
+ /* IRQ chip */
+ int irq_chip_init;
+ struct irq_chip irq_chip;
+
+ /* Sorted list of used PE's based
+ * on the sequence of creation
+ */
+ struct list_head pe_list;
+ struct mutex pe_list_mutex;
+
+ /* Reverse map of PEs, will have to extend if
+ * we are to support more than 256 PEs, indexed
+ * bus { bus, devfn }
+ */
+ unsigned char pe_rmap[0x10000];
+
+ /* 32-bit TCE tables allocation */
+ unsigned long tce32_count;
+
+ /* Total "weight" for the sake of DMA resources
+ * allocation
+ */
+ unsigned int dma_weight;
+ unsigned int dma_pe_count;
+
+ /* Sorted list of used PE's, sorted at
+ * boot for resource allocation purposes
+ */
+ struct list_head pe_dma_list;
+
+ /* TCE cache invalidate registers (physical and
+ * remapped)
+ */
+ phys_addr_t tce_inval_reg_phys;
+ __be64 __iomem *tce_inval_reg;
+ } ioda;
/* PHB and hub status structure */
union {
@@ -232,7 +223,6 @@ extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
void *tce_mem, u64 tce_size,
u64 dma_offset, unsigned page_shift);
-extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_init_npu_phb(struct device_node *np);
@@ -242,6 +232,7 @@ extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev);
+extern void pnv_pci_dma_bus_setup(struct pci_bus *bus);
extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 503a73f59359..0babef11136f 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -407,7 +407,7 @@ static DEVICE_ATTR(subcores_per_core, 0644,
static int subcore_init(void)
{
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ if (!cpu_has_feature(CPU_FTR_SUBCORE))
return 0;
/*
diff --git a/arch/powerpc/platforms/ps3/gelic_udbg.c b/arch/powerpc/platforms/ps3/gelic_udbg.c
index 20b46a19a48f..09bf24d616a5 100644
--- a/arch/powerpc/platforms/ps3/gelic_udbg.c
+++ b/arch/powerpc/platforms/ps3/gelic_udbg.c
@@ -13,6 +13,12 @@
*
*/
+#include <linux/if_ether.h>
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
#include <asm/io.h>
#include <asm/udbg.h>
#include <asm/lv1call.h>
@@ -56,39 +62,8 @@ struct debug_block {
u8 pkt[1520];
} __packed;
-struct ethhdr {
- u8 dest[6];
- u8 src[6];
- u16 type;
-} __packed;
-
-struct vlantag {
- u16 vlan;
- u16 subtype;
-} __packed;
-
-struct iphdr {
- u8 ver_len;
- u8 dscp_ecn;
- u16 total_length;
- u16 ident;
- u16 frag_off_flags;
- u8 ttl;
- u8 proto;
- u16 checksum;
- u32 src;
- u32 dest;
-} __packed;
-
-struct udphdr {
- u16 src;
- u16 dest;
- u16 len;
- u16 checksum;
-} __packed;
-
static __iomem struct ethhdr *h_eth;
-static __iomem struct vlantag *h_vlan;
+static __iomem struct vlan_hdr *h_vlan;
static __iomem struct iphdr *h_ip;
static __iomem struct udphdr *h_udp;
@@ -173,8 +148,8 @@ static void gelic_debug_init(void)
h_eth = (struct ethhdr *)dbg.pkt;
- memset(&h_eth->dest, 0xff, 6);
- memcpy(&h_eth->src, &mac, 6);
+ eth_broadcast_addr(h_eth->h_dest);
+ memcpy(&h_eth->h_source, &mac, ETH_ALEN);
header_size = sizeof(struct ethhdr);
@@ -183,28 +158,29 @@ static void gelic_debug_init(void)
GELIC_LV1_VLAN_TX_ETHERNET_0, 0, 0,
&vlan_id, &v2);
if (!result) {
- h_eth->type = 0x8100;
+ h_eth->h_proto = ETH_P_8021Q;
- header_size += sizeof(struct vlantag);
- h_vlan = (struct vlantag *)(h_eth + 1);
- h_vlan->vlan = vlan_id;
- h_vlan->subtype = 0x0800;
+ header_size += sizeof(struct vlan_hdr);
+ h_vlan = (struct vlan_hdr *)(h_eth + 1);
+ h_vlan->h_vlan_TCI = vlan_id;
+ h_vlan->h_vlan_encapsulated_proto = ETH_P_IP;
h_ip = (struct iphdr *)(h_vlan + 1);
} else {
- h_eth->type = 0x0800;
+ h_eth->h_proto = 0x0800;
h_ip = (struct iphdr *)(h_eth + 1);
}
header_size += sizeof(struct iphdr);
- h_ip->ver_len = 0x45;
+ h_ip->version = 4;
+ h_ip->ihl = 5;
h_ip->ttl = 10;
- h_ip->proto = 0x11;
- h_ip->src = 0x00000000;
- h_ip->dest = 0xffffffff;
+ h_ip->protocol = 0x11;
+ h_ip->saddr = 0x00000000;
+ h_ip->daddr = 0xffffffff;
header_size += sizeof(struct udphdr);
h_udp = (struct udphdr *)(h_ip + 1);
- h_udp->src = GELIC_DEBUG_PORT;
+ h_udp->source = GELIC_DEBUG_PORT;
h_udp->dest = GELIC_DEBUG_PORT;
pmsgc = pmsg = (char *)(h_udp + 1);
@@ -225,16 +201,16 @@ static void gelic_sendbuf(int msgsize)
int i;
dbg.descr.buf_size = header_size + msgsize;
- h_ip->total_length = msgsize + sizeof(struct udphdr) +
+ h_ip->tot_len = msgsize + sizeof(struct udphdr) +
sizeof(struct iphdr);
h_udp->len = msgsize + sizeof(struct udphdr);
- h_ip->checksum = 0;
+ h_ip->check = 0;
sum = 0;
p = (u16 *)h_ip;
for (i = 0; i < 5; i++)
sum += *p++;
- h_ip->checksum = ~(sum + (sum >> 16));
+ h_ip->check = ~(sum + (sum >> 16));
dbg.descr.dmac_cmd_status = GELIC_DESCR_DMA_CMD_NO_CHKSUM |
GELIC_DESCR_TX_DMA_FRAME_TAIL;
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index 638c4060938e..b831638e6f4a 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -78,7 +78,7 @@ struct ps3_bmp {
/**
* struct ps3_private - a per cpu data structure
* @bmp: ps3_bmp structure
- * @bmp_lock: Syncronize access to bmp.
+ * @bmp_lock: Synchronize access to bmp.
* @ipi_debug_brk_mask: Mask for debug break IPIs
* @ppe_id: HV logical_ppe_id
* @thread_id: HV thread_id
diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
index 849b29b3e9ae..74da18de853a 100644
--- a/arch/powerpc/platforms/pseries/hvconsole.c
+++ b/arch/powerpc/platforms/pseries/hvconsole.c
@@ -31,7 +31,7 @@
#include <asm/plpar_wrappers.h>
/**
- * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper
+ * hvc_get_chars - retrieve characters from firmware for denoted vterm adapter
* @vtermno: The vtermno or unit_address of the adapter from which to fetch the
* data.
* @buf: The character buffer into which to put the character data fetched from
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 477290ad855e..2415a0d31f8f 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -505,8 +505,8 @@ static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
}
#endif
-static void pSeries_lpar_hpte_removebolted(unsigned long ea,
- int psize, int ssize)
+static int pSeries_lpar_hpte_removebolted(unsigned long ea,
+ int psize, int ssize)
{
unsigned long vpn;
unsigned long slot, vsid;
@@ -515,11 +515,14 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
vpn = hpt_vpn(ea, vsid, ssize);
slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
- BUG_ON(slot == -1);
+ if (slot == -1)
+ return -ENOENT;
+
/*
* lpar doesn't use the passed actual page size
*/
pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0);
+ return 0;
}
/*
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 36df46eaba24..6e944fc6e5f9 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -515,7 +515,7 @@ static void __init pSeries_setup_arch(void)
fwnmi_init();
- /* By default, only probe PCI (can be overriden by rtas_pci) */
+ /* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
/* Find and initialize PCI host bridges */
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index c69e88e91459..85729f49764f 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -575,7 +575,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary)
if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) {
/* use fsl_indirect_read_config for PCIe */
hose->ops = &fsl_indirect_pcie_ops;
- /* For PCIE read HEADER_TYPE to identify controler mode */
+ /* For PCIE read HEADER_TYPE to identify controller mode */
early_read_config_byte(hose, 0, 0, PCI_HEADER_TYPE, &hdr_type);
if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE)
goto no_bridge;
diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c
index b48197ae44d0..ffe0ee832768 100644
--- a/arch/powerpc/sysdev/fsl_rmu.c
+++ b/arch/powerpc/sysdev/fsl_rmu.c
@@ -570,7 +570,7 @@ int fsl_rio_port_write_init(struct fsl_rio_pw *pw)
out_be32(&pw->pw_regs->pwsr,
(RIO_IPWSR_TE | RIO_IPWSR_QFI | RIO_IPWSR_PWD));
- /* Configure port write contoller for snooping enable all reporting,
+ /* Configure port write controller for snooping enable all reporting,
clear queue full */
out_be32(&pw->pw_regs->pwmr,
RIO_IPWMR_SEN | RIO_IPWMR_QFIE | RIO_IPWMR_EIE | RIO_IPWMR_CQ);
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index 6f99ed3967fd..aa2c186d3115 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -238,7 +238,7 @@ void i8259_init(struct device_node *node, unsigned long intack_addr)
/* init master interrupt controller */
outb(0x11, 0x20); /* Start init sequence */
outb(0x00, 0x21); /* Vector base */
- outb(0x04, 0x21); /* edge tiggered, Cascade (slave) on IRQ2 */
+ outb(0x04, 0x21); /* edge triggered, Cascade (slave) on IRQ2 */
outb(0x01, 0x21); /* Select 8086 mode */
/* init slave interrupt controller */
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 2a0452e364ba..afe3c7cd395d 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -2,7 +2,7 @@
* arch/powerpc/kernel/mpic.c
*
* Driver for interrupt controllers following the OpenPIC standard, the
- * common implementation beeing IBM's MPIC. This driver also can deal
+ * common implementation being IBM's MPIC. This driver also can deal
* with various broken implementations of this HW.
*
* Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
@@ -1657,7 +1657,7 @@ void __init mpic_init(struct mpic *mpic)
}
}
- /* FSL mpic error interrupt intialization */
+ /* FSL mpic error interrupt initialization */
if (mpic->flags & MPIC_FSL_HAS_EIMR)
mpic_err_int_init(mpic, MPIC_FSL_ERR_INT);
}
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 07a8508cb7fa..942796fa4767 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -47,6 +47,9 @@
#include <asm/debug.h>
#include <asm/hw_breakpoint.h>
+#include <asm/opal.h>
+#include <asm/firmware.h>
+
#ifdef CONFIG_PPC64
#include <asm/hvcall.h>
#include <asm/paca.h>
@@ -119,6 +122,16 @@ static void dump(void);
static void prdump(unsigned long, long);
static int ppc_inst_dump(unsigned long, long, int);
static void dump_log_buf(void);
+
+#ifdef CONFIG_PPC_POWERNV
+static void dump_opal_msglog(void);
+#else
+static inline void dump_opal_msglog(void)
+{
+ printf("Machine is not running OPAL firmware.\n");
+}
+#endif
+
static void backtrace(struct pt_regs *);
static void excprint(struct pt_regs *);
static void prregs(struct pt_regs *);
@@ -150,6 +163,7 @@ static int cpu_cmd(void);
static void csum(void);
static void bootcmds(void);
static void proccall(void);
+static void show_tasks(void);
void dump_segments(void);
static void symbol_lookup(void);
static void xmon_show_stack(unsigned long sp, unsigned long lr,
@@ -202,6 +216,10 @@ Commands:\n\
df dump float values\n\
dd dump double values\n\
dl dump the kernel log buffer\n"
+#ifdef CONFIG_PPC_POWERNV
+ "\
+ do dump the OPAL message log\n"
+#endif
#ifdef CONFIG_PPC64
"\
dp[#] dump paca for current cpu, or cpu #\n\
@@ -221,6 +239,7 @@ Commands:\n\
mz zero a block of memory\n\
mi show information about memory allocation\n\
p call a procedure\n\
+ P list processes/tasks\n\
r print registers\n\
s single step\n"
#ifdef CONFIG_SPU_BASE
@@ -233,7 +252,7 @@ Commands:\n\
" S print special registers\n\
t print backtrace\n\
x exit monitor and recover\n\
- X exit monitor and dont recover\n"
+ X exit monitor and don't recover\n"
#if defined(CONFIG_PPC64) && !defined(CONFIG_PPC_BOOK3E)
" u dump segment table or SLB\n"
#elif defined(CONFIG_PPC_STD_MMU_32)
@@ -950,6 +969,9 @@ cmds(struct pt_regs *excp)
case 'p':
proccall();
break;
+ case 'P':
+ show_tasks();
+ break;
#ifdef CONFIG_PPC_STD_MMU
case 'u':
dump_segments();
@@ -2253,6 +2275,8 @@ dump(void)
last_cmd = "di\n";
} else if (c == 'l') {
dump_log_buf();
+ } else if (c == 'o') {
+ dump_opal_msglog();
} else if (c == 'r') {
scanhex(&ndump);
if (ndump == 0)
@@ -2395,6 +2419,45 @@ dump_log_buf(void)
catch_memory_errors = 0;
}
+#ifdef CONFIG_PPC_POWERNV
+static void dump_opal_msglog(void)
+{
+ unsigned char buf[128];
+ ssize_t res;
+ loff_t pos = 0;
+
+ if (!firmware_has_feature(FW_FEATURE_OPAL)) {
+ printf("Machine is not running OPAL firmware.\n");
+ return;
+ }
+
+ if (setjmp(bus_error_jmp) != 0) {
+ printf("Error dumping OPAL msglog!\n");
+ return;
+ }
+
+ catch_memory_errors = 1;
+ sync();
+
+ xmon_start_pagination();
+ while ((res = opal_msglog_copy(buf, pos, sizeof(buf) - 1))) {
+ if (res < 0) {
+ printf("Error dumping OPAL msglog! Error: %zd\n", res);
+ break;
+ }
+ buf[res] = '\0';
+ printf("%s", buf);
+ pos += res;
+ }
+ xmon_end_pagination();
+
+ sync();
+ /* wait a little while to see if we get a machine check */
+ __delay(200);
+ catch_memory_errors = 0;
+}
+#endif
+
/*
* Memory operations - move, set, print differences
*/
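
dump_opal_msglog() above reads the log in chunks of sizeof(buf) - 1 bytes so the last byte is always free for a NUL terminator. The same pattern in isolation, assuming a reader with the opal_msglog_copy() return convention (bytes copied, 0 at end of log, negative on error) and xmon's printf():

static void dump_chunked(ssize_t (*read_chunk)(char *buf, loff_t pos, size_t len))
{
	char buf[128];
	loff_t pos = 0;
	ssize_t res;

	while ((res = read_chunk(buf, pos, sizeof(buf) - 1)) > 0) {
		buf[res] = '\0';	/* terminate whatever came back */
		printf("%s", buf);
		pos += res;		/* resume where the last chunk ended */
	}
	if (res < 0)
		printf("Error reading log: %zd\n", res);
}
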
@@ -2508,6 +2571,61 @@ memzcan(void)
printf("%.8x\n", a - mskip);
}
+static void show_task(struct task_struct *tsk)
+{
+ char state;
+
+ /*
+ * Cloned from kdb_task_state_char(), which is not entirely
+ * appropriate for calling from xmon. This could be moved
+ * to a common, generic, routine used by both.
+ */
+ state = (tsk->state == 0) ? 'R' :
+ (tsk->state < 0) ? 'U' :
+ (tsk->state & TASK_UNINTERRUPTIBLE) ? 'D' :
+ (tsk->state & TASK_STOPPED) ? 'T' :
+ (tsk->state & TASK_TRACED) ? 'C' :
+ (tsk->exit_state & EXIT_ZOMBIE) ? 'Z' :
+ (tsk->exit_state & EXIT_DEAD) ? 'E' :
+ (tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
+
+ printf("%p %016lx %6d %6d %c %2d %s\n", tsk,
+ tsk->thread.ksp,
+ tsk->pid, tsk->parent->pid,
+ state, task_thread_info(tsk)->cpu,
+ tsk->comm);
+}
+
+static void show_tasks(void)
+{
+ unsigned long tskv;
+ struct task_struct *tsk = NULL;
+
+ printf(" task_struct ->thread.ksp PID PPID S P CMD\n");
+
+ if (scanhex(&tskv))
+ tsk = (struct task_struct *)tskv;
+
+ if (setjmp(bus_error_jmp) != 0) {
+ catch_memory_errors = 0;
+ printf("*** Error dumping task %p\n", tsk);
+ return;
+ }
+
+ catch_memory_errors = 1;
+ sync();
+
+ if (tsk)
+ show_task(tsk);
+ else
+ for_each_process(tsk)
+ show_task(tsk);
+
+ sync();
+ __delay(200);
+ catch_memory_errors = 0;
+}
+
static void proccall(void)
{
unsigned long args[8];
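
A hypothetical xmon session with the new command (addresses and values illustrative only): "P" alone walks every process via for_each_process(), while "P <address>" dumps the single task_struct at that address.

mon> P
     task_struct     ->thread.ksp    PID   PPID S  P CMD
c000000001c14500 c000000001c17c00      1      0 S  0 init
c000000001c14a80 c000000001c1fc00      2      0 S  1 kthreadd
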
diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile
index be2ac5ce349f..8a55c1aa11aa 100644
--- a/drivers/misc/cxl/Makefile
+++ b/drivers/misc/cxl/Makefile
@@ -4,6 +4,7 @@ ccflags-$(CONFIG_PPC_WERROR) += -Werror
cxl-y += main.o file.o irq.o fault.o native.o
cxl-y += context.o sysfs.o debugfs.o pci.o trace.o
cxl-y += vphb.o api.o
+cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o
obj-$(CONFIG_CXL) += cxl.o
obj-$(CONFIG_CXL_BASE) += base.o
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index ea3eeb7011e1..2107c948406d 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -51,8 +51,6 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
if (rc)
goto err_mapping;
- cxl_assign_psn_space(ctx);
-
return ctx;
err_mapping:
@@ -78,7 +76,6 @@ struct device *cxl_get_phys_dev(struct pci_dev *dev)
return afu->adapter->dev.parent;
}
-EXPORT_SYMBOL_GPL(cxl_get_phys_dev);
int cxl_release_context(struct cxl_context *ctx)
{
@@ -91,28 +88,11 @@ int cxl_release_context(struct cxl_context *ctx)
}
EXPORT_SYMBOL_GPL(cxl_release_context);
-int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
-{
- if (num == 0)
- num = ctx->afu->pp_irqs;
- return afu_allocate_irqs(ctx, num);
-}
-EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);
-
-void cxl_free_afu_irqs(struct cxl_context *ctx)
-{
- afu_irq_name_free(ctx);
- cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
-}
-EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);
-
static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
{
__u16 range;
int r;
- WARN_ON(num == 0);
-
for (r = 0; r < CXL_IRQ_RANGES; r++) {
range = ctx->irqs.range[r];
if (num < range) {
@@ -123,6 +103,44 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
return 0;
}
+int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
+{
+ int res;
+ irq_hw_number_t hwirq;
+
+ if (num == 0)
+ num = ctx->afu->pp_irqs;
+ res = afu_allocate_irqs(ctx, num);
+ if (!res && !cpu_has_feature(CPU_FTR_HVMODE)) {
+ /* In a guest, the PSL interrupt is not multiplexed. It was
+ * allocated above, and we need to set its handler
+ */
+ hwirq = cxl_find_afu_irq(ctx, 0);
+ if (hwirq)
+ cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl");
+ }
+ return res;
+}
+EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);
+
+void cxl_free_afu_irqs(struct cxl_context *ctx)
+{
+ irq_hw_number_t hwirq;
+ unsigned int virq;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE)) {
+ hwirq = cxl_find_afu_irq(ctx, 0);
+ if (hwirq) {
+ virq = irq_find_mapping(NULL, hwirq);
+ if (virq)
+ cxl_unmap_irq(virq, ctx);
+ }
+ }
+ afu_irq_name_free(ctx);
+ cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+}
+EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);
+
int cxl_map_afu_irq(struct cxl_context *ctx, int num,
irq_handler_t handler, void *cookie, char *name)
{
@@ -178,7 +196,7 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
cxl_ctx_get();
- if ((rc = cxl_attach_process(ctx, kernel, wed , 0))) {
+ if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
put_pid(ctx->pid);
cxl_ctx_put();
goto out;
@@ -193,7 +211,7 @@ EXPORT_SYMBOL_GPL(cxl_start_context);
int cxl_process_element(struct cxl_context *ctx)
{
- return ctx->pe;
+ return ctx->external_pe;
}
EXPORT_SYMBOL_GPL(cxl_process_element);
@@ -207,7 +225,6 @@ EXPORT_SYMBOL_GPL(cxl_stop_context);
void cxl_set_master(struct cxl_context *ctx)
{
ctx->master = true;
- cxl_assign_psn_space(ctx);
}
EXPORT_SYMBOL_GPL(cxl_set_master);
@@ -325,15 +342,11 @@ EXPORT_SYMBOL_GPL(cxl_start_work);
void __iomem *cxl_psa_map(struct cxl_context *ctx)
{
- struct cxl_afu *afu = ctx->afu;
- int rc;
-
- rc = cxl_afu_check_and_enable(afu);
- if (rc)
+ if (ctx->status != STARTED)
return NULL;
pr_devel("%s: psn_phys%llx size:%llx\n",
- __func__, afu->psn_phys, afu->adapter->ps_size);
+ __func__, ctx->psn_phys, ctx->psn_size);
return ioremap(ctx->psn_phys, ctx->psn_size);
}
EXPORT_SYMBOL_GPL(cxl_psa_map);
@@ -349,11 +362,11 @@ int cxl_afu_reset(struct cxl_context *ctx)
struct cxl_afu *afu = ctx->afu;
int rc;
- rc = __cxl_afu_reset(afu);
+ rc = cxl_ops->afu_reset(afu);
if (rc)
return rc;
- return cxl_afu_check_and_enable(afu);
+ return cxl_ops->afu_check_and_enable(afu);
}
EXPORT_SYMBOL_GPL(cxl_afu_reset);
@@ -363,3 +376,11 @@ void cxl_perst_reloads_same_image(struct cxl_afu *afu,
afu->adapter->perst_same_image = perst_reloads_same_image;
}
EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);
+
+ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count)
+{
+ struct cxl_afu *afu = cxl_pci_to_afu(dev);
+
+ return cxl_ops->read_adapter_vpd(afu->adapter, buf, count);
+}
+EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd);
diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c
index a9f0dd3255a2..9b90ec6c07cd 100644
--- a/drivers/misc/cxl/base.c
+++ b/drivers/misc/cxl/base.c
@@ -11,6 +11,7 @@
#include <linux/rcupdate.h>
#include <asm/errno.h>
#include <misc/cxl-base.h>
+#include <linux/of_platform.h>
#include "cxl.h"
/* protected by rcu */
@@ -84,3 +85,34 @@ void unregister_cxl_calls(struct cxl_calls *calls)
synchronize_rcu();
}
EXPORT_SYMBOL_GPL(unregister_cxl_calls);
+
+int cxl_update_properties(struct device_node *dn,
+ struct property *new_prop)
+{
+ return of_update_property(dn, new_prop);
+}
+EXPORT_SYMBOL_GPL(cxl_update_properties);
+
+static int __init cxl_base_init(void)
+{
+ struct device_node *np = NULL;
+ struct platform_device *dev;
+ int count = 0;
+
+ /*
+ * Scan for compatible devices in guest only
+ */
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ return 0;
+
+ while ((np = of_find_compatible_node(np, NULL,
+ "ibm,coherent-platform-facility"))) {
+ dev = of_platform_device_create(np, NULL, NULL);
+ if (dev)
+ count++;
+ }
+ pr_devel("Found %d cxl device(s)\n", count);
+ return 0;
+}
+
+module_init(cxl_base_init);
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 262b88eac414..10370f280500 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -95,7 +95,12 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
return i;
ctx->pe = i;
- ctx->elem = &ctx->afu->spa[i];
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ ctx->elem = &ctx->afu->native->spa[i];
+ ctx->external_pe = ctx->pe;
+ } else {
+ ctx->external_pe = -1; /* assigned when attaching */
+ }
ctx->pe_inserted = false;
/*
@@ -214,8 +219,8 @@ int __detach_context(struct cxl_context *ctx)
/* Only warn if we detached while the link was OK.
* If detach fails when hw is down, we don't care.
*/
- WARN_ON(cxl_detach_process(ctx) &&
- cxl_adapter_link_ok(ctx->afu->adapter));
+ WARN_ON(cxl_ops->detach_process(ctx) &&
+ cxl_ops->link_ok(ctx->afu->adapter, ctx->afu));
flush_work(&ctx->fault_work); /* Only needed for dedicated process */
/* release the reference to the group leader and mm handling pid */
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a521bc72cec2..38e21cf7806e 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -324,6 +324,10 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
#define CXL_MODE_TIME_SLICED 0x4
#define CXL_SUPPORTED_MODES (CXL_MODE_DEDICATED | CXL_MODE_DIRECTED)
+#define CXL_DEV_MINORS 13 /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */
+#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS)
+#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS)
+
enum cxl_context_status {
CLOSED,
OPENED,
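
To make the minor-number arithmetic concrete: with CXL_DEV_MINORS = 13, adapter 2 owns minors 26..38. Minor 26 is the card control device; per the CXL_AFU_MINOR_* macros that remain in file.c, AFU slice 1 then gets 26 + 1 + 3*1 = 30 (dedicated), 31 (master) and 32 (shared). In the reverse direction, CXL_DEVT_ADAPTER() recovers the adapter number from any of them, e.g. 30 / 13 = 2.
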
@@ -336,6 +340,12 @@ enum prefault_modes {
CXL_PREFAULT_ALL,
};
+enum cxl_attrs {
+ CXL_ADAPTER_ATTRS,
+ CXL_AFU_MASTER_ATTRS,
+ CXL_AFU_ATTRS,
+};
+
struct cxl_sste {
__be64 esid_data;
__be64 vsid_data;
@@ -344,18 +354,46 @@ struct cxl_sste {
#define to_cxl_adapter(d) container_of(d, struct cxl, dev)
#define to_cxl_afu(d) container_of(d, struct cxl_afu, dev)
-struct cxl_afu {
+struct cxl_afu_native {
+ void __iomem *p1n_mmio;
+ void __iomem *afu_desc_mmio;
irq_hw_number_t psl_hwirq;
+ unsigned int psl_virq;
+ struct mutex spa_mutex;
+ /*
+ * Only the first part of the SPA is used for the process element
+ * linked list. The only other part that software needs to worry about
+ * is sw_command_status, which we store a separate pointer to.
+ * Everything else in the SPA is only used by hardware
+ */
+ struct cxl_process_element *spa;
+ __be64 *sw_command_status;
+ unsigned int spa_size;
+ int spa_order;
+ int spa_max_procs;
+ u64 pp_offset;
+};
+
+struct cxl_afu_guest {
+ u64 handle;
+ phys_addr_t p2n_phys;
+ u64 p2n_size;
+ int max_ints;
+ struct mutex recovery_lock;
+ int previous_state;
+};
+
+struct cxl_afu {
+ struct cxl_afu_native *native;
+ struct cxl_afu_guest *guest;
irq_hw_number_t serr_hwirq;
- char *err_irq_name;
- char *psl_irq_name;
unsigned int serr_virq;
- void __iomem *p1n_mmio;
+ char *psl_irq_name;
+ char *err_irq_name;
void __iomem *p2n_mmio;
phys_addr_t psn_phys;
- u64 pp_offset;
u64 pp_size;
- void __iomem *afu_desc_mmio;
+
struct cxl *adapter;
struct device dev;
struct cdev afu_cdev_s, afu_cdev_m, afu_cdev_d;
@@ -363,26 +401,12 @@ struct cxl_afu {
struct idr contexts_idr;
struct dentry *debugfs;
struct mutex contexts_lock;
- struct mutex spa_mutex;
spinlock_t afu_cntl_lock;
/* AFU error buffer fields and bin attribute for sysfs */
u64 eb_len, eb_offset;
struct bin_attribute attr_eb;
- /*
- * Only the first part of the SPA is used for the process element
- * linked list. The only other part that software needs to worry about
- * is sw_command_status, which we store a separate pointer to.
- * Everything else in the SPA is only used by hardware
- */
- struct cxl_process_element *spa;
- __be64 *sw_command_status;
- unsigned int spa_size;
- int spa_order;
- int spa_max_procs;
- unsigned int psl_virq;
-
/* pointer to the vphb */
struct pci_controller *phb;
@@ -421,6 +445,12 @@ struct cxl_irq_name {
char *name;
};
+struct irq_avail {
+ irq_hw_number_t offset;
+ irq_hw_number_t range;
+ unsigned long *bitmap;
+};
+
/*
* This is a cxl context. If the PSL is in dedicated mode, there will be one
* of these per AFU. If in AFU directed there can be lots of these.
@@ -476,7 +506,19 @@ struct cxl_context {
struct cxl_process_element *elem;
- int pe; /* process element handle */
+ /*
+ * pe is the process element handle, assigned by this driver when the
+ * context is initialized.
+ *
+ * external_pe is the PE shown outside of cxl.
+ * On bare-metal, pe=external_pe, because we decide what the handle is.
+ * In a guest, we only find out about the pe used by pHyp when the
+ * context is attached, and that's the value we want to report outside
+ * of cxl.
+ */
+ int pe;
+ int external_pe;
+
u32 irq_count;
bool pe_inserted;
bool master;
@@ -488,11 +530,34 @@ struct cxl_context {
struct rcu_head rcu;
};
-struct cxl {
+struct cxl_native {
+ u64 afu_desc_off;
+ u64 afu_desc_size;
void __iomem *p1_mmio;
void __iomem *p2_mmio;
irq_hw_number_t err_hwirq;
unsigned int err_virq;
+ u64 ps_off;
+};
+
+struct cxl_guest {
+ struct platform_device *pdev;
+ int irq_nranges;
+ struct cdev cdev;
+ irq_hw_number_t irq_base_offset;
+ struct irq_avail *irq_avail;
+ spinlock_t irq_alloc_lock;
+ u64 handle;
+ char *status;
+ u16 vendor;
+ u16 device;
+ u16 subsystem_vendor;
+ u16 subsystem;
+};
+
+struct cxl {
+ struct cxl_native *native;
+ struct cxl_guest *guest;
spinlock_t afu_list_lock;
struct cxl_afu *afu[CXL_MAX_SLICES];
struct device dev;
@@ -503,9 +568,6 @@ struct cxl {
struct bin_attribute cxl_attr;
int adapter_num;
int user_irqs;
- u64 afu_desc_off;
- u64 afu_desc_size;
- u64 ps_off;
u64 ps_size;
u16 psl_rev;
u16 base_image;
@@ -519,13 +581,15 @@ struct cxl {
bool perst_same_image;
};
-int cxl_alloc_one_irq(struct cxl *adapter);
-void cxl_release_one_irq(struct cxl *adapter, int hwirq);
-int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num);
-void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter);
-int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq);
+int cxl_pci_alloc_one_irq(struct cxl *adapter);
+void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq);
+int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num);
+void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter);
+int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq);
int cxl_update_image_control(struct cxl *adapter);
-int cxl_reset(struct cxl *adapter);
+int cxl_pci_reset(struct cxl *adapter);
+void cxl_pci_release_afu(struct device *dev);
+ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len);
/* common == phyp + powernv */
struct cxl_process_element_common {
@@ -555,29 +619,32 @@ struct cxl_process_element {
__be32 software_state;
} __packed;
-static inline bool cxl_adapter_link_ok(struct cxl *cxl)
+static inline bool cxl_adapter_link_ok(struct cxl *cxl, struct cxl_afu *afu)
{
struct pci_dev *pdev;
- pdev = to_pci_dev(cxl->dev.parent);
- return !pci_channel_offline(pdev);
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ pdev = to_pci_dev(cxl->dev.parent);
+ return !pci_channel_offline(pdev);
+ }
+ return true;
}
static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg)
{
WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE));
- return cxl->p1_mmio + cxl_reg_off(reg);
+ return cxl->native->p1_mmio + cxl_reg_off(reg);
}
static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val)
{
- if (likely(cxl_adapter_link_ok(cxl)))
+ if (likely(cxl_adapter_link_ok(cxl, NULL)))
out_be64(_cxl_p1_addr(cxl, reg), val);
}
static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg)
{
- if (likely(cxl_adapter_link_ok(cxl)))
+ if (likely(cxl_adapter_link_ok(cxl, NULL)))
return in_be64(_cxl_p1_addr(cxl, reg));
else
return ~0ULL;
@@ -586,18 +653,18 @@ static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg)
static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg)
{
WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE));
- return afu->p1n_mmio + cxl_reg_off(reg);
+ return afu->native->p1n_mmio + cxl_reg_off(reg);
}
static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 val)
{
- if (likely(cxl_adapter_link_ok(afu->adapter)))
+ if (likely(cxl_adapter_link_ok(afu->adapter, afu)))
out_be64(_cxl_p1n_addr(afu, reg), val);
}
static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg)
{
- if (likely(cxl_adapter_link_ok(afu->adapter)))
+ if (likely(cxl_adapter_link_ok(afu->adapter, afu)))
return in_be64(_cxl_p1n_addr(afu, reg));
else
return ~0ULL;
@@ -610,39 +677,19 @@ static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg
static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 val)
{
- if (likely(cxl_adapter_link_ok(afu->adapter)))
+ if (likely(cxl_adapter_link_ok(afu->adapter, afu)))
out_be64(_cxl_p2n_addr(afu, reg), val);
}
static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg)
{
- if (likely(cxl_adapter_link_ok(afu->adapter)))
+ if (likely(cxl_adapter_link_ok(afu->adapter, afu)))
return in_be64(_cxl_p2n_addr(afu, reg));
else
return ~0ULL;
}
-static inline u64 cxl_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off)
-{
- if (likely(cxl_adapter_link_ok(afu->adapter)))
- return in_le64((afu)->afu_desc_mmio + (afu)->crs_offset +
- ((cr) * (afu)->crs_len) + (off));
- else
- return ~0ULL;
-}
-
-static inline u32 cxl_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off)
-{
- if (likely(cxl_adapter_link_ok(afu->adapter)))
- return in_le32((afu)->afu_desc_mmio + (afu)->crs_offset +
- ((cr) * (afu)->crs_len) + (off));
- else
- return 0xffffffff;
-}
-u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off);
-u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off);
-
-ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
+ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
loff_t off, size_t count);
@@ -652,13 +699,14 @@ struct cxl_calls {
};
int register_cxl_calls(struct cxl_calls *calls);
void unregister_cxl_calls(struct cxl_calls *calls);
+int cxl_update_properties(struct device_node *dn, struct property *new_prop);
-int cxl_alloc_adapter_nr(struct cxl *adapter);
void cxl_remove_adapter_nr(struct cxl *adapter);
int cxl_alloc_spa(struct cxl_afu *afu);
void cxl_release_spa(struct cxl_afu *afu);
+dev_t cxl_get_dev(void);
int cxl_file_init(void);
void cxl_file_exit(void);
int cxl_register_adapter(struct cxl *adapter);
@@ -679,21 +727,19 @@ void cxl_sysfs_afu_remove(struct cxl_afu *afu);
int cxl_sysfs_afu_m_add(struct cxl_afu *afu);
void cxl_sysfs_afu_m_remove(struct cxl_afu *afu);
-int cxl_afu_activate_mode(struct cxl_afu *afu, int mode);
-int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode);
-int cxl_afu_deactivate_mode(struct cxl_afu *afu);
+struct cxl *cxl_alloc_adapter(void);
+struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice);
int cxl_afu_select_best_mode(struct cxl_afu *afu);
-int cxl_register_psl_irq(struct cxl_afu *afu);
-void cxl_release_psl_irq(struct cxl_afu *afu);
-int cxl_register_psl_err_irq(struct cxl *adapter);
-void cxl_release_psl_err_irq(struct cxl *adapter);
-int cxl_register_serr_irq(struct cxl_afu *afu);
-void cxl_release_serr_irq(struct cxl_afu *afu);
+int cxl_native_register_psl_irq(struct cxl_afu *afu);
+void cxl_native_release_psl_irq(struct cxl_afu *afu);
+int cxl_native_register_psl_err_irq(struct cxl *adapter);
+void cxl_native_release_psl_err_irq(struct cxl *adapter);
+int cxl_native_register_serr_irq(struct cxl_afu *afu);
+void cxl_native_release_serr_irq(struct cxl_afu *afu);
int afu_register_irqs(struct cxl_context *ctx, u32 count);
void afu_release_irqs(struct cxl_context *ctx, void *cookie);
void afu_irq_name_free(struct cxl_context *ctx);
-irqreturn_t cxl_slice_irq_err(int irq, void *data);
int cxl_debugfs_init(void);
void cxl_debugfs_exit(void);
@@ -707,6 +753,7 @@ void cxl_prefault(struct cxl_context *ctx, u64 wed);
struct cxl *get_cxl_adapter(int num);
int cxl_alloc_sst(struct cxl_context *ctx);
+void cxl_dump_debug_buffer(void *addr, size_t size);
void init_cxl_native(void);
@@ -720,40 +767,54 @@ unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq,
void cxl_unmap_irq(unsigned int virq, void *cookie);
int __detach_context(struct cxl_context *ctx);
-/* This matches the layout of the H_COLLECT_CA_INT_INFO retbuf */
+/*
+ * This must match the layout of the H_COLLECT_CA_INT_INFO retbuf defined
+ * in PAPR.
+ * A word about endianness: a pointer to this structure is passed when
+ * calling the hcall. However, it is not a block of memory filled up by
+ * the hypervisor. The return values are found in registers, and copied
+ * one by one when returning from the hcall. See the end of the call to
+ * plpar_hcall9() in hvCall.S
+ * As a consequence:
+ * - we don't need to do any endianness conversion
+ * - the pid and tid are an exception. They are 32-bit values returned in
+ * the same 64-bit register. So we do need to worry about byte ordering.
+ */
struct cxl_irq_info {
u64 dsisr;
u64 dar;
u64 dsr;
+#ifndef CONFIG_CPU_LITTLE_ENDIAN
u32 pid;
u32 tid;
+#else
+ u32 tid;
+ u32 pid;
+#endif
u64 afu_err;
u64 errstat;
- u64 padding[3]; /* to match the expected retbuf size for plpar_hcall9 */
+ u64 proc_handle;
+ u64 padding[2]; /* to match the expected retbuf size for plpar_hcall9 */
};
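
The pid/tid swap can be pictured as a union over the raw hcall return register. Which half carries pid is an assumption in this sketch; the point is only that the two 32-bit fields must swap declaration order between endiannesses to land on the same halves of the register:

union pid_tid_reg {
	u64 reg;			/* one 64-bit retbuf register */
	struct {
#ifndef CONFIG_CPU_LITTLE_ENDIAN
		u32 pid, tid;		/* big endian: first field maps to the high bytes */
#else
		u32 tid, pid;		/* little endian: declaration order flips */
#endif
	};
};
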
void cxl_assign_psn_space(struct cxl_context *ctx);
-int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed,
- u64 amr);
-int cxl_detach_process(struct cxl_context *ctx);
-
-int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info);
-int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask);
+irqreturn_t cxl_irq(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info);
+int cxl_register_one_irq(struct cxl *adapter, irq_handler_t handler,
+ void *cookie, irq_hw_number_t *dest_hwirq,
+ unsigned int *dest_virq, const char *name);
int cxl_check_error(struct cxl_afu *afu);
int cxl_afu_slbia(struct cxl_afu *afu);
int cxl_tlb_slb_invalidate(struct cxl *adapter);
int cxl_afu_disable(struct cxl_afu *afu);
-int __cxl_afu_reset(struct cxl_afu *afu);
-int cxl_afu_check_and_enable(struct cxl_afu *afu);
int cxl_psl_purge(struct cxl_afu *afu);
void cxl_stop_trace(struct cxl *cxl);
int cxl_pci_vphb_add(struct cxl_afu *afu);
-void cxl_pci_vphb_reconfigure(struct cxl_afu *afu);
void cxl_pci_vphb_remove(struct cxl_afu *afu);
extern struct pci_driver cxl_pci_driver;
+extern struct platform_driver cxl_of_driver;
int afu_allocate_irqs(struct cxl_context *ctx, u32 count);
int afu_open(struct inode *inode, struct file *file);
@@ -764,4 +825,61 @@ unsigned int afu_poll(struct file *file, struct poll_table_struct *poll);
ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off);
extern const struct file_operations afu_fops;
+struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_device *dev);
+void cxl_guest_remove_adapter(struct cxl *adapter);
+int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node *np);
+int cxl_of_read_adapter_properties(struct cxl *adapter, struct device_node *np);
+ssize_t cxl_guest_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len);
+ssize_t cxl_guest_read_afu_vpd(struct cxl_afu *afu, void *buf, size_t len);
+int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_np);
+void cxl_guest_remove_afu(struct cxl_afu *afu);
+int cxl_of_read_afu_handle(struct cxl_afu *afu, struct device_node *afu_np);
+int cxl_of_read_afu_properties(struct cxl_afu *afu, struct device_node *afu_np);
+int cxl_guest_add_chardev(struct cxl *adapter);
+void cxl_guest_remove_chardev(struct cxl *adapter);
+void cxl_guest_reload_module(struct cxl *adapter);
+int cxl_of_probe(struct platform_device *pdev);
+
+struct cxl_backend_ops {
+ struct module *module;
+ int (*adapter_reset)(struct cxl *adapter);
+ int (*alloc_one_irq)(struct cxl *adapter);
+ void (*release_one_irq)(struct cxl *adapter, int hwirq);
+ int (*alloc_irq_ranges)(struct cxl_irq_ranges *irqs,
+ struct cxl *adapter, unsigned int num);
+ void (*release_irq_ranges)(struct cxl_irq_ranges *irqs,
+ struct cxl *adapter);
+ int (*setup_irq)(struct cxl *adapter, unsigned int hwirq,
+ unsigned int virq);
+ irqreturn_t (*handle_psl_slice_error)(struct cxl_context *ctx,
+ u64 dsisr, u64 errstat);
+ irqreturn_t (*psl_interrupt)(int irq, void *data);
+ int (*ack_irq)(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask);
+ int (*attach_process)(struct cxl_context *ctx, bool kernel,
+ u64 wed, u64 amr);
+ int (*detach_process)(struct cxl_context *ctx);
+ bool (*support_attributes)(const char *attr_name, enum cxl_attrs type);
+ bool (*link_ok)(struct cxl *cxl, struct cxl_afu *afu);
+ void (*release_afu)(struct device *dev);
+ ssize_t (*afu_read_err_buffer)(struct cxl_afu *afu, char *buf,
+ loff_t off, size_t count);
+ int (*afu_check_and_enable)(struct cxl_afu *afu);
+ int (*afu_activate_mode)(struct cxl_afu *afu, int mode);
+ int (*afu_deactivate_mode)(struct cxl_afu *afu, int mode);
+ int (*afu_reset)(struct cxl_afu *afu);
+ int (*afu_cr_read8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 *val);
+ int (*afu_cr_read16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 *val);
+ int (*afu_cr_read32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 *val);
+ int (*afu_cr_read64)(struct cxl_afu *afu, int cr_idx, u64 offset, u64 *val);
+ int (*afu_cr_write8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 val);
+ int (*afu_cr_write16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 val);
+ int (*afu_cr_write32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 val);
+ ssize_t (*read_adapter_vpd)(struct cxl *adapter, void *buf, size_t count);
+};
+extern const struct cxl_backend_ops cxl_native_ops;
+extern const struct cxl_backend_ops cxl_guest_ops;
+extern const struct cxl_backend_ops *cxl_ops;
+
+/* check if the given pci_dev is on the cxl vphb bus */
+bool cxl_pci_is_vphb_device(struct pci_dev *dev);
#endif
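
The cxl_ops pointer above is a plain backend dispatch table. The assignment site is not part of this hunk; a plausible sketch, given the CPU_FTR_HVMODE checks used throughout the patch, is:

/* hedged sketch - the real selection code lives elsewhere in the series */
const struct cxl_backend_ops *cxl_ops;

static void __init cxl_select_backend(void)
{
	if (cpu_has_feature(CPU_FTR_HVMODE))
		cxl_ops = &cxl_native_ops;	/* bare metal: direct PSL MMIO */
	else
		cxl_ops = &cxl_guest_ops;	/* pHyp guest: hcall based */
}
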
diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c
index 18df6f44af2a..5751899e0c17 100644
--- a/drivers/misc/cxl/debugfs.c
+++ b/drivers/misc/cxl/debugfs.c
@@ -118,6 +118,10 @@ void cxl_debugfs_afu_remove(struct cxl_afu *afu)
int __init cxl_debugfs_init(void)
{
struct dentry *ent;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return 0;
+
ent = debugfs_create_dir("cxl", NULL);
if (IS_ERR(ent))
return PTR_ERR(ent);
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index 81c3f75b7330..9a8650bcb042 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -101,7 +101,7 @@ static void cxl_ack_ae(struct cxl_context *ctx)
{
unsigned long flags;
- cxl_ack_irq(ctx, CXL_PSL_TFC_An_AE, 0);
+ cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_AE, 0);
spin_lock_irqsave(&ctx->lock, flags);
ctx->pending_fault = true;
@@ -125,7 +125,7 @@ static int cxl_handle_segment_miss(struct cxl_context *ctx,
else {
mb(); /* Order seg table write to TFC MMIO write */
- cxl_ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
+ cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
}
return IRQ_HANDLED;
@@ -163,7 +163,7 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
local_irq_restore(flags);
pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe);
- cxl_ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
+ cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
}
/*
@@ -254,14 +254,17 @@ void cxl_handle_fault(struct work_struct *fault_work)
u64 dar = ctx->dar;
struct mm_struct *mm = NULL;
- if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
- cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar ||
- cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) {
- /* Most likely explanation is harmless - a dedicated process
- * has detached and these were cleared by the PSL purge, but
- * warn about it just in case */
- dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n");
- return;
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
+ cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar ||
+ cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) {
+ /* Most likely explanation is harmless - a dedicated
+ * process has detached and these were cleared by the
+ * PSL purge, but warn about it just in case
+ */
+ dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n");
+ return;
+ }
}
/* Early return if the context is being / has been detached */
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 783337d22f36..eec468f1612f 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -26,9 +26,7 @@
#include "trace.h"
#define CXL_NUM_MINORS 256 /* Total to reserve */
-#define CXL_DEV_MINORS 13 /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */
-#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS)
#define CXL_AFU_MINOR_D(afu) (CXL_CARD_MINOR(afu->adapter) + 1 + (3 * afu->slice))
#define CXL_AFU_MINOR_M(afu) (CXL_AFU_MINOR_D(afu) + 1)
#define CXL_AFU_MINOR_S(afu) (CXL_AFU_MINOR_D(afu) + 2)
@@ -36,7 +34,6 @@
#define CXL_AFU_MKDEV_M(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_M(afu))
#define CXL_AFU_MKDEV_S(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_S(afu))
-#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS)
#define CXL_DEVT_AFU(dev) ((MINOR(dev) % CXL_DEV_MINORS - 1) / 3)
#define CXL_DEVT_IS_CARD(dev) (MINOR(dev) % CXL_DEV_MINORS == 0)
@@ -79,7 +76,7 @@ static int __afu_open(struct inode *inode, struct file *file, bool master)
if (!afu->current_mode)
goto err_put_afu;
- if (!cxl_adapter_link_ok(adapter)) {
+ if (!cxl_ops->link_ok(adapter, afu)) {
rc = -EIO;
goto err_put_afu;
}
@@ -210,8 +207,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
- if ((rc = cxl_attach_process(ctx, false, work.work_element_descriptor,
- amr))) {
+ if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
+ amr))) {
afu_release_irqs(ctx, ctx);
goto out;
}
@@ -222,12 +219,13 @@ out:
mutex_unlock(&ctx->status_mutex);
return rc;
}
+
static long afu_ioctl_process_element(struct cxl_context *ctx,
int __user *upe)
{
pr_devel("%s: pe: %i\n", __func__, ctx->pe);
- if (copy_to_user(upe, &ctx->pe, sizeof(__u32)))
+ if (copy_to_user(upe, &ctx->external_pe, sizeof(__u32)))
return -EFAULT;
return 0;
@@ -259,7 +257,7 @@ long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
if (ctx->status == CLOSED)
return -EIO;
- if (!cxl_adapter_link_ok(ctx->afu->adapter))
+ if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu))
return -EIO;
pr_devel("afu_ioctl\n");
@@ -289,7 +287,7 @@ int afu_mmap(struct file *file, struct vm_area_struct *vm)
if (ctx->status != STARTED)
return -EIO;
- if (!cxl_adapter_link_ok(ctx->afu->adapter))
+ if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu))
return -EIO;
return cxl_context_iomap(ctx, vm);
@@ -336,7 +334,7 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count,
int rc;
DEFINE_WAIT(wait);
- if (!cxl_adapter_link_ok(ctx->afu->adapter))
+ if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu))
return -EIO;
if (count < CXL_READ_MIN_SIZE)
@@ -349,7 +347,7 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count,
if (ctx_event_pending(ctx))
break;
- if (!cxl_adapter_link_ok(ctx->afu->adapter)) {
+ if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) {
rc = -EIO;
goto out;
}
@@ -445,7 +443,8 @@ static const struct file_operations afu_master_fops = {
static char *cxl_devnode(struct device *dev, umode_t *mode)
{
- if (CXL_DEVT_IS_CARD(dev->devt)) {
+ if (cpu_has_feature(CPU_FTR_HVMODE) &&
+ CXL_DEVT_IS_CARD(dev->devt)) {
/*
* These minor numbers will eventually be used to program the
* PSL and AFUs once we have dynamic reprogramming support
@@ -546,6 +545,11 @@ int cxl_register_adapter(struct cxl *adapter)
return device_register(&adapter->dev);
}
+dev_t cxl_get_dev(void)
+{
+ return cxl_dev;
+}
+
int __init cxl_file_init(void)
{
int rc;
diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c
new file mode 100644
index 000000000000..68dd0b7da471
--- /dev/null
+++ b/drivers/misc/cxl/flash.c
@@ -0,0 +1,538 @@
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/semaphore.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <asm/rtas.h>
+
+#include "cxl.h"
+#include "hcalls.h"
+
+#define DOWNLOAD_IMAGE 1
+#define VALIDATE_IMAGE 2
+
+struct ai_header {
+ u16 version;
+ u8 reserved0[6];
+ u16 vendor;
+ u16 device;
+ u16 subsystem_vendor;
+ u16 subsystem;
+ u64 image_offset;
+ u64 image_length;
+ u8 reserved1[96];
+};
+
+static struct semaphore sem;
+unsigned long *buffer[CXL_AI_MAX_ENTRIES];
+struct sg_list *le;
+static u64 continue_token;
+static unsigned int transfer;
+
+struct update_props_workarea {
+ __be32 phandle;
+ __be32 state;
+ __be64 reserved;
+ __be32 nprops;
+} __packed;
+
+struct update_nodes_workarea {
+ __be32 state;
+ __be64 unit_address;
+ __be32 reserved;
+} __packed;
+
+#define DEVICE_SCOPE 3
+#define NODE_ACTION_MASK 0xff000000
+#define NODE_COUNT_MASK 0x00ffffff
+#define OPCODE_DELETE 0x01000000
+#define OPCODE_UPDATE 0x02000000
+#define OPCODE_ADD 0x03000000
+
+static int rcall(int token, char *buf, s32 scope)
+{
+ int rc;
+
+ spin_lock(&rtas_data_buf_lock);
+
+ memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE);
+ rc = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope);
+ memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE);
+
+ spin_unlock(&rtas_data_buf_lock);
+ return rc;
+}
+
+static int update_property(struct device_node *dn, const char *name,
+ u32 vd, char *value)
+{
+ struct property *new_prop;
+ u32 *val;
+ int rc;
+
+ new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
+ if (!new_prop)
+ return -ENOMEM;
+
+ new_prop->name = kstrdup(name, GFP_KERNEL);
+ if (!new_prop->name) {
+ kfree(new_prop);
+ return -ENOMEM;
+ }
+
+ new_prop->length = vd;
+ new_prop->value = kzalloc(new_prop->length, GFP_KERNEL);
+ if (!new_prop->value) {
+ kfree(new_prop->name);
+ kfree(new_prop);
+ return -ENOMEM;
+ }
+ memcpy(new_prop->value, value, vd);
+
+ val = (u32 *)new_prop->value;
+ rc = cxl_update_properties(dn, new_prop);
+ pr_devel("%s: update property (%s, length: %i, value: %#x)\n",
+ dn->name, name, vd, be32_to_cpu(*val));
+
+ if (rc) {
+ kfree(new_prop->name);
+ kfree(new_prop->value);
+ kfree(new_prop);
+ }
+ return rc;
+}
+
+static int update_node(__be32 phandle, s32 scope)
+{
+ struct update_props_workarea *upwa;
+ struct device_node *dn;
+ int i, rc, ret;
+ char *prop_data;
+ char *buf;
+ int token;
+ u32 nprops;
+ u32 vd;
+
+ token = rtas_token("ibm,update-properties");
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -EINVAL;
+
+ buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ dn = of_find_node_by_phandle(be32_to_cpu(phandle));
+ if (!dn) {
+ kfree(buf);
+ return -ENOENT;
+ }
+
+ upwa = (struct update_props_workarea *)&buf[0];
+ upwa->phandle = phandle;
+ do {
+ rc = rcall(token, buf, scope);
+ if (rc < 0)
+ break;
+
+ prop_data = buf + sizeof(*upwa);
+ nprops = be32_to_cpu(upwa->nprops);
+
+ if (*prop_data == 0) {
+ prop_data++;
+ vd = be32_to_cpu(*(__be32 *)prop_data);
+ prop_data += vd + sizeof(vd);
+ nprops--;
+ }
+
+ for (i = 0; i < nprops; i++) {
+ char *prop_name;
+
+ prop_name = prop_data;
+ prop_data += strlen(prop_name) + 1;
+ vd = be32_to_cpu(*(__be32 *)prop_data);
+ prop_data += sizeof(vd);
+
+ if ((vd != 0x00000000) && (vd != 0x80000000)) {
+ ret = update_property(dn, prop_name, vd,
+ prop_data);
+ if (ret)
+ pr_err("cxl: Could not update property %s - %i\n",
+ prop_name, ret);
+
+ prop_data += vd;
+ }
+ }
+ } while (rc == 1);
+
+ of_node_put(dn);
+ kfree(buf);
+ return rc;
+}
+
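
The parsing loop in update_node() implies the following layout for the ibm,update-properties work area. This is reconstructed from the code above as a reading aid, not quoted from PAPR:

  +0   __be32 phandle
  +4   __be32 state
  +8   __be64 reserved
  +16  __be32 nprops
  +20  nprops records, each:
         NUL-terminated property name (an empty name marks the special
         first record, which is skipped)
         __be32 vd   (value length; 0 and 0x80000000 are markers with no
                      value bytes following)
         vd bytes of value
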
+static int update_devicetree(struct cxl *adapter, s32 scope)
+{
+ struct update_nodes_workarea *unwa;
+ u32 action, node_count;
+ int token, rc, i;
+ __be32 *data, drc_index, phandle;
+ char *buf;
+
+ token = rtas_token("ibm,update-nodes");
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -EINVAL;
+
+ buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ unwa = (struct update_nodes_workarea *)&buf[0];
+ unwa->unit_address = cpu_to_be64(adapter->guest->handle);
+ do {
+ rc = rcall(token, buf, scope);
+ if (rc && rc != 1)
+ break;
+
+ data = (__be32 *)buf + 4;
+ while (be32_to_cpu(*data) & NODE_ACTION_MASK) {
+ action = be32_to_cpu(*data) & NODE_ACTION_MASK;
+ node_count = be32_to_cpu(*data) & NODE_COUNT_MASK;
+ pr_devel("device reconfiguration - action: %#x, nodes: %#x\n",
+ action, node_count);
+ data++;
+
+ for (i = 0; i < node_count; i++) {
+ phandle = *data++;
+
+ switch (action) {
+ case OPCODE_DELETE:
+ /* nothing to do */
+ break;
+ case OPCODE_UPDATE:
+ update_node(phandle, scope);
+ break;
+ case OPCODE_ADD:
+ /* nothing to do, just move pointer */
+ drc_index = *data++;
+ break;
+ }
+ }
+ }
+ } while (rc == 1);
+
+ kfree(buf);
+ return 0;
+}
+
+static int handle_image(struct cxl *adapter, int operation,
+ long (*fct)(u64, u64, u64, u64 *),
+ struct cxl_adapter_image *ai)
+{
+ size_t mod, s_copy, len_chunk = 0;
+ struct ai_header *header = NULL;
+ unsigned int entries = 0, i;
+ void *dest, *from;
+ int rc = 0, need_header;
+
+ /* base adapter image header */
+ need_header = (ai->flags & CXL_AI_NEED_HEADER);
+ if (need_header) {
+ header = kzalloc(sizeof(struct ai_header), GFP_KERNEL);
+ if (!header)
+ return -ENOMEM;
+ header->version = cpu_to_be16(1);
+ header->vendor = cpu_to_be16(adapter->guest->vendor);
+ header->device = cpu_to_be16(adapter->guest->device);
+ header->subsystem_vendor = cpu_to_be16(adapter->guest->subsystem_vendor);
+ header->subsystem = cpu_to_be16(adapter->guest->subsystem);
+ header->image_offset = cpu_to_be64(CXL_AI_HEADER_SIZE);
+ header->image_length = cpu_to_be64(ai->len_image);
+ }
+
+ /* number of entries in the list */
+ len_chunk = ai->len_data;
+ if (need_header)
+ len_chunk += CXL_AI_HEADER_SIZE;
+
+ entries = len_chunk / CXL_AI_BUFFER_SIZE;
+ mod = len_chunk % CXL_AI_BUFFER_SIZE;
+ if (mod)
+ entries++;
+
+ if (entries > CXL_AI_MAX_ENTRIES) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ /* < -- MAX_CHUNK_SIZE = 4096 * 256 = 1048576 bytes -->
+ * chunk 0 ----------------------------------------------------
+ * | header | data |
+ * ----------------------------------------------------
+ * chunk 1 ----------------------------------------------------
+ * | data |
+ * ----------------------------------------------------
+ * ....
+ * chunk n ----------------------------------------------------
+ * | data |
+ * ----------------------------------------------------
+ */
+ from = (void *) ai->data;
+ for (i = 0; i < entries; i++) {
+ dest = buffer[i];
+ s_copy = CXL_AI_BUFFER_SIZE;
+
+ if ((need_header) && (i == 0)) {
+ /* add adapter image header */
+ memcpy(buffer[i], header, sizeof(struct ai_header));
+ s_copy = CXL_AI_BUFFER_SIZE - CXL_AI_HEADER_SIZE;
+ dest += CXL_AI_HEADER_SIZE; /* image offset */
+ }
+ if ((i == (entries - 1)) && mod)
+ s_copy = mod;
+
+ /* copy data */
+ if (copy_from_user(dest, from, s_copy)) {
+ rc = -EFAULT;
+ goto err;
+ }
+
+ /* fill in the list */
+ le[i].phys_addr = cpu_to_be64(virt_to_phys(buffer[i]));
+ le[i].len = cpu_to_be64(CXL_AI_BUFFER_SIZE);
+ if ((i == (entries - 1)) && mod)
+ le[i].len = cpu_to_be64(mod);
+ from += s_copy;
+ }
+ pr_devel("%s (op: %i, need header: %i, entries: %i, token: %#llx)\n",
+ __func__, operation, need_header, entries, continue_token);
+
+ /*
+ * download/validate the adapter image to the coherent
+ * platform facility
+ */
+ rc = fct(adapter->guest->handle, virt_to_phys(le), entries,
+ &continue_token);
+ if (rc == 0) /* success of download/validation operation */
+ continue_token = 0;
+
+err:
+ kfree(header);
+
+ return rc;
+}
+
+static int transfer_image(struct cxl *adapter, int operation,
+ struct cxl_adapter_image *ai)
+{
+ int rc = 0;
+ int afu;
+
+ switch (operation) {
+ case DOWNLOAD_IMAGE:
+ rc = handle_image(adapter, operation,
+ &cxl_h_download_adapter_image, ai);
+ if (rc < 0) {
+ pr_devel("resetting adapter\n");
+ cxl_h_reset_adapter(adapter->guest->handle);
+ }
+ return rc;
+
+ case VALIDATE_IMAGE:
+ rc = handle_image(adapter, operation,
+ &cxl_h_validate_adapter_image, ai);
+ if (rc < 0) {
+ pr_devel("resetting adapter\n");
+ cxl_h_reset_adapter(adapter->guest->handle);
+ return rc;
+ }
+ if (rc == 0) {
+ pr_devel("remove curent afu\n");
+ for (afu = 0; afu < adapter->slices; afu++)
+ cxl_guest_remove_afu(adapter->afu[afu]);
+
+ pr_devel("resetting adapter\n");
+ cxl_h_reset_adapter(adapter->guest->handle);
+
+ /* The entire image has now been
+ * downloaded and validated
+ * successfully. The partition
+ * should now call
+ * ibm,update-nodes and
+ * ibm,update-properties to receive
+ * the current configuration.
+ */
+ rc = update_devicetree(adapter, DEVICE_SCOPE);
+ transfer = 1;
+ }
+ return rc;
+ }
+
+ return -EINVAL;
+}
+
+static long ioctl_transfer_image(struct cxl *adapter, int operation,
+ struct cxl_adapter_image __user *uai)
+{
+ struct cxl_adapter_image ai;
+
+ pr_devel("%s\n", __func__);
+
+ if (copy_from_user(&ai, uai, sizeof(struct cxl_adapter_image)))
+ return -EFAULT;
+
+ /*
+ * Make sure reserved fields and bits are set to 0
+ */
+ if (ai.reserved1 || ai.reserved2 || ai.reserved3 || ai.reserved4 ||
+ (ai.flags & ~CXL_AI_ALL))
+ return -EINVAL;
+
+ return transfer_image(adapter, operation, &ai);
+}
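+
+/*
+ * Expected flow from userspace (sketch): push the image in with
+ * CXL_IOCTL_DOWNLOAD_IMAGE (possibly in several calls, tracked via
+ * continue_token), then check it with CXL_IOCTL_VALIDATE_IMAGE. On
+ * close(), the driver reloads itself if validation succeeded, or
+ * resets the adapter otherwise.
+ */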
+
+static int device_open(struct inode *inode, struct file *file)
+{
+ int adapter_num = CXL_DEVT_ADAPTER(inode->i_rdev);
+ struct cxl *adapter;
+ int rc = 0, i;
+
+ pr_devel("in %s\n", __func__);
+
+ BUG_ON(sizeof(struct ai_header) != CXL_AI_HEADER_SIZE);
+
+ /* Allow only one process at a time to open the device */
+ if (down_interruptible(&sem) != 0)
+ return -EPERM;
+
+ if (!(adapter = get_cxl_adapter(adapter_num))) {
+ up(&sem);
+ return -ENODEV;
+ }
+
+ file->private_data = adapter;
+ continue_token = 0;
+ transfer = 0;
+
+ for (i = 0; i < CXL_AI_MAX_ENTRIES; i++)
+ buffer[i] = NULL;
+
+ /* aligned buffer containing list entries which describe up to
+ * 1 megabyte of data (256 entries of 4096 bytes each)
+ * Logical real address of buffer 0 - Buffer 0 length in bytes
+ * Logical real address of buffer 1 - Buffer 1 length in bytes
+ * Logical real address of buffer 2 - Buffer 2 length in bytes
+ * ....
+ * ....
+ * Logical real address of buffer N - Buffer N length in bytes
+ */
+ le = (struct sg_list *)get_zeroed_page(GFP_KERNEL);
+ if (!le) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) {
+ buffer[i] = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+ if (!buffer[i]) {
+ rc = -ENOMEM;
+ goto err1;
+ }
+ }
+
+ return 0;
+
+err1:
+ for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) {
+ if (buffer[i])
+ free_page((unsigned long) buffer[i]);
+ }
+
+ if (le)
+ free_page((unsigned long) le);
+err:
+ put_device(&adapter->dev);
+ up(&sem);
+
+ return rc;
+}
+
+static long device_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct cxl *adapter = file->private_data;
+
+ pr_devel("in %s\n", __func__);
+
+ if (cmd == CXL_IOCTL_DOWNLOAD_IMAGE)
+ return ioctl_transfer_image(adapter,
+ DOWNLOAD_IMAGE,
+ (struct cxl_adapter_image __user *)arg);
+ else if (cmd == CXL_IOCTL_VALIDATE_IMAGE)
+ return ioctl_transfer_image(adapter,
+ VALIDATE_IMAGE,
+ (struct cxl_adapter_image __user *)arg);
+ else
+ return -EINVAL;
+}
+
+static long device_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return device_ioctl(file, cmd, arg);
+}
+
+static int device_close(struct inode *inode, struct file *file)
+{
+ struct cxl *adapter = file->private_data;
+ int i;
+
+ pr_devel("in %s\n", __func__);
+
+ for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) {
+ if (buffer[i])
+ free_page((unsigned long) buffer[i]);
+ }
+
+ if (le)
+ free_page((unsigned long) le);
+
+ up(&sem);
+ put_device(&adapter->dev);
+ continue_token = 0;
+
+ /* reload the module */
+ if (transfer)
+ cxl_guest_reload_module(adapter);
+ else {
+ pr_devel("resetting adapter\n");
+ cxl_h_reset_adapter(adapter->guest->handle);
+ }
+
+ transfer = 0;
+ return 0;
+}
+
+static const struct file_operations fops = {
+ .owner = THIS_MODULE,
+ .open = device_open,
+ .unlocked_ioctl = device_ioctl,
+ .compat_ioctl = device_compat_ioctl,
+ .release = device_close,
+};
+
+void cxl_guest_remove_chardev(struct cxl *adapter)
+{
+ cdev_del(&adapter->guest->cdev);
+}
+
+int cxl_guest_add_chardev(struct cxl *adapter)
+{
+ dev_t devt;
+ int rc;
+
+ devt = MKDEV(MAJOR(cxl_get_dev()), CXL_CARD_MINOR(adapter));
+ cdev_init(&adapter->guest->cdev, &fops);
+ if ((rc = cdev_add(&adapter->guest->cdev, devt, 1))) {
+ dev_err(&adapter->dev,
+ "Unable to add chardev on adapter (card%i): %i\n",
+ adapter->adapter_num, rc);
+ goto err;
+ }
+ adapter->dev.devt = devt;
+ sema_init(&sem, 1);
+err:
+ return rc;
+}
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
new file mode 100644
index 000000000000..8213372de2b7
--- /dev/null
+++ b/drivers/misc/cxl/guest.c
@@ -0,0 +1,1177 @@
+/*
+ * Copyright 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+
+#include "cxl.h"
+#include "hcalls.h"
+#include "trace.h"
+
+#define CXL_ERROR_DETECTED_EVENT 1
+#define CXL_SLOT_RESET_EVENT 2
+#define CXL_RESUME_EVENT 3
+
+static void pci_error_handlers(struct cxl_afu *afu,
+ int bus_error_event,
+ pci_channel_state_t state)
+{
+ struct pci_dev *afu_dev;
+
+ if (afu->phb == NULL)
+ return;
+
+ list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
+ if (!afu_dev->driver)
+ continue;
+
+ switch (bus_error_event) {
+ case CXL_ERROR_DETECTED_EVENT:
+ afu_dev->error_state = state;
+
+ if (afu_dev->driver->err_handler &&
+ afu_dev->driver->err_handler->error_detected)
+ afu_dev->driver->err_handler->error_detected(afu_dev, state);
+ break;
+ case CXL_SLOT_RESET_EVENT:
+ afu_dev->error_state = state;
+
+ if (afu_dev->driver->err_handler &&
+ afu_dev->driver->err_handler->slot_reset)
+ afu_dev->driver->err_handler->slot_reset(afu_dev);
+ break;
+ case CXL_RESUME_EVENT:
+ if (afu_dev->driver->err_handler &&
+ afu_dev->driver->err_handler->resume)
+ afu_dev->driver->err_handler->resume(afu_dev);
+ break;
+ }
+ }
+}
+
+static irqreturn_t guest_handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr,
+ u64 errstat)
+{
+ pr_devel("in %s\n", __func__);
+ dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%.16llx\n", errstat);
+
+ return cxl_ops->ack_irq(ctx, 0, errstat);
+}
+
+static ssize_t guest_collect_vpd(struct cxl *adapter, struct cxl_afu *afu,
+ void *buf, size_t len)
+{
+ unsigned int entries, mod;
+ unsigned long **vpd_buf = NULL;
+ struct sg_list *le;
+ int rc = 0, i, tocopy;
+ u64 out = 0;
+
+ if (buf == NULL)
+ return -EINVAL;
+
+ /* number of entries in the list */
+ entries = len / SG_BUFFER_SIZE;
+ mod = len % SG_BUFFER_SIZE;
+ if (mod)
+ entries++;
+
+ if (entries > SG_MAX_ENTRIES) {
+ entries = SG_MAX_ENTRIES;
+ len = SG_MAX_ENTRIES * SG_BUFFER_SIZE;
+ mod = 0;
+ }
+
+ vpd_buf = kcalloc(entries, sizeof(unsigned long *), GFP_KERNEL);
+ if (!vpd_buf)
+ return -ENOMEM;
+
+ le = (struct sg_list *)get_zeroed_page(GFP_KERNEL);
+ if (!le) {
+ rc = -ENOMEM;
+ goto err1;
+ }
+
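+ /* build the scatter/gather list: one zeroed 4K page per
+ * entry, with the last entry trimmed to the remaining length
+ */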
+ for (i = 0; i < entries; i++) {
+ vpd_buf[i] = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+ if (!vpd_buf[i]) {
+ rc = -ENOMEM;
+ goto err2;
+ }
+ le[i].phys_addr = cpu_to_be64(virt_to_phys(vpd_buf[i]));
+ le[i].len = cpu_to_be64(SG_BUFFER_SIZE);
+ if ((i == (entries - 1)) && mod)
+ le[i].len = cpu_to_be64(mod);
+ }
+
+ if (adapter)
+ rc = cxl_h_collect_vpd_adapter(adapter->guest->handle,
+ virt_to_phys(le), entries, &out);
+ else
+ rc = cxl_h_collect_vpd(afu->guest->handle, 0,
+ virt_to_phys(le), entries, &out);
+ pr_devel("length of available (entries: %i), vpd: %#llx\n",
+ entries, out);
+
+ if (!rc) {
+ /*
+ * hcall returns in 'out' the size of available VPDs.
+ * It fills the buffer with as much data as possible.
+ */
+ if (out < len)
+ len = out;
+ rc = len;
+ if (out) {
+ for (i = 0; i < entries; i++) {
+ if (len < SG_BUFFER_SIZE)
+ tocopy = len;
+ else
+ tocopy = SG_BUFFER_SIZE;
+ memcpy(buf, vpd_buf[i], tocopy);
+ buf += tocopy;
+ len -= tocopy;
+ }
+ }
+ }
+err2:
+ for (i = 0; i < entries; i++) {
+ if (vpd_buf[i])
+ free_page((unsigned long) vpd_buf[i]);
+ }
+ free_page((unsigned long) le);
+err1:
+ kfree(vpd_buf);
+ return rc;
+}
+
+static int guest_get_irq_info(struct cxl_context *ctx, struct cxl_irq_info *info)
+{
+ return cxl_h_collect_int_info(ctx->afu->guest->handle, ctx->process_token, info);
+}
+
+static irqreturn_t guest_psl_irq(int irq, void *data)
+{
+ struct cxl_context *ctx = data;
+ struct cxl_irq_info irq_info;
+ int rc;
+
+ pr_devel("%d: received PSL interrupt %i\n", ctx->pe, irq);
+ rc = guest_get_irq_info(ctx, &irq_info);
+ if (rc) {
+ WARN(1, "Unable to get IRQ info: %i\n", rc);
+ return IRQ_HANDLED;
+ }
+
+ rc = cxl_irq(irq, ctx, &irq_info);
+ return rc;
+}
+
+static int afu_read_error_state(struct cxl_afu *afu, int *state_out)
+{
+ u64 state;
+ int rc = 0;
+
+ rc = cxl_h_read_error_state(afu->guest->handle, &state);
+ if (!rc) {
+ WARN_ON(state != H_STATE_NORMAL &&
+ state != H_STATE_DISABLE &&
+ state != H_STATE_TEMP_UNAVAILABLE &&
+ state != H_STATE_PERM_UNAVAILABLE);
+ *state_out = state & 0xffffffff;
+ }
+ return rc;
+}
+
+static irqreturn_t guest_slice_irq_err(int irq, void *data)
+{
+ struct cxl_afu *afu = data;
+ int rc;
+ u64 serr;
+
+ WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq);
+ rc = cxl_h_get_fn_error_interrupt(afu->guest->handle, &serr);
+ if (rc) {
+ dev_crit(&afu->dev, "Couldn't read PSL_SERR_An: %d\n", rc);
+ return IRQ_HANDLED;
+ }
+ dev_crit(&afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr);
+
+ rc = cxl_h_ack_fn_error_interrupt(afu->guest->handle, serr);
+ if (rc)
+ dev_crit(&afu->dev, "Couldn't ack slice error interrupt: %d\n",
+ rc);
+
+ return IRQ_HANDLED;
+}
+
+
+static int irq_alloc_range(struct cxl *adapter, int len, int *irq)
+{
+ int i, n;
+ struct irq_avail *cur;
+
+ for (i = 0; i < adapter->guest->irq_nranges; i++) {
+ cur = &adapter->guest->irq_avail[i];
+ n = bitmap_find_next_zero_area(cur->bitmap, cur->range,
+ 0, len, 0);
+ if (n < cur->range) {
+ bitmap_set(cur->bitmap, n, len);
+ *irq = cur->offset + n;
+ pr_devel("guest: allocate IRQs %#x->%#x\n",
+ *irq, *irq + len - 1);
+
+ return 0;
+ }
+ }
+ return -ENOSPC;
+}
+
+static int irq_free_range(struct cxl *adapter, int irq, int len)
+{
+ int i, n;
+ struct irq_avail *cur;
+
+ if (len == 0)
+ return -ENOENT;
+
+ for (i = 0; i < adapter->guest->irq_nranges; i++) {
+ cur = &adapter->guest->irq_avail[i];
+ if (irq >= cur->offset &&
+ (irq + len) <= (cur->offset + cur->range)) {
+ n = irq - cur->offset;
+ bitmap_clear(cur->bitmap, n, len);
+ pr_devel("guest: release IRQs %#x->%#x\n",
+ irq, irq + len - 1);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+static int guest_reset(struct cxl *adapter)
+{
+ struct cxl_afu *afu = NULL;
+ int i, rc;
+
+ pr_devel("Adapter reset request\n");
+ for (i = 0; i < adapter->slices; i++) {
+ if ((afu = adapter->afu[i])) {
+ pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT,
+ pci_channel_io_frozen);
+ cxl_context_detach_all(afu);
+ }
+ }
+
+ rc = cxl_h_reset_adapter(adapter->guest->handle);
+ for (i = 0; i < adapter->slices; i++) {
+ if (!rc && (afu = adapter->afu[i])) {
+ pci_error_handlers(afu, CXL_SLOT_RESET_EVENT,
+ pci_channel_io_normal);
+ pci_error_handlers(afu, CXL_RESUME_EVENT, 0);
+ }
+ }
+ return rc;
+}
+
+static int guest_alloc_one_irq(struct cxl *adapter)
+{
+ int irq;
+
+ spin_lock(&adapter->guest->irq_alloc_lock);
+ if (irq_alloc_range(adapter, 1, &irq))
+ irq = -ENOSPC;
+ spin_unlock(&adapter->guest->irq_alloc_lock);
+ return irq;
+}
+
+static void guest_release_one_irq(struct cxl *adapter, int irq)
+{
+ spin_lock(&adapter->guest->irq_alloc_lock);
+ irq_free_range(adapter, irq, 1);
+ spin_unlock(&adapter->guest->irq_alloc_lock);
+}
+
+static int guest_alloc_irq_ranges(struct cxl_irq_ranges *irqs,
+ struct cxl *adapter, unsigned int num)
+{
+ int i, try, irq;
+
+ memset(irqs, 0, sizeof(struct cxl_irq_ranges));
+
+ spin_lock(&adapter->guest->irq_alloc_lock);
+ for (i = 0; i < CXL_IRQ_RANGES && num; i++) {
+ try = num;
+ while (try) {
+ if (irq_alloc_range(adapter, try, &irq) == 0)
+ break;
+ try /= 2;
+ }
+ if (!try)
+ goto error;
+ irqs->offset[i] = irq;
+ irqs->range[i] = try;
+ num -= try;
+ }
+ if (num)
+ goto error;
+ spin_unlock(&adapter->guest->irq_alloc_lock);
+ return 0;
+
+error:
+ for (i = 0; i < CXL_IRQ_RANGES; i++)
+ irq_free_range(adapter, irqs->offset[i], irqs->range[i]);
+ spin_unlock(&adapter->guest->irq_alloc_lock);
+ return -ENOSPC;
+}
+
+static void guest_release_irq_ranges(struct cxl_irq_ranges *irqs,
+ struct cxl *adapter)
+{
+ int i;
+
+ spin_lock(&adapter->guest->irq_alloc_lock);
+ for (i = 0; i < CXL_IRQ_RANGES; i++)
+ irq_free_range(adapter, irqs->offset[i], irqs->range[i]);
+ spin_unlock(&adapter->guest->irq_alloc_lock);
+}
+
+static int guest_register_serr_irq(struct cxl_afu *afu)
+{
+ afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err",
+ dev_name(&afu->dev));
+ if (!afu->err_irq_name)
+ return -ENOMEM;
+
+ if (!(afu->serr_virq = cxl_map_irq(afu->adapter, afu->serr_hwirq,
+ guest_slice_irq_err, afu, afu->err_irq_name))) {
+ kfree(afu->err_irq_name);
+ afu->err_irq_name = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void guest_release_serr_irq(struct cxl_afu *afu)
+{
+ cxl_unmap_irq(afu->serr_virq, afu);
+ cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq);
+ kfree(afu->err_irq_name);
+}
+
+static int guest_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask)
+{
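+ /* the hcall takes the fault-handling value from the upper
+ * 32 bits of PSL_TFC_An (hence the shift), plus a flag that
+ * requests a reset of PSL errors
+ */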
+ return cxl_h_control_faults(ctx->afu->guest->handle, ctx->process_token,
+ tfc >> 32, (psl_reset_mask != 0));
+}
+
+static void disable_afu_irqs(struct cxl_context *ctx)
+{
+ irq_hw_number_t hwirq;
+ unsigned int virq;
+ int r, i;
+
+ pr_devel("Disabling AFU(%d) interrupts\n", ctx->afu->slice);
+ for (r = 0; r < CXL_IRQ_RANGES; r++) {
+ hwirq = ctx->irqs.offset[r];
+ for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
+ virq = irq_find_mapping(NULL, hwirq);
+ disable_irq(virq);
+ }
+ }
+}
+
+static void enable_afu_irqs(struct cxl_context *ctx)
+{
+ irq_hw_number_t hwirq;
+ unsigned int virq;
+ int r, i;
+
+ pr_devel("Enabling AFU(%d) interrupts\n", ctx->afu->slice);
+ for (r = 0; r < CXL_IRQ_RANGES; r++) {
+ hwirq = ctx->irqs.offset[r];
+ for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
+ virq = irq_find_mapping(NULL, hwirq);
+ enable_irq(virq);
+ }
+ }
+}
+
+static int _guest_afu_cr_readXX(int sz, struct cxl_afu *afu, int cr_idx,
+ u64 offset, u64 *val)
+{
+ unsigned long cr;
+ char c;
+ int rc = 0;
+
+ if (afu->crs_len < sz)
+ return -ENOENT;
+
+ if (unlikely(offset >= afu->crs_len))
+ return -ERANGE;
+
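+ /* the GET_CONFIG hcall expects a 4K-aligned real address,
+ * so bounce the read through a freshly zeroed page
+ */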
+ cr = get_zeroed_page(GFP_KERNEL);
+ if (!cr)
+ return -ENOMEM;
+
+ rc = cxl_h_get_config(afu->guest->handle, cr_idx, offset,
+ virt_to_phys((void *)cr), sz);
+ if (rc)
+ goto err;
+
+ switch (sz) {
+ case 1:
+ c = *((char *) cr);
+ *val = c;
+ break;
+ case 2:
+ *val = in_le16((u16 *)cr);
+ break;
+ case 4:
+ *val = in_le32((unsigned *)cr);
+ break;
+ case 8:
+ *val = in_le64((u64 *)cr);
+ break;
+ default:
+ WARN_ON(1);
+ }
+err:
+ free_page(cr);
+ return rc;
+}
+
+static int guest_afu_cr_read32(struct cxl_afu *afu, int cr_idx, u64 offset,
+ u32 *out)
+{
+ int rc;
+ u64 val;
+
+ rc = _guest_afu_cr_readXX(4, afu, cr_idx, offset, &val);
+ if (!rc)
+ *out = (u32) val;
+ return rc;
+}
+
+static int guest_afu_cr_read16(struct cxl_afu *afu, int cr_idx, u64 offset,
+ u16 *out)
+{
+ int rc;
+ u64 val;
+
+ rc = _guest_afu_cr_readXX(2, afu, cr_idx, offset, &val);
+ if (!rc)
+ *out = (u16) val;
+ return rc;
+}
+
+static int guest_afu_cr_read8(struct cxl_afu *afu, int cr_idx, u64 offset,
+ u8 *out)
+{
+ int rc;
+ u64 val;
+
+ rc = _guest_afu_cr_readXX(1, afu, cr_idx, offset, &val);
+ if (!rc)
+ *out = (u8) val;
+ return rc;
+}
+
+static int guest_afu_cr_read64(struct cxl_afu *afu, int cr_idx, u64 offset,
+ u64 *out)
+{
+ return _guest_afu_cr_readXX(8, afu, cr_idx, offset, out);
+}
+
+static int guest_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in)
+{
+ /* config record is not writable from guest */
+ return -EPERM;
+}
+
+static int guest_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in)
+{
+ /* config record is not writable from guest */
+ return -EPERM;
+}
+
+static int guest_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in)
+{
+ /* config record is not writable from guest */
+ return -EPERM;
+}
+
+static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr)
+{
+ struct cxl_process_element_hcall *elem;
+ struct cxl *adapter = ctx->afu->adapter;
+ const struct cred *cred;
+ u32 pid, idx;
+ int rc, r, i;
+ u64 mmio_addr, mmio_size;
+ __be64 flags = 0;
+
+ /* Must be 8 byte aligned and cannot cross a 4096 byte boundary */
+ if (!(elem = (struct cxl_process_element_hcall *)
+ get_zeroed_page(GFP_KERNEL)))
+ return -ENOMEM;
+
+ elem->version = cpu_to_be64(CXL_PROCESS_ELEMENT_VERSION);
+ if (ctx->kernel) {
+ pid = 0;
+ flags |= CXL_PE_TRANSLATION_ENABLED;
+ flags |= CXL_PE_PRIVILEGED_PROCESS;
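+ /* MSR_SF set means the kernel is executing in 64-bit mode */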
+ if (mfmsr() & MSR_SF)
+ flags |= CXL_PE_64_BIT;
+ } else {
+ pid = current->pid;
+ flags |= CXL_PE_PROBLEM_STATE;
+ flags |= CXL_PE_TRANSLATION_ENABLED;
+ if (!test_tsk_thread_flag(current, TIF_32BIT))
+ flags |= CXL_PE_64_BIT;
+ cred = get_current_cred();
+ if (uid_eq(cred->euid, GLOBAL_ROOT_UID))
+ flags |= CXL_PE_PRIVILEGED_PROCESS;
+ put_cred(cred);
+ }
+ elem->flags = cpu_to_be64(flags);
+ elem->common.tid = cpu_to_be32(0); /* Unused */
+ elem->common.pid = cpu_to_be32(pid);
+ elem->common.csrp = cpu_to_be64(0); /* disable */
+ elem->common.aurp0 = cpu_to_be64(0); /* disable */
+ elem->common.aurp1 = cpu_to_be64(0); /* disable */
+
+ cxl_prefault(ctx, wed);
+
+ elem->common.sstp0 = cpu_to_be64(ctx->sstp0);
+ elem->common.sstp1 = cpu_to_be64(ctx->sstp1);
+ for (r = 0; r < CXL_IRQ_RANGES; r++) {
+ for (i = 0; i < ctx->irqs.range[r]; i++) {
+ if (r == 0 && i == 0) {
+ elem->pslVirtualIsn = cpu_to_be32(ctx->irqs.offset[0]);
+ } else {
+ idx = ctx->irqs.offset[r] + i - adapter->guest->irq_base_offset;
+ elem->applicationVirtualIsnBitmap[idx / 8] |= 0x80 >> (idx % 8);
+ }
+ }
+ }
+ elem->common.amr = cpu_to_be64(amr);
+ elem->common.wed = cpu_to_be64(wed);
+
+ disable_afu_irqs(ctx);
+
+ rc = cxl_h_attach_process(ctx->afu->guest->handle, elem,
+ &ctx->process_token, &mmio_addr, &mmio_size);
+ if (rc == H_SUCCESS) {
+ if (ctx->master || !ctx->afu->pp_psa) {
+ ctx->psn_phys = ctx->afu->psn_phys;
+ ctx->psn_size = ctx->afu->adapter->ps_size;
+ } else {
+ ctx->psn_phys = mmio_addr;
+ ctx->psn_size = mmio_size;
+ }
+ if (ctx->afu->pp_psa && mmio_size &&
+ ctx->afu->pp_size == 0) {
+ /*
+ * There's no property in the device tree to read the
+ * pp_size, so we only find out at the first attach.
+ * Compared to bare-metal this is too late and we
+ * should really lock here; however, on PowerVM,
+ * pp_size is only used for display in /sys.
+ * Being discussed with pHyp for their next release.
+ */
+ ctx->afu->pp_size = mmio_size;
+ }
+ /* from PAPR: process element is bytes 4-7 of process token */
+ ctx->external_pe = ctx->process_token & 0xFFFFFFFF;
+ pr_devel("CXL pe=%i is known as %i for pHyp, mmio_size=%#llx",
+ ctx->pe, ctx->external_pe, ctx->psn_size);
+ ctx->pe_inserted = true;
+ enable_afu_irqs(ctx);
+ }
+
+ free_page((u64)elem);
+ return rc;
+}
+
+static int guest_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr)
+{
+ pr_devel("in %s\n", __func__);
+
+ ctx->kernel = kernel;
+ if (ctx->afu->current_mode == CXL_MODE_DIRECTED)
+ return attach_afu_directed(ctx, wed, amr);
+
+ /* dedicated mode not supported on FW840 */
+
+ return -EINVAL;
+}
+
+static int detach_afu_directed(struct cxl_context *ctx)
+{
+ if (!ctx->pe_inserted)
+ return 0;
+ if (cxl_h_detach_process(ctx->afu->guest->handle, ctx->process_token))
+ return -1;
+ return 0;
+}
+
+static int guest_detach_process(struct cxl_context *ctx)
+{
+ pr_devel("in %s\n", __func__);
+ trace_cxl_detach(ctx);
+
+ if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu))
+ return -EIO;
+
+ if (ctx->afu->current_mode == CXL_MODE_DIRECTED)
+ return detach_afu_directed(ctx);
+
+ return -EINVAL;
+}
+
+static void guest_release_afu(struct device *dev)
+{
+ struct cxl_afu *afu = to_cxl_afu(dev);
+
+ pr_devel("%s\n", __func__);
+
+ idr_destroy(&afu->contexts_idr);
+
+ kfree(afu->guest);
+ kfree(afu);
+}
+
+ssize_t cxl_guest_read_afu_vpd(struct cxl_afu *afu, void *buf, size_t len)
+{
+ return guest_collect_vpd(NULL, afu, buf, len);
+}
+
+#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE
+static ssize_t guest_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
+ loff_t off, size_t count)
+{
+ void *tbuf = NULL;
+ int rc = 0;
+
+ tbuf = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!tbuf)
+ return -ENOMEM;
+
+ rc = cxl_h_get_afu_err(afu->guest->handle,
+ off & 0x7,
+ virt_to_phys(tbuf),
+ count);
+ if (rc)
+ goto err;
+
+ if (count > ERR_BUFF_MAX_COPY_SIZE)
+ count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7);
+ memcpy(buf, tbuf, count);
+ rc = count; /* report the number of bytes copied */
+err:
+ free_page((u64)tbuf);
+
+ return rc;
+}
+
+static int guest_afu_check_and_enable(struct cxl_afu *afu)
+{
+ return 0;
+}
+
+static bool guest_support_attributes(const char *attr_name,
+ enum cxl_attrs type)
+{
+ switch (type) {
+ case CXL_ADAPTER_ATTRS:
+ if ((strcmp(attr_name, "base_image") == 0) ||
+ (strcmp(attr_name, "load_image_on_perst") == 0) ||
+ (strcmp(attr_name, "perst_reloads_same_image") == 0) ||
+ (strcmp(attr_name, "image_loaded") == 0))
+ return false;
+ break;
+ case CXL_AFU_MASTER_ATTRS:
+ if ((strcmp(attr_name, "pp_mmio_off") == 0))
+ return false;
+ break;
+ case CXL_AFU_ATTRS:
+ break;
+ default:
+ break;
+ }
+
+ return true;
+}
+
+static int activate_afu_directed(struct cxl_afu *afu)
+{
+ int rc;
+
+ dev_info(&afu->dev, "Activating AFU(%d) directed mode\n", afu->slice);
+
+ afu->current_mode = CXL_MODE_DIRECTED;
+
+ afu->num_procs = afu->max_procs_virtualised;
+
+ if ((rc = cxl_chardev_m_afu_add(afu)))
+ return rc;
+
+ if ((rc = cxl_sysfs_afu_m_add(afu)))
+ goto err;
+
+ if ((rc = cxl_chardev_s_afu_add(afu)))
+ goto err1;
+
+ return 0;
+err1:
+ cxl_sysfs_afu_m_remove(afu);
+err:
+ cxl_chardev_afu_remove(afu);
+ return rc;
+}
+
+static int guest_afu_activate_mode(struct cxl_afu *afu, int mode)
+{
+ if (!mode)
+ return 0;
+ if (!(mode & afu->modes_supported))
+ return -EINVAL;
+
+ if (mode == CXL_MODE_DIRECTED)
+ return activate_afu_directed(afu);
+
+ if (mode == CXL_MODE_DEDICATED)
+ dev_err(&afu->dev, "Dedicated mode not supported\n");
+
+ return -EINVAL;
+}
+
+static int deactivate_afu_directed(struct cxl_afu *afu)
+{
+ dev_info(&afu->dev, "Deactivating AFU(%d) directed mode\n", afu->slice);
+
+ afu->current_mode = 0;
+ afu->num_procs = 0;
+
+ cxl_sysfs_afu_m_remove(afu);
+ cxl_chardev_afu_remove(afu);
+
+ cxl_ops->afu_reset(afu);
+
+ return 0;
+}
+
+static int guest_afu_deactivate_mode(struct cxl_afu *afu, int mode)
+{
+ if (!mode)
+ return 0;
+ if (!(mode & afu->modes_supported))
+ return -EINVAL;
+
+ if (mode == CXL_MODE_DIRECTED)
+ return deactivate_afu_directed(afu);
+ return 0;
+}
+
+static int guest_afu_reset(struct cxl_afu *afu)
+{
+ pr_devel("AFU(%d) reset request\n", afu->slice);
+ return cxl_h_reset_afu(afu->guest->handle);
+}
+
+static int guest_map_slice_regs(struct cxl_afu *afu)
+{
+ if (!(afu->p2n_mmio = ioremap(afu->guest->p2n_phys, afu->guest->p2n_size))) {
+ dev_err(&afu->dev, "Error mapping AFU(%d) MMIO regions\n",
+ afu->slice);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void guest_unmap_slice_regs(struct cxl_afu *afu)
+{
+ if (afu->p2n_mmio)
+ iounmap(afu->p2n_mmio);
+}
+
+static int afu_update_state(struct cxl_afu *afu)
+{
+ int rc, cur_state;
+
+ rc = afu_read_error_state(afu, &cur_state);
+ if (rc)
+ return rc;
+
+ if (afu->guest->previous_state == cur_state)
+ return 0;
+
+ pr_devel("AFU(%d) update state to %#x\n", afu->slice, cur_state);
+
+ switch (cur_state) {
+ case H_STATE_NORMAL:
+ afu->guest->previous_state = cur_state;
+ rc = 1;
+ break;
+
+ case H_STATE_DISABLE:
+ pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT,
+ pci_channel_io_frozen);
+
+ cxl_context_detach_all(afu);
+ if ((rc = cxl_ops->afu_reset(afu)))
+ pr_devel("reset hcall failed %d\n", rc);
+
+ rc = afu_read_error_state(afu, &cur_state);
+ if (!rc && cur_state == H_STATE_NORMAL) {
+ pci_error_handlers(afu, CXL_SLOT_RESET_EVENT,
+ pci_channel_io_normal);
+ pci_error_handlers(afu, CXL_RESUME_EVENT, 0);
+ rc = 1;
+ }
+ afu->guest->previous_state = 0;
+ break;
+
+ case H_STATE_TEMP_UNAVAILABLE:
+ afu->guest->previous_state = cur_state;
+ break;
+
+ case H_STATE_PERM_UNAVAILABLE:
+ dev_err(&afu->dev, "AFU is in permanent error state\n");
+ pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT,
+ pci_channel_io_perm_failure);
+ afu->guest->previous_state = cur_state;
+ break;
+
+ default:
+ pr_err("Unexpected AFU(%d) error state: %#x\n",
+ afu->slice, cur_state);
+ return -EINVAL;
+ }
+
+ return rc;
+}
+
+static int afu_do_recovery(struct cxl_afu *afu)
+{
+ int rc;
+
+ /* many threads can arrive here, in case of detach_all for example.
+ * Only one needs to drive the recovery
+ */
+ if (mutex_trylock(&afu->guest->recovery_lock)) {
+ rc = afu_update_state(afu);
+ mutex_unlock(&afu->guest->recovery_lock);
+ return rc;
+ }
+ return 0;
+}
+
+static bool guest_link_ok(struct cxl *cxl, struct cxl_afu *afu)
+{
+ int state;
+
+ if (afu) {
+ if (afu_read_error_state(afu, &state) ||
+ state != H_STATE_NORMAL) {
+ if (afu_do_recovery(afu) > 0) {
+ /* check again in case we've just fixed it */
+ if (!afu_read_error_state(afu, &state) &&
+ state == H_STATE_NORMAL)
+ return true;
+ }
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static int afu_properties_look_ok(struct cxl_afu *afu)
+{
+ if (afu->pp_irqs < 0) {
+ dev_err(&afu->dev, "Unexpected per-process minimum interrupt value\n");
+ return -EINVAL;
+ }
+
+ if (afu->max_procs_virtualised < 1) {
+ dev_err(&afu->dev, "Unexpected max number of processes virtualised value\n");
+ return -EINVAL;
+ }
+
+ if (afu->crs_len < 0) {
+ dev_err(&afu->dev, "Unexpected configuration record size value\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_np)
+{
+ struct cxl_afu *afu;
+ bool free = true;
+ int rc;
+
+ pr_devel("in %s - AFU(%d)\n", __func__, slice);
+ if (!(afu = cxl_alloc_afu(adapter, slice)))
+ return -ENOMEM;
+
+ if (!(afu->guest = kzalloc(sizeof(struct cxl_afu_guest), GFP_KERNEL))) {
+ kfree(afu);
+ return -ENOMEM;
+ }
+
+ mutex_init(&afu->guest->recovery_lock);
+
+ if ((rc = dev_set_name(&afu->dev, "afu%i.%i",
+ adapter->adapter_num,
+ slice)))
+ goto err1;
+
+ adapter->slices++;
+
+ if ((rc = cxl_of_read_afu_handle(afu, afu_np)))
+ goto err1;
+
+ if ((rc = cxl_ops->afu_reset(afu)))
+ goto err1;
+
+ if ((rc = cxl_of_read_afu_properties(afu, afu_np)))
+ goto err1;
+
+ if ((rc = afu_properties_look_ok(afu)))
+ goto err1;
+
+ if ((rc = guest_map_slice_regs(afu)))
+ goto err1;
+
+ if ((rc = guest_register_serr_irq(afu)))
+ goto err2;
+
+ /*
+ * After we call this function we must not free the afu directly, even
+ * if it returns an error!
+ */
+ if ((rc = cxl_register_afu(afu)))
+ goto err_put1;
+
+ if ((rc = cxl_sysfs_afu_add(afu)))
+ goto err_put1;
+
+ /*
+ * pHyp doesn't expose the programming models supported by the
+ * AFU. pHyp currently only supports directed mode. If it adds
+ * dedicated mode later, this version of cxl has no way to
+ * detect it. So we'll initialize the driver, but the first
+ * attach will fail.
+ * Being discussed with pHyp to do better (likely new property)
+ */
+ if (afu->max_procs_virtualised == 1)
+ afu->modes_supported = CXL_MODE_DEDICATED;
+ else
+ afu->modes_supported = CXL_MODE_DIRECTED;
+
+ if ((rc = cxl_afu_select_best_mode(afu)))
+ goto err_put2;
+
+ adapter->afu[afu->slice] = afu;
+
+ afu->enabled = true;
+
+ if ((rc = cxl_pci_vphb_add(afu)))
+ dev_info(&afu->dev, "Can't register vPHB\n");
+
+ return 0;
+
+err_put2:
+ cxl_sysfs_afu_remove(afu);
+err_put1:
+ device_unregister(&afu->dev);
+ free = false;
+ guest_release_serr_irq(afu);
+err2:
+ guest_unmap_slice_regs(afu);
+err1:
+ if (free) {
+ kfree(afu->guest);
+ kfree(afu);
+ }
+ return rc;
+}
+
+void cxl_guest_remove_afu(struct cxl_afu *afu)
+{
+ pr_devel("in %s - AFU(%d)\n", __func__, afu->slice);
+
+ if (!afu)
+ return;
+
+ cxl_pci_vphb_remove(afu);
+ cxl_sysfs_afu_remove(afu);
+
+ spin_lock(&afu->adapter->afu_list_lock);
+ afu->adapter->afu[afu->slice] = NULL;
+ spin_unlock(&afu->adapter->afu_list_lock);
+
+ cxl_context_detach_all(afu);
+ cxl_ops->afu_deactivate_mode(afu, afu->current_mode);
+ guest_release_serr_irq(afu);
+ guest_unmap_slice_regs(afu);
+
+ device_unregister(&afu->dev);
+}
+
+static void free_adapter(struct cxl *adapter)
+{
+ struct irq_avail *cur;
+ int i;
+
+ if (adapter->guest->irq_avail) {
+ for (i = 0; i < adapter->guest->irq_nranges; i++) {
+ cur = &adapter->guest->irq_avail[i];
+ kfree(cur->bitmap);
+ }
+ kfree(adapter->guest->irq_avail);
+ }
+ kfree(adapter->guest->status);
+ cxl_remove_adapter_nr(adapter);
+ kfree(adapter->guest);
+ kfree(adapter);
+}
+
+static int properties_look_ok(struct cxl *adapter)
+{
+ /* The absence of this property means that the operational
+ * status is unknown or okay
+ */
+ if (strlen(adapter->guest->status) &&
+ strcmp(adapter->guest->status, "okay")) {
+ pr_err("ABORTING:Bad operational status of the device\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+ssize_t cxl_guest_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len)
+{
+ return guest_collect_vpd(adapter, NULL, buf, len);
+}
+
+void cxl_guest_remove_adapter(struct cxl *adapter)
+{
+ pr_devel("in %s\n", __func__);
+
+ cxl_sysfs_adapter_remove(adapter);
+
+ cxl_guest_remove_chardev(adapter);
+ device_unregister(&adapter->dev);
+}
+
+static void release_adapter(struct device *dev)
+{
+ free_adapter(to_cxl_adapter(dev));
+}
+
+struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_device *pdev)
+{
+ struct cxl *adapter;
+ bool free = true;
+ int rc;
+
+ if (!(adapter = cxl_alloc_adapter()))
+ return ERR_PTR(-ENOMEM);
+
+ if (!(adapter->guest = kzalloc(sizeof(struct cxl_guest), GFP_KERNEL))) {
+ free_adapter(adapter);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ adapter->slices = 0;
+ adapter->guest->pdev = pdev;
+ adapter->dev.parent = &pdev->dev;
+ adapter->dev.release = release_adapter;
+ dev_set_drvdata(&pdev->dev, adapter);
+
+ if ((rc = cxl_of_read_adapter_handle(adapter, np)))
+ goto err1;
+
+ if ((rc = cxl_of_read_adapter_properties(adapter, np)))
+ goto err1;
+
+ if ((rc = properties_look_ok(adapter)))
+ goto err1;
+
+ if ((rc = cxl_guest_add_chardev(adapter)))
+ goto err1;
+
+ /*
+ * After we call this function we must not free the adapter directly,
+ * even if it returns an error!
+ */
+ if ((rc = cxl_register_adapter(adapter)))
+ goto err_put1;
+
+ if ((rc = cxl_sysfs_adapter_add(adapter)))
+ goto err_put1;
+
+ return adapter;
+
+err_put1:
+ device_unregister(&adapter->dev);
+ free = false;
+ cxl_guest_remove_chardev(adapter);
+err1:
+ if (free)
+ free_adapter(adapter);
+ return ERR_PTR(rc);
+}
+
+void cxl_guest_reload_module(struct cxl *adapter)
+{
+ struct platform_device *pdev;
+
+ pdev = adapter->guest->pdev;
+ cxl_guest_remove_adapter(adapter);
+
+ cxl_of_probe(pdev);
+}
+
+const struct cxl_backend_ops cxl_guest_ops = {
+ .module = THIS_MODULE,
+ .adapter_reset = guest_reset,
+ .alloc_one_irq = guest_alloc_one_irq,
+ .release_one_irq = guest_release_one_irq,
+ .alloc_irq_ranges = guest_alloc_irq_ranges,
+ .release_irq_ranges = guest_release_irq_ranges,
+ .setup_irq = NULL,
+ .handle_psl_slice_error = guest_handle_psl_slice_error,
+ .psl_interrupt = guest_psl_irq,
+ .ack_irq = guest_ack_irq,
+ .attach_process = guest_attach_process,
+ .detach_process = guest_detach_process,
+ .support_attributes = guest_support_attributes,
+ .link_ok = guest_link_ok,
+ .release_afu = guest_release_afu,
+ .afu_read_err_buffer = guest_afu_read_err_buffer,
+ .afu_check_and_enable = guest_afu_check_and_enable,
+ .afu_activate_mode = guest_afu_activate_mode,
+ .afu_deactivate_mode = guest_afu_deactivate_mode,
+ .afu_reset = guest_afu_reset,
+ .afu_cr_read8 = guest_afu_cr_read8,
+ .afu_cr_read16 = guest_afu_cr_read16,
+ .afu_cr_read32 = guest_afu_cr_read32,
+ .afu_cr_read64 = guest_afu_cr_read64,
+ .afu_cr_write8 = guest_afu_cr_write8,
+ .afu_cr_write16 = guest_afu_cr_write16,
+ .afu_cr_write32 = guest_afu_cr_write32,
+ .read_adapter_vpd = cxl_guest_read_adapter_vpd,
+};
diff --git a/drivers/misc/cxl/hcalls.c b/drivers/misc/cxl/hcalls.c
new file mode 100644
index 000000000000..d6d11f4056d7
--- /dev/null
+++ b/drivers/misc/cxl/hcalls.c
@@ -0,0 +1,647 @@
+/*
+ * Copyright 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <asm/byteorder.h>
+#include "hcalls.h"
+#include "trace.h"
+
+#define CXL_HCALL_TIMEOUT 60000
+#define CXL_HCALL_TIMEOUT_DOWNLOAD 120000
+
+#define H_ATTACH_CA_PROCESS 0x344
+#define H_CONTROL_CA_FUNCTION 0x348
+#define H_DETACH_CA_PROCESS 0x34C
+#define H_COLLECT_CA_INT_INFO 0x350
+#define H_CONTROL_CA_FAULTS 0x354
+#define H_DOWNLOAD_CA_FUNCTION 0x35C
+#define H_DOWNLOAD_CA_FACILITY 0x364
+#define H_CONTROL_CA_FACILITY 0x368
+
+#define H_CONTROL_CA_FUNCTION_RESET 1 /* perform a reset */
+#define H_CONTROL_CA_FUNCTION_SUSPEND_PROCESS 2 /* suspend a process from being executed */
+#define H_CONTROL_CA_FUNCTION_RESUME_PROCESS 3 /* resume a process to be executed */
+#define H_CONTROL_CA_FUNCTION_READ_ERR_STATE 4 /* read the error state */
+#define H_CONTROL_CA_FUNCTION_GET_AFU_ERR 5 /* collect the AFU error buffer */
+#define H_CONTROL_CA_FUNCTION_GET_CONFIG 6 /* collect configuration record */
+#define H_CONTROL_CA_FUNCTION_GET_DOWNLOAD_STATE 7 /* query to return download status */
+#define H_CONTROL_CA_FUNCTION_TERMINATE_PROCESS 8 /* terminate the process before completion */
+#define H_CONTROL_CA_FUNCTION_COLLECT_VPD 9 /* collect VPD */
+#define H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT 11 /* read the function-wide error data based on an interrupt */
+#define H_CONTROL_CA_FUNCTION_ACK_FUNCTION_ERR_INT 12 /* acknowledge function-wide error data based on an interrupt */
+#define H_CONTROL_CA_FUNCTION_GET_ERROR_LOG 13 /* retrieve the Platform Log ID (PLID) of an error log */
+
+#define H_CONTROL_CA_FAULTS_RESPOND_PSL 1
+#define H_CONTROL_CA_FAULTS_RESPOND_AFU 2
+
+#define H_CONTROL_CA_FACILITY_RESET 1 /* perform a reset */
+#define H_CONTROL_CA_FACILITY_COLLECT_VPD 2 /* collect VPD */
+
+#define H_DOWNLOAD_CA_FACILITY_DOWNLOAD 1 /* download adapter image */
+#define H_DOWNLOAD_CA_FACILITY_VALIDATE 2 /* validate adapter image */
+
+
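+/*
+ * Retry an hcall while the hypervisor returns H_BUSY or a long-busy
+ * code, sleeping for the suggested interval between attempts and
+ * giving up after CXL_HCALL_TIMEOUT msec. The continue token returned
+ * in R4 is fed back on each retry.
+ */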
+#define _CXL_LOOP_HCALL(call, rc, retbuf, fn, ...) \
+ { \
+ unsigned int delay, total_delay = 0; \
+ u64 token = 0; \
+ \
+ memset(retbuf, 0, sizeof(retbuf)); \
+ while (1) { \
+ rc = call(fn, retbuf, __VA_ARGS__, token); \
+ token = retbuf[0]; \
+ if (rc != H_BUSY && !H_IS_LONG_BUSY(rc)) \
+ break; \
+ \
+ if (rc == H_BUSY) \
+ delay = 10; \
+ else \
+ delay = get_longbusy_msecs(rc); \
+ \
+ total_delay += delay; \
+ if (total_delay > CXL_HCALL_TIMEOUT) { \
+ WARN(1, "Warning: Giving up waiting for CXL hcall " \
+ "%#x after %u msec\n", fn, total_delay); \
+ rc = H_BUSY; \
+ break; \
+ } \
+ msleep(delay); \
+ } \
+ }
+#define CXL_H_WAIT_UNTIL_DONE(...) _CXL_LOOP_HCALL(plpar_hcall, __VA_ARGS__)
+#define CXL_H9_WAIT_UNTIL_DONE(...) _CXL_LOOP_HCALL(plpar_hcall9, __VA_ARGS__)
+
+#define _PRINT_MSG(rc, format, ...) \
+ { \
+ if ((rc != H_SUCCESS) && (rc != H_CONTINUE)) \
+ pr_err(format, __VA_ARGS__); \
+ else \
+ pr_devel(format, __VA_ARGS__); \
+ } \
+
+
+static char *afu_op_names[] = {
+ "UNKNOWN_OP", /* 0 undefined */
+ "RESET", /* 1 */
+ "SUSPEND_PROCESS", /* 2 */
+ "RESUME_PROCESS", /* 3 */
+ "READ_ERR_STATE", /* 4 */
+ "GET_AFU_ERR", /* 5 */
+ "GET_CONFIG", /* 6 */
+ "GET_DOWNLOAD_STATE", /* 7 */
+ "TERMINATE_PROCESS", /* 8 */
+ "COLLECT_VPD", /* 9 */
+ "UNKNOWN_OP", /* 10 undefined */
+ "GET_FUNCTION_ERR_INT", /* 11 */
+ "ACK_FUNCTION_ERR_INT", /* 12 */
+ "GET_ERROR_LOG", /* 13 */
+};
+
+static char *control_adapter_op_names[] = {
+ "UNKNOWN_OP", /* 0 undefined */
+ "RESET", /* 1 */
+ "COLLECT_VPD", /* 2 */
+};
+
+static char *download_op_names[] = {
+ "UNKNOWN_OP", /* 0 undefined */
+ "DOWNLOAD", /* 1 */
+ "VALIDATE", /* 2 */
+};
+
+static char *op_str(unsigned int op, char *name_array[], int array_len)
+{
+ if (op >= array_len)
+ return "UNKNOWN_OP";
+ return name_array[op];
+}
+
+#define OP_STR(op, name_array) op_str(op, name_array, ARRAY_SIZE(name_array))
+
+#define OP_STR_AFU(op) OP_STR(op, afu_op_names)
+#define OP_STR_CONTROL_ADAPTER(op) OP_STR(op, control_adapter_op_names)
+#define OP_STR_DOWNLOAD_ADAPTER(op) OP_STR(op, download_op_names)
+
+
+long cxl_h_attach_process(u64 unit_address,
+ struct cxl_process_element_hcall *element,
+ u64 *process_token, u64 *mmio_addr, u64 *mmio_size)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ long rc;
+
+ CXL_H_WAIT_UNTIL_DONE(rc, retbuf, H_ATTACH_CA_PROCESS, unit_address, virt_to_phys(element));
+ _PRINT_MSG(rc, "cxl_h_attach_process(%#.16llx, %#.16lx): %li\n",
+ unit_address, virt_to_phys(element), rc);
+ trace_cxl_hcall_attach(unit_address, virt_to_phys(element), retbuf[0], retbuf[1], retbuf[2], rc);
+
+ pr_devel("token: 0x%.8lx mmio_addr: 0x%lx mmio_size: 0x%lx\nProcess Element Structure:\n",
+ retbuf[0], retbuf[1], retbuf[2]);
+ cxl_dump_debug_buffer(element, sizeof(*element));
+
+ switch (rc) {
+ case H_SUCCESS: /* The process info is attached to the coherent platform function */
+ *process_token = retbuf[0];
+ if (mmio_addr)
+ *mmio_addr = retbuf[1];
+ if (mmio_size)
+ *mmio_size = retbuf[2];
+ return 0;
+ case H_PARAMETER: /* An incorrect parameter was supplied. */
+ case H_FUNCTION: /* The function is not supported. */
+ return -EINVAL;
+ case H_AUTHORITY: /* The partition does not have authority to perform this hcall */
+ case H_RESOURCE: /* The coherent platform function does not have enough additional resource to attach the process */
+ case H_HARDWARE: /* A hardware event prevented the attach operation */
+ case H_STATE: /* The coherent platform function is not in a valid state */
+ case H_BUSY:
+ return -EBUSY;
+ default:
+ WARN(1, "Unexpected return code: %lx", rc);
+ return -EINVAL;
+ }
+}
+
+/**
+ * cxl_h_detach_process - Detach a process element from a coherent
+ * platform function.
+ */
+long cxl_h_detach_process(u64 unit_address, u64 process_token)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ long rc;
+
+ CXL_H_WAIT_UNTIL_DONE(rc, retbuf, H_DETACH_CA_PROCESS, unit_address, process_token);
+ _PRINT_MSG(rc, "cxl_h_detach_process(%#.16llx, 0x%.8llx): %li\n", unit_address, process_token, rc);
+ trace_cxl_hcall_detach(unit_address, process_token, rc);
+
+ switch (rc) {
+ case H_SUCCESS: /* The process was detached from the coherent platform function */
+ return 0;
+ case H_PARAMETER: /* An incorrect parameter was supplied. */
+ return -EINVAL;
+ case H_AUTHORITY: /* The partition does not have authority to perform this hcall */
+ case H_RESOURCE: /* The function has page table mappings for MMIO */
+ case H_HARDWARE: /* A hardware event prevented the detach operation */
+ case H_STATE: /* The coherent platform function is not in a valid state */
+ case H_BUSY:
+ return -EBUSY;
+ default:
+ WARN(1, "Unexpected return code: %lx", rc);
+ return -EINVAL;
+ }
+}
+
+/**
+ * cxl_h_control_function - This H_CONTROL_CA_FUNCTION hypervisor call allows
+ * the partition to manipulate or query
+ * certain coherent platform function behaviors.
+ */
+static long cxl_h_control_function(u64 unit_address, u64 op,
+ u64 p1, u64 p2, u64 p3, u64 p4, u64 *out)
+{
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+ long rc;
+
+ CXL_H9_WAIT_UNTIL_DONE(rc, retbuf, H_CONTROL_CA_FUNCTION, unit_address, op, p1, p2, p3, p4);
+ _PRINT_MSG(rc, "cxl_h_control_function(%#.16llx, %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li\n",
+ unit_address, OP_STR_AFU(op), p1, p2, p3, p4, retbuf[0], rc);
+ trace_cxl_hcall_control_function(unit_address, OP_STR_AFU(op), p1, p2, p3, p4, retbuf[0], rc);
+
+ switch (rc) {
+ case H_SUCCESS: /* The operation is completed for the coherent platform function */
+ if ((op == H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT ||
+ op == H_CONTROL_CA_FUNCTION_READ_ERR_STATE ||
+ op == H_CONTROL_CA_FUNCTION_COLLECT_VPD))
+ *out = retbuf[0];
+ return 0;
+ case H_PARAMETER: /* An incorrect parameter was supplied. */
+ case H_FUNCTION: /* The function is not supported. */
+ case H_NOT_FOUND: /* The operation supplied was not valid */
+ case H_NOT_AVAILABLE: /* The operation cannot be performed because the AFU has not been downloaded */
+ case H_SG_LIST: /* A block list entry was invalid */
+ return -EINVAL;
+ case H_AUTHORITY: /* The partition does not have authority to perform this hcall */
+ case H_RESOURCE: /* The function has page table mappings for MMIO */
+ case H_HARDWARE: /* A hardware event prevented the attach operation */
+ case H_STATE: /* The coherent platform function is not in a valid state */
+ case H_BUSY:
+ return -EBUSY;
+ default:
+ WARN(1, "Unexpected return code: %lx", rc);
+ return -EINVAL;
+ }
+}
+
+/**
+ * cxl_h_reset_afu - Perform a reset to the coherent platform function.
+ */
+long cxl_h_reset_afu(u64 unit_address)
+{
+ return cxl_h_control_function(unit_address,
+ H_CONTROL_CA_FUNCTION_RESET,
+ 0, 0, 0, 0,
+ NULL);
+}
+
+/**
+ * cxl_h_suspend_process - Suspend a process from being executed
+ * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when
+ * process was attached.
+ */
+long cxl_h_suspend_process(u64 unit_address, u64 process_token)
+{
+ return cxl_h_control_function(unit_address,
+ H_CONTROL_CA_FUNCTION_SUSPEND_PROCESS,
+ process_token, 0, 0, 0,
+ NULL);
+}
+
+/**
+ * cxl_h_resume_process - Resume a process to be executed
+ * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when
+ * process was attached.
+ */
+long cxl_h_resume_process(u64 unit_address, u64 process_token)
+{
+ return cxl_h_control_function(unit_address,
+ H_CONTROL_CA_FUNCTION_RESUME_PROCESS,
+ process_token, 0, 0, 0,
+ NULL);
+}
+
+/**
+ * cxl_h_read_error_state - Checks the error state of the coherent
+ * platform function.
+ * R4 contains the error state
+ */
+long cxl_h_read_error_state(u64 unit_address, u64 *state)
+{
+ return cxl_h_control_function(unit_address,
+ H_CONTROL_CA_FUNCTION_READ_ERR_STATE,
+ 0, 0, 0, 0,
+ state);
+}
+
+/**
+ * cxl_h_get_afu_err - collect the AFU error buffer
+ * Parameter1 = byte offset into error buffer to retrieve, valid values
+ * are between 0 and (ibm,error-buffer-size - 1)
+ * Parameter2 = 4K aligned real address of error buffer, to be filled in
+ * Parameter3 = length of error buffer, valid values are 4K or less
+ */
+long cxl_h_get_afu_err(u64 unit_address, u64 offset,
+ u64 buf_address, u64 len)
+{
+ return cxl_h_control_function(unit_address,
+ H_CONTROL_CA_FUNCTION_GET_AFU_ERR,
+ offset, buf_address, len, 0,
+ NULL);
+}
+
+/**
+ * cxl_h_get_config - collect configuration record for the
+ * coherent platform function
+ * Parameter1 = # of configuration record to retrieve, valid values are
+ * between 0 and (ibm,#config-records - 1)
+ * Parameter2 = byte offset into configuration record to retrieve,
+ * valid values are between 0 and (ibm,config-record-size - 1)
+ * Parameter3 = 4K aligned real address of configuration record buffer,
+ * to be filled in
+ * Parameter4 = length of configuration buffer, valid values are 4K or less
+ */
+long cxl_h_get_config(u64 unit_address, u64 cr_num, u64 offset,
+ u64 buf_address, u64 len)
+{
+ return cxl_h_control_function(unit_address,
+ H_CONTROL_CA_FUNCTION_GET_CONFIG,
+ cr_num, offset, buf_address, len,
+ NULL);
+}
+
+/**
+ * cxl_h_terminate_process - Terminate the process before completion
+ * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when
+ * process was attached.
+ */
+long cxl_h_terminate_process(u64 unit_address, u64 process_token)
+{
+ return cxl_h_control_function(unit_address,
+ H_CONTROL_CA_FUNCTION_TERMINATE_PROCESS,
+ process_token, 0, 0, 0,
+ NULL);
+}
+
+/**
+ * cxl_h_collect_vpd - Collect VPD for the coherent platform function.
+ * Parameter1 = # of VPD record to retrieve, valid values are between 0
+ * and (ibm,#config-records - 1).
+ * Parameter2 = 4K naturally aligned real buffer containing block
+ * list entries
+ * Parameter3 = number of block list entries in the block list, valid
+ * values are between 0 and 256
+ */
+long cxl_h_collect_vpd(u64 unit_address, u64 record, u64 list_address,
+ u64 num, u64 *out)
+{
+ return cxl_h_control_function(unit_address,
+ H_CONTROL_CA_FUNCTION_COLLECT_VPD,
+ record, list_address, num, 0,
+ out);
+}
+
+/**
+ * cxl_h_get_fn_error_interrupt - Read the function-wide error data based on an interrupt
+ */
+long cxl_h_get_fn_error_interrupt(u64 unit_address, u64 *reg)
+{
+ return cxl_h_control_function(unit_address,
+ H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT,
+ 0, 0, 0, 0, reg);
+}
+
+/**
+ * cxl_h_ack_fn_error_interrupt - Acknowledge function-wide error data
+ * based on an interrupt
+ * Parameter1 = value to write to the function-wide error interrupt register
+ */
+long cxl_h_ack_fn_error_interrupt(u64 unit_address, u64 value)
+{
+ return cxl_h_control_function(unit_address,
+ H_CONTROL_CA_FUNCTION_ACK_FUNCTION_ERR_INT,
+ value, 0, 0, 0,
+ NULL);
+}
+
+/**
+ * cxl_h_get_error_log - Retrieve the Platform Log ID (PLID) of
+ * an error log
+ */
+long cxl_h_get_error_log(u64 unit_address, u64 value)
+{
+ return cxl_h_control_function(unit_address,
+ H_CONTROL_CA_FUNCTION_GET_ERROR_LOG,
+ 0, 0, 0, 0,
+ NULL);
+}
+
+/**
+ * cxl_h_collect_int_info - Collect interrupt info about a coherent
+ * platform function after an interrupt occurred.
+ */
+long cxl_h_collect_int_info(u64 unit_address, u64 process_token,
+ struct cxl_irq_info *info)
+{
+ long rc;
+
+ BUG_ON(sizeof(*info) != sizeof(unsigned long[PLPAR_HCALL9_BUFSIZE]));
+
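+ /* the hcall's nine return registers are written straight
+ * into *info, hence the size check above
+ */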
+ rc = plpar_hcall9(H_COLLECT_CA_INT_INFO, (unsigned long *) info,
+ unit_address, process_token);
+ _PRINT_MSG(rc, "cxl_h_collect_int_info(%#.16llx, 0x%llx): %li\n",
+ unit_address, process_token, rc);
+ trace_cxl_hcall_collect_int_info(unit_address, process_token, rc);
+
+ switch (rc) {
+ case H_SUCCESS: /* The interrupt info is returned in return registers. */
+ pr_devel("dsisr:%#llx, dar:%#llx, dsr:%#llx, pid:%u, tid:%u, afu_err:%#llx, errstat:%#llx\n",
+ info->dsisr, info->dar, info->dsr, info->pid,
+ info->tid, info->afu_err, info->errstat);
+ return 0;
+ case H_PARAMETER: /* An incorrect parameter was supplied. */
+ return -EINVAL;
+ case H_AUTHORITY: /* The partition does not have authority to perform this hcall. */
+ case H_HARDWARE: /* A hardware event prevented the collection of the interrupt info. */
+ case H_STATE: /* The coherent platform function is not in a valid state to collect interrupt info. */
+ return -EBUSY;
+ default:
+ WARN(1, "Unexpected return code: %lx", rc);
+ return -EINVAL;
+ }
+}
+
+/**
+ * cxl_h_control_faults - Control the operation of a coherent platform
+ * function after a fault occurs.
+ *
+ * Parameters
+ * control-mask: value to control the faults
+ * looks like PSL_TFC_An shifted >> 32
+ * reset-mask: mask to control reset of function faults
+ * Set reset_mask = 1 to reset PSL errors
+ */
+long cxl_h_control_faults(u64 unit_address, u64 process_token,
+ u64 control_mask, u64 reset_mask)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ long rc;
+
+ memset(retbuf, 0, sizeof(retbuf));
+
+ rc = plpar_hcall(H_CONTROL_CA_FAULTS, retbuf, unit_address,
+ H_CONTROL_CA_FAULTS_RESPOND_PSL, process_token,
+ control_mask, reset_mask);
+ _PRINT_MSG(rc, "cxl_h_control_faults(%#.16llx, 0x%llx, %#llx, %#llx): %li (%#lx)\n",
+ unit_address, process_token, control_mask, reset_mask,
+ rc, retbuf[0]);
+ trace_cxl_hcall_control_faults(unit_address, process_token,
+ control_mask, reset_mask, retbuf[0], rc);
+
+ switch (rc) {
+ case H_SUCCESS: /* Faults were successfully controlled for the function. */
+ return 0;
+ case H_PARAMETER: /* An incorrect parameter was supplied. */
+ return -EINVAL;
+ case H_HARDWARE: /* A hardware event prevented the control of faults. */
+ case H_STATE: /* The function was in an invalid state. */
+ case H_AUTHORITY: /* The partition does not have authority to perform this hcall; the coherent platform facilities may need to be licensed. */
+ return -EBUSY;
+ case H_FUNCTION: /* The function is not supported */
+ case H_NOT_FOUND: /* The operation supplied was not valid */
+ return -EINVAL;
+ default:
+ WARN(1, "Unexpected return code: %lx", rc);
+ return -EINVAL;
+ }
+}
+
+/**
+ * cxl_h_control_facility - This H_CONTROL_CA_FACILITY hypervisor call
+ * allows the partition to manipulate or query
+ * certain coherent platform facility behaviors.
+ */
+static long cxl_h_control_facility(u64 unit_address, u64 op,
+ u64 p1, u64 p2, u64 p3, u64 p4, u64 *out)
+{
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+ long rc;
+
+ CXL_H9_WAIT_UNTIL_DONE(rc, retbuf, H_CONTROL_CA_FACILITY, unit_address, op, p1, p2, p3, p4);
+ _PRINT_MSG(rc, "cxl_h_control_facility(%#.16llx, %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li\n",
+ unit_address, OP_STR_CONTROL_ADAPTER(op), p1, p2, p3, p4, retbuf[0], rc);
+ trace_cxl_hcall_control_facility(unit_address, OP_STR_CONTROL_ADAPTER(op), p1, p2, p3, p4, retbuf[0], rc);
+
+ switch (rc) {
+ case H_SUCCESS: /* The operation is completed for the coherent platform facility */
+ if (op == H_CONTROL_CA_FACILITY_COLLECT_VPD)
+ *out = retbuf[0];
+ return 0;
+ case H_PARAMETER: /* An incorrect parameter was supplied. */
+ case H_FUNCTION: /* The function is not supported. */
+ case H_NOT_FOUND: /* The operation supplied was not valid */
+ case H_NOT_AVAILABLE: /* The operation cannot be performed because the AFU has not been downloaded */
+ case H_SG_LIST: /* A block list entry was invalid */
+ return -EINVAL;
+ case H_AUTHORITY: /* The partition does not have authority to perform this hcall */
+ case H_RESOURCE: /* The function has page table mappings for MMIO */
+ case H_HARDWARE: /* A hardware event prevented the attach operation */
+ case H_STATE: /* The coherent platform facility is not in a valid state */
+ case H_BUSY:
+ return -EBUSY;
+ default:
+ WARN(1, "Unexpected return code: %lx", rc);
+ return -EINVAL;
+ }
+}
+
+/**
+ * cxl_h_reset_adapter - Perform a reset to the coherent platform facility.
+ */
+long cxl_h_reset_adapter(u64 unit_address)
+{
+ return cxl_h_control_facility(unit_address,
+ H_CONTROL_CA_FACILITY_RESET,
+ 0, 0, 0, 0,
+ NULL);
+}
+
+/**
+ * cxl_h_collect_vpd - Collect VPD for the coherent platform function.
+ * Parameter1 = 4K naturally aligned real buffer containing block
+ * list entries
+ * Parameter2 = number of block list entries in the block list, valid
+ * values are between 0 and 256
+ */
+long cxl_h_collect_vpd_adapter(u64 unit_address, u64 list_address,
+ u64 num, u64 *out)
+{
+ return cxl_h_control_facility(unit_address,
+ H_CONTROL_CA_FACILITY_COLLECT_VPD,
+ list_address, num, 0, 0,
+ out);
+}
+
+/**
+ * cxl_h_download_facility - This H_DOWNLOAD_CA_FACILITY
+ * hypervisor call provides platform support for
+ * downloading a base adapter image to the coherent
+ * platform facility, and for validating the entire
+ * image after the download.
+ * Parameters
+ * op: operation to perform to the coherent platform function
+ * Download: operation = 1, the base image in the coherent platform
+ * facility is first erased, and then
+ * programmed using the image supplied
+ * in the scatter/gather list.
+ * Validate: operation = 2, the base image in the coherent platform
+ * facility is compared with the image
+ * supplied in the scatter/gather list.
+ * list_address: 4K naturally aligned real buffer containing
+ * scatter/gather list entries.
+ * num: number of block list entries in the scatter/gather list.
+ */
+static long cxl_h_download_facility(u64 unit_address, u64 op,
+ u64 list_address, u64 num,
+ u64 *out)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ unsigned int delay, total_delay = 0;
+ u64 token = 0;
+ long rc;
+
+ if (*out != 0)
+ token = *out;
+
+ memset(retbuf, 0, sizeof(retbuf));
+ while (1) {
+ rc = plpar_hcall(H_DOWNLOAD_CA_FACILITY, retbuf,
+ unit_address, op, list_address, num,
+ token);
+ token = retbuf[0];
+ if (rc != H_BUSY && !H_IS_LONG_BUSY(rc))
+ break;
+
+ if (rc == H_BUSY)
+ delay = 10;
+ else
+ delay = get_longbusy_msecs(rc);
+
+ total_delay += delay;
+ if (total_delay > CXL_HCALL_TIMEOUT_DOWNLOAD) {
+ WARN(1, "Warning: Giving up waiting for CXL hcall "
+ "%#x after %u msec\n",
+ H_DOWNLOAD_CA_FACILITY, total_delay);
+ rc = H_BUSY;
+ break;
+ }
+ msleep(delay);
+ }
+ _PRINT_MSG(rc, "cxl_h_download_facility(%#.16llx, %s(%#llx, %#llx), %#lx): %li\n",
+ unit_address, OP_STR_DOWNLOAD_ADAPTER(op), list_address, num, retbuf[0], rc);
+ trace_cxl_hcall_download_facility(unit_address, OP_STR_DOWNLOAD_ADAPTER(op), list_address, num, retbuf[0], rc);
+
+ switch (rc) {
+ case H_SUCCESS: /* The operation is completed for the coherent platform facility */
+ return 0;
+ case H_PARAMETER: /* An incorrect parameter was supplied */
+ case H_FUNCTION: /* The function is not supported. */
+ case H_SG_LIST: /* A block list entry was invalid */
+ case H_BAD_DATA: /* Image verification failed */
+ return -EINVAL;
+ case H_AUTHORITY: /* The partition does not have authority to perform this hcall */
+ case H_RESOURCE: /* The function has page table mappings for MMIO */
+ case H_HARDWARE: /* A hardware event prevented the attach operation */
+ case H_STATE: /* The coherent platform facility is not in a valid state */
+ case H_BUSY:
+ return -EBUSY;
+ case H_CONTINUE:
+ *out = retbuf[0];
+ return 1; /* More data is needed for the complete image */
+ default:
+ WARN(1, "Unexpected return code: %lx", rc);
+ return -EINVAL;
+ }
+}
+
+/**
+ * cxl_h_download_adapter_image - Download the base image to the coherent
+ * platform facility.
+ */
+long cxl_h_download_adapter_image(u64 unit_address,
+ u64 list_address, u64 num,
+ u64 *out)
+{
+ return cxl_h_download_facility(unit_address,
+ H_DOWNLOAD_CA_FACILITY_DOWNLOAD,
+ list_address, num, out);
+}
+
+/**
+ * cxl_h_validate_adapter_image - Validate the base image in the coherent
+ * platform facility.
+ */
+long cxl_h_validate_adapter_image(u64 unit_address,
+ u64 list_address, u64 num,
+ u64 *out)
+{
+ return cxl_h_download_facility(unit_address,
+ H_DOWNLOAD_CA_FACILITY_VALIDATE,
+ list_address, num, out);
+}
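The H_CONTINUE handling above (return value 1, with the continuation token passed back through *out) implies a retry loop in the caller. A minimal editorial sketch, not part of the patch — download_whole_image is a hypothetical helper, and in a real caller the scatter/gather list described by list_address/num would advance to the next chunk of the image between iterations:

static long download_whole_image(u64 unit_address, u64 list_address, u64 num)
{
        u64 token = 0;  /* must be 0 on the first call */
        long rc;

        do {
                /* on H_CONTINUE the hcall stored a new token for us */
                rc = cxl_h_download_adapter_image(unit_address,
                                                  list_address, num, &token);
        } while (rc == 1);

        return rc;      /* 0 on success, negative errno on failure */
}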
diff --git a/drivers/misc/cxl/hcalls.h b/drivers/misc/cxl/hcalls.h
new file mode 100644
index 000000000000..3e25522a5df6
--- /dev/null
+++ b/drivers/misc/cxl/hcalls.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _HCALLS_H
+#define _HCALLS_H
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <asm/hvcall.h>
+#include "cxl.h"
+
+#define SG_BUFFER_SIZE 4096
+#define SG_MAX_ENTRIES 256
+
+struct sg_list {
+ u64 phys_addr;
+ u64 len;
+};
+
+/*
+ * This is straight out of PAPR, but replacing some of the compound fields with
+ * a single field, where they were identical to the register layout.
+ *
+ * The 'flags' parameter groups together the various bit-fields
+ */
+#define CXL_PE_CSRP_VALID (1ULL << 63)
+#define CXL_PE_PROBLEM_STATE (1ULL << 62)
+#define CXL_PE_SECONDARY_SEGMENT_TBL_SRCH (1ULL << 61)
+#define CXL_PE_TAGS_ACTIVE (1ULL << 60)
+#define CXL_PE_USER_STATE (1ULL << 59)
+#define CXL_PE_TRANSLATION_ENABLED (1ULL << 58)
+#define CXL_PE_64_BIT (1ULL << 57)
+#define CXL_PE_PRIVILEGED_PROCESS (1ULL << 56)
+
+#define CXL_PROCESS_ELEMENT_VERSION 1
+struct cxl_process_element_hcall {
+ __be64 version;
+ __be64 flags;
+ u8 reserved0[12];
+ __be32 pslVirtualIsn;
+ u8 applicationVirtualIsnBitmap[256];
+ u8 reserved1[144];
+ struct cxl_process_element_common common;
+ u8 reserved4[12];
+} __packed;
+
+#define H_STATE_NORMAL 1
+#define H_STATE_DISABLE 2
+#define H_STATE_TEMP_UNAVAILABLE 3
+#define H_STATE_PERM_UNAVAILABLE 4
+
+/* NOTE: element must be a logical real address, and must be pinned */
+long cxl_h_attach_process(u64 unit_address, struct cxl_process_element_hcall *element,
+ u64 *process_token, u64 *mmio_addr, u64 *mmio_size);
+
+/**
+ * cxl_h_detach_process - Detach a process element from a coherent
+ * platform function.
+ */
+long cxl_h_detach_process(u64 unit_address, u64 process_token);
+
+/**
+ * cxl_h_reset_afu - Perform a reset of the coherent platform function.
+ */
+long cxl_h_reset_afu(u64 unit_address);
+
+/**
+ * cxl_h_suspend_process - Suspend a process from being executed
+ * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when
+ * process was attached.
+ */
+long cxl_h_suspend_process(u64 unit_address, u64 process_token);
+
+/**
+ * cxl_h_resume_process - Resume a process to be executed
+ * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when
+ * process was attached.
+ */
+long cxl_h_resume_process(u64 unit_address, u64 process_token);
+
+/**
+ * cxl_h_read_error_state - Reads the error state of the coherent
+ * platform function.
+ * R4 contains the error state
+ */
+long cxl_h_read_error_state(u64 unit_address, u64 *state);
+
+/**
+ * cxl_h_get_afu_err - collect the AFU error buffer
+ * Parameter1 = byte offset into error buffer to retrieve, valid values
+ * are between 0 and (ibm,error-buffer-size - 1)
+ * Parameter2 = 4K aligned real address of error buffer, to be filled in
+ * Parameter3 = length of error buffer, valid values are 4K or less
+ */
+long cxl_h_get_afu_err(u64 unit_address, u64 offset, u64 buf_address, u64 len);
+
+/**
+ * cxl_h_get_config - collect configuration record for the
+ * coherent platform function
+ * Parameter1 = # of configuration record to retrieve, valid values are
+ * between 0 and (ibm,#config-records - 1)
+ * Parameter2 = byte offset into configuration record to retrieve,
+ * valid values are between 0 and (ibm,config-record-size - 1)
+ * Parameter3 = 4K aligned real address of configuration record buffer,
+ * to be filled in
+ * Parameter4 = length of configuration buffer, valid values are 4K or less
+ */
+long cxl_h_get_config(u64 unit_address, u64 cr_num, u64 offset,
+ u64 buf_address, u64 len);
+
+/**
+ * cxl_h_terminate_process - Terminate the process before completion
+ * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when
+ * process was attached.
+ */
+long cxl_h_terminate_process(u64 unit_address, u64 process_token);
+
+/**
+ * cxl_h_collect_vpd - Collect VPD for the coherent platform function.
+ * Parameter1 = # of VPD record to retrieve, valid values are between 0
+ * and (ibm,#config-records - 1).
+ * Parameter2 = 4K naturally aligned real buffer containing block
+ * list entries
+ * Parameter3 = number of block list entries in the block list, valid
+ * values are between 0 and 256
+ */
+long cxl_h_collect_vpd(u64 unit_address, u64 record, u64 list_address,
+ u64 num, u64 *out);
+
+/**
+ * cxl_h_get_fn_error_interrupt - Read the function-wide error data based on an interrupt
+ */
+long cxl_h_get_fn_error_interrupt(u64 unit_address, u64 *reg);
+
+/**
+ * cxl_h_ack_fn_error_interrupt - Acknowledge function-wide error data
+ * based on an interrupt
+ * Parameter1 = value to write to the function-wide error interrupt register
+ */
+long cxl_h_ack_fn_error_interrupt(u64 unit_address, u64 value);
+
+/**
+ * cxl_h_get_error_log - Retrieve the Platform Log ID (PLID) of
+ * an error log
+ */
+long cxl_h_get_error_log(u64 unit_address, u64 value);
+
+/**
+ * cxl_h_collect_int_info - Collect interrupt info about a coherent
+ * platform function after an interrupt occurred.
+ */
+long cxl_h_collect_int_info(u64 unit_address, u64 process_token,
+ struct cxl_irq_info *info);
+
+/**
+ * cxl_h_control_faults - Control the operation of a coherent platform
+ * function after a fault occurs.
+ *
+ * Parameters
+ * control-mask: value to control the faults; same layout as
+ * PSL_TFC_An, shifted right by 32 bits
+ * reset-mask: mask to control reset of function faults
+ * Set reset_mask = 1 to reset PSL errors
+ */
+long cxl_h_control_faults(u64 unit_address, u64 process_token,
+ u64 control_mask, u64 reset_mask);
+
+/**
+ * cxl_h_reset_adapter - Perform a reset of the coherent platform facility.
+ */
+long cxl_h_reset_adapter(u64 unit_address);
+
+/**
+ * cxl_h_collect_vpd_adapter - Collect VPD for the coherent platform facility.
+ * Parameter1 = 4K naturally aligned real buffer containing block
+ * list entries
+ * Parameter2 = number of block list entries in the block list, valid
+ * values are between 0 and 256
+ */
+long cxl_h_collect_vpd_adapter(u64 unit_address, u64 list_address,
+ u64 num, u64 *out);
+
+/**
+ * cxl_h_download_adapter_image - Download the base image to the coherent
+ * platform facility.
+ */
+long cxl_h_download_adapter_image(u64 unit_address,
+ u64 list_address, u64 num,
+ u64 *out);
+
+/**
+ * cxl_h_validate_adapter_image - Validate the base image in the coherent
+ * platform facility.
+ */
+long cxl_h_validate_adapter_image(u64 unit_address,
+ u64 list_address, u64 num,
+ u64 *out);
+#endif /* _HCALLS_H */
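For reference, SG_MAX_ENTRIES * sizeof(struct sg_list) = 256 * 16 bytes exactly fills the 4K SG_BUFFER_SIZE buffer. A hedged editorial sketch of packing such a buffer — fill_sg_buffer, chunk_phys and chunk_len are illustrative names not in the patch, and the cpu_to_be64() conversion reflects the assumption that, as with other PAPR structures, the hypervisor expects big-endian values:

static int fill_sg_buffer(struct sg_list *sg, int nchunks,
                          const u64 *chunk_phys, const u64 *chunk_len)
{
        int i;

        if (nchunks > SG_MAX_ENTRIES)   /* 256 entries == one 4K buffer */
                return -EINVAL;
        for (i = 0; i < nchunks; i++) {
                sg[i].phys_addr = cpu_to_be64(chunk_phys[i]);
                sg[i].len = cpu_to_be64(chunk_len[i]);
        }
        return 0;
}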
diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
index 09a406058c46..be646dc41a2c 100644
--- a/drivers/misc/cxl/irq.c
+++ b/drivers/misc/cxl/irq.c
@@ -19,70 +19,11 @@
#include "cxl.h"
#include "trace.h"
-/* XXX: This is implementation specific */
-static irqreturn_t handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, u64 errstat)
+static int afu_irq_range_start(void)
{
- u64 fir1, fir2, fir_slice, serr, afu_debug;
-
- fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1);
- fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2);
- fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An);
- serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An);
- afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An);
-
- dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat);
- dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1);
- dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2);
- dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr);
- dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice);
- dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug);
-
- dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n");
- cxl_stop_trace(ctx->afu->adapter);
-
- return cxl_ack_irq(ctx, 0, errstat);
-}
-
-irqreturn_t cxl_slice_irq_err(int irq, void *data)
-{
- struct cxl_afu *afu = data;
- u64 fir_slice, errstat, serr, afu_debug;
-
- WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq);
-
- serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
- fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An);
- errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
- afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An);
- dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr);
- dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice);
- dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat);
- dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug);
-
- cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
-
- return IRQ_HANDLED;
-}
-
-static irqreturn_t cxl_irq_err(int irq, void *data)
-{
- struct cxl *adapter = data;
- u64 fir1, fir2, err_ivte;
-
- WARN(1, "CXL ERROR interrupt %i\n", irq);
-
- err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE);
- dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte);
-
- dev_crit(&adapter->dev, "STOPPING CXL TRACE\n");
- cxl_stop_trace(adapter);
-
- fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1);
- fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2);
-
- dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2);
-
- return IRQ_HANDLED;
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ return 1;
+ return 0;
}
static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 dar)
@@ -93,9 +34,8 @@ static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 da
return IRQ_HANDLED;
}
-static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info)
+irqreturn_t cxl_irq(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info)
{
- struct cxl_context *ctx = data;
u64 dsisr, dar;
dsisr = irq_info->dsisr;
@@ -145,7 +85,8 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info)
if (dsisr & CXL_PSL_DSISR_An_UR)
pr_devel("CXL interrupt: AURP PTE not found\n");
if (dsisr & CXL_PSL_DSISR_An_PE)
- return handle_psl_slice_error(ctx, dsisr, irq_info->errstat);
+ return cxl_ops->handle_psl_slice_error(ctx, dsisr,
+ irq_info->errstat);
if (dsisr & CXL_PSL_DSISR_An_AE) {
pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err);
@@ -169,7 +110,7 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info)
wake_up_all(&ctx->wq);
}
- cxl_ack_irq(ctx, CXL_PSL_TFC_An_A, 0);
+ cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_A, 0);
return IRQ_HANDLED;
}
if (dsisr & CXL_PSL_DSISR_An_OC)
@@ -179,54 +120,27 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info)
return IRQ_HANDLED;
}
-static irqreturn_t fail_psl_irq(struct cxl_afu *afu, struct cxl_irq_info *irq_info)
-{
- if (irq_info->dsisr & CXL_PSL_DSISR_TRANS)
- cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
- else
- cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
-
- return IRQ_HANDLED;
-}
-
-static irqreturn_t cxl_irq_multiplexed(int irq, void *data)
-{
- struct cxl_afu *afu = data;
- struct cxl_context *ctx;
- struct cxl_irq_info irq_info;
- int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff;
- int ret;
-
- if ((ret = cxl_get_irq(afu, &irq_info))) {
- WARN(1, "Unable to get CXL IRQ Info: %i\n", ret);
- return fail_psl_irq(afu, &irq_info);
- }
-
- rcu_read_lock();
- ctx = idr_find(&afu->contexts_idr, ph);
- if (ctx) {
- ret = cxl_irq(irq, ctx, &irq_info);
- rcu_read_unlock();
- return ret;
- }
- rcu_read_unlock();
-
- WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %016llx DAR"
- " %016llx\n(Possible AFU HW issue - was a term/remove acked"
- " with outstanding transactions?)\n", ph, irq_info.dsisr,
- irq_info.dar);
- return fail_psl_irq(afu, &irq_info);
-}
-
static irqreturn_t cxl_irq_afu(int irq, void *data)
{
struct cxl_context *ctx = data;
irq_hw_number_t hwirq = irqd_to_hwirq(irq_get_irq_data(irq));
- int irq_off, afu_irq = 1;
+ int irq_off, afu_irq = 0;
__u16 range;
int r;
- for (r = 1; r < CXL_IRQ_RANGES; r++) {
+ /*
+ * Look for the interrupt number.
+ * On bare-metal, we know range 0 only contains the PSL
+ * interrupt, so we could start counting at range 1 and initialize
+ * afu_irq at 1.
+ * In a guest, range 0 also contains AFU interrupts, so it must
+ * be accounted for. Therefore we initialize afu_irq at 0 to take
+ * the PSL interrupt into account.
+ *
+ * For readability, it is simpler to walk all the ranges on both
+ * bare-metal and guest. The end result is the same.
+ */
+ for (r = 0; r < CXL_IRQ_RANGES; r++) {
irq_off = hwirq - ctx->irqs.offset[r];
range = ctx->irqs.range[r];
if (irq_off >= 0 && irq_off < range) {
@@ -236,7 +150,7 @@ static irqreturn_t cxl_irq_afu(int irq, void *data)
afu_irq += range;
}
if (unlikely(r >= CXL_IRQ_RANGES)) {
- WARN(1, "Recieved AFU IRQ out of range for pe %i (virq %i hwirq %lx)\n",
+ WARN(1, "Received AFU IRQ out of range for pe %i (virq %i hwirq %lx)\n",
ctx->pe, irq, hwirq);
return IRQ_HANDLED;
}
@@ -246,7 +160,7 @@ static irqreturn_t cxl_irq_afu(int irq, void *data)
afu_irq, ctx->pe, irq, hwirq);
if (unlikely(!ctx->irq_bitmap)) {
- WARN(1, "Recieved AFU IRQ for context with no IRQ bitmap\n");
+ WARN(1, "Received AFU IRQ for context with no IRQ bitmap\n");
return IRQ_HANDLED;
}
spin_lock(&ctx->lock);
@@ -272,7 +186,8 @@ unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq,
return 0;
}
- cxl_setup_irq(adapter, hwirq, virq);
+ if (cxl_ops->setup_irq)
+ cxl_ops->setup_irq(adapter, hwirq, virq);
pr_devel("hwirq %#lx mapped to virq %u\n", hwirq, virq);
@@ -291,16 +206,16 @@ void cxl_unmap_irq(unsigned int virq, void *cookie)
irq_dispose_mapping(virq);
}
-static int cxl_register_one_irq(struct cxl *adapter,
- irq_handler_t handler,
- void *cookie,
- irq_hw_number_t *dest_hwirq,
- unsigned int *dest_virq,
- const char *name)
+int cxl_register_one_irq(struct cxl *adapter,
+ irq_handler_t handler,
+ void *cookie,
+ irq_hw_number_t *dest_hwirq,
+ unsigned int *dest_virq,
+ const char *name)
{
int hwirq, virq;
- if ((hwirq = cxl_alloc_one_irq(adapter)) < 0)
+ if ((hwirq = cxl_ops->alloc_one_irq(adapter)) < 0)
return hwirq;
if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie, name)))
@@ -312,108 +227,10 @@ static int cxl_register_one_irq(struct cxl *adapter,
return 0;
err:
- cxl_release_one_irq(adapter, hwirq);
+ cxl_ops->release_one_irq(adapter, hwirq);
return -ENOMEM;
}
-int cxl_register_psl_err_irq(struct cxl *adapter)
-{
- int rc;
-
- adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err",
- dev_name(&adapter->dev));
- if (!adapter->irq_name)
- return -ENOMEM;
-
- if ((rc = cxl_register_one_irq(adapter, cxl_irq_err, adapter,
- &adapter->err_hwirq,
- &adapter->err_virq,
- adapter->irq_name))) {
- kfree(adapter->irq_name);
- adapter->irq_name = NULL;
- return rc;
- }
-
- cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->err_hwirq & 0xffff);
-
- return 0;
-}
-
-void cxl_release_psl_err_irq(struct cxl *adapter)
-{
- if (adapter->err_virq != irq_find_mapping(NULL, adapter->err_hwirq))
- return;
-
- cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000);
- cxl_unmap_irq(adapter->err_virq, adapter);
- cxl_release_one_irq(adapter, adapter->err_hwirq);
- kfree(adapter->irq_name);
-}
-
-int cxl_register_serr_irq(struct cxl_afu *afu)
-{
- u64 serr;
- int rc;
-
- afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err",
- dev_name(&afu->dev));
- if (!afu->err_irq_name)
- return -ENOMEM;
-
- if ((rc = cxl_register_one_irq(afu->adapter, cxl_slice_irq_err, afu,
- &afu->serr_hwirq,
- &afu->serr_virq, afu->err_irq_name))) {
- kfree(afu->err_irq_name);
- afu->err_irq_name = NULL;
- return rc;
- }
-
- serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
- serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff);
- cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
-
- return 0;
-}
-
-void cxl_release_serr_irq(struct cxl_afu *afu)
-{
- if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
- return;
-
- cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000);
- cxl_unmap_irq(afu->serr_virq, afu);
- cxl_release_one_irq(afu->adapter, afu->serr_hwirq);
- kfree(afu->err_irq_name);
-}
-
-int cxl_register_psl_irq(struct cxl_afu *afu)
-{
- int rc;
-
- afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s",
- dev_name(&afu->dev));
- if (!afu->psl_irq_name)
- return -ENOMEM;
-
- if ((rc = cxl_register_one_irq(afu->adapter, cxl_irq_multiplexed, afu,
- &afu->psl_hwirq, &afu->psl_virq,
- afu->psl_irq_name))) {
- kfree(afu->psl_irq_name);
- afu->psl_irq_name = NULL;
- }
- return rc;
-}
-
-void cxl_release_psl_irq(struct cxl_afu *afu)
-{
- if (afu->psl_virq != irq_find_mapping(NULL, afu->psl_hwirq))
- return;
-
- cxl_unmap_irq(afu->psl_virq, afu);
- cxl_release_one_irq(afu->adapter, afu->psl_hwirq);
- kfree(afu->psl_irq_name);
-}
-
void afu_irq_name_free(struct cxl_context *ctx)
{
struct cxl_irq_name *irq_name, *tmp;
@@ -429,16 +246,33 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count)
{
int rc, r, i, j = 1;
struct cxl_irq_name *irq_name;
+ int alloc_count;
+
+ /*
+ * In native mode, range 0 is reserved for the multiplexed
+ * PSL interrupt. It was allocated when the AFU was initialized.
+ *
+ * In a guest, the PSL interrupt is not multiplexed, but per-context,
+ * and is the first interrupt from range 0. It still needs to be
+ * allocated, so bump the count by one.
+ */
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ alloc_count = count;
+ else
+ alloc_count = count + 1;
/* Initialize the list head to hold irq names */
INIT_LIST_HEAD(&ctx->irq_names);
- if ((rc = cxl_alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, count)))
+ if ((rc = cxl_ops->alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter,
+ alloc_count)))
return rc;
- /* Multiplexed PSL Interrupt */
- ctx->irqs.offset[0] = ctx->afu->psl_hwirq;
- ctx->irqs.range[0] = 1;
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ /* Multiplexed PSL Interrupt */
+ ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq;
+ ctx->irqs.range[0] = 1;
+ }
ctx->irq_count = count;
ctx->irq_bitmap = kcalloc(BITS_TO_LONGS(count),
@@ -450,7 +284,7 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count)
* Allocate names first. If any fail, bail out before allocating
* actual hardware IRQs.
*/
- for (r = 1; r < CXL_IRQ_RANGES; r++) {
+ for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) {
for (i = 0; i < ctx->irqs.range[r]; i++) {
irq_name = kmalloc(sizeof(struct cxl_irq_name),
GFP_KERNEL);
@@ -471,7 +305,7 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count)
return 0;
out:
- cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+ cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
afu_irq_name_free(ctx);
return -ENOMEM;
}
@@ -480,15 +314,30 @@ static void afu_register_hwirqs(struct cxl_context *ctx)
{
irq_hw_number_t hwirq;
struct cxl_irq_name *irq_name;
- int r,i;
+ int r, i;
+ irqreturn_t (*handler)(int irq, void *data);
/* We've allocated all memory now, so let's do the irq allocations */
irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list);
- for (r = 1; r < CXL_IRQ_RANGES; r++) {
+ for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) {
hwirq = ctx->irqs.offset[r];
for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
- cxl_map_irq(ctx->afu->adapter, hwirq,
- cxl_irq_afu, ctx, irq_name->name);
+ if (r == 0 && i == 0)
+ /*
+ * The very first interrupt of range 0 is
+ * always the PSL interrupt, but we only
+ * need to connect a handler for guests,
+ * because there's one PSL interrupt per
+ * context.
+ * On bare-metal, the PSL interrupt is
+ * multiplexed and was set up when the AFU
+ * was configured.
+ */
+ handler = cxl_ops->psl_interrupt;
+ else
+ handler = cxl_irq_afu;
+ cxl_map_irq(ctx->afu->adapter, hwirq, handler, ctx,
+ irq_name->name);
irq_name = list_next_entry(irq_name, list);
}
}
@@ -504,7 +353,7 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count)
afu_register_hwirqs(ctx);
return 0;
- }
+}
void afu_release_irqs(struct cxl_context *ctx, void *cookie)
{
@@ -512,7 +361,7 @@ void afu_release_irqs(struct cxl_context *ctx, void *cookie)
unsigned int virq;
int r, i;
- for (r = 1; r < CXL_IRQ_RANGES; r++) {
+ for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) {
hwirq = ctx->irqs.offset[r];
for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
virq = irq_find_mapping(NULL, hwirq);
@@ -522,7 +371,7 @@ void afu_release_irqs(struct cxl_context *ctx, void *cookie)
}
afu_irq_name_free(ctx);
- cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+ cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
ctx->irq_count = 0;
}
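The range walk above appears three times in this file (cxl_irq_afu, registration and release). Its core logic, reduced to a standalone editorial sketch — hwirq_to_afu_irq is a hypothetical helper, not in the patch:

static int hwirq_to_afu_irq(struct cxl_context *ctx, irq_hw_number_t hwirq)
{
        int r, irq_off, afu_irq = 0;

        for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) {
                irq_off = hwirq - ctx->irqs.offset[r];
                if (irq_off >= 0 && irq_off < ctx->irqs.range[r])
                        return afu_irq + irq_off;       /* found the range */
                afu_irq += ctx->irqs.range[r];
        }
        return -1;      /* hwirq not in any range */
}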
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index 9fde75ed4fac..ae68c3201156 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -32,6 +32,29 @@ uint cxl_verbose;
module_param_named(verbose, cxl_verbose, uint, 0600);
MODULE_PARM_DESC(verbose, "Enable verbose dmesg output");
+const struct cxl_backend_ops *cxl_ops;
+
+int cxl_afu_slbia(struct cxl_afu *afu)
+{
+ unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+
+ pr_devel("cxl_afu_slbia issuing SLBIA command\n");
+ cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL);
+ while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) {
+ if (time_after_eq(jiffies, timeout)) {
+ dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n");
+ return -EBUSY;
+ }
+ /* If the adapter has gone down, we can assume that we
+ * will PERST it and that will invalidate everything.
+ */
+ if (!cxl_ops->link_ok(afu->adapter, afu))
+ return -EIO;
+ cpu_relax();
+ }
+ return 0;
+}
+
static inline void _cxl_slbia(struct cxl_context *ctx, struct mm_struct *mm)
{
struct task_struct *task;
@@ -139,6 +162,32 @@ int cxl_alloc_sst(struct cxl_context *ctx)
return 0;
}
+/* print buffer content as integers when debugging */
+void cxl_dump_debug_buffer(void *buf, size_t buf_len)
+{
+#ifdef DEBUG
+ int i, *ptr;
+
+ /*
+ * We want to regroup up to 4 integers per line, which means they
+ * need to be in the same pr_devel() statement
+ */
+ ptr = (int *) buf;
+ for (i = 0; i * 4 < buf_len; i += 4) {
+ if ((i + 3) * 4 < buf_len)
+ pr_devel("%.8x %.8x %.8x %.8x\n", ptr[i], ptr[i + 1],
+ ptr[i + 2], ptr[i + 3]);
+ else if ((i + 2) * 4 < buf_len)
+ pr_devel("%.8x %.8x %.8x\n", ptr[i], ptr[i + 1],
+ ptr[i + 2]);
+ else if ((i + 1) * 4 < buf_len)
+ pr_devel("%.8x %.8x\n", ptr[i], ptr[i + 1]);
+ else
+ pr_devel("%.8x\n", ptr[i]);
+ }
+#endif /* DEBUG */
+}
+
/* Find a CXL adapter by its number and increase its refcount */
struct cxl *get_cxl_adapter(int num)
{
@@ -152,7 +201,7 @@ struct cxl *get_cxl_adapter(int num)
return adapter;
}
-int cxl_alloc_adapter_nr(struct cxl *adapter)
+static int cxl_alloc_adapter_nr(struct cxl *adapter)
{
int i;
@@ -174,13 +223,58 @@ void cxl_remove_adapter_nr(struct cxl *adapter)
idr_remove(&cxl_adapter_idr, adapter->adapter_num);
}
+struct cxl *cxl_alloc_adapter(void)
+{
+ struct cxl *adapter;
+
+ if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL)))
+ return NULL;
+
+ spin_lock_init(&adapter->afu_list_lock);
+
+ if (cxl_alloc_adapter_nr(adapter))
+ goto err1;
+
+ if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))
+ goto err2;
+
+ return adapter;
+
+err2:
+ cxl_remove_adapter_nr(adapter);
+err1:
+ kfree(adapter);
+ return NULL;
+}
+
+struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice)
+{
+ struct cxl_afu *afu;
+
+ if (!(afu = kzalloc(sizeof(struct cxl_afu), GFP_KERNEL)))
+ return NULL;
+
+ afu->adapter = adapter;
+ afu->dev.parent = &adapter->dev;
+ afu->dev.release = cxl_ops->release_afu;
+ afu->slice = slice;
+ idr_init(&afu->contexts_idr);
+ mutex_init(&afu->contexts_lock);
+ spin_lock_init(&afu->afu_cntl_lock);
+
+ afu->prefault_mode = CXL_PREFAULT_NONE;
+ afu->irqs_max = afu->adapter->user_irqs;
+
+ return afu;
+}
+
int cxl_afu_select_best_mode(struct cxl_afu *afu)
{
if (afu->modes_supported & CXL_MODE_DIRECTED)
- return cxl_afu_activate_mode(afu, CXL_MODE_DIRECTED);
+ return cxl_ops->afu_activate_mode(afu, CXL_MODE_DIRECTED);
if (afu->modes_supported & CXL_MODE_DEDICATED)
- return cxl_afu_activate_mode(afu, CXL_MODE_DEDICATED);
+ return cxl_ops->afu_activate_mode(afu, CXL_MODE_DEDICATED);
dev_warn(&afu->dev, "No supported programming modes available\n");
/* We don't fail this so the user can inspect sysfs */
@@ -191,9 +285,6 @@ static int __init init_cxl(void)
{
int rc = 0;
- if (!cpu_has_feature(CPU_FTR_HVMODE))
- return -EPERM;
-
if ((rc = cxl_file_init()))
return rc;
@@ -202,7 +293,17 @@ static int __init init_cxl(void)
if ((rc = register_cxl_calls(&cxl_calls)))
goto err;
- if ((rc = pci_register_driver(&cxl_pci_driver)))
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ cxl_ops = &cxl_native_ops;
+ rc = pci_register_driver(&cxl_pci_driver);
+ }
+#ifdef CONFIG_PPC_PSERIES
+ else {
+ cxl_ops = &cxl_guest_ops;
+ rc = platform_driver_register(&cxl_of_driver);
+ }
+#endif
+ if (rc)
goto err1;
return 0;
@@ -217,7 +318,12 @@ err:
static void exit_cxl(void)
{
- pci_unregister_driver(&cxl_pci_driver);
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ pci_unregister_driver(&cxl_pci_driver);
+#ifdef CONFIG_PPC_PSERIES
+ else
+ platform_driver_unregister(&cxl_of_driver);
+#endif
cxl_debugfs_exit();
cxl_file_exit();
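init_cxl() now binds cxl_ops exactly once, and all common code is indirect from then on. A one-line editorial illustration of the resulting call pattern (the guest counterpart of each native_* function lives elsewhere in this series):

static int example_afu_reset(struct cxl_afu *afu)
{
        /* dispatches to native_afu_reset() on PowerNV (HV mode),
         * or to the guest implementation on pSeries */
        return cxl_ops->afu_reset(afu);
}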
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index f40909793490..387fcbdf9793 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -42,7 +42,7 @@ static int afu_control(struct cxl_afu *afu, u64 command,
goto out;
}
- if (!cxl_adapter_link_ok(afu->adapter)) {
+ if (!cxl_ops->link_ok(afu->adapter, afu)) {
afu->enabled = enabled;
rc = -EIO;
goto out;
@@ -80,7 +80,7 @@ int cxl_afu_disable(struct cxl_afu *afu)
}
/* This will disable as well as reset */
-int __cxl_afu_reset(struct cxl_afu *afu)
+static int native_afu_reset(struct cxl_afu *afu)
{
pr_devel("AFU reset request\n");
@@ -90,9 +90,9 @@ int __cxl_afu_reset(struct cxl_afu *afu)
false);
}
-int cxl_afu_check_and_enable(struct cxl_afu *afu)
+static int native_afu_check_and_enable(struct cxl_afu *afu)
{
- if (!cxl_adapter_link_ok(afu->adapter)) {
+ if (!cxl_ops->link_ok(afu->adapter, afu)) {
WARN(1, "Refusing to enable afu while link down!\n");
return -EIO;
}
@@ -114,7 +114,7 @@ int cxl_psl_purge(struct cxl_afu *afu)
pr_devel("PSL purge request\n");
- if (!cxl_adapter_link_ok(afu->adapter)) {
+ if (!cxl_ops->link_ok(afu->adapter, afu)) {
dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n");
rc = -EIO;
goto out;
@@ -136,7 +136,7 @@ int cxl_psl_purge(struct cxl_afu *afu)
rc = -EBUSY;
goto out;
}
- if (!cxl_adapter_link_ok(afu->adapter)) {
+ if (!cxl_ops->link_ok(afu->adapter, afu)) {
rc = -EIO;
goto out;
}
@@ -186,22 +186,22 @@ static int spa_max_procs(int spa_size)
int cxl_alloc_spa(struct cxl_afu *afu)
{
/* Work out how many pages to allocate */
- afu->spa_order = 0;
+ afu->native->spa_order = 0;
do {
- afu->spa_order++;
- afu->spa_size = (1 << afu->spa_order) * PAGE_SIZE;
- afu->spa_max_procs = spa_max_procs(afu->spa_size);
- } while (afu->spa_max_procs < afu->num_procs);
+ afu->native->spa_order++;
+ afu->native->spa_size = (1 << afu->native->spa_order) * PAGE_SIZE;
+ afu->native->spa_max_procs = spa_max_procs(afu->native->spa_size);
+ } while (afu->native->spa_max_procs < afu->num_procs);
- WARN_ON(afu->spa_size > 0x100000); /* Max size supported by the hardware */
+ WARN_ON(afu->native->spa_size > 0x100000); /* Max size supported by the hardware */
- if (!(afu->spa = (struct cxl_process_element *)
- __get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->spa_order))) {
+ if (!(afu->native->spa = (struct cxl_process_element *)
+ __get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->native->spa_order))) {
pr_err("cxl_alloc_spa: Unable to allocate scheduled process area\n");
return -ENOMEM;
}
pr_devel("spa pages: %i afu->spa_max_procs: %i afu->num_procs: %i\n",
- 1<<afu->spa_order, afu->spa_max_procs, afu->num_procs);
+ 1<<afu->native->spa_order, afu->native->spa_max_procs, afu->num_procs);
return 0;
}
@@ -210,13 +210,15 @@ static void attach_spa(struct cxl_afu *afu)
{
u64 spap;
- afu->sw_command_status = (__be64 *)((char *)afu->spa +
- ((afu->spa_max_procs + 3) * 128));
+ afu->native->sw_command_status = (__be64 *)((char *)afu->native->spa +
+ ((afu->native->spa_max_procs + 3) * 128));
- spap = virt_to_phys(afu->spa) & CXL_PSL_SPAP_Addr;
- spap |= ((afu->spa_size >> (12 - CXL_PSL_SPAP_Size_Shift)) - 1) & CXL_PSL_SPAP_Size;
+ spap = virt_to_phys(afu->native->spa) & CXL_PSL_SPAP_Addr;
+ spap |= ((afu->native->spa_size >> (12 - CXL_PSL_SPAP_Size_Shift)) - 1) & CXL_PSL_SPAP_Size;
spap |= CXL_PSL_SPAP_V;
- pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", afu->spa, afu->spa_max_procs, afu->sw_command_status, spap);
+ pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n",
+ afu->native->spa, afu->native->spa_max_procs,
+ afu->native->sw_command_status, spap);
cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap);
}
@@ -227,9 +229,10 @@ static inline void detach_spa(struct cxl_afu *afu)
void cxl_release_spa(struct cxl_afu *afu)
{
- if (afu->spa) {
- free_pages((unsigned long) afu->spa, afu->spa_order);
- afu->spa = NULL;
+ if (afu->native->spa) {
+ free_pages((unsigned long) afu->native->spa,
+ afu->native->spa_order);
+ afu->native->spa = NULL;
}
}
@@ -247,7 +250,7 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter)
dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n");
return -EBUSY;
}
- if (!cxl_adapter_link_ok(adapter))
+ if (!cxl_ops->link_ok(adapter, NULL))
return -EIO;
cpu_relax();
}
@@ -258,28 +261,7 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter)
dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n");
return -EBUSY;
}
- if (!cxl_adapter_link_ok(adapter))
- return -EIO;
- cpu_relax();
- }
- return 0;
-}
-
-int cxl_afu_slbia(struct cxl_afu *afu)
-{
- unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
-
- pr_devel("cxl_afu_slbia issuing SLBIA command\n");
- cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL);
- while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) {
- if (time_after_eq(jiffies, timeout)) {
- dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n");
- return -EBUSY;
- }
- /* If the adapter has gone down, we can assume that we
- * will PERST it and that will invalidate everything.
- */
- if (!cxl_adapter_link_ok(afu->adapter))
+ if (!cxl_ops->link_ok(adapter, NULL))
return -EIO;
cpu_relax();
}
@@ -312,7 +294,7 @@ static void slb_invalid(struct cxl_context *ctx)
struct cxl *adapter = ctx->afu->adapter;
u64 slbia;
- WARN_ON(!mutex_is_locked(&ctx->afu->spa_mutex));
+ WARN_ON(!mutex_is_locked(&ctx->afu->native->spa_mutex));
cxl_p1_write(adapter, CXL_PSL_LBISEL,
((u64)be32_to_cpu(ctx->elem->common.pid) << 32) |
@@ -320,7 +302,7 @@ static void slb_invalid(struct cxl_context *ctx)
cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID);
while (1) {
- if (!cxl_adapter_link_ok(adapter))
+ if (!cxl_ops->link_ok(adapter, NULL))
break;
slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA);
if (!(slbia & CXL_TLB_SLB_P))
@@ -342,7 +324,7 @@ static int do_process_element_cmd(struct cxl_context *ctx,
ctx->elem->software_state = cpu_to_be32(pe_state);
smp_wmb();
- *(ctx->afu->sw_command_status) = cpu_to_be64(cmd | 0 | ctx->pe);
+ *(ctx->afu->native->sw_command_status) = cpu_to_be64(cmd | 0 | ctx->pe);
smp_mb();
cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe);
while (1) {
@@ -351,12 +333,12 @@ static int do_process_element_cmd(struct cxl_context *ctx,
rc = -EBUSY;
goto out;
}
- if (!cxl_adapter_link_ok(ctx->afu->adapter)) {
+ if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) {
dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n");
rc = -EIO;
goto out;
}
- state = be64_to_cpup(ctx->afu->sw_command_status);
+ state = be64_to_cpup(ctx->afu->native->sw_command_status);
if (state == ~0ULL) {
pr_err("cxl: Error adding process element to AFU\n");
rc = -1;
@@ -384,12 +366,12 @@ static int add_process_element(struct cxl_context *ctx)
{
int rc = 0;
- mutex_lock(&ctx->afu->spa_mutex);
+ mutex_lock(&ctx->afu->native->spa_mutex);
pr_devel("%s Adding pe: %i started\n", __func__, ctx->pe);
if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_ADD, CXL_PE_SOFTWARE_STATE_V)))
ctx->pe_inserted = true;
pr_devel("%s Adding pe: %i finished\n", __func__, ctx->pe);
- mutex_unlock(&ctx->afu->spa_mutex);
+ mutex_unlock(&ctx->afu->native->spa_mutex);
return rc;
}
@@ -401,18 +383,18 @@ static int terminate_process_element(struct cxl_context *ctx)
if (!(ctx->elem->software_state & cpu_to_be32(CXL_PE_SOFTWARE_STATE_V)))
return rc;
- mutex_lock(&ctx->afu->spa_mutex);
+ mutex_lock(&ctx->afu->native->spa_mutex);
pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe);
/* We could be asked to terminate when the hw is down. That
* should always succeed: it's not running if the hw has gone
* away and is being reset.
*/
- if (cxl_adapter_link_ok(ctx->afu->adapter))
+ if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu))
rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE,
CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T);
ctx->elem->software_state = 0; /* Remove Valid bit */
pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe);
- mutex_unlock(&ctx->afu->spa_mutex);
+ mutex_unlock(&ctx->afu->native->spa_mutex);
return rc;
}
@@ -420,20 +402,20 @@ static int remove_process_element(struct cxl_context *ctx)
{
int rc = 0;
- mutex_lock(&ctx->afu->spa_mutex);
+ mutex_lock(&ctx->afu->native->spa_mutex);
pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe);
/* We could be asked to remove when the hw is down. Again, if
* the hw is down, the PE is gone, so we succeed.
*/
- if (cxl_adapter_link_ok(ctx->afu->adapter))
+ if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu))
rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0);
if (!rc)
ctx->pe_inserted = false;
slb_invalid(ctx);
pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe);
- mutex_unlock(&ctx->afu->spa_mutex);
+ mutex_unlock(&ctx->afu->native->spa_mutex);
return rc;
}
@@ -446,7 +428,7 @@ void cxl_assign_psn_space(struct cxl_context *ctx)
ctx->psn_size = ctx->afu->adapter->ps_size;
} else {
ctx->psn_phys = ctx->afu->psn_phys +
- (ctx->afu->pp_offset + ctx->afu->pp_size * ctx->pe);
+ (ctx->afu->native->pp_offset + ctx->afu->pp_size * ctx->pe);
ctx->psn_size = ctx->afu->pp_size;
}
}
@@ -458,7 +440,7 @@ static int activate_afu_directed(struct cxl_afu *afu)
dev_info(&afu->dev, "Activating AFU directed mode\n");
afu->num_procs = afu->max_procs_virtualised;
- if (afu->spa == NULL) {
+ if (afu->native->spa == NULL) {
if (cxl_alloc_spa(afu))
return -ENOMEM;
}
@@ -552,7 +534,7 @@ static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr)
ctx->elem->common.wed = cpu_to_be64(wed);
/* first guy needs to enable */
- if ((result = cxl_afu_check_and_enable(ctx->afu)))
+ if ((result = cxl_ops->afu_check_and_enable(ctx->afu)))
return result;
return add_process_element(ctx);
@@ -568,7 +550,7 @@ static int deactivate_afu_directed(struct cxl_afu *afu)
cxl_sysfs_afu_m_remove(afu);
cxl_chardev_afu_remove(afu);
- __cxl_afu_reset(afu);
+ cxl_ops->afu_reset(afu);
cxl_afu_disable(afu);
cxl_psl_purge(afu);
@@ -632,7 +614,7 @@ static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr)
/* master only context for dedicated */
cxl_assign_psn_space(ctx);
- if ((rc = __cxl_afu_reset(afu)))
+ if ((rc = cxl_ops->afu_reset(afu)))
return rc;
cxl_p2n_write(afu, CXL_PSL_WED_An, wed);
@@ -652,7 +634,7 @@ static int deactivate_dedicated_process(struct cxl_afu *afu)
return 0;
}
-int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode)
+static int native_afu_deactivate_mode(struct cxl_afu *afu, int mode)
{
if (mode == CXL_MODE_DIRECTED)
return deactivate_afu_directed(afu);
@@ -661,19 +643,14 @@ int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode)
return 0;
}
-int cxl_afu_deactivate_mode(struct cxl_afu *afu)
-{
- return _cxl_afu_deactivate_mode(afu, afu->current_mode);
-}
-
-int cxl_afu_activate_mode(struct cxl_afu *afu, int mode)
+static int native_afu_activate_mode(struct cxl_afu *afu, int mode)
{
if (!mode)
return 0;
if (!(mode & afu->modes_supported))
return -EINVAL;
- if (!cxl_adapter_link_ok(afu->adapter)) {
+ if (!cxl_ops->link_ok(afu->adapter, afu)) {
WARN(1, "Device link is down, refusing to activate!\n");
return -EIO;
}
@@ -686,9 +663,10 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode)
return -EINVAL;
}
-int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr)
+static int native_attach_process(struct cxl_context *ctx, bool kernel,
+ u64 wed, u64 amr)
{
- if (!cxl_adapter_link_ok(ctx->afu->adapter)) {
+ if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) {
WARN(1, "Device link is down, refusing to attach process!\n");
return -EIO;
}
@@ -705,7 +683,7 @@ int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr)
static inline int detach_process_native_dedicated(struct cxl_context *ctx)
{
- __cxl_afu_reset(ctx->afu);
+ cxl_ops->afu_reset(ctx->afu);
cxl_afu_disable(ctx->afu);
cxl_psl_purge(ctx->afu);
return 0;
@@ -723,7 +701,7 @@ static inline int detach_process_native_afu_directed(struct cxl_context *ctx)
return 0;
}
-int cxl_detach_process(struct cxl_context *ctx)
+static int native_detach_process(struct cxl_context *ctx)
{
trace_cxl_detach(ctx);
@@ -733,14 +711,14 @@ int cxl_detach_process(struct cxl_context *ctx)
return detach_process_native_afu_directed(ctx);
}
-int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info)
+static int native_get_irq_info(struct cxl_afu *afu, struct cxl_irq_info *info)
{
u64 pidtid;
/* If the adapter has gone away, we can't get any meaningful
* information.
*/
- if (!cxl_adapter_link_ok(afu->adapter))
+ if (!cxl_ops->link_ok(afu->adapter, afu))
return -EIO;
info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
@@ -751,10 +729,214 @@ int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info)
info->tid = pidtid & 0xffffffff;
info->afu_err = cxl_p2n_read(afu, CXL_AFU_ERR_An);
info->errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
+ info->proc_handle = 0;
+
+ return 0;
+}
+
+static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx,
+ u64 dsisr, u64 errstat)
+{
+ u64 fir1, fir2, fir_slice, serr, afu_debug;
+
+ fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1);
+ fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2);
+ fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An);
+ serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An);
+ afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An);
+
+ dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat);
+ dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1);
+ dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2);
+ dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr);
+ dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice);
+ dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug);
+
+ dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n");
+ cxl_stop_trace(ctx->afu->adapter);
+
+ return cxl_ops->ack_irq(ctx, 0, errstat);
+}
+
+static irqreturn_t fail_psl_irq(struct cxl_afu *afu, struct cxl_irq_info *irq_info)
+{
+ if (irq_info->dsisr & CXL_PSL_DSISR_TRANS)
+ cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
+ else
+ cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t native_irq_multiplexed(int irq, void *data)
+{
+ struct cxl_afu *afu = data;
+ struct cxl_context *ctx;
+ struct cxl_irq_info irq_info;
+ int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff;
+ int ret;
+
+ if ((ret = native_get_irq_info(afu, &irq_info))) {
+ WARN(1, "Unable to get CXL IRQ Info: %i\n", ret);
+ return fail_psl_irq(afu, &irq_info);
+ }
+
+ rcu_read_lock();
+ ctx = idr_find(&afu->contexts_idr, ph);
+ if (ctx) {
+ ret = cxl_irq(irq, ctx, &irq_info);
+ rcu_read_unlock();
+ return ret;
+ }
+ rcu_read_unlock();
+
+ WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %016llx DAR"
+ " %016llx\n(Possible AFU HW issue - was a term/remove acked"
+ " with outstanding transactions?)\n", ph, irq_info.dsisr,
+ irq_info.dar);
+ return fail_psl_irq(afu, &irq_info);
+}
+
+static irqreturn_t native_slice_irq_err(int irq, void *data)
+{
+ struct cxl_afu *afu = data;
+ u64 fir_slice, errstat, serr, afu_debug;
+
+ WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq);
+
+ serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+ fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An);
+ errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
+ afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An);
+ dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr);
+ dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice);
+ dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat);
+ dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug);
+
+ cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t native_irq_err(int irq, void *data)
+{
+ struct cxl *adapter = data;
+ u64 fir1, fir2, err_ivte;
+
+ WARN(1, "CXL ERROR interrupt %i\n", irq);
+
+ err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE);
+ dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte);
+
+ dev_crit(&adapter->dev, "STOPPING CXL TRACE\n");
+ cxl_stop_trace(adapter);
+
+ fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1);
+ fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2);
+
+ dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2);
+
+ return IRQ_HANDLED;
+}
+
+int cxl_native_register_psl_err_irq(struct cxl *adapter)
+{
+ int rc;
+
+ adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err",
+ dev_name(&adapter->dev));
+ if (!adapter->irq_name)
+ return -ENOMEM;
+
+ if ((rc = cxl_register_one_irq(adapter, native_irq_err, adapter,
+ &adapter->native->err_hwirq,
+ &adapter->native->err_virq,
+ adapter->irq_name))) {
+ kfree(adapter->irq_name);
+ adapter->irq_name = NULL;
+ return rc;
+ }
+
+ cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->native->err_hwirq & 0xffff);
+
+ return 0;
+}
+
+void cxl_native_release_psl_err_irq(struct cxl *adapter)
+{
+ if (adapter->native->err_virq != irq_find_mapping(NULL, adapter->native->err_hwirq))
+ return;
+
+ cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000);
+ cxl_unmap_irq(adapter->native->err_virq, adapter);
+ cxl_ops->release_one_irq(adapter, adapter->native->err_hwirq);
+ kfree(adapter->irq_name);
+}
+
+int cxl_native_register_serr_irq(struct cxl_afu *afu)
+{
+ u64 serr;
+ int rc;
+
+ afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err",
+ dev_name(&afu->dev));
+ if (!afu->err_irq_name)
+ return -ENOMEM;
+
+ if ((rc = cxl_register_one_irq(afu->adapter, native_slice_irq_err, afu,
+ &afu->serr_hwirq,
+ &afu->serr_virq, afu->err_irq_name))) {
+ kfree(afu->err_irq_name);
+ afu->err_irq_name = NULL;
+ return rc;
+ }
+
+ serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+ serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff);
+ cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
return 0;
}
+void cxl_native_release_serr_irq(struct cxl_afu *afu)
+{
+ if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
+ return;
+
+ cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000);
+ cxl_unmap_irq(afu->serr_virq, afu);
+ cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq);
+ kfree(afu->err_irq_name);
+}
+
+int cxl_native_register_psl_irq(struct cxl_afu *afu)
+{
+ int rc;
+
+ afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s",
+ dev_name(&afu->dev));
+ if (!afu->psl_irq_name)
+ return -ENOMEM;
+
+ if ((rc = cxl_register_one_irq(afu->adapter, native_irq_multiplexed,
+ afu, &afu->native->psl_hwirq, &afu->native->psl_virq,
+ afu->psl_irq_name))) {
+ kfree(afu->psl_irq_name);
+ afu->psl_irq_name = NULL;
+ }
+ return rc;
+}
+
+void cxl_native_release_psl_irq(struct cxl_afu *afu)
+{
+ if (afu->native->psl_virq != irq_find_mapping(NULL, afu->native->psl_hwirq))
+ return;
+
+ cxl_unmap_irq(afu->native->psl_virq, afu);
+ cxl_ops->release_one_irq(afu->adapter, afu->native->psl_hwirq);
+ kfree(afu->psl_irq_name);
+}
+
static void recover_psl_err(struct cxl_afu *afu, u64 errstat)
{
u64 dsisr;
@@ -769,7 +951,7 @@ static void recover_psl_err(struct cxl_afu *afu, u64 errstat)
cxl_p2n_write(afu, CXL_PSL_ErrStat_An, errstat);
}
-int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask)
+static int native_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask)
{
trace_cxl_psl_irq_ack(ctx, tfc);
if (tfc)
@@ -784,3 +966,132 @@ int cxl_check_error(struct cxl_afu *afu)
{
return (cxl_p1n_read(afu, CXL_PSL_SCNTL_An) == ~0ULL);
}
+
+static bool native_support_attributes(const char *attr_name,
+ enum cxl_attrs type)
+{
+ return true;
+}
+
+static int native_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off, u64 *out)
+{
+ if (unlikely(!cxl_ops->link_ok(afu->adapter, afu)))
+ return -EIO;
+ if (unlikely(off >= afu->crs_len))
+ return -ERANGE;
+ *out = in_le64(afu->native->afu_desc_mmio + afu->crs_offset +
+ (cr * afu->crs_len) + off);
+ return 0;
+}
+
+static int native_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off, u32 *out)
+{
+ if (unlikely(!cxl_ops->link_ok(afu->adapter, afu)))
+ return -EIO;
+ if (unlikely(off >= afu->crs_len))
+ return -ERANGE;
+ *out = in_le32(afu->native->afu_desc_mmio + afu->crs_offset +
+ (cr * afu->crs_len) + off);
+ return 0;
+}
+
+static int native_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off, u16 *out)
+{
+ u64 aligned_off = off & ~0x3L;
+ u32 val;
+ int rc;
+
+ rc = native_afu_cr_read32(afu, cr, aligned_off, &val);
+ if (!rc)
+ *out = (val >> ((off & 0x3) * 8)) & 0xffff;
+ return rc;
+}
+
+static int native_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off, u8 *out)
+{
+ u64 aligned_off = off & ~0x3L;
+ u32 val;
+ int rc;
+
+ rc = native_afu_cr_read32(afu, cr, aligned_off, &val);
+ if (!rc)
+ *out = (val >> ((off & 0x3) * 8)) & 0xff;
+ return rc;
+}
+
+static int native_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in)
+{
+ if (unlikely(!cxl_ops->link_ok(afu->adapter, afu)))
+ return -EIO;
+ if (unlikely(off >= afu->crs_len))
+ return -ERANGE;
+ out_le32(afu->native->afu_desc_mmio + afu->crs_offset +
+ (cr * afu->crs_len) + off, in);
+ return 0;
+}
+
+static int native_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in)
+{
+ u64 aligned_off = off & ~0x3L;
+ u32 val32, mask, shift;
+ int rc;
+
+ rc = native_afu_cr_read32(afu, cr, aligned_off, &val32);
+ if (rc)
+ return rc;
+ shift = (off & 0x3) * 8;
+ WARN_ON(shift == 24);
+ mask = 0xffff << shift;
+ val32 = (val32 & ~mask) | (in << shift);
+
+ rc = native_afu_cr_write32(afu, cr, aligned_off, val32);
+ return rc;
+}
+
+static int native_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in)
+{
+ u64 aligned_off = off & ~0x3L;
+ u32 val32, mask, shift;
+ int rc;
+
+ rc = native_afu_cr_read32(afu, cr, aligned_off, &val32);
+ if (rc)
+ return rc;
+ shift = (off & 0x3) * 8;
+ mask = 0xff << shift;
+ val32 = (val32 & ~mask) | (in << shift);
+
+ rc = native_afu_cr_write32(afu, cr, aligned_off, val32);
+ return rc;
+}
+
+const struct cxl_backend_ops cxl_native_ops = {
+ .module = THIS_MODULE,
+ .adapter_reset = cxl_pci_reset,
+ .alloc_one_irq = cxl_pci_alloc_one_irq,
+ .release_one_irq = cxl_pci_release_one_irq,
+ .alloc_irq_ranges = cxl_pci_alloc_irq_ranges,
+ .release_irq_ranges = cxl_pci_release_irq_ranges,
+ .setup_irq = cxl_pci_setup_irq,
+ .handle_psl_slice_error = native_handle_psl_slice_error,
+ .psl_interrupt = NULL,
+ .ack_irq = native_ack_irq,
+ .attach_process = native_attach_process,
+ .detach_process = native_detach_process,
+ .support_attributes = native_support_attributes,
+ .link_ok = cxl_adapter_link_ok,
+ .release_afu = cxl_pci_release_afu,
+ .afu_read_err_buffer = cxl_pci_afu_read_err_buffer,
+ .afu_check_and_enable = native_afu_check_and_enable,
+ .afu_activate_mode = native_afu_activate_mode,
+ .afu_deactivate_mode = native_afu_deactivate_mode,
+ .afu_reset = native_afu_reset,
+ .afu_cr_read8 = native_afu_cr_read8,
+ .afu_cr_read16 = native_afu_cr_read16,
+ .afu_cr_read32 = native_afu_cr_read32,
+ .afu_cr_read64 = native_afu_cr_read64,
+ .afu_cr_write8 = native_afu_cr_write8,
+ .afu_cr_write16 = native_afu_cr_write16,
+ .afu_cr_write32 = native_afu_cr_write32,
+ .read_adapter_vpd = cxl_pci_read_adapter_vpd,
+};
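All four sub-word config-record accessors above reduce to a read-modify-write of the 4-byte-aligned word containing the offset. A worked editorial example for native_afu_cr_write16(afu, cr, 0x6, in):

/*
 *   aligned_off = 0x6 & ~0x3 = 0x4      (containing 32-bit word)
 *   shift       = (0x6 & 0x3) * 8 = 16
 *   mask        = 0xffff << 16 = 0xffff0000
 *   val32       = (old & 0x0000ffff) | (in << 16)
 *
 * The 16-bit value lands in the upper half of the little-endian word
 * at offset 0x4. WARN_ON(shift == 24) flags a 16-bit access at
 * off & 0x3 == 3, which would straddle two words.
 */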
diff --git a/drivers/misc/cxl/of.c b/drivers/misc/cxl/of.c
new file mode 100644
index 000000000000..edc458395f68
--- /dev/null
+++ b/drivers/misc/cxl/of.c
@@ -0,0 +1,513 @@
+/*
+ * Copyright 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+
+#include "cxl.h"
+
+
+static const __be32 *read_prop_string(const struct device_node *np,
+ const char *prop_name)
+{
+ const __be32 *prop;
+
+ prop = of_get_property(np, prop_name, NULL);
+ if (cxl_verbose && prop)
+ pr_info("%s: %s\n", prop_name, (char *) prop);
+ return prop;
+}
+
+static const __be32 *read_prop_dword(const struct device_node *np,
+ const char *prop_name, u32 *val)
+{
+ const __be32 *prop;
+
+ prop = of_get_property(np, prop_name, NULL);
+ if (prop)
+ *val = be32_to_cpu(prop[0]);
+ if (cxl_verbose && prop)
+ pr_info("%s: %#x (%u)\n", prop_name, *val, *val);
+ return prop;
+}
+
+static const __be64 *read_prop64_dword(const struct device_node *np,
+ const char *prop_name, u64 *val)
+{
+ const __be64 *prop;
+
+ prop = of_get_property(np, prop_name, NULL);
+ if (prop)
+ *val = be64_to_cpu(prop[0]);
+ if (cxl_verbose && prop)
+ pr_info("%s: %#llx (%llu)\n", prop_name, *val, *val);
+ return prop;
+}
+
+
+static int read_handle(struct device_node *np, u64 *handle)
+{
+ const __be32 *prop;
+ u64 size;
+
+ /* Get address and size of the node */
+ prop = of_get_address(np, 0, &size, NULL);
+ if (!prop || size)
+ return -EINVAL;
+
+ /* Helper to read a big number; size is in cells (not bytes) */
+ *handle = of_read_number(prop, of_n_addr_cells(np));
+ return 0;
+}
+
+static int read_phys_addr(struct device_node *np, char *prop_name,
+ struct cxl_afu *afu)
+{
+ int i, len, entry_size, naddr, nsize, type;
+ u64 addr, size;
+ const __be32 *prop;
+
+ naddr = of_n_addr_cells(np);
+ nsize = of_n_size_cells(np);
+
+ prop = of_get_property(np, prop_name, &len);
+ if (prop) {
+ entry_size = naddr + nsize;
+ for (i = 0; i < (len / 4); i += entry_size, prop += entry_size) {
+ type = be32_to_cpu(prop[0]);
+ addr = of_read_number(prop, naddr);
+ size = of_read_number(&prop[naddr], nsize);
+ switch (type) {
+ case 0: /* unit address */
+ afu->guest->handle = addr;
+ break;
+ case 1: /* p2 area */
+ afu->guest->p2n_phys += addr;
+ afu->guest->p2n_size = size;
+ break;
+ case 2: /* problem state area */
+ afu->psn_phys += addr;
+ afu->adapter->ps_size = size;
+ break;
+ default:
+ pr_err("Invalid address type %d found in %s property of AFU\n",
+ type, prop_name);
+ return -EINVAL;
+ }
+ if (cxl_verbose)
+ pr_info("%s: %#x %#llx (size %#llx)\n",
+ prop_name, type, addr, size);
+ }
+ }
+ return 0;
+}
+
+static int read_vpd(struct cxl *adapter, struct cxl_afu *afu)
+{
+ char vpd[256];
+ int rc;
+ size_t len = sizeof(vpd);
+
+ memset(vpd, 0, len);
+
+ if (adapter)
+ rc = cxl_guest_read_adapter_vpd(adapter, vpd, len);
+ else
+ rc = cxl_guest_read_afu_vpd(afu, vpd, len);
+
+ if (rc > 0) {
+ cxl_dump_debug_buffer(vpd, rc);
+ rc = 0;
+ }
+ return rc;
+}
+
+int cxl_of_read_afu_handle(struct cxl_afu *afu, struct device_node *afu_np)
+{
+ if (read_handle(afu_np, &afu->guest->handle))
+ return -EINVAL;
+ pr_devel("AFU handle: 0x%.16llx\n", afu->guest->handle);
+
+ return 0;
+}
+
+int cxl_of_read_afu_properties(struct cxl_afu *afu, struct device_node *np)
+{
+ int i, len, rc;
+ char *p;
+ const __be32 *prop;
+ u16 device_id, vendor_id;
+ u32 val = 0, class_code;
+
+ /* Properties are read in the same order as listed in PAPR */
+
+ if (cxl_verbose) {
+ pr_info("Dump of the 'ibm,coherent-platform-function' node properties:\n");
+
+ prop = of_get_property(np, "compatible", &len);
+ i = 0;
+ while (i < len) {
+ p = (char *) prop + i;
+ pr_info("compatible: %s\n", p);
+ i += strlen(p) + 1;
+ }
+ read_prop_string(np, "name");
+ }
+
+ rc = read_phys_addr(np, "reg", afu);
+ if (rc)
+ return rc;
+
+ rc = read_phys_addr(np, "assigned-addresses", afu);
+ if (rc)
+ return rc;
+
+ if (afu->psn_phys == 0)
+ afu->psa = false;
+ else
+ afu->psa = true;
+
+ if (cxl_verbose) {
+ read_prop_string(np, "ibm,loc-code");
+ read_prop_string(np, "device_type");
+ }
+
+ read_prop_dword(np, "ibm,#processes", &afu->max_procs_virtualised);
+
+ if (cxl_verbose) {
+ read_prop_dword(np, "ibm,scratchpad-size", &val);
+ read_prop_dword(np, "ibm,programmable", &val);
+ read_prop_string(np, "ibm,phandle");
+ read_vpd(NULL, afu);
+ }
+
+ read_prop_dword(np, "ibm,max-ints-per-process", &afu->guest->max_ints);
+ afu->irqs_max = afu->guest->max_ints;
+
+ prop = read_prop_dword(np, "ibm,min-ints-per-process", &afu->pp_irqs);
+ if (prop) {
+ /* One extra interrupt for the PSL interrupt is already
+ * included. Remove it now to keep only AFU interrupts and
+ * match the native case.
+ */
+ afu->pp_irqs--;
+ }
+
+ if (cxl_verbose) {
+ read_prop_dword(np, "ibm,max-ints", &val);
+ read_prop_dword(np, "ibm,vpd-size", &val);
+ }
+
+ read_prop64_dword(np, "ibm,error-buffer-size", &afu->eb_len);
+ afu->eb_offset = 0;
+
+ if (cxl_verbose)
+ read_prop_dword(np, "ibm,config-record-type", &val);
+
+ read_prop64_dword(np, "ibm,config-record-size", &afu->crs_len);
+ afu->crs_offset = 0;
+
+ read_prop_dword(np, "ibm,#config-records", &afu->crs_num);
+
+ if (cxl_verbose) {
+ for (i = 0; i < afu->crs_num; i++) {
+ rc = cxl_ops->afu_cr_read16(afu, i, PCI_DEVICE_ID,
+ &device_id);
+ if (!rc)
+ pr_info("record %d - device-id: %#x\n",
+ i, device_id);
+ rc = cxl_ops->afu_cr_read16(afu, i, PCI_VENDOR_ID,
+ &vendor_id);
+ if (!rc)
+ pr_info("record %d - vendor-id: %#x\n",
+ i, vendor_id);
+ rc = cxl_ops->afu_cr_read32(afu, i, PCI_CLASS_REVISION,
+ &class_code);
+ if (!rc) {
+ class_code >>= 8;
+ pr_info("record %d - class-code: %#x\n",
+ i, class_code);
+ }
+ }
+
+ read_prop_dword(np, "ibm,function-number", &val);
+ read_prop_dword(np, "ibm,privileged-function", &val);
+ read_prop_dword(np, "vendor-id", &val);
+ read_prop_dword(np, "device-id", &val);
+ read_prop_dword(np, "revision-id", &val);
+ read_prop_dword(np, "class-code", &val);
+ read_prop_dword(np, "subsystem-vendor-id", &val);
+ read_prop_dword(np, "subsystem-id", &val);
+ }
+ /*
+ * if "ibm,process-mmio" doesn't exist then per-process mmio is
+ * not supported
+ */
+ val = 0;
+ prop = read_prop_dword(np, "ibm,process-mmio", &val);
+ if (prop && val == 1)
+ afu->pp_psa = true;
+ else
+ afu->pp_psa = false;
+
+ if (cxl_verbose) {
+ read_prop_dword(np, "ibm,supports-aur", &val);
+ read_prop_dword(np, "ibm,supports-csrp", &val);
+ read_prop_dword(np, "ibm,supports-prr", &val);
+ }
+
+ prop = read_prop_dword(np, "ibm,function-error-interrupt", &val);
+ if (prop)
+ afu->serr_hwirq = val;
+
+ pr_devel("AFU handle: %#llx\n", afu->guest->handle);
+ pr_devel("p2n_phys: %#llx (size %#llx)\n",
+ afu->guest->p2n_phys, afu->guest->p2n_size);
+ pr_devel("psn_phys: %#llx (size %#llx)\n",
+ afu->psn_phys, afu->adapter->ps_size);
+ pr_devel("Max number of processes virtualised=%i\n",
+ afu->max_procs_virtualised);
+ pr_devel("Per-process irqs min=%i, max=%i\n", afu->pp_irqs,
+ afu->irqs_max);
+ pr_devel("Slice error interrupt=%#lx\n", afu->serr_hwirq);
+
+ return 0;
+}
+
+static int read_adapter_irq_config(struct cxl *adapter, struct device_node *np)
+{
+ const __be32 *ranges;
+ int len, nranges, i;
+ struct irq_avail *cur;
+
+ ranges = of_get_property(np, "interrupt-ranges", &len);
+ if (ranges == NULL || len < (2 * sizeof(int)))
+ return -EINVAL;
+
+ /*
+ * encoded array of two cells per entry, each cell encoded as
+ * with encode-int
+ */
+ nranges = len / (2 * sizeof(int));
+ if (nranges == 0 || (nranges * 2 * sizeof(int)) != len)
+ return -EINVAL;
+
+ adapter->guest->irq_avail = kzalloc(nranges * sizeof(struct irq_avail),
+ GFP_KERNEL);
+ if (adapter->guest->irq_avail == NULL)
+ return -ENOMEM;
+
+ adapter->guest->irq_base_offset = be32_to_cpu(ranges[0]);
+ for (i = 0; i < nranges; i++) {
+ cur = &adapter->guest->irq_avail[i];
+ cur->offset = be32_to_cpu(ranges[i * 2]);
+ cur->range = be32_to_cpu(ranges[i * 2 + 1]);
+ cur->bitmap = kcalloc(BITS_TO_LONGS(cur->range),
+ sizeof(*cur->bitmap), GFP_KERNEL);
+ if (cur->bitmap == NULL)
+ goto err;
+ if (cur->offset < adapter->guest->irq_base_offset)
+ adapter->guest->irq_base_offset = cur->offset;
+ if (cxl_verbose)
+ pr_info("available IRQ range: %#lx-%#lx (%lu)\n",
+ cur->offset, cur->offset + cur->range - 1,
+ cur->range);
+ }
+ adapter->guest->irq_nranges = nranges;
+ spin_lock_init(&adapter->guest->irq_alloc_lock);
+
+ return 0;
+err:
+ for (i--; i >= 0; i--) {
+ cur = &adapter->guest->irq_avail[i];
+ kfree(cur->bitmap);
+ }
+ kfree(adapter->guest->irq_avail);
+ adapter->guest->irq_avail = NULL;
+ return -ENOMEM;
+}
+
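The "interrupt-ranges" property decoded above is a flat array of big-endian
(offset, range) cell pairs. A minimal user-space sketch of the same decoding,
over a hypothetical raw property buffer:

    #include <stdint.h>
    #include <stdio.h>

    /* stand-in for the kernel's be32_to_cpu() on a little-endian host */
    static uint32_t be32(const uint8_t *p)
    {
            return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
                   ((uint32_t)p[2] << 8) | p[3];
    }

    int main(void)
    {
            /* hypothetical property: ranges (0x100, 8) and (0x200, 16) */
            uint8_t prop[] = {
                    0x00, 0x00, 0x01, 0x00,  0x00, 0x00, 0x00, 0x08,
                    0x00, 0x00, 0x02, 0x00,  0x00, 0x00, 0x00, 0x10,
            };
            int nranges = sizeof(prop) / (2 * sizeof(uint32_t));

            for (int i = 0; i < nranges; i++) {
                    uint32_t offset = be32(&prop[i * 8]);
                    uint32_t range = be32(&prop[i * 8 + 4]);

                    printf("available IRQ range: %#x-%#x (%u)\n",
                           offset, offset + range - 1, range);
            }
            return 0;
    }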
+int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node *np)
+{
+ if (read_handle(np, &adapter->guest->handle))
+ return -EINVAL;
+ pr_devel("Adapter handle: 0x%.16llx\n", adapter->guest->handle);
+
+ return 0;
+}
+
+int cxl_of_read_adapter_properties(struct cxl *adapter, struct device_node *np)
+{
+ int rc, len, naddr, i;
+ char *p;
+ const __be32 *prop;
+ u32 val = 0;
+
+ /* Properties are read in the same order as listed in PAPR */
+
+ naddr = of_n_addr_cells(np);
+
+ if (cxl_verbose) {
+ pr_info("Dump of the 'ibm,coherent-platform-facility' node properties:\n");
+
+ read_prop_dword(np, "#address-cells", &val);
+ read_prop_dword(np, "#size-cells", &val);
+
+ prop = of_get_property(np, "compatible", &len);
+ i = 0;
+ while (i < len) {
+ p = (char *) prop + i;
+ pr_info("compatible: %s\n", p);
+ i += strlen(p) + 1;
+ }
+ read_prop_string(np, "name");
+ read_prop_string(np, "model");
+
+ prop = of_get_property(np, "reg", NULL);
+ if (prop) {
+ pr_info("reg: addr:%#llx size:%#x\n",
+ of_read_number(prop, naddr),
+ be32_to_cpu(prop[naddr]));
+ }
+
+ read_prop_string(np, "ibm,loc-code");
+ }
+
+ if ((rc = read_adapter_irq_config(adapter, np)))
+ return rc;
+
+ if (cxl_verbose) {
+ read_prop_string(np, "device_type");
+ read_prop_string(np, "ibm,phandle");
+ }
+
+ prop = read_prop_dword(np, "ibm,caia-version", &val);
+ if (prop) {
+ adapter->caia_major = (val & 0xFF00) >> 8;
+ adapter->caia_minor = val & 0xFF;
+ }
+
+ prop = read_prop_dword(np, "ibm,psl-revision", &val);
+ if (prop)
+ adapter->psl_rev = val;
+
+ prop = read_prop_string(np, "status");
+ if (prop) {
+ adapter->guest->status = kasprintf(GFP_KERNEL, "%s", (char *) prop);
+ if (adapter->guest->status == NULL)
+ return -ENOMEM;
+ }
+
+ prop = read_prop_dword(np, "vendor-id", &val);
+ if (prop)
+ adapter->guest->vendor = val;
+
+ prop = read_prop_dword(np, "device-id", &val);
+ if (prop)
+ adapter->guest->device = val;
+
+ if (cxl_verbose) {
+ read_prop_dword(np, "ibm,privileged-facility", &val);
+ read_prop_dword(np, "revision-id", &val);
+ read_prop_dword(np, "class-code", &val);
+ }
+
+ prop = read_prop_dword(np, "subsystem-vendor-id", &val);
+ if (prop)
+ adapter->guest->subsystem_vendor = val;
+
+ prop = read_prop_dword(np, "subsystem-id", &val);
+ if (prop)
+ adapter->guest->subsystem = val;
+
+ if (cxl_verbose)
+ read_vpd(adapter, NULL);
+
+ return 0;
+}
+
+static int cxl_of_remove(struct platform_device *pdev)
+{
+ struct cxl *adapter;
+ int afu;
+
+ adapter = dev_get_drvdata(&pdev->dev);
+ for (afu = 0; afu < adapter->slices; afu++)
+ cxl_guest_remove_afu(adapter->afu[afu]);
+
+ cxl_guest_remove_adapter(adapter);
+ return 0;
+}
+
+static void cxl_of_shutdown(struct platform_device *pdev)
+{
+ cxl_of_remove(pdev);
+}
+
+int cxl_of_probe(struct platform_device *pdev)
+{
+ struct device_node *np = NULL;
+ struct device_node *afu_np = NULL;
+ struct cxl *adapter = NULL;
+ int ret;
+ int slice, slice_ok;
+
+ pr_devel("in %s\n", __func__);
+
+ np = pdev->dev.of_node;
+ if (np == NULL)
+ return -ENODEV;
+
+ /* init adapter */
+ adapter = cxl_guest_init_adapter(np, pdev);
+ if (IS_ERR(adapter)) {
+ dev_err(&pdev->dev, "guest_init_adapter failed: %li\n", PTR_ERR(adapter));
+ return PTR_ERR(adapter);
+ }
+
+ /* init afu */
+ slice_ok = 0;
+ for (afu_np = NULL, slice = 0; (afu_np = of_get_next_child(np, afu_np)); slice++) {
+ if ((ret = cxl_guest_init_afu(adapter, slice, afu_np)))
+ dev_err(&pdev->dev, "AFU %i failed to initialise: %i\n",
+ slice, ret);
+ else
+ slice_ok++;
+ }
+
+ if (slice_ok == 0) {
+ dev_info(&pdev->dev, "No active AFU\n");
+ adapter->slices = 0;
+ }
+
+ if (afu_np)
+ of_node_put(afu_np);
+ return 0;
+}
+
+static const struct of_device_id cxl_of_match[] = {
+ { .compatible = "ibm,coherent-platform-facility",},
+ {},
+};
+MODULE_DEVICE_TABLE(of, cxl_of_match);
+
+struct platform_driver cxl_of_driver = {
+ .driver = {
+ .name = "cxl_of",
+ .of_match_table = cxl_of_match,
+ .owner = THIS_MODULE
+ },
+ .probe = cxl_of_probe,
+ .remove = cxl_of_remove,
+ .shutdown = cxl_of_shutdown,
+};
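For reference, a platform driver like cxl_of_driver is bound to matching
device-tree nodes via platform_driver_register(). A minimal sketch of that
registration, assuming a module init path (in the real driver the
registration lives elsewhere in the cxl module):

    #include <linux/module.h>
    #include <linux/platform_device.h>

    extern struct platform_driver cxl_of_driver;

    static int __init cxl_of_sketch_init(void)
    {
            /* binds cxl_of_probe() to every node compatible with
             * "ibm,coherent-platform-facility" */
            return platform_driver_register(&cxl_of_driver);
    }

    static void __exit cxl_of_sketch_exit(void)
    {
            platform_driver_unregister(&cxl_of_driver);
    }

    module_init(cxl_of_sketch_init);
    module_exit(cxl_of_sketch_exit);
    MODULE_LICENSE("GPL");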
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 4c1903f781fc..6634b7a3c42b 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -90,8 +90,8 @@
/* This works a little differently from the p1/p2 register accesses, to make
 * it easier to pull out individual fields */
-#define AFUD_READ(afu, off) in_be64(afu->afu_desc_mmio + off)
-#define AFUD_READ_LE(afu, off) in_le64(afu->afu_desc_mmio + off)
+#define AFUD_READ(afu, off) in_be64(afu->native->afu_desc_mmio + off)
+#define AFUD_READ_LE(afu, off) in_le64(afu->native->afu_desc_mmio + off)
#define EXTRACT_PPC_BIT(val, bit) (!!(val & PPC_BIT(bit)))
#define EXTRACT_PPC_BITS(val, bs, be) ((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be))
@@ -116,24 +116,6 @@
#define AFUD_EB_LEN(val) EXTRACT_PPC_BITS(val, 8, 63)
#define AFUD_READ_EB_OFF(afu) AFUD_READ(afu, 0x48)
-u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off)
-{
- u64 aligned_off = off & ~0x3L;
- u32 val;
-
- val = cxl_afu_cr_read32(afu, cr, aligned_off);
- return (val >> ((off & 0x2) * 8)) & 0xffff;
-}
-
-u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off)
-{
- u64 aligned_off = off & ~0x3L;
- u32 val;
-
- val = cxl_afu_cr_read32(afu, cr, aligned_off);
- return (val >> ((off & 0x3) * 8)) & 0xff;
-}
-
static const struct pci_device_id cxl_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), },
{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), },
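The deleted helpers carved 8- and 16-bit values out of a single aligned
32-bit configuration-record read; equivalent accessors now sit behind
cxl_ops. A standalone sketch of the extraction they performed:

    #include <stdint.h>
    #include <stdio.h>

    static uint16_t extract16(uint32_t word, uint64_t off)
    {
            /* off & 0x2 picks the lower or upper half-word */
            return (word >> ((off & 0x2) * 8)) & 0xffff;
    }

    static uint8_t extract8(uint32_t word, uint64_t off)
    {
            /* off & 0x3 picks one of the four bytes */
            return (word >> ((off & 0x3) * 8)) & 0xff;
    }

    int main(void)
    {
            uint32_t val = 0x11223344;

            printf("%#x %#x %#x\n", extract16(val, 0),
                   extract16(val, 2), extract8(val, 1));
            return 0;
    }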
@@ -433,8 +415,8 @@ static int init_implementation_afu_regs(struct cxl_afu *afu)
return 0;
}
-int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq,
- unsigned int virq)
+int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq,
+ unsigned int virq)
{
struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
@@ -476,28 +458,30 @@ int cxl_update_image_control(struct cxl *adapter)
return 0;
}
-int cxl_alloc_one_irq(struct cxl *adapter)
+int cxl_pci_alloc_one_irq(struct cxl *adapter)
{
struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
return pnv_cxl_alloc_hwirqs(dev, 1);
}
-void cxl_release_one_irq(struct cxl *adapter, int hwirq)
+void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq)
{
struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
return pnv_cxl_release_hwirqs(dev, hwirq, 1);
}
-int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num)
+int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs,
+ struct cxl *adapter, unsigned int num)
{
struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
return pnv_cxl_alloc_hwirq_ranges(irqs, dev, num);
}
-void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter)
+void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs,
+ struct cxl *adapter)
{
struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
@@ -558,7 +542,7 @@ static int switch_card_to_cxl(struct pci_dev *dev)
return 0;
}
-static int cxl_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev)
+static int pci_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev)
{
u64 p1n_base, p2n_base, afu_desc;
const u64 p1n_size = 0x100;
@@ -566,15 +550,15 @@ static int cxl_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct p
p1n_base = p1_base(dev) + 0x10000 + (afu->slice * p1n_size);
p2n_base = p2_base(dev) + (afu->slice * p2n_size);
- afu->psn_phys = p2_base(dev) + (adapter->ps_off + (afu->slice * adapter->ps_size));
- afu_desc = p2_base(dev) + adapter->afu_desc_off + (afu->slice * adapter->afu_desc_size);
+ afu->psn_phys = p2_base(dev) + (adapter->native->ps_off + (afu->slice * adapter->ps_size));
+ afu_desc = p2_base(dev) + adapter->native->afu_desc_off + (afu->slice * adapter->native->afu_desc_size);
- if (!(afu->p1n_mmio = ioremap(p1n_base, p1n_size)))
+ if (!(afu->native->p1n_mmio = ioremap(p1n_base, p1n_size)))
goto err;
if (!(afu->p2n_mmio = ioremap(p2n_base, p2n_size)))
goto err1;
if (afu_desc) {
- if (!(afu->afu_desc_mmio = ioremap(afu_desc, adapter->afu_desc_size)))
+ if (!(afu->native->afu_desc_mmio = ioremap(afu_desc, adapter->native->afu_desc_size)))
goto err2;
}
@@ -582,62 +566,41 @@ static int cxl_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct p
err2:
iounmap(afu->p2n_mmio);
err1:
- iounmap(afu->p1n_mmio);
+ iounmap(afu->native->p1n_mmio);
err:
dev_err(&afu->dev, "Error mapping AFU MMIO regions\n");
return -ENOMEM;
}
-static void cxl_unmap_slice_regs(struct cxl_afu *afu)
+static void pci_unmap_slice_regs(struct cxl_afu *afu)
{
if (afu->p2n_mmio) {
iounmap(afu->p2n_mmio);
afu->p2n_mmio = NULL;
}
- if (afu->p1n_mmio) {
- iounmap(afu->p1n_mmio);
- afu->p1n_mmio = NULL;
+ if (afu->native->p1n_mmio) {
+ iounmap(afu->native->p1n_mmio);
+ afu->native->p1n_mmio = NULL;
}
- if (afu->afu_desc_mmio) {
- iounmap(afu->afu_desc_mmio);
- afu->afu_desc_mmio = NULL;
+ if (afu->native->afu_desc_mmio) {
+ iounmap(afu->native->afu_desc_mmio);
+ afu->native->afu_desc_mmio = NULL;
}
}
-static void cxl_release_afu(struct device *dev)
+void cxl_pci_release_afu(struct device *dev)
{
struct cxl_afu *afu = to_cxl_afu(dev);
- pr_devel("cxl_release_afu\n");
+ pr_devel("%s\n", __func__);
idr_destroy(&afu->contexts_idr);
cxl_release_spa(afu);
+ kfree(afu->native);
kfree(afu);
}
-static struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice)
-{
- struct cxl_afu *afu;
-
- if (!(afu = kzalloc(sizeof(struct cxl_afu), GFP_KERNEL)))
- return NULL;
-
- afu->adapter = adapter;
- afu->dev.parent = &adapter->dev;
- afu->dev.release = cxl_release_afu;
- afu->slice = slice;
- idr_init(&afu->contexts_idr);
- mutex_init(&afu->contexts_lock);
- spin_lock_init(&afu->afu_cntl_lock);
- mutex_init(&afu->spa_mutex);
-
- afu->prefault_mode = CXL_PREFAULT_NONE;
- afu->irqs_max = afu->adapter->user_irqs;
-
- return afu;
-}
-
/* Expects AFU struct to have recently been zeroed out */
static int cxl_read_afu_descriptor(struct cxl_afu *afu)
{
@@ -659,7 +622,7 @@ static int cxl_read_afu_descriptor(struct cxl_afu *afu)
afu->pp_size = AFUD_PPPSA_LEN(val) * 4096;
afu->psa = AFUD_PPPSA_PSA(val);
if ((afu->pp_psa = AFUD_PPPSA_PP(val)))
- afu->pp_offset = AFUD_READ_PPPSA_OFF(afu);
+ afu->native->pp_offset = AFUD_READ_PPPSA_OFF(afu);
val = AFUD_READ_CR(afu);
afu->crs_len = AFUD_CR_LEN(val) * 256;
@@ -686,10 +649,11 @@ static int cxl_read_afu_descriptor(struct cxl_afu *afu)
static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu)
{
- int i;
+ int i, rc;
+ u32 val;
if (afu->psa && afu->adapter->ps_size <
- (afu->pp_offset + afu->pp_size*afu->max_procs_virtualised)) {
+ (afu->native->pp_offset + afu->pp_size*afu->max_procs_virtualised)) {
dev_err(&afu->dev, "per-process PSA can't fit inside the PSA!\n");
return -ENODEV;
}
@@ -698,7 +662,8 @@ static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu)
dev_warn(&afu->dev, "AFU uses < PAGE_SIZE per-process PSA!");
for (i = 0; i < afu->crs_num; i++) {
- if ((cxl_afu_cr_read32(afu, i, 0) == 0)) {
+ rc = cxl_ops->afu_cr_read32(afu, i, 0, &val);
+ if (rc || val == 0) {
dev_err(&afu->dev, "ABORTING: AFU configuration record %i is invalid\n", i);
return -EINVAL;
}
@@ -719,7 +684,7 @@ static int sanitise_afu_regs(struct cxl_afu *afu)
reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) {
dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg);
- if (__cxl_afu_reset(afu))
+ if (cxl_ops->afu_reset(afu))
return -EIO;
if (cxl_afu_disable(afu))
return -EIO;
@@ -767,13 +732,13 @@ static int sanitise_afu_regs(struct cxl_afu *afu)
* 4/8 byte aligned accesses. So if the requested offset/count aren't 8-byte
* aligned, the function uses a bounce buffer, which can be at most PAGE_SIZE.
*/
-ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
+ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
loff_t off, size_t count)
{
loff_t aligned_start, aligned_end;
size_t aligned_length;
void *tbuf;
- const void __iomem *ebuf = afu->afu_desc_mmio + afu->eb_offset;
+ const void __iomem *ebuf = afu->native->afu_desc_mmio + afu->eb_offset;
if (count == 0 || off < 0 || (size_t)off >= afu->eb_len)
return 0;
@@ -804,18 +769,18 @@ ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
return count;
}
-static int cxl_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev)
+static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev)
{
int rc;
- if ((rc = cxl_map_slice_regs(afu, adapter, dev)))
+ if ((rc = pci_map_slice_regs(afu, adapter, dev)))
return rc;
if ((rc = sanitise_afu_regs(afu)))
goto err1;
/* We need to reset the AFU before we can read the AFU descriptor */
- if ((rc = __cxl_afu_reset(afu)))
+ if ((rc = cxl_ops->afu_reset(afu)))
goto err1;
if (cxl_verbose)
@@ -830,44 +795,50 @@ static int cxl_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc
if ((rc = init_implementation_afu_regs(afu)))
goto err1;
- if ((rc = cxl_register_serr_irq(afu)))
+ if ((rc = cxl_native_register_serr_irq(afu)))
goto err1;
- if ((rc = cxl_register_psl_irq(afu)))
+ if ((rc = cxl_native_register_psl_irq(afu)))
goto err2;
return 0;
err2:
- cxl_release_serr_irq(afu);
+ cxl_native_release_serr_irq(afu);
err1:
- cxl_unmap_slice_regs(afu);
+ pci_unmap_slice_regs(afu);
return rc;
}
-static void cxl_deconfigure_afu(struct cxl_afu *afu)
+static void pci_deconfigure_afu(struct cxl_afu *afu)
{
- cxl_release_psl_irq(afu);
- cxl_release_serr_irq(afu);
- cxl_unmap_slice_regs(afu);
+ cxl_native_release_psl_irq(afu);
+ cxl_native_release_serr_irq(afu);
+ pci_unmap_slice_regs(afu);
}
-static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
+static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
{
struct cxl_afu *afu;
- int rc;
+ int rc = -ENOMEM;
afu = cxl_alloc_afu(adapter, slice);
if (!afu)
return -ENOMEM;
+ afu->native = kzalloc(sizeof(struct cxl_afu_native), GFP_KERNEL);
+ if (!afu->native)
+ goto err_free_afu;
+
+ mutex_init(&afu->native->spa_mutex);
+
rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice);
if (rc)
- goto err_free;
+ goto err_free_native;
- rc = cxl_configure_afu(afu, adapter, dev);
+ rc = pci_configure_afu(afu, adapter, dev);
if (rc)
- goto err_free;
+ goto err_free_native;
/* Don't care if this fails */
cxl_debugfs_afu_add(afu);
@@ -890,24 +861,27 @@ static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
return 0;
err_put1:
- cxl_deconfigure_afu(afu);
+ pci_deconfigure_afu(afu);
cxl_debugfs_afu_remove(afu);
device_unregister(&afu->dev);
return rc;
-err_free:
+err_free_native:
+ kfree(afu->native);
+err_free_afu:
kfree(afu);
return rc;
}
-static void cxl_remove_afu(struct cxl_afu *afu)
+static void cxl_pci_remove_afu(struct cxl_afu *afu)
{
- pr_devel("cxl_remove_afu\n");
+ pr_devel("%s\n", __func__);
if (!afu)
return;
+ cxl_pci_vphb_remove(afu);
cxl_sysfs_afu_remove(afu);
cxl_debugfs_afu_remove(afu);
@@ -916,13 +890,13 @@ static void cxl_remove_afu(struct cxl_afu *afu)
spin_unlock(&afu->adapter->afu_list_lock);
cxl_context_detach_all(afu);
- cxl_afu_deactivate_mode(afu);
+ cxl_ops->afu_deactivate_mode(afu, afu->current_mode);
- cxl_deconfigure_afu(afu);
+ pci_deconfigure_afu(afu);
device_unregister(&afu->dev);
}
-int cxl_reset(struct cxl *adapter)
+int cxl_pci_reset(struct cxl *adapter)
{
struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
int rc;
@@ -956,17 +930,17 @@ static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev)
pr_devel("cxl_map_adapter_regs: p1: %#016llx %#llx, p2: %#016llx %#llx",
p1_base(dev), p1_size(dev), p2_base(dev), p2_size(dev));
- if (!(adapter->p1_mmio = ioremap(p1_base(dev), p1_size(dev))))
+ if (!(adapter->native->p1_mmio = ioremap(p1_base(dev), p1_size(dev))))
goto err3;
- if (!(adapter->p2_mmio = ioremap(p2_base(dev), p2_size(dev))))
+ if (!(adapter->native->p2_mmio = ioremap(p2_base(dev), p2_size(dev))))
goto err4;
return 0;
err4:
- iounmap(adapter->p1_mmio);
- adapter->p1_mmio = NULL;
+ iounmap(adapter->native->p1_mmio);
+ adapter->native->p1_mmio = NULL;
err3:
pci_release_region(dev, 0);
err2:
@@ -977,14 +951,14 @@ err1:
static void cxl_unmap_adapter_regs(struct cxl *adapter)
{
- if (adapter->p1_mmio) {
- iounmap(adapter->p1_mmio);
- adapter->p1_mmio = NULL;
+ if (adapter->native->p1_mmio) {
+ iounmap(adapter->native->p1_mmio);
+ adapter->native->p1_mmio = NULL;
pci_release_region(to_pci_dev(adapter->dev.parent), 2);
}
- if (adapter->p2_mmio) {
- iounmap(adapter->p2_mmio);
- adapter->p2_mmio = NULL;
+ if (adapter->native->p2_mmio) {
+ iounmap(adapter->native->p2_mmio);
+ adapter->native->p2_mmio = NULL;
pci_release_region(to_pci_dev(adapter->dev.parent), 0);
}
}
@@ -1025,10 +999,10 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev)
/* Convert everything to bytes, because there is NO WAY I'd look at the
* code a month later and forget what units these are in ;-) */
- adapter->ps_off = ps_off * 64 * 1024;
+ adapter->native->ps_off = ps_off * 64 * 1024;
adapter->ps_size = ps_size * 64 * 1024;
- adapter->afu_desc_off = afu_desc_off * 64 * 1024;
- adapter->afu_desc_size = afu_desc_size *64 * 1024;
+ adapter->native->afu_desc_off = afu_desc_off * 64 * 1024;
+ adapter->native->afu_desc_size = afu_desc_size * 64 * 1024;
/* Total IRQs - 1 PSL ERROR - #AFU*(1 slice error + 1 DSI) */
adapter->user_irqs = pnv_cxl_get_irq_count(dev) - 1 - 2*adapter->slices;
@@ -1079,21 +1053,26 @@ static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev)
return -EINVAL;
}
- if (!adapter->afu_desc_off || !adapter->afu_desc_size) {
+ if (!adapter->native->afu_desc_off || !adapter->native->afu_desc_size) {
dev_err(&dev->dev, "ABORTING: VSEC shows no AFU descriptors\n");
return -EINVAL;
}
- if (adapter->ps_size > p2_size(dev) - adapter->ps_off) {
+ if (adapter->ps_size > p2_size(dev) - adapter->native->ps_off) {
dev_err(&dev->dev, "ABORTING: Problem state size larger than "
"available in BAR2: 0x%llx > 0x%llx\n",
- adapter->ps_size, p2_size(dev) - adapter->ps_off);
+ adapter->ps_size, p2_size(dev) - adapter->native->ps_off);
return -EINVAL;
}
return 0;
}
+ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len)
+{
+ return pci_read_vpd(to_pci_dev(adapter->dev.parent), 0, len, buf);
+}
+
static void cxl_release_adapter(struct device *dev)
{
struct cxl *adapter = to_cxl_adapter(dev);
@@ -1102,33 +1081,10 @@ static void cxl_release_adapter(struct device *dev)
cxl_remove_adapter_nr(adapter);
+ kfree(adapter->native);
kfree(adapter);
}
-static struct cxl *cxl_alloc_adapter(void)
-{
- struct cxl *adapter;
-
- if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL)))
- return NULL;
-
- spin_lock_init(&adapter->afu_list_lock);
-
- if (cxl_alloc_adapter_nr(adapter))
- goto err1;
-
- if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))
- goto err2;
-
- return adapter;
-
-err2:
- cxl_remove_adapter_nr(adapter);
-err1:
- kfree(adapter);
- return NULL;
-}
-
#define CXL_PSL_ErrIVTE_tberror (0x1ull << (63-31))
static int sanitise_adapter_regs(struct cxl *adapter)
@@ -1192,7 +1148,7 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
if ((rc = cxl_setup_psl_timebase(adapter, dev)))
goto err;
- if ((rc = cxl_register_psl_err_irq(adapter)))
+ if ((rc = cxl_native_register_psl_err_irq(adapter)))
goto err;
return 0;
@@ -1207,13 +1163,13 @@ static void cxl_deconfigure_adapter(struct cxl *adapter)
{
struct pci_dev *pdev = to_pci_dev(adapter->dev.parent);
- cxl_release_psl_err_irq(adapter);
+ cxl_native_release_psl_err_irq(adapter);
cxl_unmap_adapter_regs(adapter);
pci_disable_device(pdev);
}
-static struct cxl *cxl_init_adapter(struct pci_dev *dev)
+static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev)
{
struct cxl *adapter;
int rc;
@@ -1222,6 +1178,12 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev)
if (!adapter)
return ERR_PTR(-ENOMEM);
+ adapter->native = kzalloc(sizeof(struct cxl_native), GFP_KERNEL);
+ if (!adapter->native) {
+ rc = -ENOMEM;
+ goto err_release;
+ }
+
/* Set defaults for parameters which need to persist over
* configure/reconfigure
*/
@@ -1231,8 +1193,7 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev)
rc = cxl_configure_adapter(adapter, dev);
if (rc) {
pci_disable_device(dev);
- cxl_release_adapter(&adapter->dev);
- return ERR_PTR(rc);
+ goto err_release;
}
/* Don't care if this one fails: */
@@ -1258,9 +1219,13 @@ err_put1:
cxl_deconfigure_adapter(adapter);
device_unregister(&adapter->dev);
return ERR_PTR(rc);
+
+err_release:
+ cxl_release_adapter(&adapter->dev);
+ return ERR_PTR(rc);
}
-static void cxl_remove_adapter(struct cxl *adapter)
+static void cxl_pci_remove_adapter(struct cxl *adapter)
{
pr_devel("cxl_remove_adapter\n");
@@ -1278,17 +1243,22 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
int slice;
int rc;
+ if (cxl_pci_is_vphb_device(dev)) {
+ dev_dbg(&dev->dev, "cxl_probe: Ignoring cxl vphb device\n");
+ return -ENODEV;
+ }
+
if (cxl_verbose)
dump_cxl_config_space(dev);
- adapter = cxl_init_adapter(dev);
+ adapter = cxl_pci_init_adapter(dev);
if (IS_ERR(adapter)) {
dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter));
return PTR_ERR(adapter);
}
for (slice = 0; slice < adapter->slices; slice++) {
- if ((rc = cxl_init_afu(adapter, slice, dev))) {
+ if ((rc = pci_init_afu(adapter, slice, dev))) {
dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc);
continue;
}
@@ -1313,10 +1283,9 @@ static void cxl_remove(struct pci_dev *dev)
*/
for (i = 0; i < adapter->slices; i++) {
afu = adapter->afu[i];
- cxl_pci_vphb_remove(afu);
- cxl_remove_afu(afu);
+ cxl_pci_remove_afu(afu);
}
- cxl_remove_adapter(adapter);
+ cxl_pci_remove_adapter(adapter);
}
static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu,
@@ -1462,8 +1431,8 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev,
return result;
cxl_context_detach_all(afu);
- cxl_afu_deactivate_mode(afu);
- cxl_deconfigure_afu(afu);
+ cxl_ops->afu_deactivate_mode(afu, afu->current_mode);
+ pci_deconfigure_afu(afu);
}
cxl_deconfigure_adapter(adapter);
@@ -1486,14 +1455,12 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev)
for (i = 0; i < adapter->slices; i++) {
afu = adapter->afu[i];
- if (cxl_configure_afu(afu, adapter, pdev))
+ if (pci_configure_afu(afu, adapter, pdev))
goto err;
if (cxl_afu_select_best_mode(afu))
goto err;
- cxl_pci_vphb_reconfigure(afu);
-
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
/* Reset the device context.
* TODO: make this less disruptive
@@ -1509,7 +1476,7 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev)
afu_dev->dev.archdata.cxl_ctx = ctx;
- if (cxl_afu_check_and_enable(afu))
+ if (cxl_ops->afu_check_and_enable(afu))
goto err;
afu_dev->error_state = pci_channel_io_normal;
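As the comment above cxl_pci_afu_read_err_buffer() notes, the error buffer
only tolerates aligned MMIO, so an arbitrary [off, off + count) window is
widened to 8-byte boundaries and copied through a bounce buffer. A standalone
sketch of that alignment arithmetic:

    #include <stdio.h>

    int main(void)
    {
            unsigned long off = 5, count = 20;
            unsigned long aligned_start = off & ~0x7UL;
            unsigned long aligned_end = (off + count + 7) & ~0x7UL;

            printf("read %lu bytes from %lu, wanted data starts %lu bytes in\n",
                   aligned_end - aligned_start, aligned_start,
                   off - aligned_start);
            return 0;
    }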
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index 02006f7109a8..907c99b9f5af 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -69,7 +69,7 @@ static ssize_t reset_adapter_store(struct device *device,
if ((rc != 1) || (val != 1))
return -EINVAL;
- if ((rc = cxl_reset(adapter)))
+ if ((rc = cxl_ops->adapter_reset(adapter)))
return rc;
return count;
}
@@ -165,7 +165,7 @@ static ssize_t pp_mmio_off_show(struct device *device,
{
struct cxl_afu *afu = to_afu_chardev_m(device);
- return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_offset);
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->native->pp_offset);
}
static ssize_t pp_mmio_len_show(struct device *device,
@@ -211,7 +211,7 @@ static ssize_t reset_store_afu(struct device *device,
goto err;
}
- if ((rc = __cxl_afu_reset(afu)))
+ if ((rc = cxl_ops->afu_reset(afu)))
goto err;
rc = count;
@@ -253,8 +253,14 @@ static ssize_t irqs_max_store(struct device *device,
if (irqs_max < afu->pp_irqs)
return -EINVAL;
- if (irqs_max > afu->adapter->user_irqs)
- return -EINVAL;
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ if (irqs_max > afu->adapter->user_irqs)
+ return -EINVAL;
+ } else {
+ /* pHyp sets a per-AFU limit */
+ if (irqs_max > afu->guest->max_ints)
+ return -EINVAL;
+ }
afu->irqs_max = irqs_max;
return count;
@@ -348,7 +354,7 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr,
}
/*
- * cxl_afu_deactivate_mode needs to be done outside the lock, prevent
+ * afu_deactivate_mode needs to be done outside the lock to prevent
* other contexts coming in before we are ready:
*/
old_mode = afu->current_mode;
@@ -357,9 +363,9 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr,
mutex_unlock(&afu->contexts_lock);
- if ((rc = _cxl_afu_deactivate_mode(afu, old_mode)))
+ if ((rc = cxl_ops->afu_deactivate_mode(afu, old_mode)))
return rc;
- if ((rc = cxl_afu_activate_mode(afu, mode)))
+ if ((rc = cxl_ops->afu_activate_mode(afu, mode)))
return rc;
return count;
@@ -389,7 +395,7 @@ static ssize_t afu_eb_read(struct file *filp, struct kobject *kobj,
struct cxl_afu *afu = to_cxl_afu(container_of(kobj,
struct device, kobj));
- return cxl_afu_read_err_buffer(afu, buf, off, count);
+ return cxl_ops->afu_read_err_buffer(afu, buf, off, count);
}
static struct device_attribute afu_attrs[] = {
@@ -406,24 +412,39 @@ static struct device_attribute afu_attrs[] = {
int cxl_sysfs_adapter_add(struct cxl *adapter)
{
+ struct device_attribute *dev_attr;
int i, rc;
for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) {
- if ((rc = device_create_file(&adapter->dev, &adapter_attrs[i])))
- goto err;
+ dev_attr = &adapter_attrs[i];
+ if (cxl_ops->support_attributes(dev_attr->attr.name,
+ CXL_ADAPTER_ATTRS)) {
+ if ((rc = device_create_file(&adapter->dev, dev_attr)))
+ goto err;
+ }
}
return 0;
err:
- for (i--; i >= 0; i--)
- device_remove_file(&adapter->dev, &adapter_attrs[i]);
+ for (i--; i >= 0; i--) {
+ dev_attr = &adapter_attrs[i];
+ if (cxl_ops->support_attributes(dev_attr->attr.name,
+ CXL_ADAPTER_ATTRS))
+ device_remove_file(&adapter->dev, dev_attr);
+ }
return rc;
}
+
void cxl_sysfs_adapter_remove(struct cxl *adapter)
{
+ struct device_attribute *dev_attr;
int i;
- for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++)
- device_remove_file(&adapter->dev, &adapter_attrs[i]);
+ for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) {
+ dev_attr = &adapter_attrs[i];
+ if (cxl_ops->support_attributes(dev_attr->attr.name,
+ CXL_ADAPTER_ATTRS))
+ device_remove_file(&adapter->dev, dev_attr);
+ }
}
struct afu_config_record {
@@ -469,10 +490,12 @@ static ssize_t afu_read_config(struct file *filp, struct kobject *kobj,
struct afu_config_record *cr = to_cr(kobj);
struct cxl_afu *afu = to_cxl_afu(container_of(kobj->parent, struct device, kobj));
- u64 i, j, val;
+ u64 i, j, val, rc;
for (i = 0; i < count;) {
- val = cxl_afu_cr_read64(afu, cr->cr, off & ~0x7);
+ rc = cxl_ops->afu_cr_read64(afu, cr->cr, off & ~0x7, &val);
+ if (rc)
+ val = ~0ULL;
for (j = off & 0x7; j < 8 && i < count; i++, j++, off++)
buf[i] = (val >> (j * 8)) & 0xff;
}
@@ -517,14 +540,22 @@ static struct afu_config_record *cxl_sysfs_afu_new_cr(struct cxl_afu *afu, int c
return ERR_PTR(-ENOMEM);
cr->cr = cr_idx;
- cr->device = cxl_afu_cr_read16(afu, cr_idx, PCI_DEVICE_ID);
- cr->vendor = cxl_afu_cr_read16(afu, cr_idx, PCI_VENDOR_ID);
- cr->class = cxl_afu_cr_read32(afu, cr_idx, PCI_CLASS_REVISION) >> 8;
+
+ rc = cxl_ops->afu_cr_read16(afu, cr_idx, PCI_DEVICE_ID, &cr->device);
+ if (rc)
+ goto err;
+ rc = cxl_ops->afu_cr_read16(afu, cr_idx, PCI_VENDOR_ID, &cr->vendor);
+ if (rc)
+ goto err;
+ rc = cxl_ops->afu_cr_read32(afu, cr_idx, PCI_CLASS_REVISION, &cr->class);
+ if (rc)
+ goto err;
+ cr->class >>= 8;
/*
* Export raw AFU PCIe like config record. For now this is read only by
* root - we can expand that later to be readable by non-root and maybe
- * even writable provided we have a good use-case. Once we suport
+ * even writable provided we have a good use-case. Once we support
* exposing AFUs through a virtual PHB they will get that for free from
* Linux' PCI infrastructure, but until then it's not clear that we
* need it for anything since the main use case is just identifying
@@ -562,6 +593,7 @@ err:
void cxl_sysfs_afu_remove(struct cxl_afu *afu)
{
+ struct device_attribute *dev_attr;
struct afu_config_record *cr, *tmp;
int i;
@@ -569,8 +601,12 @@ void cxl_sysfs_afu_remove(struct cxl_afu *afu)
if (afu->eb_len)
device_remove_bin_file(&afu->dev, &afu->attr_eb);
- for (i = 0; i < ARRAY_SIZE(afu_attrs); i++)
- device_remove_file(&afu->dev, &afu_attrs[i]);
+ for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) {
+ dev_attr = &afu_attrs[i];
+ if (cxl_ops->support_attributes(dev_attr->attr.name,
+ CXL_AFU_ATTRS))
+ device_remove_file(&afu->dev, &afu_attrs[i]);
+ }
list_for_each_entry_safe(cr, tmp, &afu->crs, list) {
sysfs_remove_bin_file(&cr->kobj, &cr->config_attr);
@@ -580,14 +616,19 @@ void cxl_sysfs_afu_remove(struct cxl_afu *afu)
int cxl_sysfs_afu_add(struct cxl_afu *afu)
{
+ struct device_attribute *dev_attr;
struct afu_config_record *cr;
int i, rc;
INIT_LIST_HEAD(&afu->crs);
for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) {
- if ((rc = device_create_file(&afu->dev, &afu_attrs[i])))
- goto err;
+ dev_attr = &afu_attrs[i];
+ if (cxl_ops->support_attributes(dev_attr->attr.name,
+ CXL_AFU_ATTRS)) {
+ if ((rc = device_create_file(&afu->dev, &afu_attrs[i])))
+ goto err;
+ }
}
/* conditionally create the binary file for the error info buffer */
@@ -626,32 +667,50 @@ err:
/* reset the eb_len as we haven't created the bin attr */
afu->eb_len = 0;
- for (i--; i >= 0; i--)
+ for (i--; i >= 0; i--) {
+ dev_attr = &afu_attrs[i];
+ if (cxl_ops->support_attributes(dev_attr->attr.name,
+ CXL_AFU_ATTRS))
device_remove_file(&afu->dev, &afu_attrs[i]);
+ }
return rc;
}
int cxl_sysfs_afu_m_add(struct cxl_afu *afu)
{
+ struct device_attribute *dev_attr;
int i, rc;
for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) {
- if ((rc = device_create_file(afu->chardev_m, &afu_master_attrs[i])))
- goto err;
+ dev_attr = &afu_master_attrs[i];
+ if (cxl_ops->support_attributes(dev_attr->attr.name,
+ CXL_AFU_MASTER_ATTRS)) {
+ if ((rc = device_create_file(afu->chardev_m, &afu_master_attrs[i])))
+ goto err;
+ }
}
return 0;
err:
- for (i--; i >= 0; i--)
- device_remove_file(afu->chardev_m, &afu_master_attrs[i]);
+ for (i--; i >= 0; i--) {
+ dev_attr = &afu_master_attrs[i];
+ if (cxl_ops->support_attributes(dev_attr->attr.name,
+ CXL_AFU_MASTER_ATTRS))
+ device_remove_file(afu->chardev_m, &afu_master_attrs[i]);
+ }
return rc;
}
void cxl_sysfs_afu_m_remove(struct cxl_afu *afu)
{
+ struct device_attribute *dev_attr;
int i;
- for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++)
- device_remove_file(afu->chardev_m, &afu_master_attrs[i]);
+ for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) {
+ dev_attr = &afu_master_attrs[i];
+ if (cxl_ops->support_attributes(dev_attr->attr.name,
+ CXL_AFU_MASTER_ATTRS))
+ device_remove_file(afu->chardev_m, &afu_master_attrs[i]);
+ }
}
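The sysfs paths above now ask cxl_ops->support_attributes() whether each
attribute makes sense for the active back-end. A hypothetical sketch of such
a callback (the real native/guest implementations are outside this hunk, and
the attribute name and type parameter below are illustrative):

    #include <linux/string.h>
    #include <linux/types.h>

    static bool sketch_support_attributes(const char *attr_name, int type)
    {
            /* e.g. a guest back-end hiding a native-only adapter file */
            if (type == CXL_ADAPTER_ATTRS && !strcmp(attr_name, "base_image"))
                    return false;
            return true;
    }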
diff --git a/drivers/misc/cxl/trace.h b/drivers/misc/cxl/trace.h
index 6e1e2adfba8e..751d6119683e 100644
--- a/drivers/misc/cxl/trace.h
+++ b/drivers/misc/cxl/trace.h
@@ -450,6 +450,199 @@ DEFINE_EVENT(cxl_pe_class, cxl_slbia,
TP_ARGS(ctx)
);
+TRACE_EVENT(cxl_hcall,
+ TP_PROTO(u64 unit_address, u64 process_token, long rc),
+
+ TP_ARGS(unit_address, process_token, rc),
+
+ TP_STRUCT__entry(
+ __field(u64, unit_address)
+ __field(u64, process_token)
+ __field(long, rc)
+ ),
+
+ TP_fast_assign(
+ __entry->unit_address = unit_address;
+ __entry->process_token = process_token;
+ __entry->rc = rc;
+ ),
+
+ TP_printk("unit_address=0x%016llx process_token=0x%016llx rc=%li",
+ __entry->unit_address,
+ __entry->process_token,
+ __entry->rc
+ )
+);
+
+TRACE_EVENT(cxl_hcall_control,
+ TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3,
+ u64 p4, unsigned long r4, long rc),
+
+ TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc),
+
+ TP_STRUCT__entry(
+ __field(u64, unit_address)
+ __field(char *, fct)
+ __field(u64, p1)
+ __field(u64, p2)
+ __field(u64, p3)
+ __field(u64, p4)
+ __field(unsigned long, r4)
+ __field(long, rc)
+ ),
+
+ TP_fast_assign(
+ __entry->unit_address = unit_address;
+ __entry->fct = fct;
+ __entry->p1 = p1;
+ __entry->p2 = p2;
+ __entry->p3 = p3;
+ __entry->p4 = p4;
+ __entry->r4 = r4;
+ __entry->rc = rc;
+ ),
+
+ TP_printk("unit_address=%#.16llx %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx): %li",
+ __entry->unit_address,
+ __entry->fct,
+ __entry->p1,
+ __entry->p2,
+ __entry->p3,
+ __entry->p4,
+ __entry->r4,
+ __entry->rc
+ )
+);
+
+TRACE_EVENT(cxl_hcall_attach,
+ TP_PROTO(u64 unit_address, u64 phys_addr, unsigned long process_token,
+ unsigned long mmio_addr, unsigned long mmio_size, long rc),
+
+ TP_ARGS(unit_address, phys_addr, process_token,
+ mmio_addr, mmio_size, rc),
+
+ TP_STRUCT__entry(
+ __field(u64, unit_address)
+ __field(u64, phys_addr)
+ __field(unsigned long, process_token)
+ __field(unsigned long, mmio_addr)
+ __field(unsigned long, mmio_size)
+ __field(long, rc)
+ ),
+
+ TP_fast_assign(
+ __entry->unit_address = unit_address;
+ __entry->phys_addr = phys_addr;
+ __entry->process_token = process_token;
+ __entry->mmio_addr = mmio_addr;
+ __entry->mmio_size = mmio_size;
+ __entry->rc = rc;
+ ),
+
+ TP_printk("unit_address=0x%016llx phys_addr=0x%016llx "
+ "token=0x%.8lx mmio_addr=0x%lx mmio_size=0x%lx rc=%li",
+ __entry->unit_address,
+ __entry->phys_addr,
+ __entry->process_token,
+ __entry->mmio_addr,
+ __entry->mmio_size,
+ __entry->rc
+ )
+);
+
+DEFINE_EVENT(cxl_hcall, cxl_hcall_detach,
+ TP_PROTO(u64 unit_address, u64 process_token, long rc),
+ TP_ARGS(unit_address, process_token, rc)
+);
+
+DEFINE_EVENT(cxl_hcall_control, cxl_hcall_control_function,
+ TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3,
+ u64 p4, unsigned long r4, long rc),
+ TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc)
+);
+
+DEFINE_EVENT(cxl_hcall, cxl_hcall_collect_int_info,
+ TP_PROTO(u64 unit_address, u64 process_token, long rc),
+ TP_ARGS(unit_address, process_token, rc)
+);
+
+TRACE_EVENT(cxl_hcall_control_faults,
+ TP_PROTO(u64 unit_address, u64 process_token,
+ u64 control_mask, u64 reset_mask, unsigned long r4,
+ long rc),
+
+ TP_ARGS(unit_address, process_token,
+ control_mask, reset_mask, r4, rc),
+
+ TP_STRUCT__entry(
+ __field(u64, unit_address)
+ __field(u64, process_token)
+ __field(u64, control_mask)
+ __field(u64, reset_mask)
+ __field(unsigned long, r4)
+ __field(long, rc)
+ ),
+
+ TP_fast_assign(
+ __entry->unit_address = unit_address;
+ __entry->process_token = process_token;
+ __entry->control_mask = control_mask;
+ __entry->reset_mask = reset_mask;
+ __entry->r4 = r4;
+ __entry->rc = rc;
+ ),
+
+ TP_printk("unit_address=0x%016llx process_token=0x%llx "
+ "control_mask=%#llx reset_mask=%#llx r4=%#lx rc=%li",
+ __entry->unit_address,
+ __entry->process_token,
+ __entry->control_mask,
+ __entry->reset_mask,
+ __entry->r4,
+ __entry->rc
+ )
+);
+
+DEFINE_EVENT(cxl_hcall_control, cxl_hcall_control_facility,
+ TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3,
+ u64 p4, unsigned long r4, long rc),
+ TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc)
+);
+
+TRACE_EVENT(cxl_hcall_download_facility,
+ TP_PROTO(u64 unit_address, char *fct, u64 list_address, u64 num,
+ unsigned long r4, long rc),
+
+ TP_ARGS(unit_address, fct, list_address, num, r4, rc),
+
+ TP_STRUCT__entry(
+ __field(u64, unit_address)
+ __field(char *, fct)
+ __field(u64, list_address)
+ __field(u64, num)
+ __field(unsigned long, r4)
+ __field(long, rc)
+ ),
+
+ TP_fast_assign(
+ __entry->unit_address = unit_address;
+ __entry->fct = fct;
+ __entry->list_address = list_address;
+ __entry->num = num;
+ __entry->r4 = r4;
+ __entry->rc = rc;
+ ),
+
+ TP_printk("%#.16llx, %s(%#llx, %#llx), %#lx: %li",
+ __entry->unit_address,
+ __entry->fct,
+ __entry->list_address,
+ __entry->num,
+ __entry->r4,
+ __entry->rc
+ )
+);
+
#endif /* _CXL_TRACE_H */
/* This part must be outside protection */
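These events are intended to be fired from the guest's hypervisor-call
wrappers. A sketch of an assumed call site (the hcall token and wrapper name
are illustrative, not taken from this patch):

    #include <asm/hvcall.h>

    static long cxl_h_detach_process_sketch(u64 unit_address, u64 process_token)
    {
            long rc;

            rc = plpar_hcall_norets(H_DETACH_CA_PROCESS, unit_address,
                                    process_token);
            trace_cxl_hcall_detach(unit_address, process_token, rc);
            return rc;
    }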
diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
index cbd4331fb45c..cdc7723b845d 100644
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -49,7 +49,7 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev)
phb = pci_bus_to_host(dev->bus);
afu = (struct cxl_afu *)phb->private_data;
- if (!cxl_adapter_link_ok(afu->adapter)) {
+ if (!cxl_ops->link_ok(afu->adapter, afu)) {
dev_warn(&dev->dev, "%s: Device link is down, refusing to enable AFU\n", __func__);
return false;
}
@@ -66,7 +66,7 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev)
return false;
dev->dev.archdata.cxl_ctx = ctx;
- return (cxl_afu_check_and_enable(afu) == 0);
+ return (cxl_ops->afu_check_and_enable(afu) == 0);
}
static void cxl_pci_disable_device(struct pci_dev *dev)
@@ -99,113 +99,90 @@ static int cxl_pcie_cfg_record(u8 bus, u8 devfn)
return (bus << 8) + devfn;
}
-static unsigned long cxl_pcie_cfg_addr(struct pci_controller* phb,
- u8 bus, u8 devfn, int offset)
-{
- int record = cxl_pcie_cfg_record(bus, devfn);
-
- return (unsigned long)phb->cfg_addr + ((unsigned long)phb->cfg_data * record) + offset;
-}
-
-
static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
- int offset, int len,
- volatile void __iomem **ioaddr,
- u32 *mask, int *shift)
+ struct cxl_afu **_afu, int *_record)
{
struct pci_controller *phb;
struct cxl_afu *afu;
- unsigned long addr;
+ int record;
phb = pci_bus_to_host(bus);
if (phb == NULL)
return PCIBIOS_DEVICE_NOT_FOUND;
- afu = (struct cxl_afu *)phb->private_data;
- if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num)
+ afu = (struct cxl_afu *)phb->private_data;
+ record = cxl_pcie_cfg_record(bus->number, devfn);
+ if (record > afu->crs_num)
return PCIBIOS_DEVICE_NOT_FOUND;
- if (offset >= (unsigned long)phb->cfg_data)
- return PCIBIOS_BAD_REGISTER_NUMBER;
- addr = cxl_pcie_cfg_addr(phb, bus->number, devfn, offset);
- *ioaddr = (void *)(addr & ~0x3ULL);
- *shift = ((addr & 0x3) * 8);
- switch (len) {
- case 1:
- *mask = 0xff;
- break;
- case 2:
- *mask = 0xffff;
- break;
- default:
- *mask = 0xffffffff;
- break;
- }
+ *_afu = afu;
+ *_record = record;
return 0;
}
-
-static inline bool cxl_config_link_ok(struct pci_bus *bus)
-{
- struct pci_controller *phb;
- struct cxl_afu *afu;
-
- /* Config space IO is based on phb->cfg_addr, which is based on
- * afu_desc_mmio. This isn't safe to read/write when the link
- * goes down, as EEH tears down MMIO space.
- *
- * Check if the link is OK before proceeding.
- */
-
- phb = pci_bus_to_host(bus);
- if (phb == NULL)
- return false;
- afu = (struct cxl_afu *)phb->private_data;
- return cxl_adapter_link_ok(afu->adapter);
-}
-
static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
int offset, int len, u32 *val)
{
- volatile void __iomem *ioaddr;
- int shift, rc;
- u32 mask;
+ int rc, record;
+ struct cxl_afu *afu;
+ u8 val8;
+ u16 val16;
+ u32 val32;
- rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr,
- &mask, &shift);
+ rc = cxl_pcie_config_info(bus, devfn, &afu, &record);
if (rc)
return rc;
- if (!cxl_config_link_ok(bus))
+ switch (len) {
+ case 1:
+ rc = cxl_ops->afu_cr_read8(afu, record, offset, &val8);
+ *val = val8;
+ break;
+ case 2:
+ rc = cxl_ops->afu_cr_read16(afu, record, offset, &val16);
+ *val = val16;
+ break;
+ case 4:
+ rc = cxl_ops->afu_cr_read32(afu, record, offset, &val32);
+ *val = val32;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ if (rc)
return PCIBIOS_DEVICE_NOT_FOUND;
- /* Can only read 32 bits */
- *val = (in_le32(ioaddr) >> shift) & mask;
return PCIBIOS_SUCCESSFUL;
}
static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
int offset, int len, u32 val)
{
- volatile void __iomem *ioaddr;
- u32 v, mask;
- int shift, rc;
+ int rc, record;
+ struct cxl_afu *afu;
- rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr,
- &mask, &shift);
+ rc = cxl_pcie_config_info(bus, devfn, &afu, &record);
if (rc)
return rc;
- if (!cxl_config_link_ok(bus))
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- /* Can only write 32 bits so do read-modify-write */
- mask <<= shift;
- val <<= shift;
+ switch (len) {
+ case 1:
+ rc = cxl_ops->afu_cr_write8(afu, record, offset, val & 0xff);
+ break;
+ case 2:
+ rc = cxl_ops->afu_cr_write16(afu, record, offset, val & 0xffff);
+ break;
+ case 4:
+ rc = cxl_ops->afu_cr_write32(afu, record, offset, val);
+ break;
+ default:
+ WARN_ON(1);
+ }
- v = (in_le32(ioaddr) & ~mask) | (val & mask);
+ if (rc)
+ return PCIBIOS_SET_FAILED;
- out_le32(ioaddr, v);
return PCIBIOS_SUCCESSFUL;
}
@@ -233,23 +210,31 @@ int cxl_pci_vphb_add(struct cxl_afu *afu)
{
struct pci_dev *phys_dev;
struct pci_controller *phb, *phys_phb;
-
- phys_dev = to_pci_dev(afu->adapter->dev.parent);
- phys_phb = pci_bus_to_host(phys_dev->bus);
+ struct device_node *vphb_dn;
+ struct device *parent;
+
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ phys_dev = to_pci_dev(afu->adapter->dev.parent);
+ phys_phb = pci_bus_to_host(phys_dev->bus);
+ vphb_dn = phys_phb->dn;
+ parent = &phys_dev->dev;
+ } else {
+ vphb_dn = afu->adapter->dev.parent->of_node;
+ parent = afu->adapter->dev.parent;
+ }
/* Alloc and setup PHB data structure */
- phb = pcibios_alloc_controller(phys_phb->dn);
-
+ phb = pcibios_alloc_controller(vphb_dn);
if (!phb)
return -ENODEV;
/* Setup parent in sysfs */
- phb->parent = &phys_dev->dev;
+ phb->parent = parent;
/* Setup the PHB using arch provided callback */
phb->ops = &cxl_pcie_pci_ops;
- phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset;
- phb->cfg_data = (void *)(u64)afu->crs_len;
+ phb->cfg_addr = NULL;
+ phb->cfg_data = 0;
phb->private_data = afu;
phb->controller_ops = cxl_pci_controller_ops;
@@ -272,15 +257,6 @@ int cxl_pci_vphb_add(struct cxl_afu *afu)
return 0;
}
-void cxl_pci_vphb_reconfigure(struct cxl_afu *afu)
-{
- /* When we are reconfigured, the AFU's MMIO space is unmapped
- * and remapped. We need to reflect this in the PHB's view of
- * the world.
- */
- afu->phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset;
-}
-
void cxl_pci_vphb_remove(struct cxl_afu *afu)
{
struct pci_controller *phb;
@@ -296,6 +272,15 @@ void cxl_pci_vphb_remove(struct cxl_afu *afu)
pcibios_free_controller(phb);
}
+bool cxl_pci_is_vphb_device(struct pci_dev *dev)
+{
+ struct pci_controller *phb;
+
+ phb = pci_bus_to_host(dev->bus);
+
+ return (phb->ops == &cxl_pcie_pci_ops);
+}
+
struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev)
{
struct pci_controller *phb;
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 89b3befc7155..6469ff6208b0 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -271,6 +271,8 @@ bool pci_bus_clip_resource(struct pci_dev *dev, int idx)
void __weak pcibios_resource_survey_bus(struct pci_bus *bus) { }
+void __weak pcibios_bus_add_device(struct pci_dev *pdev) { }
+
/**
* pci_bus_add_device - start driver for a single device
* @dev: device to add
@@ -285,6 +287,7 @@ void pci_bus_add_device(struct pci_dev *dev)
* Can not put in pci_device_add yet because resources
* are not assigned yet for some devices.
*/
+ pcibios_bus_add_device(dev);
pci_fixup_device(pci_fixup_final, dev);
pci_create_sysfs_dev_files(dev);
pci_proc_attach_device(dev);
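The new __weak pcibios_bus_add_device() gives architectures a hook that runs
once per device at bus-add time, just before the final fixups and sysfs
setup. A sketch of an arch override (body illustrative only):

    #include <linux/pci.h>

    void pcibios_bus_add_device(struct pci_dev *pdev)
    {
            /* attach arch-specific state before a driver can probe */
            dev_dbg(&pdev->dev, "arch bus-add hook\n");
    }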
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 31f31d460fc9..fa4f13869fa9 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -113,7 +113,7 @@ resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
return dev->sriov->barsz[resno - PCI_IOV_RESOURCES];
}
-static int virtfn_add(struct pci_dev *dev, int id, int reset)
+int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset)
{
int i;
int rc = -ENOMEM;
@@ -188,7 +188,7 @@ failed:
return rc;
}
-static void virtfn_remove(struct pci_dev *dev, int id, int reset)
+void pci_iov_remove_virtfn(struct pci_dev *dev, int id, int reset)
{
char buf[VIRTFN_ID_LEN];
struct pci_dev *virtfn;
@@ -321,7 +321,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
}
for (i = 0; i < initial; i++) {
- rc = virtfn_add(dev, i, 0);
+ rc = pci_iov_add_virtfn(dev, i, 0);
if (rc)
goto failed;
}
@@ -333,7 +333,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
failed:
while (i--)
- virtfn_remove(dev, i, 0);
+ pci_iov_remove_virtfn(dev, i, 0);
pcibios_sriov_disable(dev);
err_pcibios:
@@ -359,7 +359,7 @@ static void sriov_disable(struct pci_dev *dev)
return;
for (i = 0; i < iov->num_VFs; i++)
- virtfn_remove(dev, i, 0);
+ pci_iov_remove_virtfn(dev, i, 0);
pcibios_sriov_disable(dev);
diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
index 5ada9268a450..580f37006977 100644
--- a/drivers/scsi/cxlflash/common.h
+++ b/drivers/scsi/cxlflash/common.h
@@ -106,7 +106,6 @@ struct cxlflash_cfg {
atomic_t scan_host_needed;
struct cxl_afu *cxl_afu;
- struct pci_dev *parent_dev;
atomic_t recovery_threads;
struct mutex ctx_recovery_mutex;
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index f6d90ce8f3b7..e04aae70643b 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -1407,7 +1407,7 @@ static int start_context(struct cxlflash_cfg *cfg)
*/
static int read_vpd(struct cxlflash_cfg *cfg, u64 wwpn[])
{
- struct pci_dev *dev = cfg->parent_dev;
+ struct pci_dev *dev = cfg->dev;
int rc = 0;
int ro_start, ro_size, i, j, k;
ssize_t vpd_size;
@@ -1416,7 +1416,7 @@ static int read_vpd(struct cxlflash_cfg *cfg, u64 wwpn[])
char *wwpn_vpd_tags[NUM_FC_PORTS] = { "V5", "V6" };
/* Get the VPD data from the device */
- vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data);
+ vpd_size = cxl_read_adapter_vpd(dev, vpd_data, sizeof(vpd_data));
if (unlikely(vpd_size <= 0)) {
dev_err(&dev->dev, "%s: Unable to read VPD (size = %ld)\n",
__func__, vpd_size);
@@ -2392,7 +2392,6 @@ static int cxlflash_probe(struct pci_dev *pdev,
{
struct Scsi_Host *host;
struct cxlflash_cfg *cfg = NULL;
- struct device *phys_dev;
struct dev_dependent_vals *ddv;
int rc = 0;
@@ -2458,19 +2457,6 @@ static int cxlflash_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, cfg);
- /*
- * Use the special service provided to look up the physical
- * PCI device, since we are called on the probe of the virtual
- * PCI host bus (vphb)
- */
- phys_dev = cxl_get_phys_dev(pdev);
- if (!dev_is_pci(phys_dev)) {
- dev_err(&pdev->dev, "%s: not a pci dev\n", __func__);
- rc = -ENODEV;
- goto out_remove;
- }
- cfg->parent_dev = to_pci_dev(phys_dev);
-
cfg->cxl_afu = cxl_pci_to_afu(pdev);
rc = init_pci(cfg);
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 0b3c0d39ef75..c370b261c720 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -239,6 +239,14 @@ extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp);
#endif
+#ifndef __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
+static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+
+}
+#endif
+
#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 301de78d65f7..5f3ee5a60a81 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -34,20 +34,29 @@
* The idea here is to build acquire/release variants by adding explicit
* barriers on top of the relaxed variant. In the case where the relaxed
* variant is already fully ordered, no additional barriers are needed.
+ *
+ * Besides, if an arch has a special barrier for acquire/release, it could
+ * implement its own __atomic_op_* and use the same framework for building
+ * variants
*/
+#ifndef __atomic_op_acquire
#define __atomic_op_acquire(op, args...) \
({ \
typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \
smp_mb__after_atomic(); \
__ret; \
})
+#endif
+#ifndef __atomic_op_release
#define __atomic_op_release(op, args...) \
({ \
smp_mb__before_atomic(); \
op##_relaxed(args); \
})
+#endif
+#ifndef __atomic_op_fence
#define __atomic_op_fence(op, args...) \
({ \
typeof(op##_relaxed(args)) __ret; \
@@ -56,6 +65,7 @@
smp_mb__after_atomic(); \
__ret; \
})
+#endif
/* atomic_add_return_relaxed */
#ifndef atomic_add_return_relaxed
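With the new #ifndef guards, an architecture whose acquire/release barriers
are cheaper than a full smp_mb() can supply its own builders before this
header is processed. A sketch, with the barrier instruction left as an
illustrative placeholder:

    /* in an arch's asm/atomic.h, before linux/atomic.h is pulled in */
    #define __atomic_op_acquire(op, args...)                                \
    ({                                                                      \
            typeof(op##_relaxed(args)) __ret = op##_relaxed(args);          \
            __asm__ __volatile__("isync" : : : "memory"); /* placeholder */ \
            __ret;                                                          \
    })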
diff --git a/include/linux/bug.h b/include/linux/bug.h
index 7f4818673c41..e51b0709e78d 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -20,6 +20,7 @@ struct pt_regs;
#define BUILD_BUG_ON_MSG(cond, msg) (0)
#define BUILD_BUG_ON(condition) (0)
#define BUILD_BUG() (0)
+#define MAYBE_BUILD_BUG_ON(cond) (0)
#else /* __CHECKER__ */
/* Force a compilation error if a constant expression is not a power of 2 */
@@ -83,6 +84,14 @@ struct pt_regs;
*/
#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
+#define MAYBE_BUILD_BUG_ON(cond) \
+ do { \
+ if (__builtin_constant_p((cond))) \
+ BUILD_BUG_ON(cond); \
+ else \
+ BUG_ON(cond); \
+ } while (0)
+
#endif /* __CHECKER__ */
#ifdef CONFIG_GENERIC_BUG
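MAYBE_BUILD_BUG_ON() picks the strongest check available: a condition the
compiler can evaluate fails the build, anything else degrades to a runtime
BUG_ON(). Illustrative uses (the runtime helper is hypothetical):

    MAYBE_BUILD_BUG_ON(sizeof(long) < 4);    /* constant: build-time error */
    MAYBE_BUILD_BUG_ON(nr_entries() > 8);    /* non-constant: runtime BUG_ON */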
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 459fd25b378e..f12513a20a06 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -111,9 +111,6 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
__split_huge_pmd(__vma, __pmd, __address); \
} while (0)
-#if HPAGE_PMD_ORDER >= MAX_ORDER
-#error "hugepages can't be allocated by the buddy allocator"
-#endif
extern int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice);
extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 27df4a6585da..bc435d6293d2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -770,6 +770,7 @@ extern struct list_head pci_root_buses; /* list of all known PCI buses */
int no_pci_devices(void);
void pcibios_resource_survey_bus(struct pci_bus *bus);
+void pcibios_bus_add_device(struct pci_dev *pdev);
void pcibios_add_bus(struct pci_bus *bus);
void pcibios_remove_bus(struct pci_bus *bus);
void pcibios_fixup_bus(struct pci_bus *);
@@ -1738,6 +1739,8 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int id);
int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
void pci_disable_sriov(struct pci_dev *dev);
+int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset);
+void pci_iov_remove_virtfn(struct pci_dev *dev, int id, int reset);
int pci_num_vf(struct pci_dev *dev);
int pci_vfs_assigned(struct pci_dev *dev);
int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs);
@@ -1754,6 +1757,12 @@ static inline int pci_iov_virtfn_devfn(struct pci_dev *dev, int id)
}
static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
{ return -ENODEV; }
+static inline int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset)
+{
+ return -ENOSYS;
+}
+static inline void pci_iov_remove_virtfn(struct pci_dev *dev,
+ int id, int reset) { }
static inline void pci_disable_sriov(struct pci_dev *dev) { }
static inline int pci_num_vf(struct pci_dev *dev) { return 0; }
static inline int pci_vfs_assigned(struct pci_dev *dev)
diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index f2ffe5bd720d..7d5e2613c7b8 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -30,9 +30,6 @@ struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev);
/* Get the AFU conf record number associated with a pci_dev */
unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev);
-/* Get the physical device (ie. the PCIe card) which the AFU is attached */
-struct device *cxl_get_phys_dev(struct pci_dev *dev);
-
/*
* Context lifetime overview:
@@ -210,4 +207,9 @@ ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count,
void cxl_perst_reloads_same_image(struct cxl_afu *afu,
bool perst_reloads_same_image);
+/*
+ * Read the VPD for the card where the AFU resides
+ */
+ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count);
+
#endif /* _MISC_CXL_H */
diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h
index 1e889aa8a36e..8cd334f99ddc 100644
--- a/include/uapi/misc/cxl.h
+++ b/include/uapi/misc/cxl.h
@@ -55,11 +55,35 @@ struct cxl_afu_id {
__u64 reserved6;
};
+/* base adapter image header is included in the image */
+#define CXL_AI_NEED_HEADER 0x0000000000000001ULL
+#define CXL_AI_ALL CXL_AI_NEED_HEADER
+
+#define CXL_AI_HEADER_SIZE 128
+#define CXL_AI_BUFFER_SIZE 4096
+#define CXL_AI_MAX_ENTRIES 256
+#define CXL_AI_MAX_CHUNK_SIZE (CXL_AI_BUFFER_SIZE * CXL_AI_MAX_ENTRIES)
+
+struct cxl_adapter_image {
+ __u64 flags;
+ __u64 data;
+ __u64 len_data;
+ __u64 len_image;
+ __u64 reserved1;
+ __u64 reserved2;
+ __u64 reserved3;
+ __u64 reserved4;
+};
+
/* ioctl numbers */
#define CXL_MAGIC 0xCA
+/* AFU devices */
#define CXL_IOCTL_START_WORK _IOW(CXL_MAGIC, 0x00, struct cxl_ioctl_start_work)
#define CXL_IOCTL_GET_PROCESS_ELEMENT _IOR(CXL_MAGIC, 0x01, __u32)
#define CXL_IOCTL_GET_AFU_ID _IOR(CXL_MAGIC, 0x02, struct cxl_afu_id)
+/* adapter devices */
+#define CXL_IOCTL_DOWNLOAD_IMAGE _IOW(CXL_MAGIC, 0x0A, struct cxl_adapter_image)
+#define CXL_IOCTL_VALIDATE_IMAGE _IOW(CXL_MAGIC, 0x0B, struct cxl_adapter_image)
#define CXL_READ_MIN_SIZE 0x1000 /* 4K */
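Both new ioctls take a struct cxl_adapter_image describing a user buffer. A
user-space sketch of a download, assuming the adapter exposes a character
device such as /dev/cxl/card0 (the path is illustrative):

    #include <fcntl.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <misc/cxl.h>

    static int download_image(const char *dev_path, void *image, __u64 len)
    {
            struct cxl_adapter_image ai;
            int fd, rc;

            fd = open(dev_path, O_RDWR);
            if (fd < 0)
                    return -1;

            memset(&ai, 0, sizeof(ai));
            ai.flags = CXL_AI_NEED_HEADER;  /* header included in the image */
            ai.data = (__u64)(uintptr_t)image;
            ai.len_data = len;
            ai.len_image = len;

            rc = ioctl(fd, CXL_IOCTL_DOWNLOAD_IMAGE, &ai);
            close(fd);
            return rc;
    }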
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 08fc0ba2207e..36c22a89df61 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -83,7 +83,7 @@ unsigned long transparent_hugepage_flags __read_mostly =
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
/* default scan 8*512 pte (or vmas) every 30 second */
-static unsigned int khugepaged_pages_to_scan __read_mostly = HPAGE_PMD_NR*8;
+static unsigned int khugepaged_pages_to_scan __read_mostly;
static unsigned int khugepaged_pages_collapsed;
static unsigned int khugepaged_full_scans;
static unsigned int khugepaged_scan_sleep_millisecs __read_mostly = 10000;
@@ -98,7 +98,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
* it would have happened if the vma was large enough during page
* fault.
*/
-static unsigned int khugepaged_max_ptes_none __read_mostly = HPAGE_PMD_NR-1;
+static unsigned int khugepaged_max_ptes_none __read_mostly;
static int khugepaged(void *none);
static int khugepaged_slab_init(void);
@@ -660,6 +660,18 @@ static int __init hugepage_init(void)
return -EINVAL;
}
+ khugepaged_pages_to_scan = HPAGE_PMD_NR * 8;
+ khugepaged_max_ptes_none = HPAGE_PMD_NR - 1;
+ /*
+ * hugepages can't be allocated by the buddy allocator
+ */
+ MAYBE_BUILD_BUG_ON(HPAGE_PMD_ORDER >= MAX_ORDER);
+ /*
+ * we use page->mapping and page->index in second tail page
+ * as list_head: assuming THP order >= 2
+ */
+ MAYBE_BUILD_BUG_ON(HPAGE_PMD_ORDER < 2);
+
err = hugepage_init_sysfs(&hugepage_kobj);
if (err)
goto err_sysfs;
@@ -764,7 +776,6 @@ void prep_transhuge_page(struct page *page)
* we use page->mapping and page->index in second tail page
* as list_head: assuming THP order >= 2
*/
- BUILD_BUG_ON(HPAGE_PMD_ORDER < 2);
INIT_LIST_HEAD(page_deferred_list(page));
set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
@@ -2860,6 +2871,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
young = pmd_young(*pmd);
dirty = pmd_dirty(*pmd);
+ pmdp_huge_split_prepare(vma, haddr, pmd);
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
pmd_populate(mm, &_pmd, pgtable);
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 0c2706bda330..b08f77cbe31b 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -8,7 +8,7 @@ ifeq ($(ARCH),powerpc)
GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
-CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
+CFLAGS := -Wall -O2 -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
export CFLAGS
@@ -22,7 +22,8 @@ SUB_DIRS = benchmarks \
switch_endian \
syscalls \
tm \
- vphn
+ vphn \
+ math
endif
diff --git a/tools/testing/selftests/powerpc/basic_asm.h b/tools/testing/selftests/powerpc/basic_asm.h
new file mode 100644
index 000000000000..3349a0704d1a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/basic_asm.h
@@ -0,0 +1,70 @@
+#ifndef _SELFTESTS_POWERPC_BASIC_ASM_H
+#define _SELFTESTS_POWERPC_BASIC_ASM_H
+
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
+#define LOAD_REG_IMMEDIATE(reg,expr) \
+ lis reg,(expr)@highest; \
+ ori reg,reg,(expr)@higher; \
+ rldicr reg,reg,32,31; \
+ oris reg,reg,(expr)@high; \
+ ori reg,reg,(expr)@l;
+
+/*
+ * Note: These macros assume that variables being stored on the stack are
+ * doublewords. While this is usually the case, it may not hold for
+ * every use.
+ */
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define STACK_FRAME_MIN_SIZE 32
+#define STACK_FRAME_TOC_POS 24
+#define __STACK_FRAME_PARAM(_param) (32 + ((_param)*8))
+#define __STACK_FRAME_LOCAL(_num_params,_var_num) ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8))
+#else
+#define STACK_FRAME_MIN_SIZE 112
+#define STACK_FRAME_TOC_POS 40
+#define __STACK_FRAME_PARAM(i) (48 + ((i)*8))
+
+/*
+ * Caveat: if a function is passed more than 8 doublewords, the caller will have
+ * made more space... which would render the 112 incorrect.
+ */
+#define __STACK_FRAME_LOCAL(_num_params,_var_num) (112 + ((_var_num)*8))
+#endif
+
+/* Parameter x saved to the stack */
+#define STACK_FRAME_PARAM(var) __STACK_FRAME_PARAM(var)
+
+/* Local variable x saved to the stack after x parameters */
+#define STACK_FRAME_LOCAL(num_params,var) __STACK_FRAME_LOCAL(num_params,var)
+#define STACK_FRAME_LR_POS 16
+#define STACK_FRAME_CR_POS 8
+
+/*
+ * It is very important to note here that _extra is the extra amount of
+ * stack space needed. This space can be accessed using STACK_FRAME_PARAM()
+ * or STACK_FRAME_LOCAL() macros.
+ *
+ * r1 and r2 are not defined in ppc-asm.h (instead they are defined as sp
+ * and toc). Kernel programmers tend to prefer rX even for r1 and r2, hence
+ * %r1 and %r2. r0 is defined in ppc-asm.h and therefore %r0 gets
+ * preprocessed incorrectly, hence r0.
+ */
+#define PUSH_BASIC_STACK(_extra) \
+ mflr r0; \
+ std r0,STACK_FRAME_LR_POS(%r1); \
+ stdu %r1,-(_extra + STACK_FRAME_MIN_SIZE)(%r1); \
+ mfcr r0; \
+ stw r0,STACK_FRAME_CR_POS(%r1); \
+ std %r2,STACK_FRAME_TOC_POS(%r1);
+
+#define POP_BASIC_STACK(_extra) \
+ ld %r2,STACK_FRAME_TOC_POS(%r1); \
+ lwz r0,STACK_FRAME_CR_POS(%r1); \
+ mtcr r0; \
+ addi %r1,%r1,(_extra + STACK_FRAME_MIN_SIZE); \
+ ld r0,STACK_FRAME_LR_POS(%r1); \
+ mtlr r0;
+
+#endif /* _SELFTESTS_POWERPC_BASIC_ASM_H */
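
To make the frame offsets concrete, here is an illustrative-only C view of the minimal ELFv2 frame the macros assume (the ELFv1 branch differs, with the TOC save at 40 and parameters from 48, exactly as encoded in the #else case above):

	struct basic_stack_frame_elfv2 {	/* sketch, not ABI-authoritative */
		unsigned long back_chain;	/*  0: caller's saved r1 */
		unsigned long cr_save;		/*  8: STACK_FRAME_CR_POS (CR saved as a word) */
		unsigned long lr_save;		/* 16: STACK_FRAME_LR_POS */
		unsigned long toc_save;		/* 24: STACK_FRAME_TOC_POS */
		unsigned long params[];		/* 32: STACK_FRAME_PARAM(i) = 32 + 8*i */
	};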
diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore
new file mode 100644
index 000000000000..4fe13a439fd7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -0,0 +1,6 @@
+fpu_syscall
+vmx_syscall
+fpu_preempt
+vmx_preempt
+fpu_signal
+vmx_signal
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
new file mode 100644
index 000000000000..5b88875d5955
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -0,0 +1,19 @@
+TEST_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../harness.c
+$(TEST_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
+
+fpu_syscall: fpu_asm.S
+fpu_preempt: fpu_asm.S
+fpu_signal: fpu_asm.S
+
+vmx_syscall: vmx_asm.S
+vmx_preempt: vmx_asm.S
+vmx_signal: vmx_asm.S
+
+include ../../lib.mk
+
+clean:
+ rm -f $(TEST_PROGS) *.o
diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S
new file mode 100644
index 000000000000..f3711d80e709
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_asm.S
@@ -0,0 +1,198 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "../basic_asm.h"
+
+#define PUSH_FPU(pos) \
+ stfd f14,pos(sp); \
+ stfd f15,pos+8(sp); \
+ stfd f16,pos+16(sp); \
+ stfd f17,pos+24(sp); \
+ stfd f18,pos+32(sp); \
+ stfd f19,pos+40(sp); \
+ stfd f20,pos+48(sp); \
+ stfd f21,pos+56(sp); \
+ stfd f22,pos+64(sp); \
+ stfd f23,pos+72(sp); \
+ stfd f24,pos+80(sp); \
+ stfd f25,pos+88(sp); \
+ stfd f26,pos+96(sp); \
+ stfd f27,pos+104(sp); \
+ stfd f28,pos+112(sp); \
+ stfd f29,pos+120(sp); \
+ stfd f30,pos+128(sp); \
+ stfd f31,pos+136(sp);
+
+#define POP_FPU(pos) \
+ lfd f14,pos(sp); \
+ lfd f15,pos+8(sp); \
+ lfd f16,pos+16(sp); \
+ lfd f17,pos+24(sp); \
+ lfd f18,pos+32(sp); \
+ lfd f19,pos+40(sp); \
+ lfd f20,pos+48(sp); \
+ lfd f21,pos+56(sp); \
+ lfd f22,pos+64(sp); \
+ lfd f23,pos+72(sp); \
+ lfd f24,pos+80(sp); \
+ lfd f25,pos+88(sp); \
+ lfd f26,pos+96(sp); \
+ lfd f27,pos+104(sp); \
+ lfd f28,pos+112(sp); \
+ lfd f29,pos+120(sp); \
+ lfd f30,pos+128(sp); \
+ lfd f31,pos+136(sp);
+
+# Careful calling this: it will 'clobber' the FPU (by design)
+# Don't call this from C
+FUNC_START(load_fpu)
+ lfd f14,0(r3)
+ lfd f15,8(r3)
+ lfd f16,16(r3)
+ lfd f17,24(r3)
+ lfd f18,32(r3)
+ lfd f19,40(r3)
+ lfd f20,48(r3)
+ lfd f21,56(r3)
+ lfd f22,64(r3)
+ lfd f23,72(r3)
+ lfd f24,80(r3)
+ lfd f25,88(r3)
+ lfd f26,96(r3)
+ lfd f27,104(r3)
+ lfd f28,112(r3)
+ lfd f29,120(r3)
+ lfd f30,128(r3)
+ lfd f31,136(r3)
+ blr
+FUNC_END(load_fpu)
+
+FUNC_START(check_fpu)
+ mr r4,r3
+ li r3,1 # assume a bad result
+ lfd f0,0(r4)
+ fcmpu cr1,f0,f14
+ bne cr1,1f
+ lfd f0,8(r4)
+ fcmpu cr1,f0,f15
+ bne cr1,1f
+ lfd f0,16(r4)
+ fcmpu cr1,f0,f16
+ bne cr1,1f
+ lfd f0,24(r4)
+ fcmpu cr1,f0,f17
+ bne cr1,1f
+ lfd f0,32(r4)
+ fcmpu cr1,f0,f18
+ bne cr1,1f
+ lfd f0,40(r4)
+ fcmpu cr1,f0,f19
+ bne cr1,1f
+ lfd f0,48(r4)
+ fcmpu cr1,f0,f20
+ bne cr1,1f
+ lfd f0,56(r4)
+ fcmpu cr1,f0,f21
+ bne cr1,1f
+ lfd f0,64(r4)
+ fcmpu cr1,f0,f22
+ bne cr1,1f
+ lfd f0,72(r4)
+ fcmpu cr1,f0,f23
+ bne cr1,1f
+ lfd f0,80(r4)
+ fcmpu cr1,f0,f24
+ bne cr1,1f
+ lfd f0,88(r4)
+ fcmpu cr1,f0,f25
+ bne cr1,1f
+ lfd f0,96(r4)
+ fcmpu cr1,f0,f26
+ bne cr1,1f
+ lfd f0,104(r4)
+ fcmpu cr1,f0,f27
+ bne cr1,1f
+ lfd f0,112(r4)
+ fcmpu cr1,f0,f28
+ bne cr1,1f
+ lfd f0,120(r4)
+ fcmpu cr1,f0,f29
+ bne cr1,1f
+ lfd f0,128(r4)
+ fcmpu cr1,f0,f30
+ bne cr1,1f
+ lfd f0,136(r4)
+ fcmpu cr1,f0,f31
+ bne cr1,1f
+ li r3,0 # Success!!!
+1: blr
+FUNC_END(check_fpu)
+
+FUNC_START(test_fpu)
+ # r3 holds pointer to where to put the result of fork
+ # r4 holds pointer to the pid
+	# f14-f31 are non-volatile
+ PUSH_BASIC_STACK(256)
+ std r3,STACK_FRAME_PARAM(0)(sp) # Address of darray
+ std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid
+ PUSH_FPU(STACK_FRAME_LOCAL(2,0))
+
+ bl load_fpu
+ nop
+ li r0,__NR_fork
+ sc
+
+ # pass the result of the fork to the caller
+ ld r9,STACK_FRAME_PARAM(1)(sp)
+ std r3,0(r9)
+
+ ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_fpu
+ nop
+
+ POP_FPU(STACK_FRAME_LOCAL(2,0))
+ POP_BASIC_STACK(256)
+ blr
+FUNC_END(test_fpu)
+
+# int preempt_fpu(double *darray, int *threads_starting, int *running)
+# On starting will (atomically) decrement threads_starting as a signal that
+# the FPU has been loaded with darray. Will proceed to check the validity of
+# the FPU registers while running is not zero.
+FUNC_START(preempt_fpu)
+ PUSH_BASIC_STACK(256)
+ std r3,STACK_FRAME_PARAM(0)(sp) # double *darray
+ std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
+ std r5,STACK_FRAME_PARAM(2)(sp) # int *running
+ PUSH_FPU(STACK_FRAME_LOCAL(3,0))
+
+ bl load_fpu
+ nop
+
+ sync
+ # Atomic DEC
+ ld r3,STACK_FRAME_PARAM(1)(sp)
+1: lwarx r4,0,r3
+ addi r4,r4,-1
+ stwcx. r4,0,r3
+ bne- 1b
+
+2: ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_fpu
+ nop
+ cmpdi r3,0
+ bne 3f
+ ld r4,STACK_FRAME_PARAM(2)(sp)
+ ld r5,0(r4)
+ cmpwi r5,0
+ bne 2b
+
+3: POP_FPU(STACK_FRAME_LOCAL(3,0))
+ POP_BASIC_STACK(256)
+ blr
+FUNC_END(preempt_fpu)
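
For readers following the asm, the control flow of preempt_fpu expressed in C (a sketch; load_fpu_c() and check_fpu_c() are hypothetical stand-ins for the asm routines, which are not callable from C):

	/* Sketch only: load_fpu_c()/check_fpu_c() are hypothetical stand-ins. */
	int preempt_fpu_sketch(double *darray, int *threads_starting, int *running)
	{
		load_fpu_c(darray);	/* fill f14-f31 from darray (hypothetical) */
		__atomic_fetch_sub(threads_starting, 1, __ATOMIC_SEQ_CST);
		while (__atomic_load_n(running, __ATOMIC_RELAXED)) {
			if (check_fpu_c(darray))	/* a register changed */
				return 1;		/* fail, reported via pthread return */
		}
		return 0;	/* clean stop requested via *running */
	}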
diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c
new file mode 100644
index 000000000000..0f85b79d883d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers change across preemption.
+ * Two things should be noted here: a) the check_fpu function in asm only
+ * checks the non-volatile registers, as it is reused from the syscall test;
+ * b) there is no way to be sure preemption happened, so this test just uses
+ * many threads and a long wait. As such, a successful test doesn't mean much,
+ * but a failure is bad.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+ 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+ 2.1};
+
+int threads_starting;
+int running;
+
+extern void preempt_fpu(double *darray, int *threads_starting, int *running);
+
+void *preempt_fpu_c(void *p)
+{
+ int i;
+ srand(pthread_self());
+ for (i = 0; i < 21; i++)
+ darray[i] = rand();
+
+	/* Test fails if it ever returns */
+ preempt_fpu(darray, &threads_starting, &running);
+
+ return p;
+}
+
+int test_preempt_fpu(void)
+{
+ int i, rc, threads;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+	tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, preempt_fpu_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ /* Not really necessary but nice to wait for every thread to start */
+ printf("\tWaiting for all workers to start...");
+	while (threads_starting)
+ asm volatile("": : :"memory");
+ printf("done\n");
+
+ printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
+ sleep(PREEMPT_TIME);
+ printf("done\n");
+
+ printf("\tStopping workers...");
+ /*
+	 * Workers are checking this value in every loop iteration. In preempt_fpu:
+	 * 'cmpwi r5,0; bne 2b'. r5 will hold the loaded value of running.
+ */
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ void *rc_p;
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why preempt_fpu
+ * returned
+ */
+ if ((long) rc_p)
+ printf("oops\n");
+ FAIL_IF((long) rc_p);
+ }
+ printf("done\n");
+
+ free(tids);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_preempt_fpu, "fpu_preempt");
+}
diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c b/tools/testing/selftests/powerpc/math/fpu_signal.c
new file mode 100644
index 000000000000..888aa51b4204
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_signal.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers are correctly reported in a
+ * signal context. Each worker just spins checking its FPU registers; at some
+ * point a signal will interrupt it and C code will check that the signal
+ * context reports the same values.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Number of times each thread should receive the signal */
+#define ITERATIONS 10
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+ 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+ 2.1};
+
+bool bad_context;
+int threads_starting;
+int running;
+
+extern long preempt_fpu(double *darray, int *threads_starting, int *running);
+
+void signal_fpu_sig(int sig, siginfo_t *info, void *context)
+{
+ int i;
+ ucontext_t *uc = context;
+ mcontext_t *mc = &uc->uc_mcontext;
+
+	/* Only the non-volatile registers were loaded up */
+ for (i = 14; i < 32; i++) {
+ if (mc->fp_regs[i] != darray[i - 14]) {
+ bad_context = true;
+ break;
+ }
+ }
+}
+
+void *signal_fpu_c(void *p)
+{
+ int i;
+ long rc;
+ struct sigaction act;
+ act.sa_sigaction = signal_fpu_sig;
+ act.sa_flags = SA_SIGINFO;
+ rc = sigaction(SIGUSR1, &act, NULL);
+ if (rc)
+ return p;
+
+ srand(pthread_self());
+ for (i = 0; i < 21; i++)
+ darray[i] = rand();
+
+ rc = preempt_fpu(darray, &threads_starting, &running);
+
+ return (void *) rc;
+}
+
+int test_signal_fpu(void)
+{
+ int i, j, rc, threads;
+ void *rc_p;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, signal_fpu_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ printf("\tWaiting for all workers to start...");
+ while (threads_starting)
+ asm volatile("": : :"memory");
+ printf("done\n");
+
+ printf("\tSending signals to all threads %d times...", ITERATIONS);
+ for (i = 0; i < ITERATIONS; i++) {
+ for (j = 0; j < threads; j++) {
+ pthread_kill(tids[j], SIGUSR1);
+ }
+ sleep(1);
+ }
+ printf("done\n");
+
+ printf("\tStopping workers...");
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why signal_fpu
+ * returned
+ */
+ if ((long) rc_p || bad_context)
+ printf("oops\n");
+ if (bad_context)
+ fprintf(stderr, "\t!! bad_context is true\n");
+ FAIL_IF((long) rc_p || bad_context);
+ }
+ printf("done\n");
+
+ free(tids);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_signal_fpu, "fpu_signal");
+}
diff --git a/tools/testing/selftests/powerpc/math/fpu_syscall.c b/tools/testing/selftests/powerpc/math/fpu_syscall.c
new file mode 100644
index 000000000000..949e6721256d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_syscall.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers change across a syscall (fork).
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+
+#include "utils.h"
+
+extern int test_fpu(double *darray, pid_t *pid);
+
+double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+ 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+ 2.1};
+
+int syscall_fpu(void)
+{
+ pid_t fork_pid;
+ int i;
+ int ret;
+ int child_ret;
+ for (i = 0; i < 1000; i++) {
+ /* test_fpu will fork() */
+ ret = test_fpu(darray, &fork_pid);
+ if (fork_pid == -1)
+ return -1;
+ if (fork_pid == 0)
+ exit(ret);
+ waitpid(fork_pid, &child_ret, 0);
+ if (ret || child_ret)
+ return 1;
+ }
+
+ return 0;
+}
+
+int test_syscall_fpu(void)
+{
+ /*
+ * Setup an environment with much context switching
+ */
+ pid_t pid2;
+ pid_t pid = fork();
+ int ret;
+ int child_ret;
+ FAIL_IF(pid == -1);
+
+ pid2 = fork();
+	/* Can't FAIL_IF(pid2 == -1); because we've already forked once */
+ if (pid2 == -1) {
+ /*
+ * Couldn't fork, ensure test is a fail
+ */
+ child_ret = ret = 1;
+ } else {
+ ret = syscall_fpu();
+ if (pid2)
+ waitpid(pid2, &child_ret, 0);
+ else
+ exit(ret);
+ }
+
+ ret |= child_ret;
+
+ if (pid)
+ waitpid(pid, &child_ret, 0);
+ else
+ exit(ret);
+
+ FAIL_IF(ret || child_ret);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_syscall_fpu, "syscall_fpu");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_asm.S b/tools/testing/selftests/powerpc/math/vmx_asm.S
new file mode 100644
index 000000000000..1b8c248b3ac1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_asm.S
@@ -0,0 +1,235 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "../basic_asm.h"
+
+# POS MUST BE 16-BYTE ALIGNED!
+#define PUSH_VMX(pos,reg) \
+ li reg,pos; \
+ stvx v20,reg,sp; \
+ addi reg,reg,16; \
+ stvx v21,reg,sp; \
+ addi reg,reg,16; \
+ stvx v22,reg,sp; \
+ addi reg,reg,16; \
+ stvx v23,reg,sp; \
+ addi reg,reg,16; \
+ stvx v24,reg,sp; \
+ addi reg,reg,16; \
+ stvx v25,reg,sp; \
+ addi reg,reg,16; \
+ stvx v26,reg,sp; \
+ addi reg,reg,16; \
+ stvx v27,reg,sp; \
+ addi reg,reg,16; \
+ stvx v28,reg,sp; \
+ addi reg,reg,16; \
+ stvx v29,reg,sp; \
+ addi reg,reg,16; \
+ stvx v30,reg,sp; \
+ addi reg,reg,16; \
+ stvx v31,reg,sp;
+
+# POS MUST BE 16-BYTE ALIGNED!
+#define POP_VMX(pos,reg) \
+ li reg,pos; \
+ lvx v20,reg,sp; \
+ addi reg,reg,16; \
+ lvx v21,reg,sp; \
+ addi reg,reg,16; \
+ lvx v22,reg,sp; \
+ addi reg,reg,16; \
+ lvx v23,reg,sp; \
+ addi reg,reg,16; \
+ lvx v24,reg,sp; \
+ addi reg,reg,16; \
+ lvx v25,reg,sp; \
+ addi reg,reg,16; \
+ lvx v26,reg,sp; \
+ addi reg,reg,16; \
+ lvx v27,reg,sp; \
+ addi reg,reg,16; \
+ lvx v28,reg,sp; \
+ addi reg,reg,16; \
+ lvx v29,reg,sp; \
+ addi reg,reg,16; \
+ lvx v30,reg,sp; \
+ addi reg,reg,16; \
+ lvx v31,reg,sp;
+
+# Careful, this will 'clobber' vmx (by design)
+# Don't call this from C
+FUNC_START(load_vmx)
+ li r5,0
+ lvx v20,r5,r3
+ addi r5,r5,16
+ lvx v21,r5,r3
+ addi r5,r5,16
+ lvx v22,r5,r3
+ addi r5,r5,16
+ lvx v23,r5,r3
+ addi r5,r5,16
+ lvx v24,r5,r3
+ addi r5,r5,16
+ lvx v25,r5,r3
+ addi r5,r5,16
+ lvx v26,r5,r3
+ addi r5,r5,16
+ lvx v27,r5,r3
+ addi r5,r5,16
+ lvx v28,r5,r3
+ addi r5,r5,16
+ lvx v29,r5,r3
+ addi r5,r5,16
+ lvx v30,r5,r3
+ addi r5,r5,16
+ lvx v31,r5,r3
+ blr
+FUNC_END(load_vmx)
+
+# Should be safe from C, only touches r4, r5 and v0,v1,v2
+FUNC_START(check_vmx)
+ PUSH_BASIC_STACK(32)
+ mr r4,r3
+ li r3,1 # assume a bad result
+ li r5,0
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v20
+ vmr v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v21
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v22
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v23
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v24
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v25
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v26
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v27
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v28
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v29
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v30
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v31
+ vand v2,v2,v1
+
+ li r5,STACK_FRAME_LOCAL(0,0)
+ stvx v2,r5,sp
+ ldx r0,r5,sp
+ cmpdi r0,0xffffffffffffffff
+ bne 1f
+ li r3,0
+1: POP_BASIC_STACK(32)
+ blr
+FUNC_END(check_vmx)
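
check_vmx accumulates per-doubleword equality masks: each vcmpequd sets a lane to all-ones when the doublewords match, vand folds the twelve masks together, and the final ldx/cmpdi compares the result against -1. A C model of that reduction (a sketch; note the asm reloads only the first doubleword of the accumulated mask):

	#include <stdint.h>

	static int check_vmx_model(const uint64_t expected[12][2],
				   const uint64_t live[12][2])
	{
		uint64_t acc[2] = { ~0ULL, ~0ULL };	/* like v2 after the vmr */
		int i, j;

		for (i = 0; i < 12; i++)
			for (j = 0; j < 2; j++)		/* vcmpequd then vand */
				acc[j] &= (expected[i][j] == live[i][j]) ? ~0ULL : 0;

		return !(acc[0] == ~0ULL && acc[1] == ~0ULL);	/* 0 on success */
	}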
+
+# Safe from C
+FUNC_START(test_vmx)
+ # r3 holds pointer to where to put the result of fork
+ # r4 holds pointer to the pid
+ # v20-v31 are non-volatile
+ PUSH_BASIC_STACK(512)
+ std r3,STACK_FRAME_PARAM(0)(sp) # Address of varray
+ std r4,STACK_FRAME_PARAM(1)(sp) # address of pid
+ PUSH_VMX(STACK_FRAME_LOCAL(2,0),r4)
+
+ bl load_vmx
+ nop
+
+ li r0,__NR_fork
+ sc
+ # Pass the result of fork back to the caller
+ ld r9,STACK_FRAME_PARAM(1)(sp)
+ std r3,0(r9)
+
+ ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_vmx
+ nop
+
+ POP_VMX(STACK_FRAME_LOCAL(2,0),r4)
+ POP_BASIC_STACK(512)
+ blr
+FUNC_END(test_vmx)
+
+# int preempt_vmx(vector int *varray, int *threads_starting, int *running)
+# On starting will (atomically) decrement threads_starting as a signal that
+# the VMX have been loaded with varray. Will proceed to check the validity of
+# the VMX registers while running is not zero.
+FUNC_START(preempt_vmx)
+ PUSH_BASIC_STACK(512)
+ std r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray
+ std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
+ std r5,STACK_FRAME_PARAM(2)(sp) # int *running
+	# VMX needs to write to 16-byte aligned addresses, so skip STACK_FRAME_LOCAL(3,0)
+ PUSH_VMX(STACK_FRAME_LOCAL(4,0),r4)
+
+ bl load_vmx
+ nop
+
+ sync
+ # Atomic DEC
+ ld r3,STACK_FRAME_PARAM(1)(sp)
+1: lwarx r4,0,r3
+ addi r4,r4,-1
+ stwcx. r4,0,r3
+ bne- 1b
+
+2: ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_vmx
+ nop
+ cmpdi r3,0
+ bne 3f
+ ld r4,STACK_FRAME_PARAM(2)(sp)
+ ld r5,0(r4)
+ cmpwi r5,0
+ bne 2b
+
+3: POP_VMX(STACK_FRAME_LOCAL(4,0),r4)
+ POP_BASIC_STACK(512)
+ blr
+FUNC_END(preempt_vmx)
diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c
new file mode 100644
index 000000000000..9ef376c55b13
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers change across preemption.
+ * Two things should be noted here: a) the check_vmx function in asm only
+ * checks the non-volatile registers, as it is reused from the syscall test;
+ * b) there is no way to be sure preemption happened, so this test just uses
+ * many threads and a long wait. As such, a successful test doesn't mean much,
+ * but a failure is bad.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+int threads_starting;
+int running;
+
+extern void preempt_vmx(vector int *varray, int *threads_starting, int *running);
+
+void *preempt_vmx_c(void *p)
+{
+ int i, j;
+ srand(pthread_self());
+ for (i = 0; i < 12; i++)
+ for (j = 0; j < 4; j++)
+ varray[i][j] = rand();
+
+ /* Test fails if it ever returns */
+ preempt_vmx(varray, &threads_starting, &running);
+ return p;
+}
+
+int test_preempt_vmx(void)
+{
+ int i, rc, threads;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, preempt_vmx_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+	/* Not really necessary but nice to wait for every thread to start */
+ printf("\tWaiting for all workers to start...");
+	while (threads_starting)
+ asm volatile("": : :"memory");
+ printf("done\n");
+
+ printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
+ sleep(PREEMPT_TIME);
+ printf("done\n");
+
+ printf("\tStopping workers...");
+ /*
+	 * Workers are checking this value in every loop iteration. In preempt_vmx:
+	 * 'cmpwi r5,0; bne 2b'. r5 will hold the loaded value of running.
+ */
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ void *rc_p;
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why preempt_vmx
+ * returned
+ */
+ if ((long) rc_p)
+ printf("oops\n");
+ FAIL_IF((long) rc_p);
+ }
+ printf("done\n");
+
+	free(tids);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_preempt_vmx, "vmx_preempt");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c
new file mode 100644
index 000000000000..671d7533a557
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_signal.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers are correctly reported in a
+ * signal context. Each worker just spins checking its VMX registers; at some
+ * point a signal will interrupt it and C code will check that the signal
+ * context reports the same values.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <altivec.h>
+
+#include "utils.h"
+
+/* Number of times each thread should receive the signal */
+#define ITERATIONS 10
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+bool bad_context;
+int running;
+int threads_starting;
+
+extern int preempt_vmx(vector int *varray, int *threads_starting, int *running);
+
+void signal_vmx_sig(int sig, siginfo_t *info, void *context)
+{
+ int i;
+ ucontext_t *uc = context;
+ mcontext_t *mc = &uc->uc_mcontext;
+
+	/* Only the non-volatile registers were loaded up */
+ for (i = 20; i < 32; i++) {
+ if (memcmp(mc->v_regs->vrregs[i], &varray[i - 20], 16)) {
+ int j;
+ /*
+			 * We shouldn't printf() in a signal handler; however, this is
+			 * a test and we've detected failure. Understanding what failed
+			 * is paramount. All that happens after this is the test exiting
+			 * with failure.
+ */
+ printf("VMX mismatch at reg %d!\n", i);
+ printf("Reg | Actual | Expected\n");
+ for (j = 20; j < 32; j++) {
+ printf("%d | 0x%04x%04x%04x%04x | 0x%04x%04x%04x%04x\n", j, mc->v_regs->vrregs[j][0],
+ mc->v_regs->vrregs[j][1], mc->v_regs->vrregs[j][2], mc->v_regs->vrregs[j][3],
+ varray[j - 20][0], varray[j - 20][1], varray[j - 20][2], varray[j - 20][3]);
+ }
+ bad_context = true;
+ break;
+ }
+ }
+}
+
+void *signal_vmx_c(void *p)
+{
+ int i, j;
+ long rc;
+ struct sigaction act;
+ act.sa_sigaction = signal_vmx_sig;
+ act.sa_flags = SA_SIGINFO;
+ rc = sigaction(SIGUSR1, &act, NULL);
+ if (rc)
+ return p;
+
+ srand(pthread_self());
+ for (i = 0; i < 12; i++)
+ for (j = 0; j < 4; j++)
+ varray[i][j] = rand();
+
+ rc = preempt_vmx(varray, &threads_starting, &running);
+
+ return (void *) rc;
+}
+
+int test_signal_vmx(void)
+{
+ int i, j, rc, threads;
+ void *rc_p;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, signal_vmx_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ printf("\tWaiting for %d workers to start... %d", threads, threads_starting);
+ while (threads_starting) {
+ asm volatile("": : :"memory");
+ usleep(1000);
+ printf(", %d", threads_starting);
+ }
+ printf(" ...done\n");
+
+ printf("\tSending signals to all threads %d times...", ITERATIONS);
+ for (i = 0; i < ITERATIONS; i++) {
+ for (j = 0; j < threads; j++) {
+ pthread_kill(tids[j], SIGUSR1);
+ }
+ sleep(1);
+ }
+ printf("done\n");
+
+ printf("\tKilling workers...");
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why signal_vmx
+ * returned
+ */
+ if ((long) rc_p || bad_context)
+ printf("oops\n");
+ if (bad_context)
+ fprintf(stderr, "\t!! bad_context is true\n");
+ FAIL_IF((long) rc_p || bad_context);
+ }
+ printf("done\n");
+
+ free(tids);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_signal_vmx, "vmx_signal");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c
new file mode 100644
index 000000000000..a017918ee1ca
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers change across a syscall (fork).
+ */
+
+#include <altivec.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "utils.h"
+
+vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+extern int test_vmx(vector int *varray, pid_t *pid);
+
+int vmx_syscall(void)
+{
+ pid_t fork_pid;
+ int i;
+ int ret;
+ int child_ret;
+ for (i = 0; i < 1000; i++) {
+ /* test_vmx will fork() */
+ ret = test_vmx(varray, &fork_pid);
+ if (fork_pid == -1)
+ return -1;
+ if (fork_pid == 0)
+ exit(ret);
+ waitpid(fork_pid, &child_ret, 0);
+ if (ret || child_ret)
+ return 1;
+ }
+
+ return 0;
+}
+
+int test_vmx_syscall(void)
+{
+ /*
+ * Setup an environment with much context switching
+ */
+ pid_t pid2;
+ pid_t pid = fork();
+ int ret;
+ int child_ret;
+ FAIL_IF(pid == -1);
+
+ pid2 = fork();
+ ret = vmx_syscall();
+ /* Can't FAIL_IF(pid2 == -1); because we've already forked */
+ if (pid2 == -1) {
+ /*
+ * Couldn't fork, ensure child_ret is set and is a fail
+ */
+ ret = child_ret = 1;
+ } else {
+ if (pid2)
+ waitpid(pid2, &child_ret, 0);
+ else
+ exit(ret);
+ }
+
+ ret |= child_ret;
+
+ if (pid)
+ waitpid(pid, &child_ret, 0);
+ else
+ exit(ret);
+
+ FAIL_IF(ret || child_ret);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_vmx_syscall, "vmx_syscall");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c
index d86653f282b1..8c54d18b3e9a 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c
@@ -40,7 +40,7 @@ void signal_usr1(int signum, siginfo_t *info, void *uc)
#ifdef __powerpc64__
ucp->uc_mcontext.gp_regs[PT_MSR] |= (7ULL << 32);
#else
- ucp->uc_mcontext.regs->gpr[PT_MSR] |= (7ULL);
+ ucp->uc_mcontext.uc_regs->gregs[PT_MSR] |= (7ULL);
#endif
/* Should segv on return because of invalid context */
segv_expected = 1;