diff options
171 files changed, 8619 insertions, 2300 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl index b07e86d4597f..7fd737eed38a 100644 --- a/Documentation/ABI/testing/sysfs-class-cxl +++ b/Documentation/ABI/testing/sysfs-class-cxl @@ -159,7 +159,7 @@ Description: read only Decimal value of the Per Process MMIO space length. Users: https://github.com/ibm-capi/libcxl -What: /sys/class/cxl/<afu>m/pp_mmio_off +What: /sys/class/cxl/<afu>m/pp_mmio_off (not in a guest) Date: September 2014 Contact: linuxppc-dev@lists.ozlabs.org Description: read only @@ -183,7 +183,7 @@ Description: read only Identifies the revision level of the PSL. Users: https://github.com/ibm-capi/libcxl -What: /sys/class/cxl/<card>/base_image +What: /sys/class/cxl/<card>/base_image (not in a guest) Date: September 2014 Contact: linuxppc-dev@lists.ozlabs.org Description: read only @@ -193,7 +193,7 @@ Description: read only during the initial program load. Users: https://github.com/ibm-capi/libcxl -What: /sys/class/cxl/<card>/image_loaded +What: /sys/class/cxl/<card>/image_loaded (not in a guest) Date: September 2014 Contact: linuxppc-dev@lists.ozlabs.org Description: read only @@ -201,7 +201,7 @@ Description: read only onto the card. Users: https://github.com/ibm-capi/libcxl -What: /sys/class/cxl/<card>/load_image_on_perst +What: /sys/class/cxl/<card>/load_image_on_perst (not in a guest) Date: December 2014 Contact: linuxppc-dev@lists.ozlabs.org Description: read/write @@ -224,7 +224,7 @@ Description: write only to reload the FPGA depending on load_image_on_perst. Users: https://github.com/ibm-capi/libcxl -What: /sys/class/cxl/<card>/perst_reloads_same_image +What: /sys/class/cxl/<card>/perst_reloads_same_image (not in a guest) Date: July 2015 Contact: linuxppc-dev@lists.ozlabs.org Description: read/write diff --git a/Documentation/powerpc/cxl.txt b/Documentation/powerpc/cxl.txt index 205c1b81625c..d5506ba0fef7 100644 --- a/Documentation/powerpc/cxl.txt +++ b/Documentation/powerpc/cxl.txt @@ -116,6 +116,8 @@ Work Element Descriptor (WED) User API ======== +1. AFU character devices + For AFUs operating in AFU directed mode, two character device files will be created. /dev/cxl/afu0.0m will correspond to a master context and /dev/cxl/afu0.0s will correspond to a slave @@ -362,6 +364,59 @@ read reserved fields: For future extensions and padding + +2. Card character device (powerVM guest only) + + In a powerVM guest, an extra character device is created for the + card. The device is only used to write (flash) a new image on the + FPGA accelerator. Once the image is written and verified, the + device tree is updated and the card is reset to reload the updated + image. + +open +---- + + Opens the device and allocates a file descriptor to be used with + the rest of the API. The device can only be opened once. + +ioctl +----- + +CXL_IOCTL_DOWNLOAD_IMAGE: +CXL_IOCTL_VALIDATE_IMAGE: + Starts and controls flashing a new FPGA image. Partial + reconfiguration is not supported (yet), so the image must contain + a copy of the PSL and AFU(s). Since an image can be quite large, + the caller may have to iterate, splitting the image in smaller + chunks. + + Takes a pointer to a struct cxl_adapter_image: + struct cxl_adapter_image { + __u64 flags; + __u64 data; + __u64 len_data; + __u64 len_image; + __u64 reserved1; + __u64 reserved2; + __u64 reserved3; + __u64 reserved4; + }; + + flags: + These flags indicate which optional fields are present in + this struct. Currently all fields are mandatory. + + data: + Pointer to a buffer with part of the image to write to the + card. + + len_data: + Size of the buffer pointed to by data. + + len_image: + Full size of the image. + + Sysfs Class =========== diff --git a/MAINTAINERS b/MAINTAINERS index 7f1fa4ff300a..74bbff3dda52 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4230,13 +4230,6 @@ M: Maxim Levitsky <maximlevitsky@gmail.com> S: Maintained F: drivers/media/rc/ene_ir.* -ENHANCED ERROR HANDLING (EEH) -M: Gavin Shan <shangw@linux.vnet.ibm.com> -L: linuxppc-dev@lists.ozlabs.org -S: Supported -F: Documentation/powerpc/eeh-pci-error-recovery.txt -F: arch/powerpc/kernel/eeh*.c - EPSON S1D13XXX FRAMEBUFFER DRIVER M: Kristoffer Ericson <kristoffer.ericson@gmail.com> S: Maintained @@ -8252,6 +8245,15 @@ L: linux-pci@vger.kernel.org S: Supported F: Documentation/PCI/pci-error-recovery.txt +PCI ENHANCED ERROR HANDLING (EEH) FOR POWERPC +M: Russell Currey <ruscur@russell.cc> +L: linuxppc-dev@lists.ozlabs.org +S: Supported +F: Documentation/powerpc/eeh-pci-error-recovery.txt +F: arch/powerpc/kernel/eeh*.c +F: arch/powerpc/platforms/*/eeh*.c +F: arch/powerpc/include/*/eeh*.h + PCI SUBSYSTEM M: Bjorn Helgaas <bhelgaas@google.com> L: linux-pci@vger.kernel.org diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 91da283cd658..5130fa166a93 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -304,7 +304,7 @@ config ZONE_DMA32 config PGTABLE_LEVELS int default 2 if !PPC64 - default 3 if PPC_64K_PAGES + default 3 if PPC_64K_PAGES && !PPC_BOOK3S_64 default 4 source "init/Kconfig" @@ -576,7 +576,7 @@ choice config PPC_4K_PAGES bool "4k page size" - select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S + select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64 config PPC_16K_PAGES bool "16k page size" @@ -585,7 +585,7 @@ config PPC_16K_PAGES config PPC_64K_PAGES bool "64k page size" depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64) - select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S + select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64 config PPC_256K_PAGES bool "256k page size" diff --git a/arch/powerpc/boot/rs6000.h b/arch/powerpc/boot/rs6000.h index 433f45084e41..d70517ccc0f7 100644 --- a/arch/powerpc/boot/rs6000.h +++ b/arch/powerpc/boot/rs6000.h @@ -239,5 +239,5 @@ struct external_reloc { #define DEFAULT_DATA_SECTION_ALIGNMENT 4 #define DEFAULT_BSS_SECTION_ALIGNMENT 4 #define DEFAULT_TEXT_SECTION_ALIGNMENT 4 -/* For new sections we havn't heard of before */ +/* For new sections we haven't heard of before */ #define DEFAULT_SECTION_ALIGNMENT 4 diff --git a/arch/powerpc/boot/treeboot-akebono.c b/arch/powerpc/boot/treeboot-akebono.c index b73174c34fe4..bcc5902f8462 100644 --- a/arch/powerpc/boot/treeboot-akebono.c +++ b/arch/powerpc/boot/treeboot-akebono.c @@ -38,7 +38,7 @@ BSS_STACK(4096); -#define SPRN_PIR 0x11E /* Processor Indentification Register */ +#define SPRN_PIR 0x11E /* Processor Identification Register */ #define USERDATA_LEN 256 /* Length of userdata passed in by PIBS */ #define MAX_RANKS 0x4 #define DDR3_MR0CF 0x80010011U diff --git a/arch/powerpc/boot/treeboot-currituck.c b/arch/powerpc/boot/treeboot-currituck.c index 925ae43b7467..303d2074ee56 100644 --- a/arch/powerpc/boot/treeboot-currituck.c +++ b/arch/powerpc/boot/treeboot-currituck.c @@ -80,7 +80,7 @@ static void ibm_currituck_fixups(void) } } -#define SPRN_PIR 0x11E /* Processor Indentification Register */ +#define SPRN_PIR 0x11E /* Processor Identification Register */ void platform_init(void) { unsigned long end_of_ram, avail_ram; diff --git a/arch/powerpc/boot/treeboot-iss4xx.c b/arch/powerpc/boot/treeboot-iss4xx.c index 329e710feda2..733f8bf25184 100644 --- a/arch/powerpc/boot/treeboot-iss4xx.c +++ b/arch/powerpc/boot/treeboot-iss4xx.c @@ -59,7 +59,7 @@ static void *iss_4xx_vmlinux_alloc(unsigned long size) return (void *)ibm4xx_memstart; } -#define SPRN_PIR 0x11E /* Processor Indentification Register */ +#define SPRN_PIR 0x11E /* Processor Identification Register */ void platform_init(void) { unsigned long end_of_ram = 0x08000000; diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig new file mode 100644 index 000000000000..045031048f8d --- /dev/null +++ b/arch/powerpc/configs/powernv_defconfig @@ -0,0 +1,313 @@ +CONFIG_PPC64=y +CONFIG_SMP=y +CONFIG_NR_CPUS=2048 +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y +CONFIG_AUDIT=y +CONFIG_IRQ_DOMAIN_DEBUG=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NUMA_BALANCING=y +CONFIG_CGROUPS=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PERF=y +CONFIG_USER_NS=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_OPAL_PRD=y +# CONFIG_PPC_PSERIES is not set +# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_CPU_IDLE=y +CONFIG_HZ_100=y +CONFIG_BINFMT_MISC=m +CONFIG_PPC_TRANSACTIONAL_MEM=y +CONFIG_HOTPLUG_CPU=y +CONFIG_KEXEC=y +CONFIG_IRQ_ALL_CPUS=y +CONFIG_NUMA=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_PPC_64K_PAGES=y +CONFIG_PPC_SUBPAGE_PROT=y +CONFIG_SCHED_SMT=y +CONFIG_PM=y +CONFIG_PCI_MSI=y +CONFIG_HOTPLUG_PCI=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_NET_IPIP=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +# CONFIG_IPV6 is not set +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_ADVANCED is not set +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_MTD=y +CONFIG_MTD_POWERNV_FLASH=y +CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m +CONFIG_BLK_DEV_FD=m +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=65536 +CONFIG_VIRTIO_BLK=m +CONFIG_IDE=y +CONFIG_BLK_DEV_IDECD=y +CONFIG_BLK_DEV_GENERIC=y +CONFIG_BLK_DEV_AMD74XX=y +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_SRP_ATTRS=y +CONFIG_SCSI_CXGB3_ISCSI=m +CONFIG_SCSI_CXGB4_ISCSI=m +CONFIG_SCSI_BNX2_ISCSI=m +CONFIG_BE2ISCSI=m +CONFIG_SCSI_MPT2SAS=m +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 +CONFIG_SCSI_IPR=y +CONFIG_SCSI_QLA_FC=m +CONFIG_SCSI_QLA_ISCSI=m +CONFIG_SCSI_LPFC=m +CONFIG_SCSI_VIRTIO=m +CONFIG_SCSI_DH=y +CONFIG_SCSI_DH_RDAC=m +CONFIG_SCSI_DH_ALUA=m +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +# CONFIG_ATA_SFF is not set +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=y +CONFIG_MD_RAID0=y +CONFIG_MD_RAID1=y +CONFIG_MD_RAID10=m +CONFIG_MD_RAID456=m +CONFIG_MD_MULTIPATH=m +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_THIN_PROVISIONING=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_UEVENT=y +CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_VXLAN=m +CONFIG_NETCONSOLE=y +CONFIG_TUN=m +CONFIG_VETH=m +CONFIG_VIRTIO_NET=m +CONFIG_VHOST_NET=m +CONFIG_VORTEX=y +CONFIG_ACENIC=m +CONFIG_ACENIC_OMIT_TIGON_I=y +CONFIG_PCNET32=y +CONFIG_TIGON3=y +CONFIG_BNX2X=m +CONFIG_CHELSIO_T1=m +CONFIG_BE2NET=m +CONFIG_S2IO=m +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_IXGB=m +CONFIG_IXGBE=m +CONFIG_MLX4_EN=m +CONFIG_MYRI10GE=m +CONFIG_QLGE=m +CONFIG_NETXEN_NIC=m +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_EVDEV=m +CONFIG_INPUT_MISC=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_DEVPTS_MULTIPLE_INSTANCES=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_JSM=m +CONFIG_VIRTIO_CONSOLE=m +CONFIG_IPMI_HANDLER=y +CONFIG_IPMI_DEVICE_INTERFACE=y +CONFIG_IPMI_POWERNV=y +CONFIG_RAW_DRIVER=y +CONFIG_MAX_RAW_DEVS=1024 +CONFIG_DRM=y +CONFIG_DRM_AST=y +CONFIG_FIRMWARE_EDID=y +CONFIG_FB_OF=y +CONFIG_FB_MATROX=y +CONFIG_FB_MATROX_MILLENIUM=y +CONFIG_FB_MATROX_MYSTIQUE=y +CONFIG_FB_MATROX_G=y +CONFIG_FB_RADEON=y +CONFIG_FB_IBM_GXT4500=y +CONFIG_LCD_PLATFORM=m +# CONFIG_VGA_CONSOLE is not set +CONFIG_LOGO=y +CONFIG_HID_GYRATION=y +CONFIG_HID_PANTHERLORD=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SUNPLUS=y +CONFIG_USB_HIDDEV=y +CONFIG_USB=y +CONFIG_USB_MON=m +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_HCD_PPC_OF is not set +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_STORAGE=m +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=m +CONFIG_LEDS_POWERNV=m +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_CXGB3=m +CONFIG_INFINIBAND_CXGB4=m +CONFIG_MLX4_INFINIBAND=m +CONFIG_INFINIBAND_IPOIB=m +CONFIG_INFINIBAND_IPOIB_CM=y +CONFIG_INFINIBAND_SRP=m +CONFIG_INFINIBAND_ISER=m +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_GENERIC=y +CONFIG_VIRTIO_PCI=m +CONFIG_VIRTIO_BALLOON=m +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_REISERFS_FS=y +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y +CONFIG_XFS_FS=m +CONFIG_XFS_POSIX_ACL=y +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_NILFS2_FS=m +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=m +CONFIG_OVERLAY_FS=m +CONFIG_ISO9660_FS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_PSTORE=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_LATENCYTOP=y +CONFIG_SCHED_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_CODE_PATCHING_SELFTEST=y +CONFIG_FTR_FIXUP_SELFTEST=y +CONFIG_MSI_BITMAP_SELFTEST=y +CONFIG_XMON=y +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_LZO=m +CONFIG_CRYPTO_DEV_NX=y +CONFIG_VIRTUALIZATION=y +CONFIG_KVM_BOOK3S_64=m +CONFIG_KVM_BOOK3S_64_HV=m diff --git a/arch/powerpc/crypto/aes-spe-core.S b/arch/powerpc/crypto/aes-spe-core.S index 5dc6bce90a77..bc6ff43a9889 100644 --- a/arch/powerpc/crypto/aes-spe-core.S +++ b/arch/powerpc/crypto/aes-spe-core.S @@ -61,7 +61,7 @@ * via bl/blr. It expects that caller has pre-xored input data with first * 4 words of encryption key into rD0-rD3. Pointer/counter registers must * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3 - * and rW0-rW3 and caller must execute a final xor on the ouput registers. + * and rW0-rW3 and caller must execute a final xor on the output registers. * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing. * */ @@ -209,7 +209,7 @@ ppc_encrypt_block_loop: * via bl/blr. It expects that caller has pre-xored input data with first * 4 words of encryption key into rD0-rD3. Pointer/counter registers must * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3 - * and rW0-rW3 and caller must execute a final xor on the ouput registers. + * and rW0-rW3 and caller must execute a final xor on the output registers. * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing. * */ diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c index 93ee046d12cd..6d99ebf2ea15 100644 --- a/arch/powerpc/crypto/aes-spe-glue.c +++ b/arch/powerpc/crypto/aes-spe-glue.c @@ -32,7 +32,7 @@ * 16 byte block block or 25 cycles per byte. Thus 768 bytes of input data * will need an estimated maximum of 20,000 cycles. Headroom for cache misses * included. Even with the low end model clocked at 667 MHz this equals to a - * critical time window of less than 30us. The value has been choosen to + * critical time window of less than 30us. The value has been chosen to * process a 512 byte disk block in one or a large 1400 bytes IPsec network * packet in two runs. * diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 55f106ed12bf..ae0751ef8788 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -12,6 +12,24 @@ #define ATOMIC_INIT(i) { (i) } +/* + * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with + * a "bne-" instruction at the end, so an isync is enough as a acquire barrier + * on the platform without lwsync. + */ +#define __atomic_op_acquire(op, args...) \ +({ \ + typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \ + __asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory"); \ + __ret; \ +}) + +#define __atomic_op_release(op, args...) \ +({ \ + __asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory"); \ + op##_relaxed(args); \ +}) + static __inline__ int atomic_read(const atomic_t *v) { int t; @@ -42,27 +60,27 @@ static __inline__ void atomic_##op(int a, atomic_t *v) \ : "cc"); \ } \ -#define ATOMIC_OP_RETURN(op, asm_op) \ -static __inline__ int atomic_##op##_return(int a, atomic_t *v) \ +#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \ +static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \ { \ int t; \ \ __asm__ __volatile__( \ - PPC_ATOMIC_ENTRY_BARRIER \ -"1: lwarx %0,0,%2 # atomic_" #op "_return\n" \ - #asm_op " %0,%1,%0\n" \ - PPC405_ERR77(0,%2) \ -" stwcx. %0,0,%2 \n" \ +"1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \ + #asm_op " %0,%2,%0\n" \ + PPC405_ERR77(0, %3) \ +" stwcx. %0,0,%3\n" \ " bne- 1b\n" \ - PPC_ATOMIC_EXIT_BARRIER \ - : "=&r" (t) \ + : "=&r" (t), "+m" (v->counter) \ : "r" (a), "r" (&v->counter) \ - : "cc", "memory"); \ + : "cc"); \ \ return t; \ } -#define ATOMIC_OPS(op, asm_op) ATOMIC_OP(op, asm_op) ATOMIC_OP_RETURN(op, asm_op) +#define ATOMIC_OPS(op, asm_op) \ + ATOMIC_OP(op, asm_op) \ + ATOMIC_OP_RETURN_RELAXED(op, asm_op) ATOMIC_OPS(add, add) ATOMIC_OPS(sub, subf) @@ -71,8 +89,11 @@ ATOMIC_OP(and, and) ATOMIC_OP(or, or) ATOMIC_OP(xor, xor) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed + #undef ATOMIC_OPS -#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP_RETURN_RELAXED #undef ATOMIC_OP #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) @@ -92,21 +113,19 @@ static __inline__ void atomic_inc(atomic_t *v) : "cc", "xer"); } -static __inline__ int atomic_inc_return(atomic_t *v) +static __inline__ int atomic_inc_return_relaxed(atomic_t *v) { int t; __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER -"1: lwarx %0,0,%1 # atomic_inc_return\n\ - addic %0,%0,1\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1 \n\ - bne- 1b" - PPC_ATOMIC_EXIT_BARRIER - : "=&r" (t) +"1: lwarx %0,0,%2 # atomic_inc_return_relaxed\n" +" addic %0,%0,1\n" + PPC405_ERR77(0, %2) +" stwcx. %0,0,%2\n" +" bne- 1b" + : "=&r" (t), "+m" (v->counter) : "r" (&v->counter) - : "cc", "xer", "memory"); + : "cc", "xer"); return t; } @@ -136,27 +155,34 @@ static __inline__ void atomic_dec(atomic_t *v) : "cc", "xer"); } -static __inline__ int atomic_dec_return(atomic_t *v) +static __inline__ int atomic_dec_return_relaxed(atomic_t *v) { int t; __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER -"1: lwarx %0,0,%1 # atomic_dec_return\n\ - addic %0,%0,-1\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1\n\ - bne- 1b" - PPC_ATOMIC_EXIT_BARRIER - : "=&r" (t) +"1: lwarx %0,0,%2 # atomic_dec_return_relaxed\n" +" addic %0,%0,-1\n" + PPC405_ERR77(0, %2) +" stwcx. %0,0,%2\n" +" bne- 1b" + : "=&r" (t), "+m" (v->counter) : "r" (&v->counter) - : "cc", "xer", "memory"); + : "cc", "xer"); return t; } +#define atomic_inc_return_relaxed atomic_inc_return_relaxed +#define atomic_dec_return_relaxed atomic_dec_return_relaxed + #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) +#define atomic_cmpxchg_relaxed(v, o, n) \ + cmpxchg_relaxed(&((v)->counter), (o), (n)) +#define atomic_cmpxchg_acquire(v, o, n) \ + cmpxchg_acquire(&((v)->counter), (o), (n)) + #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) /** * __atomic_add_unless - add unless the number is a given value @@ -285,26 +311,27 @@ static __inline__ void atomic64_##op(long a, atomic64_t *v) \ : "cc"); \ } -#define ATOMIC64_OP_RETURN(op, asm_op) \ -static __inline__ long atomic64_##op##_return(long a, atomic64_t *v) \ +#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \ +static inline long \ +atomic64_##op##_return_relaxed(long a, atomic64_t *v) \ { \ long t; \ \ __asm__ __volatile__( \ - PPC_ATOMIC_ENTRY_BARRIER \ -"1: ldarx %0,0,%2 # atomic64_" #op "_return\n" \ - #asm_op " %0,%1,%0\n" \ -" stdcx. %0,0,%2 \n" \ +"1: ldarx %0,0,%3 # atomic64_" #op "_return_relaxed\n" \ + #asm_op " %0,%2,%0\n" \ +" stdcx. %0,0,%3\n" \ " bne- 1b\n" \ - PPC_ATOMIC_EXIT_BARRIER \ - : "=&r" (t) \ + : "=&r" (t), "+m" (v->counter) \ : "r" (a), "r" (&v->counter) \ - : "cc", "memory"); \ + : "cc"); \ \ return t; \ } -#define ATOMIC64_OPS(op, asm_op) ATOMIC64_OP(op, asm_op) ATOMIC64_OP_RETURN(op, asm_op) +#define ATOMIC64_OPS(op, asm_op) \ + ATOMIC64_OP(op, asm_op) \ + ATOMIC64_OP_RETURN_RELAXED(op, asm_op) ATOMIC64_OPS(add, add) ATOMIC64_OPS(sub, subf) @@ -312,8 +339,11 @@ ATOMIC64_OP(and, and) ATOMIC64_OP(or, or) ATOMIC64_OP(xor, xor) -#undef ATOMIC64_OPS -#undef ATOMIC64_OP_RETURN +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed + +#undef ATOPIC64_OPS +#undef ATOMIC64_OP_RETURN_RELAXED #undef ATOMIC64_OP #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) @@ -332,20 +362,18 @@ static __inline__ void atomic64_inc(atomic64_t *v) : "cc", "xer"); } -static __inline__ long atomic64_inc_return(atomic64_t *v) +static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v) { long t; __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER -"1: ldarx %0,0,%1 # atomic64_inc_return\n\ - addic %0,%0,1\n\ - stdcx. %0,0,%1 \n\ - bne- 1b" - PPC_ATOMIC_EXIT_BARRIER - : "=&r" (t) +"1: ldarx %0,0,%2 # atomic64_inc_return_relaxed\n" +" addic %0,%0,1\n" +" stdcx. %0,0,%2\n" +" bne- 1b" + : "=&r" (t), "+m" (v->counter) : "r" (&v->counter) - : "cc", "xer", "memory"); + : "cc", "xer"); return t; } @@ -374,24 +402,25 @@ static __inline__ void atomic64_dec(atomic64_t *v) : "cc", "xer"); } -static __inline__ long atomic64_dec_return(atomic64_t *v) +static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v) { long t; __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER -"1: ldarx %0,0,%1 # atomic64_dec_return\n\ - addic %0,%0,-1\n\ - stdcx. %0,0,%1\n\ - bne- 1b" - PPC_ATOMIC_EXIT_BARRIER - : "=&r" (t) +"1: ldarx %0,0,%2 # atomic64_dec_return_relaxed\n" +" addic %0,%0,-1\n" +" stdcx. %0,0,%2\n" +" bne- 1b" + : "=&r" (t), "+m" (v->counter) : "r" (&v->counter) - : "cc", "xer", "memory"); + : "cc", "xer"); return t; } +#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed +#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed + #define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) @@ -420,7 +449,13 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v) } #define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_cmpxchg_relaxed(v, o, n) \ + cmpxchg_relaxed(&((v)->counter), (o), (n)) +#define atomic64_cmpxchg_acquire(v, o, n) \ + cmpxchg_acquire(&((v)->counter), (o), (n)) + #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) +#define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) /** * atomic64_add_unless - add unless the number is a given value diff --git a/arch/powerpc/include/asm/mmu-hash32.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 16f513e5cbd7..16f513e5cbd7 100644 --- a/arch/powerpc/include/asm/mmu-hash32.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index ea0414d6659e..5f08a0832238 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -52,44 +52,14 @@ _PAGE_F_SECOND | _PAGE_F_GIX) /* shift to put page number into pte */ -#define PTE_RPN_SHIFT (18) +#define PTE_RPN_SHIFT (12) +#define PTE_RPN_SIZE (45) /* gives 57-bit real addresses */ #define _PAGE_4K_PFN 0 #ifndef __ASSEMBLY__ /* - * 4-level page tables related bits + * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ - -#define pgd_none(pgd) (!pgd_val(pgd)) -#define pgd_bad(pgd) (pgd_val(pgd) == 0) -#define pgd_present(pgd) (pgd_val(pgd) != 0) -#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) - -static inline void pgd_clear(pgd_t *pgdp) -{ - *pgdp = __pgd(0); -} - -static inline pte_t pgd_pte(pgd_t pgd) -{ - return __pte(pgd_val(pgd)); -} - -static inline pgd_t pte_pgd(pte_t pte) -{ - return __pgd(pte_val(pte)); -} -extern struct page *pgd_page(pgd_t pgd); - -#define pud_offset(pgdp, addr) \ - (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ - (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) - -#define pud_ERROR(e) \ - pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) - -/* - * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ #define remap_4k_pfn(vma, addr, pfn, prot) \ remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot)) diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 849bbec80f7b..0a7956a80a08 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -1,15 +1,14 @@ #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H -#include <asm-generic/pgtable-nopud.h> - #define PTE_INDEX_SIZE 8 -#define PMD_INDEX_SIZE 10 -#define PUD_INDEX_SIZE 0 +#define PMD_INDEX_SIZE 5 +#define PUD_INDEX_SIZE 5 #define PGD_INDEX_SIZE 12 #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) /* With 4k base page size, hugepage PTEs go at the PMD level */ @@ -20,13 +19,18 @@ #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -/* PGDIR_SHIFT determines what a third-level page table entry can map */ -#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +/* PUD_SHIFT determines what a third-level page table entry can map */ +#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define _PAGE_COMBO 0x00040000 /* this is a combo 4k page */ -#define _PAGE_4K_PFN 0x00080000 /* PFN is for a single 4k page */ +#define _PAGE_COMBO 0x00001000 /* this is a combo 4k page */ +#define _PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */ /* * Used to track subpage group valid if _PAGE_COMBO is set * This overloads _PAGE_F_GIX and _PAGE_F_SECOND @@ -39,10 +43,12 @@ /* Shift to put page number into pte. * - * That gives us a max RPN of 34 bits, which means a max of 50 bits - * of addressable physical space, or 46 bits for the special 4k PFNs. + * That gives us a max RPN of 41 bits, which means a max of 57 bits + * of addressable physical space, or 53 bits for the special 4k PFNs. */ -#define PTE_RPN_SHIFT (30) +#define PTE_RPN_SHIFT (16) +#define PTE_RPN_SIZE (41) + /* * we support 16 fragments per PTE page of 64K size. */ @@ -54,13 +60,12 @@ #define PTE_FRAG_SIZE_SHIFT 12 #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT) -/* - * Bits to mask out from a PMD to get to the PTE page - * PMDs point to PTE table fragments which are PTE_FRAG_SIZE aligned. - */ -#define PMD_MASKED_BITS (PTE_FRAG_SIZE - 1) -/* Bits to mask out from a PGD/PUD to get to the PMD page */ -#define PUD_MASKED_BITS 0x1ff +/* Bits to mask out from a PMD to get to the PTE page */ +#define PMD_MASKED_BITS 0xc0000000000000ffUL +/* Bits to mask out from a PUD to get to the PMD page */ +#define PUD_MASKED_BITS 0xc0000000000000ffUL +/* Bits to mask out from a PGD to get to the PUD page */ +#define PGD_MASKED_BITS 0xc0000000000000ffUL #ifndef __ASSEMBLY__ @@ -120,7 +125,7 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); (((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) #define remap_4k_pfn(vma, addr, pfn, prot) \ - (WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT)))) ? -EINVAL : \ + (WARN_ON(((pfn) >= (1UL << PTE_RPN_SIZE))) ? -EINVAL : \ remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, \ __pgprot(pgprot_val((prot)) | _PAGE_4K_PFN))) @@ -130,11 +135,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); #else #define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) #endif +#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) -#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd }))) -#define pte_pgd(pte) ((pgd_t)pte_pud(pte)) - #ifdef CONFIG_HUGETLB_PAGE /* * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have @@ -208,30 +211,30 @@ static inline char *get_hpte_slot_array(pmd_t *pmdp) /* * The linux hugepage PMD now include the pmd entries followed by the address * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits. - * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per + * [ 000 | 1 bit secondary | 3 bit hidx | 1 bit valid]. We use one byte per * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t. * - * The last three bits are intentionally left to zero. This memory location + * The top three bits are intentionally left as zero. This memory location * are also used as normal page PTE pointers. So if we have any pointers * left around while we collapse a hugepage, we need to make sure * _PAGE_PRESENT bit of that is zero when we look at them */ static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index) { - return (hpte_slot_array[index] >> 3) & 0x1; + return hpte_slot_array[index] & 0x1; } static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array, int index) { - return hpte_slot_array[index] >> 4; + return hpte_slot_array[index] >> 1; } static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, unsigned int index, unsigned int hidx) { - hpte_slot_array[index] = hidx << 4 | 0x1 << 3; + hpte_slot_array[index] = (hidx << 1) | 0x1; } /* diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 8d1c8162f0c1..d0ee6fcef823 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -4,8 +4,7 @@ /* * Common bits between 4K and 64K pages in a linux-style PTE. - * These match the bits in the (hardware-defined) PowerPC PTE as closely - * as possible. Additional bits may be defined in pgtable-hash64-*.h + * Additional bits may be defined in pgtable-hash64-*.h * * Note: We only support user read/write permissions. Supervisor always * have full read/write to pages above PAGE_OFFSET (pages below that @@ -14,32 +13,35 @@ * We could create separate kernel read-only if we used the 3 PP bits * combinations that newer processors provide but we currently don't. */ -#define _PAGE_PTE 0x00001 -#define _PAGE_PRESENT 0x00002 /* software: pte contains a translation */ -#define _PAGE_BIT_SWAP_TYPE 2 -#define _PAGE_USER 0x00004 /* matches one of the PP bits */ -#define _PAGE_EXEC 0x00008 /* No execute on POWER4 and newer (we invert) */ -#define _PAGE_GUARDED 0x00010 -/* We can derive Memory coherence from _PAGE_NO_CACHE */ +#define _PAGE_BIT_SWAP_TYPE 0 + +#define _PAGE_EXEC 0x00001 /* execute permission */ +#define _PAGE_RW 0x00002 /* read & write access allowed */ +#define _PAGE_READ 0x00004 /* read access allowed */ +#define _PAGE_USER 0x00008 /* page may be accessed by userspace */ +#define _PAGE_GUARDED 0x00010 /* G: guarded (side-effect) page */ +/* M (memory coherence) is always set in the HPTE, so we don't need it here */ #define _PAGE_COHERENT 0x0 #define _PAGE_NO_CACHE 0x00020 /* I: cache inhibit */ #define _PAGE_WRITETHRU 0x00040 /* W: cache write-through */ #define _PAGE_DIRTY 0x00080 /* C: page changed */ #define _PAGE_ACCESSED 0x00100 /* R: page referenced */ -#define _PAGE_RW 0x00200 /* software: user write access allowed */ -#define _PAGE_HASHPTE 0x00400 /* software: pte has an associated HPTE */ +#define _PAGE_SPECIAL 0x00400 /* software: special page */ #define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */ -#define _PAGE_F_GIX 0x07000 /* full page: hidx bits */ -#define _PAGE_F_GIX_SHIFT 12 -#define _PAGE_F_SECOND 0x08000 /* Whether to use secondary hash or not */ -#define _PAGE_SPECIAL 0x10000 /* software: special page */ #ifdef CONFIG_MEM_SOFT_DIRTY -#define _PAGE_SOFT_DIRTY 0x20000 /* software: software dirty tracking */ +#define _PAGE_SOFT_DIRTY 0x200 /* software: software dirty tracking */ #else -#define _PAGE_SOFT_DIRTY 0x00000 +#define _PAGE_SOFT_DIRTY 0x000 #endif +#define _PAGE_F_GIX_SHIFT 57 +#define _PAGE_F_GIX (7ul << 57) /* HPTE index within HPTEG */ +#define _PAGE_F_SECOND (1ul << 60) /* HPTE is in 2ndary HPTEG */ +#define _PAGE_HASHPTE (1ul << 61) /* PTE has associated HPTE */ +#define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */ +#define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */ + /* * We need to differentiate between explicit huge page and THP huge * page, since THP huge page also need to track real subpage details @@ -132,7 +134,7 @@ * The mask convered by the RPN must be a ULL on 32-bit platforms with * 64-bit PTEs */ -#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) +#define PTE_RPN_MASK (((1UL << PTE_RPN_SIZE) - 1) << PTE_RPN_SHIFT) /* * _PAGE_CHG_MASK masks of bits that are to be preserved across * pgprot changes @@ -223,15 +225,17 @@ #define PUD_BAD_BITS (PMD_TABLE_SIZE-1) #ifndef __ASSEMBLY__ -#define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \ - || (pmd_val(pmd) & PMD_BAD_BITS)) -#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) +#define pmd_bad(pmd) (pmd_val(pmd) & PMD_BAD_BITS) +#define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS) + +#define pud_bad(pud) (pud_val(pud) & PUD_BAD_BITS) +#define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) -#define pud_bad(pud) (!is_kernel_addr(pud_val(pud)) \ - || (pud_val(pud) & PUD_BAD_BITS)) -#define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) +/* Pointers in the page table tree are physical addresses */ +#define __pgtable_ptr_val(ptr) __pa(ptr) #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) +#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) #define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) #define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) @@ -360,8 +364,18 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) :"cc"); } +static inline int pgd_bad(pgd_t pgd) +{ + return (pgd_val(pgd) == 0); +} + #define __HAVE_ARCH_PTE_SAME #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) +static inline unsigned long pgd_page_vaddr(pgd_t pgd) +{ + return (unsigned long)__va(pgd_val(pgd) & ~PGD_MASKED_BITS); +} + /* Generic accessors to PTE bits */ static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} @@ -402,7 +416,7 @@ static inline int pte_protnone(pte_t pte) static inline int pte_present(pte_t pte) { - return pte_val(pte) & _PAGE_PRESENT; + return !!(pte_val(pte) & _PAGE_PRESENT); } /* Conversion functions: convert a page and protection to a page entry, @@ -413,13 +427,13 @@ static inline int pte_present(pte_t pte) */ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) { - return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) | + return __pte((((pte_basic_t)(pfn) << PTE_RPN_SHIFT) & PTE_RPN_MASK) | pgprot_val(pgprot)); } static inline unsigned long pte_pfn(pte_t pte) { - return pte_val(pte) >> PTE_RPN_SHIFT; + return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT; } /* Generic modifiers for PTE bits */ diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 7352d3f212df..0cea4807e26f 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -114,6 +114,7 @@ #define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */ #define POWER8_TLB_SETS 512 /* # sets in POWER8 TLB */ +#define POWER9_TLB_SETS_HASH 256 /* # sets in POWER9 TLB Hash mode */ #ifndef __ASSEMBLY__ @@ -607,6 +608,9 @@ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1; return get_vsid(context, ea, ssize); } + +unsigned htab_shift_for_mem_size(unsigned long mem_size); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_MMU_HASH64_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 8d1c41d28318..77d3ce05798e 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -43,13 +43,8 @@ */ #ifndef __real_pte -#ifdef CONFIG_STRICT_MM_TYPECHECKS #define __real_pte(e,p) ((real_pte_t){(e)}) #define __rpte_to_pte(r) ((r).pte) -#else -#define __real_pte(e,p) (e) -#define __rpte_to_pte(r) (__pte(r)) -#endif #define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >>_PAGE_F_GIX_SHIFT) #define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ @@ -111,6 +106,26 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) *pgdp = __pgd(val); } +static inline void pgd_clear(pgd_t *pgdp) +{ + *pgdp = __pgd(0); +} + +#define pgd_none(pgd) (!pgd_val(pgd)) +#define pgd_present(pgd) (!pgd_none(pgd)) + +static inline pte_t pgd_pte(pgd_t pgd) +{ + return __pte(pgd_val(pgd)); +} + +static inline pgd_t pte_pgd(pte_t pte) +{ + return __pgd(pte_val(pte)); +} + +extern struct page *pgd_page(pgd_t pgd); + /* * Find an entry in a page-table-directory. We combine the address region * (the high order N bits) and the pgd portion of the address. @@ -118,9 +133,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) +#define pud_offset(pgdp, addr) \ + (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr)) #define pmd_offset(pudp,addr) \ (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) - #define pte_offset_kernel(dir,addr) \ (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) @@ -135,6 +151,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) @@ -154,10 +172,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) #define SWP_TYPE_BITS 5 #define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ & ((1UL << SWP_TYPE_BITS) - 1)) -#define __swp_offset(x) ((x).val >> PTE_RPN_SHIFT) +#define __swp_offset(x) (((x).val & PTE_RPN_MASK) >> PTE_RPN_SHIFT) #define __swp_entry(type, offset) ((swp_entry_t) { \ - ((type) << _PAGE_BIT_SWAP_TYPE) \ - | ((offset) << PTE_RPN_SHIFT) }) + ((type) << _PAGE_BIT_SWAP_TYPE) \ + | (((offset) << PTE_RPN_SHIFT) & PTE_RPN_MASK)}) /* * swp_entry_t must be independent of pte bits. We build a swp_entry_t from * swap type and offset we get from swap and convert that to pte to find a @@ -281,6 +299,10 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); +#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE +extern void pmdp_huge_split_prepare(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + #define pmd_move_must_withdraw pmd_move_must_withdraw struct spinlock; static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h new file mode 100644 index 000000000000..1b753f96b374 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -0,0 +1,94 @@ +#ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H +#define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H + +#define MMU_NO_CONTEXT 0 + +/* + * TLB flushing for 64-bit hash-MMU CPUs + */ + +#include <linux/percpu.h> +#include <asm/page.h> + +#define PPC64_TLB_BATCH_NR 192 + +struct ppc64_tlb_batch { + int active; + unsigned long index; + struct mm_struct *mm; + real_pte_t pte[PPC64_TLB_BATCH_NR]; + unsigned long vpn[PPC64_TLB_BATCH_NR]; + unsigned int psize; + int ssize; +}; +DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); + +extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); + +#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE + +static inline void arch_enter_lazy_mmu_mode(void) +{ + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + + batch->active = 1; +} + +static inline void arch_leave_lazy_mmu_mode(void) +{ + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + + if (batch->index) + __flush_tlb_pending(batch); + batch->active = 0; +} + +#define arch_flush_lazy_mmu_mode() do {} while (0) + + +extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, + int ssize, unsigned long flags); +extern void flush_hash_range(unsigned long number, int local); +extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize, + unsigned long flags); + +static inline void local_flush_tlb_mm(struct mm_struct *mm) +{ +} + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ +} + +static inline void local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ +} + +static inline void flush_tlb_page_nohash(struct vm_area_struct *vma, + unsigned long vmaddr) +{ +} + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ +} + +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ +} + +/* Private function for use by PCI IO mapping code */ +extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, + unsigned long end); +extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr); +#endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H */ diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index d1a8d93cccfd..44efe739b6b9 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -5,25 +5,25 @@ #include <linux/compiler.h> #include <asm/synch.h> #include <asm/asm-compat.h> +#include <linux/bug.h> /* * Atomic exchange * - * Changes the memory location '*ptr' to be val and returns + * Changes the memory location '*p' to be val and returns * the previous value stored there. */ + static __always_inline unsigned long -__xchg_u32(volatile void *p, unsigned long val) +__xchg_u32_local(volatile void *p, unsigned long val) { unsigned long prev; __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stwcx. %3,0,%2 \n\ bne- 1b" - PPC_ATOMIC_EXIT_BARRIER : "=&r" (prev), "+m" (*(volatile unsigned int *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -31,42 +31,34 @@ __xchg_u32(volatile void *p, unsigned long val) return prev; } -/* - * Atomic exchange - * - * Changes the memory location '*ptr' to be val and returns - * the previous value stored there. - */ static __always_inline unsigned long -__xchg_u32_local(volatile void *p, unsigned long val) +__xchg_u32_relaxed(u32 *p, unsigned long val) { unsigned long prev; __asm__ __volatile__( -"1: lwarx %0,0,%2 \n" - PPC405_ERR77(0,%2) -" stwcx. %3,0,%2 \n\ - bne- 1b" - : "=&r" (prev), "+m" (*(volatile unsigned int *)p) +"1: lwarx %0,0,%2\n" + PPC405_ERR77(0, %2) +" stwcx. %3,0,%2\n" +" bne- 1b" + : "=&r" (prev), "+m" (*p) : "r" (p), "r" (val) - : "cc", "memory"); + : "cc"); return prev; } #ifdef CONFIG_PPC64 static __always_inline unsigned long -__xchg_u64(volatile void *p, unsigned long val) +__xchg_u64_local(volatile void *p, unsigned long val) { unsigned long prev; __asm__ __volatile__( - PPC_ATOMIC_ENTRY_BARRIER "1: ldarx %0,0,%2 \n" PPC405_ERR77(0,%2) " stdcx. %3,0,%2 \n\ bne- 1b" - PPC_ATOMIC_EXIT_BARRIER : "=&r" (prev), "+m" (*(volatile unsigned long *)p) : "r" (p), "r" (val) : "cc", "memory"); @@ -75,64 +67,52 @@ __xchg_u64(volatile void *p, unsigned long val) } static __always_inline unsigned long -__xchg_u64_local(volatile void *p, unsigned long val) +__xchg_u64_relaxed(u64 *p, unsigned long val) { unsigned long prev; __asm__ __volatile__( -"1: ldarx %0,0,%2 \n" - PPC405_ERR77(0,%2) -" stdcx. %3,0,%2 \n\ - bne- 1b" - : "=&r" (prev), "+m" (*(volatile unsigned long *)p) +"1: ldarx %0,0,%2\n" + PPC405_ERR77(0, %2) +" stdcx. %3,0,%2\n" +" bne- 1b" + : "=&r" (prev), "+m" (*p) : "r" (p), "r" (val) - : "cc", "memory"); + : "cc"); return prev; } #endif -/* - * This function doesn't exist, so you'll get a linker error - * if something tries to do an invalid xchg(). - */ -extern void __xchg_called_with_bad_pointer(void); - static __always_inline unsigned long -__xchg(volatile void *ptr, unsigned long x, unsigned int size) +__xchg_local(volatile void *ptr, unsigned long x, unsigned int size) { switch (size) { case 4: - return __xchg_u32(ptr, x); + return __xchg_u32_local(ptr, x); #ifdef CONFIG_PPC64 case 8: - return __xchg_u64(ptr, x); + return __xchg_u64_local(ptr, x); #endif } - __xchg_called_with_bad_pointer(); + BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg"); return x; } static __always_inline unsigned long -__xchg_local(volatile void *ptr, unsigned long x, unsigned int size) +__xchg_relaxed(void *ptr, unsigned long x, unsigned int size) { switch (size) { case 4: - return __xchg_u32_local(ptr, x); + return __xchg_u32_relaxed(ptr, x); #ifdef CONFIG_PPC64 case 8: - return __xchg_u64_local(ptr, x); + return __xchg_u64_relaxed(ptr, x); #endif } - __xchg_called_with_bad_pointer(); + BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg_local"); return x; } -#define xchg(ptr,x) \ - ({ \ - __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ - }) - #define xchg_local(ptr,x) \ ({ \ __typeof__(*(ptr)) _x_ = (x); \ @@ -140,6 +120,12 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size) (unsigned long)_x_, sizeof(*(ptr))); \ }) +#define xchg_relaxed(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_relaxed((ptr), \ + (unsigned long)_x_, sizeof(*(ptr))); \ +}) /* * Compare and exchange - if *p == old, set it to new, * and return the old value of *p. @@ -190,6 +176,56 @@ __cmpxchg_u32_local(volatile unsigned int *p, unsigned long old, return prev; } +static __always_inline unsigned long +__cmpxchg_u32_relaxed(u32 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lwarx %0,0,%2 # __cmpxchg_u32_relaxed\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" + PPC405_ERR77(0, %2) +" stwcx. %4,0,%2\n" +" bne- 1b\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc"); + + return prev; +} + +/* + * cmpxchg family don't have order guarantee if cmp part fails, therefore we + * can avoid superfluous barriers if we use assembly code to implement + * cmpxchg() and cmpxchg_acquire(), however we don't do the similar for + * cmpxchg_release() because that will result in putting a barrier in the + * middle of a ll/sc loop, which is probably a bad idea. For example, this + * might cause the conditional store more likely to fail. + */ +static __always_inline unsigned long +__cmpxchg_u32_acquire(u32 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: lwarx %0,0,%2 # __cmpxchg_u32_acquire\n" +" cmpw 0,%0,%3\n" +" bne- 2f\n" + PPC405_ERR77(0, %2) +" stwcx. %4,0,%2\n" +" bne- 1b\n" + PPC_ACQUIRE_BARRIER + "\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} + #ifdef CONFIG_PPC64 static __always_inline unsigned long __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) @@ -233,11 +269,47 @@ __cmpxchg_u64_local(volatile unsigned long *p, unsigned long old, return prev; } -#endif -/* This function doesn't exist, so you'll get a linker error - if something tries to do an invalid cmpxchg(). */ -extern void __cmpxchg_called_with_bad_pointer(void); +static __always_inline unsigned long +__cmpxchg_u64_relaxed(u64 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: ldarx %0,0,%2 # __cmpxchg_u64_relaxed\n" +" cmpd 0,%0,%3\n" +" bne- 2f\n" +" stdcx. %4,0,%2\n" +" bne- 1b\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc"); + + return prev; +} + +static __always_inline unsigned long +__cmpxchg_u64_acquire(u64 *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( +"1: ldarx %0,0,%2 # __cmpxchg_u64_acquire\n" +" cmpd 0,%0,%3\n" +" bne- 2f\n" +" stdcx. %4,0,%2\n" +" bne- 1b\n" + PPC_ACQUIRE_BARRIER + "\n" +"2:" + : "=&r" (prev), "+m" (*p) + : "r" (p), "r" (old), "r" (new) + : "cc", "memory"); + + return prev; +} +#endif static __always_inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, @@ -251,7 +323,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, return __cmpxchg_u64(ptr, old, new); #endif } - __cmpxchg_called_with_bad_pointer(); + BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg"); return old; } @@ -267,10 +339,41 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, return __cmpxchg_u64_local(ptr, old, new); #endif } - __cmpxchg_called_with_bad_pointer(); + BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_local"); + return old; +} + +static __always_inline unsigned long +__cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new, + unsigned int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32_relaxed(ptr, old, new); +#ifdef CONFIG_PPC64 + case 8: + return __cmpxchg_u64_relaxed(ptr, old, new); +#endif + } + BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_relaxed"); return old; } +static __always_inline unsigned long +__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new, + unsigned int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32_acquire(ptr, old, new); +#ifdef CONFIG_PPC64 + case 8: + return __cmpxchg_u64_acquire(ptr, old, new); +#endif + } + BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire"); + return old; +} #define cmpxchg(ptr, o, n) \ ({ \ __typeof__(*(ptr)) _o_ = (o); \ @@ -288,6 +391,23 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, (unsigned long)_n_, sizeof(*(ptr))); \ }) +#define cmpxchg_relaxed(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \ + (unsigned long)_o_, (unsigned long)_n_, \ + sizeof(*(ptr))); \ +}) + +#define cmpxchg_acquire(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \ + (unsigned long)_o_, (unsigned long)_n_, \ + sizeof(*(ptr))); \ +}) #ifdef CONFIG_PPC64 #define cmpxchg64(ptr, o, n) \ ({ \ @@ -299,7 +419,16 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg_local((ptr), (o), (n)); \ }) -#define cmpxchg64_relaxed cmpxchg64_local +#define cmpxchg64_relaxed(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_relaxed((ptr), (o), (n)); \ +}) +#define cmpxchg64_acquire(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_acquire((ptr), (o), (n)); \ +}) #else #include <asm-generic/cmpxchg-local.h> #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index b118072670fb..94ace9b4c4e1 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -171,7 +171,7 @@ enum { #define CPU_FTR_ARCH_201 LONG_ASM_CONST(0x0000000200000000) #define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000400000000) #define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000800000000) -/* Free LONG_ASM_CONST(0x0000001000000000) */ +#define CPU_FTR_ARCH_300 LONG_ASM_CONST(0x0000001000000000) #define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000002000000000) #define CPU_FTR_CTRL LONG_ASM_CONST(0x0000004000000000) #define CPU_FTR_SMT LONG_ASM_CONST(0x0000008000000000) @@ -196,6 +196,7 @@ enum { #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) +#define CPU_FTR_SUBCORE LONG_ASM_CONST(0x2000000000000000) #ifndef __ASSEMBLY__ @@ -443,9 +444,19 @@ enum { CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP) + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_SUBCORE) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL) +#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ + CPU_FTR_MMCRA | CPU_FTR_SMT | \ + CPU_FTR_COHERENT_ICACHE | \ + CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ + CPU_FTR_DSCR | CPU_FTR_SAO | \ + CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ + CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ + CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -464,7 +475,7 @@ enum { (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \ CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \ - CPU_FTRS_PA6T | CPU_FTR_VSX) + CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9) #endif #else enum { @@ -515,7 +526,8 @@ enum { (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \ CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \ - CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE) + CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \ + CPU_FTRS_POWER9) #endif #else enum { diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index c5eb86f3d452..fb9f376ae27b 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -72,6 +72,7 @@ struct pci_dn; #define EEH_PE_PHB (1 << 1) /* PHB PE */ #define EEH_PE_DEVICE (1 << 2) /* Device PE */ #define EEH_PE_BUS (1 << 3) /* Bus PE */ +#define EEH_PE_VF (1 << 4) /* VF PE */ #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ @@ -81,6 +82,7 @@ struct pci_dn; #define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ #define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */ #define EEH_PE_REMOVED (1 << 10) /* Removed permanently */ +#define EEH_PE_PRI_BUS (1 << 11) /* Cached primary bus */ struct eeh_pe { int type; /* PE type: PHB/Bus/Device */ @@ -135,11 +137,15 @@ struct eeh_dev { int pcix_cap; /* Saved PCIx capability */ int pcie_cap; /* Saved PCIe capability */ int aer_cap; /* Saved AER capability */ + int af_cap; /* Saved AF capability */ struct eeh_pe *pe; /* Associated PE */ struct list_head list; /* Form link list in the PE */ + struct list_head rmv_list; /* Record the removed edevs */ struct pci_controller *phb; /* Associated PHB */ struct pci_dn *pdn; /* Associated PCI device node */ struct pci_dev *pdev; /* Associated PCI device */ + bool in_error; /* Error flag for edev */ + struct pci_dev *physfn; /* Associated SRIOV PF */ struct pci_bus *bus; /* PCI bus for partial hotplug */ }; diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 7eac89b9f02e..42814f0567cc 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -19,7 +19,7 @@ static inline pte_t *hugepd_page(hugepd_t hpd) * We have only four bits to encode, MMU page size */ BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf); - return (pte_t *)(hpd.pd & ~HUGEPD_SHIFT_MASK); + return __va(hpd.pd & HUGEPD_ADDR_MASK); } static inline unsigned int hugepd_mmu_psize(hugepd_t hpd) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index e3b54dd4f730..0bc9c284aa10 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -94,6 +94,7 @@ #define H_SG_LIST -72 #define H_OP_MODE -73 #define H_COP_HW -74 +#define H_STATE -75 #define H_UNSUPPORTED_FLAG_START -256 #define H_UNSUPPORTED_FLAG_END -511 #define H_MULTI_THREADS_ACTIVE -9005 diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index 1cb39c96d155..b3b0f2d020f0 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -89,7 +89,7 @@ extern volatile struct Hydra __iomem *Hydra; #define HYDRA_INT_EXT2 13 /* PCI IRQX */ #define HYDRA_INT_EXT3 14 /* PCI IRQY */ #define HYDRA_INT_EXT4 15 /* PCI IRQZ */ -#define HYDRA_INT_EXT5 16 /* IDE Primay/Secondary */ +#define HYDRA_INT_EXT5 16 /* IDE Primary/Secondary */ #define HYDRA_INT_EXT6 17 /* IDE Secondary */ #define HYDRA_INT_EXT7 18 /* Power Off Request */ #define HYDRA_INT_SPARE 19 diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 6c1297ec374c..2fd1690b79d2 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -300,7 +300,7 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, * When CONFIG_PPC_INDIRECT_MMIO is set, the platform can provide hooks * on all MMIOs. (Note that this is all 64 bits only for now) * - * To help platforms who may need to differenciate MMIO addresses in + * To help platforms who may need to differentiate MMIO addresses in * their hooks, a bitfield is reserved for use by the platform near the * top of MMIO addresses (not PIO, those have to cope the hard way). * diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 3f191f573d4f..fd22442d30a9 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -54,7 +54,7 @@ struct machdep_calls { int psize, int apsize, int ssize); long (*hpte_remove)(unsigned long hpte_group); - void (*hpte_removebolted)(unsigned long ea, + int (*hpte_removebolted)(unsigned long ea, int psize, int ssize); void (*flush_hash_range)(unsigned long number, int local); void (*hugepage_invalidate)(unsigned long vsid, @@ -174,11 +174,11 @@ struct machdep_calls { platform, called once per cpu. */ void (*enable_pmcs)(void); - /* Set DABR for this platform, leave empty for default implemenation */ + /* Set DABR for this platform, leave empty for default implementation */ int (*set_dabr)(unsigned long dabr, unsigned long dabrx); - /* Set DAWR for this platform, leave empty for default implemenation */ + /* Set DAWR for this platform, leave empty for default implementation */ int (*set_dawr)(unsigned long dawr, unsigned long dawrx); diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 3d5abfe6ba67..8ca1c983bf6c 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -97,6 +97,7 @@ #define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE #define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE #define MMU_FTRS_POWER8 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE +#define MMU_FTRS_POWER9 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE #define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ MMU_FTR_CI_LARGE_PAGE #define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ @@ -182,10 +183,10 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr) #if defined(CONFIG_PPC_STD_MMU_64) /* 64-bit classic hash table MMU */ -# include <asm/mmu-hash64.h> +#include <asm/book3s/64/mmu-hash.h> #elif defined(CONFIG_PPC_STD_MMU_32) /* 32-bit classic hash table MMU */ -# include <asm/mmu-hash32.h> +#include <asm/book3s/32/mmu-hash.h> #elif defined(CONFIG_40x) /* 40x-style software loaded TLB */ # include <asm/mmu-40x.h> diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 5b6b5a427b54..cd4ffd86765f 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -19,7 +19,7 @@ * Thanks to Paul M for explaining this. * * PPC can only do rel jumps += 32MB, and often the kernel and other - * modules are furthur away than this. So, we jump to a table of + * modules are further away than this. So, we jump to a table of * trampolines attached to the module (the Procedure Linkage Table) * whenever that happens. */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index b9f734dd5b81..10debb93c4a4 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -108,6 +108,9 @@ #ifndef __ASSEMBLY__ /* pte_clear moved to later in this file */ +/* Pointers in the page table tree are virtual addresses */ +#define __pgtable_ptr_val(ptr) ((unsigned long)(ptr)) + #define PMD_BAD_BITS (PTE_TABLE_SIZE-1) #define PUD_BAD_BITS (PMD_TABLE_SIZE-1) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 07a99e638449..9d86c6651716 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -248,6 +248,7 @@ extern int opal_elog_init(void); extern void opal_platform_dump_init(void); extern void opal_sys_param_init(void); extern void opal_msglog_init(void); +extern void opal_msglog_sysfs_init(void); extern int opal_async_comp_init(void); extern int opal_sensor_init(void); extern int opal_hmi_handler_init(void); @@ -273,6 +274,8 @@ void opal_free_sg_list(struct opal_sg_list *sg); extern int opal_error_code(int rc); +ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_OPAL_H */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index e34124f6fbf2..ab3d8977bacd 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -271,6 +271,13 @@ extern long long virt_phys_offset; #else #define PD_HUGE 0x80000000 #endif + +#else /* CONFIG_PPC_BOOK3S_64 */ +/* + * Book3S 64 stores real addresses in the hugepd entries to + * avoid overlaps with _PAGE_PRESENT and _PAGE_PTE. + */ +#define HUGEPD_ADDR_MASK (0x0ffffffffffffffful & ~HUGEPD_SHIFT_MASK) #endif /* CONFIG_PPC_BOOK3S_64 */ /* @@ -281,109 +288,7 @@ extern long long virt_phys_offset; #ifndef __ASSEMBLY__ -#ifdef CONFIG_STRICT_MM_TYPECHECKS -/* These are used to make use of C type-checking. */ - -/* PTE level */ -typedef struct { pte_basic_t pte; } pte_t; -#define __pte(x) ((pte_t) { (x) }) -static inline pte_basic_t pte_val(pte_t x) -{ - return x.pte; -} - -/* 64k pages additionally define a bigger "real PTE" type that gathers - * the "second half" part of the PTE for pseudo 64k pages - */ -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) -typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; -#else -typedef struct { pte_t pte; } real_pte_t; -#endif - -/* PMD level */ -#ifdef CONFIG_PPC64 -typedef struct { unsigned long pmd; } pmd_t; -#define __pmd(x) ((pmd_t) { (x) }) -static inline unsigned long pmd_val(pmd_t x) -{ - return x.pmd; -} - -/* PUD level exusts only on 4k pages */ -#ifndef CONFIG_PPC_64K_PAGES -typedef struct { unsigned long pud; } pud_t; -#define __pud(x) ((pud_t) { (x) }) -static inline unsigned long pud_val(pud_t x) -{ - return x.pud; -} -#endif /* !CONFIG_PPC_64K_PAGES */ -#endif /* CONFIG_PPC64 */ - -/* PGD level */ -typedef struct { unsigned long pgd; } pgd_t; -#define __pgd(x) ((pgd_t) { (x) }) -static inline unsigned long pgd_val(pgd_t x) -{ - return x.pgd; -} - -/* Page protection bits */ -typedef struct { unsigned long pgprot; } pgprot_t; -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) }) - -#else - -/* - * .. while these make it easier on the compiler - */ - -typedef pte_basic_t pte_t; -#define __pte(x) (x) -static inline pte_basic_t pte_val(pte_t pte) -{ - return pte; -} - -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) -typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; -#else -typedef pte_t real_pte_t; -#endif - - -#ifdef CONFIG_PPC64 -typedef unsigned long pmd_t; -#define __pmd(x) (x) -static inline unsigned long pmd_val(pmd_t pmd) -{ - return pmd; -} - -#ifndef CONFIG_PPC_64K_PAGES -typedef unsigned long pud_t; -#define __pud(x) (x) -static inline unsigned long pud_val(pud_t pud) -{ - return pud; -} -#endif /* !CONFIG_PPC_64K_PAGES */ -#endif /* CONFIG_PPC64 */ - -typedef unsigned long pgd_t; -#define __pgd(x) (x) -static inline unsigned long pgd_val(pgd_t pgd) -{ - return pgd; -} - -typedef unsigned long pgprot_t; -#define pgprot_val(x) (x) -#define __pgprot(x) (x) - -#endif +#include <asm/pgtable-types.h> typedef struct { signed long pd; } hugepd_t; diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 54843ca5fa2b..9f165e8a77bf 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -212,15 +212,16 @@ struct pci_dn { #define IODA_INVALID_PE (-1) #ifdef CONFIG_PPC_POWERNV int pe_number; + int vf_index; /* VF index in the PF */ #ifdef CONFIG_PCI_IOV u16 vfs_expanded; /* number of VFs IOV BAR expanded */ u16 num_vfs; /* number of VFs enabled*/ - int offset; /* PE# for the first VF PE */ -#define M64_PER_IOV 4 - int m64_per_iov; + int *pe_num_map; /* PE# for the first VF PE or array */ + bool m64_single_mode; /* Use M64 BAR in Single Mode */ #define IODA_INVALID_M64 (-1) - int m64_wins[PCI_SRIOV_NUM_BARS][M64_PER_IOV]; + int (*m64_map)[PCI_SRIOV_NUM_BARS]; #endif /* CONFIG_PCI_IOV */ + int mps; /* Maximum Payload Size */ #endif struct list_head child_list; struct list_head list; diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 814622146d5a..e157489ee7a1 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -136,16 +136,24 @@ extern ssize_t power_events_sysfs_show(struct device *dev, * event 'cpu-cycles' can have two entries in sysfs: 'cpu-cycles' and * 'PM_CYC' where the latter is the name by which the event is known in * POWER CPU specification. + * + * Similarly, some hardware and cache events use the same event code. Eg. + * on POWER8, both "cache-references" and "L1-dcache-loads" events refer + * to the same event, PM_LD_REF_L1. The suffix, allows us to have two + * sysfs objects for the same event and thus two entries/aliases in sysfs. */ #define EVENT_VAR(_id, _suffix) event_attr_##_id##_suffix #define EVENT_PTR(_id, _suffix) &EVENT_VAR(_id, _suffix).attr.attr #define EVENT_ATTR(_name, _id, _suffix) \ - PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_##_id, \ + PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), _id, \ power_events_sysfs_show) #define GENERIC_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _g) #define GENERIC_EVENT_PTR(_id) EVENT_PTR(_id, _g) +#define CACHE_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _c) +#define CACHE_EVENT_PTR(_id) EVENT_PTR(_id, _c) + #define POWER_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _p) #define POWER_EVENT_PTR(_id) EVENT_PTR(_id, _p) diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index 69ef28a81733..8d5fc3ac43da 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h @@ -53,7 +53,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) #ifndef CONFIG_PPC_64K_PAGES -#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD) +#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, __pgtable_ptr_val(PUD)) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -68,19 +68,19 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pud_set(pud, (unsigned long)pmd); + pud_set(pud, __pgtable_ptr_val(pmd)); } static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { - pmd_set(pmd, (unsigned long)pte); + pmd_set(pmd, __pgtable_ptr_val(pte)); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page) { - pmd_set(pmd, (unsigned long)page_address(pte_page)); + pmd_set(pmd, __pgtable_ptr_val(page_address(pte_page))); } #define pmd_pgtable(pmd) pmd_page(pmd) @@ -171,23 +171,45 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); extern void __tlb_remove_table(void *_table); #endif -#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd) +#ifndef __PAGETABLE_PUD_FOLDED +/* book3s 64 is 4 level page table */ +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +{ + pgd_set(pgd, __pgtable_ptr_val(pud)); +} + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), + GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud); +} +#endif + +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + pud_set(pud, __pgtable_ptr_val(pmd)); +} static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { - pmd_set(pmd, (unsigned long)pte); + pmd_set(pmd, __pgtable_ptr_val(pte)); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page) { - pmd_set(pmd, (unsigned long)pte_page); + pmd_set(pmd, __pgtable_ptr_val(pte_page)); } static inline pgtable_t pmd_pgtable(pmd_t pmd) { - return (pgtable_t)(pmd_val(pmd) & ~PMD_MASKED_BITS); + return (pgtable_t)pmd_page_vaddr(pmd); } static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, @@ -233,11 +255,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #define __pmd_free_tlb(tlb, pmd, addr) \ pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX) -#ifndef CONFIG_PPC_64K_PAGES +#ifndef __PAGETABLE_PUD_FOLDED #define __pud_free_tlb(tlb, pud, addr) \ pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE) -#endif /* CONFIG_PPC_64K_PAGES */ +#endif /* __PAGETABLE_PUD_FOLDED */ #define check_pgt_cache() do { } while (0) diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h new file mode 100644 index 000000000000..43140f8b0592 --- /dev/null +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -0,0 +1,103 @@ +#ifndef _ASM_POWERPC_PGTABLE_TYPES_H +#define _ASM_POWERPC_PGTABLE_TYPES_H + +#ifdef CONFIG_STRICT_MM_TYPECHECKS +/* These are used to make use of C type-checking. */ + +/* PTE level */ +typedef struct { pte_basic_t pte; } pte_t; +#define __pte(x) ((pte_t) { (x) }) +static inline pte_basic_t pte_val(pte_t x) +{ + return x.pte; +} + +/* PMD level */ +#ifdef CONFIG_PPC64 +typedef struct { unsigned long pmd; } pmd_t; +#define __pmd(x) ((pmd_t) { (x) }) +static inline unsigned long pmd_val(pmd_t x) +{ + return x.pmd; +} + +/* + * 64 bit hash always use 4 level table. Everybody else use 4 level + * only for 4K page size. + */ +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) +typedef struct { unsigned long pud; } pud_t; +#define __pud(x) ((pud_t) { (x) }) +static inline unsigned long pud_val(pud_t x) +{ + return x.pud; +} +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ +#endif /* CONFIG_PPC64 */ + +/* PGD level */ +typedef struct { unsigned long pgd; } pgd_t; +#define __pgd(x) ((pgd_t) { (x) }) +static inline unsigned long pgd_val(pgd_t x) +{ + return x.pgd; +} + +/* Page protection bits */ +typedef struct { unsigned long pgprot; } pgprot_t; +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) }) + +#else + +/* + * .. while these make it easier on the compiler + */ + +typedef pte_basic_t pte_t; +#define __pte(x) (x) +static inline pte_basic_t pte_val(pte_t pte) +{ + return pte; +} + +#ifdef CONFIG_PPC64 +typedef unsigned long pmd_t; +#define __pmd(x) (x) +static inline unsigned long pmd_val(pmd_t pmd) +{ + return pmd; +} + +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) +typedef unsigned long pud_t; +#define __pud(x) (x) +static inline unsigned long pud_val(pud_t pud) +{ + return pud; +} +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ +#endif /* CONFIG_PPC64 */ + +typedef unsigned long pgd_t; +#define __pgd(x) (x) +static inline unsigned long pgd_val(pgd_t pgd) +{ + return pgd; +} + +typedef unsigned long pgprot_t; +#define pgprot_val(x) (x) +#define __pgprot(x) (x) + +#endif /* CONFIG_STRICT_MM_TYPECHECKS */ +/* + * With hash config 64k pages additionally define a bigger "real PTE" type that + * gathers the "second half" part of the PTE for pseudo 64k pages + */ +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) +typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; +#else +typedef struct { pte_t pte; } real_pte_t; +#endif +#endif /* _ASM_POWERPC_PGTABLE_TYPES_H */ diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h index 10902c9375d0..925697968946 100644 --- a/arch/powerpc/include/asm/pmac_feature.h +++ b/arch/powerpc/include/asm/pmac_feature.h @@ -46,7 +46,7 @@ /* PowerSurge are the first generation of PCI Pmacs. This include * all of the Grand-Central based machines. We currently don't - * differenciate most of them. + * differentiate most of them. */ #define PMAC_TYPE_PSURGE 0x10 /* PowerSurge */ #define PMAC_TYPE_ANS 0x11 /* Apple Network Server */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index ac2330820b9a..8ab8a1a9610a 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -236,7 +236,9 @@ struct thread_struct { #endif struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */ unsigned long trap_nr; /* last trap # on this thread */ + u8 load_fp; #ifdef CONFIG_ALTIVEC + u8 load_vec; struct thread_vr_state vr_state; struct thread_vr_state *vr_save_area; unsigned long vrsave; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c4cb2ffc624e..52ed654d01ba 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -75,6 +75,14 @@ #define MSR_HV 0 #endif +/* + * To be used in shared book E/book S, this avoids needing to worry about + * book S/book E in shared code + */ +#ifndef MSR_SPE +#define MSR_SPE 0 +#endif + #define MSR_VEC __MASK(MSR_VEC_LG) /* Enable AltiVec */ #define MSR_VSX __MASK(MSR_VSX_LG) /* Enable VSX */ #define MSR_POW __MASK(MSR_POW_LG) /* Enable Power Management */ @@ -376,7 +384,7 @@ #define SPRN_TSCR 0x399 /* Thread Switch Control Register */ #define SPRN_DEC 0x016 /* Decrement Register */ -#define SPRN_DER 0x095 /* Debug Enable Regsiter */ +#define SPRN_DER 0x095 /* Debug Enable Register */ #define DER_RSTE 0x40000000 /* Reset Interrupt */ #define DER_CHSTPE 0x20000000 /* Check Stop */ #define DER_MCIE 0x10000000 /* Machine Check Interrupt */ @@ -401,7 +409,7 @@ #define SPRN_DPDES 0x0B0 /* Directed Priv. Doorbell Exc. State */ #define SPRN_EAR 0x11A /* External Address Register */ #define SPRN_HASH1 0x3D2 /* Primary Hash Address Register */ -#define SPRN_HASH2 0x3D3 /* Secondary Hash Address Resgister */ +#define SPRN_HASH2 0x3D3 /* Secondary Hash Address Register */ #define SPRN_HID0 0x3F0 /* Hardware Implementation Register 0 */ #define HID0_HDICE_SH (63 - 23) /* 970 HDEC interrupt enable */ #define HID0_EMCP (1<<31) /* Enable Machine Check pin */ @@ -514,7 +522,7 @@ #define ICTRL_EICP 0x00000100 /* enable icache par. check */ #define SPRN_IMISS 0x3D4 /* Instruction TLB Miss Register */ #define SPRN_IMMR 0x27E /* Internal Memory Map Register */ -#define SPRN_L2CR 0x3F9 /* Level 2 Cache Control Regsiter */ +#define SPRN_L2CR 0x3F9 /* Level 2 Cache Control Register */ #define SPRN_L2CR2 0x3f8 #define L2CR_L2E 0x80000000 /* L2 enable */ #define L2CR_L2PE 0x40000000 /* L2 parity enable */ @@ -549,7 +557,7 @@ #define L2CR_L2DO_745x 0x00010000 /* L2 data only (745x) */ #define L2CR_L2REP_745x 0x00001000 /* L2 repl. algorithm (745x) */ #define L2CR_L2HWF_745x 0x00000800 /* L2 hardware flush (745x) */ -#define SPRN_L3CR 0x3FA /* Level 3 Cache Control Regsiter */ +#define SPRN_L3CR 0x3FA /* Level 3 Cache Control Register */ #define L3CR_L3E 0x80000000 /* L3 enable */ #define L3CR_L3PE 0x40000000 /* L3 data parity enable */ #define L3CR_L3APE 0x20000000 /* L3 addr parity enable */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 2fef74b474f0..737e012ef56e 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -681,7 +681,7 @@ #define SPRN_CDBCR 0x3D7 /* Cache Debug Control Register */ #define SPRN_TBHI 0x3DC /* Time Base High */ #define SPRN_TBLO 0x3DD /* Time Base Low */ -#define SPRN_DBCR 0x3F2 /* Debug Control Regsiter */ +#define SPRN_DBCR 0x3F2 /* Debug Control Register */ #define SPRN_PBL1 0x3FC /* Protection Bound Lower 1 */ #define SPRN_PBL2 0x3FE /* Protection Bound Lower 2 */ #define SPRN_PBU1 0x3FD /* Protection Bound Upper 1 */ diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h index 37d2da6feabf..f280dd11243f 100644 --- a/arch/powerpc/include/asm/smu.h +++ b/arch/powerpc/include/asm/smu.h @@ -154,7 +154,7 @@ * * The Darwin I2C driver is less subtle though. On any non-success status * from the response command, it waits 5ms and tries again up to 20 times, - * it doesn't differenciate between fatal errors or "busy" status. + * it doesn't differentiate between fatal errors or "busy" status. * * This driver provides an asynchronous paramblock based i2c command * interface to be used either directly by low level code or by a higher diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 5b268b6be74c..17c8380673a6 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -28,12 +28,14 @@ extern void giveup_all(struct task_struct *); extern void enable_kernel_fp(void); extern void flush_fp_to_thread(struct task_struct *); extern void giveup_fpu(struct task_struct *); -extern void __giveup_fpu(struct task_struct *); +extern void save_fpu(struct task_struct *); static inline void disable_kernel_fp(void) { msr_check_and_clear(MSR_FP); } #else +static inline void __giveup_fpu(struct task_struct *t) { } +static inline void save_fpu(struct task_struct *t) { } static inline void flush_fp_to_thread(struct task_struct *t) { } #endif @@ -41,18 +43,19 @@ static inline void flush_fp_to_thread(struct task_struct *t) { } extern void enable_kernel_altivec(void); extern void flush_altivec_to_thread(struct task_struct *); extern void giveup_altivec(struct task_struct *); -extern void __giveup_altivec(struct task_struct *); +extern void save_altivec(struct task_struct *); static inline void disable_kernel_altivec(void) { msr_check_and_clear(MSR_VEC); } +#else +static inline void save_altivec(struct task_struct *t) { } +static inline void __giveup_altivec(struct task_struct *t) { } #endif #ifdef CONFIG_VSX extern void enable_kernel_vsx(void); extern void flush_vsx_to_thread(struct task_struct *); -extern void giveup_vsx(struct task_struct *); -extern void __giveup_vsx(struct task_struct *); static inline void disable_kernel_vsx(void) { msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); @@ -68,6 +71,8 @@ static inline void disable_kernel_spe(void) { msr_check_and_clear(MSR_SPE); } +#else +static inline void __giveup_spe(struct task_struct *t) { } #endif static inline void clear_task_ebb(struct task_struct *t) diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 23d351ca0303..9f77f85e3e99 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -78,97 +78,7 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) } #elif defined(CONFIG_PPC_STD_MMU_64) - -#define MMU_NO_CONTEXT 0 - -/* - * TLB flushing for 64-bit hash-MMU CPUs - */ - -#include <linux/percpu.h> -#include <asm/page.h> - -#define PPC64_TLB_BATCH_NR 192 - -struct ppc64_tlb_batch { - int active; - unsigned long index; - struct mm_struct *mm; - real_pte_t pte[PPC64_TLB_BATCH_NR]; - unsigned long vpn[PPC64_TLB_BATCH_NR]; - unsigned int psize; - int ssize; -}; -DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); - -extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); - -#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE - -static inline void arch_enter_lazy_mmu_mode(void) -{ - struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); - - batch->active = 1; -} - -static inline void arch_leave_lazy_mmu_mode(void) -{ - struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); - - if (batch->index) - __flush_tlb_pending(batch); - batch->active = 0; -} - -#define arch_flush_lazy_mmu_mode() do {} while (0) - - -extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, - int ssize, unsigned long flags); -extern void flush_hash_range(unsigned long number, int local); -extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize, - unsigned long flags); - -static inline void local_flush_tlb_mm(struct mm_struct *mm) -{ -} - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ -} - -static inline void local_flush_tlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ -} - -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ -} - -static inline void flush_tlb_page_nohash(struct vm_area_struct *vma, - unsigned long vmaddr) -{ -} - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ -} - -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ -} - -/* Private function for use by PCI IO mapping code */ -extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, - unsigned long end); -extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr); +#include <asm/book3s/64/tlbflush-hash.h> #else #error Unsupported MMU type #endif diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 8e86b48d0369..32e36b16773f 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -57,12 +57,14 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit, extern void hcall_tracepoint_regfunc(void); extern void hcall_tracepoint_unregfunc(void); -TRACE_EVENT_FN(hcall_entry, +TRACE_EVENT_FN_COND(hcall_entry, TP_PROTO(unsigned long opcode, unsigned long *args), TP_ARGS(opcode, args), + TP_CONDITION(cpu_online(raw_smp_processor_id())), + TP_STRUCT__entry( __field(unsigned long, opcode) ), @@ -76,13 +78,15 @@ TRACE_EVENT_FN(hcall_entry, hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc ); -TRACE_EVENT_FN(hcall_exit, +TRACE_EVENT_FN_COND(hcall_exit, TP_PROTO(unsigned long opcode, unsigned long retval, unsigned long *retbuf), TP_ARGS(opcode, retval, retbuf), + TP_CONDITION(cpu_online(raw_smp_processor_id())), + TP_STRUCT__entry( __field(unsigned long, opcode) __field(unsigned long, retval) diff --git a/arch/powerpc/include/asm/uninorth.h b/arch/powerpc/include/asm/uninorth.h index d12b11d7641e..a1d112979fd2 100644 --- a/arch/powerpc/include/asm/uninorth.h +++ b/arch/powerpc/include/asm/uninorth.h @@ -132,7 +132,7 @@ /* This one _might_ return the CPU number of the CPU reading it; * the bootROM decides whether to boot or to sleep/spinloop depending - * on this register beeing 0 or not + * on this register being 0 or not */ #define UNI_N_CPU_NUMBER 0x0050 diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 0e25bdb190bb..5d61bbced6a1 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -1,5 +1,5 @@ /* - * Common definitions accross all variants of ICP and ICS interrupt + * Common definitions across all variants of ICP and ICS interrupt * controllers. */ diff --git a/arch/powerpc/include/uapi/asm/epapr_hcalls.h b/arch/powerpc/include/uapi/asm/epapr_hcalls.h index 7f9c74b46704..b4504f394427 100644 --- a/arch/powerpc/include/uapi/asm/epapr_hcalls.h +++ b/arch/powerpc/include/uapi/asm/epapr_hcalls.h @@ -78,7 +78,7 @@ #define EV_SUCCESS 0 #define EV_EPERM 1 /* Operation not permitted */ #define EV_ENOENT 2 /* Entry Not Found */ -#define EV_EIO 3 /* I/O error occured */ +#define EV_EIO 3 /* I/O error occurred */ #define EV_EAGAIN 4 /* The operation had insufficient * resources to complete and should be * retried @@ -89,7 +89,7 @@ #define EV_ENODEV 7 /* No such device */ #define EV_EINVAL 8 /* An argument supplied to the hcall was out of range or invalid */ -#define EV_INTERNAL 9 /* An internal error occured */ +#define EV_INTERNAL 9 /* An internal error occurred */ #define EV_CONFIG 10 /* A configuration error was detected */ #define EV_INVALID_STATE 11 /* The object is in an invalid state */ #define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 07cebc3514f3..10d5eab19458 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -95,12 +95,14 @@ int main(void) DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state)); DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area)); DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr)); + DEFINE(THREAD_LOAD_FP, offsetof(struct thread_struct, load_fp)); #ifdef CONFIG_ALTIVEC DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state)); DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area)); DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave)); DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr)); DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr)); + DEFINE(THREAD_LOAD_VEC, offsetof(struct thread_struct, load_vec)); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr)); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 9c9b7411b28b..584e119fa8b0 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -15,6 +15,7 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/cache.h> +#include <asm/book3s/64/mmu-hash.h> /* Entry: r3 = crap, r4 = ptr to cputable entry * @@ -83,6 +84,39 @@ _GLOBAL(__restore_cpu_power8) mtlr r11 blr +_GLOBAL(__setup_cpu_power9) + mflr r11 + bl __init_FSCR + bl __init_hvmode_206 + mtlr r11 + beqlr + li r0,0 + mtspr SPRN_LPID,r0 + mfspr r3,SPRN_LPCR + ori r3, r3, LPCR_PECEDH + bl __init_LPCR + bl __init_HFSCR + bl __init_tlb_power9 + mtlr r11 + blr + +_GLOBAL(__restore_cpu_power9) + mflr r11 + bl __init_FSCR + mfmsr r3 + rldicl. r0,r3,4,63 + mtlr r11 + beqlr + li r0,0 + mtspr SPRN_LPID,r0 + mfspr r3,SPRN_LPCR + ori r3, r3, LPCR_PECEDH + bl __init_LPCR + bl __init_HFSCR + bl __init_tlb_power9 + mtlr r11 + blr + __init_hvmode_206: /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */ mfmsr r3 @@ -139,7 +173,7 @@ __init_HFSCR: * (invalidate by congruence class). P7 has 128 CCs., P8 has 512. */ __init_tlb_power7: - li r6,128 + li r6,POWER7_TLB_SETS mtctr r6 li r7,0xc00 /* IS field = 0b11 */ ptesync @@ -150,7 +184,18 @@ __init_tlb_power7: 1: blr __init_tlb_power8: - li r6,512 + li r6,POWER8_TLB_SETS + mtctr r6 + li r7,0xc00 /* IS field = 0b11 */ + ptesync +2: tlbiel r7 + addi r7,r7,0x1000 + bdnz 2b + ptesync +1: blr + +__init_tlb_power9: + li r6,POWER9_TLB_SETS_HASH mtctr r6 li r7,0xc00 /* IS field = 0b11 */ ptesync diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 7d80bfdfb15e..be4d73053bed 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -70,9 +70,12 @@ extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power7(void); extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power8(void); +extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); +extern void __restore_cpu_power9(void); extern void __restore_cpu_a2(void); extern void __flush_tlb_power7(unsigned int action); extern void __flush_tlb_power8(unsigned int action); +extern void __flush_tlb_power9(unsigned int action); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ @@ -116,6 +119,11 @@ extern void __restore_cpu_e6500(void); #define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ PPC_FEATURE_TRUE_LE | \ PPC_FEATURE_HAS_ALTIVEC_COMP) +#define COMMON_USER_POWER9 COMMON_USER_POWER8 +#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \ + PPC_FEATURE2_ARCH_3_00 | \ + PPC_FEATURE2_HAS_IEEE128) + #ifdef CONFIG_PPC_BOOK3E_64 #define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE) #else @@ -499,6 +507,25 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* Power9 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004e0000, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power9", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .flush_tlb = __flush_tlb_power9, + .platform = "power9", + }, { /* Cell Broadband Engine */ .pvr_mask = 0xffff0000, .pvr_value = 0x00700000, diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 40e4d4a27663..6544017eb90b 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -268,13 +268,6 @@ static void *eeh_dump_pe_log(void *data, void *flag) struct eeh_dev *edev, *tmp; size_t *plen = flag; - /* If the PE's config space is blocked, 0xFF's will be - * returned. It's pointless to collect the log in this - * case. - */ - if (pe->state & EEH_PE_CFG_BLOCKED) - return NULL; - eeh_pe_for_each_dev(pe, edev, tmp) *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, EEH_PCI_REGS_LOG_LEN - *plen); @@ -677,7 +670,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) /* Check if the request is finished successfully */ if (active_flag) { rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (rc <= 0) + if (rc < 0) return rc; if (rc & active_flag) @@ -739,7 +732,7 @@ static void *eeh_restore_dev_state(void *data, void *userdata) } /** - * pcibios_set_pcie_slot_reset - Set PCI-E reset state + * pcibios_set_pcie_reset_state - Set PCI-E reset state * @dev: pci device struct * @state: reset state to enter * @@ -761,7 +754,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat case pcie_deassert_reset: eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); eeh_unfreeze_pe(pe, false); - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); eeh_pe_state_clear(pe, EEH_PE_ISOLATED); break; @@ -769,14 +763,16 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); - eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); - eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: @@ -1243,6 +1239,14 @@ void eeh_remove_device(struct pci_dev *dev) * from the parent PE during the BAR resotre. */ edev->pdev = NULL; + + /* + * The flag "in_error" is used to trace EEH devices for VFs + * in error state or not. It's set in eeh_report_error(). If + * it's not set, eeh_report_{reset,resume}() won't be called + * for the VF EEH device. + */ + edev->in_error = false; dev->dev.archdata.edev = NULL; if (!(edev->pe->state & EEH_PE_KEEP)) eeh_rmv_from_parent_pe(edev); @@ -1537,6 +1541,17 @@ int eeh_pe_get_state(struct eeh_pe *pe) if (!eeh_ops || !eeh_ops->get_state) return -ENOENT; + /* + * If the parent PE is owned by the host kernel and is undergoing + * error recovery, we should return the PE state as temporarily + * unavailable so that the error recovery on the guest is suspended + * until the recovery completes on the host. + */ + if (pe->parent && + !(pe->state & EEH_PE_REMOVED) && + (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING))) + return EEH_PE_STATE_UNAVAIL; + result = eeh_ops->get_state(pe, NULL); rst_active = !!(result & EEH_STATE_RESET_ACTIVE); dma_en = !!(result & EEH_STATE_DMA_ENABLED); diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index a1e86e172e3c..ddbcfab7efdf 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -195,8 +195,11 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) return; } - /* Walk resources on this device, poke them into the tree */ - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + /* + * Walk resources on this device, poke the first 7 (6 normal BAR and 1 + * ROM BAR) into the tree. + */ + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { resource_size_t start = pci_resource_start(dev,i); resource_size_t end = pci_resource_end(dev,i); unsigned long flags = pci_resource_flags(dev,i); @@ -222,10 +225,6 @@ void eeh_addr_cache_insert_dev(struct pci_dev *dev) { unsigned long flags; - /* Ignore PCI bridges */ - if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) - return; - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); __eeh_addr_cache_insert_dev(dev); spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index aabba94ff9cb..7815095fe3d8 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -67,6 +67,7 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data) edev->pdn = pdn; edev->phb = phb; INIT_LIST_HEAD(&edev->list); + INIT_LIST_HEAD(&edev->rmv_list); return NULL; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 938742135ee0..fb6207d2c604 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -34,6 +34,11 @@ #include <asm/prom.h> #include <asm/rtas.h> +struct eeh_rmv_data { + struct list_head edev_list; + int removed; +}; + /** * eeh_pcid_name - Retrieve name of PCI device driver * @pdev: PCI device @@ -190,7 +195,7 @@ static void *eeh_report_error(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_frozen; @@ -211,6 +216,7 @@ static void *eeh_report_error(void *data, void *userdata) if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; + edev->in_error = true; eeh_pcid_put(dev); return NULL; } @@ -231,7 +237,7 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; driver = eeh_pcid_get(dev); @@ -271,7 +277,7 @@ static void *eeh_report_reset(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_normal; @@ -282,7 +288,8 @@ static void *eeh_report_reset(void *data, void *userdata) if (!driver->err_handler || !driver->err_handler->slot_reset || - (edev->mode & EEH_DEV_NO_HANDLER)) { + (edev->mode & EEH_DEV_NO_HANDLER) || + (!edev->in_error)) { eeh_pcid_put(dev); return NULL; } @@ -326,20 +333,23 @@ static void *eeh_report_resume(void *data, void *userdata) { struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + bool was_in_error; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); if (!driver) return NULL; + was_in_error = edev->in_error; + edev->in_error = false; eeh_enable_irq(dev); if (!driver->err_handler || !driver->err_handler->resume || - (edev->mode & EEH_DEV_NO_HANDLER)) { + (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) { edev->mode &= ~EEH_DEV_NO_HANDLER; eeh_pcid_put(dev); return NULL; @@ -365,7 +375,7 @@ static void *eeh_report_failure(void *data, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_perm_failure; @@ -386,12 +396,40 @@ static void *eeh_report_failure(void *data, void *userdata) return NULL; } +static void *eeh_add_virt_device(void *data, void *userdata) +{ + struct pci_driver *driver; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + if (!(edev->physfn)) { + pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n", + __func__, edev->phb->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + return NULL; + } + + driver = eeh_pcid_get(dev); + if (driver) { + eeh_pcid_put(dev); + if (driver->err_handler) + return NULL; + } + +#ifdef CONFIG_PPC_POWERNV + pci_iov_add_virtfn(edev->physfn, pdn->vf_index, 0); +#endif + return NULL; +} + static void *eeh_rmv_device(void *data, void *userdata) { struct pci_driver *driver; struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - int *removed = (int *)userdata; + struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata; + int *removed = rmv_data ? &rmv_data->removed : NULL; /* * Actually, we should remove the PCI bridges as well. @@ -416,10 +454,13 @@ static void *eeh_rmv_device(void *data, void *userdata) driver = eeh_pcid_get(dev); if (driver) { eeh_pcid_put(dev); - if (driver->err_handler && + if (removed && + eeh_pe_passed(edev->pe)) + return NULL; + if (removed && + driver->err_handler && driver->err_handler->error_detected && - driver->err_handler->slot_reset && - driver->err_handler->resume) + driver->err_handler->slot_reset) return NULL; } @@ -428,11 +469,29 @@ static void *eeh_rmv_device(void *data, void *userdata) pci_name(dev)); edev->bus = dev->bus; edev->mode |= EEH_DEV_DISCONNECTED; - (*removed)++; + if (removed) + (*removed)++; - pci_lock_rescan_remove(); - pci_stop_and_remove_bus_device(dev); - pci_unlock_rescan_remove(); + if (edev->physfn) { +#ifdef CONFIG_PPC_POWERNV + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0); + edev->pdev = NULL; + + /* + * We have to set the VF PE number to invalid one, which is + * required to plug the VF successfully. + */ + pdn->pe_number = IODA_INVALID_PE; +#endif + if (rmv_data) + list_add(&edev->rmv_list, &rmv_data->edev_list); + } else { + pci_lock_rescan_remove(); + pci_stop_and_remove_bus_device(dev); + pci_unlock_rescan_remove(); + } return NULL; } @@ -546,11 +605,13 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) * During the reset, udev might be invoked because those affected * PCI devices will be removed and then added. */ -static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) +static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, + struct eeh_rmv_data *rmv_data) { struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); struct timeval tstamp; - int cnt, rc, removed = 0; + int cnt, rc; + struct eeh_dev *edev; /* pcibios will clear the counter; save the value */ cnt = pe->freeze_count; @@ -564,11 +625,16 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) */ eeh_pe_state_mark(pe, EEH_PE_KEEP); if (bus) { - pci_lock_rescan_remove(); - pcibios_remove_pci_devices(bus); - pci_unlock_rescan_remove(); + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + pci_lock_rescan_remove(); + pcibios_remove_pci_devices(bus); + pci_unlock_rescan_remove(); + } } else if (frozen_bus) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); + eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data); } /* @@ -610,14 +676,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * PE. We should disconnect it so the binding can be * rebuilt when adding PCI devices. */ + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - pcibios_add_pci_devices(bus); - } else if (frozen_bus && removed) { + if (pe->type & EEH_PE_VF) + eeh_add_virt_device(edev, NULL); + else + pcibios_add_pci_devices(bus); + } else if (frozen_bus && rmv_data->removed) { pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); ssleep(5); + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - pcibios_add_pci_devices(frozen_bus); + if (pe->type & EEH_PE_VF) + eeh_add_virt_device(edev, NULL); + else + pcibios_add_pci_devices(frozen_bus); } eeh_pe_state_clear(pe, EEH_PE_KEEP); @@ -636,8 +710,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) static void eeh_handle_normal_event(struct eeh_pe *pe) { struct pci_bus *frozen_bus; + struct eeh_dev *edev, *tmp; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; + struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; frozen_bus = eeh_pe_bus_get(pe); if (!frozen_bus) { @@ -692,7 +768,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) */ if (result == PCI_ERS_RESULT_NONE) { pr_info("EEH: Reset with hotplug activity\n"); - rc = eeh_reset_device(pe, frozen_bus); + rc = eeh_reset_device(pe, frozen_bus, NULL); if (rc) { pr_warn("%s: Unable to reset, err=%d\n", __func__, rc); @@ -744,7 +820,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { pr_info("EEH: Reset without hotplug activity\n"); - rc = eeh_reset_device(pe, NULL); + rc = eeh_reset_device(pe, NULL, &rmv_data); if (rc) { pr_warn("%s: Cannot reset, err=%d\n", __func__, rc); @@ -764,6 +840,15 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) goto hard_fail; } + /* + * For those hot removed VFs, we should add back them after PF get + * recovered properly. + */ + list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) { + eeh_add_virt_device(edev, NULL); + list_del(&edev->rmv_list); + } + /* Tell all device drivers that they can resume operations */ pr_info("EEH: Notify device driver to resume\n"); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); @@ -803,11 +888,17 @@ perm_error: * the their PCI config any more. */ if (frozen_bus) { - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - pci_lock_rescan_remove(); - pcibios_remove_pci_devices(frozen_bus); - pci_unlock_rescan_remove(); + pci_lock_rescan_remove(); + pcibios_remove_pci_devices(frozen_bus); + pci_unlock_rescan_remove(); + } } } @@ -886,6 +977,7 @@ static void eeh_handle_special_event(void) continue; /* Notify all devices to be down */ + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); bus = eeh_pe_bus_get(phb_pe); eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index ca9e5371930e..eea48d8baf49 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -299,7 +299,10 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) * EEH device already having associated PE, but * the direct parent EEH device doesn't have yet. */ - pdn = pdn ? pdn->parent : NULL; + if (edev->physfn) + pdn = pci_get_pdn(edev->physfn); + else + pdn = pdn ? pdn->parent : NULL; while (pdn) { /* We're poking out of PCI territory */ parent = pdn_to_eeh_dev(pdn); @@ -382,7 +385,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) } /* Create a new EEH PE */ - pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); + if (edev->physfn) + pe = eeh_pe_alloc(edev->phb, EEH_PE_VF); + else + pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); if (!pe) { pr_err("%s: out of memory!\n", __func__); return -ENOMEM; @@ -920,25 +926,21 @@ const char *eeh_pe_loc_get(struct eeh_pe *pe) */ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) { - struct pci_bus *bus = NULL; struct eeh_dev *edev; struct pci_dev *pdev; - if (pe->type & EEH_PE_PHB) { - bus = pe->phb->bus; - } else if (pe->type & EEH_PE_BUS || - pe->type & EEH_PE_DEVICE) { - if (pe->bus) { - bus = pe->bus; - goto out; - } + if (pe->type & EEH_PE_PHB) + return pe->phb->bus; - edev = list_first_entry(&pe->edevs, struct eeh_dev, list); - pdev = eeh_dev_to_pci_dev(edev); - if (pdev) - bus = pdev->bus; - } + /* The primary bus might be cached during probe time */ + if (pe->state & EEH_PE_PRI_BUS) + return pe->bus; -out: - return bus; + /* Retrieve the parent PCI bus of first (top) PCI device */ + edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list); + pdev = eeh_dev_to_pci_dev(edev); + if (pdev) + return pdev->bus; + + return NULL; } diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index ec7f8aada697..b9b125327f27 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -210,7 +210,20 @@ system_call: /* label this so stack traces look sane */ li r11,-MAX_ERRNO andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- syscall_exit_work - cmpld r3,r11 + + andi. r0,r8,MSR_FP + beq 2f +#ifdef CONFIG_ALTIVEC + andis. r0,r8,MSR_VEC@h + bne 3f +#endif +2: addi r3,r1,STACK_FRAME_OVERHEAD + bl restore_math + ld r8,_MSR(r1) + ld r3,RESULT(r1) + li r11,-MAX_ERRNO + +3: cmpld r3,r11 ld r5,_CCR(r1) bge- syscall_error .Lsyscall_error_cont: @@ -602,8 +615,8 @@ _GLOBAL(ret_from_except_lite) /* Check current_thread_info()->flags */ andi. r0,r4,_TIF_USER_WORK_MASK -#ifdef CONFIG_PPC_BOOK3E bne 1f +#ifdef CONFIG_PPC_BOOK3E /* * Check to see if the dbcr0 register is set up to debug. * Use the internal debug mode bit to do this. @@ -618,7 +631,9 @@ _GLOBAL(ret_from_except_lite) mtspr SPRN_DBSR,r10 b restore #else - beq restore + addi r3,r1,STACK_FRAME_OVERHEAD + bl restore_math + b restore #endif 1: andi. r0,r4,_TIF_NEED_RESCHED beq 2f diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 2117eaca3d28..15da2b5df85e 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -130,6 +130,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) or r12,r12,r4 std r12,_MSR(r1) #endif + /* Don't care if r4 overflows, this is desired behaviour */ + lbz r4,THREAD_LOAD_FP(r5) + addi r4,r4,1 + stb r4,THREAD_LOAD_FP(r5) addi r10,r5,THREAD_FPSTATE lfd fr0,FPSTATE_FPSCR(r10) MTFSF_L(fr0) @@ -139,33 +143,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) blr /* - * __giveup_fpu(tsk) - * Disable FP for the task given as the argument, - * and save the floating-point registers in its thread_struct. + * save_fpu(tsk) + * Save the floating-point registers in its thread_struct. * Enables the FPU for use in the kernel on return. */ -_GLOBAL(__giveup_fpu) +_GLOBAL(save_fpu) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r6,THREAD_FPSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r6,0 bne 2f addi r6,r3,THREAD_FPSTATE -2: PPC_LCMPI 0,r5,0 - SAVE_32FPVSRS(0, R4, R6) +2: SAVE_32FPVSRS(0, R4, R6) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) - beq 1f - PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r3,MSR_FP|MSR_FE0|MSR_FE1 -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - oris r3,r3,MSR_VSX@h -END_FTR_SECTION_IFSET(CPU_FTR_VSX) -#endif - andc r4,r4,r3 /* disable FP for previous task */ - PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: blr /* diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index b5061abbd2e0..9cdf5c71e426 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -806,7 +806,7 @@ _GLOBAL(set_context) _GLOBAL(init_cpu_state) mflr r22 #ifdef CONFIG_PPC_47x - /* We use the PVR to differenciate 44x cores from 476 */ + /* We use the PVR to differentiate 44x cores from 476 */ mfspr r3,SPRN_PVR srwi r3,r3,16 cmplwi cr0,r3,PVR_476FPE@h diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index cf4fb5429cf1..470ceebd2d23 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -19,7 +19,7 @@ #include <asm/kvm_book3s_asm.h> #include <asm/opal.h> #include <asm/cpuidle.h> -#include <asm/mmu-hash64.h> +#include <asm/book3s/64/mmu-hash.h> #undef DEBUG diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index e77c3ccf8dcf..dbf098121ce6 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -445,7 +445,11 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, * Global data */ struct kgdb_arch arch_kgdb_ops = { +#ifdef __LITTLE_ENDIAN__ + .gdb_bpt_instr = {0x08, 0x10, 0x82, 0x7d}, +#else .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08}, +#endif }; static int kgdb_not_implemented(struct pt_regs *regs) diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 2c647b1e62e4..ee62b197502d 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -54,8 +54,8 @@ static void flush_tlb_206(unsigned int num_sets, unsigned int action) } /* - * Generic routine to flush TLB on power7. This routine is used as - * flush_tlb hook in cpu_spec for Power7 processor. + * Generic routines to flush TLB on POWER processors. These routines + * are used as flush_tlb hook in the cpu_spec. * * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs. * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID. @@ -65,18 +65,17 @@ void __flush_tlb_power7(unsigned int action) flush_tlb_206(POWER7_TLB_SETS, action); } -/* - * Generic routine to flush TLB on power8. This routine is used as - * flush_tlb hook in cpu_spec for power8 processor. - * - * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs. - * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID. - */ void __flush_tlb_power8(unsigned int action) { flush_tlb_206(POWER8_TLB_SETS, action); } +void __flush_tlb_power9(unsigned int action) +{ + flush_tlb_206(POWER9_TLB_SETS_HASH, action); +} + + /* flush SLBs and reload */ static void flush_and_reload_slb(void) { diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 848b47499a27..9ce9a25f58b5 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -311,7 +311,7 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab) if (name[0] == '.') { if (strcmp(name+1, "TOC.") == 0) syms[i].st_shndx = SHN_ABS; - memmove(name, name+1, strlen(name)); + syms[i].st_name++; } } } diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 7f9ed0c1f6b9..59c436189f46 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -55,7 +55,7 @@ void pcibios_remove_pci_devices(struct pci_bus *bus) pr_debug("PCI: Removing devices on bus %04x:%02x\n", pci_domain_nr(bus), bus->number); - list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { + list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) { pr_debug(" Removing %s...\n", pci_name(dev)); pci_stop_and_remove_bus_device(dev); } diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index b3b4df91b792..38102cb9baa9 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -139,6 +139,7 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev) #ifdef CONFIG_PCI_IOV static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, struct pci_dev *pdev, + int vf_index, int busno, int devfn) { struct pci_dn *pdn; @@ -158,6 +159,7 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, pdn->busno = busno; pdn->devfn = devfn; #ifdef CONFIG_PPC_POWERNV + pdn->vf_index = vf_index; pdn->pe_number = IODA_INVALID_PE; #endif INIT_LIST_HEAD(&pdn->child_list); @@ -179,6 +181,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) { #ifdef CONFIG_PCI_IOV struct pci_dn *parent, *pdn; + struct eeh_dev *edev; int i; /* Only support IOV for now */ @@ -196,7 +199,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) return NULL; for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { - pdn = add_one_dev_pci_data(parent, NULL, + pdn = add_one_dev_pci_data(parent, NULL, i, pci_iov_virtfn_bus(pdev, i), pci_iov_virtfn_devfn(pdev, i)); if (!pdn) { @@ -204,6 +207,12 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) __func__, i); return NULL; } + + /* Create the EEH device for the VF */ + eeh_dev_init(pdn, pci_bus_to_host(pdev->bus)); + edev = pdn_to_eeh_dev(pdn); + BUG_ON(!edev); + edev->physfn = pdev; } #endif /* CONFIG_PCI_IOV */ @@ -215,6 +224,7 @@ void remove_dev_pci_data(struct pci_dev *pdev) #ifdef CONFIG_PCI_IOV struct pci_dn *parent; struct pci_dn *pdn, *tmp; + struct eeh_dev *edev; int i; /* @@ -256,6 +266,13 @@ void remove_dev_pci_data(struct pci_dev *pdev) pdn->devfn != pci_iov_virtfn_devfn(pdev, i)) continue; + /* Release EEH device for the VF */ + edev = pdn_to_eeh_dev(pdn); + if (edev) { + pdn->edev = NULL; + kfree(edev); + } + if (!list_empty(&pdn->list)) list_del(&pdn->list); diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 41e1607e800c..ef7024dacff7 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -28,10 +28,6 @@ EXPORT_SYMBOL(load_vr_state); EXPORT_SYMBOL(store_vr_state); #endif -#ifdef CONFIG_VSX -EXPORT_SYMBOL_GPL(__giveup_vsx); -#endif - #ifdef CONFIG_EPAPR_PARAVIRT EXPORT_SYMBOL(epapr_hypercall_start); #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index dccc87e8fee5..d7a9df51b974 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -133,6 +133,16 @@ void __msr_check_and_clear(unsigned long bits) EXPORT_SYMBOL(__msr_check_and_clear); #ifdef CONFIG_PPC_FPU +void __giveup_fpu(struct task_struct *tsk) +{ + save_fpu(tsk); + tsk->thread.regs->msr &= ~MSR_FP; +#ifdef CONFIG_VSX + if (cpu_has_feature(CPU_FTR_VSX)) + tsk->thread.regs->msr &= ~MSR_VSX; +#endif +} + void giveup_fpu(struct task_struct *tsk) { check_if_tm_restore_required(tsk); @@ -187,9 +197,32 @@ void enable_kernel_fp(void) } } EXPORT_SYMBOL(enable_kernel_fp); + +static int restore_fp(struct task_struct *tsk) { + if (tsk->thread.load_fp) { + load_fp_state(¤t->thread.fp_state); + current->thread.load_fp++; + return 1; + } + return 0; +} +#else +static int restore_fp(struct task_struct *tsk) { return 0; } #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC +#define loadvec(thr) ((thr).load_vec) + +static void __giveup_altivec(struct task_struct *tsk) +{ + save_altivec(tsk); + tsk->thread.regs->msr &= ~MSR_VEC; +#ifdef CONFIG_VSX + if (cpu_has_feature(CPU_FTR_VSX)) + tsk->thread.regs->msr &= ~MSR_VSX; +#endif +} + void giveup_altivec(struct task_struct *tsk) { check_if_tm_restore_required(tsk); @@ -229,22 +262,49 @@ void flush_altivec_to_thread(struct task_struct *tsk) } } EXPORT_SYMBOL_GPL(flush_altivec_to_thread); + +static int restore_altivec(struct task_struct *tsk) +{ + if (cpu_has_feature(CPU_FTR_ALTIVEC) && tsk->thread.load_vec) { + load_vr_state(&tsk->thread.vr_state); + tsk->thread.used_vr = 1; + tsk->thread.load_vec++; + + return 1; + } + return 0; +} +#else +#define loadvec(thr) 0 +static inline int restore_altivec(struct task_struct *tsk) { return 0; } #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -void giveup_vsx(struct task_struct *tsk) +static void __giveup_vsx(struct task_struct *tsk) { - check_if_tm_restore_required(tsk); - - msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); if (tsk->thread.regs->msr & MSR_FP) __giveup_fpu(tsk); if (tsk->thread.regs->msr & MSR_VEC) __giveup_altivec(tsk); + tsk->thread.regs->msr &= ~MSR_VSX; +} + +static void giveup_vsx(struct task_struct *tsk) +{ + check_if_tm_restore_required(tsk); + + msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); __giveup_vsx(tsk); msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } -EXPORT_SYMBOL(giveup_vsx); + +static void save_vsx(struct task_struct *tsk) +{ + if (tsk->thread.regs->msr & MSR_FP) + save_fpu(tsk); + if (tsk->thread.regs->msr & MSR_VEC) + save_altivec(tsk); +} void enable_kernel_vsx(void) { @@ -275,6 +335,19 @@ void flush_vsx_to_thread(struct task_struct *tsk) } } EXPORT_SYMBOL_GPL(flush_vsx_to_thread); + +static int restore_vsx(struct task_struct *tsk) +{ + if (cpu_has_feature(CPU_FTR_VSX)) { + tsk->thread.used_vsr = 1; + return 1; + } + + return 0; +} +#else +static inline int restore_vsx(struct task_struct *tsk) { return 0; } +static inline void save_vsx(struct task_struct *tsk) { } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE @@ -374,12 +447,76 @@ void giveup_all(struct task_struct *tsk) } EXPORT_SYMBOL(giveup_all); +void restore_math(struct pt_regs *regs) +{ + unsigned long msr; + + if (!current->thread.load_fp && !loadvec(current->thread)) + return; + + msr = regs->msr; + msr_check_and_set(msr_all_available); + + /* + * Only reload if the bit is not set in the user MSR, the bit BEING set + * indicates that the registers are hot + */ + if ((!(msr & MSR_FP)) && restore_fp(current)) + msr |= MSR_FP | current->thread.fpexc_mode; + + if ((!(msr & MSR_VEC)) && restore_altivec(current)) + msr |= MSR_VEC; + + if ((msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC) && + restore_vsx(current)) { + msr |= MSR_VSX; + } + + msr_check_and_clear(msr_all_available); + + regs->msr = msr; +} + +void save_all(struct task_struct *tsk) +{ + unsigned long usermsr; + + if (!tsk->thread.regs) + return; + + usermsr = tsk->thread.regs->msr; + + if ((usermsr & msr_all_available) == 0) + return; + + msr_check_and_set(msr_all_available); + + /* + * Saving the way the register space is in hardware, save_vsx boils + * down to a save_fpu() and save_altivec() + */ + if (usermsr & MSR_VSX) { + save_vsx(tsk); + } else { + if (usermsr & MSR_FP) + save_fpu(tsk); + + if (usermsr & MSR_VEC) + save_altivec(tsk); + } + + if (usermsr & MSR_SPE) + __giveup_spe(tsk); + + msr_check_and_clear(msr_all_available); +} + void flush_all_to_thread(struct task_struct *tsk) { if (tsk->thread.regs) { preempt_disable(); BUG_ON(tsk != current); - giveup_all(tsk); + save_all(tsk); #ifdef CONFIG_SPE if (tsk->thread.regs->msr & MSR_SPE) @@ -832,17 +969,9 @@ void restore_tm_state(struct pt_regs *regs) msr_diff = current->thread.ckpt_regs.msr & ~regs->msr; msr_diff &= MSR_FP | MSR_VEC | MSR_VSX; - if (msr_diff & MSR_FP) { - msr_check_and_set(MSR_FP); - load_fp_state(¤t->thread.fp_state); - msr_check_and_clear(MSR_FP); - regs->msr |= current->thread.fpexc_mode; - } - if (msr_diff & MSR_VEC) { - msr_check_and_set(MSR_VEC); - load_vr_state(¤t->thread.vr_state); - msr_check_and_clear(MSR_VEC); - } + + restore_math(regs); + regs->msr |= msr_diff; } @@ -1006,6 +1135,10 @@ struct task_struct *__switch_to(struct task_struct *prev, batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } + + if (current_thread_info()->task->thread.regs) + restore_math(current_thread_info()->task->thread.regs); + #endif /* CONFIG_PPC_BOOK3S_64 */ return last; @@ -1307,6 +1440,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, f = ret_from_fork; } + childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); sp -= STACK_FRAME_OVERHEAD; /* diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index cf8c7e4e0b21..cb64d6feb45a 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -1,7 +1,7 @@ /* * Common signal handling code for both 32 and 64 bits * - * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration + * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation * Extracted from signal_32.c and signal_64.c * * This file is subject to the terms and conditions of the GNU General @@ -178,7 +178,7 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs) * need to use the stack pointer from the checkpointed state, rather * than the speculated state. This ensures that the signal context * (written tm suspended) will be written below the stack required for - * the rollback. The transaction is aborted becuase of the treclaim, + * the rollback. The transaction is aborted because of the treclaim, * so any memory written between the tbegin and the signal will be * rolled back anyway. * diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 51b274199dd9..be305c858e51 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration + * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation * Extracted from signal_32.c and signal_64.c * * This file is subject to the terms and conditions of the GNU General diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index b6becc795bb5..88414dde7e35 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1148,6 +1148,7 @@ void __kprobes program_check_exception(struct pt_regs *regs) goto bail; } if (reason & REASON_TRAP) { + unsigned long bugaddr; /* Debugger is first in line to stop recursive faults in * rcu_lock, notify_die, or atomic_notifier_call_chain */ if (debugger_bpt(regs)) @@ -1158,8 +1159,15 @@ void __kprobes program_check_exception(struct pt_regs *regs) == NOTIFY_STOP) goto bail; + bugaddr = regs->nip; + /* + * Fixup bugaddr for BUG_ON() in real mode + */ + if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR)) + bugaddr += PAGE_OFFSET; + if (!(regs->msr & MSR_PR) && /* not user-mode */ - report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) { + report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) { regs->nip += 4; goto bail; } @@ -1394,7 +1402,7 @@ void facility_unavailable_exception(struct pt_regs *regs) * is a read DSCR attempt through a mfspr instruction, we * just emulate the instruction instead. This code path will * always emulate all the mfspr instructions till the user - * has attempted atleast one mtspr instruction. This way it + * has attempted at least one mtspr instruction. This way it * preserves the same behaviour when the user is accessing * the DSCR through privilege level only SPR number (0x11) * which is emulated through illegal instruction exception. diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 162d0f714941..1c2e7a343bf5 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -91,6 +91,10 @@ _GLOBAL(load_up_altivec) oris r12,r12,MSR_VEC@h std r12,_MSR(r1) #endif + /* Don't care if r4 overflows, this is desired behaviour */ + lbz r4,THREAD_LOAD_VEC(r5) + addi r4,r4,1 + stb r4,THREAD_LOAD_VEC(r5) addi r6,r5,THREAD_VRSTATE li r4,1 li r10,VRSTATE_VSCR @@ -102,36 +106,20 @@ _GLOBAL(load_up_altivec) blr /* - * __giveup_altivec(tsk) - * Disable VMX for the task given as the argument, - * and save the vector registers in its thread_struct. + * save_altivec(tsk) + * Save the vector registers to its thread_struct */ -_GLOBAL(__giveup_altivec) +_GLOBAL(save_altivec) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r7,THREAD_VRSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r7,0 bne 2f addi r7,r3,THREAD_VRSTATE -2: PPC_LCMPI 0,r5,0 - SAVE_32VRS(0,r4,r7) +2: SAVE_32VRS(0,r4,r7) mfvscr v0 li r4,VRSTATE_VSCR stvx v0,r4,r7 - beq 1f - PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - lis r3,(MSR_VEC|MSR_VSX)@h -FTR_SECTION_ELSE - lis r3,MSR_VEC@h -ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) -#else - lis r3,MSR_VEC@h -#endif - andc r4,r4,r3 /* disable FP for previous task */ - PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: blr #ifdef CONFIG_VSX @@ -163,23 +151,6 @@ _GLOBAL(load_up_vsx) std r12,_MSR(r1) b fast_exception_return -/* - * __giveup_vsx(tsk) - * Disable VSX for the task given as the argument. - * Does NOT save vsx registers. - */ -_GLOBAL(__giveup_vsx) - addi r3,r3,THREAD /* want THREAD of task */ - ld r5,PT_REGS(r3) - cmpdi 0,r5,0 - beq 1f - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r3,MSR_VSX@h - andc r4,r4,r3 /* disable VSX for previous task */ - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: - blr - #endif /* CONFIG_VSX */ diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 55c4d51ea3e2..999106991a76 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -22,7 +22,7 @@ #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> -#include <asm/mmu-hash32.h> +#include <asm/book3s/32/mmu-hash.h> #include <asm/machdep.h> #include <asm/mmu_context.h> #include <asm/hw_irq.h> diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 9bf7031a67ff..b9131aa1aedf 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -26,7 +26,7 @@ #include <asm/tlbflush.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> -#include <asm/mmu-hash64.h> +#include <asm/book3s/64/mmu-hash.h> /* #define DEBUG_MMU */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 913cd2198fa6..114edace6cdd 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -23,7 +23,7 @@ #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> -#include <asm/mmu-hash64.h> +#include <asm/book3s/64/mmu-hash.h> #include <asm/machdep.h> #include <asm/mmu_context.h> #include <asm/hw_irq.h> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index fb37290a57b4..c7b78d8336b2 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -32,7 +32,7 @@ #include <asm/tlbflush.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> -#include <asm/mmu-hash64.h> +#include <asm/book3s/64/mmu-hash.h> #include <asm/hvcall.h> #include <asm/synch.h> #include <asm/ppc-opcode.h> diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 54cf9bc94dad..9c3b76bb69d9 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -30,7 +30,7 @@ #include <asm/tlbflush.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> -#include <asm/mmu-hash64.h> +#include <asm/book3s/64/mmu-hash.h> #include <asm/hvcall.h> #include <asm/synch.h> #include <asm/ppc-opcode.h> diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 89e96b3e0039..039028d3ccb5 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -29,7 +29,7 @@ #include <asm/tlbflush.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> -#include <asm/mmu-hash64.h> +#include <asm/book3s/64/mmu-hash.h> #include <asm/hvcall.h> #include <asm/synch.h> #include <asm/ppc-opcode.h> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 91700518bbf3..4cb8db05f3e5 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -17,7 +17,7 @@ #include <asm/tlbflush.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> -#include <asm/mmu-hash64.h> +#include <asm/book3s/64/mmu-hash.h> #include <asm/hvcall.h> #include <asm/synch.h> #include <asm/ppc-opcode.h> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 6ee26de9a1de..c613fee0b9f7 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -27,7 +27,7 @@ #include <asm/asm-offsets.h> #include <asm/exception-64s.h> #include <asm/kvm_book3s_asm.h> -#include <asm/mmu-hash64.h> +#include <asm/book3s/64/mmu-hash.h> #include <asm/tm.h> #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 905e94a1370f..46871d554057 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -432,7 +432,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, * the whole masked_pending business which is about not * losing interrupts that occur while masked. * - * I don't differenciate normal deliveries and resends, this + * I don't differentiate normal deliveries and resends, this * implementation will differ from PAPR and not lose such * interrupts. */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 778ef86e187e..4d66f44a1657 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -992,7 +992,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_restart_interrupt(vcpu, exit_nr); /* - * get last instruction before beeing preempted + * get last instruction before being preempted * TODO: for e6500 check also BOOKE_INTERRUPT_LRAT_ERROR & ESR_DATA */ switch (exit_nr) { diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index cda695de8aa7..f48a0c22e8f9 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -182,7 +182,7 @@ int kvmppc_core_check_processor_compat(void) r = 0; #ifdef CONFIG_ALTIVEC /* - * Since guests have the priviledge to enable AltiVec, we need AltiVec + * Since guests have the privilege to enable AltiVec, we need AltiVec * support in the host to save/restore their context. * Don't use CPU_FTR_ALTIVEC to identify cores with AltiVec unit * because it's cleared in the absence of CONFIG_ALTIVEC! diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index e7c04542ba62..47d1b26effc6 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -44,7 +44,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * a write access. Since this is 4K insert of 64K page size * also add _PAGE_COMBO */ - new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE; + new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; if (access & _PAGE_RW) new_pte |= _PAGE_DIRTY; } while (old_pte != __cmpxchg_u64((unsigned long *)ptep, @@ -106,7 +106,7 @@ repeat: } } /* - * Hypervisor failure. Restore old pmd and return -1 + * Hypervisor failure. Restore old pte and return -1 * similar to __hash_page_* */ if (unlikely(slot == -2)) { diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 0762c1e08c88..b2d659cf51c6 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -111,7 +111,13 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, */ if (!(old_pte & _PAGE_COMBO)) { flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags); - old_pte &= ~_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND; + /* + * clear the old slot details from the old and new pte. + * On hash insert failure we use old pte value and we don't + * want slot information there if we have a insert failure. + */ + old_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND); + new_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND); goto htab_insert_hpte; } /* @@ -182,7 +188,7 @@ repeat: } } /* - * Hypervisor failure. Restore old pmd and return -1 + * Hypervisor failure. Restore old pte and return -1 * similar to __hash_page_* */ if (unlikely(slot == -2)) { @@ -243,8 +249,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, return 0; /* * Try to lock the PTE, add ACCESSED and DIRTY if it was - * a write access. Since this is 4K insert of 64K page size - * also add _PAGE_COMBO + * a write access. */ new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; if (access & _PAGE_RW) @@ -305,7 +310,7 @@ repeat: } } /* - * Hypervisor failure. Restore old pmd and return -1 + * Hypervisor failure. Restore old pte and return -1 * similar to __hash_page_* */ if (unlikely(slot == -2)) { diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index ba59d5977f34..90dd9280894f 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -168,11 +168,11 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) rflags |= HPTE_R_N; /* * PP bits: - * Linux use slb key 0 for kernel and 1 for user. - * kernel areas are mapped by PP bits 00 - * and and there is no kernel RO (_PAGE_KERNEL_RO). - * User area mapped by 0x2 and read only use by - * 0x3. + * Linux uses slb key 0 for kernel and 1 for user. + * kernel areas are mapped with PP=00 + * and there is no kernel RO (_PAGE_KERNEL_RO). + * User area is mapped with PP=0x2 for read/write + * or PP=0x3 for read-only (including writeable but clean pages). */ if (pteflags & _PAGE_USER) { rflags |= 0x2; @@ -263,28 +263,32 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, return ret < 0 ? ret : 0; } -#ifdef CONFIG_MEMORY_HOTPLUG int htab_remove_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize) { unsigned long vaddr; unsigned int step, shift; + int rc; + int ret = 0; shift = mmu_psize_defs[psize].shift; step = 1 << shift; - if (!ppc_md.hpte_removebolted) { - printk(KERN_WARNING "Platform doesn't implement " - "hpte_removebolted\n"); - return -EINVAL; - } + if (!ppc_md.hpte_removebolted) + return -ENODEV; - for (vaddr = vstart; vaddr < vend; vaddr += step) - ppc_md.hpte_removebolted(vaddr, psize, ssize); + for (vaddr = vstart; vaddr < vend; vaddr += step) { + rc = ppc_md.hpte_removebolted(vaddr, psize, ssize); + if (rc == -ENOENT) { + ret = -ENOENT; + continue; + } + if (rc < 0) + return rc; + } - return 0; + return ret; } -#endif /* CONFIG_MEMORY_HOTPLUG */ static int __init htab_dt_scan_seg_sizes(unsigned long node, const char *uname, int depth, @@ -605,10 +609,28 @@ static int __init htab_dt_scan_pftsize(unsigned long node, return 0; } -static unsigned long __init htab_get_table_size(void) +unsigned htab_shift_for_mem_size(unsigned long mem_size) { - unsigned long mem_size, rnd_mem_size, pteg_count, psize; + unsigned memshift = __ilog2(mem_size); + unsigned pshift = mmu_psize_defs[mmu_virtual_psize].shift; + unsigned pteg_shift; + + /* round mem_size up to next power of 2 */ + if ((1UL << memshift) < mem_size) + memshift += 1; + + /* aim for 2 pages / pteg */ + pteg_shift = memshift - (pshift + 1); + /* + * 2^11 PTEGS of 128 bytes each, ie. 2^18 bytes is the minimum htab + * size permitted by the architecture. + */ + return max(pteg_shift + 7, 18U); +} + +static unsigned long __init htab_get_table_size(void) +{ /* If hash size isn't already provided by the platform, we try to * retrieve it from the device-tree. If it's not there neither, we * calculate it now based on the total RAM size @@ -618,31 +640,30 @@ static unsigned long __init htab_get_table_size(void) if (ppc64_pft_size) return 1UL << ppc64_pft_size; - /* round mem_size up to next power of 2 */ - mem_size = memblock_phys_mem_size(); - rnd_mem_size = 1UL << __ilog2(mem_size); - if (rnd_mem_size < mem_size) - rnd_mem_size <<= 1; - - /* # pages / 2 */ - psize = mmu_psize_defs[mmu_virtual_psize].shift; - pteg_count = max(rnd_mem_size >> (psize + 1), 1UL << 11); - - return pteg_count << 7; + return 1UL << htab_shift_for_mem_size(memblock_phys_mem_size()); } #ifdef CONFIG_MEMORY_HOTPLUG int create_section_mapping(unsigned long start, unsigned long end) { - return htab_bolt_mapping(start, end, __pa(start), - pgprot_val(PAGE_KERNEL), mmu_linear_psize, - mmu_kernel_ssize); + int rc = htab_bolt_mapping(start, end, __pa(start), + pgprot_val(PAGE_KERNEL), mmu_linear_psize, + mmu_kernel_ssize); + + if (rc < 0) { + int rc2 = htab_remove_mapping(start, end, mmu_linear_psize, + mmu_kernel_ssize); + BUG_ON(rc2 && (rc2 != -ENOENT)); + } + return rc; } int remove_section_mapping(unsigned long start, unsigned long end) { - return htab_remove_mapping(start, end, mmu_linear_psize, - mmu_kernel_ssize); + int rc = htab_remove_mapping(start, end, mmu_linear_psize, + mmu_kernel_ssize); + WARN_ON(rc < 0); + return rc; } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 49b152b0f926..eb2accdd76fd 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -78,9 +78,19 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * base page size. This is because demote_segment won't flush * hash page table entries. */ - if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) + if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) { flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, ssize, flags); + /* + * With THP, we also clear the slot information with + * respect to all the 64K hash pte mapping the 16MB + * page. They are all invalid now. This make sure we + * don't find the slot valid when we fault with 4k + * base page size. + * + */ + memset(hpte_slot_array, 0, PTE_FRAG_SIZE); + } } valid = hpte_valid(hpte_slot_array, index); diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index e2138c7ae70f..8555fce902fe 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -76,7 +76,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, if (old_pte & _PAGE_F_SECOND) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (old_pte & _PAGE_F_GIX) >> 12; + slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT; if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, mmu_psize, ssize, flags) == -1) @@ -105,7 +105,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, return -1; } - new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX); + new_pte |= (slot << _PAGE_F_GIX_SHIFT) & + (_PAGE_F_SECOND | _PAGE_F_GIX); } /* diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 744e24bcb85c..6dd272b6196f 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -107,8 +107,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, kmem_cache_free(cachep, new); else { #ifdef CONFIG_PPC_BOOK3S_64 - hpdp->pd = (unsigned long)new | - (shift_to_mmu_psize(pshift) << 2); + hpdp->pd = __pa(new) | (shift_to_mmu_psize(pshift) << 2); #else hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; #endif diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 379a6a90644b..ba655666186d 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -85,6 +85,11 @@ static void pgd_ctor(void *addr) memset(addr, 0, PGD_TABLE_SIZE); } +static void pud_ctor(void *addr) +{ + memset(addr, 0, PUD_TABLE_SIZE); +} + static void pmd_ctor(void *addr) { memset(addr, 0, PMD_TABLE_SIZE); @@ -138,14 +143,18 @@ void pgtable_cache_init(void) { pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); + /* + * In all current configs, when the PUD index exists it's the + * same size as either the pgd or pmd index except with THP enabled + * on book3s 64 + */ + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) + pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); + if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX)) panic("Couldn't allocate pgtable caches"); - /* In all current configs, when the PUD index exists it's the - * same size as either the pgd or pmd index. Verify that the - * initialization above has also created a PUD cache. This - * will need re-examiniation if we add new possibilities for - * the pagetable layout. */ - BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)); + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) + panic("Couldn't allocate pud pgtable caches"); } #ifdef CONFIG_SPARSEMEM_VMEMMAP @@ -188,9 +197,9 @@ static int __meminit vmemmap_populated(unsigned long start, int page_size) */ #ifdef CONFIG_PPC_BOOK3E -static void __meminit vmemmap_create_mapping(unsigned long start, - unsigned long page_size, - unsigned long phys) +static int __meminit vmemmap_create_mapping(unsigned long start, + unsigned long page_size, + unsigned long phys) { /* Create a PTE encoding without page size */ unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED | @@ -208,6 +217,8 @@ static void __meminit vmemmap_create_mapping(unsigned long start, */ for (i = 0; i < page_size; i += PAGE_SIZE) BUG_ON(map_kernel_page(start + i, phys, flags)); + + return 0; } #ifdef CONFIG_MEMORY_HOTPLUG @@ -217,25 +228,31 @@ static void vmemmap_remove_mapping(unsigned long start, } #endif #else /* CONFIG_PPC_BOOK3E */ -static void __meminit vmemmap_create_mapping(unsigned long start, - unsigned long page_size, - unsigned long phys) +static int __meminit vmemmap_create_mapping(unsigned long start, + unsigned long page_size, + unsigned long phys) { - int mapped = htab_bolt_mapping(start, start + page_size, phys, - pgprot_val(PAGE_KERNEL), - mmu_vmemmap_psize, - mmu_kernel_ssize); - BUG_ON(mapped < 0); + int rc = htab_bolt_mapping(start, start + page_size, phys, + pgprot_val(PAGE_KERNEL), + mmu_vmemmap_psize, mmu_kernel_ssize); + if (rc < 0) { + int rc2 = htab_remove_mapping(start, start + page_size, + mmu_vmemmap_psize, + mmu_kernel_ssize); + BUG_ON(rc2 && (rc2 != -ENOENT)); + } + return rc; } #ifdef CONFIG_MEMORY_HOTPLUG static void vmemmap_remove_mapping(unsigned long start, unsigned long page_size) { - int mapped = htab_remove_mapping(start, start + page_size, - mmu_vmemmap_psize, - mmu_kernel_ssize); - BUG_ON(mapped < 0); + int rc = htab_remove_mapping(start, start + page_size, + mmu_vmemmap_psize, + mmu_kernel_ssize); + BUG_ON((rc < 0) && (rc != -ENOENT)); + WARN_ON(rc == -ENOENT); } #endif @@ -303,6 +320,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) for (; start < end; start += page_size) { void *p; + int rc; if (vmemmap_populated(start, page_size)) continue; @@ -316,7 +334,13 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) pr_debug(" * %016lx..%016lx allocated at %p\n", start, start + page_size, p); - vmemmap_create_mapping(start, page_size, __pa(p)); + rc = vmemmap_create_mapping(start, page_size, __pa(p)); + if (rc < 0) { + pr_warning( + "vmemmap_populate: Unable to create vmemmap mapping: %d\n", + rc); + return -EFAULT; + } } return 0; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d0f0a514b04e..f980da6d7569 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -119,12 +119,18 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device) struct zone *zone; unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; + int rc; pgdata = NODE_DATA(nid); start = (unsigned long)__va(start); - if (create_section_mapping(start, start + size)) - return -EINVAL; + rc = create_section_mapping(start, start + size); + if (rc) { + pr_warning( + "Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", + start, start + size, rc); + return -EFAULT; + } /* this should work for most non-highmem platforms */ zone = pgdata->node_zones + diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 9f58ff44a075..898d63365cdd 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -110,7 +110,8 @@ extern unsigned long Hash_size, Hash_mask; #endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 -extern int map_kernel_page(unsigned long ea, unsigned long pa, int flags); +extern int map_kernel_page(unsigned long ea, unsigned long pa, + unsigned long flags); #endif /* CONFIG_PPC64 */ extern unsigned long ioremap_bot; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 3124a20d0fab..0eb53128ca2a 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -88,7 +88,7 @@ static __ref void *early_alloc_pgtable(unsigned long size) * map_kernel_page adds an entry to the ioremap page table * and adds an entry to the HPT, possibly bolting it */ -int map_kernel_page(unsigned long ea, unsigned long pa, int flags) +int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags) { pgd_t *pgdp; pud_t *pudp; @@ -646,6 +646,28 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) return pgtable; } +void pmdp_huge_split_prepare(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + VM_BUG_ON(REGION_ID(address) != USER_REGION_ID); + + /* + * We can't mark the pmd none here, because that will cause a race + * against exit_mmap. We need to continue mark pmd TRANS HUGE, while + * we spilt, but at the same time we wan't rest of the ppc64 code + * not to insert hash pte on this, because we will be modifying + * the deposited pgtable in the caller of this function. Hence + * clear the _PAGE_USER so that we move the fault handling to + * higher level function and that will serialize against ptl. + * We need to flush existing hash pte entries here even though, + * the translation is still valid, because we will withdraw + * pgtable_t after this. + */ + pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0); +} + + /* * set a new huge pmd. We should not be called for updating * an existing pmd entry. That should go via pmd_hugepage_update. @@ -663,10 +685,20 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); } +/* + * We use this to invalidate a pmdp entry before switching from a + * hugepte to regular pmd entry. + */ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); + + /* + * This ensures that generic code that rely on IRQ disabling + * to prevent a parallel THP split work as expected. + */ + kick_all_cpus_sync(); } /* @@ -717,7 +749,7 @@ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot) { unsigned long pmdv; - pmdv = pfn << PTE_RPN_SHIFT; + pmdv = (pfn << PTE_RPN_SHIFT) & PTE_RPN_MASK; return pmd_set_protbits(__pmd(pmdv), pgprot); } @@ -785,6 +817,13 @@ pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, int has_transparent_hugepage(void) { + + BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) >= MAX_ORDER, + "hugepages can't be allocated by the buddy allocator"); + + BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) < 2, + "We need more than 2 pages to do deferred thp split"); + if (!mmu_has_feature(MMU_FTR_16M_PAGE)) return 0; /* diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 29d6987c37ba..eb82d787d99a 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -895,7 +895,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS) BEGIN_MMU_FTR_SECTION virt_page_table_tlb_miss_done: - /* We have overriden MAS2:EPN but currently our primary TLB miss + /* We have overridden MAS2:EPN but currently our primary TLB miss * handler will always restore it so that should not be an issue, * if we ever optimize the primary handler to not write MAS2 on * some cases, we'll have to restore MAS2:EPN here based on the diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index 68c477592e43..eabecfcaef7c 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -108,7 +108,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) blr 2: #ifdef CONFIG_PPC_47x - oris r7,r6,0x8000 /* specify way explicitely */ + oris r7,r6,0x8000 /* specify way explicitly */ clrrwi r4,r3,12 /* get an EPN for the hashing with V = 0 */ ori r4,r4,PPC47x_TLBE_SIZE tlbwe r4,r7,0 /* write it */ @@ -149,7 +149,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) li r3,-1 /* Current set */ lis r10,tlb_47x_boltmap@h ori r10,r10,tlb_47x_boltmap@l - lis r7,0x8000 /* Specify way explicitely */ + lis r7,0x8000 /* Specify way explicitly */ b 9f /* For each set */ diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index 863d89386f60..c82497a31c54 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c @@ -208,7 +208,7 @@ static void pm_rtas_reset_signals(u32 node) /* * The debug bus is being set to the passthru disable state. - * However, the FW still expects atleast one legal signal routing + * However, the FW still expects at least one legal signal routing * entry or it will return an error on the arguments. If we don't * supply a valid entry, we must ignore all return values. Ignoring * all return values means we might miss an error we should be @@ -1008,7 +1008,7 @@ static int initial_lfsr[] = { * * To avoid the time to compute the LFSR, a lookup table is used. The 24 bit * LFSR sequence is broken into four ranges. The spacing of the precomputed - * values is adjusted in each range so the error between the user specifed + * values is adjusted in each range so the error between the user specified * number (N) of events between samples and the actual number of events based * on the precomputed value will be les then about 6.2%. Note, if the user * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used. diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index d1e65ce545b3..97a1d40d8696 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -651,7 +651,7 @@ static void pmao_restore_workaround(bool ebb) /* * We are already soft-disabled in power_pmu_enable(). We need to hard - * enable to actually prevent the PMU exception from firing. + * disable to actually prevent the PMU exception from firing. */ hard_irq_disable(); diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 9f9dfda9ed2c..59012e750abe 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -27,20 +27,6 @@ #include "hv-24x7-catalog.h" #include "hv-common.h" -static const char *event_domain_suffix(unsigned domain) -{ - switch (domain) { -#define DOMAIN(n, v, x, c) \ - case HV_PERF_DOMAIN_##n: \ - return "__" #n; -#include "hv-24x7-domains.h" -#undef DOMAIN - default: - WARN(1, "unknown domain %d\n", domain); - return "__UNKNOWN_DOMAIN_SUFFIX"; - } -} - static bool domain_is_valid(unsigned domain) { switch (domain) { @@ -68,6 +54,24 @@ static bool is_physical_domain(unsigned domain) } } +static const char *domain_name(unsigned domain) +{ + if (!domain_is_valid(domain)) + return NULL; + + switch (domain) { + case HV_PERF_DOMAIN_PHYS_CHIP: return "Physical Chip"; + case HV_PERF_DOMAIN_PHYS_CORE: return "Physical Core"; + case HV_PERF_DOMAIN_VCPU_HOME_CORE: return "VCPU Home Core"; + case HV_PERF_DOMAIN_VCPU_HOME_CHIP: return "VCPU Home Chip"; + case HV_PERF_DOMAIN_VCPU_HOME_NODE: return "VCPU Home Node"; + case HV_PERF_DOMAIN_VCPU_REMOTE_NODE: return "VCPU Remote Node"; + } + + WARN_ON_ONCE(domain); + return NULL; +} + static bool catalog_entry_domain_is_valid(unsigned domain) { return is_physical_domain(domain); @@ -101,6 +105,7 @@ static bool catalog_entry_domain_is_valid(unsigned domain) EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3); /* u16 */ EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31); +EVENT_DEFINE_RANGE_FORMAT(chip, config, 16, 31); EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31); /* u32, see "data_offset" */ EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63); @@ -115,6 +120,7 @@ static struct attribute *format_attrs[] = { &format_attr_domain.attr, &format_attr_offset.attr, &format_attr_core.attr, + &format_attr_chip.attr, &format_attr_vcpu.attr, &format_attr_lpar.attr, NULL, @@ -274,32 +280,70 @@ static unsigned long h_get_24x7_catalog_page(char page[], version, index); } -static unsigned core_domains[] = { - HV_PERF_DOMAIN_PHYS_CORE, - HV_PERF_DOMAIN_VCPU_HOME_CORE, - HV_PERF_DOMAIN_VCPU_HOME_CHIP, - HV_PERF_DOMAIN_VCPU_HOME_NODE, - HV_PERF_DOMAIN_VCPU_REMOTE_NODE, -}; -/* chip event data always yeilds a single event, core yeilds multiple */ -#define MAX_EVENTS_PER_EVENT_DATA ARRAY_SIZE(core_domains) - +/* + * Each event we find in the catalog, will have a sysfs entry. Format the + * data for this sysfs entry based on the event's domain. + * + * Events belonging to the Chip domain can only be monitored in that domain. + * i.e the domain for these events is a fixed/knwon value. + * + * Events belonging to the Core domain can be monitored either in the physical + * core or in one of the virtual CPU domains. So the domain value for these + * events must be specified by the user (i.e is a required parameter). Format + * the Core events with 'domain=?' so the perf-tool can error check required + * parameters. + * + * NOTE: For the Core domain events, rather than making domain a required + * parameter we could default it to PHYS_CORE and allowe users to + * override the domain to one of the VCPU domains. + * + * However, this can make the interface a little inconsistent. + * + * If we set domain=2 (PHYS_CHIP) and allow user to override this field + * the user may be tempted to also modify the "offset=x" field in which + * can lead to confusing usage. Consider the HPM_PCYC (offset=0x18) and + * HPM_INST (offset=0x20) events. With: + * + * perf stat -e hv_24x7/HPM_PCYC,offset=0x20/ + * + * we end up monitoring HPM_INST, while the command line has HPM_PCYC. + * + * By not assigning a default value to the domain for the Core events, + * we can have simple guidelines: + * + * - Specifying values for parameters with "=?" is required. + * + * - Specifying (i.e overriding) values for other parameters + * is undefined. + */ static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain) { const char *sindex; const char *lpar; + const char *domain_str; + char buf[8]; - if (is_physical_domain(domain)) { + switch (domain) { + case HV_PERF_DOMAIN_PHYS_CHIP: + snprintf(buf, sizeof(buf), "%d", domain); + domain_str = buf; + lpar = "0x0"; + sindex = "chip"; + break; + case HV_PERF_DOMAIN_PHYS_CORE: + domain_str = "?"; lpar = "0x0"; sindex = "core"; - } else { + break; + default: + domain_str = "?"; lpar = "?"; sindex = "vcpu"; } return kasprintf(GFP_KERNEL, - "domain=0x%x,offset=0x%x,%s=?,lpar=%s", - domain, + "domain=%s,offset=0x%x,%s=?,lpar=%s", + domain_str, be16_to_cpu(event->event_counter_offs) + be16_to_cpu(event->event_group_record_offs), sindex, @@ -339,6 +383,15 @@ static struct attribute *device_str_attr_create_(char *name, char *str) return &attr->attr.attr; } +/* + * Allocate and initialize strings representing event attributes. + * + * NOTE: The strings allocated here are never destroyed and continue to + * exist till shutdown. This is to allow us to create as many events + * from the catalog as possible, even if we encounter errors with some. + * In case of changes to error paths in future, these may need to be + * freed by the caller. + */ static struct attribute *device_str_attr_create(char *name, int name_max, int name_nonce, char *str, size_t str_max) @@ -370,16 +423,6 @@ out_s: return NULL; } -static void device_str_attr_destroy(struct attribute *attr) -{ - struct dev_ext_attribute *d; - - d = container_of(attr, struct dev_ext_attribute, attr.attr); - kfree(d->var); - kfree(d->attr.attr.name); - kfree(d); -} - static struct attribute *event_to_attr(unsigned ix, struct hv_24x7_event_data *event, unsigned domain, @@ -387,7 +430,6 @@ static struct attribute *event_to_attr(unsigned ix, { int event_name_len; char *ev_name, *a_ev_name, *val; - const char *ev_suffix; struct attribute *attr; if (!domain_is_valid(domain)) { @@ -400,14 +442,13 @@ static struct attribute *event_to_attr(unsigned ix, if (!val) return NULL; - ev_suffix = event_domain_suffix(domain); ev_name = event_name(event, &event_name_len); if (!nonce) - a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s", - (int)event_name_len, ev_name, ev_suffix); + a_ev_name = kasprintf(GFP_KERNEL, "%.*s", + (int)event_name_len, ev_name); else - a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d", - (int)event_name_len, ev_name, ev_suffix, nonce); + a_ev_name = kasprintf(GFP_KERNEL, "%.*s__%d", + (int)event_name_len, ev_name, nonce); if (!a_ev_name) goto out_val; @@ -452,45 +493,14 @@ event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce) return device_str_attr_create(name, nl, nonce, desc, dl); } -static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs, +static int event_data_to_attrs(unsigned ix, struct attribute **attrs, struct hv_24x7_event_data *event, int nonce) { - unsigned i; - - switch (event->domain) { - case HV_PERF_DOMAIN_PHYS_CHIP: - *attrs = event_to_attr(ix, event, event->domain, nonce); - return 1; - case HV_PERF_DOMAIN_PHYS_CORE: - for (i = 0; i < ARRAY_SIZE(core_domains); i++) { - attrs[i] = event_to_attr(ix, event, core_domains[i], - nonce); - if (!attrs[i]) { - pr_warn("catalog event %u: individual attr %u " - "creation failure\n", ix, i); - for (; i; i--) - device_str_attr_destroy(attrs[i - 1]); - return -1; - } - } - return i; - default: - pr_warn("catalog event %u: domain %u is not allowed in the " - "catalog\n", ix, event->domain); + *attrs = event_to_attr(ix, event, event->domain, nonce); + if (!*attrs) return -1; - } -} -static size_t event_to_attr_ct(struct hv_24x7_event_data *event) -{ - switch (event->domain) { - case HV_PERF_DOMAIN_PHYS_CHIP: - return 1; - case HV_PERF_DOMAIN_PHYS_CORE: - return ARRAY_SIZE(core_domains); - default: - return 0; - } + return 0; } static unsigned long vmalloc_to_phys(void *v) @@ -726,9 +736,8 @@ static int create_events_from_catalog(struct attribute ***events_, goto e_free; } - if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) { - pr_err("event_entry_count %zu is invalid\n", - event_entry_count); + if (SIZE_MAX - 1 < event_entry_count) { + pr_err("event_entry_count %zu is invalid\n", event_entry_count); ret = -EIO; goto e_free; } @@ -801,7 +810,7 @@ static int create_events_from_catalog(struct attribute ***events_, continue; } - attr_max += event_to_attr_ct(event); + attr_max++; } event_idx_last = event_idx; @@ -851,12 +860,12 @@ static int create_events_from_catalog(struct attribute ***events_, nonce = event_uniq_add(&ev_uniq, name, nl, event->domain); ct = event_data_to_attrs(event_idx, events + event_attr_ct, event, nonce); - if (ct <= 0) { + if (ct < 0) { pr_warn("event %zu (%.*s) creation failure, skipping\n", event_idx, nl, name); junk_events++; } else { - event_attr_ct += ct; + event_attr_ct++; event_descs[desc_ct] = event_to_desc_attr(event, nonce); if (event_descs[desc_ct]) desc_ct++; @@ -961,6 +970,27 @@ e_free: return ret; } +static ssize_t domains_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + int d, n, count = 0; + const char *str; + + for (d = 0; d < HV_PERF_DOMAIN_MAX; d++) { + str = domain_name(d); + if (!str) + continue; + + n = sprintf(page, "%d: %s\n", d, str); + if (n < 0) + break; + + count += n; + page += n; + } + return count; +} + #define PAGE_0_ATTR(_name, _fmt, _expr) \ static ssize_t _name##_show(struct device *dev, \ struct device_attribute *dev_attr, \ @@ -989,6 +1019,7 @@ PAGE_0_ATTR(catalog_version, "%lld\n", PAGE_0_ATTR(catalog_len, "%lld\n", (unsigned long long)be32_to_cpu(page_0->length) * 4096); static BIN_ATTR_RO(catalog, 0/* real length varies */); +static DEVICE_ATTR_RO(domains); static struct bin_attribute *if_bin_attrs[] = { &bin_attr_catalog, @@ -998,6 +1029,7 @@ static struct bin_attribute *if_bin_attrs[] = { static struct attribute *if_attrs[] = { &dev_attr_catalog_len.attr, &dev_attr_catalog_version.attr, + &dev_attr_domains.attr, NULL, }; @@ -1089,10 +1121,16 @@ static int add_event_to_24x7_request(struct perf_event *event, return -EINVAL; } - if (is_physical_domain(event_get_domain(event))) + switch (event_get_domain(event)) { + case HV_PERF_DOMAIN_PHYS_CHIP: + idx = event_get_chip(event); + break; + case HV_PERF_DOMAIN_PHYS_CORE: idx = event_get_core(event); - else + break; + default: idx = event_get_vcpu(event); + } i = request_buffer->num_requests++; req = &request_buffer->requests[i]; @@ -1208,11 +1246,12 @@ static int h_24x7_event_init(struct perf_event *event) return -EACCES; } - /* see if the event complains */ + /* Get the initial value of the counter for this event */ if (single_24x7_request(event, &ct)) { pr_devel("test hcall failed\n"); return -EIO; } + (void)local64_xchg(&event->hw.prev_count, ct); return 0; } @@ -1275,6 +1314,16 @@ static void h_24x7_event_read(struct perf_event *event) h24x7hw = &get_cpu_var(hv_24x7_hw); h24x7hw->events[i] = event; put_cpu_var(h24x7hw); + /* + * Clear the event count so we can compute the _change_ + * in the 24x7 raw counter value at the end of the txn. + * + * Note that we could alternatively read the 24x7 value + * now and save its value in event->hw.prev_count. But + * that would require issuing a hcall, which would then + * defeat the purpose of using the txn interface. + */ + local64_set(&event->count, 0); } put_cpu_var(hv_24x7_reqb); diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h index 0f9fa21a29f2..791455e7f5cf 100644 --- a/arch/powerpc/perf/hv-24x7.h +++ b/arch/powerpc/perf/hv-24x7.h @@ -7,6 +7,7 @@ enum hv_perf_domains { #define DOMAIN(n, v, x, c) HV_PERF_DOMAIN_##n = v, #include "hv-24x7-domains.h" #undef DOMAIN + HV_PERF_DOMAIN_MAX, }; struct hv_24x7_request { @@ -80,7 +81,7 @@ struct hv_24x7_result { __u8 results_complete; __be16 num_elements_returned; - /* This is a copy of @data_size from the coresponding hv_24x7_request */ + /* This is a copy of @data_size from the corresponding hv_24x7_request */ __be16 result_element_data_size; __u8 reserved[0x2]; diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 856fe6e03c2a..7aa37236bb70 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -127,8 +127,16 @@ static const struct attribute_group *attr_groups[] = { NULL, }; -#define GPCI_MAX_DATA_BYTES \ - (1024 - sizeof(struct hv_get_perf_counter_info_params)) +#define HGPCI_REQ_BUFFER_SIZE 4096 +#define HGPCI_MAX_DATA_BYTES \ + (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params)) + +DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); + +struct hv_gpci_request_buffer { + struct hv_get_perf_counter_info_params params; + uint8_t bytes[HGPCI_MAX_DATA_BYTES]; +} __packed; static unsigned long single_gpci_request(u32 req, u32 starting_index, u16 secondary_index, u8 version_in, u32 offset, u8 length, @@ -137,24 +145,21 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index, unsigned long ret; size_t i; u64 count; + struct hv_gpci_request_buffer *arg; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); - struct { - struct hv_get_perf_counter_info_params params; - uint8_t bytes[GPCI_MAX_DATA_BYTES]; - } __packed __aligned(sizeof(uint64_t)) arg = { - .params = { - .counter_request = cpu_to_be32(req), - .starting_index = cpu_to_be32(starting_index), - .secondary_index = cpu_to_be16(secondary_index), - .counter_info_version_in = version_in, - } - }; + arg->params.counter_request = cpu_to_be32(req); + arg->params.starting_index = cpu_to_be32(starting_index); + arg->params.secondary_index = cpu_to_be16(secondary_index); + arg->params.counter_info_version_in = version_in; ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, - virt_to_phys(&arg), sizeof(arg)); + virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); if (ret) { pr_devel("hcall failed: 0x%lx\n", ret); - return ret; + goto out; } /* @@ -163,9 +168,11 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index, */ count = 0; for (i = offset; i < offset + length; i++) - count |= arg.bytes[i] << (i - offset); + count |= arg->bytes[i] << (i - offset); *value = count; +out: + put_cpu_var(hv_gpci_reqb); return ret; } @@ -245,10 +252,10 @@ static int h_gpci_event_init(struct perf_event *event) } /* last byte within the buffer? */ - if ((event_get_offset(event) + length) > GPCI_MAX_DATA_BYTES) { + if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) { pr_devel("request outside of buffer: %zu > %zu\n", (size_t)event_get_offset(event) + length, - GPCI_MAX_DATA_BYTES); + HGPCI_MAX_DATA_BYTES); return -EINVAL; } diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 5b62f2389290..a383c23a9070 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -54,7 +54,7 @@ * Power7 event codes. */ #define EVENT(_name, _code) \ - PME_##_name = _code, + _name = _code, enum { #include "power7-events-list.h" @@ -318,14 +318,14 @@ static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[]) } static int power7_generic_events[] = { - [PERF_COUNT_HW_CPU_CYCLES] = PME_PM_CYC, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PME_PM_GCT_NOSLOT_CYC, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PME_PM_CMPLU_STALL, - [PERF_COUNT_HW_INSTRUCTIONS] = PME_PM_INST_CMPL, - [PERF_COUNT_HW_CACHE_REFERENCES] = PME_PM_LD_REF_L1, - [PERF_COUNT_HW_CACHE_MISSES] = PME_PM_LD_MISS_L1, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PME_PM_BRU_FIN, - [PERF_COUNT_HW_BRANCH_MISSES] = PME_PM_BR_MPRED, + [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_GCT_NOSLOT_CYC, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL, + [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_FIN, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED, }; #define C(x) PERF_COUNT_HW_CACHE_##x diff --git a/arch/powerpc/perf/power8-events-list.h b/arch/powerpc/perf/power8-events-list.h new file mode 100644 index 000000000000..741b77edd03e --- /dev/null +++ b/arch/powerpc/perf/power8-events-list.h @@ -0,0 +1,51 @@ +/* + * Performance counter support for POWER8 processors. + * + * Copyright 2014 Sukadev Bhattiprolu, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Power8 event codes. + */ +EVENT(PM_CYC, 0x0001e) +EVENT(PM_GCT_NOSLOT_CYC, 0x100f8) +EVENT(PM_CMPLU_STALL, 0x4000a) +EVENT(PM_INST_CMPL, 0x00002) +EVENT(PM_BRU_FIN, 0x10068) +EVENT(PM_BR_MPRED_CMPL, 0x400f6) + +/* All L1 D cache load references counted at finish, gated by reject */ +EVENT(PM_LD_REF_L1, 0x100ee) +/* Load Missed L1 */ +EVENT(PM_LD_MISS_L1, 0x3e054) +/* Store Missed L1 */ +EVENT(PM_ST_MISS_L1, 0x300f0) +/* L1 cache data prefetches */ +EVENT(PM_L1_PREF, 0x0d8b8) +/* Instruction fetches from L1 */ +EVENT(PM_INST_FROM_L1, 0x04080) +/* Demand iCache Miss */ +EVENT(PM_L1_ICACHE_MISS, 0x200fd) +/* Instruction Demand sectors wriittent into IL1 */ +EVENT(PM_L1_DEMAND_WRITE, 0x0408c) +/* Instruction prefetch written into IL1 */ +EVENT(PM_IC_PREF_WRITE, 0x0408e) +/* The data cache was reloaded from local core's L3 due to a demand load */ +EVENT(PM_DATA_FROM_L3, 0x4c042) +/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ +EVENT(PM_DATA_FROM_L3MISS, 0x300fe) +/* All successful D-side store dispatches for this thread */ +EVENT(PM_L2_ST, 0x17080) +/* All successful D-side store dispatches for this thread that were L2 Miss */ +EVENT(PM_L2_ST_MISS, 0x17082) +/* Total HW L3 prefetches(Load+store) */ +EVENT(PM_L3_PREF_ALL, 0x4e052) +/* Data PTEG reload */ +EVENT(PM_DTLB_MISS, 0x300fc) +/* ITLB Reloaded */ +EVENT(PM_ITLB_MISS, 0x400fc) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 9958ba8bf0d2..690d9186a855 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -17,48 +17,16 @@ #include <asm/firmware.h> #include <asm/cputable.h> - /* * Some power8 event codes. */ -#define PM_CYC 0x0001e -#define PM_GCT_NOSLOT_CYC 0x100f8 -#define PM_CMPLU_STALL 0x4000a -#define PM_INST_CMPL 0x00002 -#define PM_BRU_FIN 0x10068 -#define PM_BR_MPRED_CMPL 0x400f6 - -/* All L1 D cache load references counted at finish, gated by reject */ -#define PM_LD_REF_L1 0x100ee -/* Load Missed L1 */ -#define PM_LD_MISS_L1 0x3e054 -/* Store Missed L1 */ -#define PM_ST_MISS_L1 0x300f0 -/* L1 cache data prefetches */ -#define PM_L1_PREF 0x0d8b8 -/* Instruction fetches from L1 */ -#define PM_INST_FROM_L1 0x04080 -/* Demand iCache Miss */ -#define PM_L1_ICACHE_MISS 0x200fd -/* Instruction Demand sectors wriittent into IL1 */ -#define PM_L1_DEMAND_WRITE 0x0408c -/* Instruction prefetch written into IL1 */ -#define PM_IC_PREF_WRITE 0x0408e -/* The data cache was reloaded from local core's L3 due to a demand load */ -#define PM_DATA_FROM_L3 0x4c042 -/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ -#define PM_DATA_FROM_L3MISS 0x300fe -/* All successful D-side store dispatches for this thread */ -#define PM_L2_ST 0x17080 -/* All successful D-side store dispatches for this thread that were L2 Miss */ -#define PM_L2_ST_MISS 0x17082 -/* Total HW L3 prefetches(Load+store) */ -#define PM_L3_PREF_ALL 0x4e052 -/* Data PTEG reload */ -#define PM_DTLB_MISS 0x300fc -/* ITLB Reloaded */ -#define PM_ITLB_MISS 0x400fc +#define EVENT(_name, _code) _name = _code, + +enum { +#include "power8-events-list.h" +}; +#undef EVENT /* * Raw event encoding for POWER8: @@ -415,7 +383,7 @@ static int power8_compute_mmcr(u64 event[], int n_ev, pmc_inuse |= 1 << pmc; } - /* In continous sampling mode, update SDAR on TLB miss */ + /* In continuous sampling mode, update SDAR on TLB miss */ mmcra = MMCRA_SDAR_MODE_TLB; mmcr1 = mmcr2 = 0; @@ -604,6 +572,71 @@ static void power8_disable_pmc(unsigned int pmc, unsigned long mmcr[]) mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1)); } +GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); +GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_GCT_NOSLOT_CYC); +GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL); +GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL); +GENERIC_EVENT_ATTR(branch-instructions, PM_BRU_FIN); +GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL); +GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); +GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1); + +CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1); +CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1); + +CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_L1_PREF); +CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1); +CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS); +CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1); +CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_WRITE); + +CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS); +CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3); +CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL); +CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS); +CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST); + +CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); +CACHE_EVENT_ATTR(branch-loads, PM_BRU_FIN); +CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); +CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); + +static struct attribute *power8_events_attr[] = { + GENERIC_EVENT_PTR(PM_CYC), + GENERIC_EVENT_PTR(PM_GCT_NOSLOT_CYC), + GENERIC_EVENT_PTR(PM_CMPLU_STALL), + GENERIC_EVENT_PTR(PM_INST_CMPL), + GENERIC_EVENT_PTR(PM_BRU_FIN), + GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), + GENERIC_EVENT_PTR(PM_LD_REF_L1), + GENERIC_EVENT_PTR(PM_LD_MISS_L1), + + CACHE_EVENT_PTR(PM_LD_MISS_L1), + CACHE_EVENT_PTR(PM_LD_REF_L1), + CACHE_EVENT_PTR(PM_L1_PREF), + CACHE_EVENT_PTR(PM_ST_MISS_L1), + CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), + CACHE_EVENT_PTR(PM_INST_FROM_L1), + CACHE_EVENT_PTR(PM_IC_PREF_WRITE), + CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), + CACHE_EVENT_PTR(PM_DATA_FROM_L3), + CACHE_EVENT_PTR(PM_L3_PREF_ALL), + CACHE_EVENT_PTR(PM_L2_ST_MISS), + CACHE_EVENT_PTR(PM_L2_ST), + + CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), + CACHE_EVENT_PTR(PM_BRU_FIN), + + CACHE_EVENT_PTR(PM_DTLB_MISS), + CACHE_EVENT_PTR(PM_ITLB_MISS), + NULL +}; + +static struct attribute_group power8_pmu_events_group = { + .name = "events", + .attrs = power8_events_attr, +}; + PMU_FORMAT_ATTR(event, "config:0-49"); PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); PMU_FORMAT_ATTR(mark, "config:8"); @@ -640,6 +673,7 @@ struct attribute_group power8_pmu_format_group = { static const struct attribute_group *power8_pmu_attr_groups[] = { &power8_pmu_format_group, + &power8_pmu_events_group, NULL, }; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c index 6eb3b2abae90..00282c2b0cae 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pci.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c @@ -319,7 +319,7 @@ mpc52xx_pci_setup(struct pci_controller *hose, tmp = in_be32(&pci_regs->gscr); #if 0 - /* Reset the exteral bus ( internal PCI controller is NOT resetted ) */ + /* Reset the exteral bus ( internal PCI controller is NOT reset ) */ /* Not necessary and can be a bad thing if for example the bootloader is displaying a splash screen or ... Just left here for documentation purpose if anyone need it */ diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 5ac70de3e48a..d7e87ff912d7 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -99,7 +99,7 @@ static void mpc85xx_cds_restart(char *cmd) pci_read_config_byte(dev, 0x47, &tmp); /* - * At this point, the harware reset should have triggered. + * At this point, the hardware reset should have triggered. * However, if it doesn't work for some mysterious reason, * just fall through to the default reset below. */ diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S index 6be1a4af3359..cc5347eb1662 100644 --- a/arch/powerpc/platforms/powermac/cache.S +++ b/arch/powerpc/platforms/powermac/cache.S @@ -23,7 +23,7 @@ * when going to sleep, when doing a PMU based cpufreq transition, * or when "offlining" a CPU on SMP machines. This code is over * paranoid, but I've had enough issues with various CPU revs and - * bugs that I decided it was worth beeing over cautious + * bugs that I decided it was worth being over cautious */ _GLOBAL(flush_disable_caches) diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 4882bfd90e27..1e02328c3f2d 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -198,7 +198,7 @@ static long ohare_htw_scc_enable(struct device_node *node, long param, if (htw) { /* Side effect: this will also power up the * modem, but it's too messy to figure out on which - * ports this controls the tranceiver and on which + * ports this controls the transceiver and on which * it controls the modem */ if (trans) @@ -463,7 +463,7 @@ static long heathrow_sound_enable(struct device_node *node, long param, unsigned long flags; /* B&W G3 and Yikes don't support that properly (the - * sound appear to never come back after beeing shut down). + * sound appear to never come back after being shut down). */ if (pmac_mb.model_id == PMAC_TYPE_YOSEMITE || pmac_mb.model_id == PMAC_TYPE_YIKES) @@ -2770,7 +2770,7 @@ set_initial_features(void) * but I'm not too sure it was audited for side-effects on other * ohare based machines... * Since I still have difficulties figuring the right way to - * differenciate them all and since that hack was there for a long + * differentiate them all and since that hack was there for a long * time, I'll keep it around */ if (macio_chips[0].type == macio_ohare) { diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index f1516b5ecec9..cd9711e72df6 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -5,7 +5,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o obj-y += opal-kmsg.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o -obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o npu-dma.o +obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 5f152b95ca0c..950b3e539057 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -167,42 +167,26 @@ static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val) return 0; } -static int pnv_eeh_outb_dbgfs_set(void *data, u64 val) -{ - return pnv_eeh_dbgfs_set(data, 0xD10, val); -} - -static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val) -{ - return pnv_eeh_dbgfs_get(data, 0xD10, val); -} - -static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val) -{ - return pnv_eeh_dbgfs_set(data, 0xD90, val); -} - -static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val) -{ - return pnv_eeh_dbgfs_get(data, 0xD90, val); -} - -static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val) -{ - return pnv_eeh_dbgfs_set(data, 0xE10, val); -} +#define PNV_EEH_DBGFS_ENTRY(name, reg) \ +static int pnv_eeh_dbgfs_set_##name(void *data, u64 val) \ +{ \ + return pnv_eeh_dbgfs_set(data, reg, val); \ +} \ + \ +static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val) \ +{ \ + return pnv_eeh_dbgfs_get(data, reg, val); \ +} \ + \ +DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name, \ + pnv_eeh_dbgfs_get_##name, \ + pnv_eeh_dbgfs_set_##name, \ + "0x%llx\n") + +PNV_EEH_DBGFS_ENTRY(outb, 0xD10); +PNV_EEH_DBGFS_ENTRY(inbA, 0xD90); +PNV_EEH_DBGFS_ENTRY(inbB, 0xE10); -static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val) -{ - return pnv_eeh_dbgfs_get(data, 0xE10, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get, - pnv_eeh_outb_dbgfs_set, "0x%llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get, - pnv_eeh_inbA_dbgfs_set, "0x%llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get, - pnv_eeh_inbB_dbgfs_set, "0x%llx\n"); #endif /* CONFIG_DEBUG_FS */ /** @@ -268,13 +252,13 @@ static int pnv_eeh_post_init(void) debugfs_create_file("err_injct_outbound", 0600, phb->dbgfs, hose, - &pnv_eeh_outb_dbgfs_ops); + &pnv_eeh_dbgfs_ops_outb); debugfs_create_file("err_injct_inboundA", 0600, phb->dbgfs, hose, - &pnv_eeh_inbA_dbgfs_ops); + &pnv_eeh_dbgfs_ops_inbA); debugfs_create_file("err_injct_inboundB", 0600, phb->dbgfs, hose, - &pnv_eeh_inbB_dbgfs_ops); + &pnv_eeh_dbgfs_ops_inbB); #endif /* CONFIG_DEBUG_FS */ } @@ -387,6 +371,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) edev->mode &= 0xFFFFFF00; edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); + edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; @@ -444,9 +429,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) * PCI devices of the PE are expected to be removed prior * to PE reset. */ - if (!edev->pe->bus) + if (!(edev->pe->state & EEH_PE_PRI_BUS)) { edev->pe->bus = pci_find_bus(hose->global_number, pdn->busno); + if (edev->pe->bus) + edev->pe->state |= EEH_PE_PRI_BUS; + } /* * Enable EEH explicitly so that we will do EEH check @@ -892,6 +880,120 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev) } } +static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type, + int pos, u16 mask) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + int i, status = 0; + + /* Wait for Transaction Pending bit to be cleared */ + for (i = 0; i < 4; i++) { + eeh_ops->read_config(pdn, pos, 2, &status); + if (!(status & mask)) + return; + + msleep((1 << i) * 100); + } + + pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n", + __func__, type, + edev->phb->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); +} + +static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 reg = 0; + + if (WARN_ON(!edev->pcie_cap)) + return -ENOTTY; + + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); + if (!(reg & PCI_EXP_DEVCAP_FLR)) + return -ENOTTY; + + switch (option) { + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + pnv_eeh_wait_for_pending(pdn, "", + edev->pcie_cap + PCI_EXP_DEVSTA, + PCI_EXP_DEVSTA_TRPND); + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, ®); + reg |= PCI_EXP_DEVCTL_BCR_FLR; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, reg); + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, ®); + reg &= ~PCI_EXP_DEVCTL_BCR_FLR; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, reg); + msleep(EEH_PE_RST_SETTLE_TIME); + break; + } + + return 0; +} + +static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 cap = 0; + + if (WARN_ON(!edev->af_cap)) + return -ENOTTY; + + eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap); + if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) + return -ENOTTY; + + switch (option) { + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + /* + * Wait for Transaction Pending bit to clear. A word-aligned + * test is used, so we use the conrol offset rather than status + * and shift the test bit to match. + */ + pnv_eeh_wait_for_pending(pdn, "AF", + edev->af_cap + PCI_AF_CTRL, + PCI_AF_STATUS_TP << 8); + eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, + 1, PCI_AF_CTRL_FLR); + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0); + msleep(EEH_PE_RST_SETTLE_TIME); + break; + } + + return 0; +} + +static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option) +{ + struct eeh_dev *edev; + struct pci_dn *pdn; + int ret; + + /* The VF PE should have only one child device */ + edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list); + pdn = eeh_dev_to_pdn(edev); + if (!pdn) + return -ENXIO; + + ret = pnv_eeh_do_flr(pdn, option); + if (!ret) + return ret; + + return pnv_eeh_do_af_flr(pdn, option); +} + /** * pnv_eeh_reset - Reset the specified PE * @pe: EEH PE @@ -953,7 +1055,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) } bus = eeh_pe_bus_get(pe); - if (pci_is_root_bus(bus) || + if (pe->type & EEH_PE_VF) + ret = pnv_eeh_reset_vf_pe(pe, option); + else if (pci_is_root_bus(bus) || pci_is_root_bus(bus->parent)) ret = pnv_eeh_root_reset(hose, option); else @@ -1092,6 +1196,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) if (!edev || !edev->pe) return false; + /* + * We will issue FLR or AF FLR to all VFs, which are contained + * in VF PE. It relies on the EEH PCI config accessors. So we + * can't block them during the window. + */ + if (edev->physfn && (edev->pe->state & EEH_PE_RESET)) + return false; + if (edev->pe->state & EEH_PE_CFG_BLOCKED) return true; @@ -1476,6 +1588,65 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) return ret; } +static int pnv_eeh_restore_vf_config(struct pci_dn *pdn) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 devctl, cmd, cap2, aer_capctl; + int old_mps; + + if (edev->pcie_cap) { + /* Restore MPS */ + old_mps = (ffs(pdn->mps) - 8) << 5; + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, &devctl); + devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; + devctl |= old_mps; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, devctl); + + /* Disable Completion Timeout */ + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, + 4, &cap2); + if (cap2 & 0x10) { + eeh_ops->read_config(pdn, + edev->pcie_cap + PCI_EXP_DEVCTL2, + 4, &cap2); + cap2 |= 0x10; + eeh_ops->write_config(pdn, + edev->pcie_cap + PCI_EXP_DEVCTL2, + 4, cap2); + } + } + + /* Enable SERR and parity checking */ + eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); + cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); + eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); + + /* Enable report various errors */ + if (edev->pcie_cap) { + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, &devctl); + devctl &= ~PCI_EXP_DEVCTL_CERE; + devctl |= (PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE); + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, devctl); + } + + /* Enable ECRC generation and check */ + if (edev->pcie_cap && edev->aer_cap) { + eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, + 4, &aer_capctl); + aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); + eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, + 4, aer_capctl); + } + + return 0; +} + static int pnv_eeh_restore_config(struct pci_dn *pdn) { struct eeh_dev *edev = pdn_to_eeh_dev(pdn); @@ -1485,9 +1656,21 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn) if (!edev) return -EEXIST; - phb = edev->phb->private_data; - ret = opal_pci_reinit(phb->opal_id, - OPAL_REINIT_PCI_DEV, edev->config_addr); + /* + * We have to restore the PCI config space after reset since the + * firmware can't see SRIOV VFs. + * + * FIXME: The MPS, error routing rules, timeout setting are worthy + * to be exported by firmware in extendible way. + */ + if (edev->physfn) { + ret = pnv_eeh_restore_vf_config(pdn); + } else { + phb = edev->phb->private_data; + ret = opal_pci_reinit(phb->opal_id, + OPAL_REINIT_PCI_DEV, edev->config_addr); + } + if (ret) { pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", __func__, edev->config_addr, ret); @@ -1516,6 +1699,40 @@ static struct eeh_ops pnv_eeh_ops = { .restore_config = pnv_eeh_restore_config }; +void pcibios_bus_add_device(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (!pdev->is_virtfn) + return; + + /* + * The following operations will fail if VF's sysfs files + * aren't created or its resources aren't finalized. + */ + eeh_add_device_early(pdn); + eeh_add_device_late(pdev); + eeh_sysfs_add_device(pdev); +} + +#ifdef CONFIG_PCI_IOV +static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + int parent_mps; + + if (!pdev->is_virtfn) + return; + + /* Synchronize MPS for VF and PF */ + parent_mps = pcie_get_mps(pdev->physfn); + if ((128 << pdev->pcie_mpss) >= parent_mps) + pcie_set_mps(pdev, parent_mps); + pdn->mps = pcie_get_mps(pdev); +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps); +#endif /* CONFIG_PCI_IOV */ + /** * eeh_powernv_init - Register platform dependent EEH operations * diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 15bfbcd5debc..fcc8b6861b63 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -35,9 +35,9 @@ int pnv_save_sprs_for_winkle(void) int rc; /* - * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross + * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across * all cpus at boot. Get these reg values of current cpu and use the - * same accross all cpus. + * same across all cpus. */ uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; uint64_t hid0_val = mfspr(SPRN_HID0); @@ -185,7 +185,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, * fastsleep workaround needs to be left in 'applied' state on all * the cores. Do this by- * 1. Patching out the call to 'undo' workaround in fastsleep exit path - * 2. Sending ipi to all the cores which have atleast one online thread + * 2. Sending ipi to all the cores which have at least one online thread * 3. Patching out the call to 'apply' workaround in fastsleep entry * path * There is no need to send ipi to cores which have all threads diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index e85aa900f5c0..7229acd9bb3a 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -278,7 +278,7 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe) /* * Enable/disable bypass mode on the NPU. The NPU only supports one - * window per link, so bypass needs to be explicity enabled or + * window per link, so bypass needs to be explicitly enabled or * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be * active at the same time. */ diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 44ed78af1a0d..39d6ff9e5630 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -31,26 +31,25 @@ struct memcons { __be32 in_cons; }; -static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, char *to, - loff_t pos, size_t count) +static struct memcons *opal_memcons = NULL; + +ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count) { - struct memcons *mc = bin_attr->private; const char *conbuf; ssize_t ret; size_t first_read = 0; uint32_t out_pos, avail; - if (!mc) + if (!opal_memcons) return -ENODEV; - out_pos = be32_to_cpu(ACCESS_ONCE(mc->out_pos)); + out_pos = be32_to_cpu(ACCESS_ONCE(opal_memcons->out_pos)); /* Now we've read out_pos, put a barrier in before reading the new * data it points to in conbuf. */ smp_rmb(); - conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys)); + conbuf = phys_to_virt(be64_to_cpu(opal_memcons->obuf_phys)); /* When the buffer has wrapped, read from the out_pos marker to the end * of the buffer, and then read the remaining data as in the un-wrapped @@ -58,7 +57,7 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, if (out_pos & MEMCONS_OUT_POS_WRAP) { out_pos &= MEMCONS_OUT_POS_MASK; - avail = be32_to_cpu(mc->obuf_size) - out_pos; + avail = be32_to_cpu(opal_memcons->obuf_size) - out_pos; ret = memory_read_from_buffer(to, count, &pos, conbuf + out_pos, avail); @@ -76,7 +75,7 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, } /* Sanity check. The firmware should not do this to us. */ - if (out_pos > be32_to_cpu(mc->obuf_size)) { + if (out_pos > be32_to_cpu(opal_memcons->obuf_size)) { pr_err("OPAL: memory console corruption. Aborting read.\n"); return -EINVAL; } @@ -91,6 +90,13 @@ out: return ret; } +static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *to, + loff_t pos, size_t count) +{ + return opal_msglog_copy(to, pos, count); +} + static struct bin_attribute opal_msglog_attr = { .attr = {.name = "msglog", .mode = 0444}, .read = opal_msglog_read @@ -117,7 +123,15 @@ void __init opal_msglog_init(void) return; } - opal_msglog_attr.private = mc; + opal_memcons = mc; +} + +void __init opal_msglog_sysfs_init(void) +{ + if (!opal_memcons) { + pr_warn("OPAL: message log initialisation failed, not creating sysfs entry\n"); + return; + } if (sysfs_create_bin_file(opal_kobj, &opal_msglog_attr) != 0) pr_warn("OPAL: sysfs file creation failed\n"); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 4e0da5af94a1..0256d0729252 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -724,6 +724,9 @@ static int __init opal_init(void) of_node_put(leds); } + /* Initialise OPAL message log interface */ + opal_msglog_init(); + /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); if (rc == 0) { @@ -739,8 +742,8 @@ static int __init opal_init(void) opal_platform_dump_init(); /* Setup system parameters interface */ opal_sys_param_init(); - /* Setup message log interface. */ - opal_msglog_init(); + /* Setup message log sysfs interface. */ + opal_msglog_sysfs_init(); } /* Initialize platform devices: IPMI backend, PRD & flash interface */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 573ae1994097..c5baaf3cc4e5 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -872,9 +872,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) if (!res->flags || !res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) - continue; - /* * The actual IOV BAR range is determined by the start address * and the actual size for num_vfs VFs BAR. This check is to @@ -903,9 +900,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) if (!res->flags || !res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) - continue; - size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); res2 = *res; res->start += size * offset; @@ -1196,29 +1190,36 @@ static void pnv_pci_ioda_setup_PEs(void) } #ifdef CONFIG_PCI_IOV -static int pnv_pci_vf_release_m64(struct pci_dev *pdev) +static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs) { struct pci_bus *bus; struct pci_controller *hose; struct pnv_phb *phb; struct pci_dn *pdn; int i, j; + int m64_bars; bus = pdev->bus; hose = pci_bus_to_host(bus); phb = hose->private_data; pdn = pci_get_pdn(pdev); + if (pdn->m64_single_mode) + m64_bars = num_vfs; + else + m64_bars = 1; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) - for (j = 0; j < M64_PER_IOV; j++) { - if (pdn->m64_wins[i][j] == IODA_INVALID_M64) + for (j = 0; j < m64_bars; j++) { + if (pdn->m64_map[j][i] == IODA_INVALID_M64) continue; opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 0); - clear_bit(pdn->m64_wins[i][j], &phb->ioda.m64_bar_alloc); - pdn->m64_wins[i][j] = IODA_INVALID_M64; + OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0); + clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc); + pdn->m64_map[j][i] = IODA_INVALID_M64; } + kfree(pdn->m64_map); return 0; } @@ -1235,8 +1236,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) int total_vfs; resource_size_t size, start; int pe_num; - int vf_groups; - int vf_per_group; + int m64_bars; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1244,29 +1244,26 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) pdn = pci_get_pdn(pdev); total_vfs = pci_sriov_get_totalvfs(pdev); - /* Initialize the m64_wins to IODA_INVALID_M64 */ - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) - for (j = 0; j < M64_PER_IOV; j++) - pdn->m64_wins[i][j] = IODA_INVALID_M64; + if (pdn->m64_single_mode) + m64_bars = num_vfs; + else + m64_bars = 1; + + pdn->m64_map = kmalloc(sizeof(*pdn->m64_map) * m64_bars, GFP_KERNEL); + if (!pdn->m64_map) + return -ENOMEM; + /* Initialize the m64_map to IODA_INVALID_M64 */ + for (i = 0; i < m64_bars ; i++) + for (j = 0; j < PCI_SRIOV_NUM_BARS; j++) + pdn->m64_map[i][j] = IODA_INVALID_M64; - if (pdn->m64_per_iov == M64_PER_IOV) { - vf_groups = (num_vfs <= M64_PER_IOV) ? num_vfs: M64_PER_IOV; - vf_per_group = (num_vfs <= M64_PER_IOV)? 1: - roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; - } else { - vf_groups = 1; - vf_per_group = 1; - } for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || !res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) - continue; - - for (j = 0; j < vf_groups; j++) { + for (j = 0; j < m64_bars; j++) { do { win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, phb->ioda.m64_bar_idx + 1, 0); @@ -1275,12 +1272,11 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) goto m64_failed; } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); - pdn->m64_wins[i][j] = win; + pdn->m64_map[j][i] = win; - if (pdn->m64_per_iov == M64_PER_IOV) { + if (pdn->m64_single_mode) { size = pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i); - size = size * vf_per_group; start = res->start + size * j; } else { size = resource_size(res); @@ -1288,16 +1284,16 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) } /* Map the M64 here */ - if (pdn->m64_per_iov == M64_PER_IOV) { - pe_num = pdn->offset + j; + if (pdn->m64_single_mode) { + pe_num = pdn->pe_num_map[j]; rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe_num, OPAL_M64_WINDOW_TYPE, - pdn->m64_wins[i][j], 0); + pdn->m64_map[j][i], 0); } rc = opal_pci_set_phb_mem_window(phb->opal_id, OPAL_M64_WINDOW_TYPE, - pdn->m64_wins[i][j], + pdn->m64_map[j][i], start, 0, /* unused */ size); @@ -1309,12 +1305,12 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) goto m64_failed; } - if (pdn->m64_per_iov == M64_PER_IOV) + if (pdn->m64_single_mode) rc = opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 2); + OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2); else rc = opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 1); + OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1); if (rc != OPAL_SUCCESS) { dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n", @@ -1326,7 +1322,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) return 0; m64_failed: - pnv_pci_vf_release_m64(pdev); + pnv_pci_vf_release_m64(pdev, num_vfs); return -EBUSY; } @@ -1353,15 +1349,13 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); } -static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) +static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) { struct pci_bus *bus; struct pci_controller *hose; struct pnv_phb *phb; struct pnv_ioda_pe *pe, *pe_n; struct pci_dn *pdn; - u16 vf_index; - int64_t rc; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1371,35 +1365,6 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) if (!pdev->is_physfn) return; - if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) { - int vf_group; - int vf_per_group; - int vf_index1; - - vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; - - for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) - for (vf_index = vf_group * vf_per_group; - vf_index < (vf_group + 1) * vf_per_group && - vf_index < num_vfs; - vf_index++) - for (vf_index1 = vf_group * vf_per_group; - vf_index1 < (vf_group + 1) * vf_per_group && - vf_index1 < num_vfs; - vf_index1++){ - - rc = opal_pci_set_peltv(phb->opal_id, - pdn->offset + vf_index, - pdn->offset + vf_index1, - OPAL_REMOVE_PE_FROM_DOMAIN); - - if (rc) - dev_warn(&pdev->dev, "%s: Failed to unlink same group PE#%d(%lld)\n", - __func__, - pdn->offset + vf_index1, rc); - } - } - list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { if (pe->parent_dev != pdev) continue; @@ -1424,7 +1389,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) struct pnv_phb *phb; struct pci_dn *pdn; struct pci_sriov *iov; - u16 num_vfs; + u16 num_vfs, i; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1434,18 +1399,25 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) num_vfs = pdn->num_vfs; /* Release VF PEs */ - pnv_ioda_release_vf_PE(pdev, num_vfs); + pnv_ioda_release_vf_PE(pdev); if (phb->type == PNV_PHB_IODA2) { - if (pdn->m64_per_iov == 1) - pnv_pci_vf_resource_shift(pdev, -pdn->offset); + if (!pdn->m64_single_mode) + pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map); /* Release M64 windows */ - pnv_pci_vf_release_m64(pdev); + pnv_pci_vf_release_m64(pdev, num_vfs); /* Release PE numbers */ - bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); - pdn->offset = 0; + if (pdn->m64_single_mode) { + for (i = 0; i < num_vfs; i++) { + if (pdn->pe_num_map[i] != IODA_INVALID_PE) + pnv_ioda_free_pe(phb, pdn->pe_num_map[i]); + } + } else + bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); + /* Releasing pe_num_map */ + kfree(pdn->pe_num_map); } } @@ -1460,7 +1432,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) int pe_num; u16 vf_index; struct pci_dn *pdn; - int64_t rc; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1472,7 +1443,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) /* Reserve PE for each VF */ for (vf_index = 0; vf_index < num_vfs; vf_index++) { - pe_num = pdn->offset + vf_index; + if (pdn->m64_single_mode) + pe_num = pdn->pe_num_map[vf_index]; + else + pe_num = *pdn->pe_num_map + vf_index; pe = &phb->ioda.pe_array[pe_num]; pe->pe_number = pe_num; @@ -1505,37 +1479,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pnv_pci_ioda2_setup_dma_pe(phb, pe); } - - if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) { - int vf_group; - int vf_per_group; - int vf_index1; - - vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; - - for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) { - for (vf_index = vf_group * vf_per_group; - vf_index < (vf_group + 1) * vf_per_group && - vf_index < num_vfs; - vf_index++) { - for (vf_index1 = vf_group * vf_per_group; - vf_index1 < (vf_group + 1) * vf_per_group && - vf_index1 < num_vfs; - vf_index1++) { - - rc = opal_pci_set_peltv(phb->opal_id, - pdn->offset + vf_index, - pdn->offset + vf_index1, - OPAL_ADD_PE_TO_DOMAIN); - - if (rc) - dev_warn(&pdev->dev, "%s: Failed to link same group PE#%d(%lld)\n", - __func__, - pdn->offset + vf_index1, rc); - } - } - } - } } int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) @@ -1545,6 +1488,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) struct pnv_phb *phb; struct pci_dn *pdn; int ret; + u16 i; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1552,20 +1496,59 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) pdn = pci_get_pdn(pdev); if (phb->type == PNV_PHB_IODA2) { + if (!pdn->vfs_expanded) { + dev_info(&pdev->dev, "don't support this SRIOV device" + " with non 64bit-prefetchable IOV BAR\n"); + return -ENOSPC; + } + + /* + * When M64 BARs functions in Single PE mode, the number of VFs + * could be enabled must be less than the number of M64 BARs. + */ + if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) { + dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n"); + return -EBUSY; + } + + /* Allocating pe_num_map */ + if (pdn->m64_single_mode) + pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map) * num_vfs, + GFP_KERNEL); + else + pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL); + + if (!pdn->pe_num_map) + return -ENOMEM; + + if (pdn->m64_single_mode) + for (i = 0; i < num_vfs; i++) + pdn->pe_num_map[i] = IODA_INVALID_PE; + /* Calculate available PE for required VFs */ - mutex_lock(&phb->ioda.pe_alloc_mutex); - pdn->offset = bitmap_find_next_zero_area( - phb->ioda.pe_alloc, phb->ioda.total_pe, - 0, num_vfs, 0); - if (pdn->offset >= phb->ioda.total_pe) { + if (pdn->m64_single_mode) { + for (i = 0; i < num_vfs; i++) { + pdn->pe_num_map[i] = pnv_ioda_alloc_pe(phb); + if (pdn->pe_num_map[i] == IODA_INVALID_PE) { + ret = -EBUSY; + goto m64_failed; + } + } + } else { + mutex_lock(&phb->ioda.pe_alloc_mutex); + *pdn->pe_num_map = bitmap_find_next_zero_area( + phb->ioda.pe_alloc, phb->ioda.total_pe, + 0, num_vfs, 0); + if (*pdn->pe_num_map >= phb->ioda.total_pe) { + mutex_unlock(&phb->ioda.pe_alloc_mutex); + dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); + kfree(pdn->pe_num_map); + return -EBUSY; + } + bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); mutex_unlock(&phb->ioda.pe_alloc_mutex); - dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); - pdn->offset = 0; - return -EBUSY; } - bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs); pdn->num_vfs = num_vfs; - mutex_unlock(&phb->ioda.pe_alloc_mutex); /* Assign M64 window accordingly */ ret = pnv_pci_vf_assign_m64(pdev, num_vfs); @@ -1579,8 +1562,8 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) * the IOV BAR according to the PE# allocated to the VFs. * Otherwise, the PE# for the VF will conflict with others. */ - if (pdn->m64_per_iov == 1) { - ret = pnv_pci_vf_resource_shift(pdev, pdn->offset); + if (!pdn->m64_single_mode) { + ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map); if (ret) goto m64_failed; } @@ -1592,8 +1575,16 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) return 0; m64_failed: - bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); - pdn->offset = 0; + if (pdn->m64_single_mode) { + for (i = 0; i < num_vfs; i++) { + if (pdn->pe_num_map[i] != IODA_INVALID_PE) + pnv_ioda_free_pe(phb, pdn->pe_num_map[i]); + } + } else + bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); + + /* Releasing pe_num_map */ + kfree(pdn->pe_num_map); return ret; } @@ -1612,8 +1603,7 @@ int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) /* Allocate PCI data */ add_dev_pci_data(pdev); - pnv_pci_sriov_enable(pdev, num_vfs); - return 0; + return pnv_pci_sriov_enable(pdev, num_vfs); } #endif /* CONFIG_PCI_IOV */ @@ -2851,45 +2841,58 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } #ifdef CONFIG_PCI_IOV static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) { - struct pci_controller *hose; - struct pnv_phb *phb; + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + const resource_size_t gate = phb->ioda.m64_segsize >> 2; struct resource *res; int i; - resource_size_t size; + resource_size_t size, total_vf_bar_sz; struct pci_dn *pdn; int mul, total_vfs; if (!pdev->is_physfn || pdev->is_added) return; - hose = pci_bus_to_host(pdev->bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); pdn->vfs_expanded = 0; + pdn->m64_single_mode = false; total_vfs = pci_sriov_get_totalvfs(pdev); - pdn->m64_per_iov = 1; mul = phb->ioda.total_pe; + total_vf_bar_sz = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || res->parent) continue; if (!pnv_pci_is_mem_pref_64(res->flags)) { - dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n", + dev_warn(&pdev->dev, "Don't support SR-IOV with" + " non M64 VF BAR%d: %pR. \n", i, res); - continue; + goto truncate_iov; } - size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + total_vf_bar_sz += pci_iov_resource_size(pdev, + i + PCI_IOV_RESOURCES); - /* bigger than 64M */ - if (size > (1 << 26)) { - dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n", - i, res); - pdn->m64_per_iov = M64_PER_IOV; + /* + * If bigger than quarter of M64 segment size, just round up + * power of two. + * + * Generally, one M64 BAR maps one IOV BAR. To avoid conflict + * with other devices, IOV BAR size is expanded to be + * (total_pe * VF_BAR_size). When VF_BAR_size is half of M64 + * segment size , the expanded size would equal to half of the + * whole M64 space size, which will exhaust the M64 Space and + * limit the system flexibility. This is a design decision to + * set the boundary to quarter of the M64 segment size. + */ + if (total_vf_bar_sz > gate) { mul = roundup_pow_of_two(total_vfs); + dev_info(&pdev->dev, + "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n", + total_vf_bar_sz, gate, mul); + pdn->m64_single_mode = true; break; } } @@ -2898,20 +2901,31 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) { - dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n", - i, res); - continue; - } - dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + /* + * On PHB3, the minimum size alignment of M64 BAR in single + * mode is 32MB. + */ + if (pdn->m64_single_mode && (size < SZ_32M)) + goto truncate_iov; + dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); res->end = res->start + size * mul - 1; dev_dbg(&pdev->dev, " %pR\n", res); dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)", i, res, mul); } pdn->vfs_expanded = mul; + + return; + +truncate_iov: + /* To save MMIO space, IOV BAR is truncated. */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + res->flags = 0; + res->end = res->start - 1; + } } #endif /* CONFIG_PCI_IOV */ @@ -3125,18 +3139,35 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno) { + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn = pci_get_pdn(pdev); - resource_size_t align, iov_align; - - iov_align = resource_size(&pdev->resource[resno]); - if (iov_align) - return iov_align; + resource_size_t align; + /* + * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the + * SR-IOV. While from hardware perspective, the range mapped by M64 + * BAR should be size aligned. + * + * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra + * powernv-specific hardware restriction is gone. But if just use the + * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with + * in one segment of M64 #15, which introduces the PE conflict between + * PF and VF. Based on this, the minimum alignment of an IOV BAR is + * m64_segsize. + * + * This function returns the total IOV BAR size if M64 BAR is in + * Shared PE mode or just VF BAR size if not. + * If the M64 BAR is in Single PE mode, return the VF BAR size or + * M64 segment size if IOV BAR size is less. + */ align = pci_iov_resource_size(pdev, resno); - if (pdn->vfs_expanded) - return pdn->vfs_expanded * align; + if (!pdn->vfs_expanded) + return align; + if (pdn->m64_single_mode) + return max(align, (resource_size_t)phb->ioda.m64_segsize); - return align; + return pdn->vfs_expanded * align; } #endif /* CONFIG_PCI_IOV */ @@ -3180,6 +3211,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose) static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .dma_dev_setup = pnv_pci_dma_dev_setup, + .dma_bus_setup = pnv_pci_dma_bus_setup, #ifdef CONFIG_PCI_MSI .setup_msi_irqs = pnv_setup_msi_irqs, .teardown_msi_irqs = pnv_teardown_msi_irqs, diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c deleted file mode 100644 index f2bdfea3b68d..000000000000 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Support PCI/PCIe on PowerNV platforms - * - * Currently supports only P5IOC2 - * - * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/irq.h> -#include <linux/io.h> -#include <linux/msi.h> - -#include <asm/sections.h> -#include <asm/io.h> -#include <asm/prom.h> -#include <asm/pci-bridge.h> -#include <asm/machdep.h> -#include <asm/msi_bitmap.h> -#include <asm/ppc-pci.h> -#include <asm/opal.h> -#include <asm/iommu.h> -#include <asm/tce.h> - -#include "powernv.h" -#include "pci.h" - -/* For now, use a fixed amount of TCE memory for each p5ioc2 - * hub, 16M will do - */ -#define P5IOC2_TCE_MEMORY 0x01000000 - -#ifdef CONFIG_PCI_MSI -static int pnv_pci_p5ioc2_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, - unsigned int hwirq, unsigned int virq, - unsigned int is_64, struct msi_msg *msg) -{ - if (WARN_ON(!is_64)) - return -ENXIO; - msg->data = hwirq - phb->msi_base; - msg->address_hi = 0x10000000; - msg->address_lo = 0; - - return 0; -} - -static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) -{ - unsigned int count; - const __be32 *prop = of_get_property(phb->hose->dn, - "ibm,opal-msi-ranges", NULL); - if (!prop) - return; - - /* Don't do MSI's on p5ioc2 PCI-X are they are not properly - * verified in HW - */ - if (of_device_is_compatible(phb->hose->dn, "ibm,p5ioc2-pcix")) - return; - phb->msi_base = be32_to_cpup(prop); - count = be32_to_cpup(prop + 1); - if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) { - pr_err("PCI %d: Failed to allocate MSI bitmap !\n", - phb->hose->global_number); - return; - } - phb->msi_setup = pnv_pci_p5ioc2_msi_setup; - phb->msi32_support = 0; - pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", - count, phb->msi_base); -} -#else -static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { } -#endif /* CONFIG_PCI_MSI */ - -static struct iommu_table_ops pnv_p5ioc2_iommu_ops = { - .set = pnv_tce_build, -#ifdef CONFIG_IOMMU_API - .exchange = pnv_tce_xchg, -#endif - .clear = pnv_tce_free, - .get = pnv_tce_get, -}; - -static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, - struct pci_dev *pdev) -{ - struct iommu_table *tbl = phb->p5ioc2.table_group.tables[0]; - - if (!tbl->it_map) { - tbl->it_ops = &pnv_p5ioc2_iommu_ops; - iommu_init_table(tbl, phb->hose->node); - iommu_register_group(&phb->p5ioc2.table_group, - pci_domain_nr(phb->hose->bus), phb->opal_id); - INIT_LIST_HEAD_RCU(&tbl->it_group_list); - pnv_pci_link_table_and_group(phb->hose->node, 0, - tbl, &phb->p5ioc2.table_group); - } - - set_iommu_table_base(&pdev->dev, tbl); - iommu_add_device(&pdev->dev); -} - -static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = { - .dma_dev_setup = pnv_pci_dma_dev_setup, -#ifdef CONFIG_PCI_MSI - .setup_msi_irqs = pnv_setup_msi_irqs, - .teardown_msi_irqs = pnv_teardown_msi_irqs, -#endif -}; - -static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, - void *tce_mem, u64 tce_size) -{ - struct pnv_phb *phb; - const __be64 *prop64; - u64 phb_id; - int64_t rc; - static int primary = 1; - struct iommu_table_group *table_group; - struct iommu_table *tbl; - - pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name); - - prop64 = of_get_property(np, "ibm,opal-phbid", NULL); - if (!prop64) { - pr_err(" Missing \"ibm,opal-phbid\" property !\n"); - return; - } - phb_id = be64_to_cpup(prop64); - pr_devel(" PHB-ID : 0x%016llx\n", phb_id); - pr_devel(" TCE AT : 0x%016lx\n", __pa(tce_mem)); - pr_devel(" TCE SZ : 0x%016llx\n", tce_size); - - rc = opal_pci_set_phb_tce_memory(phb_id, __pa(tce_mem), tce_size); - if (rc != OPAL_SUCCESS) { - pr_err(" Failed to set TCE memory, OPAL error %lld\n", rc); - return; - } - - phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); - phb->hose = pcibios_alloc_controller(np); - if (!phb->hose) { - pr_err(" Failed to allocate PCI controller\n"); - return; - } - - spin_lock_init(&phb->lock); - phb->hose->first_busno = 0; - phb->hose->last_busno = 0xff; - phb->hose->private_data = phb; - phb->hose->controller_ops = pnv_pci_p5ioc2_controller_ops; - phb->hub_id = hub_id; - phb->opal_id = phb_id; - phb->type = PNV_PHB_P5IOC2; - phb->model = PNV_PHB_MODEL_P5IOC2; - - phb->regs = of_iomap(np, 0); - - if (phb->regs == NULL) - pr_err(" Failed to map registers !\n"); - else { - pr_devel(" P_BUID = 0x%08x\n", in_be32(phb->regs + 0x100)); - pr_devel(" P_IOSZ = 0x%08x\n", in_be32(phb->regs + 0x1b0)); - pr_devel(" P_IO_ST = 0x%08x\n", in_be32(phb->regs + 0x1e0)); - pr_devel(" P_MEM1_H = 0x%08x\n", in_be32(phb->regs + 0x1a0)); - pr_devel(" P_MEM1_L = 0x%08x\n", in_be32(phb->regs + 0x190)); - pr_devel(" P_MSZ1_L = 0x%08x\n", in_be32(phb->regs + 0x1c0)); - pr_devel(" P_MEM_ST = 0x%08x\n", in_be32(phb->regs + 0x1d0)); - pr_devel(" P_MEM2_H = 0x%08x\n", in_be32(phb->regs + 0x2c0)); - pr_devel(" P_MEM2_L = 0x%08x\n", in_be32(phb->regs + 0x2b0)); - pr_devel(" P_MSZ2_H = 0x%08x\n", in_be32(phb->regs + 0x2d0)); - pr_devel(" P_MSZ2_L = 0x%08x\n", in_be32(phb->regs + 0x2e0)); - } - - /* Interpret the "ranges" property */ - /* This also maps the I/O region and sets isa_io/mem_base */ - pci_process_bridge_OF_ranges(phb->hose, np, primary); - primary = 0; - - phb->hose->ops = &pnv_pci_ops; - - /* Setup MSI support */ - pnv_pci_init_p5ioc2_msis(phb); - - /* Setup TCEs */ - phb->dma_dev_setup = pnv_pci_p5ioc2_dma_dev_setup; - pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table, - tce_mem, tce_size, 0, - IOMMU_PAGE_SHIFT_4K); - /* - * We do not allocate iommu_table as we do not support - * hotplug or SRIOV on P5IOC2 and therefore iommu_free_table() - * should not be called for phb->p5ioc2.table_group.tables[0] ever. - */ - tbl = phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table; - table_group = &phb->p5ioc2.table_group; - table_group->tce32_start = tbl->it_offset << tbl->it_page_shift; - table_group->tce32_size = tbl->it_size << tbl->it_page_shift; -} - -void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) -{ - struct device_node *phbn; - const __be64 *prop64; - u64 hub_id; - void *tce_mem; - uint64_t tce_per_phb; - int64_t rc; - int phb_count = 0; - - pr_info("Probing p5ioc2 IO-Hub %s\n", np->full_name); - - prop64 = of_get_property(np, "ibm,opal-hubid", NULL); - if (!prop64) { - pr_err(" Missing \"ibm,opal-hubid\" property !\n"); - return; - } - hub_id = be64_to_cpup(prop64); - pr_info(" HUB-ID : 0x%016llx\n", hub_id); - - /* Count child PHBs and calculate TCE space per PHB */ - for_each_child_of_node(np, phbn) { - if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || - of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) - phb_count++; - } - - if (phb_count <= 0) { - pr_info(" No PHBs for Hub %s\n", np->full_name); - return; - } - - tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count); - pr_info(" Allocating %lld MB of TCE memory per PHB\n", - tce_per_phb >> 20); - - /* Currently allocate 16M of TCE memory for every Hub - * - * XXX TODO: Make it chip local if possible - */ - tce_mem = memblock_virt_alloc(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY); - pr_debug(" TCE : 0x%016lx..0x%016lx\n", - __pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1); - rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem), - P5IOC2_TCE_MEMORY); - if (rc != OPAL_SUCCESS) { - pr_err(" Failed to allocate TCE memory, OPAL error %lld\n", rc); - return; - } - - /* Initialize PHBs */ - for_each_child_of_node(np, phbn) { - if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || - of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) { - pnv_pci_init_p5ioc2_phb(phbn, hub_id, - tce_mem, tce_per_phb); - tce_mem += tce_per_phb; - } - } -} diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 2f55c86df703..73c8dc2a353f 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -380,10 +380,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn) */ pe_no = pdn->pe_number; if (pe_no == IODA_INVALID_PE) { - if (phb->type == PNV_PHB_P5IOC2) - pe_no = 0; - else - pe_no = phb->ioda.reserved_pe; + pe_no = phb->ioda.reserved_pe; } /* @@ -599,6 +596,9 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages, u64 rpn = __pa(uaddr) >> tbl->it_page_shift; long i; + if (proto_tce & TCE_PCI_WRITE) + proto_tce |= TCE_PCI_READ; + for (i = 0; i < npages; i++) { unsigned long newtce = proto_tce | ((rpn + i) << tbl->it_page_shift); @@ -620,6 +620,9 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index, BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); + if (newtce & TCE_PCI_WRITE) + newtce |= TCE_PCI_READ; + oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)); *hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE); *direction = iommu_tce_direction(oldtce); @@ -760,6 +763,26 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev) phb->dma_dev_setup(phb, pdev); } +void pnv_pci_dma_bus_setup(struct pci_bus *bus) +{ + struct pci_controller *hose = bus->sysdata; + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pe; + + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))) + continue; + + if (!pe->pbus) + continue; + + if (bus->number == ((pe->rid >> 8) & 0xFF)) { + pe->pbus = bus; + break; + } + } +} + void pnv_pci_shutdown(void) { struct pci_controller *hose; @@ -779,7 +802,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk); void __init pnv_pci_init(void) { struct device_node *np; - bool found_ioda = false; pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN); @@ -787,20 +809,11 @@ void __init pnv_pci_init(void) if (!firmware_has_feature(FW_FEATURE_OPAL)) return; - /* Look for IODA IO-Hubs. We don't support mixing IODA - * and p5ioc2 due to the need to change some global - * probing flags - */ + /* Look for IODA IO-Hubs. */ for_each_compatible_node(np, NULL, "ibm,ioda-hub") { pnv_pci_init_ioda_hub(np); - found_ioda = true; } - /* Look for p5ioc2 IO-Hubs */ - if (!found_ioda) - for_each_compatible_node(np, NULL, "ibm,p5ioc2") - pnv_pci_init_p5ioc2_hub(np); - /* Look for ioda2 built-in PHB3's */ for_each_compatible_node(np, NULL, "ibm,ioda2-phb") pnv_pci_init_ioda2_phb(np); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 7f56313e8d72..3f814f382b2e 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -4,16 +4,14 @@ struct pci_dn; enum pnv_phb_type { - PNV_PHB_P5IOC2 = 0, - PNV_PHB_IODA1 = 1, - PNV_PHB_IODA2 = 2, - PNV_PHB_NPU = 3, + PNV_PHB_IODA1 = 0, + PNV_PHB_IODA2 = 1, + PNV_PHB_NPU = 2, }; /* Precise PHB model for error management */ enum pnv_phb_model { PNV_PHB_MODEL_UNKNOWN, - PNV_PHB_MODEL_P5IOC2, PNV_PHB_MODEL_P7IOC, PNV_PHB_MODEL_PHB3, PNV_PHB_MODEL_NPU, @@ -121,81 +119,74 @@ struct pnv_phb { void (*freeze_pe)(struct pnv_phb *phb, int pe_no); int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt); - union { - struct { - struct iommu_table iommu_table; - struct iommu_table_group table_group; - } p5ioc2; - - struct { - /* Global bridge info */ - unsigned int total_pe; - unsigned int reserved_pe; - - /* 32-bit MMIO window */ - unsigned int m32_size; - unsigned int m32_segsize; - unsigned int m32_pci_base; - - /* 64-bit MMIO window */ - unsigned int m64_bar_idx; - unsigned long m64_size; - unsigned long m64_segsize; - unsigned long m64_base; - unsigned long m64_bar_alloc; - - /* IO ports */ - unsigned int io_size; - unsigned int io_segsize; - unsigned int io_pci_base; - - /* PE allocation bitmap */ - unsigned long *pe_alloc; - /* PE allocation mutex */ - struct mutex pe_alloc_mutex; - - /* M32 & IO segment maps */ - unsigned int *m32_segmap; - unsigned int *io_segmap; - struct pnv_ioda_pe *pe_array; - - /* IRQ chip */ - int irq_chip_init; - struct irq_chip irq_chip; - - /* Sorted list of used PE's based - * on the sequence of creation - */ - struct list_head pe_list; - struct mutex pe_list_mutex; - - /* Reverse map of PEs, will have to extend if - * we are to support more than 256 PEs, indexed - * bus { bus, devfn } - */ - unsigned char pe_rmap[0x10000]; - - /* 32-bit TCE tables allocation */ - unsigned long tce32_count; - - /* Total "weight" for the sake of DMA resources - * allocation - */ - unsigned int dma_weight; - unsigned int dma_pe_count; - - /* Sorted list of used PE's, sorted at - * boot for resource allocation purposes - */ - struct list_head pe_dma_list; - - /* TCE cache invalidate registers (physical and - * remapped) - */ - phys_addr_t tce_inval_reg_phys; - __be64 __iomem *tce_inval_reg; - } ioda; - }; + struct { + /* Global bridge info */ + unsigned int total_pe; + unsigned int reserved_pe; + + /* 32-bit MMIO window */ + unsigned int m32_size; + unsigned int m32_segsize; + unsigned int m32_pci_base; + + /* 64-bit MMIO window */ + unsigned int m64_bar_idx; + unsigned long m64_size; + unsigned long m64_segsize; + unsigned long m64_base; + unsigned long m64_bar_alloc; + + /* IO ports */ + unsigned int io_size; + unsigned int io_segsize; + unsigned int io_pci_base; + + /* PE allocation bitmap */ + unsigned long *pe_alloc; + /* PE allocation mutex */ + struct mutex pe_alloc_mutex; + + /* M32 & IO segment maps */ + unsigned int *m32_segmap; + unsigned int *io_segmap; + struct pnv_ioda_pe *pe_array; + + /* IRQ chip */ + int irq_chip_init; + struct irq_chip irq_chip; + + /* Sorted list of used PE's based + * on the sequence of creation + */ + struct list_head pe_list; + struct mutex pe_list_mutex; + + /* Reverse map of PEs, will have to extend if + * we are to support more than 256 PEs, indexed + * bus { bus, devfn } + */ + unsigned char pe_rmap[0x10000]; + + /* 32-bit TCE tables allocation */ + unsigned long tce32_count; + + /* Total "weight" for the sake of DMA resources + * allocation + */ + unsigned int dma_weight; + unsigned int dma_pe_count; + + /* Sorted list of used PE's, sorted at + * boot for resource allocation purposes + */ + struct list_head pe_dma_list; + + /* TCE cache invalidate registers (physical and + * remapped) + */ + phys_addr_t tce_inval_reg_phys; + __be64 __iomem *tce_inval_reg; + } ioda; /* PHB and hub status structure */ union { @@ -232,7 +223,6 @@ extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned page_shift); -extern void pnv_pci_init_p5ioc2_hub(struct device_node *np); extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_init_npu_phb(struct device_node *np); @@ -242,6 +232,7 @@ extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev); +extern void pnv_pci_dma_bus_setup(struct pci_bus *bus); extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 503a73f59359..0babef11136f 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -407,7 +407,7 @@ static DEVICE_ATTR(subcores_per_core, 0644, static int subcore_init(void) { - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + if (!cpu_has_feature(CPU_FTR_SUBCORE)) return 0; /* diff --git a/arch/powerpc/platforms/ps3/gelic_udbg.c b/arch/powerpc/platforms/ps3/gelic_udbg.c index 20b46a19a48f..09bf24d616a5 100644 --- a/arch/powerpc/platforms/ps3/gelic_udbg.c +++ b/arch/powerpc/platforms/ps3/gelic_udbg.c @@ -13,6 +13,12 @@ * */ +#include <linux/if_ether.h> +#include <linux/etherdevice.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/udp.h> + #include <asm/io.h> #include <asm/udbg.h> #include <asm/lv1call.h> @@ -56,39 +62,8 @@ struct debug_block { u8 pkt[1520]; } __packed; -struct ethhdr { - u8 dest[6]; - u8 src[6]; - u16 type; -} __packed; - -struct vlantag { - u16 vlan; - u16 subtype; -} __packed; - -struct iphdr { - u8 ver_len; - u8 dscp_ecn; - u16 total_length; - u16 ident; - u16 frag_off_flags; - u8 ttl; - u8 proto; - u16 checksum; - u32 src; - u32 dest; -} __packed; - -struct udphdr { - u16 src; - u16 dest; - u16 len; - u16 checksum; -} __packed; - static __iomem struct ethhdr *h_eth; -static __iomem struct vlantag *h_vlan; +static __iomem struct vlan_hdr *h_vlan; static __iomem struct iphdr *h_ip; static __iomem struct udphdr *h_udp; @@ -173,8 +148,8 @@ static void gelic_debug_init(void) h_eth = (struct ethhdr *)dbg.pkt; - memset(&h_eth->dest, 0xff, 6); - memcpy(&h_eth->src, &mac, 6); + eth_broadcast_addr(h_eth->h_dest); + memcpy(&h_eth->h_source, &mac, ETH_ALEN); header_size = sizeof(struct ethhdr); @@ -183,28 +158,29 @@ static void gelic_debug_init(void) GELIC_LV1_VLAN_TX_ETHERNET_0, 0, 0, &vlan_id, &v2); if (!result) { - h_eth->type = 0x8100; + h_eth->h_proto= ETH_P_8021Q; - header_size += sizeof(struct vlantag); - h_vlan = (struct vlantag *)(h_eth + 1); - h_vlan->vlan = vlan_id; - h_vlan->subtype = 0x0800; + header_size += sizeof(struct vlan_hdr); + h_vlan = (struct vlan_hdr *)(h_eth + 1); + h_vlan->h_vlan_TCI = vlan_id; + h_vlan->h_vlan_encapsulated_proto = ETH_P_IP; h_ip = (struct iphdr *)(h_vlan + 1); } else { - h_eth->type = 0x0800; + h_eth->h_proto= 0x0800; h_ip = (struct iphdr *)(h_eth + 1); } header_size += sizeof(struct iphdr); - h_ip->ver_len = 0x45; + h_ip->version = 4; + h_ip->ihl = 5; h_ip->ttl = 10; - h_ip->proto = 0x11; - h_ip->src = 0x00000000; - h_ip->dest = 0xffffffff; + h_ip->protocol = 0x11; + h_ip->saddr = 0x00000000; + h_ip->daddr = 0xffffffff; header_size += sizeof(struct udphdr); h_udp = (struct udphdr *)(h_ip + 1); - h_udp->src = GELIC_DEBUG_PORT; + h_udp->source = GELIC_DEBUG_PORT; h_udp->dest = GELIC_DEBUG_PORT; pmsgc = pmsg = (char *)(h_udp + 1); @@ -225,16 +201,16 @@ static void gelic_sendbuf(int msgsize) int i; dbg.descr.buf_size = header_size + msgsize; - h_ip->total_length = msgsize + sizeof(struct udphdr) + + h_ip->tot_len = msgsize + sizeof(struct udphdr) + sizeof(struct iphdr); h_udp->len = msgsize + sizeof(struct udphdr); - h_ip->checksum = 0; + h_ip->check = 0; sum = 0; p = (u16 *)h_ip; for (i = 0; i < 5; i++) sum += *p++; - h_ip->checksum = ~(sum + (sum >> 16)); + h_ip->check = ~(sum + (sum >> 16)); dbg.descr.dmac_cmd_status = GELIC_DESCR_DMA_CMD_NO_CHKSUM | GELIC_DESCR_TX_DMA_FRAME_TAIL; diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index 638c4060938e..b831638e6f4a 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -78,7 +78,7 @@ struct ps3_bmp { /** * struct ps3_private - a per cpu data structure * @bmp: ps3_bmp structure - * @bmp_lock: Syncronize access to bmp. + * @bmp_lock: Synchronize access to bmp. * @ipi_debug_brk_mask: Mask for debug break IPIs * @ppe_id: HV logical_ppe_id * @thread_id: HV thread_id diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c index 849b29b3e9ae..74da18de853a 100644 --- a/arch/powerpc/platforms/pseries/hvconsole.c +++ b/arch/powerpc/platforms/pseries/hvconsole.c @@ -31,7 +31,7 @@ #include <asm/plpar_wrappers.h> /** - * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper + * hvc_get_chars - retrieve characters from firmware for denoted vterm adapter * @vtermno: The vtermno or unit_address of the adapter from which to fetch the * data. * @buf: The character buffer into which to put the character data fetched from diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 477290ad855e..2415a0d31f8f 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -505,8 +505,8 @@ static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, } #endif -static void pSeries_lpar_hpte_removebolted(unsigned long ea, - int psize, int ssize) +static int pSeries_lpar_hpte_removebolted(unsigned long ea, + int psize, int ssize) { unsigned long vpn; unsigned long slot, vsid; @@ -515,11 +515,14 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea, vpn = hpt_vpn(ea, vsid, ssize); slot = pSeries_lpar_hpte_find(vpn, psize, ssize); - BUG_ON(slot == -1); + if (slot == -1) + return -ENOENT; + /* * lpar doesn't use the passed actual page size */ pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0); + return 0; } /* diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 36df46eaba24..6e944fc6e5f9 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -515,7 +515,7 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); - /* By default, only probe PCI (can be overriden by rtas_pci) */ + /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); /* Find and initialize PCI host bridges */ diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index c69e88e91459..85729f49764f 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -575,7 +575,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) { /* use fsl_indirect_read_config for PCIe */ hose->ops = &fsl_indirect_pcie_ops; - /* For PCIE read HEADER_TYPE to identify controler mode */ + /* For PCIE read HEADER_TYPE to identify controller mode */ early_read_config_byte(hose, 0, 0, PCI_HEADER_TYPE, &hdr_type); if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE) goto no_bridge; diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c index b48197ae44d0..ffe0ee832768 100644 --- a/arch/powerpc/sysdev/fsl_rmu.c +++ b/arch/powerpc/sysdev/fsl_rmu.c @@ -570,7 +570,7 @@ int fsl_rio_port_write_init(struct fsl_rio_pw *pw) out_be32(&pw->pw_regs->pwsr, (RIO_IPWSR_TE | RIO_IPWSR_QFI | RIO_IPWSR_PWD)); - /* Configure port write contoller for snooping enable all reporting, + /* Configure port write controller for snooping enable all reporting, clear queue full */ out_be32(&pw->pw_regs->pwmr, RIO_IPWMR_SEN | RIO_IPWMR_QFIE | RIO_IPWMR_EIE | RIO_IPWMR_CQ); diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index 6f99ed3967fd..aa2c186d3115 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -238,7 +238,7 @@ void i8259_init(struct device_node *node, unsigned long intack_addr) /* init master interrupt controller */ outb(0x11, 0x20); /* Start init sequence */ outb(0x00, 0x21); /* Vector base */ - outb(0x04, 0x21); /* edge tiggered, Cascade (slave) on IRQ2 */ + outb(0x04, 0x21); /* edge triggered, Cascade (slave) on IRQ2 */ outb(0x01, 0x21); /* Select 8086 mode */ /* init slave interrupt controller */ diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 2a0452e364ba..afe3c7cd395d 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -2,7 +2,7 @@ * arch/powerpc/kernel/mpic.c * * Driver for interrupt controllers following the OpenPIC standard, the - * common implementation beeing IBM's MPIC. This driver also can deal + * common implementation being IBM's MPIC. This driver also can deal * with various broken implementations of this HW. * * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. @@ -1657,7 +1657,7 @@ void __init mpic_init(struct mpic *mpic) } } - /* FSL mpic error interrupt intialization */ + /* FSL mpic error interrupt initialization */ if (mpic->flags & MPIC_FSL_HAS_EIMR) mpic_err_int_init(mpic, MPIC_FSL_ERR_INT); } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 07a8508cb7fa..942796fa4767 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -47,6 +47,9 @@ #include <asm/debug.h> #include <asm/hw_breakpoint.h> +#include <asm/opal.h> +#include <asm/firmware.h> + #ifdef CONFIG_PPC64 #include <asm/hvcall.h> #include <asm/paca.h> @@ -119,6 +122,16 @@ static void dump(void); static void prdump(unsigned long, long); static int ppc_inst_dump(unsigned long, long, int); static void dump_log_buf(void); + +#ifdef CONFIG_PPC_POWERNV +static void dump_opal_msglog(void); +#else +static inline void dump_opal_msglog(void) +{ + printf("Machine is not running OPAL firmware.\n"); +} +#endif + static void backtrace(struct pt_regs *); static void excprint(struct pt_regs *); static void prregs(struct pt_regs *); @@ -150,6 +163,7 @@ static int cpu_cmd(void); static void csum(void); static void bootcmds(void); static void proccall(void); +static void show_tasks(void); void dump_segments(void); static void symbol_lookup(void); static void xmon_show_stack(unsigned long sp, unsigned long lr, @@ -202,6 +216,10 @@ Commands:\n\ df dump float values\n\ dd dump double values\n\ dl dump the kernel log buffer\n" +#ifdef CONFIG_PPC_POWERNV + "\ + do dump the OPAL message log\n" +#endif #ifdef CONFIG_PPC64 "\ dp[#] dump paca for current cpu, or cpu #\n\ @@ -221,6 +239,7 @@ Commands:\n\ mz zero a block of memory\n\ mi show information about memory allocation\n\ p call a procedure\n\ + P list processes/tasks\n\ r print registers\n\ s single step\n" #ifdef CONFIG_SPU_BASE @@ -233,7 +252,7 @@ Commands:\n\ " S print special registers\n\ t print backtrace\n\ x exit monitor and recover\n\ - X exit monitor and dont recover\n" + X exit monitor and don't recover\n" #if defined(CONFIG_PPC64) && !defined(CONFIG_PPC_BOOK3E) " u dump segment table or SLB\n" #elif defined(CONFIG_PPC_STD_MMU_32) @@ -950,6 +969,9 @@ cmds(struct pt_regs *excp) case 'p': proccall(); break; + case 'P': + show_tasks(); + break; #ifdef CONFIG_PPC_STD_MMU case 'u': dump_segments(); @@ -2253,6 +2275,8 @@ dump(void) last_cmd = "di\n"; } else if (c == 'l') { dump_log_buf(); + } else if (c == 'o') { + dump_opal_msglog(); } else if (c == 'r') { scanhex(&ndump); if (ndump == 0) @@ -2395,6 +2419,45 @@ dump_log_buf(void) catch_memory_errors = 0; } +#ifdef CONFIG_PPC_POWERNV +static void dump_opal_msglog(void) +{ + unsigned char buf[128]; + ssize_t res; + loff_t pos = 0; + + if (!firmware_has_feature(FW_FEATURE_OPAL)) { + printf("Machine is not running OPAL firmware.\n"); + return; + } + + if (setjmp(bus_error_jmp) != 0) { + printf("Error dumping OPAL msglog!\n"); + return; + } + + catch_memory_errors = 1; + sync(); + + xmon_start_pagination(); + while ((res = opal_msglog_copy(buf, pos, sizeof(buf) - 1))) { + if (res < 0) { + printf("Error dumping OPAL msglog! Error: %zd\n", res); + break; + } + buf[res] = '\0'; + printf("%s", buf); + pos += res; + } + xmon_end_pagination(); + + sync(); + /* wait a little while to see if we get a machine check */ + __delay(200); + catch_memory_errors = 0; +} +#endif + /* * Memory operations - move, set, print differences */ @@ -2508,6 +2571,61 @@ memzcan(void) printf("%.8x\n", a - mskip); } +static void show_task(struct task_struct *tsk) +{ + char state; + + /* + * Cloned from kdb_task_state_char(), which is not entirely + * appropriate for calling from xmon. This could be moved + * to a common, generic, routine used by both. + */ + state = (tsk->state == 0) ? 'R' : + (tsk->state < 0) ? 'U' : + (tsk->state & TASK_UNINTERRUPTIBLE) ? 'D' : + (tsk->state & TASK_STOPPED) ? 'T' : + (tsk->state & TASK_TRACED) ? 'C' : + (tsk->exit_state & EXIT_ZOMBIE) ? 'Z' : + (tsk->exit_state & EXIT_DEAD) ? 'E' : + (tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; + + printf("%p %016lx %6d %6d %c %2d %s\n", tsk, + tsk->thread.ksp, + tsk->pid, tsk->parent->pid, + state, task_thread_info(tsk)->cpu, + tsk->comm); +} + +static void show_tasks(void) +{ + unsigned long tskv; + struct task_struct *tsk = NULL; + + printf(" task_struct ->thread.ksp PID PPID S P CMD\n"); + + if (scanhex(&tskv)) + tsk = (struct task_struct *)tskv; + + if (setjmp(bus_error_jmp) != 0) { + catch_memory_errors = 0; + printf("*** Error dumping task %p\n", tsk); + return; + } + + catch_memory_errors = 1; + sync(); + + if (tsk) + show_task(tsk); + else + for_each_process(tsk) + show_task(tsk); + + sync(); + __delay(200); + catch_memory_errors = 0; +} + static void proccall(void) { unsigned long args[8]; diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index be2ac5ce349f..8a55c1aa11aa 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -4,6 +4,7 @@ ccflags-$(CONFIG_PPC_WERROR) += -Werror cxl-y += main.o file.o irq.o fault.o native.o cxl-y += context.o sysfs.o debugfs.o pci.o trace.o cxl-y += vphb.o api.o +cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o obj-$(CONFIG_CXL) += cxl.o obj-$(CONFIG_CXL_BASE) += base.o diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index ea3eeb7011e1..2107c948406d 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -51,8 +51,6 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) if (rc) goto err_mapping; - cxl_assign_psn_space(ctx); - return ctx; err_mapping: @@ -78,7 +76,6 @@ struct device *cxl_get_phys_dev(struct pci_dev *dev) return afu->adapter->dev.parent; } -EXPORT_SYMBOL_GPL(cxl_get_phys_dev); int cxl_release_context(struct cxl_context *ctx) { @@ -91,28 +88,11 @@ int cxl_release_context(struct cxl_context *ctx) } EXPORT_SYMBOL_GPL(cxl_release_context); -int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) -{ - if (num == 0) - num = ctx->afu->pp_irqs; - return afu_allocate_irqs(ctx, num); -} -EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs); - -void cxl_free_afu_irqs(struct cxl_context *ctx) -{ - afu_irq_name_free(ctx); - cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); -} -EXPORT_SYMBOL_GPL(cxl_free_afu_irqs); - static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) { __u16 range; int r; - WARN_ON(num == 0); - for (r = 0; r < CXL_IRQ_RANGES; r++) { range = ctx->irqs.range[r]; if (num < range) { @@ -123,6 +103,44 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) return 0; } +int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) +{ + int res; + irq_hw_number_t hwirq; + + if (num == 0) + num = ctx->afu->pp_irqs; + res = afu_allocate_irqs(ctx, num); + if (!res && !cpu_has_feature(CPU_FTR_HVMODE)) { + /* In a guest, the PSL interrupt is not multiplexed. It was + * allocated above, and we need to set its handler + */ + hwirq = cxl_find_afu_irq(ctx, 0); + if (hwirq) + cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl"); + } + return res; +} +EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs); + +void cxl_free_afu_irqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + unsigned int virq; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) { + hwirq = cxl_find_afu_irq(ctx, 0); + if (hwirq) { + virq = irq_find_mapping(NULL, hwirq); + if (virq) + cxl_unmap_irq(virq, ctx); + } + } + afu_irq_name_free(ctx); + cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); +} +EXPORT_SYMBOL_GPL(cxl_free_afu_irqs); + int cxl_map_afu_irq(struct cxl_context *ctx, int num, irq_handler_t handler, void *cookie, char *name) { @@ -178,7 +196,7 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed, cxl_ctx_get(); - if ((rc = cxl_attach_process(ctx, kernel, wed , 0))) { + if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) { put_pid(ctx->pid); cxl_ctx_put(); goto out; @@ -193,7 +211,7 @@ EXPORT_SYMBOL_GPL(cxl_start_context); int cxl_process_element(struct cxl_context *ctx) { - return ctx->pe; + return ctx->external_pe; } EXPORT_SYMBOL_GPL(cxl_process_element); @@ -207,7 +225,6 @@ EXPORT_SYMBOL_GPL(cxl_stop_context); void cxl_set_master(struct cxl_context *ctx) { ctx->master = true; - cxl_assign_psn_space(ctx); } EXPORT_SYMBOL_GPL(cxl_set_master); @@ -325,15 +342,11 @@ EXPORT_SYMBOL_GPL(cxl_start_work); void __iomem *cxl_psa_map(struct cxl_context *ctx) { - struct cxl_afu *afu = ctx->afu; - int rc; - - rc = cxl_afu_check_and_enable(afu); - if (rc) + if (ctx->status != STARTED) return NULL; pr_devel("%s: psn_phys%llx size:%llx\n", - __func__, afu->psn_phys, afu->adapter->ps_size); + __func__, ctx->psn_phys, ctx->psn_size); return ioremap(ctx->psn_phys, ctx->psn_size); } EXPORT_SYMBOL_GPL(cxl_psa_map); @@ -349,11 +362,11 @@ int cxl_afu_reset(struct cxl_context *ctx) struct cxl_afu *afu = ctx->afu; int rc; - rc = __cxl_afu_reset(afu); + rc = cxl_ops->afu_reset(afu); if (rc) return rc; - return cxl_afu_check_and_enable(afu); + return cxl_ops->afu_check_and_enable(afu); } EXPORT_SYMBOL_GPL(cxl_afu_reset); @@ -363,3 +376,11 @@ void cxl_perst_reloads_same_image(struct cxl_afu *afu, afu->adapter->perst_same_image = perst_reloads_same_image; } EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image); + +ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count) +{ + struct cxl_afu *afu = cxl_pci_to_afu(dev); + + return cxl_ops->read_adapter_vpd(afu->adapter, buf, count); +} +EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd); diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index a9f0dd3255a2..9b90ec6c07cd 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -11,6 +11,7 @@ #include <linux/rcupdate.h> #include <asm/errno.h> #include <misc/cxl-base.h> +#include <linux/of_platform.h> #include "cxl.h" /* protected by rcu */ @@ -84,3 +85,34 @@ void unregister_cxl_calls(struct cxl_calls *calls) synchronize_rcu(); } EXPORT_SYMBOL_GPL(unregister_cxl_calls); + +int cxl_update_properties(struct device_node *dn, + struct property *new_prop) +{ + return of_update_property(dn, new_prop); +} +EXPORT_SYMBOL_GPL(cxl_update_properties); + +static int __init cxl_base_init(void) +{ + struct device_node *np = NULL; + struct platform_device *dev; + int count = 0; + + /* + * Scan for compatible devices in guest only + */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + return 0; + + while ((np = of_find_compatible_node(np, NULL, + "ibm,coherent-platform-facility"))) { + dev = of_platform_device_create(np, NULL, NULL); + if (dev) + count++; + } + pr_devel("Found %d cxl device(s)\n", count); + return 0; +} + +module_init(cxl_base_init); diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 262b88eac414..10370f280500 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -95,7 +95,12 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, return i; ctx->pe = i; - ctx->elem = &ctx->afu->spa[i]; + if (cpu_has_feature(CPU_FTR_HVMODE)) { + ctx->elem = &ctx->afu->native->spa[i]; + ctx->external_pe = ctx->pe; + } else { + ctx->external_pe = -1; /* assigned when attaching */ + } ctx->pe_inserted = false; /* @@ -214,8 +219,8 @@ int __detach_context(struct cxl_context *ctx) /* Only warn if we detached while the link was OK. * If detach fails when hw is down, we don't care. */ - WARN_ON(cxl_detach_process(ctx) && - cxl_adapter_link_ok(ctx->afu->adapter)); + WARN_ON(cxl_ops->detach_process(ctx) && + cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)); flush_work(&ctx->fault_work); /* Only needed for dedicated process */ /* release the reference to the group leader and mm handling pid */ diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index a521bc72cec2..38e21cf7806e 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -324,6 +324,10 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; #define CXL_MODE_TIME_SLICED 0x4 #define CXL_SUPPORTED_MODES (CXL_MODE_DEDICATED | CXL_MODE_DIRECTED) +#define CXL_DEV_MINORS 13 /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */ +#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS) +#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS) + enum cxl_context_status { CLOSED, OPENED, @@ -336,6 +340,12 @@ enum prefault_modes { CXL_PREFAULT_ALL, }; +enum cxl_attrs { + CXL_ADAPTER_ATTRS, + CXL_AFU_MASTER_ATTRS, + CXL_AFU_ATTRS, +}; + struct cxl_sste { __be64 esid_data; __be64 vsid_data; @@ -344,18 +354,46 @@ struct cxl_sste { #define to_cxl_adapter(d) container_of(d, struct cxl, dev) #define to_cxl_afu(d) container_of(d, struct cxl_afu, dev) -struct cxl_afu { +struct cxl_afu_native { + void __iomem *p1n_mmio; + void __iomem *afu_desc_mmio; irq_hw_number_t psl_hwirq; + unsigned int psl_virq; + struct mutex spa_mutex; + /* + * Only the first part of the SPA is used for the process element + * linked list. The only other part that software needs to worry about + * is sw_command_status, which we store a separate pointer to. + * Everything else in the SPA is only used by hardware + */ + struct cxl_process_element *spa; + __be64 *sw_command_status; + unsigned int spa_size; + int spa_order; + int spa_max_procs; + u64 pp_offset; +}; + +struct cxl_afu_guest { + u64 handle; + phys_addr_t p2n_phys; + u64 p2n_size; + int max_ints; + struct mutex recovery_lock; + int previous_state; +}; + +struct cxl_afu { + struct cxl_afu_native *native; + struct cxl_afu_guest *guest; irq_hw_number_t serr_hwirq; - char *err_irq_name; - char *psl_irq_name; unsigned int serr_virq; - void __iomem *p1n_mmio; + char *psl_irq_name; + char *err_irq_name; void __iomem *p2n_mmio; phys_addr_t psn_phys; - u64 pp_offset; u64 pp_size; - void __iomem *afu_desc_mmio; + struct cxl *adapter; struct device dev; struct cdev afu_cdev_s, afu_cdev_m, afu_cdev_d; @@ -363,26 +401,12 @@ struct cxl_afu { struct idr contexts_idr; struct dentry *debugfs; struct mutex contexts_lock; - struct mutex spa_mutex; spinlock_t afu_cntl_lock; /* AFU error buffer fields and bin attribute for sysfs */ u64 eb_len, eb_offset; struct bin_attribute attr_eb; - /* - * Only the first part of the SPA is used for the process element - * linked list. The only other part that software needs to worry about - * is sw_command_status, which we store a separate pointer to. - * Everything else in the SPA is only used by hardware - */ - struct cxl_process_element *spa; - __be64 *sw_command_status; - unsigned int spa_size; - int spa_order; - int spa_max_procs; - unsigned int psl_virq; - /* pointer to the vphb */ struct pci_controller *phb; @@ -421,6 +445,12 @@ struct cxl_irq_name { char *name; }; +struct irq_avail { + irq_hw_number_t offset; + irq_hw_number_t range; + unsigned long *bitmap; +}; + /* * This is a cxl context. If the PSL is in dedicated mode, there will be one * of these per AFU. If in AFU directed there can be lots of these. @@ -476,7 +506,19 @@ struct cxl_context { struct cxl_process_element *elem; - int pe; /* process element handle */ + /* + * pe is the process element handle, assigned by this driver when the + * context is initialized. + * + * external_pe is the PE shown outside of cxl. + * On bare-metal, pe=external_pe, because we decide what the handle is. + * In a guest, we only find out about the pe used by pHyp when the + * context is attached, and that's the value we want to report outside + * of cxl. + */ + int pe; + int external_pe; + u32 irq_count; bool pe_inserted; bool master; @@ -488,11 +530,34 @@ struct cxl_context { struct rcu_head rcu; }; -struct cxl { +struct cxl_native { + u64 afu_desc_off; + u64 afu_desc_size; void __iomem *p1_mmio; void __iomem *p2_mmio; irq_hw_number_t err_hwirq; unsigned int err_virq; + u64 ps_off; +}; + +struct cxl_guest { + struct platform_device *pdev; + int irq_nranges; + struct cdev cdev; + irq_hw_number_t irq_base_offset; + struct irq_avail *irq_avail; + spinlock_t irq_alloc_lock; + u64 handle; + char *status; + u16 vendor; + u16 device; + u16 subsystem_vendor; + u16 subsystem; +}; + +struct cxl { + struct cxl_native *native; + struct cxl_guest *guest; spinlock_t afu_list_lock; struct cxl_afu *afu[CXL_MAX_SLICES]; struct device dev; @@ -503,9 +568,6 @@ struct cxl { struct bin_attribute cxl_attr; int adapter_num; int user_irqs; - u64 afu_desc_off; - u64 afu_desc_size; - u64 ps_off; u64 ps_size; u16 psl_rev; u16 base_image; @@ -519,13 +581,15 @@ struct cxl { bool perst_same_image; }; -int cxl_alloc_one_irq(struct cxl *adapter); -void cxl_release_one_irq(struct cxl *adapter, int hwirq); -int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num); -void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter); -int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq); +int cxl_pci_alloc_one_irq(struct cxl *adapter); +void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq); +int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num); +void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter); +int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq); int cxl_update_image_control(struct cxl *adapter); -int cxl_reset(struct cxl *adapter); +int cxl_pci_reset(struct cxl *adapter); +void cxl_pci_release_afu(struct device *dev); +ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len); /* common == phyp + powernv */ struct cxl_process_element_common { @@ -555,29 +619,32 @@ struct cxl_process_element { __be32 software_state; } __packed; -static inline bool cxl_adapter_link_ok(struct cxl *cxl) +static inline bool cxl_adapter_link_ok(struct cxl *cxl, struct cxl_afu *afu) { struct pci_dev *pdev; - pdev = to_pci_dev(cxl->dev.parent); - return !pci_channel_offline(pdev); + if (cpu_has_feature(CPU_FTR_HVMODE)) { + pdev = to_pci_dev(cxl->dev.parent); + return !pci_channel_offline(pdev); + } + return true; } static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg) { WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); - return cxl->p1_mmio + cxl_reg_off(reg); + return cxl->native->p1_mmio + cxl_reg_off(reg); } static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val) { - if (likely(cxl_adapter_link_ok(cxl))) + if (likely(cxl_adapter_link_ok(cxl, NULL))) out_be64(_cxl_p1_addr(cxl, reg), val); } static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg) { - if (likely(cxl_adapter_link_ok(cxl))) + if (likely(cxl_adapter_link_ok(cxl, NULL))) return in_be64(_cxl_p1_addr(cxl, reg)); else return ~0ULL; @@ -586,18 +653,18 @@ static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg) static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg) { WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); - return afu->p1n_mmio + cxl_reg_off(reg); + return afu->native->p1n_mmio + cxl_reg_off(reg); } static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 val) { - if (likely(cxl_adapter_link_ok(afu->adapter))) + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) out_be64(_cxl_p1n_addr(afu, reg), val); } static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg) { - if (likely(cxl_adapter_link_ok(afu->adapter))) + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) return in_be64(_cxl_p1n_addr(afu, reg)); else return ~0ULL; @@ -610,39 +677,19 @@ static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 val) { - if (likely(cxl_adapter_link_ok(afu->adapter))) + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) out_be64(_cxl_p2n_addr(afu, reg), val); } static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg) { - if (likely(cxl_adapter_link_ok(afu->adapter))) + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) return in_be64(_cxl_p2n_addr(afu, reg)); else return ~0ULL; } -static inline u64 cxl_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off) -{ - if (likely(cxl_adapter_link_ok(afu->adapter))) - return in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + - ((cr) * (afu)->crs_len) + (off)); - else - return ~0ULL; -} - -static inline u32 cxl_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off) -{ - if (likely(cxl_adapter_link_ok(afu->adapter))) - return in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + - ((cr) * (afu)->crs_len) + (off)); - else - return 0xffffffff; -} -u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off); -u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off); - -ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf, +ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, loff_t off, size_t count); @@ -652,13 +699,14 @@ struct cxl_calls { }; int register_cxl_calls(struct cxl_calls *calls); void unregister_cxl_calls(struct cxl_calls *calls); +int cxl_update_properties(struct device_node *dn, struct property *new_prop); -int cxl_alloc_adapter_nr(struct cxl *adapter); void cxl_remove_adapter_nr(struct cxl *adapter); int cxl_alloc_spa(struct cxl_afu *afu); void cxl_release_spa(struct cxl_afu *afu); +dev_t cxl_get_dev(void); int cxl_file_init(void); void cxl_file_exit(void); int cxl_register_adapter(struct cxl *adapter); @@ -679,21 +727,19 @@ void cxl_sysfs_afu_remove(struct cxl_afu *afu); int cxl_sysfs_afu_m_add(struct cxl_afu *afu); void cxl_sysfs_afu_m_remove(struct cxl_afu *afu); -int cxl_afu_activate_mode(struct cxl_afu *afu, int mode); -int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode); -int cxl_afu_deactivate_mode(struct cxl_afu *afu); +struct cxl *cxl_alloc_adapter(void); +struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice); int cxl_afu_select_best_mode(struct cxl_afu *afu); -int cxl_register_psl_irq(struct cxl_afu *afu); -void cxl_release_psl_irq(struct cxl_afu *afu); -int cxl_register_psl_err_irq(struct cxl *adapter); -void cxl_release_psl_err_irq(struct cxl *adapter); -int cxl_register_serr_irq(struct cxl_afu *afu); -void cxl_release_serr_irq(struct cxl_afu *afu); +int cxl_native_register_psl_irq(struct cxl_afu *afu); +void cxl_native_release_psl_irq(struct cxl_afu *afu); +int cxl_native_register_psl_err_irq(struct cxl *adapter); +void cxl_native_release_psl_err_irq(struct cxl *adapter); +int cxl_native_register_serr_irq(struct cxl_afu *afu); +void cxl_native_release_serr_irq(struct cxl_afu *afu); int afu_register_irqs(struct cxl_context *ctx, u32 count); void afu_release_irqs(struct cxl_context *ctx, void *cookie); void afu_irq_name_free(struct cxl_context *ctx); -irqreturn_t cxl_slice_irq_err(int irq, void *data); int cxl_debugfs_init(void); void cxl_debugfs_exit(void); @@ -707,6 +753,7 @@ void cxl_prefault(struct cxl_context *ctx, u64 wed); struct cxl *get_cxl_adapter(int num); int cxl_alloc_sst(struct cxl_context *ctx); +void cxl_dump_debug_buffer(void *addr, size_t size); void init_cxl_native(void); @@ -720,40 +767,54 @@ unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, void cxl_unmap_irq(unsigned int virq, void *cookie); int __detach_context(struct cxl_context *ctx); -/* This matches the layout of the H_COLLECT_CA_INT_INFO retbuf */ +/* + * This must match the layout of the H_COLLECT_CA_INT_INFO retbuf defined + * in PAPR. + * A word about endianness: a pointer to this structure is passed when + * calling the hcall. However, it is not a block of memory filled up by + * the hypervisor. The return values are found in registers, and copied + * one by one when returning from the hcall. See the end of the call to + * plpar_hcall9() in hvCall.S + * As a consequence: + * - we don't need to do any endianness conversion + * - the pid and tid are an exception. They are 32-bit values returned in + * the same 64-bit register. So we do need to worry about byte ordering. + */ struct cxl_irq_info { u64 dsisr; u64 dar; u64 dsr; +#ifndef CONFIG_CPU_LITTLE_ENDIAN u32 pid; u32 tid; +#else + u32 tid; + u32 pid; +#endif u64 afu_err; u64 errstat; - u64 padding[3]; /* to match the expected retbuf size for plpar_hcall9 */ + u64 proc_handle; + u64 padding[2]; /* to match the expected retbuf size for plpar_hcall9 */ }; void cxl_assign_psn_space(struct cxl_context *ctx); -int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, - u64 amr); -int cxl_detach_process(struct cxl_context *ctx); - -int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info); -int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask); +irqreturn_t cxl_irq(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info); +int cxl_register_one_irq(struct cxl *adapter, irq_handler_t handler, + void *cookie, irq_hw_number_t *dest_hwirq, + unsigned int *dest_virq, const char *name); int cxl_check_error(struct cxl_afu *afu); int cxl_afu_slbia(struct cxl_afu *afu); int cxl_tlb_slb_invalidate(struct cxl *adapter); int cxl_afu_disable(struct cxl_afu *afu); -int __cxl_afu_reset(struct cxl_afu *afu); -int cxl_afu_check_and_enable(struct cxl_afu *afu); int cxl_psl_purge(struct cxl_afu *afu); void cxl_stop_trace(struct cxl *cxl); int cxl_pci_vphb_add(struct cxl_afu *afu); -void cxl_pci_vphb_reconfigure(struct cxl_afu *afu); void cxl_pci_vphb_remove(struct cxl_afu *afu); extern struct pci_driver cxl_pci_driver; +extern struct platform_driver cxl_of_driver; int afu_allocate_irqs(struct cxl_context *ctx, u32 count); int afu_open(struct inode *inode, struct file *file); @@ -764,4 +825,61 @@ unsigned int afu_poll(struct file *file, struct poll_table_struct *poll); ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off); extern const struct file_operations afu_fops; +struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_device *dev); +void cxl_guest_remove_adapter(struct cxl *adapter); +int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node *np); +int cxl_of_read_adapter_properties(struct cxl *adapter, struct device_node *np); +ssize_t cxl_guest_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len); +ssize_t cxl_guest_read_afu_vpd(struct cxl_afu *afu, void *buf, size_t len); +int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_np); +void cxl_guest_remove_afu(struct cxl_afu *afu); +int cxl_of_read_afu_handle(struct cxl_afu *afu, struct device_node *afu_np); +int cxl_of_read_afu_properties(struct cxl_afu *afu, struct device_node *afu_np); +int cxl_guest_add_chardev(struct cxl *adapter); +void cxl_guest_remove_chardev(struct cxl *adapter); +void cxl_guest_reload_module(struct cxl *adapter); +int cxl_of_probe(struct platform_device *pdev); + +struct cxl_backend_ops { + struct module *module; + int (*adapter_reset)(struct cxl *adapter); + int (*alloc_one_irq)(struct cxl *adapter); + void (*release_one_irq)(struct cxl *adapter, int hwirq); + int (*alloc_irq_ranges)(struct cxl_irq_ranges *irqs, + struct cxl *adapter, unsigned int num); + void (*release_irq_ranges)(struct cxl_irq_ranges *irqs, + struct cxl *adapter); + int (*setup_irq)(struct cxl *adapter, unsigned int hwirq, + unsigned int virq); + irqreturn_t (*handle_psl_slice_error)(struct cxl_context *ctx, + u64 dsisr, u64 errstat); + irqreturn_t (*psl_interrupt)(int irq, void *data); + int (*ack_irq)(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask); + int (*attach_process)(struct cxl_context *ctx, bool kernel, + u64 wed, u64 amr); + int (*detach_process)(struct cxl_context *ctx); + bool (*support_attributes)(const char *attr_name, enum cxl_attrs type); + bool (*link_ok)(struct cxl *cxl, struct cxl_afu *afu); + void (*release_afu)(struct device *dev); + ssize_t (*afu_read_err_buffer)(struct cxl_afu *afu, char *buf, + loff_t off, size_t count); + int (*afu_check_and_enable)(struct cxl_afu *afu); + int (*afu_activate_mode)(struct cxl_afu *afu, int mode); + int (*afu_deactivate_mode)(struct cxl_afu *afu, int mode); + int (*afu_reset)(struct cxl_afu *afu); + int (*afu_cr_read8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 *val); + int (*afu_cr_read16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 *val); + int (*afu_cr_read32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 *val); + int (*afu_cr_read64)(struct cxl_afu *afu, int cr_idx, u64 offset, u64 *val); + int (*afu_cr_write8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 val); + int (*afu_cr_write16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 val); + int (*afu_cr_write32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 val); + ssize_t (*read_adapter_vpd)(struct cxl *adapter, void *buf, size_t count); +}; +extern const struct cxl_backend_ops cxl_native_ops; +extern const struct cxl_backend_ops cxl_guest_ops; +extern const struct cxl_backend_ops *cxl_ops; + +/* check if the given pci_dev is on the the cxl vphb bus */ +bool cxl_pci_is_vphb_device(struct pci_dev *dev); #endif diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c index 18df6f44af2a..5751899e0c17 100644 --- a/drivers/misc/cxl/debugfs.c +++ b/drivers/misc/cxl/debugfs.c @@ -118,6 +118,10 @@ void cxl_debugfs_afu_remove(struct cxl_afu *afu) int __init cxl_debugfs_init(void) { struct dentry *ent; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return 0; + ent = debugfs_create_dir("cxl", NULL); if (IS_ERR(ent)) return PTR_ERR(ent); diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index 81c3f75b7330..9a8650bcb042 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -101,7 +101,7 @@ static void cxl_ack_ae(struct cxl_context *ctx) { unsigned long flags; - cxl_ack_irq(ctx, CXL_PSL_TFC_An_AE, 0); + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_AE, 0); spin_lock_irqsave(&ctx->lock, flags); ctx->pending_fault = true; @@ -125,7 +125,7 @@ static int cxl_handle_segment_miss(struct cxl_context *ctx, else { mb(); /* Order seg table write to TFC MMIO write */ - cxl_ack_irq(ctx, CXL_PSL_TFC_An_R, 0); + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); } return IRQ_HANDLED; @@ -163,7 +163,7 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, local_irq_restore(flags); pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); - cxl_ack_irq(ctx, CXL_PSL_TFC_An_R, 0); + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); } /* @@ -254,14 +254,17 @@ void cxl_handle_fault(struct work_struct *fault_work) u64 dar = ctx->dar; struct mm_struct *mm = NULL; - if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr || - cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar || - cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) { - /* Most likely explanation is harmless - a dedicated process - * has detached and these were cleared by the PSL purge, but - * warn about it just in case */ - dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n"); - return; + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr || + cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar || + cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) { + /* Most likely explanation is harmless - a dedicated + * process has detached and these were cleared by the + * PSL purge, but warn about it just in case + */ + dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n"); + return; + } } /* Early return if the context is being / has been detached */ diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index 783337d22f36..eec468f1612f 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -26,9 +26,7 @@ #include "trace.h" #define CXL_NUM_MINORS 256 /* Total to reserve */ -#define CXL_DEV_MINORS 13 /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */ -#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS) #define CXL_AFU_MINOR_D(afu) (CXL_CARD_MINOR(afu->adapter) + 1 + (3 * afu->slice)) #define CXL_AFU_MINOR_M(afu) (CXL_AFU_MINOR_D(afu) + 1) #define CXL_AFU_MINOR_S(afu) (CXL_AFU_MINOR_D(afu) + 2) @@ -36,7 +34,6 @@ #define CXL_AFU_MKDEV_M(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_M(afu)) #define CXL_AFU_MKDEV_S(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_S(afu)) -#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS) #define CXL_DEVT_AFU(dev) ((MINOR(dev) % CXL_DEV_MINORS - 1) / 3) #define CXL_DEVT_IS_CARD(dev) (MINOR(dev) % CXL_DEV_MINORS == 0) @@ -79,7 +76,7 @@ static int __afu_open(struct inode *inode, struct file *file, bool master) if (!afu->current_mode) goto err_put_afu; - if (!cxl_adapter_link_ok(adapter)) { + if (!cxl_ops->link_ok(adapter, afu)) { rc = -EIO; goto err_put_afu; } @@ -210,8 +207,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr); - if ((rc = cxl_attach_process(ctx, false, work.work_element_descriptor, - amr))) { + if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor, + amr))) { afu_release_irqs(ctx, ctx); goto out; } @@ -222,12 +219,13 @@ out: mutex_unlock(&ctx->status_mutex); return rc; } + static long afu_ioctl_process_element(struct cxl_context *ctx, int __user *upe) { pr_devel("%s: pe: %i\n", __func__, ctx->pe); - if (copy_to_user(upe, &ctx->pe, sizeof(__u32))) + if (copy_to_user(upe, &ctx->external_pe, sizeof(__u32))) return -EFAULT; return 0; @@ -259,7 +257,7 @@ long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (ctx->status == CLOSED) return -EIO; - if (!cxl_adapter_link_ok(ctx->afu->adapter)) + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) return -EIO; pr_devel("afu_ioctl\n"); @@ -289,7 +287,7 @@ int afu_mmap(struct file *file, struct vm_area_struct *vm) if (ctx->status != STARTED) return -EIO; - if (!cxl_adapter_link_ok(ctx->afu->adapter)) + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) return -EIO; return cxl_context_iomap(ctx, vm); @@ -336,7 +334,7 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, int rc; DEFINE_WAIT(wait); - if (!cxl_adapter_link_ok(ctx->afu->adapter)) + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) return -EIO; if (count < CXL_READ_MIN_SIZE) @@ -349,7 +347,7 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, if (ctx_event_pending(ctx)) break; - if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { rc = -EIO; goto out; } @@ -445,7 +443,8 @@ static const struct file_operations afu_master_fops = { static char *cxl_devnode(struct device *dev, umode_t *mode) { - if (CXL_DEVT_IS_CARD(dev->devt)) { + if (cpu_has_feature(CPU_FTR_HVMODE) && + CXL_DEVT_IS_CARD(dev->devt)) { /* * These minor numbers will eventually be used to program the * PSL and AFUs once we have dynamic reprogramming support @@ -546,6 +545,11 @@ int cxl_register_adapter(struct cxl *adapter) return device_register(&adapter->dev); } +dev_t cxl_get_dev(void) +{ + return cxl_dev; +} + int __init cxl_file_init(void) { int rc; diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c new file mode 100644 index 000000000000..68dd0b7da471 --- /dev/null +++ b/drivers/misc/cxl/flash.c @@ -0,0 +1,538 @@ +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/semaphore.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <asm/rtas.h> + +#include "cxl.h" +#include "hcalls.h" + +#define DOWNLOAD_IMAGE 1 +#define VALIDATE_IMAGE 2 + +struct ai_header { + u16 version; + u8 reserved0[6]; + u16 vendor; + u16 device; + u16 subsystem_vendor; + u16 subsystem; + u64 image_offset; + u64 image_length; + u8 reserved1[96]; +}; + +static struct semaphore sem; +unsigned long *buffer[CXL_AI_MAX_ENTRIES]; +struct sg_list *le; +static u64 continue_token; +static unsigned int transfer; + +struct update_props_workarea { + __be32 phandle; + __be32 state; + __be64 reserved; + __be32 nprops; +} __packed; + +struct update_nodes_workarea { + __be32 state; + __be64 unit_address; + __be32 reserved; +} __packed; + +#define DEVICE_SCOPE 3 +#define NODE_ACTION_MASK 0xff000000 +#define NODE_COUNT_MASK 0x00ffffff +#define OPCODE_DELETE 0x01000000 +#define OPCODE_UPDATE 0x02000000 +#define OPCODE_ADD 0x03000000 + +static int rcall(int token, char *buf, s32 scope) +{ + int rc; + + spin_lock(&rtas_data_buf_lock); + + memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE); + rc = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope); + memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE); + + spin_unlock(&rtas_data_buf_lock); + return rc; +} + +static int update_property(struct device_node *dn, const char *name, + u32 vd, char *value) +{ + struct property *new_prop; + u32 *val; + int rc; + + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + if (!new_prop) + return -ENOMEM; + + new_prop->name = kstrdup(name, GFP_KERNEL); + if (!new_prop->name) { + kfree(new_prop); + return -ENOMEM; + } + + new_prop->length = vd; + new_prop->value = kzalloc(new_prop->length, GFP_KERNEL); + if (!new_prop->value) { + kfree(new_prop->name); + kfree(new_prop); + return -ENOMEM; + } + memcpy(new_prop->value, value, vd); + + val = (u32 *)new_prop->value; + rc = cxl_update_properties(dn, new_prop); + pr_devel("%s: update property (%s, length: %i, value: %#x)\n", + dn->name, name, vd, be32_to_cpu(*val)); + + if (rc) { + kfree(new_prop->name); + kfree(new_prop->value); + kfree(new_prop); + } + return rc; +} + +static int update_node(__be32 phandle, s32 scope) +{ + struct update_props_workarea *upwa; + struct device_node *dn; + int i, rc, ret; + char *prop_data; + char *buf; + int token; + u32 nprops; + u32 vd; + + token = rtas_token("ibm,update-properties"); + if (token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); + if (!dn) { + kfree(buf); + return -ENOENT; + } + + upwa = (struct update_props_workarea *)&buf[0]; + upwa->phandle = phandle; + do { + rc = rcall(token, buf, scope); + if (rc < 0) + break; + + prop_data = buf + sizeof(*upwa); + nprops = be32_to_cpu(upwa->nprops); + + if (*prop_data == 0) { + prop_data++; + vd = be32_to_cpu(*(__be32 *)prop_data); + prop_data += vd + sizeof(vd); + nprops--; + } + + for (i = 0; i < nprops; i++) { + char *prop_name; + + prop_name = prop_data; + prop_data += strlen(prop_name) + 1; + vd = be32_to_cpu(*(__be32 *)prop_data); + prop_data += sizeof(vd); + + if ((vd != 0x00000000) && (vd != 0x80000000)) { + ret = update_property(dn, prop_name, vd, + prop_data); + if (ret) + pr_err("cxl: Could not update property %s - %i\n", + prop_name, ret); + + prop_data += vd; + } + } + } while (rc == 1); + + of_node_put(dn); + kfree(buf); + return rc; +} + +static int update_devicetree(struct cxl *adapter, s32 scope) +{ + struct update_nodes_workarea *unwa; + u32 action, node_count; + int token, rc, i; + __be32 *data, drc_index, phandle; + char *buf; + + token = rtas_token("ibm,update-nodes"); + if (token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + unwa = (struct update_nodes_workarea *)&buf[0]; + unwa->unit_address = cpu_to_be64(adapter->guest->handle); + do { + rc = rcall(token, buf, scope); + if (rc && rc != 1) + break; + + data = (__be32 *)buf + 4; + while (be32_to_cpu(*data) & NODE_ACTION_MASK) { + action = be32_to_cpu(*data) & NODE_ACTION_MASK; + node_count = be32_to_cpu(*data) & NODE_COUNT_MASK; + pr_devel("device reconfiguration - action: %#x, nodes: %#x\n", + action, node_count); + data++; + + for (i = 0; i < node_count; i++) { + phandle = *data++; + + switch (action) { + case OPCODE_DELETE: + /* nothing to do */ + break; + case OPCODE_UPDATE: + update_node(phandle, scope); + break; + case OPCODE_ADD: + /* nothing to do, just move pointer */ + drc_index = *data++; + break; + } + } + } + } while (rc == 1); + + kfree(buf); + return 0; +} + +static int handle_image(struct cxl *adapter, int operation, + long (*fct)(u64, u64, u64, u64 *), + struct cxl_adapter_image *ai) +{ + size_t mod, s_copy, len_chunk = 0; + struct ai_header *header = NULL; + unsigned int entries = 0, i; + void *dest, *from; + int rc = 0, need_header; + + /* base adapter image header */ + need_header = (ai->flags & CXL_AI_NEED_HEADER); + if (need_header) { + header = kzalloc(sizeof(struct ai_header), GFP_KERNEL); + if (!header) + return -ENOMEM; + header->version = cpu_to_be16(1); + header->vendor = cpu_to_be16(adapter->guest->vendor); + header->device = cpu_to_be16(adapter->guest->device); + header->subsystem_vendor = cpu_to_be16(adapter->guest->subsystem_vendor); + header->subsystem = cpu_to_be16(adapter->guest->subsystem); + header->image_offset = cpu_to_be64(CXL_AI_HEADER_SIZE); + header->image_length = cpu_to_be64(ai->len_image); + } + + /* number of entries in the list */ + len_chunk = ai->len_data; + if (need_header) + len_chunk += CXL_AI_HEADER_SIZE; + + entries = len_chunk / CXL_AI_BUFFER_SIZE; + mod = len_chunk % CXL_AI_BUFFER_SIZE; + if (mod) + entries++; + + if (entries > CXL_AI_MAX_ENTRIES) { + rc = -EINVAL; + goto err; + } + + /* < -- MAX_CHUNK_SIZE = 4096 * 256 = 1048576 bytes --> + * chunk 0 ---------------------------------------------------- + * | header | data | + * ---------------------------------------------------- + * chunk 1 ---------------------------------------------------- + * | data | + * ---------------------------------------------------- + * .... + * chunk n ---------------------------------------------------- + * | data | + * ---------------------------------------------------- + */ + from = (void *) ai->data; + for (i = 0; i < entries; i++) { + dest = buffer[i]; + s_copy = CXL_AI_BUFFER_SIZE; + + if ((need_header) && (i == 0)) { + /* add adapter image header */ + memcpy(buffer[i], header, sizeof(struct ai_header)); + s_copy = CXL_AI_BUFFER_SIZE - CXL_AI_HEADER_SIZE; + dest += CXL_AI_HEADER_SIZE; /* image offset */ + } + if ((i == (entries - 1)) && mod) + s_copy = mod; + + /* copy data */ + if (copy_from_user(dest, from, s_copy)) + goto err; + + /* fill in the list */ + le[i].phys_addr = cpu_to_be64(virt_to_phys(buffer[i])); + le[i].len = cpu_to_be64(CXL_AI_BUFFER_SIZE); + if ((i == (entries - 1)) && mod) + le[i].len = cpu_to_be64(mod); + from += s_copy; + } + pr_devel("%s (op: %i, need header: %i, entries: %i, token: %#llx)\n", + __func__, operation, need_header, entries, continue_token); + + /* + * download/validate the adapter image to the coherent + * platform facility + */ + rc = fct(adapter->guest->handle, virt_to_phys(le), entries, + &continue_token); + if (rc == 0) /* success of download/validation operation */ + continue_token = 0; + +err: + kfree(header); + + return rc; +} + +static int transfer_image(struct cxl *adapter, int operation, + struct cxl_adapter_image *ai) +{ + int rc = 0; + int afu; + + switch (operation) { + case DOWNLOAD_IMAGE: + rc = handle_image(adapter, operation, + &cxl_h_download_adapter_image, ai); + if (rc < 0) { + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + } + return rc; + + case VALIDATE_IMAGE: + rc = handle_image(adapter, operation, + &cxl_h_validate_adapter_image, ai); + if (rc < 0) { + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + return rc; + } + if (rc == 0) { + pr_devel("remove curent afu\n"); + for (afu = 0; afu < adapter->slices; afu++) + cxl_guest_remove_afu(adapter->afu[afu]); + + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + + /* The entire image has now been + * downloaded and the validation has + * been successfully performed. + * After that, the partition should call + * ibm,update-nodes and + * ibm,update-properties to receive the + * current configuration + */ + rc = update_devicetree(adapter, DEVICE_SCOPE); + transfer = 1; + } + return rc; + } + + return -EINVAL; +} + +static long ioctl_transfer_image(struct cxl *adapter, int operation, + struct cxl_adapter_image __user *uai) +{ + struct cxl_adapter_image ai; + + pr_devel("%s\n", __func__); + + if (copy_from_user(&ai, uai, sizeof(struct cxl_adapter_image))) + return -EFAULT; + + /* + * Make sure reserved fields and bits are set to 0 + */ + if (ai.reserved1 || ai.reserved2 || ai.reserved3 || ai.reserved4 || + (ai.flags & ~CXL_AI_ALL)) + return -EINVAL; + + return transfer_image(adapter, operation, &ai); +} + +static int device_open(struct inode *inode, struct file *file) +{ + int adapter_num = CXL_DEVT_ADAPTER(inode->i_rdev); + struct cxl *adapter; + int rc = 0, i; + + pr_devel("in %s\n", __func__); + + BUG_ON(sizeof(struct ai_header) != CXL_AI_HEADER_SIZE); + + /* Allows one process to open the device by using a semaphore */ + if (down_interruptible(&sem) != 0) + return -EPERM; + + if (!(adapter = get_cxl_adapter(adapter_num))) + return -ENODEV; + + file->private_data = adapter; + continue_token = 0; + transfer = 0; + + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) + buffer[i] = NULL; + + /* aligned buffer containing list entries which describes up to + * 1 megabyte of data (256 entries of 4096 bytes each) + * Logical real address of buffer 0 - Buffer 0 length in bytes + * Logical real address of buffer 1 - Buffer 1 length in bytes + * Logical real address of buffer 2 - Buffer 2 length in bytes + * .... + * .... + * Logical real address of buffer N - Buffer N length in bytes + */ + le = (struct sg_list *)get_zeroed_page(GFP_KERNEL); + if (!le) { + rc = -ENOMEM; + goto err; + } + + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { + buffer[i] = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!buffer[i]) { + rc = -ENOMEM; + goto err1; + } + } + + return 0; + +err1: + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { + if (buffer[i]) + free_page((unsigned long) buffer[i]); + } + + if (le) + free_page((unsigned long) le); +err: + put_device(&adapter->dev); + + return rc; +} + +static long device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct cxl *adapter = file->private_data; + + pr_devel("in %s\n", __func__); + + if (cmd == CXL_IOCTL_DOWNLOAD_IMAGE) + return ioctl_transfer_image(adapter, + DOWNLOAD_IMAGE, + (struct cxl_adapter_image __user *)arg); + else if (cmd == CXL_IOCTL_VALIDATE_IMAGE) + return ioctl_transfer_image(adapter, + VALIDATE_IMAGE, + (struct cxl_adapter_image __user *)arg); + else + return -EINVAL; +} + +static long device_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return device_ioctl(file, cmd, arg); +} + +static int device_close(struct inode *inode, struct file *file) +{ + struct cxl *adapter = file->private_data; + int i; + + pr_devel("in %s\n", __func__); + + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { + if (buffer[i]) + free_page((unsigned long) buffer[i]); + } + + if (le) + free_page((unsigned long) le); + + up(&sem); + put_device(&adapter->dev); + continue_token = 0; + + /* reload the module */ + if (transfer) + cxl_guest_reload_module(adapter); + else { + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + } + + transfer = 0; + return 0; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = device_open, + .unlocked_ioctl = device_ioctl, + .compat_ioctl = device_compat_ioctl, + .release = device_close, +}; + +void cxl_guest_remove_chardev(struct cxl *adapter) +{ + cdev_del(&adapter->guest->cdev); +} + +int cxl_guest_add_chardev(struct cxl *adapter) +{ + dev_t devt; + int rc; + + devt = MKDEV(MAJOR(cxl_get_dev()), CXL_CARD_MINOR(adapter)); + cdev_init(&adapter->guest->cdev, &fops); + if ((rc = cdev_add(&adapter->guest->cdev, devt, 1))) { + dev_err(&adapter->dev, + "Unable to add chardev on adapter (card%i): %i\n", + adapter->adapter_num, rc); + goto err; + } + adapter->dev.devt = devt; + sema_init(&sem, 1); +err: + return rc; +} diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c new file mode 100644 index 000000000000..8213372de2b7 --- /dev/null +++ b/drivers/misc/cxl/guest.c @@ -0,0 +1,1177 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/delay.h> + +#include "cxl.h" +#include "hcalls.h" +#include "trace.h" + +#define CXL_ERROR_DETECTED_EVENT 1 +#define CXL_SLOT_RESET_EVENT 2 +#define CXL_RESUME_EVENT 3 + +static void pci_error_handlers(struct cxl_afu *afu, + int bus_error_event, + pci_channel_state_t state) +{ + struct pci_dev *afu_dev; + + if (afu->phb == NULL) + return; + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + if (!afu_dev->driver) + continue; + + switch (bus_error_event) { + case CXL_ERROR_DETECTED_EVENT: + afu_dev->error_state = state; + + if (afu_dev->driver->err_handler && + afu_dev->driver->err_handler->error_detected) + afu_dev->driver->err_handler->error_detected(afu_dev, state); + break; + case CXL_SLOT_RESET_EVENT: + afu_dev->error_state = state; + + if (afu_dev->driver->err_handler && + afu_dev->driver->err_handler->slot_reset) + afu_dev->driver->err_handler->slot_reset(afu_dev); + break; + case CXL_RESUME_EVENT: + if (afu_dev->driver->err_handler && + afu_dev->driver->err_handler->resume) + afu_dev->driver->err_handler->resume(afu_dev); + break; + } + } +} + +static irqreturn_t guest_handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, + u64 errstat) +{ + pr_devel("in %s\n", __func__); + dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%.16llx\n", errstat); + + return cxl_ops->ack_irq(ctx, 0, errstat); +} + +static ssize_t guest_collect_vpd(struct cxl *adapter, struct cxl_afu *afu, + void *buf, size_t len) +{ + unsigned int entries, mod; + unsigned long **vpd_buf = NULL; + struct sg_list *le; + int rc = 0, i, tocopy; + u64 out = 0; + + if (buf == NULL) + return -EINVAL; + + /* number of entries in the list */ + entries = len / SG_BUFFER_SIZE; + mod = len % SG_BUFFER_SIZE; + if (mod) + entries++; + + if (entries > SG_MAX_ENTRIES) { + entries = SG_MAX_ENTRIES; + len = SG_MAX_ENTRIES * SG_BUFFER_SIZE; + mod = 0; + } + + vpd_buf = kzalloc(entries * sizeof(unsigned long *), GFP_KERNEL); + if (!vpd_buf) + return -ENOMEM; + + le = (struct sg_list *)get_zeroed_page(GFP_KERNEL); + if (!le) { + rc = -ENOMEM; + goto err1; + } + + for (i = 0; i < entries; i++) { + vpd_buf[i] = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!vpd_buf[i]) { + rc = -ENOMEM; + goto err2; + } + le[i].phys_addr = cpu_to_be64(virt_to_phys(vpd_buf[i])); + le[i].len = cpu_to_be64(SG_BUFFER_SIZE); + if ((i == (entries - 1)) && mod) + le[i].len = cpu_to_be64(mod); + } + + if (adapter) + rc = cxl_h_collect_vpd_adapter(adapter->guest->handle, + virt_to_phys(le), entries, &out); + else + rc = cxl_h_collect_vpd(afu->guest->handle, 0, + virt_to_phys(le), entries, &out); + pr_devel("length of available (entries: %i), vpd: %#llx\n", + entries, out); + + if (!rc) { + /* + * hcall returns in 'out' the size of available VPDs. + * It fills the buffer with as much data as possible. + */ + if (out < len) + len = out; + rc = len; + if (out) { + for (i = 0; i < entries; i++) { + if (len < SG_BUFFER_SIZE) + tocopy = len; + else + tocopy = SG_BUFFER_SIZE; + memcpy(buf, vpd_buf[i], tocopy); + buf += tocopy; + len -= tocopy; + } + } + } +err2: + for (i = 0; i < entries; i++) { + if (vpd_buf[i]) + free_page((unsigned long) vpd_buf[i]); + } + free_page((unsigned long) le); +err1: + kfree(vpd_buf); + return rc; +} + +static int guest_get_irq_info(struct cxl_context *ctx, struct cxl_irq_info *info) +{ + return cxl_h_collect_int_info(ctx->afu->guest->handle, ctx->process_token, info); +} + +static irqreturn_t guest_psl_irq(int irq, void *data) +{ + struct cxl_context *ctx = data; + struct cxl_irq_info irq_info; + int rc; + + pr_devel("%d: received PSL interrupt %i\n", ctx->pe, irq); + rc = guest_get_irq_info(ctx, &irq_info); + if (rc) { + WARN(1, "Unable to get IRQ info: %i\n", rc); + return IRQ_HANDLED; + } + + rc = cxl_irq(irq, ctx, &irq_info); + return rc; +} + +static int afu_read_error_state(struct cxl_afu *afu, int *state_out) +{ + u64 state; + int rc = 0; + + rc = cxl_h_read_error_state(afu->guest->handle, &state); + if (!rc) { + WARN_ON(state != H_STATE_NORMAL && + state != H_STATE_DISABLE && + state != H_STATE_TEMP_UNAVAILABLE && + state != H_STATE_PERM_UNAVAILABLE); + *state_out = state & 0xffffffff; + } + return rc; +} + +static irqreturn_t guest_slice_irq_err(int irq, void *data) +{ + struct cxl_afu *afu = data; + int rc; + u64 serr; + + WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq); + rc = cxl_h_get_fn_error_interrupt(afu->guest->handle, &serr); + if (rc) { + dev_crit(&afu->dev, "Couldn't read PSL_SERR_An: %d\n", rc); + return IRQ_HANDLED; + } + dev_crit(&afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr); + + rc = cxl_h_ack_fn_error_interrupt(afu->guest->handle, serr); + if (rc) + dev_crit(&afu->dev, "Couldn't ack slice error interrupt: %d\n", + rc); + + return IRQ_HANDLED; +} + + +static int irq_alloc_range(struct cxl *adapter, int len, int *irq) +{ + int i, n; + struct irq_avail *cur; + + for (i = 0; i < adapter->guest->irq_nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + n = bitmap_find_next_zero_area(cur->bitmap, cur->range, + 0, len, 0); + if (n < cur->range) { + bitmap_set(cur->bitmap, n, len); + *irq = cur->offset + n; + pr_devel("guest: allocate IRQs %#x->%#x\n", + *irq, *irq + len - 1); + + return 0; + } + } + return -ENOSPC; +} + +static int irq_free_range(struct cxl *adapter, int irq, int len) +{ + int i, n; + struct irq_avail *cur; + + if (len == 0) + return -ENOENT; + + for (i = 0; i < adapter->guest->irq_nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + if (irq >= cur->offset && + (irq + len) <= (cur->offset + cur->range)) { + n = irq - cur->offset; + bitmap_clear(cur->bitmap, n, len); + pr_devel("guest: release IRQs %#x->%#x\n", + irq, irq + len - 1); + return 0; + } + } + return -ENOENT; +} + +static int guest_reset(struct cxl *adapter) +{ + struct cxl_afu *afu = NULL; + int i, rc; + + pr_devel("Adapter reset request\n"); + for (i = 0; i < adapter->slices; i++) { + if ((afu = adapter->afu[i])) { + pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, + pci_channel_io_frozen); + cxl_context_detach_all(afu); + } + } + + rc = cxl_h_reset_adapter(adapter->guest->handle); + for (i = 0; i < adapter->slices; i++) { + if (!rc && (afu = adapter->afu[i])) { + pci_error_handlers(afu, CXL_SLOT_RESET_EVENT, + pci_channel_io_normal); + pci_error_handlers(afu, CXL_RESUME_EVENT, 0); + } + } + return rc; +} + +static int guest_alloc_one_irq(struct cxl *adapter) +{ + int irq; + + spin_lock(&adapter->guest->irq_alloc_lock); + if (irq_alloc_range(adapter, 1, &irq)) + irq = -ENOSPC; + spin_unlock(&adapter->guest->irq_alloc_lock); + return irq; +} + +static void guest_release_one_irq(struct cxl *adapter, int irq) +{ + spin_lock(&adapter->guest->irq_alloc_lock); + irq_free_range(adapter, irq, 1); + spin_unlock(&adapter->guest->irq_alloc_lock); +} + +static int guest_alloc_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter, unsigned int num) +{ + int i, try, irq; + + memset(irqs, 0, sizeof(struct cxl_irq_ranges)); + + spin_lock(&adapter->guest->irq_alloc_lock); + for (i = 0; i < CXL_IRQ_RANGES && num; i++) { + try = num; + while (try) { + if (irq_alloc_range(adapter, try, &irq) == 0) + break; + try /= 2; + } + if (!try) + goto error; + irqs->offset[i] = irq; + irqs->range[i] = try; + num -= try; + } + if (num) + goto error; + spin_unlock(&adapter->guest->irq_alloc_lock); + return 0; + +error: + for (i = 0; i < CXL_IRQ_RANGES; i++) + irq_free_range(adapter, irqs->offset[i], irqs->range[i]); + spin_unlock(&adapter->guest->irq_alloc_lock); + return -ENOSPC; +} + +static void guest_release_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter) +{ + int i; + + spin_lock(&adapter->guest->irq_alloc_lock); + for (i = 0; i < CXL_IRQ_RANGES; i++) + irq_free_range(adapter, irqs->offset[i], irqs->range[i]); + spin_unlock(&adapter->guest->irq_alloc_lock); +} + +static int guest_register_serr_irq(struct cxl_afu *afu) +{ + afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&afu->dev)); + if (!afu->err_irq_name) + return -ENOMEM; + + if (!(afu->serr_virq = cxl_map_irq(afu->adapter, afu->serr_hwirq, + guest_slice_irq_err, afu, afu->err_irq_name))) { + kfree(afu->err_irq_name); + afu->err_irq_name = NULL; + return -ENOMEM; + } + + return 0; +} + +static void guest_release_serr_irq(struct cxl_afu *afu) +{ + cxl_unmap_irq(afu->serr_virq, afu); + cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq); + kfree(afu->err_irq_name); +} + +static int guest_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) +{ + return cxl_h_control_faults(ctx->afu->guest->handle, ctx->process_token, + tfc >> 32, (psl_reset_mask != 0)); +} + +static void disable_afu_irqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + unsigned int virq; + int r, i; + + pr_devel("Disabling AFU(%d) interrupts\n", ctx->afu->slice); + for (r = 0; r < CXL_IRQ_RANGES; r++) { + hwirq = ctx->irqs.offset[r]; + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + virq = irq_find_mapping(NULL, hwirq); + disable_irq(virq); + } + } +} + +static void enable_afu_irqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + unsigned int virq; + int r, i; + + pr_devel("Enabling AFU(%d) interrupts\n", ctx->afu->slice); + for (r = 0; r < CXL_IRQ_RANGES; r++) { + hwirq = ctx->irqs.offset[r]; + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + virq = irq_find_mapping(NULL, hwirq); + enable_irq(virq); + } + } +} + +static int _guest_afu_cr_readXX(int sz, struct cxl_afu *afu, int cr_idx, + u64 offset, u64 *val) +{ + unsigned long cr; + char c; + int rc = 0; + + if (afu->crs_len < sz) + return -ENOENT; + + if (unlikely(offset >= afu->crs_len)) + return -ERANGE; + + cr = get_zeroed_page(GFP_KERNEL); + if (!cr) + return -ENOMEM; + + rc = cxl_h_get_config(afu->guest->handle, cr_idx, offset, + virt_to_phys((void *)cr), sz); + if (rc) + goto err; + + switch (sz) { + case 1: + c = *((char *) cr); + *val = c; + break; + case 2: + *val = in_le16((u16 *)cr); + break; + case 4: + *val = in_le32((unsigned *)cr); + break; + case 8: + *val = in_le64((u64 *)cr); + break; + default: + WARN_ON(1); + } +err: + free_page(cr); + return rc; +} + +static int guest_afu_cr_read32(struct cxl_afu *afu, int cr_idx, u64 offset, + u32 *out) +{ + int rc; + u64 val; + + rc = _guest_afu_cr_readXX(4, afu, cr_idx, offset, &val); + if (!rc) + *out = (u32) val; + return rc; +} + +static int guest_afu_cr_read16(struct cxl_afu *afu, int cr_idx, u64 offset, + u16 *out) +{ + int rc; + u64 val; + + rc = _guest_afu_cr_readXX(2, afu, cr_idx, offset, &val); + if (!rc) + *out = (u16) val; + return rc; +} + +static int guest_afu_cr_read8(struct cxl_afu *afu, int cr_idx, u64 offset, + u8 *out) +{ + int rc; + u64 val; + + rc = _guest_afu_cr_readXX(1, afu, cr_idx, offset, &val); + if (!rc) + *out = (u8) val; + return rc; +} + +static int guest_afu_cr_read64(struct cxl_afu *afu, int cr_idx, u64 offset, + u64 *out) +{ + return _guest_afu_cr_readXX(8, afu, cr_idx, offset, out); +} + +static int guest_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in) +{ + /* config record is not writable from guest */ + return -EPERM; +} + +static int guest_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in) +{ + /* config record is not writable from guest */ + return -EPERM; +} + +static int guest_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in) +{ + /* config record is not writable from guest */ + return -EPERM; +} + +static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) +{ + struct cxl_process_element_hcall *elem; + struct cxl *adapter = ctx->afu->adapter; + const struct cred *cred; + u32 pid, idx; + int rc, r, i; + u64 mmio_addr, mmio_size; + __be64 flags = 0; + + /* Must be 8 byte aligned and cannot cross a 4096 byte boundary */ + if (!(elem = (struct cxl_process_element_hcall *) + get_zeroed_page(GFP_KERNEL))) + return -ENOMEM; + + elem->version = cpu_to_be64(CXL_PROCESS_ELEMENT_VERSION); + if (ctx->kernel) { + pid = 0; + flags |= CXL_PE_TRANSLATION_ENABLED; + flags |= CXL_PE_PRIVILEGED_PROCESS; + if (mfmsr() & MSR_SF) + flags |= CXL_PE_64_BIT; + } else { + pid = current->pid; + flags |= CXL_PE_PROBLEM_STATE; + flags |= CXL_PE_TRANSLATION_ENABLED; + if (!test_tsk_thread_flag(current, TIF_32BIT)) + flags |= CXL_PE_64_BIT; + cred = get_current_cred(); + if (uid_eq(cred->euid, GLOBAL_ROOT_UID)) + flags |= CXL_PE_PRIVILEGED_PROCESS; + put_cred(cred); + } + elem->flags = cpu_to_be64(flags); + elem->common.tid = cpu_to_be32(0); /* Unused */ + elem->common.pid = cpu_to_be32(pid); + elem->common.csrp = cpu_to_be64(0); /* disable */ + elem->common.aurp0 = cpu_to_be64(0); /* disable */ + elem->common.aurp1 = cpu_to_be64(0); /* disable */ + + cxl_prefault(ctx, wed); + + elem->common.sstp0 = cpu_to_be64(ctx->sstp0); + elem->common.sstp1 = cpu_to_be64(ctx->sstp1); + for (r = 0; r < CXL_IRQ_RANGES; r++) { + for (i = 0; i < ctx->irqs.range[r]; i++) { + if (r == 0 && i == 0) { + elem->pslVirtualIsn = cpu_to_be32(ctx->irqs.offset[0]); + } else { + idx = ctx->irqs.offset[r] + i - adapter->guest->irq_base_offset; + elem->applicationVirtualIsnBitmap[idx / 8] |= 0x80 >> (idx % 8); + } + } + } + elem->common.amr = cpu_to_be64(amr); + elem->common.wed = cpu_to_be64(wed); + + disable_afu_irqs(ctx); + + rc = cxl_h_attach_process(ctx->afu->guest->handle, elem, + &ctx->process_token, &mmio_addr, &mmio_size); + if (rc == H_SUCCESS) { + if (ctx->master || !ctx->afu->pp_psa) { + ctx->psn_phys = ctx->afu->psn_phys; + ctx->psn_size = ctx->afu->adapter->ps_size; + } else { + ctx->psn_phys = mmio_addr; + ctx->psn_size = mmio_size; + } + if (ctx->afu->pp_psa && mmio_size && + ctx->afu->pp_size == 0) { + /* + * There's no property in the device tree to read the + * pp_size. We only find out at the 1st attach. + * Compared to bare-metal, it is too late and we + * should really lock here. However, on powerVM, + * pp_size is really only used to display in /sys. + * Being discussed with pHyp for their next release. + */ + ctx->afu->pp_size = mmio_size; + } + /* from PAPR: process element is bytes 4-7 of process token */ + ctx->external_pe = ctx->process_token & 0xFFFFFFFF; + pr_devel("CXL pe=%i is known as %i for pHyp, mmio_size=%#llx", + ctx->pe, ctx->external_pe, ctx->psn_size); + ctx->pe_inserted = true; + enable_afu_irqs(ctx); + } + + free_page((u64)elem); + return rc; +} + +static int guest_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) +{ + pr_devel("in %s\n", __func__); + + ctx->kernel = kernel; + if (ctx->afu->current_mode == CXL_MODE_DIRECTED) + return attach_afu_directed(ctx, wed, amr); + + /* dedicated mode not supported on FW840 */ + + return -EINVAL; +} + +static int detach_afu_directed(struct cxl_context *ctx) +{ + if (!ctx->pe_inserted) + return 0; + if (cxl_h_detach_process(ctx->afu->guest->handle, ctx->process_token)) + return -1; + return 0; +} + +static int guest_detach_process(struct cxl_context *ctx) +{ + pr_devel("in %s\n", __func__); + trace_cxl_detach(ctx); + + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) + return -EIO; + + if (ctx->afu->current_mode == CXL_MODE_DIRECTED) + return detach_afu_directed(ctx); + + return -EINVAL; +} + +static void guest_release_afu(struct device *dev) +{ + struct cxl_afu *afu = to_cxl_afu(dev); + + pr_devel("%s\n", __func__); + + idr_destroy(&afu->contexts_idr); + + kfree(afu->guest); + kfree(afu); +} + +ssize_t cxl_guest_read_afu_vpd(struct cxl_afu *afu, void *buf, size_t len) +{ + return guest_collect_vpd(NULL, afu, buf, len); +} + +#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE +static ssize_t guest_afu_read_err_buffer(struct cxl_afu *afu, char *buf, + loff_t off, size_t count) +{ + void *tbuf = NULL; + int rc = 0; + + tbuf = (void *) get_zeroed_page(GFP_KERNEL); + if (!tbuf) + return -ENOMEM; + + rc = cxl_h_get_afu_err(afu->guest->handle, + off & 0x7, + virt_to_phys(tbuf), + count); + if (rc) + goto err; + + if (count > ERR_BUFF_MAX_COPY_SIZE) + count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7); + memcpy(buf, tbuf, count); +err: + free_page((u64)tbuf); + + return rc; +} + +static int guest_afu_check_and_enable(struct cxl_afu *afu) +{ + return 0; +} + +static bool guest_support_attributes(const char *attr_name, + enum cxl_attrs type) +{ + switch (type) { + case CXL_ADAPTER_ATTRS: + if ((strcmp(attr_name, "base_image") == 0) || + (strcmp(attr_name, "load_image_on_perst") == 0) || + (strcmp(attr_name, "perst_reloads_same_image") == 0) || + (strcmp(attr_name, "image_loaded") == 0)) + return false; + break; + case CXL_AFU_MASTER_ATTRS: + if ((strcmp(attr_name, "pp_mmio_off") == 0)) + return false; + break; + case CXL_AFU_ATTRS: + break; + default: + break; + } + + return true; +} + +static int activate_afu_directed(struct cxl_afu *afu) +{ + int rc; + + dev_info(&afu->dev, "Activating AFU(%d) directed mode\n", afu->slice); + + afu->current_mode = CXL_MODE_DIRECTED; + + afu->num_procs = afu->max_procs_virtualised; + + if ((rc = cxl_chardev_m_afu_add(afu))) + return rc; + + if ((rc = cxl_sysfs_afu_m_add(afu))) + goto err; + + if ((rc = cxl_chardev_s_afu_add(afu))) + goto err1; + + return 0; +err1: + cxl_sysfs_afu_m_remove(afu); +err: + cxl_chardev_afu_remove(afu); + return rc; +} + +static int guest_afu_activate_mode(struct cxl_afu *afu, int mode) +{ + if (!mode) + return 0; + if (!(mode & afu->modes_supported)) + return -EINVAL; + + if (mode == CXL_MODE_DIRECTED) + return activate_afu_directed(afu); + + if (mode == CXL_MODE_DEDICATED) + dev_err(&afu->dev, "Dedicated mode not supported\n"); + + return -EINVAL; +} + +static int deactivate_afu_directed(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Deactivating AFU(%d) directed mode\n", afu->slice); + + afu->current_mode = 0; + afu->num_procs = 0; + + cxl_sysfs_afu_m_remove(afu); + cxl_chardev_afu_remove(afu); + + cxl_ops->afu_reset(afu); + + return 0; +} + +static int guest_afu_deactivate_mode(struct cxl_afu *afu, int mode) +{ + if (!mode) + return 0; + if (!(mode & afu->modes_supported)) + return -EINVAL; + + if (mode == CXL_MODE_DIRECTED) + return deactivate_afu_directed(afu); + return 0; +} + +static int guest_afu_reset(struct cxl_afu *afu) +{ + pr_devel("AFU(%d) reset request\n", afu->slice); + return cxl_h_reset_afu(afu->guest->handle); +} + +static int guest_map_slice_regs(struct cxl_afu *afu) +{ + if (!(afu->p2n_mmio = ioremap(afu->guest->p2n_phys, afu->guest->p2n_size))) { + dev_err(&afu->dev, "Error mapping AFU(%d) MMIO regions\n", + afu->slice); + return -ENOMEM; + } + return 0; +} + +static void guest_unmap_slice_regs(struct cxl_afu *afu) +{ + if (afu->p2n_mmio) + iounmap(afu->p2n_mmio); +} + +static int afu_update_state(struct cxl_afu *afu) +{ + int rc, cur_state; + + rc = afu_read_error_state(afu, &cur_state); + if (rc) + return rc; + + if (afu->guest->previous_state == cur_state) + return 0; + + pr_devel("AFU(%d) update state to %#x\n", afu->slice, cur_state); + + switch (cur_state) { + case H_STATE_NORMAL: + afu->guest->previous_state = cur_state; + rc = 1; + break; + + case H_STATE_DISABLE: + pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, + pci_channel_io_frozen); + + cxl_context_detach_all(afu); + if ((rc = cxl_ops->afu_reset(afu))) + pr_devel("reset hcall failed %d\n", rc); + + rc = afu_read_error_state(afu, &cur_state); + if (!rc && cur_state == H_STATE_NORMAL) { + pci_error_handlers(afu, CXL_SLOT_RESET_EVENT, + pci_channel_io_normal); + pci_error_handlers(afu, CXL_RESUME_EVENT, 0); + rc = 1; + } + afu->guest->previous_state = 0; + break; + + case H_STATE_TEMP_UNAVAILABLE: + afu->guest->previous_state = cur_state; + break; + + case H_STATE_PERM_UNAVAILABLE: + dev_err(&afu->dev, "AFU is in permanent error state\n"); + pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, + pci_channel_io_perm_failure); + afu->guest->previous_state = cur_state; + break; + + default: + pr_err("Unexpected AFU(%d) error state: %#x\n", + afu->slice, cur_state); + return -EINVAL; + } + + return rc; +} + +static int afu_do_recovery(struct cxl_afu *afu) +{ + int rc; + + /* many threads can arrive here, in case of detach_all for example. + * Only one needs to drive the recovery + */ + if (mutex_trylock(&afu->guest->recovery_lock)) { + rc = afu_update_state(afu); + mutex_unlock(&afu->guest->recovery_lock); + return rc; + } + return 0; +} + +static bool guest_link_ok(struct cxl *cxl, struct cxl_afu *afu) +{ + int state; + + if (afu) { + if (afu_read_error_state(afu, &state) || + state != H_STATE_NORMAL) { + if (afu_do_recovery(afu) > 0) { + /* check again in case we've just fixed it */ + if (!afu_read_error_state(afu, &state) && + state == H_STATE_NORMAL) + return true; + } + return false; + } + } + + return true; +} + +static int afu_properties_look_ok(struct cxl_afu *afu) +{ + if (afu->pp_irqs < 0) { + dev_err(&afu->dev, "Unexpected per-process minimum interrupt value\n"); + return -EINVAL; + } + + if (afu->max_procs_virtualised < 1) { + dev_err(&afu->dev, "Unexpected max number of processes virtualised value\n"); + return -EINVAL; + } + + if (afu->crs_len < 0) { + dev_err(&afu->dev, "Unexpected configuration record size value\n"); + return -EINVAL; + } + + return 0; +} + +int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_np) +{ + struct cxl_afu *afu; + bool free = true; + int rc; + + pr_devel("in %s - AFU(%d)\n", __func__, slice); + if (!(afu = cxl_alloc_afu(adapter, slice))) + return -ENOMEM; + + if (!(afu->guest = kzalloc(sizeof(struct cxl_afu_guest), GFP_KERNEL))) { + kfree(afu); + return -ENOMEM; + } + + mutex_init(&afu->guest->recovery_lock); + + if ((rc = dev_set_name(&afu->dev, "afu%i.%i", + adapter->adapter_num, + slice))) + goto err1; + + adapter->slices++; + + if ((rc = cxl_of_read_afu_handle(afu, afu_np))) + goto err1; + + if ((rc = cxl_ops->afu_reset(afu))) + goto err1; + + if ((rc = cxl_of_read_afu_properties(afu, afu_np))) + goto err1; + + if ((rc = afu_properties_look_ok(afu))) + goto err1; + + if ((rc = guest_map_slice_regs(afu))) + goto err1; + + if ((rc = guest_register_serr_irq(afu))) + goto err2; + + /* + * After we call this function we must not free the afu directly, even + * if it returns an error! + */ + if ((rc = cxl_register_afu(afu))) + goto err_put1; + + if ((rc = cxl_sysfs_afu_add(afu))) + goto err_put1; + + /* + * pHyp doesn't expose the programming models supported by the + * AFU. pHyp currently only supports directed mode. If it adds + * dedicated mode later, this version of cxl has no way to + * detect it. So we'll initialize the driver, but the first + * attach will fail. + * Being discussed with pHyp to do better (likely new property) + */ + if (afu->max_procs_virtualised == 1) + afu->modes_supported = CXL_MODE_DEDICATED; + else + afu->modes_supported = CXL_MODE_DIRECTED; + + if ((rc = cxl_afu_select_best_mode(afu))) + goto err_put2; + + adapter->afu[afu->slice] = afu; + + afu->enabled = true; + + if ((rc = cxl_pci_vphb_add(afu))) + dev_info(&afu->dev, "Can't register vPHB\n"); + + return 0; + +err_put2: + cxl_sysfs_afu_remove(afu); +err_put1: + device_unregister(&afu->dev); + free = false; + guest_release_serr_irq(afu); +err2: + guest_unmap_slice_regs(afu); +err1: + if (free) { + kfree(afu->guest); + kfree(afu); + } + return rc; +} + +void cxl_guest_remove_afu(struct cxl_afu *afu) +{ + pr_devel("in %s - AFU(%d)\n", __func__, afu->slice); + + if (!afu) + return; + + cxl_pci_vphb_remove(afu); + cxl_sysfs_afu_remove(afu); + + spin_lock(&afu->adapter->afu_list_lock); + afu->adapter->afu[afu->slice] = NULL; + spin_unlock(&afu->adapter->afu_list_lock); + + cxl_context_detach_all(afu); + cxl_ops->afu_deactivate_mode(afu, afu->current_mode); + guest_release_serr_irq(afu); + guest_unmap_slice_regs(afu); + + device_unregister(&afu->dev); +} + +static void free_adapter(struct cxl *adapter) +{ + struct irq_avail *cur; + int i; + + if (adapter->guest->irq_avail) { + for (i = 0; i < adapter->guest->irq_nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + kfree(cur->bitmap); + } + kfree(adapter->guest->irq_avail); + } + kfree(adapter->guest->status); + cxl_remove_adapter_nr(adapter); + kfree(adapter->guest); + kfree(adapter); +} + +static int properties_look_ok(struct cxl *adapter) +{ + /* The absence of this property means that the operational + * status is unknown or okay + */ + if (strlen(adapter->guest->status) && + strcmp(adapter->guest->status, "okay")) { + pr_err("ABORTING:Bad operational status of the device\n"); + return -EINVAL; + } + + return 0; +} + +ssize_t cxl_guest_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len) +{ + return guest_collect_vpd(adapter, NULL, buf, len); +} + +void cxl_guest_remove_adapter(struct cxl *adapter) +{ + pr_devel("in %s\n", __func__); + + cxl_sysfs_adapter_remove(adapter); + + cxl_guest_remove_chardev(adapter); + device_unregister(&adapter->dev); +} + +static void release_adapter(struct device *dev) +{ + free_adapter(to_cxl_adapter(dev)); +} + +struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_device *pdev) +{ + struct cxl *adapter; + bool free = true; + int rc; + + if (!(adapter = cxl_alloc_adapter())) + return ERR_PTR(-ENOMEM); + + if (!(adapter->guest = kzalloc(sizeof(struct cxl_guest), GFP_KERNEL))) { + free_adapter(adapter); + return ERR_PTR(-ENOMEM); + } + + adapter->slices = 0; + adapter->guest->pdev = pdev; + adapter->dev.parent = &pdev->dev; + adapter->dev.release = release_adapter; + dev_set_drvdata(&pdev->dev, adapter); + + if ((rc = cxl_of_read_adapter_handle(adapter, np))) + goto err1; + + if ((rc = cxl_of_read_adapter_properties(adapter, np))) + goto err1; + + if ((rc = properties_look_ok(adapter))) + goto err1; + + if ((rc = cxl_guest_add_chardev(adapter))) + goto err1; + + /* + * After we call this function we must not free the adapter directly, + * even if it returns an error! + */ + if ((rc = cxl_register_adapter(adapter))) + goto err_put1; + + if ((rc = cxl_sysfs_adapter_add(adapter))) + goto err_put1; + + return adapter; + +err_put1: + device_unregister(&adapter->dev); + free = false; + cxl_guest_remove_chardev(adapter); +err1: + if (free) + free_adapter(adapter); + return ERR_PTR(rc); +} + +void cxl_guest_reload_module(struct cxl *adapter) +{ + struct platform_device *pdev; + + pdev = adapter->guest->pdev; + cxl_guest_remove_adapter(adapter); + + cxl_of_probe(pdev); +} + +const struct cxl_backend_ops cxl_guest_ops = { + .module = THIS_MODULE, + .adapter_reset = guest_reset, + .alloc_one_irq = guest_alloc_one_irq, + .release_one_irq = guest_release_one_irq, + .alloc_irq_ranges = guest_alloc_irq_ranges, + .release_irq_ranges = guest_release_irq_ranges, + .setup_irq = NULL, + .handle_psl_slice_error = guest_handle_psl_slice_error, + .psl_interrupt = guest_psl_irq, + .ack_irq = guest_ack_irq, + .attach_process = guest_attach_process, + .detach_process = guest_detach_process, + .support_attributes = guest_support_attributes, + .link_ok = guest_link_ok, + .release_afu = guest_release_afu, + .afu_read_err_buffer = guest_afu_read_err_buffer, + .afu_check_and_enable = guest_afu_check_and_enable, + .afu_activate_mode = guest_afu_activate_mode, + .afu_deactivate_mode = guest_afu_deactivate_mode, + .afu_reset = guest_afu_reset, + .afu_cr_read8 = guest_afu_cr_read8, + .afu_cr_read16 = guest_afu_cr_read16, + .afu_cr_read32 = guest_afu_cr_read32, + .afu_cr_read64 = guest_afu_cr_read64, + .afu_cr_write8 = guest_afu_cr_write8, + .afu_cr_write16 = guest_afu_cr_write16, + .afu_cr_write32 = guest_afu_cr_write32, + .read_adapter_vpd = cxl_guest_read_adapter_vpd, +}; diff --git a/drivers/misc/cxl/hcalls.c b/drivers/misc/cxl/hcalls.c new file mode 100644 index 000000000000..d6d11f4056d7 --- /dev/null +++ b/drivers/misc/cxl/hcalls.c @@ -0,0 +1,647 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include <linux/compiler.h> +#include <linux/types.h> +#include <linux/delay.h> +#include <asm/byteorder.h> +#include "hcalls.h" +#include "trace.h" + +#define CXL_HCALL_TIMEOUT 60000 +#define CXL_HCALL_TIMEOUT_DOWNLOAD 120000 + +#define H_ATTACH_CA_PROCESS 0x344 +#define H_CONTROL_CA_FUNCTION 0x348 +#define H_DETACH_CA_PROCESS 0x34C +#define H_COLLECT_CA_INT_INFO 0x350 +#define H_CONTROL_CA_FAULTS 0x354 +#define H_DOWNLOAD_CA_FUNCTION 0x35C +#define H_DOWNLOAD_CA_FACILITY 0x364 +#define H_CONTROL_CA_FACILITY 0x368 + +#define H_CONTROL_CA_FUNCTION_RESET 1 /* perform a reset */ +#define H_CONTROL_CA_FUNCTION_SUSPEND_PROCESS 2 /* suspend a process from being executed */ +#define H_CONTROL_CA_FUNCTION_RESUME_PROCESS 3 /* resume a process to be executed */ +#define H_CONTROL_CA_FUNCTION_READ_ERR_STATE 4 /* read the error state */ +#define H_CONTROL_CA_FUNCTION_GET_AFU_ERR 5 /* collect the AFU error buffer */ +#define H_CONTROL_CA_FUNCTION_GET_CONFIG 6 /* collect configuration record */ +#define H_CONTROL_CA_FUNCTION_GET_DOWNLOAD_STATE 7 /* query to return download status */ +#define H_CONTROL_CA_FUNCTION_TERMINATE_PROCESS 8 /* terminate the process before completion */ +#define H_CONTROL_CA_FUNCTION_COLLECT_VPD 9 /* collect VPD */ +#define H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT 11 /* read the function-wide error data based on an interrupt */ +#define H_CONTROL_CA_FUNCTION_ACK_FUNCTION_ERR_INT 12 /* acknowledge function-wide error data based on an interrupt */ +#define H_CONTROL_CA_FUNCTION_GET_ERROR_LOG 13 /* retrieve the Platform Log ID (PLID) of an error log */ + +#define H_CONTROL_CA_FAULTS_RESPOND_PSL 1 +#define H_CONTROL_CA_FAULTS_RESPOND_AFU 2 + +#define H_CONTROL_CA_FACILITY_RESET 1 /* perform a reset */ +#define H_CONTROL_CA_FACILITY_COLLECT_VPD 2 /* collect VPD */ + +#define H_DOWNLOAD_CA_FACILITY_DOWNLOAD 1 /* download adapter image */ +#define H_DOWNLOAD_CA_FACILITY_VALIDATE 2 /* validate adapter image */ + + +#define _CXL_LOOP_HCALL(call, rc, retbuf, fn, ...) \ + { \ + unsigned int delay, total_delay = 0; \ + u64 token = 0; \ + \ + memset(retbuf, 0, sizeof(retbuf)); \ + while (1) { \ + rc = call(fn, retbuf, __VA_ARGS__, token); \ + token = retbuf[0]; \ + if (rc != H_BUSY && !H_IS_LONG_BUSY(rc)) \ + break; \ + \ + if (rc == H_BUSY) \ + delay = 10; \ + else \ + delay = get_longbusy_msecs(rc); \ + \ + total_delay += delay; \ + if (total_delay > CXL_HCALL_TIMEOUT) { \ + WARN(1, "Warning: Giving up waiting for CXL hcall " \ + "%#x after %u msec\n", fn, total_delay); \ + rc = H_BUSY; \ + break; \ + } \ + msleep(delay); \ + } \ + } +#define CXL_H_WAIT_UNTIL_DONE(...) _CXL_LOOP_HCALL(plpar_hcall, __VA_ARGS__) +#define CXL_H9_WAIT_UNTIL_DONE(...) _CXL_LOOP_HCALL(plpar_hcall9, __VA_ARGS__) + +#define _PRINT_MSG(rc, format, ...) \ + { \ + if ((rc != H_SUCCESS) && (rc != H_CONTINUE)) \ + pr_err(format, __VA_ARGS__); \ + else \ + pr_devel(format, __VA_ARGS__); \ + } \ + + +static char *afu_op_names[] = { + "UNKNOWN_OP", /* 0 undefined */ + "RESET", /* 1 */ + "SUSPEND_PROCESS", /* 2 */ + "RESUME_PROCESS", /* 3 */ + "READ_ERR_STATE", /* 4 */ + "GET_AFU_ERR", /* 5 */ + "GET_CONFIG", /* 6 */ + "GET_DOWNLOAD_STATE", /* 7 */ + "TERMINATE_PROCESS", /* 8 */ + "COLLECT_VPD", /* 9 */ + "UNKNOWN_OP", /* 10 undefined */ + "GET_FUNCTION_ERR_INT", /* 11 */ + "ACK_FUNCTION_ERR_INT", /* 12 */ + "GET_ERROR_LOG", /* 13 */ +}; + +static char *control_adapter_op_names[] = { + "UNKNOWN_OP", /* 0 undefined */ + "RESET", /* 1 */ + "COLLECT_VPD", /* 2 */ +}; + +static char *download_op_names[] = { + "UNKNOWN_OP", /* 0 undefined */ + "DOWNLOAD", /* 1 */ + "VALIDATE", /* 2 */ +}; + +static char *op_str(unsigned int op, char *name_array[], int array_len) +{ + if (op >= array_len) + return "UNKNOWN_OP"; + return name_array[op]; +} + +#define OP_STR(op, name_array) op_str(op, name_array, ARRAY_SIZE(name_array)) + +#define OP_STR_AFU(op) OP_STR(op, afu_op_names) +#define OP_STR_CONTROL_ADAPTER(op) OP_STR(op, control_adapter_op_names) +#define OP_STR_DOWNLOAD_ADAPTER(op) OP_STR(op, download_op_names) + + +long cxl_h_attach_process(u64 unit_address, + struct cxl_process_element_hcall *element, + u64 *process_token, u64 *mmio_addr, u64 *mmio_size) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + CXL_H_WAIT_UNTIL_DONE(rc, retbuf, H_ATTACH_CA_PROCESS, unit_address, virt_to_phys(element)); + _PRINT_MSG(rc, "cxl_h_attach_process(%#.16llx, %#.16lx): %li\n", + unit_address, virt_to_phys(element), rc); + trace_cxl_hcall_attach(unit_address, virt_to_phys(element), retbuf[0], retbuf[1], retbuf[2], rc); + + pr_devel("token: 0x%.8lx mmio_addr: 0x%lx mmio_size: 0x%lx\nProcess Element Structure:\n", + retbuf[0], retbuf[1], retbuf[2]); + cxl_dump_debug_buffer(element, sizeof(*element)); + + switch (rc) { + case H_SUCCESS: /* The process info is attached to the coherent platform function */ + *process_token = retbuf[0]; + if (mmio_addr) + *mmio_addr = retbuf[1]; + if (mmio_size) + *mmio_size = retbuf[2]; + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + case H_FUNCTION: /* The function is not supported. */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The coherent platform function does not have enough additional resource to attach the process */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform function is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_detach_process - Detach a process element from a coherent + * platform function. + */ +long cxl_h_detach_process(u64 unit_address, u64 process_token) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + CXL_H_WAIT_UNTIL_DONE(rc, retbuf, H_DETACH_CA_PROCESS, unit_address, process_token); + _PRINT_MSG(rc, "cxl_h_detach_process(%#.16llx, 0x%.8llx): %li\n", unit_address, process_token, rc); + trace_cxl_hcall_detach(unit_address, process_token, rc); + + switch (rc) { + case H_SUCCESS: /* The process was detached from the coherent platform function */ + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the detach operation */ + case H_STATE: /* The coherent platform function is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_control_function - This H_CONTROL_CA_FUNCTION hypervisor call allows + * the partition to manipulate or query + * certain coherent platform function behaviors. + */ +static long cxl_h_control_function(u64 unit_address, u64 op, + u64 p1, u64 p2, u64 p3, u64 p4, u64 *out) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + long rc; + + CXL_H9_WAIT_UNTIL_DONE(rc, retbuf, H_CONTROL_CA_FUNCTION, unit_address, op, p1, p2, p3, p4); + _PRINT_MSG(rc, "cxl_h_control_function(%#.16llx, %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li\n", + unit_address, OP_STR_AFU(op), p1, p2, p3, p4, retbuf[0], rc); + trace_cxl_hcall_control_function(unit_address, OP_STR_AFU(op), p1, p2, p3, p4, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* The operation is completed for the coherent platform function */ + if ((op == H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT || + op == H_CONTROL_CA_FUNCTION_READ_ERR_STATE || + op == H_CONTROL_CA_FUNCTION_COLLECT_VPD)) + *out = retbuf[0]; + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + case H_FUNCTION: /* The function is not supported. */ + case H_NOT_FOUND: /* The operation supplied was not valid */ + case H_NOT_AVAILABLE: /* The operation cannot be performed because the AFU has not been downloaded */ + case H_SG_LIST: /* An block list entry was invalid */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform function is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_reset_afu - Perform a reset to the coherent platform function. + */ +long cxl_h_reset_afu(u64 unit_address) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_RESET, + 0, 0, 0, 0, + NULL); +} + +/** + * cxl_h_suspend_process - Suspend a process from being executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_suspend_process(u64 unit_address, u64 process_token) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_SUSPEND_PROCESS, + process_token, 0, 0, 0, + NULL); +} + +/** + * cxl_h_resume_process - Resume a process to be executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_resume_process(u64 unit_address, u64 process_token) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_RESUME_PROCESS, + process_token, 0, 0, 0, + NULL); +} + +/** + * cxl_h_read_error_state - Checks the error state of the coherent + * platform function. + * R4 contains the error state + */ +long cxl_h_read_error_state(u64 unit_address, u64 *state) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_READ_ERR_STATE, + 0, 0, 0, 0, + state); +} + +/** + * cxl_h_get_afu_err - collect the AFU error buffer + * Parameter1 = byte offset into error buffer to retrieve, valid values + * are between 0 and (ibm,error-buffer-size - 1) + * Parameter2 = 4K aligned real address of error buffer, to be filled in + * Parameter3 = length of error buffer, valid values are 4K or less + */ +long cxl_h_get_afu_err(u64 unit_address, u64 offset, + u64 buf_address, u64 len) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_AFU_ERR, + offset, buf_address, len, 0, + NULL); +} + +/** + * cxl_h_get_config - collect configuration record for the + * coherent platform function + * Parameter1 = # of configuration record to retrieve, valid values are + * between 0 and (ibm,#config-records - 1) + * Parameter2 = byte offset into configuration record to retrieve, + * valid values are between 0 and (ibm,config-record-size - 1) + * Parameter3 = 4K aligned real address of configuration record buffer, + * to be filled in + * Parameter4 = length of configuration buffer, valid values are 4K or less + */ +long cxl_h_get_config(u64 unit_address, u64 cr_num, u64 offset, + u64 buf_address, u64 len) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_CONFIG, + cr_num, offset, buf_address, len, + NULL); +} + +/** + * cxl_h_terminate_process - Terminate the process before completion + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_terminate_process(u64 unit_address, u64 process_token) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_TERMINATE_PROCESS, + process_token, 0, 0, 0, + NULL); +} + +/** + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = # of VPD record to retrieve, valid values are between 0 + * and (ibm,#config-records - 1). + * Parameter2 = 4K naturally aligned real buffer containing block + * list entries + * Parameter3 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd(u64 unit_address, u64 record, u64 list_address, + u64 num, u64 *out) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_COLLECT_VPD, + record, list_address, num, 0, + out); +} + +/** + * cxl_h_get_fn_error_interrupt - Read the function-wide error data based on an interrupt + */ +long cxl_h_get_fn_error_interrupt(u64 unit_address, u64 *reg) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT, + 0, 0, 0, 0, reg); +} + +/** + * cxl_h_ack_fn_error_interrupt - Acknowledge function-wide error data + * based on an interrupt + * Parameter1 = value to write to the function-wide error interrupt register + */ +long cxl_h_ack_fn_error_interrupt(u64 unit_address, u64 value) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_ACK_FUNCTION_ERR_INT, + value, 0, 0, 0, + NULL); +} + +/** + * cxl_h_get_error_log - Retrieve the Platform Log ID (PLID) of + * an error log + */ +long cxl_h_get_error_log(u64 unit_address, u64 value) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_ERROR_LOG, + 0, 0, 0, 0, + NULL); +} + +/** + * cxl_h_collect_int_info - Collect interrupt info about a coherent + * platform function after an interrupt occurred. + */ +long cxl_h_collect_int_info(u64 unit_address, u64 process_token, + struct cxl_irq_info *info) +{ + long rc; + + BUG_ON(sizeof(*info) != sizeof(unsigned long[PLPAR_HCALL9_BUFSIZE])); + + rc = plpar_hcall9(H_COLLECT_CA_INT_INFO, (unsigned long *) info, + unit_address, process_token); + _PRINT_MSG(rc, "cxl_h_collect_int_info(%#.16llx, 0x%llx): %li\n", + unit_address, process_token, rc); + trace_cxl_hcall_collect_int_info(unit_address, process_token, rc); + + switch (rc) { + case H_SUCCESS: /* The interrupt info is returned in return registers. */ + pr_devel("dsisr:%#llx, dar:%#llx, dsr:%#llx, pid:%u, tid:%u, afu_err:%#llx, errstat:%#llx\n", + info->dsisr, info->dar, info->dsr, info->pid, + info->tid, info->afu_err, info->errstat); + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall. */ + case H_HARDWARE: /* A hardware event prevented the collection of the interrupt info.*/ + case H_STATE: /* The coherent platform function is not in a valid state to collect interrupt info. */ + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_control_faults - Control the operation of a coherent platform + * function after a fault occurs. + * + * Parameters + * control-mask: value to control the faults + * looks like PSL_TFC_An shifted >> 32 + * reset-mask: mask to control reset of function faults + * Set reset_mask = 1 to reset PSL errors + */ +long cxl_h_control_faults(u64 unit_address, u64 process_token, + u64 control_mask, u64 reset_mask) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + memset(retbuf, 0, sizeof(retbuf)); + + rc = plpar_hcall(H_CONTROL_CA_FAULTS, retbuf, unit_address, + H_CONTROL_CA_FAULTS_RESPOND_PSL, process_token, + control_mask, reset_mask); + _PRINT_MSG(rc, "cxl_h_control_faults(%#.16llx, 0x%llx, %#llx, %#llx): %li (%#lx)\n", + unit_address, process_token, control_mask, reset_mask, + rc, retbuf[0]); + trace_cxl_hcall_control_faults(unit_address, process_token, + control_mask, reset_mask, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* Faults were successfully controlled for the function. */ + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + return -EINVAL; + case H_HARDWARE: /* A hardware event prevented the control of faults. */ + case H_STATE: /* The function was in an invalid state. */ + case H_AUTHORITY: /* The partition does not have authority to perform this hcall; the coherent platform facilities may need to be licensed. */ + return -EBUSY; + case H_FUNCTION: /* The function is not supported */ + case H_NOT_FOUND: /* The operation supplied was not valid */ + return -EINVAL; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_control_facility - This H_CONTROL_CA_FACILITY hypervisor call + * allows the partition to manipulate or query + * certain coherent platform facility behaviors. + */ +static long cxl_h_control_facility(u64 unit_address, u64 op, + u64 p1, u64 p2, u64 p3, u64 p4, u64 *out) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + long rc; + + CXL_H9_WAIT_UNTIL_DONE(rc, retbuf, H_CONTROL_CA_FACILITY, unit_address, op, p1, p2, p3, p4); + _PRINT_MSG(rc, "cxl_h_control_facility(%#.16llx, %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li\n", + unit_address, OP_STR_CONTROL_ADAPTER(op), p1, p2, p3, p4, retbuf[0], rc); + trace_cxl_hcall_control_facility(unit_address, OP_STR_CONTROL_ADAPTER(op), p1, p2, p3, p4, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* The operation is completed for the coherent platform facility */ + if (op == H_CONTROL_CA_FACILITY_COLLECT_VPD) + *out = retbuf[0]; + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + case H_FUNCTION: /* The function is not supported. */ + case H_NOT_FOUND: /* The operation supplied was not valid */ + case H_NOT_AVAILABLE: /* The operation cannot be performed because the AFU has not been downloaded */ + case H_SG_LIST: /* An block list entry was invalid */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform facility is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_reset_adapter - Perform a reset to the coherent platform facility. + */ +long cxl_h_reset_adapter(u64 unit_address) +{ + return cxl_h_control_facility(unit_address, + H_CONTROL_CA_FACILITY_RESET, + 0, 0, 0, 0, + NULL); +} + +/** + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = 4K naturally aligned real buffer containing block + * list entries + * Parameter2 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd_adapter(u64 unit_address, u64 list_address, + u64 num, u64 *out) +{ + return cxl_h_control_facility(unit_address, + H_CONTROL_CA_FACILITY_COLLECT_VPD, + list_address, num, 0, 0, + out); +} + +/** + * cxl_h_download_facility - This H_DOWNLOAD_CA_FACILITY + * hypervisor call provide platform support for + * downloading a base adapter image to the coherent + * platform facility, and for validating the entire + * image after the download. + * Parameters + * op: operation to perform to the coherent platform function + * Download: operation = 1, the base image in the coherent platform + * facility is first erased, and then + * programmed using the image supplied + * in the scatter/gather list. + * Validate: operation = 2, the base image in the coherent platform + * facility is compared with the image + * supplied in the scatter/gather list. + * list_address: 4K naturally aligned real buffer containing + * scatter/gather list entries. + * num: number of block list entries in the scatter/gather list. + */ +static long cxl_h_download_facility(u64 unit_address, u64 op, + u64 list_address, u64 num, + u64 *out) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + unsigned int delay, total_delay = 0; + u64 token = 0; + long rc; + + if (*out != 0) + token = *out; + + memset(retbuf, 0, sizeof(retbuf)); + while (1) { + rc = plpar_hcall(H_DOWNLOAD_CA_FACILITY, retbuf, + unit_address, op, list_address, num, + token); + token = retbuf[0]; + if (rc != H_BUSY && !H_IS_LONG_BUSY(rc)) + break; + + if (rc != H_BUSY) { + delay = get_longbusy_msecs(rc); + total_delay += delay; + if (total_delay > CXL_HCALL_TIMEOUT_DOWNLOAD) { + WARN(1, "Warning: Giving up waiting for CXL hcall " + "%#x after %u msec\n", + H_DOWNLOAD_CA_FACILITY, total_delay); + rc = H_BUSY; + break; + } + msleep(delay); + } + } + _PRINT_MSG(rc, "cxl_h_download_facility(%#.16llx, %s(%#llx, %#llx), %#lx): %li\n", + unit_address, OP_STR_DOWNLOAD_ADAPTER(op), list_address, num, retbuf[0], rc); + trace_cxl_hcall_download_facility(unit_address, OP_STR_DOWNLOAD_ADAPTER(op), list_address, num, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* The operation is completed for the coherent platform facility */ + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied */ + case H_FUNCTION: /* The function is not supported. */ + case H_SG_LIST: /* An block list entry was invalid */ + case H_BAD_DATA: /* Image verification failed */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform facility is not in a valid state */ + case H_BUSY: + return -EBUSY; + case H_CONTINUE: + *out = retbuf[0]; + return 1; /* More data is needed for the complete image */ + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_download_adapter_image - Download the base image to the coherent + * platform facility. + */ +long cxl_h_download_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out) +{ + return cxl_h_download_facility(unit_address, + H_DOWNLOAD_CA_FACILITY_DOWNLOAD, + list_address, num, out); +} + +/** + * cxl_h_validate_adapter_image - Validate the base image in the coherent + * platform facility. + */ +long cxl_h_validate_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out) +{ + return cxl_h_download_facility(unit_address, + H_DOWNLOAD_CA_FACILITY_VALIDATE, + list_address, num, out); +} diff --git a/drivers/misc/cxl/hcalls.h b/drivers/misc/cxl/hcalls.h new file mode 100644 index 000000000000..3e25522a5df6 --- /dev/null +++ b/drivers/misc/cxl/hcalls.h @@ -0,0 +1,204 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _HCALLS_H +#define _HCALLS_H + +#include <linux/types.h> +#include <asm/byteorder.h> +#include <asm/hvcall.h> +#include "cxl.h" + +#define SG_BUFFER_SIZE 4096 +#define SG_MAX_ENTRIES 256 + +struct sg_list { + u64 phys_addr; + u64 len; +}; + +/* + * This is straight out of PAPR, but replacing some of the compound fields with + * a single field, where they were identical to the register layout. + * + * The 'flags' parameter regroups the various bit-fields + */ +#define CXL_PE_CSRP_VALID (1ULL << 63) +#define CXL_PE_PROBLEM_STATE (1ULL << 62) +#define CXL_PE_SECONDARY_SEGMENT_TBL_SRCH (1ULL << 61) +#define CXL_PE_TAGS_ACTIVE (1ULL << 60) +#define CXL_PE_USER_STATE (1ULL << 59) +#define CXL_PE_TRANSLATION_ENABLED (1ULL << 58) +#define CXL_PE_64_BIT (1ULL << 57) +#define CXL_PE_PRIVILEGED_PROCESS (1ULL << 56) + +#define CXL_PROCESS_ELEMENT_VERSION 1 +struct cxl_process_element_hcall { + __be64 version; + __be64 flags; + u8 reserved0[12]; + __be32 pslVirtualIsn; + u8 applicationVirtualIsnBitmap[256]; + u8 reserved1[144]; + struct cxl_process_element_common common; + u8 reserved4[12]; +} __packed; + +#define H_STATE_NORMAL 1 +#define H_STATE_DISABLE 2 +#define H_STATE_TEMP_UNAVAILABLE 3 +#define H_STATE_PERM_UNAVAILABLE 4 + +/* NOTE: element must be a logical real address, and must be pinned */ +long cxl_h_attach_process(u64 unit_address, struct cxl_process_element_hcall *element, + u64 *process_token, u64 *mmio_addr, u64 *mmio_size); + +/** + * cxl_h_detach_process - Detach a process element from a coherent + * platform function. + */ +long cxl_h_detach_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_reset_afu - Perform a reset to the coherent platform function. + */ +long cxl_h_reset_afu(u64 unit_address); + +/** + * cxl_h_suspend_process - Suspend a process from being executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_suspend_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_resume_process - Resume a process to be executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_resume_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_read_error_state - Reads the error state of the coherent + * platform function. + * R4 contains the error state + */ +long cxl_h_read_error_state(u64 unit_address, u64 *state); + +/** + * cxl_h_get_afu_err - collect the AFU error buffer + * Parameter1 = byte offset into error buffer to retrieve, valid values + * are between 0 and (ibm,error-buffer-size - 1) + * Parameter2 = 4K aligned real address of error buffer, to be filled in + * Parameter3 = length of error buffer, valid values are 4K or less + */ +long cxl_h_get_afu_err(u64 unit_address, u64 offset, u64 buf_address, u64 len); + +/** + * cxl_h_get_config - collect configuration record for the + * coherent platform function + * Parameter1 = # of configuration record to retrieve, valid values are + * between 0 and (ibm,#config-records - 1) + * Parameter2 = byte offset into configuration record to retrieve, + * valid values are between 0 and (ibm,config-record-size - 1) + * Parameter3 = 4K aligned real address of configuration record buffer, + * to be filled in + * Parameter4 = length of configuration buffer, valid values are 4K or less + */ +long cxl_h_get_config(u64 unit_address, u64 cr_num, u64 offset, + u64 buf_address, u64 len); + +/** + * cxl_h_terminate_process - Terminate the process before completion + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_terminate_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = # of VPD record to retrieve, valid values are between 0 + * and (ibm,#config-records - 1). + * Parameter2 = 4K naturally aligned real buffer containing block + * list entries + * Parameter3 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd(u64 unit_address, u64 record, u64 list_address, + u64 num, u64 *out); + +/** + * cxl_h_get_fn_error_interrupt - Read the function-wide error data based on an interrupt + */ +long cxl_h_get_fn_error_interrupt(u64 unit_address, u64 *reg); + +/** + * cxl_h_ack_fn_error_interrupt - Acknowledge function-wide error data + * based on an interrupt + * Parameter1 = value to write to the function-wide error interrupt register + */ +long cxl_h_ack_fn_error_interrupt(u64 unit_address, u64 value); + +/** + * cxl_h_get_error_log - Retrieve the Platform Log ID (PLID) of + * an error log + */ +long cxl_h_get_error_log(u64 unit_address, u64 value); + +/** + * cxl_h_collect_int_info - Collect interrupt info about a coherent + * platform function after an interrupt occurred. + */ +long cxl_h_collect_int_info(u64 unit_address, u64 process_token, + struct cxl_irq_info *info); + +/** + * cxl_h_control_faults - Control the operation of a coherent platform + * function after a fault occurs. + * + * Parameters + * control-mask: value to control the faults + * looks like PSL_TFC_An shifted >> 32 + * reset-mask: mask to control reset of function faults + * Set reset_mask = 1 to reset PSL errors + */ +long cxl_h_control_faults(u64 unit_address, u64 process_token, + u64 control_mask, u64 reset_mask); + +/** + * cxl_h_reset_adapter - Perform a reset to the coherent platform facility. + */ +long cxl_h_reset_adapter(u64 unit_address); + +/** + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = 4K naturally aligned real buffer containing block + * list entries + * Parameter2 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd_adapter(u64 unit_address, u64 list_address, + u64 num, u64 *out); + +/** + * cxl_h_download_adapter_image - Download the base image to the coherent + * platform facility. + */ +long cxl_h_download_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out); + +/** + * cxl_h_validate_adapter_image - Validate the base image in the coherent + * platform facility. + */ +long cxl_h_validate_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out); +#endif /* _HCALLS_H */ diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 09a406058c46..be646dc41a2c 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -19,70 +19,11 @@ #include "cxl.h" #include "trace.h" -/* XXX: This is implementation specific */ -static irqreturn_t handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, u64 errstat) +static int afu_irq_range_start(void) { - u64 fir1, fir2, fir_slice, serr, afu_debug; - - fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1); - fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2); - fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An); - serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); - afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An); - - dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat); - dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); - dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2); - dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); - dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); - dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); - - dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); - cxl_stop_trace(ctx->afu->adapter); - - return cxl_ack_irq(ctx, 0, errstat); -} - -irqreturn_t cxl_slice_irq_err(int irq, void *data) -{ - struct cxl_afu *afu = data; - u64 fir_slice, errstat, serr, afu_debug; - - WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq); - - serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); - fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An); - errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); - afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An); - dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); - dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); - dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat); - dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); - - cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); - - return IRQ_HANDLED; -} - -static irqreturn_t cxl_irq_err(int irq, void *data) -{ - struct cxl *adapter = data; - u64 fir1, fir2, err_ivte; - - WARN(1, "CXL ERROR interrupt %i\n", irq); - - err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE); - dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte); - - dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); - cxl_stop_trace(adapter); - - fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); - fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); - - dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2); - - return IRQ_HANDLED; + if (cpu_has_feature(CPU_FTR_HVMODE)) + return 1; + return 0; } static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 dar) @@ -93,9 +34,8 @@ static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 da return IRQ_HANDLED; } -static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) +irqreturn_t cxl_irq(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info) { - struct cxl_context *ctx = data; u64 dsisr, dar; dsisr = irq_info->dsisr; @@ -145,7 +85,8 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) if (dsisr & CXL_PSL_DSISR_An_UR) pr_devel("CXL interrupt: AURP PTE not found\n"); if (dsisr & CXL_PSL_DSISR_An_PE) - return handle_psl_slice_error(ctx, dsisr, irq_info->errstat); + return cxl_ops->handle_psl_slice_error(ctx, dsisr, + irq_info->errstat); if (dsisr & CXL_PSL_DSISR_An_AE) { pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err); @@ -169,7 +110,7 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) wake_up_all(&ctx->wq); } - cxl_ack_irq(ctx, CXL_PSL_TFC_An_A, 0); + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_A, 0); return IRQ_HANDLED; } if (dsisr & CXL_PSL_DSISR_An_OC) @@ -179,54 +120,27 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) return IRQ_HANDLED; } -static irqreturn_t fail_psl_irq(struct cxl_afu *afu, struct cxl_irq_info *irq_info) -{ - if (irq_info->dsisr & CXL_PSL_DSISR_TRANS) - cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); - else - cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); - - return IRQ_HANDLED; -} - -static irqreturn_t cxl_irq_multiplexed(int irq, void *data) -{ - struct cxl_afu *afu = data; - struct cxl_context *ctx; - struct cxl_irq_info irq_info; - int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff; - int ret; - - if ((ret = cxl_get_irq(afu, &irq_info))) { - WARN(1, "Unable to get CXL IRQ Info: %i\n", ret); - return fail_psl_irq(afu, &irq_info); - } - - rcu_read_lock(); - ctx = idr_find(&afu->contexts_idr, ph); - if (ctx) { - ret = cxl_irq(irq, ctx, &irq_info); - rcu_read_unlock(); - return ret; - } - rcu_read_unlock(); - - WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %016llx DAR" - " %016llx\n(Possible AFU HW issue - was a term/remove acked" - " with outstanding transactions?)\n", ph, irq_info.dsisr, - irq_info.dar); - return fail_psl_irq(afu, &irq_info); -} - static irqreturn_t cxl_irq_afu(int irq, void *data) { struct cxl_context *ctx = data; irq_hw_number_t hwirq = irqd_to_hwirq(irq_get_irq_data(irq)); - int irq_off, afu_irq = 1; + int irq_off, afu_irq = 0; __u16 range; int r; - for (r = 1; r < CXL_IRQ_RANGES; r++) { + /* + * Look for the interrupt number. + * On bare-metal, we know range 0 only contains the PSL + * interrupt so we could start counting at range 1 and initialize + * afu_irq at 1. + * In a guest, range 0 also contains AFU interrupts, so it must + * be counted for. Therefore we initialize afu_irq at 0 to take into + * account the PSL interrupt. + * + * For code-readability, it just seems easier to go over all + * the ranges on bare-metal and guest. The end result is the same. + */ + for (r = 0; r < CXL_IRQ_RANGES; r++) { irq_off = hwirq - ctx->irqs.offset[r]; range = ctx->irqs.range[r]; if (irq_off >= 0 && irq_off < range) { @@ -236,7 +150,7 @@ static irqreturn_t cxl_irq_afu(int irq, void *data) afu_irq += range; } if (unlikely(r >= CXL_IRQ_RANGES)) { - WARN(1, "Recieved AFU IRQ out of range for pe %i (virq %i hwirq %lx)\n", + WARN(1, "Received AFU IRQ out of range for pe %i (virq %i hwirq %lx)\n", ctx->pe, irq, hwirq); return IRQ_HANDLED; } @@ -246,7 +160,7 @@ static irqreturn_t cxl_irq_afu(int irq, void *data) afu_irq, ctx->pe, irq, hwirq); if (unlikely(!ctx->irq_bitmap)) { - WARN(1, "Recieved AFU IRQ for context with no IRQ bitmap\n"); + WARN(1, "Received AFU IRQ for context with no IRQ bitmap\n"); return IRQ_HANDLED; } spin_lock(&ctx->lock); @@ -272,7 +186,8 @@ unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, return 0; } - cxl_setup_irq(adapter, hwirq, virq); + if (cxl_ops->setup_irq) + cxl_ops->setup_irq(adapter, hwirq, virq); pr_devel("hwirq %#lx mapped to virq %u\n", hwirq, virq); @@ -291,16 +206,16 @@ void cxl_unmap_irq(unsigned int virq, void *cookie) irq_dispose_mapping(virq); } -static int cxl_register_one_irq(struct cxl *adapter, - irq_handler_t handler, - void *cookie, - irq_hw_number_t *dest_hwirq, - unsigned int *dest_virq, - const char *name) +int cxl_register_one_irq(struct cxl *adapter, + irq_handler_t handler, + void *cookie, + irq_hw_number_t *dest_hwirq, + unsigned int *dest_virq, + const char *name) { int hwirq, virq; - if ((hwirq = cxl_alloc_one_irq(adapter)) < 0) + if ((hwirq = cxl_ops->alloc_one_irq(adapter)) < 0) return hwirq; if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie, name))) @@ -312,108 +227,10 @@ static int cxl_register_one_irq(struct cxl *adapter, return 0; err: - cxl_release_one_irq(adapter, hwirq); + cxl_ops->release_one_irq(adapter, hwirq); return -ENOMEM; } -int cxl_register_psl_err_irq(struct cxl *adapter) -{ - int rc; - - adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", - dev_name(&adapter->dev)); - if (!adapter->irq_name) - return -ENOMEM; - - if ((rc = cxl_register_one_irq(adapter, cxl_irq_err, adapter, - &adapter->err_hwirq, - &adapter->err_virq, - adapter->irq_name))) { - kfree(adapter->irq_name); - adapter->irq_name = NULL; - return rc; - } - - cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->err_hwirq & 0xffff); - - return 0; -} - -void cxl_release_psl_err_irq(struct cxl *adapter) -{ - if (adapter->err_virq != irq_find_mapping(NULL, adapter->err_hwirq)) - return; - - cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); - cxl_unmap_irq(adapter->err_virq, adapter); - cxl_release_one_irq(adapter, adapter->err_hwirq); - kfree(adapter->irq_name); -} - -int cxl_register_serr_irq(struct cxl_afu *afu) -{ - u64 serr; - int rc; - - afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", - dev_name(&afu->dev)); - if (!afu->err_irq_name) - return -ENOMEM; - - if ((rc = cxl_register_one_irq(afu->adapter, cxl_slice_irq_err, afu, - &afu->serr_hwirq, - &afu->serr_virq, afu->err_irq_name))) { - kfree(afu->err_irq_name); - afu->err_irq_name = NULL; - return rc; - } - - serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); - serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff); - cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); - - return 0; -} - -void cxl_release_serr_irq(struct cxl_afu *afu) -{ - if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq)) - return; - - cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000); - cxl_unmap_irq(afu->serr_virq, afu); - cxl_release_one_irq(afu->adapter, afu->serr_hwirq); - kfree(afu->err_irq_name); -} - -int cxl_register_psl_irq(struct cxl_afu *afu) -{ - int rc; - - afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s", - dev_name(&afu->dev)); - if (!afu->psl_irq_name) - return -ENOMEM; - - if ((rc = cxl_register_one_irq(afu->adapter, cxl_irq_multiplexed, afu, - &afu->psl_hwirq, &afu->psl_virq, - afu->psl_irq_name))) { - kfree(afu->psl_irq_name); - afu->psl_irq_name = NULL; - } - return rc; -} - -void cxl_release_psl_irq(struct cxl_afu *afu) -{ - if (afu->psl_virq != irq_find_mapping(NULL, afu->psl_hwirq)) - return; - - cxl_unmap_irq(afu->psl_virq, afu); - cxl_release_one_irq(afu->adapter, afu->psl_hwirq); - kfree(afu->psl_irq_name); -} - void afu_irq_name_free(struct cxl_context *ctx) { struct cxl_irq_name *irq_name, *tmp; @@ -429,16 +246,33 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count) { int rc, r, i, j = 1; struct cxl_irq_name *irq_name; + int alloc_count; + + /* + * In native mode, range 0 is reserved for the multiplexed + * PSL interrupt. It has been allocated when the AFU was initialized. + * + * In a guest, the PSL interrupt is not mutliplexed, but per-context, + * and is the first interrupt from range 0. It still needs to be + * allocated, so bump the count by one. + */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + alloc_count = count; + else + alloc_count = count + 1; /* Initialize the list head to hold irq names */ INIT_LIST_HEAD(&ctx->irq_names); - if ((rc = cxl_alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, count))) + if ((rc = cxl_ops->alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, + alloc_count))) return rc; - /* Multiplexed PSL Interrupt */ - ctx->irqs.offset[0] = ctx->afu->psl_hwirq; - ctx->irqs.range[0] = 1; + if (cpu_has_feature(CPU_FTR_HVMODE)) { + /* Multiplexed PSL Interrupt */ + ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq; + ctx->irqs.range[0] = 1; + } ctx->irq_count = count; ctx->irq_bitmap = kcalloc(BITS_TO_LONGS(count), @@ -450,7 +284,7 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count) * Allocate names first. If any fail, bail out before allocating * actual hardware IRQs. */ - for (r = 1; r < CXL_IRQ_RANGES; r++) { + for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { for (i = 0; i < ctx->irqs.range[r]; i++) { irq_name = kmalloc(sizeof(struct cxl_irq_name), GFP_KERNEL); @@ -471,7 +305,7 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count) return 0; out: - cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); + cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); afu_irq_name_free(ctx); return -ENOMEM; } @@ -480,15 +314,30 @@ static void afu_register_hwirqs(struct cxl_context *ctx) { irq_hw_number_t hwirq; struct cxl_irq_name *irq_name; - int r,i; + int r, i; + irqreturn_t (*handler)(int irq, void *data); /* We've allocated all memory now, so let's do the irq allocations */ irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list); - for (r = 1; r < CXL_IRQ_RANGES; r++) { + for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { hwirq = ctx->irqs.offset[r]; for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { - cxl_map_irq(ctx->afu->adapter, hwirq, - cxl_irq_afu, ctx, irq_name->name); + if (r == 0 && i == 0) + /* + * The very first interrupt of range 0 is + * always the PSL interrupt, but we only + * need to connect a handler for guests, + * because there's one PSL interrupt per + * context. + * On bare-metal, the PSL interrupt is + * multiplexed and was setup when the AFU + * was configured. + */ + handler = cxl_ops->psl_interrupt; + else + handler = cxl_irq_afu; + cxl_map_irq(ctx->afu->adapter, hwirq, handler, ctx, + irq_name->name); irq_name = list_next_entry(irq_name, list); } } @@ -504,7 +353,7 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count) afu_register_hwirqs(ctx); return 0; - } +} void afu_release_irqs(struct cxl_context *ctx, void *cookie) { @@ -512,7 +361,7 @@ void afu_release_irqs(struct cxl_context *ctx, void *cookie) unsigned int virq; int r, i; - for (r = 1; r < CXL_IRQ_RANGES; r++) { + for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { hwirq = ctx->irqs.offset[r]; for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { virq = irq_find_mapping(NULL, hwirq); @@ -522,7 +371,7 @@ void afu_release_irqs(struct cxl_context *ctx, void *cookie) } afu_irq_name_free(ctx); - cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); + cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); ctx->irq_count = 0; } diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 9fde75ed4fac..ae68c3201156 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -32,6 +32,29 @@ uint cxl_verbose; module_param_named(verbose, cxl_verbose, uint, 0600); MODULE_PARM_DESC(verbose, "Enable verbose dmesg output"); +const struct cxl_backend_ops *cxl_ops; + +int cxl_afu_slbia(struct cxl_afu *afu) +{ + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + + pr_devel("cxl_afu_slbia issuing SLBIA command\n"); + cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL); + while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n"); + return -EBUSY; + } + /* If the adapter has gone down, we can assume that we + * will PERST it and that will invalidate everything. + */ + if (!cxl_ops->link_ok(afu->adapter, afu)) + return -EIO; + cpu_relax(); + } + return 0; +} + static inline void _cxl_slbia(struct cxl_context *ctx, struct mm_struct *mm) { struct task_struct *task; @@ -139,6 +162,32 @@ int cxl_alloc_sst(struct cxl_context *ctx) return 0; } +/* print buffer content as integers when debugging */ +void cxl_dump_debug_buffer(void *buf, size_t buf_len) +{ +#ifdef DEBUG + int i, *ptr; + + /* + * We want to regroup up to 4 integers per line, which means they + * need to be in the same pr_devel() statement + */ + ptr = (int *) buf; + for (i = 0; i * 4 < buf_len; i += 4) { + if ((i + 3) * 4 < buf_len) + pr_devel("%.8x %.8x %.8x %.8x\n", ptr[i], ptr[i + 1], + ptr[i + 2], ptr[i + 3]); + else if ((i + 2) * 4 < buf_len) + pr_devel("%.8x %.8x %.8x\n", ptr[i], ptr[i + 1], + ptr[i + 2]); + else if ((i + 1) * 4 < buf_len) + pr_devel("%.8x %.8x\n", ptr[i], ptr[i + 1]); + else + pr_devel("%.8x\n", ptr[i]); + } +#endif /* DEBUG */ +} + /* Find a CXL adapter by it's number and increase it's refcount */ struct cxl *get_cxl_adapter(int num) { @@ -152,7 +201,7 @@ struct cxl *get_cxl_adapter(int num) return adapter; } -int cxl_alloc_adapter_nr(struct cxl *adapter) +static int cxl_alloc_adapter_nr(struct cxl *adapter) { int i; @@ -174,13 +223,58 @@ void cxl_remove_adapter_nr(struct cxl *adapter) idr_remove(&cxl_adapter_idr, adapter->adapter_num); } +struct cxl *cxl_alloc_adapter(void) +{ + struct cxl *adapter; + + if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL))) + return NULL; + + spin_lock_init(&adapter->afu_list_lock); + + if (cxl_alloc_adapter_nr(adapter)) + goto err1; + + if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)) + goto err2; + + return adapter; + +err2: + cxl_remove_adapter_nr(adapter); +err1: + kfree(adapter); + return NULL; +} + +struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice) +{ + struct cxl_afu *afu; + + if (!(afu = kzalloc(sizeof(struct cxl_afu), GFP_KERNEL))) + return NULL; + + afu->adapter = adapter; + afu->dev.parent = &adapter->dev; + afu->dev.release = cxl_ops->release_afu; + afu->slice = slice; + idr_init(&afu->contexts_idr); + mutex_init(&afu->contexts_lock); + spin_lock_init(&afu->afu_cntl_lock); + + afu->prefault_mode = CXL_PREFAULT_NONE; + afu->irqs_max = afu->adapter->user_irqs; + + return afu; +} + int cxl_afu_select_best_mode(struct cxl_afu *afu) { if (afu->modes_supported & CXL_MODE_DIRECTED) - return cxl_afu_activate_mode(afu, CXL_MODE_DIRECTED); + return cxl_ops->afu_activate_mode(afu, CXL_MODE_DIRECTED); if (afu->modes_supported & CXL_MODE_DEDICATED) - return cxl_afu_activate_mode(afu, CXL_MODE_DEDICATED); + return cxl_ops->afu_activate_mode(afu, CXL_MODE_DEDICATED); dev_warn(&afu->dev, "No supported programming modes available\n"); /* We don't fail this so the user can inspect sysfs */ @@ -191,9 +285,6 @@ static int __init init_cxl(void) { int rc = 0; - if (!cpu_has_feature(CPU_FTR_HVMODE)) - return -EPERM; - if ((rc = cxl_file_init())) return rc; @@ -202,7 +293,17 @@ static int __init init_cxl(void) if ((rc = register_cxl_calls(&cxl_calls))) goto err; - if ((rc = pci_register_driver(&cxl_pci_driver))) + if (cpu_has_feature(CPU_FTR_HVMODE)) { + cxl_ops = &cxl_native_ops; + rc = pci_register_driver(&cxl_pci_driver); + } +#ifdef CONFIG_PPC_PSERIES + else { + cxl_ops = &cxl_guest_ops; + rc = platform_driver_register(&cxl_of_driver); + } +#endif + if (rc) goto err1; return 0; @@ -217,7 +318,12 @@ err: static void exit_cxl(void) { - pci_unregister_driver(&cxl_pci_driver); + if (cpu_has_feature(CPU_FTR_HVMODE)) + pci_unregister_driver(&cxl_pci_driver); +#ifdef CONFIG_PPC_PSERIES + else + platform_driver_unregister(&cxl_of_driver); +#endif cxl_debugfs_exit(); cxl_file_exit(); diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index f40909793490..387fcbdf9793 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -42,7 +42,7 @@ static int afu_control(struct cxl_afu *afu, u64 command, goto out; } - if (!cxl_adapter_link_ok(afu->adapter)) { + if (!cxl_ops->link_ok(afu->adapter, afu)) { afu->enabled = enabled; rc = -EIO; goto out; @@ -80,7 +80,7 @@ int cxl_afu_disable(struct cxl_afu *afu) } /* This will disable as well as reset */ -int __cxl_afu_reset(struct cxl_afu *afu) +static int native_afu_reset(struct cxl_afu *afu) { pr_devel("AFU reset request\n"); @@ -90,9 +90,9 @@ int __cxl_afu_reset(struct cxl_afu *afu) false); } -int cxl_afu_check_and_enable(struct cxl_afu *afu) +static int native_afu_check_and_enable(struct cxl_afu *afu) { - if (!cxl_adapter_link_ok(afu->adapter)) { + if (!cxl_ops->link_ok(afu->adapter, afu)) { WARN(1, "Refusing to enable afu while link down!\n"); return -EIO; } @@ -114,7 +114,7 @@ int cxl_psl_purge(struct cxl_afu *afu) pr_devel("PSL purge request\n"); - if (!cxl_adapter_link_ok(afu->adapter)) { + if (!cxl_ops->link_ok(afu->adapter, afu)) { dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n"); rc = -EIO; goto out; @@ -136,7 +136,7 @@ int cxl_psl_purge(struct cxl_afu *afu) rc = -EBUSY; goto out; } - if (!cxl_adapter_link_ok(afu->adapter)) { + if (!cxl_ops->link_ok(afu->adapter, afu)) { rc = -EIO; goto out; } @@ -186,22 +186,22 @@ static int spa_max_procs(int spa_size) int cxl_alloc_spa(struct cxl_afu *afu) { /* Work out how many pages to allocate */ - afu->spa_order = 0; + afu->native->spa_order = 0; do { - afu->spa_order++; - afu->spa_size = (1 << afu->spa_order) * PAGE_SIZE; - afu->spa_max_procs = spa_max_procs(afu->spa_size); - } while (afu->spa_max_procs < afu->num_procs); + afu->native->spa_order++; + afu->native->spa_size = (1 << afu->native->spa_order) * PAGE_SIZE; + afu->native->spa_max_procs = spa_max_procs(afu->native->spa_size); + } while (afu->native->spa_max_procs < afu->num_procs); - WARN_ON(afu->spa_size > 0x100000); /* Max size supported by the hardware */ + WARN_ON(afu->native->spa_size > 0x100000); /* Max size supported by the hardware */ - if (!(afu->spa = (struct cxl_process_element *) - __get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->spa_order))) { + if (!(afu->native->spa = (struct cxl_process_element *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->native->spa_order))) { pr_err("cxl_alloc_spa: Unable to allocate scheduled process area\n"); return -ENOMEM; } pr_devel("spa pages: %i afu->spa_max_procs: %i afu->num_procs: %i\n", - 1<<afu->spa_order, afu->spa_max_procs, afu->num_procs); + 1<<afu->native->spa_order, afu->native->spa_max_procs, afu->num_procs); return 0; } @@ -210,13 +210,15 @@ static void attach_spa(struct cxl_afu *afu) { u64 spap; - afu->sw_command_status = (__be64 *)((char *)afu->spa + - ((afu->spa_max_procs + 3) * 128)); + afu->native->sw_command_status = (__be64 *)((char *)afu->native->spa + + ((afu->native->spa_max_procs + 3) * 128)); - spap = virt_to_phys(afu->spa) & CXL_PSL_SPAP_Addr; - spap |= ((afu->spa_size >> (12 - CXL_PSL_SPAP_Size_Shift)) - 1) & CXL_PSL_SPAP_Size; + spap = virt_to_phys(afu->native->spa) & CXL_PSL_SPAP_Addr; + spap |= ((afu->native->spa_size >> (12 - CXL_PSL_SPAP_Size_Shift)) - 1) & CXL_PSL_SPAP_Size; spap |= CXL_PSL_SPAP_V; - pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", afu->spa, afu->spa_max_procs, afu->sw_command_status, spap); + pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", + afu->native->spa, afu->native->spa_max_procs, + afu->native->sw_command_status, spap); cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap); } @@ -227,9 +229,10 @@ static inline void detach_spa(struct cxl_afu *afu) void cxl_release_spa(struct cxl_afu *afu) { - if (afu->spa) { - free_pages((unsigned long) afu->spa, afu->spa_order); - afu->spa = NULL; + if (afu->native->spa) { + free_pages((unsigned long) afu->native->spa, + afu->native->spa_order); + afu->native->spa = NULL; } } @@ -247,7 +250,7 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter) dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n"); return -EBUSY; } - if (!cxl_adapter_link_ok(adapter)) + if (!cxl_ops->link_ok(adapter, NULL)) return -EIO; cpu_relax(); } @@ -258,28 +261,7 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter) dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n"); return -EBUSY; } - if (!cxl_adapter_link_ok(adapter)) - return -EIO; - cpu_relax(); - } - return 0; -} - -int cxl_afu_slbia(struct cxl_afu *afu) -{ - unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); - - pr_devel("cxl_afu_slbia issuing SLBIA command\n"); - cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL); - while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) { - if (time_after_eq(jiffies, timeout)) { - dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n"); - return -EBUSY; - } - /* If the adapter has gone down, we can assume that we - * will PERST it and that will invalidate everything. - */ - if (!cxl_adapter_link_ok(afu->adapter)) + if (!cxl_ops->link_ok(adapter, NULL)) return -EIO; cpu_relax(); } @@ -312,7 +294,7 @@ static void slb_invalid(struct cxl_context *ctx) struct cxl *adapter = ctx->afu->adapter; u64 slbia; - WARN_ON(!mutex_is_locked(&ctx->afu->spa_mutex)); + WARN_ON(!mutex_is_locked(&ctx->afu->native->spa_mutex)); cxl_p1_write(adapter, CXL_PSL_LBISEL, ((u64)be32_to_cpu(ctx->elem->common.pid) << 32) | @@ -320,7 +302,7 @@ static void slb_invalid(struct cxl_context *ctx) cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID); while (1) { - if (!cxl_adapter_link_ok(adapter)) + if (!cxl_ops->link_ok(adapter, NULL)) break; slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA); if (!(slbia & CXL_TLB_SLB_P)) @@ -342,7 +324,7 @@ static int do_process_element_cmd(struct cxl_context *ctx, ctx->elem->software_state = cpu_to_be32(pe_state); smp_wmb(); - *(ctx->afu->sw_command_status) = cpu_to_be64(cmd | 0 | ctx->pe); + *(ctx->afu->native->sw_command_status) = cpu_to_be64(cmd | 0 | ctx->pe); smp_mb(); cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe); while (1) { @@ -351,12 +333,12 @@ static int do_process_element_cmd(struct cxl_context *ctx, rc = -EBUSY; goto out; } - if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n"); rc = -EIO; goto out; } - state = be64_to_cpup(ctx->afu->sw_command_status); + state = be64_to_cpup(ctx->afu->native->sw_command_status); if (state == ~0ULL) { pr_err("cxl: Error adding process element to AFU\n"); rc = -1; @@ -384,12 +366,12 @@ static int add_process_element(struct cxl_context *ctx) { int rc = 0; - mutex_lock(&ctx->afu->spa_mutex); + mutex_lock(&ctx->afu->native->spa_mutex); pr_devel("%s Adding pe: %i started\n", __func__, ctx->pe); if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_ADD, CXL_PE_SOFTWARE_STATE_V))) ctx->pe_inserted = true; pr_devel("%s Adding pe: %i finished\n", __func__, ctx->pe); - mutex_unlock(&ctx->afu->spa_mutex); + mutex_unlock(&ctx->afu->native->spa_mutex); return rc; } @@ -401,18 +383,18 @@ static int terminate_process_element(struct cxl_context *ctx) if (!(ctx->elem->software_state & cpu_to_be32(CXL_PE_SOFTWARE_STATE_V))) return rc; - mutex_lock(&ctx->afu->spa_mutex); + mutex_lock(&ctx->afu->native->spa_mutex); pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe); /* We could be asked to terminate when the hw is down. That * should always succeed: it's not running if the hw has gone * away and is being reset. */ - if (cxl_adapter_link_ok(ctx->afu->adapter)) + if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); ctx->elem->software_state = 0; /* Remove Valid bit */ pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe); - mutex_unlock(&ctx->afu->spa_mutex); + mutex_unlock(&ctx->afu->native->spa_mutex); return rc; } @@ -420,20 +402,20 @@ static int remove_process_element(struct cxl_context *ctx) { int rc = 0; - mutex_lock(&ctx->afu->spa_mutex); + mutex_lock(&ctx->afu->native->spa_mutex); pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe); /* We could be asked to remove when the hw is down. Again, if * the hw is down, the PE is gone, so we succeed. */ - if (cxl_adapter_link_ok(ctx->afu->adapter)) + if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0); if (!rc) ctx->pe_inserted = false; slb_invalid(ctx); pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe); - mutex_unlock(&ctx->afu->spa_mutex); + mutex_unlock(&ctx->afu->native->spa_mutex); return rc; } @@ -446,7 +428,7 @@ void cxl_assign_psn_space(struct cxl_context *ctx) ctx->psn_size = ctx->afu->adapter->ps_size; } else { ctx->psn_phys = ctx->afu->psn_phys + - (ctx->afu->pp_offset + ctx->afu->pp_size * ctx->pe); + (ctx->afu->native->pp_offset + ctx->afu->pp_size * ctx->pe); ctx->psn_size = ctx->afu->pp_size; } } @@ -458,7 +440,7 @@ static int activate_afu_directed(struct cxl_afu *afu) dev_info(&afu->dev, "Activating AFU directed mode\n"); afu->num_procs = afu->max_procs_virtualised; - if (afu->spa == NULL) { + if (afu->native->spa == NULL) { if (cxl_alloc_spa(afu)) return -ENOMEM; } @@ -552,7 +534,7 @@ static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) ctx->elem->common.wed = cpu_to_be64(wed); /* first guy needs to enable */ - if ((result = cxl_afu_check_and_enable(ctx->afu))) + if ((result = cxl_ops->afu_check_and_enable(ctx->afu))) return result; return add_process_element(ctx); @@ -568,7 +550,7 @@ static int deactivate_afu_directed(struct cxl_afu *afu) cxl_sysfs_afu_m_remove(afu); cxl_chardev_afu_remove(afu); - __cxl_afu_reset(afu); + cxl_ops->afu_reset(afu); cxl_afu_disable(afu); cxl_psl_purge(afu); @@ -632,7 +614,7 @@ static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr) /* master only context for dedicated */ cxl_assign_psn_space(ctx); - if ((rc = __cxl_afu_reset(afu))) + if ((rc = cxl_ops->afu_reset(afu))) return rc; cxl_p2n_write(afu, CXL_PSL_WED_An, wed); @@ -652,7 +634,7 @@ static int deactivate_dedicated_process(struct cxl_afu *afu) return 0; } -int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode) +static int native_afu_deactivate_mode(struct cxl_afu *afu, int mode) { if (mode == CXL_MODE_DIRECTED) return deactivate_afu_directed(afu); @@ -661,19 +643,14 @@ int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode) return 0; } -int cxl_afu_deactivate_mode(struct cxl_afu *afu) -{ - return _cxl_afu_deactivate_mode(afu, afu->current_mode); -} - -int cxl_afu_activate_mode(struct cxl_afu *afu, int mode) +static int native_afu_activate_mode(struct cxl_afu *afu, int mode) { if (!mode) return 0; if (!(mode & afu->modes_supported)) return -EINVAL; - if (!cxl_adapter_link_ok(afu->adapter)) { + if (!cxl_ops->link_ok(afu->adapter, afu)) { WARN(1, "Device link is down, refusing to activate!\n"); return -EIO; } @@ -686,9 +663,10 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode) return -EINVAL; } -int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) +static int native_attach_process(struct cxl_context *ctx, bool kernel, + u64 wed, u64 amr) { - if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { WARN(1, "Device link is down, refusing to attach process!\n"); return -EIO; } @@ -705,7 +683,7 @@ int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) static inline int detach_process_native_dedicated(struct cxl_context *ctx) { - __cxl_afu_reset(ctx->afu); + cxl_ops->afu_reset(ctx->afu); cxl_afu_disable(ctx->afu); cxl_psl_purge(ctx->afu); return 0; @@ -723,7 +701,7 @@ static inline int detach_process_native_afu_directed(struct cxl_context *ctx) return 0; } -int cxl_detach_process(struct cxl_context *ctx) +static int native_detach_process(struct cxl_context *ctx) { trace_cxl_detach(ctx); @@ -733,14 +711,14 @@ int cxl_detach_process(struct cxl_context *ctx) return detach_process_native_afu_directed(ctx); } -int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info) +static int native_get_irq_info(struct cxl_afu *afu, struct cxl_irq_info *info) { u64 pidtid; /* If the adapter has gone away, we can't get any meaningful * information. */ - if (!cxl_adapter_link_ok(afu->adapter)) + if (!cxl_ops->link_ok(afu->adapter, afu)) return -EIO; info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); @@ -751,10 +729,214 @@ int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info) info->tid = pidtid & 0xffffffff; info->afu_err = cxl_p2n_read(afu, CXL_AFU_ERR_An); info->errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + info->proc_handle = 0; + + return 0; +} + +static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx, + u64 dsisr, u64 errstat) +{ + u64 fir1, fir2, fir_slice, serr, afu_debug; + + fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1); + fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2); + fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An); + serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); + afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An); + + dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat); + dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); + dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2); + dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); + dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); + + dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); + cxl_stop_trace(ctx->afu->adapter); + + return cxl_ops->ack_irq(ctx, 0, errstat); +} + +static irqreturn_t fail_psl_irq(struct cxl_afu *afu, struct cxl_irq_info *irq_info) +{ + if (irq_info->dsisr & CXL_PSL_DSISR_TRANS) + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + else + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + + return IRQ_HANDLED; +} + +static irqreturn_t native_irq_multiplexed(int irq, void *data) +{ + struct cxl_afu *afu = data; + struct cxl_context *ctx; + struct cxl_irq_info irq_info; + int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff; + int ret; + + if ((ret = native_get_irq_info(afu, &irq_info))) { + WARN(1, "Unable to get CXL IRQ Info: %i\n", ret); + return fail_psl_irq(afu, &irq_info); + } + + rcu_read_lock(); + ctx = idr_find(&afu->contexts_idr, ph); + if (ctx) { + ret = cxl_irq(irq, ctx, &irq_info); + rcu_read_unlock(); + return ret; + } + rcu_read_unlock(); + + WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %016llx DAR" + " %016llx\n(Possible AFU HW issue - was a term/remove acked" + " with outstanding transactions?)\n", ph, irq_info.dsisr, + irq_info.dar); + return fail_psl_irq(afu, &irq_info); +} + +static irqreturn_t native_slice_irq_err(int irq, void *data) +{ + struct cxl_afu *afu = data; + u64 fir_slice, errstat, serr, afu_debug; + + WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq); + + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); + fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An); + errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An); + dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); + dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat); + dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); + + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); + + return IRQ_HANDLED; +} + +static irqreturn_t native_irq_err(int irq, void *data) +{ + struct cxl *adapter = data; + u64 fir1, fir2, err_ivte; + + WARN(1, "CXL ERROR interrupt %i\n", irq); + + err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE); + dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte); + + dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); + cxl_stop_trace(adapter); + + fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); + fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); + + dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2); + + return IRQ_HANDLED; +} + +int cxl_native_register_psl_err_irq(struct cxl *adapter) +{ + int rc; + + adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&adapter->dev)); + if (!adapter->irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(adapter, native_irq_err, adapter, + &adapter->native->err_hwirq, + &adapter->native->err_virq, + adapter->irq_name))) { + kfree(adapter->irq_name); + adapter->irq_name = NULL; + return rc; + } + + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->native->err_hwirq & 0xffff); + + return 0; +} + +void cxl_native_release_psl_err_irq(struct cxl *adapter) +{ + if (adapter->native->err_virq != irq_find_mapping(NULL, adapter->native->err_hwirq)) + return; + + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); + cxl_unmap_irq(adapter->native->err_virq, adapter); + cxl_ops->release_one_irq(adapter, adapter->native->err_hwirq); + kfree(adapter->irq_name); +} + +int cxl_native_register_serr_irq(struct cxl_afu *afu) +{ + u64 serr; + int rc; + + afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&afu->dev)); + if (!afu->err_irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(afu->adapter, native_slice_irq_err, afu, + &afu->serr_hwirq, + &afu->serr_virq, afu->err_irq_name))) { + kfree(afu->err_irq_name); + afu->err_irq_name = NULL; + return rc; + } + + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); + serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff); + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); return 0; } +void cxl_native_release_serr_irq(struct cxl_afu *afu) +{ + if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq)) + return; + + cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000); + cxl_unmap_irq(afu->serr_virq, afu); + cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq); + kfree(afu->err_irq_name); +} + +int cxl_native_register_psl_irq(struct cxl_afu *afu) +{ + int rc; + + afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s", + dev_name(&afu->dev)); + if (!afu->psl_irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(afu->adapter, native_irq_multiplexed, + afu, &afu->native->psl_hwirq, &afu->native->psl_virq, + afu->psl_irq_name))) { + kfree(afu->psl_irq_name); + afu->psl_irq_name = NULL; + } + return rc; +} + +void cxl_native_release_psl_irq(struct cxl_afu *afu) +{ + if (afu->native->psl_virq != irq_find_mapping(NULL, afu->native->psl_hwirq)) + return; + + cxl_unmap_irq(afu->native->psl_virq, afu); + cxl_ops->release_one_irq(afu->adapter, afu->native->psl_hwirq); + kfree(afu->psl_irq_name); +} + static void recover_psl_err(struct cxl_afu *afu, u64 errstat) { u64 dsisr; @@ -769,7 +951,7 @@ static void recover_psl_err(struct cxl_afu *afu, u64 errstat) cxl_p2n_write(afu, CXL_PSL_ErrStat_An, errstat); } -int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) +static int native_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) { trace_cxl_psl_irq_ack(ctx, tfc); if (tfc) @@ -784,3 +966,132 @@ int cxl_check_error(struct cxl_afu *afu) { return (cxl_p1n_read(afu, CXL_PSL_SCNTL_An) == ~0ULL); } + +static bool native_support_attributes(const char *attr_name, + enum cxl_attrs type) +{ + return true; +} + +static int native_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off, u64 *out) +{ + if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) + return -EIO; + if (unlikely(off >= afu->crs_len)) + return -ERANGE; + *out = in_le64(afu->native->afu_desc_mmio + afu->crs_offset + + (cr * afu->crs_len) + off); + return 0; +} + +static int native_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off, u32 *out) +{ + if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) + return -EIO; + if (unlikely(off >= afu->crs_len)) + return -ERANGE; + *out = in_le32(afu->native->afu_desc_mmio + afu->crs_offset + + (cr * afu->crs_len) + off); + return 0; +} + +static int native_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off, u16 *out) +{ + u64 aligned_off = off & ~0x3L; + u32 val; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val); + if (!rc) + *out = (val >> ((off & 0x3) * 8)) & 0xffff; + return rc; +} + +static int native_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off, u8 *out) +{ + u64 aligned_off = off & ~0x3L; + u32 val; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val); + if (!rc) + *out = (val >> ((off & 0x3) * 8)) & 0xff; + return rc; +} + +static int native_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in) +{ + if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) + return -EIO; + if (unlikely(off >= afu->crs_len)) + return -ERANGE; + out_le32(afu->native->afu_desc_mmio + afu->crs_offset + + (cr * afu->crs_len) + off, in); + return 0; +} + +static int native_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in) +{ + u64 aligned_off = off & ~0x3L; + u32 val32, mask, shift; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val32); + if (rc) + return rc; + shift = (off & 0x3) * 8; + WARN_ON(shift == 24); + mask = 0xffff << shift; + val32 = (val32 & ~mask) | (in << shift); + + rc = native_afu_cr_write32(afu, cr, aligned_off, val32); + return rc; +} + +static int native_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in) +{ + u64 aligned_off = off & ~0x3L; + u32 val32, mask, shift; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val32); + if (rc) + return rc; + shift = (off & 0x3) * 8; + mask = 0xff << shift; + val32 = (val32 & ~mask) | (in << shift); + + rc = native_afu_cr_write32(afu, cr, aligned_off, val32); + return rc; +} + +const struct cxl_backend_ops cxl_native_ops = { + .module = THIS_MODULE, + .adapter_reset = cxl_pci_reset, + .alloc_one_irq = cxl_pci_alloc_one_irq, + .release_one_irq = cxl_pci_release_one_irq, + .alloc_irq_ranges = cxl_pci_alloc_irq_ranges, + .release_irq_ranges = cxl_pci_release_irq_ranges, + .setup_irq = cxl_pci_setup_irq, + .handle_psl_slice_error = native_handle_psl_slice_error, + .psl_interrupt = NULL, + .ack_irq = native_ack_irq, + .attach_process = native_attach_process, + .detach_process = native_detach_process, + .support_attributes = native_support_attributes, + .link_ok = cxl_adapter_link_ok, + .release_afu = cxl_pci_release_afu, + .afu_read_err_buffer = cxl_pci_afu_read_err_buffer, + .afu_check_and_enable = native_afu_check_and_enable, + .afu_activate_mode = native_afu_activate_mode, + .afu_deactivate_mode = native_afu_deactivate_mode, + .afu_reset = native_afu_reset, + .afu_cr_read8 = native_afu_cr_read8, + .afu_cr_read16 = native_afu_cr_read16, + .afu_cr_read32 = native_afu_cr_read32, + .afu_cr_read64 = native_afu_cr_read64, + .afu_cr_write8 = native_afu_cr_write8, + .afu_cr_write16 = native_afu_cr_write16, + .afu_cr_write32 = native_afu_cr_write32, + .read_adapter_vpd = cxl_pci_read_adapter_vpd, +}; diff --git a/drivers/misc/cxl/of.c b/drivers/misc/cxl/of.c new file mode 100644 index 000000000000..edc458395f68 --- /dev/null +++ b/drivers/misc/cxl/of.c @@ -0,0 +1,513 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> + +#include "cxl.h" + + +static const __be32 *read_prop_string(const struct device_node *np, + const char *prop_name) +{ + const __be32 *prop; + + prop = of_get_property(np, prop_name, NULL); + if (cxl_verbose && prop) + pr_info("%s: %s\n", prop_name, (char *) prop); + return prop; +} + +static const __be32 *read_prop_dword(const struct device_node *np, + const char *prop_name, u32 *val) +{ + const __be32 *prop; + + prop = of_get_property(np, prop_name, NULL); + if (prop) + *val = be32_to_cpu(prop[0]); + if (cxl_verbose && prop) + pr_info("%s: %#x (%u)\n", prop_name, *val, *val); + return prop; +} + +static const __be64 *read_prop64_dword(const struct device_node *np, + const char *prop_name, u64 *val) +{ + const __be64 *prop; + + prop = of_get_property(np, prop_name, NULL); + if (prop) + *val = be64_to_cpu(prop[0]); + if (cxl_verbose && prop) + pr_info("%s: %#llx (%llu)\n", prop_name, *val, *val); + return prop; +} + + +static int read_handle(struct device_node *np, u64 *handle) +{ + const __be32 *prop; + u64 size; + + /* Get address and size of the node */ + prop = of_get_address(np, 0, &size, NULL); + if (size) + return -EINVAL; + + /* Helper to read a big number; size is in cells (not bytes) */ + *handle = of_read_number(prop, of_n_addr_cells(np)); + return 0; +} + +static int read_phys_addr(struct device_node *np, char *prop_name, + struct cxl_afu *afu) +{ + int i, len, entry_size, naddr, nsize, type; + u64 addr, size; + const __be32 *prop; + + naddr = of_n_addr_cells(np); + nsize = of_n_size_cells(np); + + prop = of_get_property(np, prop_name, &len); + if (prop) { + entry_size = naddr + nsize; + for (i = 0; i < (len / 4); i += entry_size, prop += entry_size) { + type = be32_to_cpu(prop[0]); + addr = of_read_number(prop, naddr); + size = of_read_number(&prop[naddr], nsize); + switch (type) { + case 0: /* unit address */ + afu->guest->handle = addr; + break; + case 1: /* p2 area */ + afu->guest->p2n_phys += addr; + afu->guest->p2n_size = size; + break; + case 2: /* problem state area */ + afu->psn_phys += addr; + afu->adapter->ps_size = size; + break; + default: + pr_err("Invalid address type %d found in %s property of AFU\n", + type, prop_name); + return -EINVAL; + } + if (cxl_verbose) + pr_info("%s: %#x %#llx (size %#llx)\n", + prop_name, type, addr, size); + } + } + return 0; +} + +static int read_vpd(struct cxl *adapter, struct cxl_afu *afu) +{ + char vpd[256]; + int rc; + size_t len = sizeof(vpd); + + memset(vpd, 0, len); + + if (adapter) + rc = cxl_guest_read_adapter_vpd(adapter, vpd, len); + else + rc = cxl_guest_read_afu_vpd(afu, vpd, len); + + if (rc > 0) { + cxl_dump_debug_buffer(vpd, rc); + rc = 0; + } + return rc; +} + +int cxl_of_read_afu_handle(struct cxl_afu *afu, struct device_node *afu_np) +{ + if (read_handle(afu_np, &afu->guest->handle)) + return -EINVAL; + pr_devel("AFU handle: 0x%.16llx\n", afu->guest->handle); + + return 0; +} + +int cxl_of_read_afu_properties(struct cxl_afu *afu, struct device_node *np) +{ + int i, len, rc; + char *p; + const __be32 *prop; + u16 device_id, vendor_id; + u32 val = 0, class_code; + + /* Properties are read in the same order as listed in PAPR */ + + if (cxl_verbose) { + pr_info("Dump of the 'ibm,coherent-platform-function' node properties:\n"); + + prop = of_get_property(np, "compatible", &len); + i = 0; + while (i < len) { + p = (char *) prop + i; + pr_info("compatible: %s\n", p); + i += strlen(p) + 1; + } + read_prop_string(np, "name"); + } + + rc = read_phys_addr(np, "reg", afu); + if (rc) + return rc; + + rc = read_phys_addr(np, "assigned-addresses", afu); + if (rc) + return rc; + + if (afu->psn_phys == 0) + afu->psa = false; + else + afu->psa = true; + + if (cxl_verbose) { + read_prop_string(np, "ibm,loc-code"); + read_prop_string(np, "device_type"); + } + + read_prop_dword(np, "ibm,#processes", &afu->max_procs_virtualised); + + if (cxl_verbose) { + read_prop_dword(np, "ibm,scratchpad-size", &val); + read_prop_dword(np, "ibm,programmable", &val); + read_prop_string(np, "ibm,phandle"); + read_vpd(NULL, afu); + } + + read_prop_dword(np, "ibm,max-ints-per-process", &afu->guest->max_ints); + afu->irqs_max = afu->guest->max_ints; + + prop = read_prop_dword(np, "ibm,min-ints-per-process", &afu->pp_irqs); + if (prop) { + /* One extra interrupt for the PSL interrupt is already + * included. Remove it now to keep only AFU interrupts and + * match the native case. + */ + afu->pp_irqs--; + } + + if (cxl_verbose) { + read_prop_dword(np, "ibm,max-ints", &val); + read_prop_dword(np, "ibm,vpd-size", &val); + } + + read_prop64_dword(np, "ibm,error-buffer-size", &afu->eb_len); + afu->eb_offset = 0; + + if (cxl_verbose) + read_prop_dword(np, "ibm,config-record-type", &val); + + read_prop64_dword(np, "ibm,config-record-size", &afu->crs_len); + afu->crs_offset = 0; + + read_prop_dword(np, "ibm,#config-records", &afu->crs_num); + + if (cxl_verbose) { + for (i = 0; i < afu->crs_num; i++) { + rc = cxl_ops->afu_cr_read16(afu, i, PCI_DEVICE_ID, + &device_id); + if (!rc) + pr_info("record %d - device-id: %#x\n", + i, device_id); + rc = cxl_ops->afu_cr_read16(afu, i, PCI_VENDOR_ID, + &vendor_id); + if (!rc) + pr_info("record %d - vendor-id: %#x\n", + i, vendor_id); + rc = cxl_ops->afu_cr_read32(afu, i, PCI_CLASS_REVISION, + &class_code); + if (!rc) { + class_code >>= 8; + pr_info("record %d - class-code: %#x\n", + i, class_code); + } + } + + read_prop_dword(np, "ibm,function-number", &val); + read_prop_dword(np, "ibm,privileged-function", &val); + read_prop_dword(np, "vendor-id", &val); + read_prop_dword(np, "device-id", &val); + read_prop_dword(np, "revision-id", &val); + read_prop_dword(np, "class-code", &val); + read_prop_dword(np, "subsystem-vendor-id", &val); + read_prop_dword(np, "subsystem-id", &val); + } + /* + * if "ibm,process-mmio" doesn't exist then per-process mmio is + * not supported + */ + val = 0; + prop = read_prop_dword(np, "ibm,process-mmio", &val); + if (prop && val == 1) + afu->pp_psa = true; + else + afu->pp_psa = false; + + if (cxl_verbose) { + read_prop_dword(np, "ibm,supports-aur", &val); + read_prop_dword(np, "ibm,supports-csrp", &val); + read_prop_dword(np, "ibm,supports-prr", &val); + } + + prop = read_prop_dword(np, "ibm,function-error-interrupt", &val); + if (prop) + afu->serr_hwirq = val; + + pr_devel("AFU handle: %#llx\n", afu->guest->handle); + pr_devel("p2n_phys: %#llx (size %#llx)\n", + afu->guest->p2n_phys, afu->guest->p2n_size); + pr_devel("psn_phys: %#llx (size %#llx)\n", + afu->psn_phys, afu->adapter->ps_size); + pr_devel("Max number of processes virtualised=%i\n", + afu->max_procs_virtualised); + pr_devel("Per-process irqs min=%i, max=%i\n", afu->pp_irqs, + afu->irqs_max); + pr_devel("Slice error interrupt=%#lx\n", afu->serr_hwirq); + + return 0; +} + +static int read_adapter_irq_config(struct cxl *adapter, struct device_node *np) +{ + const __be32 *ranges; + int len, nranges, i; + struct irq_avail *cur; + + ranges = of_get_property(np, "interrupt-ranges", &len); + if (ranges == NULL || len < (2 * sizeof(int))) + return -EINVAL; + + /* + * encoded array of two cells per entry, each cell encoded as + * with encode-int + */ + nranges = len / (2 * sizeof(int)); + if (nranges == 0 || (nranges * 2 * sizeof(int)) != len) + return -EINVAL; + + adapter->guest->irq_avail = kzalloc(nranges * sizeof(struct irq_avail), + GFP_KERNEL); + if (adapter->guest->irq_avail == NULL) + return -ENOMEM; + + adapter->guest->irq_base_offset = be32_to_cpu(ranges[0]); + for (i = 0; i < nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + cur->offset = be32_to_cpu(ranges[i * 2]); + cur->range = be32_to_cpu(ranges[i * 2 + 1]); + cur->bitmap = kcalloc(BITS_TO_LONGS(cur->range), + sizeof(*cur->bitmap), GFP_KERNEL); + if (cur->bitmap == NULL) + goto err; + if (cur->offset < adapter->guest->irq_base_offset) + adapter->guest->irq_base_offset = cur->offset; + if (cxl_verbose) + pr_info("available IRQ range: %#lx-%#lx (%lu)\n", + cur->offset, cur->offset + cur->range - 1, + cur->range); + } + adapter->guest->irq_nranges = nranges; + spin_lock_init(&adapter->guest->irq_alloc_lock); + + return 0; +err: + for (i--; i >= 0; i--) { + cur = &adapter->guest->irq_avail[i]; + kfree(cur->bitmap); + } + kfree(adapter->guest->irq_avail); + adapter->guest->irq_avail = NULL; + return -ENOMEM; +} + +int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node *np) +{ + if (read_handle(np, &adapter->guest->handle)) + return -EINVAL; + pr_devel("Adapter handle: 0x%.16llx\n", adapter->guest->handle); + + return 0; +} + +int cxl_of_read_adapter_properties(struct cxl *adapter, struct device_node *np) +{ + int rc, len, naddr, i; + char *p; + const __be32 *prop; + u32 val = 0; + + /* Properties are read in the same order as listed in PAPR */ + + naddr = of_n_addr_cells(np); + + if (cxl_verbose) { + pr_info("Dump of the 'ibm,coherent-platform-facility' node properties:\n"); + + read_prop_dword(np, "#address-cells", &val); + read_prop_dword(np, "#size-cells", &val); + + prop = of_get_property(np, "compatible", &len); + i = 0; + while (i < len) { + p = (char *) prop + i; + pr_info("compatible: %s\n", p); + i += strlen(p) + 1; + } + read_prop_string(np, "name"); + read_prop_string(np, "model"); + + prop = of_get_property(np, "reg", NULL); + if (prop) { + pr_info("reg: addr:%#llx size:%#x\n", + of_read_number(prop, naddr), + be32_to_cpu(prop[naddr])); + } + + read_prop_string(np, "ibm,loc-code"); + } + + if ((rc = read_adapter_irq_config(adapter, np))) + return rc; + + if (cxl_verbose) { + read_prop_string(np, "device_type"); + read_prop_string(np, "ibm,phandle"); + } + + prop = read_prop_dword(np, "ibm,caia-version", &val); + if (prop) { + adapter->caia_major = (val & 0xFF00) >> 8; + adapter->caia_minor = val & 0xFF; + } + + prop = read_prop_dword(np, "ibm,psl-revision", &val); + if (prop) + adapter->psl_rev = val; + + prop = read_prop_string(np, "status"); + if (prop) { + adapter->guest->status = kasprintf(GFP_KERNEL, "%s", (char *) prop); + if (adapter->guest->status == NULL) + return -ENOMEM; + } + + prop = read_prop_dword(np, "vendor-id", &val); + if (prop) + adapter->guest->vendor = val; + + prop = read_prop_dword(np, "device-id", &val); + if (prop) + adapter->guest->device = val; + + if (cxl_verbose) { + read_prop_dword(np, "ibm,privileged-facility", &val); + read_prop_dword(np, "revision-id", &val); + read_prop_dword(np, "class-code", &val); + } + + prop = read_prop_dword(np, "subsystem-vendor-id", &val); + if (prop) + adapter->guest->subsystem_vendor = val; + + prop = read_prop_dword(np, "subsystem-id", &val); + if (prop) + adapter->guest->subsystem = val; + + if (cxl_verbose) + read_vpd(adapter, NULL); + + return 0; +} + +static int cxl_of_remove(struct platform_device *pdev) +{ + struct cxl *adapter; + int afu; + + adapter = dev_get_drvdata(&pdev->dev); + for (afu = 0; afu < adapter->slices; afu++) + cxl_guest_remove_afu(adapter->afu[afu]); + + cxl_guest_remove_adapter(adapter); + return 0; +} + +static void cxl_of_shutdown(struct platform_device *pdev) +{ + cxl_of_remove(pdev); +} + +int cxl_of_probe(struct platform_device *pdev) +{ + struct device_node *np = NULL; + struct device_node *afu_np = NULL; + struct cxl *adapter = NULL; + int ret; + int slice, slice_ok; + + pr_devel("in %s\n", __func__); + + np = pdev->dev.of_node; + if (np == NULL) + return -ENODEV; + + /* init adapter */ + adapter = cxl_guest_init_adapter(np, pdev); + if (IS_ERR(adapter)) { + dev_err(&pdev->dev, "guest_init_adapter failed: %li\n", PTR_ERR(adapter)); + return PTR_ERR(adapter); + } + + /* init afu */ + slice_ok = 0; + for (afu_np = NULL, slice = 0; (afu_np = of_get_next_child(np, afu_np)); slice++) { + if ((ret = cxl_guest_init_afu(adapter, slice, afu_np))) + dev_err(&pdev->dev, "AFU %i failed to initialise: %i\n", + slice, ret); + else + slice_ok++; + } + + if (slice_ok == 0) { + dev_info(&pdev->dev, "No active AFU"); + adapter->slices = 0; + } + + if (afu_np) + of_node_put(afu_np); + return 0; +} + +static const struct of_device_id cxl_of_match[] = { + { .compatible = "ibm,coherent-platform-facility",}, + {}, +}; +MODULE_DEVICE_TABLE(of, cxl_of_match); + +struct platform_driver cxl_of_driver = { + .driver = { + .name = "cxl_of", + .of_match_table = cxl_of_match, + .owner = THIS_MODULE + }, + .probe = cxl_of_probe, + .remove = cxl_of_remove, + .shutdown = cxl_of_shutdown, +}; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 4c1903f781fc..6634b7a3c42b 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -90,8 +90,8 @@ /* This works a little different than the p1/p2 register accesses to make it * easier to pull out individual fields */ -#define AFUD_READ(afu, off) in_be64(afu->afu_desc_mmio + off) -#define AFUD_READ_LE(afu, off) in_le64(afu->afu_desc_mmio + off) +#define AFUD_READ(afu, off) in_be64(afu->native->afu_desc_mmio + off) +#define AFUD_READ_LE(afu, off) in_le64(afu->native->afu_desc_mmio + off) #define EXTRACT_PPC_BIT(val, bit) (!!(val & PPC_BIT(bit))) #define EXTRACT_PPC_BITS(val, bs, be) ((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be)) @@ -116,24 +116,6 @@ #define AFUD_EB_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) #define AFUD_READ_EB_OFF(afu) AFUD_READ(afu, 0x48) -u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off) -{ - u64 aligned_off = off & ~0x3L; - u32 val; - - val = cxl_afu_cr_read32(afu, cr, aligned_off); - return (val >> ((off & 0x2) * 8)) & 0xffff; -} - -u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off) -{ - u64 aligned_off = off & ~0x3L; - u32 val; - - val = cxl_afu_cr_read32(afu, cr, aligned_off); - return (val >> ((off & 0x3) * 8)) & 0xff; -} - static const struct pci_device_id cxl_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), }, { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), }, @@ -433,8 +415,8 @@ static int init_implementation_afu_regs(struct cxl_afu *afu) return 0; } -int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, - unsigned int virq) +int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq, + unsigned int virq) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); @@ -476,28 +458,30 @@ int cxl_update_image_control(struct cxl *adapter) return 0; } -int cxl_alloc_one_irq(struct cxl *adapter) +int cxl_pci_alloc_one_irq(struct cxl *adapter) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); return pnv_cxl_alloc_hwirqs(dev, 1); } -void cxl_release_one_irq(struct cxl *adapter, int hwirq) +void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); return pnv_cxl_release_hwirqs(dev, hwirq, 1); } -int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num) +int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter, unsigned int num) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); return pnv_cxl_alloc_hwirq_ranges(irqs, dev, num); } -void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter) +void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); @@ -558,7 +542,7 @@ static int switch_card_to_cxl(struct pci_dev *dev) return 0; } -static int cxl_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) +static int pci_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) { u64 p1n_base, p2n_base, afu_desc; const u64 p1n_size = 0x100; @@ -566,15 +550,15 @@ static int cxl_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct p p1n_base = p1_base(dev) + 0x10000 + (afu->slice * p1n_size); p2n_base = p2_base(dev) + (afu->slice * p2n_size); - afu->psn_phys = p2_base(dev) + (adapter->ps_off + (afu->slice * adapter->ps_size)); - afu_desc = p2_base(dev) + adapter->afu_desc_off + (afu->slice * adapter->afu_desc_size); + afu->psn_phys = p2_base(dev) + (adapter->native->ps_off + (afu->slice * adapter->ps_size)); + afu_desc = p2_base(dev) + adapter->native->afu_desc_off + (afu->slice * adapter->native->afu_desc_size); - if (!(afu->p1n_mmio = ioremap(p1n_base, p1n_size))) + if (!(afu->native->p1n_mmio = ioremap(p1n_base, p1n_size))) goto err; if (!(afu->p2n_mmio = ioremap(p2n_base, p2n_size))) goto err1; if (afu_desc) { - if (!(afu->afu_desc_mmio = ioremap(afu_desc, adapter->afu_desc_size))) + if (!(afu->native->afu_desc_mmio = ioremap(afu_desc, adapter->native->afu_desc_size))) goto err2; } @@ -582,62 +566,41 @@ static int cxl_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct p err2: iounmap(afu->p2n_mmio); err1: - iounmap(afu->p1n_mmio); + iounmap(afu->native->p1n_mmio); err: dev_err(&afu->dev, "Error mapping AFU MMIO regions\n"); return -ENOMEM; } -static void cxl_unmap_slice_regs(struct cxl_afu *afu) +static void pci_unmap_slice_regs(struct cxl_afu *afu) { if (afu->p2n_mmio) { iounmap(afu->p2n_mmio); afu->p2n_mmio = NULL; } - if (afu->p1n_mmio) { - iounmap(afu->p1n_mmio); - afu->p1n_mmio = NULL; + if (afu->native->p1n_mmio) { + iounmap(afu->native->p1n_mmio); + afu->native->p1n_mmio = NULL; } - if (afu->afu_desc_mmio) { - iounmap(afu->afu_desc_mmio); - afu->afu_desc_mmio = NULL; + if (afu->native->afu_desc_mmio) { + iounmap(afu->native->afu_desc_mmio); + afu->native->afu_desc_mmio = NULL; } } -static void cxl_release_afu(struct device *dev) +void cxl_pci_release_afu(struct device *dev) { struct cxl_afu *afu = to_cxl_afu(dev); - pr_devel("cxl_release_afu\n"); + pr_devel("%s\n", __func__); idr_destroy(&afu->contexts_idr); cxl_release_spa(afu); + kfree(afu->native); kfree(afu); } -static struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice) -{ - struct cxl_afu *afu; - - if (!(afu = kzalloc(sizeof(struct cxl_afu), GFP_KERNEL))) - return NULL; - - afu->adapter = adapter; - afu->dev.parent = &adapter->dev; - afu->dev.release = cxl_release_afu; - afu->slice = slice; - idr_init(&afu->contexts_idr); - mutex_init(&afu->contexts_lock); - spin_lock_init(&afu->afu_cntl_lock); - mutex_init(&afu->spa_mutex); - - afu->prefault_mode = CXL_PREFAULT_NONE; - afu->irqs_max = afu->adapter->user_irqs; - - return afu; -} - /* Expects AFU struct to have recently been zeroed out */ static int cxl_read_afu_descriptor(struct cxl_afu *afu) { @@ -659,7 +622,7 @@ static int cxl_read_afu_descriptor(struct cxl_afu *afu) afu->pp_size = AFUD_PPPSA_LEN(val) * 4096; afu->psa = AFUD_PPPSA_PSA(val); if ((afu->pp_psa = AFUD_PPPSA_PP(val))) - afu->pp_offset = AFUD_READ_PPPSA_OFF(afu); + afu->native->pp_offset = AFUD_READ_PPPSA_OFF(afu); val = AFUD_READ_CR(afu); afu->crs_len = AFUD_CR_LEN(val) * 256; @@ -686,10 +649,11 @@ static int cxl_read_afu_descriptor(struct cxl_afu *afu) static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu) { - int i; + int i, rc; + u32 val; if (afu->psa && afu->adapter->ps_size < - (afu->pp_offset + afu->pp_size*afu->max_procs_virtualised)) { + (afu->native->pp_offset + afu->pp_size*afu->max_procs_virtualised)) { dev_err(&afu->dev, "per-process PSA can't fit inside the PSA!\n"); return -ENODEV; } @@ -698,7 +662,8 @@ static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu) dev_warn(&afu->dev, "AFU uses < PAGE_SIZE per-process PSA!"); for (i = 0; i < afu->crs_num; i++) { - if ((cxl_afu_cr_read32(afu, i, 0) == 0)) { + rc = cxl_ops->afu_cr_read32(afu, i, 0, &val); + if (rc || val == 0) { dev_err(&afu->dev, "ABORTING: AFU configuration record %i is invalid\n", i); return -EINVAL; } @@ -719,7 +684,7 @@ static int sanitise_afu_regs(struct cxl_afu *afu) reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An); if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg); - if (__cxl_afu_reset(afu)) + if (cxl_ops->afu_reset(afu)) return -EIO; if (cxl_afu_disable(afu)) return -EIO; @@ -767,13 +732,13 @@ static int sanitise_afu_regs(struct cxl_afu *afu) * 4/8 bytes aligned access. So in case the requested offset/count arent 8 byte * aligned the function uses a bounce buffer which can be max PAGE_SIZE. */ -ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf, +ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, loff_t off, size_t count) { loff_t aligned_start, aligned_end; size_t aligned_length; void *tbuf; - const void __iomem *ebuf = afu->afu_desc_mmio + afu->eb_offset; + const void __iomem *ebuf = afu->native->afu_desc_mmio + afu->eb_offset; if (count == 0 || off < 0 || (size_t)off >= afu->eb_len) return 0; @@ -804,18 +769,18 @@ ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf, return count; } -static int cxl_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) +static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) { int rc; - if ((rc = cxl_map_slice_regs(afu, adapter, dev))) + if ((rc = pci_map_slice_regs(afu, adapter, dev))) return rc; if ((rc = sanitise_afu_regs(afu))) goto err1; /* We need to reset the AFU before we can read the AFU descriptor */ - if ((rc = __cxl_afu_reset(afu))) + if ((rc = cxl_ops->afu_reset(afu))) goto err1; if (cxl_verbose) @@ -830,44 +795,50 @@ static int cxl_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc if ((rc = init_implementation_afu_regs(afu))) goto err1; - if ((rc = cxl_register_serr_irq(afu))) + if ((rc = cxl_native_register_serr_irq(afu))) goto err1; - if ((rc = cxl_register_psl_irq(afu))) + if ((rc = cxl_native_register_psl_irq(afu))) goto err2; return 0; err2: - cxl_release_serr_irq(afu); + cxl_native_release_serr_irq(afu); err1: - cxl_unmap_slice_regs(afu); + pci_unmap_slice_regs(afu); return rc; } -static void cxl_deconfigure_afu(struct cxl_afu *afu) +static void pci_deconfigure_afu(struct cxl_afu *afu) { - cxl_release_psl_irq(afu); - cxl_release_serr_irq(afu); - cxl_unmap_slice_regs(afu); + cxl_native_release_psl_irq(afu); + cxl_native_release_serr_irq(afu); + pci_unmap_slice_regs(afu); } -static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) +static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) { struct cxl_afu *afu; - int rc; + int rc = -ENOMEM; afu = cxl_alloc_afu(adapter, slice); if (!afu) return -ENOMEM; + afu->native = kzalloc(sizeof(struct cxl_afu_native), GFP_KERNEL); + if (!afu->native) + goto err_free_afu; + + mutex_init(&afu->native->spa_mutex); + rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice); if (rc) - goto err_free; + goto err_free_native; - rc = cxl_configure_afu(afu, adapter, dev); + rc = pci_configure_afu(afu, adapter, dev); if (rc) - goto err_free; + goto err_free_native; /* Don't care if this fails */ cxl_debugfs_afu_add(afu); @@ -890,24 +861,27 @@ static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) return 0; err_put1: - cxl_deconfigure_afu(afu); + pci_deconfigure_afu(afu); cxl_debugfs_afu_remove(afu); device_unregister(&afu->dev); return rc; -err_free: +err_free_native: + kfree(afu->native); +err_free_afu: kfree(afu); return rc; } -static void cxl_remove_afu(struct cxl_afu *afu) +static void cxl_pci_remove_afu(struct cxl_afu *afu) { - pr_devel("cxl_remove_afu\n"); + pr_devel("%s\n", __func__); if (!afu) return; + cxl_pci_vphb_remove(afu); cxl_sysfs_afu_remove(afu); cxl_debugfs_afu_remove(afu); @@ -916,13 +890,13 @@ static void cxl_remove_afu(struct cxl_afu *afu) spin_unlock(&afu->adapter->afu_list_lock); cxl_context_detach_all(afu); - cxl_afu_deactivate_mode(afu); + cxl_ops->afu_deactivate_mode(afu, afu->current_mode); - cxl_deconfigure_afu(afu); + pci_deconfigure_afu(afu); device_unregister(&afu->dev); } -int cxl_reset(struct cxl *adapter) +int cxl_pci_reset(struct cxl *adapter) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); int rc; @@ -956,17 +930,17 @@ static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev) pr_devel("cxl_map_adapter_regs: p1: %#016llx %#llx, p2: %#016llx %#llx", p1_base(dev), p1_size(dev), p2_base(dev), p2_size(dev)); - if (!(adapter->p1_mmio = ioremap(p1_base(dev), p1_size(dev)))) + if (!(adapter->native->p1_mmio = ioremap(p1_base(dev), p1_size(dev)))) goto err3; - if (!(adapter->p2_mmio = ioremap(p2_base(dev), p2_size(dev)))) + if (!(adapter->native->p2_mmio = ioremap(p2_base(dev), p2_size(dev)))) goto err4; return 0; err4: - iounmap(adapter->p1_mmio); - adapter->p1_mmio = NULL; + iounmap(adapter->native->p1_mmio); + adapter->native->p1_mmio = NULL; err3: pci_release_region(dev, 0); err2: @@ -977,14 +951,14 @@ err1: static void cxl_unmap_adapter_regs(struct cxl *adapter) { - if (adapter->p1_mmio) { - iounmap(adapter->p1_mmio); - adapter->p1_mmio = NULL; + if (adapter->native->p1_mmio) { + iounmap(adapter->native->p1_mmio); + adapter->native->p1_mmio = NULL; pci_release_region(to_pci_dev(adapter->dev.parent), 2); } - if (adapter->p2_mmio) { - iounmap(adapter->p2_mmio); - adapter->p2_mmio = NULL; + if (adapter->native->p2_mmio) { + iounmap(adapter->native->p2_mmio); + adapter->native->p2_mmio = NULL; pci_release_region(to_pci_dev(adapter->dev.parent), 0); } } @@ -1025,10 +999,10 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) /* Convert everything to bytes, because there is NO WAY I'd look at the * code a month later and forget what units these are in ;-) */ - adapter->ps_off = ps_off * 64 * 1024; + adapter->native->ps_off = ps_off * 64 * 1024; adapter->ps_size = ps_size * 64 * 1024; - adapter->afu_desc_off = afu_desc_off * 64 * 1024; - adapter->afu_desc_size = afu_desc_size *64 * 1024; + adapter->native->afu_desc_off = afu_desc_off * 64 * 1024; + adapter->native->afu_desc_size = afu_desc_size * 64 * 1024; /* Total IRQs - 1 PSL ERROR - #AFU*(1 slice error + 1 DSI) */ adapter->user_irqs = pnv_cxl_get_irq_count(dev) - 1 - 2*adapter->slices; @@ -1079,21 +1053,26 @@ static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev) return -EINVAL; } - if (!adapter->afu_desc_off || !adapter->afu_desc_size) { + if (!adapter->native->afu_desc_off || !adapter->native->afu_desc_size) { dev_err(&dev->dev, "ABORTING: VSEC shows no AFU descriptors\n"); return -EINVAL; } - if (adapter->ps_size > p2_size(dev) - adapter->ps_off) { + if (adapter->ps_size > p2_size(dev) - adapter->native->ps_off) { dev_err(&dev->dev, "ABORTING: Problem state size larger than " "available in BAR2: 0x%llx > 0x%llx\n", - adapter->ps_size, p2_size(dev) - adapter->ps_off); + adapter->ps_size, p2_size(dev) - adapter->native->ps_off); return -EINVAL; } return 0; } +ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len) +{ + return pci_read_vpd(to_pci_dev(adapter->dev.parent), 0, len, buf); +} + static void cxl_release_adapter(struct device *dev) { struct cxl *adapter = to_cxl_adapter(dev); @@ -1102,33 +1081,10 @@ static void cxl_release_adapter(struct device *dev) cxl_remove_adapter_nr(adapter); + kfree(adapter->native); kfree(adapter); } -static struct cxl *cxl_alloc_adapter(void) -{ - struct cxl *adapter; - - if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL))) - return NULL; - - spin_lock_init(&adapter->afu_list_lock); - - if (cxl_alloc_adapter_nr(adapter)) - goto err1; - - if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)) - goto err2; - - return adapter; - -err2: - cxl_remove_adapter_nr(adapter); -err1: - kfree(adapter); - return NULL; -} - #define CXL_PSL_ErrIVTE_tberror (0x1ull << (63-31)) static int sanitise_adapter_regs(struct cxl *adapter) @@ -1192,7 +1148,7 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) if ((rc = cxl_setup_psl_timebase(adapter, dev))) goto err; - if ((rc = cxl_register_psl_err_irq(adapter))) + if ((rc = cxl_native_register_psl_err_irq(adapter))) goto err; return 0; @@ -1207,13 +1163,13 @@ static void cxl_deconfigure_adapter(struct cxl *adapter) { struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); - cxl_release_psl_err_irq(adapter); + cxl_native_release_psl_err_irq(adapter); cxl_unmap_adapter_regs(adapter); pci_disable_device(pdev); } -static struct cxl *cxl_init_adapter(struct pci_dev *dev) +static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev) { struct cxl *adapter; int rc; @@ -1222,6 +1178,12 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev) if (!adapter) return ERR_PTR(-ENOMEM); + adapter->native = kzalloc(sizeof(struct cxl_native), GFP_KERNEL); + if (!adapter->native) { + rc = -ENOMEM; + goto err_release; + } + /* Set defaults for parameters which need to persist over * configure/reconfigure */ @@ -1231,8 +1193,7 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev) rc = cxl_configure_adapter(adapter, dev); if (rc) { pci_disable_device(dev); - cxl_release_adapter(&adapter->dev); - return ERR_PTR(rc); + goto err_release; } /* Don't care if this one fails: */ @@ -1258,9 +1219,13 @@ err_put1: cxl_deconfigure_adapter(adapter); device_unregister(&adapter->dev); return ERR_PTR(rc); + +err_release: + cxl_release_adapter(&adapter->dev); + return ERR_PTR(rc); } -static void cxl_remove_adapter(struct cxl *adapter) +static void cxl_pci_remove_adapter(struct cxl *adapter) { pr_devel("cxl_remove_adapter\n"); @@ -1278,17 +1243,22 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) int slice; int rc; + if (cxl_pci_is_vphb_device(dev)) { + dev_dbg(&dev->dev, "cxl_init_adapter: Ignoring cxl vphb device\n"); + return -ENODEV; + } + if (cxl_verbose) dump_cxl_config_space(dev); - adapter = cxl_init_adapter(dev); + adapter = cxl_pci_init_adapter(dev); if (IS_ERR(adapter)) { dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter)); return PTR_ERR(adapter); } for (slice = 0; slice < adapter->slices; slice++) { - if ((rc = cxl_init_afu(adapter, slice, dev))) { + if ((rc = pci_init_afu(adapter, slice, dev))) { dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc); continue; } @@ -1313,10 +1283,9 @@ static void cxl_remove(struct pci_dev *dev) */ for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; - cxl_pci_vphb_remove(afu); - cxl_remove_afu(afu); + cxl_pci_remove_afu(afu); } - cxl_remove_adapter(adapter); + cxl_pci_remove_adapter(adapter); } static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, @@ -1462,8 +1431,8 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, return result; cxl_context_detach_all(afu); - cxl_afu_deactivate_mode(afu); - cxl_deconfigure_afu(afu); + cxl_ops->afu_deactivate_mode(afu, afu->current_mode); + pci_deconfigure_afu(afu); } cxl_deconfigure_adapter(adapter); @@ -1486,14 +1455,12 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; - if (cxl_configure_afu(afu, adapter, pdev)) + if (pci_configure_afu(afu, adapter, pdev)) goto err; if (cxl_afu_select_best_mode(afu)) goto err; - cxl_pci_vphb_reconfigure(afu); - list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { /* Reset the device context. * TODO: make this less disruptive @@ -1509,7 +1476,7 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) afu_dev->dev.archdata.cxl_ctx = ctx; - if (cxl_afu_check_and_enable(afu)) + if (cxl_ops->afu_check_and_enable(afu)) goto err; afu_dev->error_state = pci_channel_io_normal; diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c index 02006f7109a8..907c99b9f5af 100644 --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -69,7 +69,7 @@ static ssize_t reset_adapter_store(struct device *device, if ((rc != 1) || (val != 1)) return -EINVAL; - if ((rc = cxl_reset(adapter))) + if ((rc = cxl_ops->adapter_reset(adapter))) return rc; return count; } @@ -165,7 +165,7 @@ static ssize_t pp_mmio_off_show(struct device *device, { struct cxl_afu *afu = to_afu_chardev_m(device); - return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_offset); + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->native->pp_offset); } static ssize_t pp_mmio_len_show(struct device *device, @@ -211,7 +211,7 @@ static ssize_t reset_store_afu(struct device *device, goto err; } - if ((rc = __cxl_afu_reset(afu))) + if ((rc = cxl_ops->afu_reset(afu))) goto err; rc = count; @@ -253,8 +253,14 @@ static ssize_t irqs_max_store(struct device *device, if (irqs_max < afu->pp_irqs) return -EINVAL; - if (irqs_max > afu->adapter->user_irqs) - return -EINVAL; + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (irqs_max > afu->adapter->user_irqs) + return -EINVAL; + } else { + /* pHyp sets a per-AFU limit */ + if (irqs_max > afu->guest->max_ints) + return -EINVAL; + } afu->irqs_max = irqs_max; return count; @@ -348,7 +354,7 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr, } /* - * cxl_afu_deactivate_mode needs to be done outside the lock, prevent + * afu_deactivate_mode needs to be done outside the lock, prevent * other contexts coming in before we are ready: */ old_mode = afu->current_mode; @@ -357,9 +363,9 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr, mutex_unlock(&afu->contexts_lock); - if ((rc = _cxl_afu_deactivate_mode(afu, old_mode))) + if ((rc = cxl_ops->afu_deactivate_mode(afu, old_mode))) return rc; - if ((rc = cxl_afu_activate_mode(afu, mode))) + if ((rc = cxl_ops->afu_activate_mode(afu, mode))) return rc; return count; @@ -389,7 +395,7 @@ static ssize_t afu_eb_read(struct file *filp, struct kobject *kobj, struct cxl_afu *afu = to_cxl_afu(container_of(kobj, struct device, kobj)); - return cxl_afu_read_err_buffer(afu, buf, off, count); + return cxl_ops->afu_read_err_buffer(afu, buf, off, count); } static struct device_attribute afu_attrs[] = { @@ -406,24 +412,39 @@ static struct device_attribute afu_attrs[] = { int cxl_sysfs_adapter_add(struct cxl *adapter) { + struct device_attribute *dev_attr; int i, rc; for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) { - if ((rc = device_create_file(&adapter->dev, &adapter_attrs[i]))) - goto err; + dev_attr = &adapter_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_ADAPTER_ATTRS)) { + if ((rc = device_create_file(&adapter->dev, dev_attr))) + goto err; + } } return 0; err: - for (i--; i >= 0; i--) - device_remove_file(&adapter->dev, &adapter_attrs[i]); + for (i--; i >= 0; i--) { + dev_attr = &adapter_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_ADAPTER_ATTRS)) + device_remove_file(&adapter->dev, dev_attr); + } return rc; } + void cxl_sysfs_adapter_remove(struct cxl *adapter) { + struct device_attribute *dev_attr; int i; - for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) - device_remove_file(&adapter->dev, &adapter_attrs[i]); + for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) { + dev_attr = &adapter_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_ADAPTER_ATTRS)) + device_remove_file(&adapter->dev, dev_attr); + } } struct afu_config_record { @@ -469,10 +490,12 @@ static ssize_t afu_read_config(struct file *filp, struct kobject *kobj, struct afu_config_record *cr = to_cr(kobj); struct cxl_afu *afu = to_cxl_afu(container_of(kobj->parent, struct device, kobj)); - u64 i, j, val; + u64 i, j, val, rc; for (i = 0; i < count;) { - val = cxl_afu_cr_read64(afu, cr->cr, off & ~0x7); + rc = cxl_ops->afu_cr_read64(afu, cr->cr, off & ~0x7, &val); + if (rc) + val = ~0ULL; for (j = off & 0x7; j < 8 && i < count; i++, j++, off++) buf[i] = (val >> (j * 8)) & 0xff; } @@ -517,14 +540,22 @@ static struct afu_config_record *cxl_sysfs_afu_new_cr(struct cxl_afu *afu, int c return ERR_PTR(-ENOMEM); cr->cr = cr_idx; - cr->device = cxl_afu_cr_read16(afu, cr_idx, PCI_DEVICE_ID); - cr->vendor = cxl_afu_cr_read16(afu, cr_idx, PCI_VENDOR_ID); - cr->class = cxl_afu_cr_read32(afu, cr_idx, PCI_CLASS_REVISION) >> 8; + + rc = cxl_ops->afu_cr_read16(afu, cr_idx, PCI_DEVICE_ID, &cr->device); + if (rc) + goto err; + rc = cxl_ops->afu_cr_read16(afu, cr_idx, PCI_VENDOR_ID, &cr->vendor); + if (rc) + goto err; + rc = cxl_ops->afu_cr_read32(afu, cr_idx, PCI_CLASS_REVISION, &cr->class); + if (rc) + goto err; + cr->class >>= 8; /* * Export raw AFU PCIe like config record. For now this is read only by * root - we can expand that later to be readable by non-root and maybe - * even writable provided we have a good use-case. Once we suport + * even writable provided we have a good use-case. Once we support * exposing AFUs through a virtual PHB they will get that for free from * Linux' PCI infrastructure, but until then it's not clear that we * need it for anything since the main use case is just identifying @@ -562,6 +593,7 @@ err: void cxl_sysfs_afu_remove(struct cxl_afu *afu) { + struct device_attribute *dev_attr; struct afu_config_record *cr, *tmp; int i; @@ -569,8 +601,12 @@ void cxl_sysfs_afu_remove(struct cxl_afu *afu) if (afu->eb_len) device_remove_bin_file(&afu->dev, &afu->attr_eb); - for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) - device_remove_file(&afu->dev, &afu_attrs[i]); + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { + dev_attr = &afu_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_ATTRS)) + device_remove_file(&afu->dev, &afu_attrs[i]); + } list_for_each_entry_safe(cr, tmp, &afu->crs, list) { sysfs_remove_bin_file(&cr->kobj, &cr->config_attr); @@ -580,14 +616,19 @@ void cxl_sysfs_afu_remove(struct cxl_afu *afu) int cxl_sysfs_afu_add(struct cxl_afu *afu) { + struct device_attribute *dev_attr; struct afu_config_record *cr; int i, rc; INIT_LIST_HEAD(&afu->crs); for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { - if ((rc = device_create_file(&afu->dev, &afu_attrs[i]))) - goto err; + dev_attr = &afu_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_ATTRS)) { + if ((rc = device_create_file(&afu->dev, &afu_attrs[i]))) + goto err; + } } /* conditionally create the add the binary file for error info buffer */ @@ -626,32 +667,50 @@ err: /* reset the eb_len as we havent created the bin attr */ afu->eb_len = 0; - for (i--; i >= 0; i--) + for (i--; i >= 0; i--) { + dev_attr = &afu_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_ATTRS)) device_remove_file(&afu->dev, &afu_attrs[i]); + } return rc; } int cxl_sysfs_afu_m_add(struct cxl_afu *afu) { + struct device_attribute *dev_attr; int i, rc; for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) { - if ((rc = device_create_file(afu->chardev_m, &afu_master_attrs[i]))) - goto err; + dev_attr = &afu_master_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_MASTER_ATTRS)) { + if ((rc = device_create_file(afu->chardev_m, &afu_master_attrs[i]))) + goto err; + } } return 0; err: - for (i--; i >= 0; i--) - device_remove_file(afu->chardev_m, &afu_master_attrs[i]); + for (i--; i >= 0; i--) { + dev_attr = &afu_master_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_MASTER_ATTRS)) + device_remove_file(afu->chardev_m, &afu_master_attrs[i]); + } return rc; } void cxl_sysfs_afu_m_remove(struct cxl_afu *afu) { + struct device_attribute *dev_attr; int i; - for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) - device_remove_file(afu->chardev_m, &afu_master_attrs[i]); + for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) { + dev_attr = &afu_master_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_MASTER_ATTRS)) + device_remove_file(afu->chardev_m, &afu_master_attrs[i]); + } } diff --git a/drivers/misc/cxl/trace.h b/drivers/misc/cxl/trace.h index 6e1e2adfba8e..751d6119683e 100644 --- a/drivers/misc/cxl/trace.h +++ b/drivers/misc/cxl/trace.h @@ -450,6 +450,199 @@ DEFINE_EVENT(cxl_pe_class, cxl_slbia, TP_ARGS(ctx) ); +TRACE_EVENT(cxl_hcall, + TP_PROTO(u64 unit_address, u64 process_token, long rc), + + TP_ARGS(unit_address, process_token, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(u64, process_token) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->process_token = process_token; + __entry->rc = rc; + ), + + TP_printk("unit_address=0x%016llx process_token=0x%016llx rc=%li", + __entry->unit_address, + __entry->process_token, + __entry->rc + ) +); + +TRACE_EVENT(cxl_hcall_control, + TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, + u64 p4, unsigned long r4, long rc), + + TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(char *, fct) + __field(u64, p1) + __field(u64, p2) + __field(u64, p3) + __field(u64, p4) + __field(unsigned long, r4) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->fct = fct; + __entry->p1 = p1; + __entry->p2 = p2; + __entry->p3 = p3; + __entry->p4 = p4; + __entry->r4 = r4; + __entry->rc = rc; + ), + + TP_printk("unit_address=%#.16llx %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li", + __entry->unit_address, + __entry->fct, + __entry->p1, + __entry->p2, + __entry->p3, + __entry->p4, + __entry->r4, + __entry->rc + ) +); + +TRACE_EVENT(cxl_hcall_attach, + TP_PROTO(u64 unit_address, u64 phys_addr, unsigned long process_token, + unsigned long mmio_addr, unsigned long mmio_size, long rc), + + TP_ARGS(unit_address, phys_addr, process_token, + mmio_addr, mmio_size, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(u64, phys_addr) + __field(unsigned long, process_token) + __field(unsigned long, mmio_addr) + __field(unsigned long, mmio_size) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->phys_addr = phys_addr; + __entry->process_token = process_token; + __entry->mmio_addr = mmio_addr; + __entry->mmio_size = mmio_size; + __entry->rc = rc; + ), + + TP_printk("unit_address=0x%016llx phys_addr=0x%016llx " + "token=0x%.8lx mmio_addr=0x%lx mmio_size=0x%lx rc=%li", + __entry->unit_address, + __entry->phys_addr, + __entry->process_token, + __entry->mmio_addr, + __entry->mmio_size, + __entry->rc + ) +); + +DEFINE_EVENT(cxl_hcall, cxl_hcall_detach, + TP_PROTO(u64 unit_address, u64 process_token, long rc), + TP_ARGS(unit_address, process_token, rc) +); + +DEFINE_EVENT(cxl_hcall_control, cxl_hcall_control_function, + TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, + u64 p4, unsigned long r4, long rc), + TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc) +); + +DEFINE_EVENT(cxl_hcall, cxl_hcall_collect_int_info, + TP_PROTO(u64 unit_address, u64 process_token, long rc), + TP_ARGS(unit_address, process_token, rc) +); + +TRACE_EVENT(cxl_hcall_control_faults, + TP_PROTO(u64 unit_address, u64 process_token, + u64 control_mask, u64 reset_mask, unsigned long r4, + long rc), + + TP_ARGS(unit_address, process_token, + control_mask, reset_mask, r4, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(u64, process_token) + __field(u64, control_mask) + __field(u64, reset_mask) + __field(unsigned long, r4) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->process_token = process_token; + __entry->control_mask = control_mask; + __entry->reset_mask = reset_mask; + __entry->r4 = r4; + __entry->rc = rc; + ), + + TP_printk("unit_address=0x%016llx process_token=0x%llx " + "control_mask=%#llx reset_mask=%#llx r4=%#lx rc=%li", + __entry->unit_address, + __entry->process_token, + __entry->control_mask, + __entry->reset_mask, + __entry->r4, + __entry->rc + ) +); + +DEFINE_EVENT(cxl_hcall_control, cxl_hcall_control_facility, + TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, + u64 p4, unsigned long r4, long rc), + TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc) +); + +TRACE_EVENT(cxl_hcall_download_facility, + TP_PROTO(u64 unit_address, char *fct, u64 list_address, u64 num, + unsigned long r4, long rc), + + TP_ARGS(unit_address, fct, list_address, num, r4, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(char *, fct) + __field(u64, list_address) + __field(u64, num) + __field(unsigned long, r4) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->fct = fct; + __entry->list_address = list_address; + __entry->num = num; + __entry->r4 = r4; + __entry->rc = rc; + ), + + TP_printk("%#.16llx, %s(%#llx, %#llx), %#lx): %li", + __entry->unit_address, + __entry->fct, + __entry->list_address, + __entry->num, + __entry->r4, + __entry->rc + ) +); + #endif /* _CXL_TRACE_H */ /* This part must be outside protection */ diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index cbd4331fb45c..cdc7723b845d 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -49,7 +49,7 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) phb = pci_bus_to_host(dev->bus); afu = (struct cxl_afu *)phb->private_data; - if (!cxl_adapter_link_ok(afu->adapter)) { + if (!cxl_ops->link_ok(afu->adapter, afu)) { dev_warn(&dev->dev, "%s: Device link is down, refusing to enable AFU\n", __func__); return false; } @@ -66,7 +66,7 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) return false; dev->dev.archdata.cxl_ctx = ctx; - return (cxl_afu_check_and_enable(afu) == 0); + return (cxl_ops->afu_check_and_enable(afu) == 0); } static void cxl_pci_disable_device(struct pci_dev *dev) @@ -99,113 +99,90 @@ static int cxl_pcie_cfg_record(u8 bus, u8 devfn) return (bus << 8) + devfn; } -static unsigned long cxl_pcie_cfg_addr(struct pci_controller* phb, - u8 bus, u8 devfn, int offset) -{ - int record = cxl_pcie_cfg_record(bus, devfn); - - return (unsigned long)phb->cfg_addr + ((unsigned long)phb->cfg_data * record) + offset; -} - - static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, - int offset, int len, - volatile void __iomem **ioaddr, - u32 *mask, int *shift) + struct cxl_afu **_afu, int *_record) { struct pci_controller *phb; struct cxl_afu *afu; - unsigned long addr; + int record; phb = pci_bus_to_host(bus); if (phb == NULL) return PCIBIOS_DEVICE_NOT_FOUND; - afu = (struct cxl_afu *)phb->private_data; - if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num) + afu = (struct cxl_afu *)phb->private_data; + record = cxl_pcie_cfg_record(bus->number, devfn); + if (record > afu->crs_num) return PCIBIOS_DEVICE_NOT_FOUND; - if (offset >= (unsigned long)phb->cfg_data) - return PCIBIOS_BAD_REGISTER_NUMBER; - addr = cxl_pcie_cfg_addr(phb, bus->number, devfn, offset); - *ioaddr = (void *)(addr & ~0x3ULL); - *shift = ((addr & 0x3) * 8); - switch (len) { - case 1: - *mask = 0xff; - break; - case 2: - *mask = 0xffff; - break; - default: - *mask = 0xffffffff; - break; - } + *_afu = afu; + *_record = record; return 0; } - -static inline bool cxl_config_link_ok(struct pci_bus *bus) -{ - struct pci_controller *phb; - struct cxl_afu *afu; - - /* Config space IO is based on phb->cfg_addr, which is based on - * afu_desc_mmio. This isn't safe to read/write when the link - * goes down, as EEH tears down MMIO space. - * - * Check if the link is OK before proceeding. - */ - - phb = pci_bus_to_host(bus); - if (phb == NULL) - return false; - afu = (struct cxl_afu *)phb->private_data; - return cxl_adapter_link_ok(afu->adapter); -} - static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 *val) { - volatile void __iomem *ioaddr; - int shift, rc; - u32 mask; + int rc, record; + struct cxl_afu *afu; + u8 val8; + u16 val16; + u32 val32; - rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr, - &mask, &shift); + rc = cxl_pcie_config_info(bus, devfn, &afu, &record); if (rc) return rc; - if (!cxl_config_link_ok(bus)) + switch (len) { + case 1: + rc = cxl_ops->afu_cr_read8(afu, record, offset, &val8); + *val = val8; + break; + case 2: + rc = cxl_ops->afu_cr_read16(afu, record, offset, &val16); + *val = val16; + break; + case 4: + rc = cxl_ops->afu_cr_read32(afu, record, offset, &val32); + *val = val32; + break; + default: + WARN_ON(1); + } + + if (rc) return PCIBIOS_DEVICE_NOT_FOUND; - /* Can only read 32 bits */ - *val = (in_le32(ioaddr) >> shift) & mask; return PCIBIOS_SUCCESSFUL; } static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 val) { - volatile void __iomem *ioaddr; - u32 v, mask; - int shift, rc; + int rc, record; + struct cxl_afu *afu; - rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr, - &mask, &shift); + rc = cxl_pcie_config_info(bus, devfn, &afu, &record); if (rc) return rc; - if (!cxl_config_link_ok(bus)) - return PCIBIOS_DEVICE_NOT_FOUND; - - /* Can only write 32 bits so do read-modify-write */ - mask <<= shift; - val <<= shift; + switch (len) { + case 1: + rc = cxl_ops->afu_cr_write8(afu, record, offset, val & 0xff); + break; + case 2: + rc = cxl_ops->afu_cr_write16(afu, record, offset, val & 0xffff); + break; + case 4: + rc = cxl_ops->afu_cr_write32(afu, record, offset, val); + break; + default: + WARN_ON(1); + } - v = (in_le32(ioaddr) & ~mask) | (val & mask); + if (rc) + return PCIBIOS_SET_FAILED; - out_le32(ioaddr, v); return PCIBIOS_SUCCESSFUL; } @@ -233,23 +210,31 @@ int cxl_pci_vphb_add(struct cxl_afu *afu) { struct pci_dev *phys_dev; struct pci_controller *phb, *phys_phb; - - phys_dev = to_pci_dev(afu->adapter->dev.parent); - phys_phb = pci_bus_to_host(phys_dev->bus); + struct device_node *vphb_dn; + struct device *parent; + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + phys_dev = to_pci_dev(afu->adapter->dev.parent); + phys_phb = pci_bus_to_host(phys_dev->bus); + vphb_dn = phys_phb->dn; + parent = &phys_dev->dev; + } else { + vphb_dn = afu->adapter->dev.parent->of_node; + parent = afu->adapter->dev.parent; + } /* Alloc and setup PHB data structure */ - phb = pcibios_alloc_controller(phys_phb->dn); - + phb = pcibios_alloc_controller(vphb_dn); if (!phb) return -ENODEV; /* Setup parent in sysfs */ - phb->parent = &phys_dev->dev; + phb->parent = parent; /* Setup the PHB using arch provided callback */ phb->ops = &cxl_pcie_pci_ops; - phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset; - phb->cfg_data = (void *)(u64)afu->crs_len; + phb->cfg_addr = NULL; + phb->cfg_data = 0; phb->private_data = afu; phb->controller_ops = cxl_pci_controller_ops; @@ -272,15 +257,6 @@ int cxl_pci_vphb_add(struct cxl_afu *afu) return 0; } -void cxl_pci_vphb_reconfigure(struct cxl_afu *afu) -{ - /* When we are reconfigured, the AFU's MMIO space is unmapped - * and remapped. We need to reflect this in the PHB's view of - * the world. - */ - afu->phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset; -} - void cxl_pci_vphb_remove(struct cxl_afu *afu) { struct pci_controller *phb; @@ -296,6 +272,15 @@ void cxl_pci_vphb_remove(struct cxl_afu *afu) pcibios_free_controller(phb); } +bool cxl_pci_is_vphb_device(struct pci_dev *dev) +{ + struct pci_controller *phb; + + phb = pci_bus_to_host(dev->bus); + + return (phb->ops == &cxl_pcie_pci_ops); +} + struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) { struct pci_controller *phb; diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 89b3befc7155..6469ff6208b0 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -271,6 +271,8 @@ bool pci_bus_clip_resource(struct pci_dev *dev, int idx) void __weak pcibios_resource_survey_bus(struct pci_bus *bus) { } +void __weak pcibios_bus_add_device(struct pci_dev *pdev) { } + /** * pci_bus_add_device - start driver for a single device * @dev: device to add @@ -285,6 +287,7 @@ void pci_bus_add_device(struct pci_dev *dev) * Can not put in pci_device_add yet because resources * are not assigned yet for some devices. */ + pcibios_bus_add_device(dev); pci_fixup_device(pci_fixup_final, dev); pci_create_sysfs_dev_files(dev); pci_proc_attach_device(dev); diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 31f31d460fc9..fa4f13869fa9 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -113,7 +113,7 @@ resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) return dev->sriov->barsz[resno - PCI_IOV_RESOURCES]; } -static int virtfn_add(struct pci_dev *dev, int id, int reset) +int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) { int i; int rc = -ENOMEM; @@ -188,7 +188,7 @@ failed: return rc; } -static void virtfn_remove(struct pci_dev *dev, int id, int reset) +void pci_iov_remove_virtfn(struct pci_dev *dev, int id, int reset) { char buf[VIRTFN_ID_LEN]; struct pci_dev *virtfn; @@ -321,7 +321,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) } for (i = 0; i < initial; i++) { - rc = virtfn_add(dev, i, 0); + rc = pci_iov_add_virtfn(dev, i, 0); if (rc) goto failed; } @@ -333,7 +333,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) failed: while (i--) - virtfn_remove(dev, i, 0); + pci_iov_remove_virtfn(dev, i, 0); pcibios_sriov_disable(dev); err_pcibios: @@ -359,7 +359,7 @@ static void sriov_disable(struct pci_dev *dev) return; for (i = 0; i < iov->num_VFs; i++) - virtfn_remove(dev, i, 0); + pci_iov_remove_virtfn(dev, i, 0); pcibios_sriov_disable(dev); diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h index 5ada9268a450..580f37006977 100644 --- a/drivers/scsi/cxlflash/common.h +++ b/drivers/scsi/cxlflash/common.h @@ -106,7 +106,6 @@ struct cxlflash_cfg { atomic_t scan_host_needed; struct cxl_afu *cxl_afu; - struct pci_dev *parent_dev; atomic_t recovery_threads; struct mutex ctx_recovery_mutex; diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index f6d90ce8f3b7..e04aae70643b 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -1407,7 +1407,7 @@ static int start_context(struct cxlflash_cfg *cfg) */ static int read_vpd(struct cxlflash_cfg *cfg, u64 wwpn[]) { - struct pci_dev *dev = cfg->parent_dev; + struct pci_dev *dev = cfg->dev; int rc = 0; int ro_start, ro_size, i, j, k; ssize_t vpd_size; @@ -1416,7 +1416,7 @@ static int read_vpd(struct cxlflash_cfg *cfg, u64 wwpn[]) char *wwpn_vpd_tags[NUM_FC_PORTS] = { "V5", "V6" }; /* Get the VPD data from the device */ - vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data); + vpd_size = cxl_read_adapter_vpd(dev, vpd_data, sizeof(vpd_data)); if (unlikely(vpd_size <= 0)) { dev_err(&dev->dev, "%s: Unable to read VPD (size = %ld)\n", __func__, vpd_size); @@ -2392,7 +2392,6 @@ static int cxlflash_probe(struct pci_dev *pdev, { struct Scsi_Host *host; struct cxlflash_cfg *cfg = NULL; - struct device *phys_dev; struct dev_dependent_vals *ddv; int rc = 0; @@ -2458,19 +2457,6 @@ static int cxlflash_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, cfg); - /* - * Use the special service provided to look up the physical - * PCI device, since we are called on the probe of the virtual - * PCI host bus (vphb) - */ - phys_dev = cxl_get_phys_dev(pdev); - if (!dev_is_pci(phys_dev)) { - dev_err(&pdev->dev, "%s: not a pci dev\n", __func__); - rc = -ENODEV; - goto out_remove; - } - cfg->parent_dev = to_pci_dev(phys_dev); - cfg->cxl_afu = cxl_pci_to_afu(pdev); rc = init_pci(cfg); diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 0b3c0d39ef75..c370b261c720 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -239,6 +239,14 @@ extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #endif +#ifndef __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE +static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + +} +#endif + #ifndef __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t pte_a, pte_t pte_b) { diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 301de78d65f7..5f3ee5a60a81 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -34,20 +34,29 @@ * The idea here is to build acquire/release variants by adding explicit * barriers on top of the relaxed variant. In the case where the relaxed * variant is already fully ordered, no additional barriers are needed. + * + * Besides, if an arch has a special barrier for acquire/release, it could + * implement its own __atomic_op_* and use the same framework for building + * variants */ +#ifndef __atomic_op_acquire #define __atomic_op_acquire(op, args...) \ ({ \ typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \ smp_mb__after_atomic(); \ __ret; \ }) +#endif +#ifndef __atomic_op_release #define __atomic_op_release(op, args...) \ ({ \ smp_mb__before_atomic(); \ op##_relaxed(args); \ }) +#endif +#ifndef __atomic_op_fence #define __atomic_op_fence(op, args...) \ ({ \ typeof(op##_relaxed(args)) __ret; \ @@ -56,6 +65,7 @@ smp_mb__after_atomic(); \ __ret; \ }) +#endif /* atomic_add_return_relaxed */ #ifndef atomic_add_return_relaxed diff --git a/include/linux/bug.h b/include/linux/bug.h index 7f4818673c41..e51b0709e78d 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -20,6 +20,7 @@ struct pt_regs; #define BUILD_BUG_ON_MSG(cond, msg) (0) #define BUILD_BUG_ON(condition) (0) #define BUILD_BUG() (0) +#define MAYBE_BUILD_BUG_ON(cond) (0) #else /* __CHECKER__ */ /* Force a compilation error if a constant expression is not a power of 2 */ @@ -83,6 +84,14 @@ struct pt_regs; */ #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed") +#define MAYBE_BUILD_BUG_ON(cond) \ + do { \ + if (__builtin_constant_p((cond))) \ + BUILD_BUG_ON(cond); \ + else \ + BUG_ON(cond); \ + } while (0) + #endif /* __CHECKER__ */ #ifdef CONFIG_GENERIC_BUG diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 459fd25b378e..f12513a20a06 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -111,9 +111,6 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, __split_huge_pmd(__vma, __pmd, __address); \ } while (0) -#if HPAGE_PMD_ORDER >= MAX_ORDER -#error "hugepages can't be allocated by the buddy allocator" -#endif extern int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice); extern void vma_adjust_trans_huge(struct vm_area_struct *vma, diff --git a/include/linux/pci.h b/include/linux/pci.h index 27df4a6585da..bc435d6293d2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -770,6 +770,7 @@ extern struct list_head pci_root_buses; /* list of all known PCI buses */ int no_pci_devices(void); void pcibios_resource_survey_bus(struct pci_bus *bus); +void pcibios_bus_add_device(struct pci_dev *pdev); void pcibios_add_bus(struct pci_bus *bus); void pcibios_remove_bus(struct pci_bus *bus); void pcibios_fixup_bus(struct pci_bus *); @@ -1738,6 +1739,8 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int id); int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); void pci_disable_sriov(struct pci_dev *dev); +int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset); +void pci_iov_remove_virtfn(struct pci_dev *dev, int id, int reset); int pci_num_vf(struct pci_dev *dev); int pci_vfs_assigned(struct pci_dev *dev); int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); @@ -1754,6 +1757,12 @@ static inline int pci_iov_virtfn_devfn(struct pci_dev *dev, int id) } static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { return -ENODEV; } +static inline int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) +{ + return -ENOSYS; +} +static inline void pci_iov_remove_virtfn(struct pci_dev *dev, + int id, int reset) { } static inline void pci_disable_sriov(struct pci_dev *dev) { } static inline int pci_num_vf(struct pci_dev *dev) { return 0; } static inline int pci_vfs_assigned(struct pci_dev *dev) diff --git a/include/misc/cxl.h b/include/misc/cxl.h index f2ffe5bd720d..7d5e2613c7b8 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -30,9 +30,6 @@ struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev); /* Get the AFU conf record number associated with a pci_dev */ unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev); -/* Get the physical device (ie. the PCIe card) which the AFU is attached */ -struct device *cxl_get_phys_dev(struct pci_dev *dev); - /* * Context lifetime overview: @@ -210,4 +207,9 @@ ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count, void cxl_perst_reloads_same_image(struct cxl_afu *afu, bool perst_reloads_same_image); +/* + * Read the VPD for the card where the AFU resides + */ +ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count); + #endif /* _MISC_CXL_H */ diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h index 1e889aa8a36e..8cd334f99ddc 100644 --- a/include/uapi/misc/cxl.h +++ b/include/uapi/misc/cxl.h @@ -55,11 +55,35 @@ struct cxl_afu_id { __u64 reserved6; }; +/* base adapter image header is included in the image */ +#define CXL_AI_NEED_HEADER 0x0000000000000001ULL +#define CXL_AI_ALL CXL_AI_NEED_HEADER + +#define CXL_AI_HEADER_SIZE 128 +#define CXL_AI_BUFFER_SIZE 4096 +#define CXL_AI_MAX_ENTRIES 256 +#define CXL_AI_MAX_CHUNK_SIZE (CXL_AI_BUFFER_SIZE * CXL_AI_MAX_ENTRIES) + +struct cxl_adapter_image { + __u64 flags; + __u64 data; + __u64 len_data; + __u64 len_image; + __u64 reserved1; + __u64 reserved2; + __u64 reserved3; + __u64 reserved4; +}; + /* ioctl numbers */ #define CXL_MAGIC 0xCA +/* AFU devices */ #define CXL_IOCTL_START_WORK _IOW(CXL_MAGIC, 0x00, struct cxl_ioctl_start_work) #define CXL_IOCTL_GET_PROCESS_ELEMENT _IOR(CXL_MAGIC, 0x01, __u32) #define CXL_IOCTL_GET_AFU_ID _IOR(CXL_MAGIC, 0x02, struct cxl_afu_id) +/* adapter devices */ +#define CXL_IOCTL_DOWNLOAD_IMAGE _IOW(CXL_MAGIC, 0x0A, struct cxl_adapter_image) +#define CXL_IOCTL_VALIDATE_IMAGE _IOW(CXL_MAGIC, 0x0B, struct cxl_adapter_image) #define CXL_READ_MIN_SIZE 0x1000 /* 4K */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 08fc0ba2207e..36c22a89df61 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -83,7 +83,7 @@ unsigned long transparent_hugepage_flags __read_mostly = (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG); /* default scan 8*512 pte (or vmas) every 30 second */ -static unsigned int khugepaged_pages_to_scan __read_mostly = HPAGE_PMD_NR*8; +static unsigned int khugepaged_pages_to_scan __read_mostly; static unsigned int khugepaged_pages_collapsed; static unsigned int khugepaged_full_scans; static unsigned int khugepaged_scan_sleep_millisecs __read_mostly = 10000; @@ -98,7 +98,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait); * it would have happened if the vma was large enough during page * fault. */ -static unsigned int khugepaged_max_ptes_none __read_mostly = HPAGE_PMD_NR-1; +static unsigned int khugepaged_max_ptes_none __read_mostly; static int khugepaged(void *none); static int khugepaged_slab_init(void); @@ -660,6 +660,18 @@ static int __init hugepage_init(void) return -EINVAL; } + khugepaged_pages_to_scan = HPAGE_PMD_NR * 8; + khugepaged_max_ptes_none = HPAGE_PMD_NR - 1; + /* + * hugepages can't be allocated by the buddy allocator + */ + MAYBE_BUILD_BUG_ON(HPAGE_PMD_ORDER >= MAX_ORDER); + /* + * we use page->mapping and page->index in second tail page + * as list_head: assuming THP order >= 2 + */ + MAYBE_BUILD_BUG_ON(HPAGE_PMD_ORDER < 2); + err = hugepage_init_sysfs(&hugepage_kobj); if (err) goto err_sysfs; @@ -764,7 +776,6 @@ void prep_transhuge_page(struct page *page) * we use page->mapping and page->indexlru in second tail page * as list_head: assuming THP order >= 2 */ - BUILD_BUG_ON(HPAGE_PMD_ORDER < 2); INIT_LIST_HEAD(page_deferred_list(page)); set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR); @@ -2860,6 +2871,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, young = pmd_young(*pmd); dirty = pmd_dirty(*pmd); + pmdp_huge_split_prepare(vma, haddr, pmd); pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 0c2706bda330..b08f77cbe31b 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -8,7 +8,7 @@ ifeq ($(ARCH),powerpc) GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown") -CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS) +CFLAGS := -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS) export CFLAGS @@ -22,7 +22,8 @@ SUB_DIRS = benchmarks \ switch_endian \ syscalls \ tm \ - vphn + vphn \ + math endif diff --git a/tools/testing/selftests/powerpc/basic_asm.h b/tools/testing/selftests/powerpc/basic_asm.h new file mode 100644 index 000000000000..3349a0704d1a --- /dev/null +++ b/tools/testing/selftests/powerpc/basic_asm.h @@ -0,0 +1,70 @@ +#ifndef _SELFTESTS_POWERPC_BASIC_ASM_H +#define _SELFTESTS_POWERPC_BASIC_ASM_H + +#include <ppc-asm.h> +#include <asm/unistd.h> + +#define LOAD_REG_IMMEDIATE(reg,expr) \ + lis reg,(expr)@highest; \ + ori reg,reg,(expr)@higher; \ + rldicr reg,reg,32,31; \ + oris reg,reg,(expr)@high; \ + ori reg,reg,(expr)@l; + +/* + * Note: These macros assume that variables being stored on the stack are + * doublewords, while this is usually the case it may not always be the + * case for each use case. + */ +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define STACK_FRAME_MIN_SIZE 32 +#define STACK_FRAME_TOC_POS 24 +#define __STACK_FRAME_PARAM(_param) (32 + ((_param)*8)) +#define __STACK_FRAME_LOCAL(_num_params,_var_num) ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8)) +#else +#define STACK_FRAME_MIN_SIZE 112 +#define STACK_FRAME_TOC_POS 40 +#define __STACK_FRAME_PARAM(i) (48 + ((i)*8)) + +/* + * Caveat: if a function passed more than 8 doublewords, the caller will have + * made more space... which would render the 112 incorrect. + */ +#define __STACK_FRAME_LOCAL(_num_params,_var_num) (112 + ((_var_num)*8)) +#endif + +/* Parameter x saved to the stack */ +#define STACK_FRAME_PARAM(var) __STACK_FRAME_PARAM(var) + +/* Local variable x saved to the stack after x parameters */ +#define STACK_FRAME_LOCAL(num_params,var) __STACK_FRAME_LOCAL(num_params,var) +#define STACK_FRAME_LR_POS 16 +#define STACK_FRAME_CR_POS 8 + +/* + * It is very important to note here that _extra is the extra amount of + * stack space needed. This space can be accessed using STACK_FRAME_PARAM() + * or STACK_FRAME_LOCAL() macros. + * + * r1 and r2 are not defined in ppc-asm.h (instead they are defined as sp + * and toc). Kernel programmers tend to prefer rX even for r1 and r2, hence + * %1 and %r2. r0 is defined in ppc-asm.h and therefore %r0 gets + * preprocessed incorrectly, hence r0. + */ +#define PUSH_BASIC_STACK(_extra) \ + mflr r0; \ + std r0,STACK_FRAME_LR_POS(%r1); \ + stdu %r1,-(_extra + STACK_FRAME_MIN_SIZE)(%r1); \ + mfcr r0; \ + stw r0,STACK_FRAME_CR_POS(%r1); \ + std %r2,STACK_FRAME_TOC_POS(%r1); + +#define POP_BASIC_STACK(_extra) \ + ld %r2,STACK_FRAME_TOC_POS(%r1); \ + lwz r0,STACK_FRAME_CR_POS(%r1); \ + mtcr r0; \ + addi %r1,%r1,(_extra + STACK_FRAME_MIN_SIZE); \ + ld r0,STACK_FRAME_LR_POS(%r1); \ + mtlr r0; + +#endif /* _SELFTESTS_POWERPC_BASIC_ASM_H */ diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore new file mode 100644 index 000000000000..4fe13a439fd7 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/.gitignore @@ -0,0 +1,6 @@ +fpu_syscall +vmx_syscall +fpu_preempt +vmx_preempt +fpu_signal +vmx_signal diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile new file mode 100644 index 000000000000..5b88875d5955 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/Makefile @@ -0,0 +1,19 @@ +TEST_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c +$(TEST_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec + +fpu_syscall: fpu_asm.S +fpu_preempt: fpu_asm.S +fpu_signal: fpu_asm.S + +vmx_syscall: vmx_asm.S +vmx_preempt: vmx_asm.S +vmx_signal: vmx_asm.S + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S new file mode 100644 index 000000000000..f3711d80e709 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_asm.S @@ -0,0 +1,198 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "../basic_asm.h" + +#define PUSH_FPU(pos) \ + stfd f14,pos(sp); \ + stfd f15,pos+8(sp); \ + stfd f16,pos+16(sp); \ + stfd f17,pos+24(sp); \ + stfd f18,pos+32(sp); \ + stfd f19,pos+40(sp); \ + stfd f20,pos+48(sp); \ + stfd f21,pos+56(sp); \ + stfd f22,pos+64(sp); \ + stfd f23,pos+72(sp); \ + stfd f24,pos+80(sp); \ + stfd f25,pos+88(sp); \ + stfd f26,pos+96(sp); \ + stfd f27,pos+104(sp); \ + stfd f28,pos+112(sp); \ + stfd f29,pos+120(sp); \ + stfd f30,pos+128(sp); \ + stfd f31,pos+136(sp); + +#define POP_FPU(pos) \ + lfd f14,pos(sp); \ + lfd f15,pos+8(sp); \ + lfd f16,pos+16(sp); \ + lfd f17,pos+24(sp); \ + lfd f18,pos+32(sp); \ + lfd f19,pos+40(sp); \ + lfd f20,pos+48(sp); \ + lfd f21,pos+56(sp); \ + lfd f22,pos+64(sp); \ + lfd f23,pos+72(sp); \ + lfd f24,pos+80(sp); \ + lfd f25,pos+88(sp); \ + lfd f26,pos+96(sp); \ + lfd f27,pos+104(sp); \ + lfd f28,pos+112(sp); \ + lfd f29,pos+120(sp); \ + lfd f30,pos+128(sp); \ + lfd f31,pos+136(sp); + +# Careful calling this, it will 'clobber' fpu (by design) +# Don't call this from C +FUNC_START(load_fpu) + lfd f14,0(r3) + lfd f15,8(r3) + lfd f16,16(r3) + lfd f17,24(r3) + lfd f18,32(r3) + lfd f19,40(r3) + lfd f20,48(r3) + lfd f21,56(r3) + lfd f22,64(r3) + lfd f23,72(r3) + lfd f24,80(r3) + lfd f25,88(r3) + lfd f26,96(r3) + lfd f27,104(r3) + lfd f28,112(r3) + lfd f29,120(r3) + lfd f30,128(r3) + lfd f31,136(r3) + blr +FUNC_END(load_fpu) + +FUNC_START(check_fpu) + mr r4,r3 + li r3,1 # assume a bad result + lfd f0,0(r4) + fcmpu cr1,f0,f14 + bne cr1,1f + lfd f0,8(r4) + fcmpu cr1,f0,f15 + bne cr1,1f + lfd f0,16(r4) + fcmpu cr1,f0,f16 + bne cr1,1f + lfd f0,24(r4) + fcmpu cr1,f0,f17 + bne cr1,1f + lfd f0,32(r4) + fcmpu cr1,f0,f18 + bne cr1,1f + lfd f0,40(r4) + fcmpu cr1,f0,f19 + bne cr1,1f + lfd f0,48(r4) + fcmpu cr1,f0,f20 + bne cr1,1f + lfd f0,56(r4) + fcmpu cr1,f0,f21 + bne cr1,1f + lfd f0,64(r4) + fcmpu cr1,f0,f22 + bne cr1,1f + lfd f0,72(r4) + fcmpu cr1,f0,f23 + bne cr1,1f + lfd f0,80(r4) + fcmpu cr1,f0,f24 + bne cr1,1f + lfd f0,88(r4) + fcmpu cr1,f0,f25 + bne cr1,1f + lfd f0,96(r4) + fcmpu cr1,f0,f26 + bne cr1,1f + lfd f0,104(r4) + fcmpu cr1,f0,f27 + bne cr1,1f + lfd f0,112(r4) + fcmpu cr1,f0,f28 + bne cr1,1f + lfd f0,120(r4) + fcmpu cr1,f0,f29 + bne cr1,1f + lfd f0,128(r4) + fcmpu cr1,f0,f30 + bne cr1,1f + lfd f0,136(r4) + fcmpu cr1,f0,f31 + bne cr1,1f + li r3,0 # Success!!! +1: blr + +FUNC_START(test_fpu) + # r3 holds pointer to where to put the result of fork + # r4 holds pointer to the pid + # f14-f31 are non volatiles + PUSH_BASIC_STACK(256) + std r3,STACK_FRAME_PARAM(0)(sp) # Address of darray + std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid + PUSH_FPU(STACK_FRAME_LOCAL(2,0)) + + bl load_fpu + nop + li r0,__NR_fork + sc + + # pass the result of the fork to the caller + ld r9,STACK_FRAME_PARAM(1)(sp) + std r3,0(r9) + + ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_fpu + nop + + POP_FPU(STACK_FRAME_LOCAL(2,0)) + POP_BASIC_STACK(256) + blr +FUNC_END(test_fpu) + +# int preempt_fpu(double *darray, int *threads_running, int *running) +# On starting will (atomically) decrement not_ready as a signal that the FPU +# has been loaded with darray. Will proceed to check the validity of the FPU +# registers while running is not zero. +FUNC_START(preempt_fpu) + PUSH_BASIC_STACK(256) + std r3,STACK_FRAME_PARAM(0)(sp) # double *darray + std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting + std r5,STACK_FRAME_PARAM(2)(sp) # int *running + PUSH_FPU(STACK_FRAME_LOCAL(3,0)) + + bl load_fpu + nop + + sync + # Atomic DEC + ld r3,STACK_FRAME_PARAM(1)(sp) +1: lwarx r4,0,r3 + addi r4,r4,-1 + stwcx. r4,0,r3 + bne- 1b + +2: ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_fpu + nop + cmpdi r3,0 + bne 3f + ld r4,STACK_FRAME_PARAM(2)(sp) + ld r5,0(r4) + cmpwi r5,0 + bne 2b + +3: POP_FPU(STACK_FRAME_LOCAL(3,0)) + POP_BASIC_STACK(256) + blr +FUNC_END(preempt_fpu) diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c new file mode 100644 index 000000000000..0f85b79d883d --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c @@ -0,0 +1,113 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the FPU registers change across preemption. + * Two things should be noted here a) The check_fpu function in asm only checks + * the non volatile registers as it is reused from the syscall test b) There is + * no way to be sure preemption happened so this test just uses many threads + * and a long wait. As such, a successful test doesn't mean much but a failure + * is bad. + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> +#include <pthread.h> + +#include "utils.h" + +/* Time to wait for workers to get preempted (seconds) */ +#define PREEMPT_TIME 20 +/* + * Factor by which to multiply number of online CPUs for total number of + * worker threads + */ +#define THREAD_FACTOR 8 + + +__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, + 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, + 2.1}; + +int threads_starting; +int running; + +extern void preempt_fpu(double *darray, int *threads_starting, int *running); + +void *preempt_fpu_c(void *p) +{ + int i; + srand(pthread_self()); + for (i = 0; i < 21; i++) + darray[i] = rand(); + + /* Test failed if it ever returns */ + preempt_fpu(darray, &threads_starting, &running); + + return p; +} + +int test_preempt_fpu(void) +{ + int i, rc, threads; + pthread_t *tids; + + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; + tids = malloc((threads) * sizeof(pthread_t)); + FAIL_IF(!tids); + + running = true; + threads_starting = threads; + for (i = 0; i < threads; i++) { + rc = pthread_create(&tids[i], NULL, preempt_fpu_c, NULL); + FAIL_IF(rc); + } + + setbuf(stdout, NULL); + /* Not really necessary but nice to wait for every thread to start */ + printf("\tWaiting for all workers to start..."); + while(threads_starting) + asm volatile("": : :"memory"); + printf("done\n"); + + printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME); + sleep(PREEMPT_TIME); + printf("done\n"); + + printf("\tStopping workers..."); + /* + * Working are checking this value every loop. In preempt_fpu 'cmpwi r5,0; bne 2b'. + * r5 will have loaded the value of running. + */ + running = 0; + for (i = 0; i < threads; i++) { + void *rc_p; + pthread_join(tids[i], &rc_p); + + /* + * Harness will say the fail was here, look at why preempt_fpu + * returned + */ + if ((long) rc_p) + printf("oops\n"); + FAIL_IF((long) rc_p); + } + printf("done\n"); + + free(tids); + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_preempt_fpu, "fpu_preempt"); +} diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c b/tools/testing/selftests/powerpc/math/fpu_signal.c new file mode 100644 index 000000000000..888aa51b4204 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_signal.c @@ -0,0 +1,135 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the FPU registers are correctly reported in a + * signal context. Each worker just spins checking its FPU registers, at some + * point a signal will interrupt it and C code will check the signal context + * ensuring it is also the same. + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> +#include <pthread.h> + +#include "utils.h" + +/* Number of times each thread should receive the signal */ +#define ITERATIONS 10 +/* + * Factor by which to multiply number of online CPUs for total number of + * worker threads + */ +#define THREAD_FACTOR 8 + +__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, + 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, + 2.1}; + +bool bad_context; +int threads_starting; +int running; + +extern long preempt_fpu(double *darray, int *threads_starting, int *running); + +void signal_fpu_sig(int sig, siginfo_t *info, void *context) +{ + int i; + ucontext_t *uc = context; + mcontext_t *mc = &uc->uc_mcontext; + + /* Only the non volatiles were loaded up */ + for (i = 14; i < 32; i++) { + if (mc->fp_regs[i] != darray[i - 14]) { + bad_context = true; + break; + } + } +} + +void *signal_fpu_c(void *p) +{ + int i; + long rc; + struct sigaction act; + act.sa_sigaction = signal_fpu_sig; + act.sa_flags = SA_SIGINFO; + rc = sigaction(SIGUSR1, &act, NULL); + if (rc) + return p; + + srand(pthread_self()); + for (i = 0; i < 21; i++) + darray[i] = rand(); + + rc = preempt_fpu(darray, &threads_starting, &running); + + return (void *) rc; +} + +int test_signal_fpu(void) +{ + int i, j, rc, threads; + void *rc_p; + pthread_t *tids; + + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; + tids = malloc(threads * sizeof(pthread_t)); + FAIL_IF(!tids); + + running = true; + threads_starting = threads; + for (i = 0; i < threads; i++) { + rc = pthread_create(&tids[i], NULL, signal_fpu_c, NULL); + FAIL_IF(rc); + } + + setbuf(stdout, NULL); + printf("\tWaiting for all workers to start..."); + while (threads_starting) + asm volatile("": : :"memory"); + printf("done\n"); + + printf("\tSending signals to all threads %d times...", ITERATIONS); + for (i = 0; i < ITERATIONS; i++) { + for (j = 0; j < threads; j++) { + pthread_kill(tids[j], SIGUSR1); + } + sleep(1); + } + printf("done\n"); + + printf("\tStopping workers..."); + running = 0; + for (i = 0; i < threads; i++) { + pthread_join(tids[i], &rc_p); + + /* + * Harness will say the fail was here, look at why signal_fpu + * returned + */ + if ((long) rc_p || bad_context) + printf("oops\n"); + if (bad_context) + fprintf(stderr, "\t!! bad_context is true\n"); + FAIL_IF((long) rc_p || bad_context); + } + printf("done\n"); + + free(tids); + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_signal_fpu, "fpu_signal"); +} diff --git a/tools/testing/selftests/powerpc/math/fpu_syscall.c b/tools/testing/selftests/powerpc/math/fpu_syscall.c new file mode 100644 index 000000000000..949e6721256d --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_syscall.c @@ -0,0 +1,90 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the FPU registers change across a syscall (fork). + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> + +#include "utils.h" + +extern int test_fpu(double *darray, pid_t *pid); + +double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, + 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, + 2.1}; + +int syscall_fpu(void) +{ + pid_t fork_pid; + int i; + int ret; + int child_ret; + for (i = 0; i < 1000; i++) { + /* test_fpu will fork() */ + ret = test_fpu(darray, &fork_pid); + if (fork_pid == -1) + return -1; + if (fork_pid == 0) + exit(ret); + waitpid(fork_pid, &child_ret, 0); + if (ret || child_ret) + return 1; + } + + return 0; +} + +int test_syscall_fpu(void) +{ + /* + * Setup an environment with much context switching + */ + pid_t pid2; + pid_t pid = fork(); + int ret; + int child_ret; + FAIL_IF(pid == -1); + + pid2 = fork(); + /* Can't FAIL_IF(pid2 == -1); because already forked once */ + if (pid2 == -1) { + /* + * Couldn't fork, ensure test is a fail + */ + child_ret = ret = 1; + } else { + ret = syscall_fpu(); + if (pid2) + waitpid(pid2, &child_ret, 0); + else + exit(ret); + } + + ret |= child_ret; + + if (pid) + waitpid(pid, &child_ret, 0); + else + exit(ret); + + FAIL_IF(ret || child_ret); + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_syscall_fpu, "syscall_fpu"); + +} diff --git a/tools/testing/selftests/powerpc/math/vmx_asm.S b/tools/testing/selftests/powerpc/math/vmx_asm.S new file mode 100644 index 000000000000..1b8c248b3ac1 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/vmx_asm.S @@ -0,0 +1,235 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "../basic_asm.h" + +# POS MUST BE 16 ALIGNED! +#define PUSH_VMX(pos,reg) \ + li reg,pos; \ + stvx v20,reg,sp; \ + addi reg,reg,16; \ + stvx v21,reg,sp; \ + addi reg,reg,16; \ + stvx v22,reg,sp; \ + addi reg,reg,16; \ + stvx v23,reg,sp; \ + addi reg,reg,16; \ + stvx v24,reg,sp; \ + addi reg,reg,16; \ + stvx v25,reg,sp; \ + addi reg,reg,16; \ + stvx v26,reg,sp; \ + addi reg,reg,16; \ + stvx v27,reg,sp; \ + addi reg,reg,16; \ + stvx v28,reg,sp; \ + addi reg,reg,16; \ + stvx v29,reg,sp; \ + addi reg,reg,16; \ + stvx v30,reg,sp; \ + addi reg,reg,16; \ + stvx v31,reg,sp; + +# POS MUST BE 16 ALIGNED! +#define POP_VMX(pos,reg) \ + li reg,pos; \ + lvx v20,reg,sp; \ + addi reg,reg,16; \ + lvx v21,reg,sp; \ + addi reg,reg,16; \ + lvx v22,reg,sp; \ + addi reg,reg,16; \ + lvx v23,reg,sp; \ + addi reg,reg,16; \ + lvx v24,reg,sp; \ + addi reg,reg,16; \ + lvx v25,reg,sp; \ + addi reg,reg,16; \ + lvx v26,reg,sp; \ + addi reg,reg,16; \ + lvx v27,reg,sp; \ + addi reg,reg,16; \ + lvx v28,reg,sp; \ + addi reg,reg,16; \ + lvx v29,reg,sp; \ + addi reg,reg,16; \ + lvx v30,reg,sp; \ + addi reg,reg,16; \ + lvx v31,reg,sp; + +# Carefull this will 'clobber' vmx (by design) +# Don't call this from C +FUNC_START(load_vmx) + li r5,0 + lvx v20,r5,r3 + addi r5,r5,16 + lvx v21,r5,r3 + addi r5,r5,16 + lvx v22,r5,r3 + addi r5,r5,16 + lvx v23,r5,r3 + addi r5,r5,16 + lvx v24,r5,r3 + addi r5,r5,16 + lvx v25,r5,r3 + addi r5,r5,16 + lvx v26,r5,r3 + addi r5,r5,16 + lvx v27,r5,r3 + addi r5,r5,16 + lvx v28,r5,r3 + addi r5,r5,16 + lvx v29,r5,r3 + addi r5,r5,16 + lvx v30,r5,r3 + addi r5,r5,16 + lvx v31,r5,r3 + blr +FUNC_END(load_vmx) + +# Should be safe from C, only touches r4, r5 and v0,v1,v2 +FUNC_START(check_vmx) + PUSH_BASIC_STACK(32) + mr r4,r3 + li r3,1 # assume a bad result + li r5,0 + lvx v0,r5,r4 + vcmpequd. v1,v0,v20 + vmr v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v21 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v22 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v23 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v24 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v25 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v26 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v27 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v28 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v29 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v30 + vand v2,v2,v1 + + addi r5,r5,16 + lvx v0,r5,r4 + vcmpequd. v1,v0,v31 + vand v2,v2,v1 + + li r5,STACK_FRAME_LOCAL(0,0) + stvx v2,r5,sp + ldx r0,r5,sp + cmpdi r0,0xffffffffffffffff + bne 1f + li r3,0 +1: POP_BASIC_STACK(32) + blr +FUNC_END(check_vmx) + +# Safe from C +FUNC_START(test_vmx) + # r3 holds pointer to where to put the result of fork + # r4 holds pointer to the pid + # v20-v31 are non-volatile + PUSH_BASIC_STACK(512) + std r3,STACK_FRAME_PARAM(0)(sp) # Address of varray + std r4,STACK_FRAME_PARAM(1)(sp) # address of pid + PUSH_VMX(STACK_FRAME_LOCAL(2,0),r4) + + bl load_vmx + nop + + li r0,__NR_fork + sc + # Pass the result of fork back to the caller + ld r9,STACK_FRAME_PARAM(1)(sp) + std r3,0(r9) + + ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_vmx + nop + + POP_VMX(STACK_FRAME_LOCAL(2,0),r4) + POP_BASIC_STACK(512) + blr +FUNC_END(test_vmx) + +# int preempt_vmx(vector int *varray, int *threads_starting, int *running) +# On starting will (atomically) decrement threads_starting as a signal that +# the VMX have been loaded with varray. Will proceed to check the validity of +# the VMX registers while running is not zero. +FUNC_START(preempt_vmx) + PUSH_BASIC_STACK(512) + std r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray + std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting + std r5,STACK_FRAME_PARAM(2)(sp) # int *running + # VMX need to write to 16 byte aligned addresses, skip STACK_FRAME_LOCAL(3,0) + PUSH_VMX(STACK_FRAME_LOCAL(4,0),r4) + + bl load_vmx + nop + + sync + # Atomic DEC + ld r3,STACK_FRAME_PARAM(1)(sp) +1: lwarx r4,0,r3 + addi r4,r4,-1 + stwcx. r4,0,r3 + bne- 1b + +2: ld r3,STACK_FRAME_PARAM(0)(sp) + bl check_vmx + nop + cmpdi r3,0 + bne 3f + ld r4,STACK_FRAME_PARAM(2)(sp) + ld r5,0(r4) + cmpwi r5,0 + bne 2b + +3: POP_VMX(STACK_FRAME_LOCAL(4,0),r4) + POP_BASIC_STACK(512) + blr +FUNC_END(preempt_vmx) diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c new file mode 100644 index 000000000000..9ef376c55b13 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c @@ -0,0 +1,112 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the VMX registers change across preemption. + * Two things should be noted here a) The check_vmx function in asm only checks + * the non volatile registers as it is reused from the syscall test b) There is + * no way to be sure preemption happened so this test just uses many threads + * and a long wait. As such, a successful test doesn't mean much but a failure + * is bad. + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> +#include <pthread.h> + +#include "utils.h" + +/* Time to wait for workers to get preempted (seconds) */ +#define PREEMPT_TIME 20 +/* + * Factor by which to multiply number of online CPUs for total number of + * worker threads + */ +#define THREAD_FACTOR 8 + +__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12}, + {13,14,15,16},{17,18,19,20},{21,22,23,24}, + {25,26,27,28},{29,30,31,32},{33,34,35,36}, + {37,38,39,40},{41,42,43,44},{45,46,47,48}}; + +int threads_starting; +int running; + +extern void preempt_vmx(vector int *varray, int *threads_starting, int *running); + +void *preempt_vmx_c(void *p) +{ + int i, j; + srand(pthread_self()); + for (i = 0; i < 12; i++) + for (j = 0; j < 4; j++) + varray[i][j] = rand(); + + /* Test fails if it ever returns */ + preempt_vmx(varray, &threads_starting, &running); + return p; +} + +int test_preempt_vmx(void) +{ + int i, rc, threads; + pthread_t *tids; + + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; + tids = malloc(threads * sizeof(pthread_t)); + FAIL_IF(!tids); + + running = true; + threads_starting = threads; + for (i = 0; i < threads; i++) { + rc = pthread_create(&tids[i], NULL, preempt_vmx_c, NULL); + FAIL_IF(rc); + } + + setbuf(stdout, NULL); + /* Not really nessesary but nice to wait for every thread to start */ + printf("\tWaiting for all workers to start..."); + while(threads_starting) + asm volatile("": : :"memory"); + printf("done\n"); + + printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME); + sleep(PREEMPT_TIME); + printf("done\n"); + + printf("\tStopping workers..."); + /* + * Working are checking this value every loop. In preempt_vmx 'cmpwi r5,0; bne 2b'. + * r5 will have loaded the value of running. + */ + running = 0; + for (i = 0; i < threads; i++) { + void *rc_p; + pthread_join(tids[i], &rc_p); + + /* + * Harness will say the fail was here, look at why preempt_vmx + * returned + */ + if ((long) rc_p) + printf("oops\n"); + FAIL_IF((long) rc_p); + } + printf("done\n"); + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_preempt_vmx, "vmx_preempt"); +} diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c new file mode 100644 index 000000000000..671d7533a557 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/vmx_signal.c @@ -0,0 +1,156 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the VMX registers are correctly reported in a + * signal context. Each worker just spins checking its VMX registers, at some + * point a signal will interrupt it and C code will check the signal context + * ensuring it is also the same. + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> +#include <string.h> +#include <pthread.h> +#include <altivec.h> + +#include "utils.h" + +/* Number of times each thread should receive the signal */ +#define ITERATIONS 10 +/* + * Factor by which to multiply number of online CPUs for total number of + * worker threads + */ +#define THREAD_FACTOR 8 + +__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12}, + {13,14,15,16},{17,18,19,20},{21,22,23,24}, + {25,26,27,28},{29,30,31,32},{33,34,35,36}, + {37,38,39,40},{41,42,43,44},{45,46,47,48}}; + +bool bad_context; +int running; +int threads_starting; + +extern int preempt_vmx(vector int *varray, int *threads_starting, int *sentinal); + +void signal_vmx_sig(int sig, siginfo_t *info, void *context) +{ + int i; + ucontext_t *uc = context; + mcontext_t *mc = &uc->uc_mcontext; + + /* Only the non volatiles were loaded up */ + for (i = 20; i < 32; i++) { + if (memcmp(mc->v_regs->vrregs[i], &varray[i - 20], 16)) { + int j; + /* + * Shouldn't printf() in a signal handler, however, this is a + * test and we've detected failure. Understanding what failed + * is paramount. All that happens after this is tests exit with + * failure. + */ + printf("VMX mismatch at reg %d!\n", i); + printf("Reg | Actual | Expected\n"); + for (j = 20; j < 32; j++) { + printf("%d | 0x%04x%04x%04x%04x | 0x%04x%04x%04x%04x\n", j, mc->v_regs->vrregs[j][0], + mc->v_regs->vrregs[j][1], mc->v_regs->vrregs[j][2], mc->v_regs->vrregs[j][3], + varray[j - 20][0], varray[j - 20][1], varray[j - 20][2], varray[j - 20][3]); + } + bad_context = true; + break; + } + } +} + +void *signal_vmx_c(void *p) +{ + int i, j; + long rc; + struct sigaction act; + act.sa_sigaction = signal_vmx_sig; + act.sa_flags = SA_SIGINFO; + rc = sigaction(SIGUSR1, &act, NULL); + if (rc) + return p; + + srand(pthread_self()); + for (i = 0; i < 12; i++) + for (j = 0; j < 4; j++) + varray[i][j] = rand(); + + rc = preempt_vmx(varray, &threads_starting, &running); + + return (void *) rc; +} + +int test_signal_vmx(void) +{ + int i, j, rc, threads; + void *rc_p; + pthread_t *tids; + + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; + tids = malloc(threads * sizeof(pthread_t)); + FAIL_IF(!tids); + + running = true; + threads_starting = threads; + for (i = 0; i < threads; i++) { + rc = pthread_create(&tids[i], NULL, signal_vmx_c, NULL); + FAIL_IF(rc); + } + + setbuf(stdout, NULL); + printf("\tWaiting for %d workers to start... %d", threads, threads_starting); + while (threads_starting) { + asm volatile("": : :"memory"); + usleep(1000); + printf(", %d", threads_starting); + } + printf(" ...done\n"); + + printf("\tSending signals to all threads %d times...", ITERATIONS); + for (i = 0; i < ITERATIONS; i++) { + for (j = 0; j < threads; j++) { + pthread_kill(tids[j], SIGUSR1); + } + sleep(1); + } + printf("done\n"); + + printf("\tKilling workers..."); + running = 0; + for (i = 0; i < threads; i++) { + pthread_join(tids[i], &rc_p); + + /* + * Harness will say the fail was here, look at why signal_vmx + * returned + */ + if ((long) rc_p || bad_context) + printf("oops\n"); + if (bad_context) + fprintf(stderr, "\t!! bad_context is true\n"); + FAIL_IF((long) rc_p || bad_context); + } + printf("done\n"); + + free(tids); + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_signal_vmx, "vmx_signal"); +} diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c new file mode 100644 index 000000000000..a017918ee1ca --- /dev/null +++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c @@ -0,0 +1,91 @@ +/* + * Copyright 2015, Cyril Bur, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test attempts to see if the VMX registers change across a syscall (fork). + */ + +#include <altivec.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include "utils.h" + +vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12}, + {13,14,15,16},{17,18,19,20},{21,22,23,24}, + {25,26,27,28},{29,30,31,32},{33,34,35,36}, + {37,38,39,40},{41,42,43,44},{45,46,47,48}}; + +extern int test_vmx(vector int *varray, pid_t *pid); + +int vmx_syscall(void) +{ + pid_t fork_pid; + int i; + int ret; + int child_ret; + for (i = 0; i < 1000; i++) { + /* test_vmx will fork() */ + ret = test_vmx(varray, &fork_pid); + if (fork_pid == -1) + return -1; + if (fork_pid == 0) + exit(ret); + waitpid(fork_pid, &child_ret, 0); + if (ret || child_ret) + return 1; + } + + return 0; +} + +int test_vmx_syscall(void) +{ + /* + * Setup an environment with much context switching + */ + pid_t pid2; + pid_t pid = fork(); + int ret; + int child_ret; + FAIL_IF(pid == -1); + + pid2 = fork(); + ret = vmx_syscall(); + /* Can't FAIL_IF(pid2 == -1); because we've already forked */ + if (pid2 == -1) { + /* + * Couldn't fork, ensure child_ret is set and is a fail + */ + ret = child_ret = 1; + } else { + if (pid2) + waitpid(pid2, &child_ret, 0); + else + exit(ret); + } + + ret |= child_ret; + + if (pid) + waitpid(pid, &child_ret, 0); + else + exit(ret); + + FAIL_IF(ret || child_ret); + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_vmx_syscall, "vmx_syscall"); + +} diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c index d86653f282b1..8c54d18b3e9a 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c @@ -40,7 +40,7 @@ void signal_usr1(int signum, siginfo_t *info, void *uc) #ifdef __powerpc64__ ucp->uc_mcontext.gp_regs[PT_MSR] |= (7ULL << 32); #else - ucp->uc_mcontext.regs->gpr[PT_MSR] |= (7ULL); + ucp->uc_mcontext.uc_regs->gregs[PT_MSR] |= (7ULL); #endif /* Should segv on return becuase of invalid context */ segv_expected = 1; |